diff --git a/.cproject b/.cproject index b93835e25..1db9a198e 100644 --- a/.cproject +++ b/.cproject @@ -66,7 +66,7 @@ - + diff --git a/.gitignore b/.gitignore index f1fd0c9c9..fd5def3b6 100644 --- a/.gitignore +++ b/.gitignore @@ -44,6 +44,12 @@ src/async.c wolfssl/async.h wolfcrypt/src/async.c wolfssl/wolfcrypt/async.h +wolfcrypt/src/port/intel/quickassist.c +wolfcrypt/src/port/intel/quickassist_mem.c +wolfcrypt/src/port/cavium/cavium_nitrox.c +wolfssl/wolfcrypt/port/intel/quickassist.h +wolfssl/wolfcrypt/port/intel/quickassist_mem.h +wolfssl/wolfcrypt/port/cavium/cavium_nitrox.h ctaocrypt/benchmark/benchmark ctaocrypt/test/testctaocrypt wolfcrypt/benchmark/benchmark @@ -191,3 +197,6 @@ wrapper/CSharp/x64/ # Visual Studio Code Workspace Files *.vscode IDE/INTIME-RTOS/Debug_* + +# Binaries +wolfcrypt/src/port/intel/qat_test diff --git a/IDE/ROWLEY-CROSSWORKS-ARM/user_settings.h b/IDE/ROWLEY-CROSSWORKS-ARM/user_settings.h index 5641973c9..95a795e02 100644 --- a/IDE/ROWLEY-CROSSWORKS-ARM/user_settings.h +++ b/IDE/ROWLEY-CROSSWORKS-ARM/user_settings.h @@ -278,17 +278,21 @@ extern "C" { /* Size of returned HW RNG value */ #define CUSTOM_RAND_TYPE unsigned int +/* Seed source */ +extern unsigned int custom_rand_generate(void); +#undef CUSTOM_RAND_GENERATE +#define CUSTOM_RAND_GENERATE custom_rand_generate + /* Choose RNG method */ #if 1 /* Use built-in P-RNG (SHA256 based) with HW RNG */ /* P-RNG + HW RNG (P-RNG is ~8K) */ #undef HAVE_HASHDRBG #define HAVE_HASHDRBG - - extern unsigned int custom_rand_generate(void); - #undef CUSTOM_RAND_GENERATE - #define CUSTOM_RAND_GENERATE custom_rand_generate #else + #undef WC_NO_HASHDRBG + #define WC_NO_HASHDRBG + /* Bypass P-RNG and use only HW RNG */ extern int custom_rand_generate_block(unsigned char* output, unsigned int sz); #undef CUSTOM_RAND_GENERATE_BLOCK diff --git a/IDE/ROWLEY-CROSSWORKS-ARM/wolfssl.hzp b/IDE/ROWLEY-CROSSWORKS-ARM/wolfssl.hzp index 3deb98b3e..8b228c4c4 100644 --- a/IDE/ROWLEY-CROSSWORKS-ARM/wolfssl.hzp 
+++ b/IDE/ROWLEY-CROSSWORKS-ARM/wolfssl.hzp @@ -122,12 +122,26 @@ recurse="Yes" /> - + + + + + + + + + + + + + + + + + + + + diff --git a/IDE/ROWLEY-CROSSWORKS-ARM/wolfssl_ltc.hzp b/IDE/ROWLEY-CROSSWORKS-ARM/wolfssl_ltc.hzp index 357ac26f3..30156bf33 100644 --- a/IDE/ROWLEY-CROSSWORKS-ARM/wolfssl_ltc.hzp +++ b/IDE/ROWLEY-CROSSWORKS-ARM/wolfssl_ltc.hzp @@ -300,12 +300,26 @@ recurse="Yes" /> - + + + + + + + + + + + + + + + + + + + + diff --git a/autogen.sh b/autogen.sh index 6b08f3cd5..e5ea530fa 100755 --- a/autogen.sh +++ b/autogen.sh @@ -22,6 +22,20 @@ if test -e .git; then # touch async crypt files touch ./wolfcrypt/src/async.c touch ./wolfssl/wolfcrypt/async.h + + # touch async port files + touch ./wolfcrypt/src/port/intel/quickassist.c + touch ./wolfcrypt/src/port/intel/quickassist_mem.c + touch ./wolfcrypt/src/port/cavium/cavium_nitrox.c + if [ ! -d ./wolfssl/wolfcrypt/port/intel ]; then + mkdir ./wolfssl/wolfcrypt/port/intel + fi + touch ./wolfssl/wolfcrypt/port/intel/quickassist.h + touch ./wolfssl/wolfcrypt/port/intel/quickassist_mem.h + if [ ! -d ./wolfssl/wolfcrypt/port/cavium ]; then + mkdir ./wolfssl/wolfcrypt/port/cavium + fi + touch ./wolfssl/wolfcrypt/port/cavium/cavium_nitrox.h else WARNINGS="all" fi diff --git a/configure.ac b/configure.ac index 12e6fed6f..4dcc90eb6 100644 --- a/configure.ac +++ b/configure.ac @@ -6,7 +6,7 @@ # # -AC_INIT([wolfssl],[3.10.3],[https://github.com/wolfssl/wolfssl/issues],[wolfssl],[http://www.wolfssl.com]) +AC_INIT([wolfssl],[3.10.4],[https://github.com/wolfssl/wolfssl/issues],[wolfssl],[http://www.wolfssl.com]) AC_CONFIG_AUX_DIR([build-aux]) @@ -35,7 +35,7 @@ AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_HEADERS([config.h:config.in])dnl Keep filename to 8.3 for MS-DOS. 
#shared library versioning -WOLFSSL_LIBRARY_VERSION=10:1:0 +WOLFSSL_LIBRARY_VERSION=11:0:0 # | | | # +------+ | +---+ # | | | @@ -151,6 +151,7 @@ then enable_camellia=yes enable_ripemd=yes enable_sha512=yes + enable_sha224=yes enable_sessioncerts=yes enable_keygen=yes enable_certgen=yes @@ -192,6 +193,9 @@ then enable_stunnel=yes enable_nginx=yes enable_pwdbased=yes + enable_aeskeywrap=yes + enable_x963kdf=yes + enable_scrypt=yes fi AM_CONDITIONAL([BUILD_DISTRO], [test "x$ENABLED_DISTRO" = "xyes"]) @@ -228,7 +232,7 @@ fi AC_ARG_ENABLE([rng], - [AS_HELP_STRING([ --enable-rng Enable compiling and using RNG (default: enabled)])], + [AS_HELP_STRING([--enable-rng Enable compiling and using RNG (default: enabled)])], [ ENABLED_RNG=$enableval ], [ ENABLED_RNG=yes ] ) @@ -334,7 +338,7 @@ AM_CONDITIONAL([BUILD_IPV6], [test "x$ENABLED_IPV6" = "xyes"]) # wpa_supplicant support AC_ARG_ENABLE([wpas], - [ --enable-wpas Enable wpa_supplicant support (default: disabled)], + [ --enable-wpas Enable wpa_supplicant support (default: disabled)], [ ENABLED_WPAS=$enableval ], [ ENABLED_WPAS=no ] ) @@ -342,7 +346,8 @@ if test "$ENABLED_WPAS" = "yes" then AM_CFLAGS="$AM_CFLAGS -DHAVE_SECRET_CALLBACK -DWOLFSSL_STATIC_RSA" AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_PUBLIC_MP -DWOLFSSL_PUBLIC_ECC_ADD_DBL" - AM_CFLAGS="$AM_CFLAGS -DATOMIC_USER" + AM_CFLAGS="$AM_CFLAGS -DATOMIC_USER -DHAVE_EX_DATA -DWOLFSSL_KEEP_PEER_CERT" + AM_CFLAGS="$AM_CFLAGS -DHAVE_EXT_CACHE" AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_WPAS" fi @@ -361,7 +366,7 @@ fi if test "$ENABLED_FORTRESS" = "yes" then - AM_CFLAGS="$AM_CFLAGS -DFORTRESS -DWOLFSSL_ALWAYS_VERIFY_CB -DOPENSSL_EXTRA -DWOLFSSL_DES_ECB -DWOLFSSL_AES_COUNTER -DWOLFSSL_AES_DIRECT -DWOLFSSL_DER_LOAD -DWOLFSSL_SHA512 -DWOLFSSL_SHA384 -DWOLFSSL_KEY_GEN" + AM_CFLAGS="$AM_CFLAGS -DFORTRESS -DWOLFSSL_ALWAYS_VERIFY_CB -DOPENSSL_EXTRA -DWOLFSSL_AES_COUNTER -DWOLFSSL_AES_DIRECT -DWOLFSSL_DER_LOAD -DWOLFSSL_SHA512 -DWOLFSSL_SHA384 -DWOLFSSL_KEY_GEN" fi @@ -476,6 +481,19 @@ then fi +# Write 
duplicate WOLFSSL object +AC_ARG_ENABLE([writedup], + [ --enable-writedup Enable write duplication of WOLFSSL objects (default: disabled)], + [ ENABLED_WRITEDUP=$enableval ], + [ ENABLED_WRITEDUP=no ] + ) + +if test "$ENABLED_WRITEDUP" = "yes" +then + AM_CFLAGS="$AM_CFLAGS -DHAVE_WRITE_DUP" +fi + + # Atomic User Record Layer AC_ARG_ENABLE([atomicuser], [ --enable-atomicuser Enable Atomic User Record Layer (default: disabled)], @@ -612,7 +630,7 @@ fi AM_CONDITIONAL([BUILD_ARMASM], [test "x$ENABLED_ARMASM" = "xyes"]) -# AES-NI +# INTEL AES-NI AC_ARG_ENABLE([aesni], [AS_HELP_STRING([--enable-aesni],[Enable wolfSSL AES-NI support (default: disabled)])], [ ENABLED_AESNI=$enableval ], @@ -626,6 +644,7 @@ AC_ARG_ENABLE([intelasm], [ ENABLED_INTELASM=no ] ) + if test "$ENABLED_AESNI" = "yes" || test "$ENABLED_INTELASM" = "yes" then AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_AESNI" @@ -643,10 +662,22 @@ fi if test "$ENABLED_INTELASM" = "yes" then - AM_CFLAGS="$AM_CFLAGS -DHAVE_INTEL_RDGEN -DUSE_INTEL_SPEEDUP" + AM_CFLAGS="$AM_CFLAGS -DHAVE_INTEL_RDSEED -DUSE_INTEL_SPEEDUP" ENABLED_AESNI=yes fi +# INTEL RDRAND +AC_ARG_ENABLE([intelrand], + [AS_HELP_STRING([--enable-intelrand],[Enable Intel rdrand as preferred RNG source (default: disabled)])], + [ ENABLED_INTELRDRAND=$enableval ], + [ ENABLED_INTELRDRAND=no ] + ) + +if test "$ENABLED_INTELRDRAND" = "yes" +then + AM_CFLAGS="$AM_CFLAGS -DHAVE_INTEL_RDRAND" +fi + AM_CONDITIONAL([BUILD_AESNI], [test "x$ENABLED_AESNI" = "xyes"]) @@ -965,6 +996,12 @@ AC_ARG_ENABLE([ecccustcurves], if test "$ENABLED_ECCCUSTCURVES" = "yes" then AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_CUSTOM_CURVES" + + # For distro build, enable all curve types + if test "$ENABLED_DISTRO" = "yes" + then + AM_CFLAGS="$AM_CFLAGS -DHAVE_ECC_SECPR2 -DHAVE_ECC_SECPR3 -DHAVE_ECC_BRAINPOOL -DHAVE_ECC_KOBLITZ" + fi fi @@ -1578,6 +1615,11 @@ then ENABLED_DES3="yes" fi AM_CFLAGS="$AM_CFLAGS -DHAVE_FIPS" +else + if test "x$ENABLED_FORTRESS" = "xyes" + then + AM_CFLAGS="$AM_CFLAGS 
-DWOLFSSL_DES_ECB" + fi fi AM_CONDITIONAL([BUILD_FIPS], [test "x$ENABLED_FIPS" = "xyes"]) @@ -1677,11 +1719,13 @@ if test "x$ENABLED_HASHDRBG" = "xyes" then AM_CFLAGS="$AM_CFLAGS -DHAVE_HASHDRBG" else - # turn on Hash DRBG if FIPS is on or ARC4 is off - if test "x$ENABLED_FIPS" = "xyes" || test "x$ENABLED_ARC4" = "xno" + # turn on Hash DRBG if FIPS is on + if test "x$ENABLED_FIPS" = "xyes" then AM_CFLAGS="$AM_CFLAGS -DHAVE_HASHDRBG" ENABLED_HASHDRBG=yes + else + AM_CFLAGS="$AM_CFLAGS -DWC_NO_HASHDRBG" fi fi @@ -2018,7 +2062,7 @@ AC_ARG_ENABLE([maxfragment], # ALPN AC_ARG_ENABLE([alpn], - [ --enable-alpn Enable ALPN (default: disabled)], + [ --enable-alpn Enable ALPN (default: disabled)], [ ENABLED_ALPN=$enableval ], [ ENABLED_ALPN=no ] ) @@ -2692,6 +2736,7 @@ AC_ARG_WITH([cavium], [ AC_MSG_CHECKING([for cavium]) CPPFLAGS="$CPPFLAGS -DHAVE_CAVIUM" + LIB_ADD="-lrt $LIB_ADD" if test "x$withval" == "xyes" ; then AC_MSG_ERROR([need a PATH for --with-cavium]) @@ -2708,6 +2753,8 @@ AC_ARG_WITH([cavium], if test "x$cavium_linked" == "xno" ; then AC_MSG_ERROR([cavium isn't found. 
If it's already installed, specify its path using --with-cavium=/dir/]) + else + AM_CFLAGS="$AM_CFLAGS -DHAVE_CAVIUM" fi AC_MSG_RESULT([yes]) enable_shared=no @@ -2724,6 +2771,7 @@ AC_ARG_WITH([cavium-v], [ AC_MSG_CHECKING([for cavium]) CPPFLAGS="$CPPFLAGS -DHAVE_CAVIUM -DHAVE_CAVIUM_V" + LIB_ADD="-lrt $LIB_ADD" if test "x$withval" == "xyes" ; then AC_MSG_ERROR([need a PATH for --with-cavium]) @@ -2732,7 +2780,7 @@ AC_ARG_WITH([cavium-v], trycaviumdir=$withval fi - LDFLAGS="$AM_LDFLAGS $trycaviumdir/utils/sample_tests/cavium_common.o $trycaviumdir/utils/sample_tests/cavium_sym_crypto.o $trycaviumdir/utils/sample_tests/cavium_asym_crypto.o" + LDFLAGS="$AM_LDFLAGS $trycaviumdir/api/obj/cavium_common.o $trycaviumdir/api/obj/cavium_sym_crypto.o $trycaviumdir/api/obj/cavium_asym_crypto.o" CPPFLAGS="$CPPFLAGS -I$trycaviumdir/include" #AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include "cavium_common.h"]], [[ CspShutdown(0); ]])],[ cavium_linked=yes ],[ cavium_linked=no ]) @@ -2740,6 +2788,8 @@ AC_ARG_WITH([cavium-v], if test "x$cavium_linked" == "xno" ; then AC_MSG_ERROR([cavium isn't found. 
If it's already installed, specify its path using --with-cavium-v=/dir/]) + else + AM_CFLAGS="$AM_CFLAGS -DHAVE_CAVIUM -DHAVE_CAVIUM_V" fi AC_MSG_RESULT([yes]) @@ -2757,6 +2807,46 @@ AC_ARG_WITH([cavium-v], AM_CONDITIONAL([BUILD_CAVIUM], [test "x$ENABLED_CAVIUM" = "xyes"]) +# Intel Quick Assist +tryqatdir="" +AC_ARG_WITH([intelqa], + [ --with-intelqa=PATH PATH to Intel QuickAssit (QAT) driver dir ], + [ + AC_MSG_CHECKING([for intelqa]) + CPPFLAGS="$CPPFLAGS -DHAVE_INTEL_QA -DDO_CRYPTO -DUSER_SPACE" + + if test "x$withval" == "xyes" ; then + AC_MSG_ERROR([need a PATH for --with-intelqa]) + fi + if test "x$withval" != "xno" ; then + tryqatdir=$withval + fi + + CPPFLAGS="$CPPFLAGS -I$tryqatdir/quickassist/include -I$tryqatdir/quickassist/include/lac -I$tryqatdir/quickassist/utilities/osal/include -I$tryqatdir/quickassist/utilities/osal/src/linux/user_space/include -I$tryqatdir/quickassist/lookaside/access_layer/include -I$tryqatdir/quickassist/lookaside/access_layer/src/common/include -I$srcdir/wolfssl -I$srcdir/wolfssl/wolfcrypt/port/intel" + LDFLAGS="$LDFLAGS -L$tryqatdir/build -Wl,-Map=output.map" + LIBS="$LIBS -licp_qa_al_s" + LIB_ADD="-ladf_proxy -losal -lrt $LIB_ADD" + + AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include "cpa_cy_common.h"]], [[ Cpa16U count = 0; cpaCyGetNumInstances(&count); ]])],[ intelqa_linked=yes ],[ intelqa_linked=no ]) + + if test "x$intelqa_linked" == "xno" ; then + AC_MSG_ERROR([Intel QuickAssist not found. 
+ If it's already installed, specify its path using --with-intelqa=/dir/]) + else + AM_CFLAGS="$AM_CFLAGS -DHAVE_INTEL_QA -DDO_CRYPTO -DUSER_SPACE" + fi + AC_MSG_RESULT([yes]) + + ENABLED_INTEL_QA=yes + ], + [ + ENABLED_INTEL_QA=no + ] +) + +AM_CONDITIONAL([BUILD_INTEL_QA], [test "x$ENABLED_INTEL_QA" = "xyes"]) + + # Fast RSA using Intel IPP ippdir="${srcdir}/IPP" ipplib="lib" # if autoconf guesses 32bit system changes lib directory @@ -2956,18 +3046,19 @@ AM_CONDITIONAL([BUILD_MCAPI], [test "x$ENABLED_MCAPI" = "xyes"]) # Asynchronous Crypto AC_ARG_ENABLE([asynccrypt], - [ --enable-asynccrypt Enable Asynchronous Crypto (default: disabled)], + [ --enable-asynccrypt Enable Asynchronous Crypto (default: disabled)], [ ENABLED_ASYNCCRYPT=$enableval ], [ ENABLED_ASYNCCRYPT=no ] ) if test "$ENABLED_ASYNCCRYPT" = "yes" then - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_ASYNC_CRYPT -DHAVE_WOLF_EVENT" + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_ASYNC_CRYPT -DHAVE_WOLF_EVENT -DHAVE_WOLF_BIGINT" - # if Cavium not enabled the use async simulator for testing - if test "x$ENABLED_CAVIUM" = "xno" + # if no async hardware then use simulator for testing + if test "x$ENABLED_CAVIUM" = "xno" && test "x$ENABLED_INTEL_QA" = "xno" then + # Async threading is Linux specific AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_ASYNC_CRYPT_TEST" fi fi @@ -2976,6 +3067,35 @@ AM_CONDITIONAL([BUILD_ASYNCCRYPT], [test "x$ENABLED_ASYNCCRYPT" = "xyes"]) AM_CONDITIONAL([BUILD_WOLFEVENT], [test "x$ENABLED_ASYNCCRYPT" = "xyes"]) +# check for async if using Intel QuickAssist or Cavium +if test "x$ENABLED_INTEL_QA" = "xyes" || test "x$ENABLED_CAVIUM" = "xyes" ; then + if test "x$ENABLED_ASYNCCRYPT" = "xno" ; then + AC_MSG_ERROR([Please enable enable asynchronous support using --enable-asynccrypt]) + fi +fi + + +# Asynchronous threading +AC_ARG_ENABLE([asyncthreads], + [ --enable-asyncthreads Enable Asynchronous Threading (default: enabled)], + [ ENABLED_ASYNCTHREADS=$enableval ], + [ ENABLED_ASYNCTHREADS=yes ] + ) + +if test 
"$ENABLED_ASYNCCRYPT" = "yes" && test "$ENABLED_ASYNCTHREADS" = "yes" +then + AX_PTHREAD([ENABLED_ASYNCTHREADS=yes],[ENABLED_ASYNCTHREADS=no]) +else + ENABLED_ASYNCTHREADS=no +fi + +if test "$ENABLED_ASYNCTHREADS" = "yes" +then + LIB_ADD="-lpthread $LIB_ADD" + AM_CFLAGS="$AM_CFLAGS -D_GNU_SOURCE" +else + AM_CFLAGS="$AM_CFLAGS -DWC_NO_ASYNC_THREADING" +fi # Session Export @@ -3002,7 +3122,7 @@ AC_ARG_ENABLE([aeskeywrap], [ ENABLED_AESKEYWRAP=no ] ) -if test "$ENABLED_WPAS" = "yes" +if test "$ENABLED_WPAS" = "yes" && test "$ENABLED_FIPS" = "no" then ENABLED_AESKEYWRAP="yes" fi @@ -3480,6 +3600,8 @@ echo " * Async Crypto: $ENABLED_ASYNCCRYPT" echo " * Cavium: $ENABLED_CAVIUM" echo " * ARM ASM: $ENABLED_ARMASM" echo " * AES Key Wrap: $ENABLED_AESKEYWRAP" +echo " * Write duplicate: $ENABLED_WRITEDUP" +echo " * Intel Quick Assist: $ENABLED_INTEL_QA" echo "" echo "---" diff --git a/examples/client/client.c b/examples/client/client.c index 6774ad08a..2dba8f32c 100644 --- a/examples/client/client.c +++ b/examples/client/client.c @@ -42,11 +42,6 @@ #include -#if !defined(WOLFSSL_TRACK_MEMORY) && !defined(NO_MAIN_DRIVER) - /* in case memory tracker wants stats */ - #define WOLFSSL_TRACK_MEMORY -#endif - #include #include @@ -75,7 +70,7 @@ #endif -static void NonBlockingSSL_Connect(WOLFSSL* ssl) +static int NonBlockingSSL_Connect(WOLFSSL* ssl) { #ifndef WOLFSSL_CALLBACKS int ret = wolfSSL_connect(ssl); @@ -98,7 +93,7 @@ static void NonBlockingSSL_Connect(WOLFSSL* ssl) #ifdef WOLFSSL_ASYNC_CRYPT else if (error == WC_PENDING_E) { ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); - if (ret < 0) { break; } else if (ret == 0) { continue; } + if (ret < 0) break; } #endif @@ -110,11 +105,11 @@ static void NonBlockingSSL_Connect(WOLFSSL* ssl) } if ((select_ret == TEST_RECV_READY) || - (select_ret == TEST_ERROR_READY)) { + (select_ret == TEST_ERROR_READY) || error == WC_PENDING_E) { #ifndef WOLFSSL_CALLBACKS ret = wolfSSL_connect(ssl); #else - ret = 
wolfSSL_connect_ex(ssl,handShakeCB,timeoutCB,timeout); + ret = wolfSSL_connect_ex(ssl, handShakeCB, timeoutCB, timeout); #endif error = wolfSSL_get_error(ssl, 0); } @@ -131,8 +126,8 @@ static void NonBlockingSSL_Connect(WOLFSSL* ssl) error = SSL_FATAL_ERROR; } } - if (ret != SSL_SUCCESS) - err_sys("SSL_connect failed"); + + return ret; } @@ -166,7 +161,7 @@ static int ClientBenchmarkConnections(WOLFSSL_CTX* ctx, char* host, word16 port, /* time passed in number of connects give average */ int times = benchmark; int loops = resumeSession ? 2 : 1; - int i = 0; + int i = 0, err, ret; #ifndef NO_SESSION_CACHE WOLFSSL_SESSION* benchSession = NULL; #endif @@ -193,8 +188,23 @@ static int ClientBenchmarkConnections(WOLFSSL_CTX* ctx, char* host, word16 port, if (wolfSSL_set_fd(ssl, sockfd) != SSL_SUCCESS) { err_sys("error in setting fd"); } - if (wolfSSL_connect(ssl) != SSL_SUCCESS) + + do { + err = 0; /* reset error */ + ret = wolfSSL_connect(ssl); + if (ret != SSL_SUCCESS) { + err = wolfSSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); + if (ret != SSL_SUCCESS) { err_sys("SSL_connect failed"); + } wolfSSL_shutdown(ssl); #ifndef NO_SESSION_CACHE @@ -226,7 +236,7 @@ static int ClientBenchmarkThroughput(WOLFSSL_CTX* ctx, char* host, word16 port, double start, conn_time = 0, tx_time = 0, rx_time = 0; SOCKET_T sockfd; WOLFSSL* ssl; - int ret; + int ret = 0, err = 0; start = current_time(1); ssl = wolfSSL_new(ctx); @@ -236,7 +246,21 @@ static int ClientBenchmarkThroughput(WOLFSSL_CTX* ctx, char* host, word16 port, if (wolfSSL_set_fd(ssl, sockfd) != SSL_SUCCESS) { err_sys("error in setting fd"); } - if (wolfSSL_connect(ssl) == SSL_SUCCESS) { + + do { + err = 0; /* reset error */ + ret = wolfSSL_connect(ssl); + if (ret != SSL_SUCCESS) { + err = wolfSSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == 
WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); + if (ret == SSL_SUCCESS) { /* Perform throughput test */ char *tx_buffer, *rx_buffer; @@ -244,14 +268,18 @@ static int ClientBenchmarkThroughput(WOLFSSL_CTX* ctx, char* host, word16 port, conn_time = current_time(0) - start; /* Allocate TX/RX buffers */ - tx_buffer = (char*)malloc(TEST_BUFFER_SIZE); - rx_buffer = (char*)malloc(TEST_BUFFER_SIZE); - if(tx_buffer && rx_buffer) { + tx_buffer = (char*)XMALLOC(TEST_BUFFER_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER); + rx_buffer = (char*)XMALLOC(TEST_BUFFER_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (tx_buffer && rx_buffer) { WC_RNG rng; /* Startup the RNG */ + #if !defined(HAVE_FIPS) && defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_InitRng_ex(&rng, NULL, devId); + #else ret = wc_InitRng(&rng); - if(ret == 0) { + #endif + if (ret == 0) { int xfer_bytes; /* Generate random data to send */ @@ -263,7 +291,7 @@ static int ClientBenchmarkThroughput(WOLFSSL_CTX* ctx, char* host, word16 port, /* Perform TX and RX of bytes */ xfer_bytes = 0; - while(throughput > xfer_bytes) { + while (throughput > xfer_bytes) { int len, rx_pos, select_ret; /* Determine packet size */ @@ -271,10 +299,22 @@ static int ClientBenchmarkThroughput(WOLFSSL_CTX* ctx, char* host, word16 port, /* Perform TX */ start = current_time(1); - if (wolfSSL_write(ssl, tx_buffer, len) != len) { - int writeErr = wolfSSL_get_error(ssl, 0); - printf("wolfSSL_write error %d!\n", writeErr); - err_sys("wolfSSL_write failed"); + do { + err = 0; /* reset error */ + ret = wolfSSL_write(ssl, tx_buffer, len); + if (ret <= 0) { + err = wolfSSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); + if (ret != len) { + printf("SSL_write bench error %d!\n", err); + err_sys("SSL_write failed"); } 
tx_time += current_time(0) - start; @@ -283,13 +323,21 @@ static int ClientBenchmarkThroughput(WOLFSSL_CTX* ctx, char* host, word16 port, if (select_ret == TEST_RECV_READY) { start = current_time(1); rx_pos = 0; - while(rx_pos < len) { - ret = wolfSSL_read(ssl, &rx_buffer[rx_pos], len - rx_pos); - if(ret <= 0) { - int readErr = wolfSSL_get_error(ssl, 0); - if (readErr != SSL_ERROR_WANT_READ) { - printf("wolfSSL_read error %d!\n", readErr); - err_sys("wolfSSL_read failed"); + while (rx_pos < len) { + ret = wolfSSL_read(ssl, &rx_buffer[rx_pos], + len - rx_pos); + if (ret <= 0) { + err = wolfSSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + else + #endif + if (err != SSL_ERROR_WANT_READ) { + printf("SSL_read bench error %d\n", err); + err_sys("SSL_read failed"); } } else { @@ -319,8 +367,8 @@ static int ClientBenchmarkThroughput(WOLFSSL_CTX* ctx, char* host, word16 port, else { err_sys("Client buffer malloc failed"); } - if(tx_buffer) free(tx_buffer); - if(rx_buffer) free(rx_buffer); + if(tx_buffer) XFREE(tx_buffer, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if(rx_buffer) XFREE(rx_buffer, NULL, DYNAMIC_TYPE_TMP_BUFFER); } else { err_sys("wolfSSL_connect failed"); @@ -412,7 +460,7 @@ static int StartTLS_Init(SOCKET_T* sockfd) /* Closes down the SMTP connection */ static int SMTP_Shutdown(WOLFSSL* ssl, int wc_shutdown) { - int ret; + int ret, err = 0; char tmpBuf[256]; if (ssl == NULL) @@ -423,13 +471,38 @@ static int SMTP_Shutdown(WOLFSSL* ssl, int wc_shutdown) XMEMSET(tmpBuf, 0, sizeof(tmpBuf)); /* C: QUIT */ - if (wolfSSL_write(ssl, starttlsCmd[5], (int)XSTRLEN(starttlsCmd[5])) != - (int)XSTRLEN(starttlsCmd[5])) + do { + ret = wolfSSL_write(ssl, starttlsCmd[5], (int)XSTRLEN(starttlsCmd[5])); + if (ret < 0) { + err = wolfSSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if 
(ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); + if (ret != (int)XSTRLEN(starttlsCmd[5])) { err_sys("failed to send SMTP QUIT command\n"); + } /* S: 221 2.0.0 Service closing transmission channel */ - if (wolfSSL_read(ssl, tmpBuf, sizeof(tmpBuf)) < 0) + do { + ret = wolfSSL_read(ssl, tmpBuf, sizeof(tmpBuf)); + if (ret < 0) { + err = wolfSSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); + if (ret < 0) { err_sys("failed to read SMTP closing down response\n"); + } printf("%s\n", tmpBuf); @@ -452,9 +525,9 @@ static void Usage(void) CLIENT_DEFAULT_VERSION); printf("-V Prints valid ssl version numbers, SSLv3(0) - TLS1.2(3)\n"); printf("-l Cipher suite list (: delimited)\n"); - printf("-c Certificate file, default %s\n", cliCert); - printf("-k Key file, default %s\n", cliKey); - printf("-A Certificate Authority file, default %s\n", caCert); + printf("-c Certificate file, default %s\n", cliCertFile); + printf("-k Key file, default %s\n", cliKeyFile); + printf("-A Certificate Authority file, default %s\n", caCertFile); #ifndef NO_DH printf("-Z Minimum DH key bits, default %d\n", DEFAULT_MIN_DHKEY_BITS); @@ -551,7 +624,6 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) #endif char reply[80]; - int input; int msgSz = (int)XSTRLEN(msg); int resumeSz = (int)XSTRLEN(resumeMsg); @@ -584,7 +656,6 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) #endif int scr = 0; /* allow secure renegotiation */ int forceScr = 0; /* force client initiaed scr */ - int trackMemory = 0; int useClientCert = 1; int fewerPackets = 0; int atomicUser = 0; @@ -594,9 +665,10 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) char* alpnList = NULL; unsigned char alpn_opt = 0; char* cipherList = NULL; - const char* verifyCert = caCert; - const char* ourCert = cliCert; - const char* ourKey = cliKey; + int 
useDefCipherList = 0; + const char* verifyCert = caCertFile; + const char* ourCert = cliCertFile; + const char* ourKey = cliKeyFile; int doSTARTTLS = 0; char* starttlsProt = NULL; @@ -622,7 +694,6 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) byte disableExtMasterSecret = 0; #endif - #ifdef HAVE_OCSP int useOcsp = 0; char* ocspUrl = NULL; @@ -631,6 +702,7 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) #ifdef HAVE_WNR const char* wnrConfigFile = wnrConfig; #endif + char buffer[WOLFSSL_MAX_ERROR_SZ]; int argc = ((func_args*)args)->argc; char** argv = ((func_args*)args)->argv; @@ -638,9 +710,9 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) ((func_args*)args)->return_code = -1; /* error state */ #ifdef NO_RSA - verifyCert = (char*)eccCert; - ourCert = (char*)cliEccCert; - ourKey = (char*)cliEccKey; + verifyCert = (char*)eccCertFile; + ourCert = (char*)cliEccCertFile; + ourKey = (char*)cliEccKeyFile; #endif (void)resumeSz; (void)session; @@ -662,9 +734,10 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) StackTrap(); #ifndef WOLFSSL_VXWORKS - while ((ch = mygetopt(argc, argv, - "?gdeDuGsmNrwRitfxXUPCVh:p:v:l:A:c:k:Z:b:zS:F:L:TnoO:aB:W:E:M:q:")) - != -1) { + /* Not used: j, t, y, I, J, K, Q, Y */ + while ((ch = mygetopt(argc, argv, "?" + "ab:c:defgh:ik:l:mnop:q:rsuv:wxz" + "A:B:CDE:F:GHL:M:NO:PRS:TUVW:XZ:")) != -1) { switch (ch) { case '?' 
: Usage(); @@ -708,12 +781,6 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) usePsk = 1; break; - case 't' : - #ifdef USE_WOLFSSL_MEMORY - trackMemory = 1; - #endif - break; - #ifdef WOLFSSL_TRUST_PEER_CERT case 'E' : trustCert = myoptarg; @@ -777,6 +844,10 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) cipherList = myoptarg; break; + case 'H' : + useDefCipherList = 1; + break; + case 'A' : verifyCert = myoptarg; break; @@ -1030,11 +1101,6 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) } } -#if defined(USE_WOLFSSL_MEMORY) && !defined(WOLFSSL_STATIC_MEMORY) - if (trackMemory) - InitMemoryTracker(); -#endif - #ifdef HAVE_WNR if (wc_InitNetRandom(wnrConfigFile, NULL, 5000) != 0) err_sys("can't load whitewood net random config file"); @@ -1097,7 +1163,7 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) } #endif - if (cipherList) { + if (cipherList && !useDefCipherList) { if (wolfSSL_CTX_set_cipher_list(ctx, cipherList) != SSL_SUCCESS) { wolfSSL_CTX_free(ctx); err_sys("client can't set cipher list 1"); @@ -1149,7 +1215,7 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) if (useAnon) { #ifdef HAVE_ANON - if (cipherList == NULL) { + if (cipherList == NULL || (cipherList && useDefCipherList)) { wolfSSL_CTX_allow_anon_cipher(ctx); if (wolfSSL_CTX_set_cipher_list(ctx,"ADH-AES128-SHA") != SSL_SUCCESS) { @@ -1240,12 +1306,12 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) #ifdef HAVE_ECC /* load ecc verify too, echoserver uses it by default w/ ecc */ #if !defined(NO_FILESYSTEM) - if (wolfSSL_CTX_load_verify_locations(ctx, eccCert, 0) != SSL_SUCCESS) { + if (wolfSSL_CTX_load_verify_locations(ctx, eccCertFile, 0) != SSL_SUCCESS) { wolfSSL_CTX_free(ctx); err_sys("can't load ecc ca file, Please run from wolfSSL home dir"); } #else - load_buffer(ctx, eccCert, WOLFSSL_CA); + load_buffer(ctx, eccCertFile, WOLFSSL_CA); #endif /* !defined(NO_FILESYSTEM) */ #endif /* HAVE_ECC */ #if defined(WOLFSSL_TRUST_PEER_CERT) && !defined(NO_FILESYSTEM) 
@@ -1266,9 +1332,8 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) #ifdef WOLFSSL_ASYNC_CRYPT ret = wolfAsync_DevOpen(&devId); - if (ret != 0) { - wolfSSL_CTX_free(ctx); - err_sys("Async device open failed"); + if (ret < 0) { + printf("Async device open failed\nRunning without async\n"); } wolfSSL_CTX_UseAsync(ctx, devId); #endif /* WOLFSSL_ASYNC_CRYPT */ @@ -1509,38 +1574,38 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) if (nonBlocking) { wolfSSL_set_using_nonblock(ssl, 1); tcp_set_nonblocking(&sockfd); - NonBlockingSSL_Connect(ssl); + ret = NonBlockingSSL_Connect(ssl); } else { do { -#ifdef WOLFSSL_ASYNC_CRYPT - if (err == WC_PENDING_E) { - ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); - if (ret < 0) { break; } else if (ret == 0) { continue; } - } -#endif - err = 0; /* Reset error */ + err = 0; /* reset error */ ret = wolfSSL_connect(ssl); if (ret != SSL_SUCCESS) { err = wolfSSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif } - } while (ret != SSL_SUCCESS && err == WC_PENDING_E); - - if (ret != SSL_SUCCESS) { - char buffer[WOLFSSL_MAX_ERROR_SZ]; - printf("err = %d, %s\n", err, wolfSSL_ERR_error_string(err, buffer)); - wolfSSL_free(ssl); - wolfSSL_CTX_free(ctx); - err_sys("wolfSSL_connect failed"); - /* see note at top of README */ - /* if you're getting an error here */ - } + } while (err == WC_PENDING_E); } #else timeout.tv_sec = DEFAULT_TIMEOUT_SEC; timeout.tv_usec = 0; - NonBlockingSSL_Connect(ssl); /* will keep retrying on timeout */ + ret = NonBlockingSSL_Connect(ssl); /* will keep retrying on timeout */ #endif + if (ret != SSL_SUCCESS) { + printf("wolfSSL_connect error %d, %s\n", err, + wolfSSL_ERR_error_string(err, buffer)); + wolfSSL_free(ssl); + wolfSSL_CTX_free(ctx); + err_sys("wolfSSL_connect failed"); + /* see note at top of README */ + /* if you're getting an error here */ + } + showPeer(ssl); #ifdef 
OPENSSL_EXTRA @@ -1620,7 +1685,6 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) " nonblocking yet"); } else { if (wolfSSL_Rehandshake(ssl) != SSL_SUCCESS) { - char buffer[WOLFSSL_MAX_ERROR_SZ]; err = wolfSSL_get_error(ssl, 0); printf("err = %d, %s\n", err, wolfSSL_ERR_error_string(err, buffer)); @@ -1645,30 +1709,70 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) /* allow some time for exporting the session */ #ifdef WOLFSSL_SESSION_EXPORT_DEBUG - #ifdef USE_WINDOWS_API - Sleep(500); - #elif defined(WOLFSSL_TIRTOS) - Task_sleep(1); - #else - sleep(1); - #endif +#ifdef USE_WINDOWS_API + Sleep(500); +#elif defined(WOLFSSL_TIRTOS) + Task_sleep(1); +#else + sleep(1); +#endif #endif /* WOLFSSL_SESSION_EXPORT_DEBUG */ - if (wolfSSL_write(ssl, msg, msgSz) != msgSz) { + + do { + err = 0; /* reset error */ + ret = wolfSSL_write(ssl, msg, msgSz); + if (ret <= 0) { + err = wolfSSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); + if (ret != msgSz) { + printf("SSL_write msg error %d, %s\n", err, + wolfSSL_ERR_error_string(err, buffer)); wolfSSL_free(ssl); wolfSSL_CTX_free(ctx); err_sys("SSL_write failed"); } - input = wolfSSL_read(ssl, reply, sizeof(reply)-1); - if (input > 0) { - reply[input] = 0; + do { + err = 0; /* reset error */ + ret = wolfSSL_read(ssl, reply, sizeof(reply)-1); + if (ret <= 0) { + err = wolfSSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); + if (ret > 0) { + reply[ret] = 0; printf("Server response: %s\n", reply); if (sendGET) { /* get html */ while (1) { - input = wolfSSL_read(ssl, reply, sizeof(reply)-1); - if (input > 0) { - reply[input] = 0; + do { + err = 0; /* reset error */ + ret = wolfSSL_read(ssl, reply, 
sizeof(reply)-1); + if (ret <= 0) { + err = wolfSSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); + if (ret > 0) { + reply[ret] = 0; printf("%s\n", reply); } else @@ -1676,13 +1780,13 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) } } } - else if (input < 0) { - int readErr = wolfSSL_get_error(ssl, 0); - if (readErr != SSL_ERROR_WANT_READ) { - printf("wolfSSL_read error %d!\n", readErr); + if (ret < 0) { + if (err != SSL_ERROR_WANT_READ) { + printf("SSL_read reply error %d, %s\n", err, + wolfSSL_ERR_error_string(err, buffer)); wolfSSL_free(ssl); wolfSSL_CTX_free(ctx); - err_sys("wolfSSL_read failed"); + err_sys("SSL_read failed"); } } @@ -1793,18 +1897,37 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) if (nonBlocking) { wolfSSL_set_using_nonblock(sslResume, 1); tcp_set_nonblocking(&sockfd); - NonBlockingSSL_Connect(sslResume); + ret = NonBlockingSSL_Connect(sslResume); } - else if (wolfSSL_connect(sslResume) != SSL_SUCCESS) { - wolfSSL_free(sslResume); - wolfSSL_CTX_free(ctx); - err_sys("SSL resume failed"); + else { + do { + err = 0; /* reset error */ + ret = wolfSSL_connect(sslResume); + if (ret != SSL_SUCCESS) { + err = wolfSSL_get_error(sslResume, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(sslResume, + WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); } #else timeout.tv_sec = DEFAULT_TIMEOUT_SEC; timeout.tv_usec = 0; - NonBlockingSSL_Connect(ssl); /* will keep retrying on timeout */ + ret = NonBlockingSSL_Connect(ssl); /* will keep retrying on timeout */ #endif + if (ret != SSL_SUCCESS) { + printf("wolfSSL_connect resume error %d, %s\n", err, + wolfSSL_ERR_error_string(err, buffer)); + wolfSSL_free(sslResume); + wolfSSL_CTX_free(ctx); + err_sys("wolfSSL_connect resume failed"); + } + 
showPeer(sslResume); if (wolfSSL_session_reused(sslResume)) @@ -1841,7 +1964,22 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) #endif #endif /* WOLFSSL_SESSION_EXPORT_DEBUG */ - if (wolfSSL_write(sslResume, resumeMsg, resumeSz) != resumeSz) { + do { + err = 0; /* reset error */ + ret = wolfSSL_write(sslResume, resumeMsg, resumeSz); + if (ret <= 0) { + err = wolfSSL_get_error(sslResume, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(sslResume, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); + if (ret != resumeSz) { + printf("SSL_write resume error %d, %s\n", err, + wolfSSL_ERR_error_string(err, buffer)); wolfSSL_free(sslResume); wolfSSL_CTX_free(ctx); err_sys("SSL_write failed"); @@ -1849,26 +1987,50 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) if (nonBlocking) { /* give server a chance to bounce a message back to client */ - #ifdef USE_WINDOWS_API - Sleep(500); - #elif defined(WOLFSSL_TIRTOS) - Task_sleep(1); - #else - sleep(1); - #endif + #ifdef USE_WINDOWS_API + Sleep(500); + #elif defined(WOLFSSL_TIRTOS) + Task_sleep(1); + #else + sleep(1); + #endif } - input = wolfSSL_read(sslResume, reply, sizeof(reply)-1); - - if (input > 0) { - reply[input] = 0; + do { + err = 0; /* reset error */ + ret = wolfSSL_read(sslResume, reply, sizeof(reply)-1); + if (ret <= 0) { + err = wolfSSL_get_error(sslResume, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(sslResume, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); + if (ret > 0) { + reply[ret] = 0; printf("Server resume response: %s\n", reply); if (sendGET) { /* get html */ while (1) { - input = wolfSSL_read(sslResume, reply, sizeof(reply)-1); - if (input > 0) { - reply[input] = 0; + do { + err = 0; /* reset error */ + ret = wolfSSL_read(sslResume, reply, sizeof(reply)-1); + if (ret <= 0) { + err = wolfSSL_get_error(sslResume, 
0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(sslResume, + WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); + if (ret > 0) { + reply[ret] = 0; printf("%s\n", reply); } else @@ -1876,18 +2038,30 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) } } } - else if (input < 0) { - int readErr = wolfSSL_get_error(sslResume, 0); - if (readErr != SSL_ERROR_WANT_READ) { - printf("wolfSSL_read error %d!\n", readErr); + if (ret < 0) { + if (err != SSL_ERROR_WANT_READ) { + printf("SSL_read resume error %d, %s\n", err, + wolfSSL_ERR_error_string(err, buffer)); wolfSSL_free(sslResume); wolfSSL_CTX_free(ctx); - err_sys("wolfSSL_read failed"); + err_sys("SSL_read failed"); } } /* try to send session break */ - wolfSSL_write(sslResume, msg, msgSz); + do { + err = 0; /* reset error */ + ret = wolfSSL_write(sslResume, msg, msgSz); + if (ret <= 0) { + err = wolfSSL_get_error(sslResume, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(sslResume, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); ret = wolfSSL_shutdown(sslResume); if (wc_shutdown && ret == SSL_SHUTDOWN_NOT_DONE) @@ -1906,11 +2080,6 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) wolfAsync_DevClose(&devId); #endif -#if defined(USE_WOLFSSL_MEMORY) && !defined(WOLFSSL_STATIC_MEMORY) - if (trackMemory) - ShowMemoryTracker(); -#endif /* USE_WOLFSSL_MEMORY */ - /* There are use cases when these assignments are not read. To avoid * potential confusion those warnings have been handled here. 
*/ @@ -1919,7 +2088,6 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) (void) verifyCert; (void) ourCert; (void) ourKey; - (void) trackMemory; #if !defined(WOLFSSL_TIRTOS) return 0; diff --git a/examples/echoclient/echoclient.c b/examples/echoclient/echoclient.c index 1c06efa83..09f0286ec 100644 --- a/examples/echoclient/echoclient.c +++ b/examples/echoclient/echoclient.c @@ -23,7 +23,7 @@ #ifdef HAVE_CONFIG_H #include #endif - + #include /* let's use cyassl layer AND cyassl openssl layer */ @@ -35,7 +35,7 @@ #include #if !defined(WOLFSSL_MDK_ARM) - #include "cmsis_os.h" + #include "cmsis_os.h" #include "rl_net.h" #else #include "rtl.h" @@ -81,16 +81,17 @@ void echoclient_test(void* args) int argc = 0; char** argv = 0; word16 port = yasslPort; + char buffer[CYASSL_MAX_ERROR_SZ]; ((func_args*)args)->return_code = -1; /* error state */ - + #ifndef WOLFSSL_MDK_SHELL argc = ((func_args*)args)->argc; argv = ((func_args*)args)->argv; #endif if (argc >= 2) { - fin = fopen(argv[1], "r"); + fin = fopen(argv[1], "r"); inCreated = 1; } if (argc >= 3) { @@ -105,7 +106,7 @@ void echoclient_test(void* args) doDTLS = 1; #endif -#ifdef CYASSL_LEANPSK +#ifdef CYASSL_LEANPSK doPSK = 1; #endif @@ -130,16 +131,16 @@ void echoclient_test(void* args) #ifndef NO_FILESYSTEM #ifndef NO_RSA - if (SSL_CTX_load_verify_locations(ctx, caCert, 0) != SSL_SUCCESS) + if (SSL_CTX_load_verify_locations(ctx, caCertFile, 0) != SSL_SUCCESS) err_sys("can't load ca file, Please run from wolfSSL home dir"); #endif #ifdef HAVE_ECC - if (SSL_CTX_load_verify_locations(ctx, eccCert, 0) != SSL_SUCCESS) + if (SSL_CTX_load_verify_locations(ctx, eccCertFile, 0) != SSL_SUCCESS) err_sys("can't load ca file, Please run from wolfSSL home dir"); #endif #elif !defined(NO_CERTS) if (!doPSK) - load_buffer(ctx, caCert, WOLFSSL_CA); + load_buffer(ctx, caCertFile, WOLFSSL_CA); #endif #if defined(CYASSL_SNIFFER) @@ -173,15 +174,15 @@ void echoclient_test(void* args) #ifdef WOLFSSL_ASYNC_CRYPT ret = 
wolfAsync_DevOpen(&devId); - if (ret != 0) { - err_sys("Async device open failed"); + if (ret < 0) { + printf("Async device open failed\nRunning without async\n"); } wolfSSL_CTX_UseAsync(ctx, devId); #endif /* WOLFSSL_ASYNC_CRYPT */ ssl = SSL_new(ctx); tcp_connect(&sockfd, yasslIP, port, doDTLS, 0, ssl); - + SSL_set_fd(ssl, sockfd); #if defined(USE_WINDOWS_API) && defined(CYASSL_DTLS) && defined(NO_MAIN_DRIVER) /* let echoserver bind first, TODO: add Windows signal like pthreads does */ @@ -189,31 +190,46 @@ void echoclient_test(void* args) #endif do { -#ifdef WOLFSSL_ASYNC_CRYPT - if (err == WC_PENDING_E) { - ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); - if (ret < 0) { break; } else if (ret == 0) { continue; } - } -#endif err = 0; /* Reset error */ ret = SSL_connect(ssl); if (ret != SSL_SUCCESS) { err = SSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif } - } while (ret != SSL_SUCCESS && err == WC_PENDING_E); - + } while (err == WC_PENDING_E); if (ret != SSL_SUCCESS) { - char buffer[CYASSL_MAX_ERROR_SZ]; - printf("err = %d, %s\n", err, ERR_error_string(err, buffer)); + printf("SSL_connect error %d, %s\n", err, + ERR_error_string(err, buffer)); err_sys("SSL_connect failed"); } while (fgets(msg, sizeof(msg), fin) != 0) { - + sendSz = (int)XSTRLEN(msg); - if (SSL_write(ssl, msg, sendSz) != sendSz) + do { + err = 0; /* reset error */ + ret = SSL_write(ssl, msg, sendSz); + if (ret <= 0) { + err = SSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); + if (ret != sendSz) { + printf("SSL_write msg error %d, %s\n", err, + ERR_error_string(err, buffer)); err_sys("SSL_write failed"); + } if (strncmp(msg, "quit", 4) == 0) { fputs("sending server shutdown command: quit!\n", fout); @@ -225,29 
+241,39 @@ void echoclient_test(void* args) break; } - #ifndef WOLFSSL_MDK_SHELL - while (sendSz) { - int got; - if ( (got = SSL_read(ssl, reply, sizeof(reply)-1)) > 0) { - reply[got] = 0; - fputs(reply, fout); - fflush(fout) ; - sendSz -= got; - } - else - break; - } - #else + #ifndef WOLFSSL_MDK_SHELL + while (sendSz) + #endif { - int got; - if ( (got = SSL_read(ssl, reply, sizeof(reply)-1)) > 0) { - reply[got] = 0; + do { + err = 0; /* reset error */ + ret = SSL_read(ssl, reply, sizeof(reply)-1); + if (ret <= 0) { + err = SSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); + if (ret > 0) { + reply[ret] = 0; fputs(reply, fout); fflush(fout) ; - sendSz -= got; + sendSz -= ret; + } + else { + printf("SSL_read msg error %d, %s\n", err, + ERR_error_string(err, buffer)); + err_sys("SSL_read failed"); + + #ifndef WOLFSSL_MDK_SHELL + break; + #endif } } - #endif } @@ -255,7 +281,19 @@ void echoclient_test(void* args) strncpy(msg, "break", 6); sendSz = (int)strlen(msg); /* try to tell server done */ - SSL_write(ssl, msg, sendSz); + do { + err = 0; /* reset error */ + ret = SSL_write(ssl, msg, sendSz); + if (ret <= 0) { + err = SSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); #else SSL_shutdown(ssl); #endif @@ -272,7 +310,7 @@ void echoclient_test(void* args) if (outCreated) fclose(fout); CloseSocket(sockfd); - ((func_args*)args)->return_code = 0; + ((func_args*)args)->return_code = 0; } @@ -311,7 +349,7 @@ void echoclient_test(void* args) return args.return_code; } - + #endif /* NO_MAIN_DRIVER */ diff --git a/examples/echoserver/echoserver.c b/examples/echoserver/echoserver.c index 432525806..efbab5276 100644 --- a/examples/echoserver/echoserver.c +++ 
b/examples/echoserver/echoserver.c @@ -36,8 +36,8 @@ #if !defined(WOLFSSL_MDK_ARM) #include "cmsis_os.h" - #include "rl_fs.h" - #include "rl_net.h" + #include "rl_fs.h" + #include "rl_net.h" #else #include "rtl.h" #include "wolfssl_MDK_ARM.h" @@ -91,6 +91,7 @@ THREAD_RETURN CYASSL_THREAD echoserver_test(void* args) word16 port; int argc = ((func_args*)args)->argc; char** argv = ((func_args*)args)->argv; + char buffer[CYASSL_MAX_ERROR_SZ]; #ifdef ECHO_OUT FILE* fout = stdout; @@ -165,23 +166,23 @@ THREAD_RETURN CYASSL_THREAD echoserver_test(void* args) if (doPSK == 0) { #if defined(HAVE_NTRU) && defined(WOLFSSL_STATIC_RSA) /* ntru */ - if (CyaSSL_CTX_use_certificate_file(ctx, ntruCert, SSL_FILETYPE_PEM) + if (CyaSSL_CTX_use_certificate_file(ctx, ntruCertFile, SSL_FILETYPE_PEM) != SSL_SUCCESS) err_sys("can't load ntru cert file, " "Please run from wolfSSL home dir"); - if (CyaSSL_CTX_use_NTRUPrivateKey_file(ctx, ntruKey) + if (CyaSSL_CTX_use_NTRUPrivateKey_file(ctx, ntruKeyFile) != SSL_SUCCESS) err_sys("can't load ntru key file, " "Please run from wolfSSL home dir"); #elif defined(HAVE_ECC) && !defined(CYASSL_SNIFFER) /* ecc */ - if (CyaSSL_CTX_use_certificate_file(ctx, eccCert, SSL_FILETYPE_PEM) + if (CyaSSL_CTX_use_certificate_file(ctx, eccCertFile, SSL_FILETYPE_PEM) != SSL_SUCCESS) err_sys("can't load server cert file, " "Please run from wolfSSL home dir"); - if (CyaSSL_CTX_use_PrivateKey_file(ctx, eccKey, SSL_FILETYPE_PEM) + if (CyaSSL_CTX_use_PrivateKey_file(ctx, eccKeyFile, SSL_FILETYPE_PEM) != SSL_SUCCESS) err_sys("can't load server key file, " "Please run from wolfSSL home dir"); @@ -189,12 +190,12 @@ THREAD_RETURN CYASSL_THREAD echoserver_test(void* args) /* do nothing, just don't load cert files */ #else /* normal */ - if (CyaSSL_CTX_use_certificate_file(ctx, svrCert, SSL_FILETYPE_PEM) + if (CyaSSL_CTX_use_certificate_file(ctx, svrCertFile, SSL_FILETYPE_PEM) != SSL_SUCCESS) err_sys("can't load server cert file, " "Please run from wolfSSL home dir"); - if 
(CyaSSL_CTX_use_PrivateKey_file(ctx, svrKey, SSL_FILETYPE_PEM) + if (CyaSSL_CTX_use_PrivateKey_file(ctx, svrKeyFile, SSL_FILETYPE_PEM) != SSL_SUCCESS) err_sys("can't load server key file, " "Please run from wolfSSL home dir"); @@ -202,8 +203,8 @@ THREAD_RETURN CYASSL_THREAD echoserver_test(void* args) } /* doPSK */ #elif !defined(NO_CERTS) if (!doPSK) { - load_buffer(ctx, svrCert, WOLFSSL_CERT); - load_buffer(ctx, svrKey, WOLFSSL_KEY); + load_buffer(ctx, svrCertFile, WOLFSSL_CERT); + load_buffer(ctx, svrKeyFile, WOLFSSL_KEY); } #endif @@ -232,8 +233,8 @@ THREAD_RETURN CYASSL_THREAD echoserver_test(void* args) #ifdef WOLFSSL_ASYNC_CRYPT ret = wolfAsync_DevOpen(&devId); - if (ret != 0) { - err_sys("Async device open failed"); + if (ret < 0) { + printf("Async device open failed\nRunning without async\n"); } wolfSSL_CTX_UseAsync(ctx, devId); #endif /* WOLFSSL_ASYNC_CRYPT */ @@ -241,7 +242,8 @@ THREAD_RETURN CYASSL_THREAD echoserver_test(void* args) SignalReady(args, port); while (!shutDown) { - CYASSL* ssl = 0; + CYASSL* ssl = NULL; + CYASSL* write_ssl = NULL; /* may have separate w/ HAVE_WRITE_DUP */ char command[SVR_COMMAND_SIZE+1]; int echoSz = 0; int clientfd; @@ -276,29 +278,27 @@ THREAD_RETURN CYASSL_THREAD echoserver_test(void* args) wolfSSL_dtls_set_peer(ssl, &client, client_len); #endif #if !defined(NO_FILESYSTEM) && !defined(NO_DH) && !defined(NO_ASN) - CyaSSL_SetTmpDH_file(ssl, dhParam, SSL_FILETYPE_PEM); + CyaSSL_SetTmpDH_file(ssl, dhParamFile, SSL_FILETYPE_PEM); #elif !defined(NO_DH) SetDH(ssl); /* will repick suites with DHE, higher than PSK */ #endif do { -#ifdef WOLFSSL_ASYNC_CRYPT - if (err == WC_PENDING_E) { - ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); - if (ret < 0) { break; } else if (ret == 0) { continue; } - } -#endif err = 0; /* Reset error */ ret = CyaSSL_accept(ssl); if (ret != SSL_SUCCESS) { err = CyaSSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, 
WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif } - } while (ret != SSL_SUCCESS && err == WC_PENDING_E); - + } while (err == WC_PENDING_E); if (ret != SSL_SUCCESS) { - char buffer[CYASSL_MAX_ERROR_SZ]; - err = CyaSSL_get_error(ssl, 0); - printf("error = %d, %s\n", err, CyaSSL_ERR_error_string(err, buffer)); + printf("SSL_accept error = %d, %s\n", err, + CyaSSL_ERR_error_string(err, buffer)); printf("SSL_accept failed\n"); CyaSSL_free(ssl); CloseSocket(clientfd); @@ -308,7 +308,41 @@ THREAD_RETURN CYASSL_THREAD echoserver_test(void* args) showPeer(ssl); #endif - while ( (echoSz = CyaSSL_read(ssl, command, sizeof(command)-1)) > 0) { +#ifdef HAVE_WRITE_DUP + write_ssl = wolfSSL_write_dup(ssl); + if (write_ssl == NULL) { + printf("wolfSSL_write_dup failed\n"); + CyaSSL_free(ssl); + CloseSocket(clientfd); + continue; + } +#else + write_ssl = ssl; +#endif + + while (1) { + do { + err = 0; /* reset error */ + ret = CyaSSL_read(ssl, command, sizeof(command)-1); + if (ret <= 0) { + err = CyaSSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); + if (ret <= 0) { + if (err != SSL_ERROR_WANT_READ) { + printf("SSL_read echo error %d, %s!\n", err, + CyaSSL_ERR_error_string(err, buffer)); + } + break; + } + + echoSz = ret; if (firstRead == 1) { firstRead = 0; /* browser may send 1 byte 'G' to start */ @@ -321,7 +355,7 @@ THREAD_RETURN CYASSL_THREAD echoserver_test(void* args) strncpy(command, "GET", 4); /* fall through to normal GET */ } - + if ( strncmp(command, "quit", 4) == 0) { printf("client sent quit command: shutting down!\n"); shutDown = 1; @@ -343,7 +377,7 @@ THREAD_RETURN CYASSL_THREAD echoserver_test(void* args) char header[] = "\n
\n";
                 char body[]   = "greetings from wolfSSL\n";
                 char footer[] = "\r\n\r\n";
-            
+
                 strncpy(command, type, sizeof(type));
                 echoSz = sizeof(type) - 1;
 
@@ -354,21 +388,57 @@ THREAD_RETURN CYASSL_THREAD echoserver_test(void* args)
                 strncpy(&command[echoSz], footer, sizeof(footer));
                 echoSz += (int)sizeof(footer);
 
-                if (CyaSSL_write(ssl, command, echoSz) != echoSz)
-                    err_sys("SSL_write failed");
+                do { /* resend the GET reply while async crypto is pending */
+                    err = 0; /* reset error */
+                    ret = CyaSSL_write(write_ssl, command, echoSz);
+                    if (ret <= 0) {
+                        err = CyaSSL_get_error(write_ssl, 0); /* same handle as the write */
+                    #ifdef WOLFSSL_ASYNC_CRYPT
+                        if (err == WC_PENDING_E) {
+                            ret = wolfSSL_AsyncPoll(write_ssl, WOLF_POLL_FLAG_CHECK_HW);
+                            if (ret < 0) break; /* poll failed: report below */
+                        }
+                    #endif
+                    }
+                } while (err == WC_PENDING_E);
+                if (ret != echoSz) {
+                    printf("SSL_write get error = %d, %s\n", err,
+                        CyaSSL_ERR_error_string(err, buffer));
+                    err_sys("SSL_write get failed");
+                }
                 break;
             }
             command[echoSz] = 0;
 
-            #ifdef ECHO_OUT
-                fputs(command, fout);
-            #endif
+        #ifdef ECHO_OUT
+            fputs(command, fout);
+        #endif
 
-            if (CyaSSL_write(ssl, command, echoSz) != echoSz)
-                err_sys("SSL_write failed");
+            do {
+                err = 0; /* reset error */
+                ret = CyaSSL_write(write_ssl, command, echoSz);
+                if (ret <= 0) {
+                    err = CyaSSL_get_error(write_ssl, 0);
+                #ifdef WOLFSSL_ASYNC_CRYPT
+                    if (err == WC_PENDING_E) {
+                        ret = wolfSSL_AsyncPoll(write_ssl, WOLF_POLL_FLAG_CHECK_HW);
+                        if (ret < 0) break;
+                    }
+                #endif
+                }
+            } while (err == WC_PENDING_E);
+
+            if (ret != echoSz) {
+                printf("SSL_write echo error = %d, %s\n", err,
+                        CyaSSL_ERR_error_string(err, buffer));
+                err_sys("SSL_write echo failed");
+            }
         }
 #ifndef CYASSL_DTLS
         CyaSSL_shutdown(ssl);
+#endif
+#ifdef HAVE_WRITE_DUP
+        CyaSSL_free(write_ssl);
 #endif
         CyaSSL_free(ssl);
         CloseSocket(clientfd);
@@ -445,7 +515,7 @@ THREAD_RETURN CYASSL_THREAD echoserver_test(void* args)
         return args.return_code;
     }
 
-        
+
 #endif /* NO_MAIN_DRIVER */
 
 
diff --git a/examples/server/server.c b/examples/server/server.c
index 0769207df..38dcd7136 100644
--- a/examples/server/server.c
+++ b/examples/server/server.c
@@ -30,11 +30,6 @@
     #include    /* ecc_fp_free */
 #endif
 
-#if !defined(WOLFSSL_TRACK_MEMORY) && !defined(NO_MAIN_DRIVER)
-    /* in case memory tracker wants stats */
-    #define WOLFSSL_TRACK_MEMORY
-#endif
-
 #if defined(WOLFSSL_MDK_ARM) || defined(WOLFSSL_KEIL_TCP_NET)
         #include 
         #include 
@@ -97,25 +92,35 @@ static int NonBlockingSSL_Accept(SSL* ssl)
 #endif
     int error = SSL_get_error(ssl, 0);
     SOCKET_T sockfd = (SOCKET_T)CyaSSL_get_fd(ssl);
-    int select_ret;
+    int select_ret = 0;
 
     while (ret != SSL_SUCCESS && (error == SSL_ERROR_WANT_READ ||
-                                  error == SSL_ERROR_WANT_WRITE)) {
+                                  error == SSL_ERROR_WANT_WRITE ||
+                                  error == WC_PENDING_E)) {
         int currTimeout = 1;
 
         if (error == SSL_ERROR_WANT_READ) {
             /* printf("... server would read block\n"); */
-        } else {
+        }
+        else if (error == SSL_ERROR_WANT_WRITE) {
             /* printf("... server would write block\n"); */
         }
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        else if (error == WC_PENDING_E) {
+            ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW);
+            if (ret < 0) break;
+        }
+    #endif
 
-#ifdef CYASSL_DTLS
-        currTimeout = CyaSSL_dtls_get_current_timeout(ssl);
-#endif
-        select_ret = tcp_select(sockfd, currTimeout);
+        if (error != WC_PENDING_E) {
+        #ifdef CYASSL_DTLS
+            currTimeout = CyaSSL_dtls_get_current_timeout(ssl);
+        #endif
+            select_ret = tcp_select(sockfd, currTimeout);
+        }
 
         if ((select_ret == TEST_RECV_READY) ||
-                                        (select_ret == TEST_ERROR_READY)) {
+            (select_ret == TEST_ERROR_READY) || error == WC_PENDING_E) {
             #ifndef CYASSL_CALLBACKS
                 ret = SSL_accept(ssl);
             #else
@@ -127,12 +132,12 @@ static int NonBlockingSSL_Accept(SSL* ssl)
         else if (select_ret == TEST_TIMEOUT && !CyaSSL_dtls(ssl)) {
             error = SSL_ERROR_WANT_READ;
         }
-#ifdef CYASSL_DTLS
+    #ifdef CYASSL_DTLS
         else if (select_ret == TEST_TIMEOUT && CyaSSL_dtls(ssl) &&
                                             CyaSSL_dtls_got_timeout(ssl) >= 0) {
             error = SSL_ERROR_WANT_READ;
         }
-#endif
+    #endif
         else {
             error = SSL_FATAL_ERROR;
         }
@@ -144,60 +149,92 @@ static int NonBlockingSSL_Accept(SSL* ssl)
 /* Echo number of bytes specified by -e arg */
 int ServerEchoData(SSL* ssl, int clientfd, int echoData, int throughput)
 {
-    int ret = 0;
-    char* buffer = (char*)malloc(TEST_BUFFER_SIZE);
-    if(buffer) {
-        double start = 0, rx_time = 0, tx_time = 0;
-        int xfer_bytes = 0;
-        while((echoData && throughput == 0) || (!echoData && xfer_bytes < throughput)) {
-            int select_ret = tcp_select(clientfd, 1); /* Timeout=1 second */
-            if (select_ret == TEST_RECV_READY) {
-                int len = min(TEST_BUFFER_SIZE, throughput - xfer_bytes);
-                int rx_pos = 0;
-                if(throughput) {
-                    start = current_time(1);
-                }
-                while(rx_pos < len) {
-                    ret = SSL_read(ssl, &buffer[rx_pos], len - rx_pos);
-                    if (ret <= 0) {
-                        int readErr = SSL_get_error(ssl, 0);
-                        if (readErr != SSL_ERROR_WANT_READ) {
-                            printf("SSL_read error %d!\n", readErr);
-                            err_sys("SSL_read failed");
-                        }
-                    }
-                    else {
-                        rx_pos += ret;
-                    }
-                }
-                if(throughput) {
-                    rx_time += current_time(0) - start;
-                    start = current_time(1);
-                }
-                if (SSL_write(ssl, buffer, len) != len) {
-                    err_sys("SSL_write failed");
-                }
-                if(throughput) {
-                    tx_time += current_time(0) - start;
-                }
+    int ret = 0, err;
+    double start = 0, rx_time = 0, tx_time = 0;
+    int xfer_bytes = 0, select_ret, len, rx_pos;
+    char* buffer;
 
-                xfer_bytes += len;
+    buffer = (char*)malloc(TEST_BUFFER_SIZE);
+    if (!buffer) {
+        err_sys("Server buffer malloc failed");
+    }
+
+    while ((echoData && throughput == 0) ||
+          (!echoData && xfer_bytes < throughput))
+    {
+        select_ret = tcp_select(clientfd, 1); /* Timeout=1 second */
+        if (select_ret == TEST_RECV_READY) {
+
+            len = min(TEST_BUFFER_SIZE, throughput - xfer_bytes);
+            rx_pos = 0;
+
+            if (throughput) {
+                start = current_time(1);
             }
-        }
-        free(buffer);
 
-        if(throughput) {
-            printf("wolfSSL Server Benchmark %d bytes\n"
-                "\tRX      %8.3f ms (%8.3f MBps)\n"
-                "\tTX      %8.3f ms (%8.3f MBps)\n",
-                throughput,
-                tx_time * 1000, throughput / tx_time / 1024 / 1024,
-                rx_time * 1000, throughput / rx_time / 1024 / 1024
-            );
+            /* Read data until len bytes have been received */
+            while (rx_pos < len) {
+                ret = SSL_read(ssl, &buffer[rx_pos], len - rx_pos);
+                if (ret <= 0) { /* 0 must not fall through: rx_pos would never advance */
+                    err = SSL_get_error(ssl, 0);
+                #ifdef WOLFSSL_ASYNC_CRYPT
+                    if (err == WC_PENDING_E) {
+                        ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW);
+                        if (ret < 0) break;
+                    }
+                    else
+                #endif
+                    if (err != SSL_ERROR_WANT_READ) {
+                        printf("SSL_read echo error %d\n", err);
+                        err_sys("SSL_read failed");
+                    }
+                }
+                else {
+                    rx_pos += ret;
+                }
+            }
+            if (throughput) {
+                rx_time += current_time(0) - start;
+                start = current_time(1);
+            }
+
+            /* Write data */
+            do {
+                err = 0; /* reset error */
+                ret = SSL_write(ssl, buffer, len);
+                if (ret <= 0) {
+                    err = SSL_get_error(ssl, 0);
+                #ifdef WOLFSSL_ASYNC_CRYPT
+                    if (err == WC_PENDING_E) {
+                        ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW);
+                        if (ret < 0) break;
+                    }
+                #endif
+                }
+            } while (err == WC_PENDING_E);
+            if (ret != len) {
+                printf("SSL_write echo error %d\n", err);
+                err_sys("SSL_write failed");
+            }
+
+            if (throughput) {
+                tx_time += current_time(0) - start;
+            }
+
+            xfer_bytes += len;
         }
     }
-    else {
-        err_sys("Server buffer malloc failed");
+
+    free(buffer);
+
+    if (throughput) { /* report accumulated read (RX) and write (TX) timings */
+        printf("wolfSSL Server Benchmark %d bytes\n"
+            "\tRX      %8.3f ms (%8.3f MBps)\n"
+            "\tTX      %8.3f ms (%8.3f MBps)\n",
+            throughput,
+            rx_time * 1000, throughput / rx_time / 1024 / 1024, /* RX row */
+            tx_time * 1000, throughput / tx_time / 1024 / 1024  /* TX row */
+        );
+    }
 
     return EXIT_SUCCESS;
@@ -213,12 +250,12 @@ static void Usage(void)
     printf("-v     SSL version [0-3], SSLv3(0) - TLS1.2(3)), default %d\n",
                                  SERVER_DEFAULT_VERSION);
     printf("-l     Cipher suite list (: delimited)\n");
-    printf("-c    Certificate file,           default %s\n", svrCert);
-    printf("-k    Key file,                   default %s\n", svrKey);
-    printf("-A    Certificate Authority file, default %s\n", cliCert);
+    printf("-c    Certificate file,           default %s\n", svrCertFile);
+    printf("-k    Key file,                   default %s\n", svrKeyFile);
+    printf("-A    Certificate Authority file, default %s\n", cliCertFile);
     printf("-R    Create Ready file for external monitor default none\n");
 #ifndef NO_DH
-    printf("-D    Diffie-Hellman Params file, default %s\n", dhParam);
+    printf("-D    Diffie-Hellman Params file, default %s\n", dhParamFile);
     printf("-Z     Minimum DH key bits,        default %d\n",
                                  DEFAULT_MIN_DHKEY_BITS);
 #endif
@@ -267,6 +304,7 @@ static void Usage(void)
 #endif
     printf("-g          Return basic HTML web page\n");
     printf("-C     The number of connections to accept, default: 1\n");
+    printf("-H          Force use of the default cipher suite list\n");
 }
 
 THREAD_RETURN CYASSL_THREAD server_test(void* args)
@@ -299,7 +337,6 @@ THREAD_RETURN CYASSL_THREAD server_test(void* args)
     int    needDH = 0;
     int    useNtruKey   = 0;
     int    nonBlocking  = 0;
-    int    trackMemory  = 0;
     int    fewerPackets = 0;
     int    pkCallbacks  = 0;
     int    wc_shutdown     = 0;
@@ -319,10 +356,11 @@ THREAD_RETURN CYASSL_THREAD server_test(void* args)
     char*  alpnList = NULL;
     unsigned char alpn_opt = 0;
     char*  cipherList = NULL;
-    const char* verifyCert = cliCert;
-    const char* ourCert    = svrCert;
-    const char* ourKey     = svrKey;
-    const char* ourDhParam = dhParam;
+    int    useDefCipherList = 0;
+    const char* verifyCert = cliCertFile;
+    const char* ourCert    = svrCertFile;
+    const char* ourKey     = svrKeyFile;
+    const char* ourDhParam = dhParamFile;
     tcp_ready*  readySignal = NULL;
     int    argc = ((func_args*)args)->argc;
     char** argv = ((func_args*)args)->argv;
@@ -347,6 +385,7 @@ THREAD_RETURN CYASSL_THREAD server_test(void* args)
 #ifdef HAVE_WNR
     const char* wnrConfigFile = wnrConfig;
 #endif
+    char buffer[CYASSL_MAX_ERROR_SZ];
 
 #ifdef WOLFSSL_STATIC_MEMORY
     #if (defined(HAVE_ECC) && !defined(ALT_ECC_SIZE)) \
@@ -363,9 +402,9 @@ THREAD_RETURN CYASSL_THREAD server_test(void* args)
     ((func_args*)args)->return_code = -1; /* error state */
 
 #ifdef NO_RSA
-    verifyCert = (char*)cliEccCert;
-    ourCert    = (char*)eccCert;
-    ourKey     = (char*)eccKey;
+    verifyCert = (char*)cliEccCertFile;
+    ourCert    = (char*)eccCertFile;
+    ourKey     = (char*)eccKeyFile;
 #endif
     (void)pkCallbacks;
     (void)needDH;
@@ -390,8 +429,10 @@ THREAD_RETURN CYASSL_THREAD server_test(void* args)
 #ifdef WOLFSSL_VXWORKS
     useAnyAddr = 1;
 #else
-    while ((ch = mygetopt(argc, argv,
-               "?jdbstnNuGfrawPIR:p:v:l:A:c:k:Z:S:oO:D:L:ieB:E:q:gC:")) != -1) {
+    /* Not Used: h, m, t, x, y, z, F, J, K, M, Q, T, U, V, W, X, Y */
+    while ((ch = mygetopt(argc, argv, "?"
+                "abc:defgijk:l:nop:q:rsuv:w"
+                "A:B:C:D:E:GHIL:NO:PR:S:YZ:")) != -1) {
         switch (ch) {
             case '?' :
                 Usage();
@@ -413,12 +454,6 @@ THREAD_RETURN CYASSL_THREAD server_test(void* args)
                 usePskPlus = 1;
                 break;
 
-            case 't' :
-            #ifdef USE_WOLFSSL_MEMORY
-                trackMemory = 1;
-            #endif
-                break;
-
             case 'n' :
                 useNtruKey = 1;
                 break;
@@ -475,6 +510,10 @@ THREAD_RETURN CYASSL_THREAD server_test(void* args)
                 cipherList = myoptarg;
                 break;
 
+            case 'H' :
+                useDefCipherList = 1;
+                break;
+
             case 'A' :
                 verifyCert = myoptarg;
                 break;
@@ -625,11 +664,6 @@ THREAD_RETURN CYASSL_THREAD server_test(void* args)
         }
     }
 
-#if defined(USE_CYASSL_MEMORY) && !defined(WOLFSSL_STATIC_MEMORY)
-    if (trackMemory)
-        InitMemoryTracker();
-#endif
-
 #ifdef HAVE_WNR
     if (wc_InitNetRandom(wnrConfigFile, NULL, 5000) != 0)
         err_sys("can't load whitewood net random config file");
@@ -716,9 +750,10 @@ THREAD_RETURN CYASSL_THREAD server_test(void* args)
     wolfSSL_CTX_set_TicketEncCb(ctx, myTicketEncCb);
 #endif
 
-    if (cipherList)
+    if (cipherList && !useDefCipherList) {
         if (SSL_CTX_set_cipher_list(ctx, cipherList) != SSL_SUCCESS)
             err_sys("server can't set cipher list 1");
+    }
 
 #ifdef CYASSL_LEANPSK
     if (!usePsk) {
@@ -822,7 +857,7 @@ THREAD_RETURN CYASSL_THREAD server_test(void* args)
     if (useAnon) {
 #ifdef HAVE_ANON
         CyaSSL_CTX_allow_anon_cipher(ctx);
-        if (cipherList == NULL) {
+        if (cipherList == NULL || (cipherList && useDefCipherList)) {
             if (SSL_CTX_set_cipher_list(ctx, "ADH-AES128-SHA") != SSL_SUCCESS)
                 err_sys("server can't set cipher list 4");
         }
@@ -873,25 +908,26 @@ THREAD_RETURN CYASSL_THREAD server_test(void* args)
 
 #ifdef WOLFSSL_ASYNC_CRYPT
     ret = wolfAsync_DevOpen(&devId);
-    if (ret != 0) {
-        err_sys("Async device open failed");
+    if (ret < 0) {
+        printf("Async device open failed\nRunning without async\n");
     }
     wolfSSL_CTX_UseAsync(ctx, devId);
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
     while (1) {
         /* allow resume option */
-        if(resumeCount > 1) {
+        if (resumeCount > 1) {
             if (dtlsUDP == 0) {
                 SOCKADDR_IN_T client;
                 socklen_t client_len = sizeof(client);
                 clientfd = accept(sockfd, (struct sockaddr*)&client,
                                  (ACCEPT_THIRD_T)&client_len);
-            } else {
+            }
+            else {
                 tcp_listen(&sockfd, &port, useAnyAddr, dtlsUDP, dtlsSCTP);
                 clientfd = sockfd;
             }
-            if(WOLFSSL_SOCKET_IS_INVALID(clientfd)) {
+            if (WOLFSSL_SOCKET_IS_INVALID(clientfd)) {
                 err_sys("tcp accept failed");
             }
         }
@@ -1020,34 +1056,32 @@ THREAD_RETURN CYASSL_THREAD server_test(void* args)
         }
 #endif
 
-        do {
-#ifdef WOLFSSL_ASYNC_CRYPT
-            if (err == WC_PENDING_E) {
-                ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW);
-                if (ret < 0) { break; } else if (ret == 0) { continue; }
-            }
-#endif
-
-            err = 0; /* Reset error */
 #ifndef CYASSL_CALLBACKS
-            if (nonBlocking) {
-                ret = NonBlockingSSL_Accept(ssl);
-            }
-            else {
-                ret = SSL_accept(ssl);
-            }
-#else
+        if (nonBlocking) {
             ret = NonBlockingSSL_Accept(ssl);
+        }
+        else {
+            do {
+                err = 0; /* reset error */
+                ret = SSL_accept(ssl);
+                if (ret != SSL_SUCCESS) {
+                    err = SSL_get_error(ssl, 0);
+                #ifdef WOLFSSL_ASYNC_CRYPT
+                    if (err == WC_PENDING_E) {
+                        ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW);
+                        if (ret < 0) break;
+                    }
+                #endif
+                }
+            } while (err == WC_PENDING_E);
+        }
+#else
+        ret = NonBlockingSSL_Accept(ssl);
 #endif
-            if (ret != SSL_SUCCESS) {
-                err = SSL_get_error(ssl, 0);
-            }
-        } while (ret != SSL_SUCCESS && err == WC_PENDING_E);
-
         if (ret != SSL_SUCCESS) {
-            char buffer[CYASSL_MAX_ERROR_SZ];
             err = SSL_get_error(ssl, 0);
-            printf("error = %d, %s\n", err, ERR_error_string(err, buffer));
+            printf("SSL_accept error %d, %s\n", err,
+                                                ERR_error_string(err, buffer));
             err_sys("SSL_accept failed");
         }
 
@@ -1110,27 +1144,63 @@ THREAD_RETURN CYASSL_THREAD server_test(void* args)
             free(list);
         }
 #endif
-        if(echoData == 0 && throughput == 0) {
-            ret = SSL_read(ssl, input, sizeof(input)-1);
+        if (echoData == 0 && throughput == 0) {
+            const char* write_msg;
+            int write_msg_sz;
+
+            /* Read data */
+            do {
+                err = 0; /* reset error */
+                ret = SSL_read(ssl, input, sizeof(input)-1);
+                if (ret < 0) {
+                    err = SSL_get_error(ssl, 0);
+
+                #ifdef WOLFSSL_ASYNC_CRYPT
+                    if (err == WC_PENDING_E) {
+                        ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW);
+                        if (ret < 0) break;
+                    }
+                    else
+                #endif
+                    if (err != SSL_ERROR_WANT_READ) {
+                        printf("SSL_read input error %d, %s\n", err,
+                                                ERR_error_string(err, buffer));
+                        err_sys("SSL_read failed");
+                    }
+                }
+            } while (err == WC_PENDING_E);
             if (ret > 0) {
-                input[ret] = 0;
+                input[ret] = 0; /* null terminate message */
                 printf("Client message: %s\n", input);
-
-            }
-            else if (ret < 0) {
-                int readErr = SSL_get_error(ssl, 0);
-                if (readErr != SSL_ERROR_WANT_READ)
-                    err_sys("SSL_read failed");
             }
 
+            /* Write data */
             if (!useWebServerMsg) {
-                if (SSL_write(ssl, msg, sizeof(msg)) != sizeof(msg))
-                    err_sys("SSL_write failed");
+                write_msg = msg;
+                write_msg_sz = sizeof(msg);
             }
             else {
-                if (SSL_write(ssl, webServerMsg, sizeof(webServerMsg))
-                                                        != sizeof(webServerMsg))
-                    err_sys("SSL_write failed");
+                write_msg = webServerMsg;
+                write_msg_sz = sizeof(webServerMsg);
+            }
+            do {
+                err = 0; /* reset error */
+                ret = SSL_write(ssl, write_msg, write_msg_sz);
+                if (ret <= 0) {
+                    err = SSL_get_error(ssl, 0);
+
+                #ifdef WOLFSSL_ASYNC_CRYPT
+                    if (err == WC_PENDING_E) {
+                        ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW);
+                        if (ret < 0) break;
+                    }
+                #endif
+                }
+            } while (err == WC_PENDING_E);
+            if (ret != write_msg_sz) {
+                printf("SSL_write msg error %d, %s\n", err,
+                                                ERR_error_string(err, buffer));
+                err_sys("SSL_write failed");
             }
         }
         else {
@@ -1190,11 +1260,6 @@ THREAD_RETURN CYASSL_THREAD server_test(void* args)
     ecc_fp_free();  /* free per thread cache */
 #endif
 
-#if defined(USE_WOLFSSL_MEMORY) && !defined(WOLFSSL_STATIC_MEMORY)
-    if (trackMemory)
-        ShowMemoryTracker();
-#endif
-
 #ifdef CYASSL_TIRTOS
     fdCloseSession(Task_self());
 #endif
@@ -1217,7 +1282,6 @@ THREAD_RETURN CYASSL_THREAD server_test(void* args)
     (void) useNtruKey;
     (void) ourDhParam;
     (void) ourCert;
-    (void) trackMemory;
 #ifndef CYASSL_TIRTOS
     return 0;
 #endif
diff --git a/mcapi/crypto.h b/mcapi/crypto.h
index 6db1dd211..4d71a3f26 100644
--- a/mcapi/crypto.h
+++ b/mcapi/crypto.h
@@ -34,7 +34,7 @@
 
 /* MD5 */
 typedef struct CRYPT_MD5_CTX {
-    int holder[24];   /* big enough to hold internal, but check on init */
+    int holder[28];   /* big enough to hold internal, but check on init */
 } CRYPT_MD5_CTX;
 
 int CRYPT_MD5_Initialize(CRYPT_MD5_CTX*);
@@ -42,13 +42,13 @@ int CRYPT_MD5_DataAdd(CRYPT_MD5_CTX*, const unsigned char*, unsigned int);
 int CRYPT_MD5_Finalize(CRYPT_MD5_CTX*, unsigned char*);
 
 enum {
-    CRYPT_MD5_DIGEST_SIZE = 16 
+    CRYPT_MD5_DIGEST_SIZE = 16
 };
 
 
 /* SHA */
 typedef struct CRYPT_SHA_CTX {
-    int holder[24];   /* big enough to hold internal, but check on init */
+    int holder[28];   /* big enough to hold internal, but check on init */
 } CRYPT_SHA_CTX;
 
 int CRYPT_SHA_Initialize(CRYPT_SHA_CTX*);
@@ -62,7 +62,7 @@ enum {
 
 /* SHA-256 */
 typedef struct CRYPT_SHA256_CTX {
-    int holder[28];   /* big enough to hold internal, but check on init */
+    int holder[32];   /* big enough to hold internal, but check on init */
 } CRYPT_SHA256_CTX;
 
 int CRYPT_SHA256_Initialize(CRYPT_SHA256_CTX*);
@@ -70,13 +70,13 @@ int CRYPT_SHA256_DataAdd(CRYPT_SHA256_CTX*, const unsigned char*, unsigned int);
 int CRYPT_SHA256_Finalize(CRYPT_SHA256_CTX*, unsigned char*);
 
 enum {
-    CRYPT_SHA256_DIGEST_SIZE = 32 
+    CRYPT_SHA256_DIGEST_SIZE = 32
 };
 
 
 /* SHA-384 */
 typedef struct CRYPT_SHA384_CTX {
-    long long holder[32];   /* big enough to hold internal, but check on init */
+    long long holder[36];   /* big enough to hold internal, but check on init */
 } CRYPT_SHA384_CTX;
 
 int CRYPT_SHA384_Initialize(CRYPT_SHA384_CTX*);
@@ -98,13 +98,13 @@ int CRYPT_SHA512_DataAdd(CRYPT_SHA512_CTX*, const unsigned char*, unsigned int);
 int CRYPT_SHA512_Finalize(CRYPT_SHA512_CTX*, unsigned char*);
 
 enum {
-    CRYPT_SHA512_DIGEST_SIZE = 64 
+    CRYPT_SHA512_DIGEST_SIZE = 64
 };
 
 
 /* HMAC */
 typedef struct CRYPT_HMAC_CTX {
-    long long holder[69];   /* big enough to hold internal, but check on init */
+    long long holder[72];   /* big enough to hold internal, but check on init */
 } CRYPT_HMAC_CTX;
 
 int CRYPT_HMAC_SetKey(CRYPT_HMAC_CTX*, int, const unsigned char*, unsigned int);
@@ -113,10 +113,10 @@ int CRYPT_HMAC_Finalize(CRYPT_HMAC_CTX*, unsigned char*);
 
 /* HMAC types */
 enum {
-    CRYPT_HMAC_SHA    = 1, 
-    CRYPT_HMAC_SHA256 = 2, 
-    CRYPT_HMAC_SHA384 = 5, 
-    CRYPT_HMAC_SHA512 = 4 
+    CRYPT_HMAC_SHA    = 1,
+    CRYPT_HMAC_SHA256 = 2,
+    CRYPT_HMAC_SHA384 = 5,
+    CRYPT_HMAC_SHA512 = 4
 };
 
 
@@ -128,7 +128,7 @@ int CRYPT_HUFFMAN_DeCompress(unsigned char*, unsigned int, const unsigned char*,
 
 /* flag to use static huffman */
 enum {
-    CRYPT_HUFFMAN_COMPRESS_STATIC = 1 
+    CRYPT_HUFFMAN_COMPRESS_STATIC = 1
 };
 
 
@@ -144,7 +144,7 @@ int CRYPT_RNG_BlockGenerate(CRYPT_RNG_CTX*, unsigned char*, unsigned int);
 
 /* TDES */
 typedef struct CRYPT_TDES_CTX {
-    int holder[100];   /* big enough to hold internal, but check on init */
+    int holder[104];   /* big enough to hold internal, but check on init */
 } CRYPT_TDES_CTX;
 
 int CRYPT_TDES_KeySet(CRYPT_TDES_CTX*, const unsigned char*,
@@ -158,13 +158,13 @@ int CRYPT_TDES_CBC_Decrypt(CRYPT_TDES_CTX*, unsigned char*,
 /* key direction flags for setup */
 enum {
     CRYPT_TDES_ENCRYPTION = 0,
-    CRYPT_TDES_DECRYPTION = 1 
+    CRYPT_TDES_DECRYPTION = 1
 };
 
 
 /* AES */
 typedef struct CRYPT_AES_CTX {
-    int holder[76];   /* big enough to hold internal, but check on init */
+    int holder[78];   /* big enough to hold internal, but check on init */
 } CRYPT_AES_CTX;
 
 /* key */
@@ -262,7 +262,7 @@ int CRYPT_ERROR_StringGet(int, char*);
 
 
 #ifdef __cplusplus
-    }  /* extern "C" */ 
+    }  /* extern "C" */
 #endif
 
 
diff --git a/rpm/spec.in b/rpm/spec.in
index 9210c9778..26d57138b 100644
--- a/rpm/spec.in
+++ b/rpm/spec.in
@@ -72,8 +72,8 @@ mkdir -p $RPM_BUILD_ROOT/
 %{_docdir}/wolfssl/README.txt
 %{_libdir}/libwolfssl.la
 %{_libdir}/libwolfssl.so
-%{_libdir}/libwolfssl.so.10
-%{_libdir}/libwolfssl.so.10.0.1
+%{_libdir}/libwolfssl.so.11
+%{_libdir}/libwolfssl.so.11.0.0
 
 %files devel
 %defattr(-,root,root,-)
diff --git a/src/internal.c b/src/internal.c
old mode 100644
new mode 100755
index 2e395c39f..6d333e78b
--- a/src/internal.c
+++ b/src/internal.c
@@ -120,15 +120,44 @@ WOLFSSL_CALLBACKS needs LARGE_STATIC_BUFFERS, please add LARGE_STATIC_BUFFERS
 #endif
 
 
-typedef enum {
+enum processReply {
     doProcessInit = 0,
 #ifndef NO_WOLFSSL_SERVER
     runProcessOldClientHello,
 #endif
     getRecordLayerHeader,
     getData,
+    decryptMessage,
+    verifyMessage,
     runProcessingOneMessage
-} processReply;
+};
+
+/* sub-states for build message */
+enum buildMsgState {
+    BUILD_MSG_BEGIN = 0,
+    BUILD_MSG_SIZE,
+    BUILD_MSG_HASH,
+    BUILD_MSG_VERIFY_MAC,
+    BUILD_MSG_ENCRYPT
+};
+
+/* sub-states for cipher operations */
+enum cipherState {
+    CIPHER_STATE_BEGIN = 0,
+    CIPHER_STATE_DO,
+    CIPHER_STATE_END
+};
+
+/* sub-states for send/do key share (key exchange) */
+enum asyncState {
+    TLS_ASYNC_BEGIN = 0,
+    TLS_ASYNC_BUILD,
+    TLS_ASYNC_DO,
+    TLS_ASYNC_VERIFY,
+    TLS_ASYNC_FINALIZE,
+    TLS_ASYNC_END
+};
+
 
 #ifndef NO_OLD_TLS
 static int SSL_hmac(WOLFSSL* ssl, byte* digest, const byte* in, word32 sz,
@@ -330,7 +359,7 @@ static INLINE void c16toa(word16 u16, byte* c)
 
 #if !defined(NO_OLD_TLS) || defined(HAVE_CHACHA) || defined(HAVE_AESCCM) \
     || defined(HAVE_AESGCM) || defined(WOLFSSL_SESSION_EXPORT) \
-    || defined(WOLFSSL_DTLS)
+    || defined(WOLFSSL_DTLS) || defined(HAVE_SESSION_TICKET)
 /* convert 32 bit integer to opaque */
 static INLINE void c32toa(word32 u32, byte* c)
 {
@@ -868,7 +897,7 @@ static int dtls_export_new(WOLFSSL* ssl, byte* exp, word32 len, byte ver)
     exp[idx++] = options->minDowngrade;
     exp[idx++] = options->connectState;
     exp[idx++] = options->acceptState;
-    exp[idx++] = options->keyShareState;
+    exp[idx++] = options->asyncState;
 
     /* version of connection */
     exp[idx++] = ssl->version.major;
@@ -989,7 +1018,7 @@ static int dtls_export_load(WOLFSSL* ssl, byte* exp, word32 len, byte ver)
     options->minDowngrade   = exp[idx++];
     options->connectState   = exp[idx++];
     options->acceptState    = exp[idx++];
-    options->keyShareState  = exp[idx++];
+    options->asyncState     = exp[idx++];
 
     /* version of connection */
     if (ssl->version.major != exp[idx++] || ssl->version.minor != exp[idx++]) {
@@ -1437,8 +1466,8 @@ void SSL_CtxResourceFree(WOLFSSL_CTX* ctx)
         XFREE(ctx->suites, ctx->heap, DYNAMIC_TYPE_SUITES);
 
 #ifndef NO_DH
-    XFREE(ctx->serverDH_G.buffer, ctx->heap, DYNAMIC_TYPE_DH);
-    XFREE(ctx->serverDH_P.buffer, ctx->heap, DYNAMIC_TYPE_DH);
+    XFREE(ctx->serverDH_G.buffer, ctx->heap, DYNAMIC_TYPE_DH_BUFFER);
+    XFREE(ctx->serverDH_P.buffer, ctx->heap, DYNAMIC_TYPE_DH_BUFFER);
 #endif /* !NO_DH */
 
 #ifdef SINGLE_THREADED
@@ -1601,31 +1630,25 @@ void FreeCiphers(WOLFSSL* ssl)
 {
     (void)ssl;
 #ifdef BUILD_ARC4
-    #ifdef WOLFSSL_ASYNC_CRYPT
-    if (ssl->devId != INVALID_DEVID) {
-        wc_Arc4AsyncFree(ssl->encrypt.arc4);
-        wc_Arc4AsyncFree(ssl->decrypt.arc4);
-    }
-    #endif
+    wc_Arc4Free(ssl->encrypt.arc4);
+    wc_Arc4Free(ssl->decrypt.arc4);
     XFREE(ssl->encrypt.arc4, ssl->heap, DYNAMIC_TYPE_CIPHER);
     XFREE(ssl->decrypt.arc4, ssl->heap, DYNAMIC_TYPE_CIPHER);
 #endif
 #ifdef BUILD_DES3
-    #ifdef WOLFSSL_ASYNC_CRYPT
-    if (ssl->devId != INVALID_DEVID) {
-        wc_Des3AsyncFree(ssl->encrypt.des3);
-        wc_Des3AsyncFree(ssl->decrypt.des3);
-    }
-    #endif
+    wc_Des3Free(ssl->encrypt.des3);
+    wc_Des3Free(ssl->decrypt.des3);
     XFREE(ssl->encrypt.des3, ssl->heap, DYNAMIC_TYPE_CIPHER);
     XFREE(ssl->decrypt.des3, ssl->heap, DYNAMIC_TYPE_CIPHER);
 #endif
 #ifdef BUILD_AES
-    #ifdef WOLFSSL_ASYNC_CRYPT
-    if (ssl->devId != INVALID_DEVID) {
-        wc_AesAsyncFree(ssl->encrypt.aes);
-        wc_AesAsyncFree(ssl->decrypt.aes);
-    }
+    wc_AesFree(ssl->encrypt.aes);
+    wc_AesFree(ssl->decrypt.aes);
+    #if defined(BUILD_AESGCM) || defined(HAVE_AESCCM)
+        XFREE(ssl->decrypt.additional, ssl->heap, DYNAMIC_TYPE_AES);
+        XFREE(ssl->decrypt.nonce, ssl->heap, DYNAMIC_TYPE_AES);
+        XFREE(ssl->encrypt.additional, ssl->heap, DYNAMIC_TYPE_AES);
+        XFREE(ssl->encrypt.nonce, ssl->heap, DYNAMIC_TYPE_AES);
     #endif
     XFREE(ssl->encrypt.aes, ssl->heap, DYNAMIC_TYPE_CIPHER);
     XFREE(ssl->decrypt.aes, ssl->heap, DYNAMIC_TYPE_CIPHER);
@@ -2164,14 +2187,14 @@ void InitSuites(Suites* suites, ProtocolVersion pv, word16 haveRSA,
 #endif
 
 #ifdef BUILD_TLS_DHE_RSA_WITH_AES_256_CBC_SHA256
-    if (tls1_2 && haveDH && haveRSA) {
+    if (tls && haveDH && haveRSA) {
         suites->suites[idx++] = 0;
         suites->suites[idx++] = TLS_DHE_RSA_WITH_AES_256_CBC_SHA256;
     }
 #endif
 
 #ifdef BUILD_TLS_DHE_RSA_WITH_AES_128_CBC_SHA256
-    if (tls1_2 && haveDH && haveRSA) {
+    if (tls && haveDH && haveRSA) {
         suites->suites[idx++] = 0;
         suites->suites[idx++] = TLS_DHE_RSA_WITH_AES_128_CBC_SHA256;
     }
@@ -2202,14 +2225,14 @@ void InitSuites(Suites* suites, ProtocolVersion pv, word16 haveRSA,
 #endif
 
 #ifdef BUILD_TLS_RSA_WITH_AES_256_CBC_SHA256
-    if (tls1_2 && haveRSA) {
+    if (tls && haveRSA) {
         suites->suites[idx++] = 0;
         suites->suites[idx++] = TLS_RSA_WITH_AES_256_CBC_SHA256;
     }
 #endif
 
 #ifdef BUILD_TLS_RSA_WITH_AES_128_CBC_SHA256
-    if (tls1_2 && haveRSA) {
+    if (tls && haveRSA) {
         suites->suites[idx++] = 0;
         suites->suites[idx++] = TLS_RSA_WITH_AES_128_CBC_SHA256;
     }
@@ -2510,7 +2533,7 @@ void InitSuites(Suites* suites, ProtocolVersion pv, word16 haveRSA,
     }
 #endif
 
-#ifdef BUILD_TLS_DHE_WITH_RSA_CAMELLIA_256_CBC_SHA
+#ifdef BUILD_TLS_DHE_RSA_WITH_CAMELLIA_256_CBC_SHA
     if (tls && haveDH && haveRSA) {
         suites->suites[idx++] = 0;
         suites->suites[idx++] = TLS_DHE_RSA_WITH_CAMELLIA_256_CBC_SHA;
@@ -2697,11 +2720,7 @@ int RsaSign(WOLFSSL* ssl, const byte* in, word32 inSz, byte* out,
     /* Handle async pending response */
 #if defined(WOLFSSL_ASYNC_CRYPT)
     if (ret == WC_PENDING_E) {
-        ret = wolfAsync_EventInit(&ssl->event,
-            WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT, &key->asyncDev);
-        if (ret == 0) {
-            ret = WC_PENDING_E;
-        }
+        ret = wolfSSL_AsyncPush(ssl, &key->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
@@ -2741,11 +2760,7 @@ int RsaVerify(WOLFSSL* ssl, byte* in, word32 inSz,
     /* Handle async pending response */
 #if defined(WOLFSSL_ASYNC_CRYPT)
     if (ret == WC_PENDING_E) {
-        ret = wolfAsync_EventInit(&ssl->event,
-            WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT, &key->asyncDev);
-        if (ret == 0) {
-            ret = WC_PENDING_E;
-        }
+        ret = wolfSSL_AsyncPush(ssl, &key->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
@@ -2789,11 +2804,7 @@ int VerifyRsaSign(WOLFSSL* ssl, byte* verifySig, word32 sigSz,
     /* Handle async pending response */
 #if defined(WOLFSSL_ASYNC_CRYPT)
     if (ret == WC_PENDING_E) {
-        ret = wolfAsync_EventInit(&ssl->event,
-            WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT, &key->asyncDev);
-        if (ret == 0) {
-            ret = WC_PENDING_E;
-        }
+        ret = wolfSSL_AsyncPush(ssl, &key->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
@@ -2833,11 +2844,7 @@ int RsaDec(WOLFSSL* ssl, byte* in, word32 inSz, byte** out, word32* outSz,
     /* Handle async pending response */
 #if defined(WOLFSSL_ASYNC_CRYPT)
     if (ret == WC_PENDING_E) {
-        ret = wolfAsync_EventInit(&ssl->event,
-            WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT, &key->asyncDev);
-        if (ret == 0) {
-            ret = WC_PENDING_E;
-        }
+        ret = wolfSSL_AsyncPush(ssl, &key->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
@@ -2878,11 +2885,7 @@ int RsaEnc(WOLFSSL* ssl, const byte* in, word32 inSz, byte* out, word32* outSz,
     /* Handle async pending response */
 #if defined(WOLFSSL_ASYNC_CRYPT)
     if (ret == WC_PENDING_E) {
-        ret = wolfAsync_EventInit(&ssl->event,
-            WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT, &key->asyncDev);
-        if (ret == 0) {
-            ret =  WC_PENDING_E;
-        }
+        ret = wolfSSL_AsyncPush(ssl, &key->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
@@ -2927,11 +2930,7 @@ int EccSign(WOLFSSL* ssl, const byte* in, word32 inSz, byte* out,
     /* Handle async pending response */
 #if defined(WOLFSSL_ASYNC_CRYPT)
     if (ret == WC_PENDING_E) {
-        ret = wolfAsync_EventInit(&ssl->event,
-            WOLF_EVENT_TYPE_ASYNC_WOLFSSL, &key->asyncDev);
-        if (ret == 0) {
-            ret = WC_PENDING_E;
-        }
+        ret = wolfSSL_AsyncPush(ssl, &key->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
@@ -2944,7 +2943,7 @@ int EccVerify(WOLFSSL* ssl, const byte* in, word32 inSz, const byte* out,
     word32 outSz, ecc_key* key, byte* keyBuf, word32 keySz,
     void* ctx)
 {
-    int ret, verify;
+    int ret;
 
     (void)ssl;
     (void)keyBuf;
@@ -2956,27 +2955,23 @@ int EccVerify(WOLFSSL* ssl, const byte* in, word32 inSz, const byte* out,
 #ifdef HAVE_PK_CALLBACKS
     if (ssl->ctx->EccVerifyCb) {
         ret = ssl->ctx->EccVerifyCb(ssl, in, inSz, out, outSz, keyBuf, keySz,
-            &verify, ctx);
+            &ssl->eccVerifyRes, ctx);
     }
     else
 #endif /* HAVE_PK_CALLBACKS  */
     {
-        ret = wc_ecc_verify_hash(in, inSz, out, outSz, &verify, key);
+        ret = wc_ecc_verify_hash(in, inSz, out, outSz, &ssl->eccVerifyRes, key);
     }
 
     /* Handle async pending response */
 #if defined(WOLFSSL_ASYNC_CRYPT)
     if (ret == WC_PENDING_E) {
-        ret = wolfAsync_EventInit(&ssl->event,
-            WOLF_EVENT_TYPE_ASYNC_WOLFSSL, &key->asyncDev);
-        if (ret == 0) {
-            ret =  WC_PENDING_E;
-        }
+        ret = wolfSSL_AsyncPush(ssl, &key->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
     }
     else
 #endif /* WOLFSSL_ASYNC_CRYPT */
     {
-        ret = (ret != 0 || verify == 0) ? VERIFY_SIGN_ERROR : 0;
+        ret = (ret != 0 || ssl->eccVerifyRes == 0) ? VERIFY_SIGN_ERROR : 0;
     }
 
     WOLFSSL_LEAVE("EccVerify", ret);
@@ -3016,10 +3011,10 @@ int EccVerify(WOLFSSL* ssl, const byte* in, word32 inSz, const byte* out,
         }
         else if (ssl->options.side == WOLFSSL_SERVER_END) {
             if (ssl->specs.static_ecdh) {
-                if (ssl->sigKey == NULL) {
+                if (ssl->hsKey == NULL) {
                     return NO_PRIVATE_KEY;
                 }
-                tmpKey = (struct ecc_key*)ssl->sigKey;
+                tmpKey = (struct ecc_key*)ssl->hsKey;
             }
             else {
                 if (!ssl->eccTempKeyPresent) {
@@ -3071,11 +3066,8 @@ int EccSharedSecret(WOLFSSL* ssl, ecc_key* priv_key, ecc_key* pub_key,
     /* Handle async pending response */
 #if defined(WOLFSSL_ASYNC_CRYPT)
     if (ret == WC_PENDING_E) {
-        ret = wolfAsync_EventInit(&ssl->event,
-            WOLF_EVENT_TYPE_ASYNC_WOLFSSL, &priv_key->asyncDev);
-        if (ret == 0) {
-            ret = WC_PENDING_E;
-        }
+        ret = wolfSSL_AsyncPush(ssl, &priv_key->asyncDev,
+                                                    WC_ASYNC_FLAG_CALL_AGAIN);
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
@@ -3111,11 +3103,7 @@ int EccMakeKey(WOLFSSL* ssl, ecc_key* key, ecc_key* peer)
     /* Handle async pending response */
 #if defined(WOLFSSL_ASYNC_CRYPT)
     if (ret == WC_PENDING_E) {
-        ret = wolfAsync_EventInit(&ssl->event,
-            WOLF_EVENT_TYPE_ASYNC_WOLFSSL, &key->asyncDev);
-        if (ret == 0) {
-            ret = WC_PENDING_E;
-        }
+        ret = wolfSSL_AsyncPush(ssl, &key->asyncDev, WC_ASYNC_FLAG_NONE);
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
@@ -3131,50 +3119,49 @@ int EccMakeKey(WOLFSSL* ssl, ecc_key* key, ecc_key* peer)
 #if !defined(NO_CERTS) || !defined(NO_PSK)
 #if !defined(NO_DH)
 
-int DhGenKeyPair(WOLFSSL* ssl,
-    byte* p, word32 pSz,
-    byte* g, word32 gSz,
+int DhGenKeyPair(WOLFSSL* ssl, DhKey* dhKey,
     byte* priv, word32* privSz,
     byte* pub, word32* pubSz)
 {
     int ret;
-    DhKey dhKey;
 
-    ret = wc_InitDhKey(&dhKey);
-    if (ret == 0) {
-        ret = wc_DhSetKey(&dhKey, p, pSz, g, gSz);
-        if (ret == 0) {
-            ret = wc_DhGenerateKeyPair(&dhKey, ssl->rng, priv, privSz, pub, pubSz);
-        }
-        wc_FreeDhKey(&dhKey);
+    WOLFSSL_ENTER("DhGenKeyPair");
+
+    ret = wc_DhGenerateKeyPair(dhKey, ssl->rng, priv, privSz, pub, pubSz);
+
+    /* Handle async pending response */
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    if (ret == WC_PENDING_E) {
+        ret = wolfSSL_AsyncPush(ssl, &dhKey->asyncDev, WC_ASYNC_FLAG_NONE);
     }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+    WOLFSSL_LEAVE("DhGenKeyPair", ret);
 
     return ret;
 }
 
-int DhAgree(WOLFSSL* ssl,
-    byte* p, word32 pSz,
-    byte* g, word32 gSz,
-    byte* priv, word32* privSz,
-    byte* pub, word32* pubSz,
+int DhAgree(WOLFSSL* ssl, DhKey* dhKey,
+    const byte* priv, word32 privSz,
     const byte* otherPub, word32 otherPubSz,
     byte* agree, word32* agreeSz)
 {
     int ret;
-    DhKey dhKey;
 
-    ret = wc_InitDhKey(&dhKey);
-    if (ret == 0) {
-        ret = wc_DhSetKey(&dhKey, p, pSz, g, gSz);
-        if (ret == 0 && pub) {
-            /* for DH, encSecret is Yc, agree is pre-master */
-            ret = wc_DhGenerateKeyPair(&dhKey, ssl->rng, priv, privSz, pub, pubSz);
-        }
-        if (ret == 0) {
-            ret = wc_DhAgree(&dhKey, agree, agreeSz, priv, *privSz, otherPub, otherPubSz);
-        }
-        wc_FreeDhKey(&dhKey);
+    (void)ssl;
+
+    WOLFSSL_ENTER("DhAgree");
+
+    ret = wc_DhAgree(dhKey, agree, agreeSz, priv, privSz, otherPub, otherPubSz);
+
+    /* Handle async pending response */
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    if (ret == WC_PENDING_E) {
+        ret = wolfSSL_AsyncPush(ssl, &dhKey->asyncDev, WC_ASYNC_FLAG_NONE);
     }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+    WOLFSSL_LEAVE("DhAgree", ret);
 
     return ret;
 }
@@ -3187,8 +3174,14 @@ int DhAgree(WOLFSSL* ssl,
 /* This function inherits a WOLFSSL_CTX's fields into an SSL object.
    It is used during initialization and to switch an ssl's CTX with
    wolfSSL_Set_SSL_CTX.  Requires ssl->suites alloc and ssl-arrays with PSK
+   unless writeDup is on.
+
+   ssl      object to initialize
+   ctx      parent factory
+   writeDup flag indicating this is a write dup only
+
    SSL_SUCCESS return value on success */
-int SetSSL_CTX(WOLFSSL* ssl, WOLFSSL_CTX* ctx)
+int SetSSL_CTX(WOLFSSL* ssl, WOLFSSL_CTX* ctx, int writeDup)
 {
     byte havePSK = 0;
     byte haveAnon = 0;
@@ -3196,13 +3189,16 @@ int SetSSL_CTX(WOLFSSL* ssl, WOLFSSL_CTX* ctx)
     byte haveRSA = 0;
     (void) haveAnon; /* Squash unused var warnings */
 
-    if(!ssl || !ctx || ssl->suites == NULL)
+    if (!ssl || !ctx)
+        return BAD_FUNC_ARG;
+
+    if (ssl->suites == NULL && !writeDup)
         return BAD_FUNC_ARG;
 
     newSSL = ssl->ctx == NULL; /* Assign after null check */
 
 #ifndef NO_PSK
-    if (ctx->server_hint[0] && ssl->arrays == NULL) {
+    if (ctx->server_hint[0] && ssl->arrays == NULL && !writeDup) {
         return BAD_FUNC_ARG;  /* needed for copy below */
     }
 #endif
@@ -3307,41 +3303,45 @@ int SetSSL_CTX(WOLFSSL* ssl, WOLFSSL_CTX* ctx)
     ssl->devId = ctx->devId;
 #endif
 
+    if (writeDup == 0) {
+
 #ifndef NO_PSK
-    if (ctx->server_hint[0]) {   /* set in CTX */
-        XSTRNCPY(ssl->arrays->server_hint, ctx->server_hint, MAX_PSK_ID_LEN);
-        ssl->arrays->server_hint[MAX_PSK_ID_LEN - 1] = '\0';
-    }
+        if (ctx->server_hint[0]) {   /* set in CTX */
+            XSTRNCPY(ssl->arrays->server_hint, ctx->server_hint,MAX_PSK_ID_LEN);
+            ssl->arrays->server_hint[MAX_PSK_ID_LEN - 1] = '\0';
+        }
 #endif /* NO_PSK */
 
-    if (ctx->suites)
-        *ssl->suites = *ctx->suites;
-    else
-        XMEMSET(ssl->suites, 0, sizeof(Suites));
+        if (ctx->suites)
+            *ssl->suites = *ctx->suites;
+        else
+            XMEMSET(ssl->suites, 0, sizeof(Suites));
 
-    /* make sure server has DH parms, and add PSK if there, add NTRU too */
-    if (ssl->options.side == WOLFSSL_SERVER_END)
-        InitSuites(ssl->suites, ssl->version, haveRSA, havePSK,
+        /* make sure server has DH parms, and add PSK if there, add NTRU too */
+        if (ssl->options.side == WOLFSSL_SERVER_END)
+            InitSuites(ssl->suites, ssl->version, haveRSA, havePSK,
                    ssl->options.haveDH, ssl->options.haveNTRU,
                    ssl->options.haveECDSAsig, ssl->options.haveECC,
                    ssl->options.haveStaticECC, ssl->options.side);
-    else
-        InitSuites(ssl->suites, ssl->version, haveRSA, havePSK, TRUE,
+        else
+            InitSuites(ssl->suites, ssl->version, haveRSA, havePSK, TRUE,
                    ssl->options.haveNTRU, ssl->options.haveECDSAsig,
                    ssl->options.haveECC, ssl->options.haveStaticECC,
                    ssl->options.side);
 
 #if !defined(NO_CERTS) && !defined(WOLFSSL_SESSION_EXPORT)
-    /* make sure server has cert and key unless using PSK or Anon
-     * This should be true even if just switching ssl ctx */
-    if (ssl->options.side == WOLFSSL_SERVER_END && !havePSK && !haveAnon)
-        if (!ssl->buffers.certificate || !ssl->buffers.certificate->buffer ||
-            !ssl->buffers.key || !ssl->buffers.key->buffer) {
-            WOLFSSL_MSG("Server missing certificate and/or private key");
-            return NO_PRIVATE_KEY;
-        }
+        /* make sure server has cert and key unless using PSK or Anon
+        * This should be true even if just switching ssl ctx */
+        if (ssl->options.side == WOLFSSL_SERVER_END && !havePSK && !haveAnon)
+            if (!ssl->buffers.certificate || !ssl->buffers.certificate->buffer
+                     || !ssl->buffers.key || !ssl->buffers.key->buffer) {
+                WOLFSSL_MSG("Server missing certificate and/or private key");
+                return NO_PRIVATE_KEY;
+            }
 #endif
 
+    }  /* writeDup check */
+
 #ifdef WOLFSSL_SESSION_EXPORT
     #ifdef WOLFSSL_DTLS
     ssl->dtls_export = ctx->dtls_export; /* export function for session */
@@ -3355,11 +3355,85 @@ int SetSSL_CTX(WOLFSSL* ssl, WOLFSSL_CTX* ctx)
     return SSL_SUCCESS;
 }
 
+static int InitHashes(WOLFSSL* ssl)
+{
+    int ret;
+
+    ssl->hsHashes = (HS_Hashes*)XMALLOC(sizeof(HS_Hashes), ssl->heap,
+                                                           DYNAMIC_TYPE_HASHES);
+    if (ssl->hsHashes == NULL) {
+        WOLFSSL_MSG("HS_Hashes Memory error");
+        return MEMORY_E;
+    }
+    XMEMSET(ssl->hsHashes, 0, sizeof(HS_Hashes));
+
+#ifndef NO_OLD_TLS
+#ifndef NO_MD5
+    ret = wc_InitMd5_ex(&ssl->hsHashes->hashMd5, ssl->heap, ssl->devId);
+    if (ret != 0)
+        return ret;
+#endif
+#ifndef NO_SHA
+    ret = wc_InitSha_ex(&ssl->hsHashes->hashSha, ssl->heap, ssl->devId);
+    if (ret != 0)
+        return ret;
+#endif
+#endif /* !NO_OLD_TLS */
+#ifndef NO_SHA256
+    ret = wc_InitSha256_ex(&ssl->hsHashes->hashSha256, ssl->heap, ssl->devId);
+    if (ret != 0)
+        return ret;
+#endif
+#ifdef WOLFSSL_SHA384
+    ret = wc_InitSha384_ex(&ssl->hsHashes->hashSha384, ssl->heap, ssl->devId);
+    if (ret != 0)
+        return ret;
+#endif
+#ifdef WOLFSSL_SHA512
+    ret = wc_InitSha512_ex(&ssl->hsHashes->hashSha512, ssl->heap, ssl->devId);
+    if (ret != 0)
+        return ret;
+#endif
+
+    return ret;
+}
+
+static void FreeHashes(WOLFSSL* ssl)
+{
+    if (ssl->hsHashes) {
+#ifndef NO_OLD_TLS
+    #ifndef NO_MD5
+        wc_Md5Free(&ssl->hsHashes->hashMd5);
+    #endif
+    #ifndef NO_SHA
+        wc_ShaFree(&ssl->hsHashes->hashSha);
+    #endif
+#endif /* !NO_OLD_TLS */
+    #ifndef NO_SHA256
+        wc_Sha256Free(&ssl->hsHashes->hashSha256);
+    #endif
+    #ifdef WOLFSSL_SHA384
+        wc_Sha384Free(&ssl->hsHashes->hashSha384);
+    #endif
+    #ifdef WOLFSSL_SHA512
+        wc_Sha512Free(&ssl->hsHashes->hashSha512);
+    #endif
+
+        XFREE(ssl->hsHashes, ssl->heap, DYNAMIC_TYPE_HASHES);
+        ssl->hsHashes = NULL;
+    }
+}
+
 
 /* init everything to 0, NULL, default values before calling anything that may
    fail so that destructor has a "good" state to cleanup
+
+   ssl      object to initialize
+   ctx      parent factory
+   writeDup flag indicating this is a write dup only
+
    0 on success */
-int InitSSL(WOLFSSL* ssl, WOLFSSL_CTX* ctx)
+int InitSSL(WOLFSSL* ssl, WOLFSSL_CTX* ctx, int writeDup)
 {
     int  ret;
 
@@ -3470,6 +3544,7 @@ int InitSSL(WOLFSSL* ssl, WOLFSSL_CTX* ctx)
 
     ssl->rfd = -1;   /* set to invalid descriptor */
     ssl->wfd = -1;
+    ssl->devId = ctx->devId; /* device for async HW (from wolfAsync_DevOpen) */
 
     ssl->IOCB_ReadCtx  = &ssl->rfd;  /* prevent invalid pointer access if not */
     ssl->IOCB_WriteCtx = &ssl->wfd;  /* correctly set */
@@ -3479,12 +3554,17 @@ int InitSSL(WOLFSSL* ssl, WOLFSSL_CTX* ctx)
     ssl->IOCB_WriteCtx = &ssl->nxCtx;  /* and write */
 #endif
 
+    /* initialize states */
     ssl->options.serverState = NULL_STATE;
     ssl->options.clientState = NULL_STATE;
     ssl->options.connectState = CONNECT_BEGIN;
     ssl->options.acceptState  = ACCEPT_BEGIN;
     ssl->options.handShakeState  = NULL_STATE;
     ssl->options.processReply = doProcessInit;
+    ssl->options.asyncState = TLS_ASYNC_BEGIN;
+    ssl->options.buildMsgState = BUILD_MSG_BEGIN;
+    ssl->encrypt.state = CIPHER_STATE_BEGIN;
+    ssl->decrypt.state = CIPHER_STATE_BEGIN;
 
 #ifdef WOLFSSL_DTLS
     #ifdef WOLFSSL_SCTP
@@ -3542,68 +3622,38 @@ int InitSSL(WOLFSSL* ssl, WOLFSSL_CTX* ctx)
 
     /* all done with init, now can return errors, call other stuff */
 
-    /* arrays */
-    ssl->arrays = (Arrays*)XMALLOC(sizeof(Arrays), ssl->heap,
+    if (!writeDup) {
+        /* arrays */
+        ssl->arrays = (Arrays*)XMALLOC(sizeof(Arrays), ssl->heap,
                                                            DYNAMIC_TYPE_ARRAYS);
-    if (ssl->arrays == NULL) {
-        WOLFSSL_MSG("Arrays Memory error");
-        return MEMORY_E;
-    }
-    XMEMSET(ssl->arrays, 0, sizeof(Arrays));
+        if (ssl->arrays == NULL) {
+            WOLFSSL_MSG("Arrays Memory error");
+            return MEMORY_E;
+        }
+        XMEMSET(ssl->arrays, 0, sizeof(Arrays));
+        ssl->arrays->preMasterSecret = (byte*)XMALLOC(ENCRYPT_LEN, ssl->heap,
+            DYNAMIC_TYPE_TMP_BUFFER);
+        if (ssl->arrays->preMasterSecret == NULL) {
+            return MEMORY_E;
+        }
+        XMEMSET(ssl->arrays->preMasterSecret, 0, ENCRYPT_LEN);
 
-    /* suites */
-    ssl->suites = (Suites*)XMALLOC(sizeof(Suites), ssl->heap,
+        /* suites */
+        ssl->suites = (Suites*)XMALLOC(sizeof(Suites), ssl->heap,
                                    DYNAMIC_TYPE_SUITES);
-    if (ssl->suites == NULL) {
-        WOLFSSL_MSG("Suites Memory error");
-        return MEMORY_E;
+        if (ssl->suites == NULL) {
+            WOLFSSL_MSG("Suites Memory error");
+            return MEMORY_E;
+        }
     }
 
     /* Initialize SSL with the appropriate fields from it's ctx */
-    /* requires valid arrays and suites */
-    if((ret =  SetSSL_CTX(ssl, ctx)) != SSL_SUCCESS)
+    /* requires valid arrays and suites unless this is a write dup */
+    if ((ret =  SetSSL_CTX(ssl, ctx, writeDup)) != SSL_SUCCESS)
         return ret;
 
     ssl->options.dtls = ssl->version.major == DTLS_MAJOR;
 
-    /* hsHashes */
-    ssl->hsHashes = (HS_Hashes*)XMALLOC(sizeof(HS_Hashes), ssl->heap,
-                                                           DYNAMIC_TYPE_HASHES);
-    if (ssl->hsHashes == NULL) {
-        WOLFSSL_MSG("HS_Hashes Memory error");
-        return MEMORY_E;
-    }
-
-#ifndef NO_OLD_TLS
-#ifndef NO_MD5
-    wc_InitMd5(&ssl->hsHashes->hashMd5);
-#endif
-#ifndef NO_SHA
-    ret = wc_InitSha(&ssl->hsHashes->hashSha);
-    if (ret != 0) {
-        return ret;
-    }
-#endif
-#endif
-#ifndef NO_SHA256
-    ret = wc_InitSha256(&ssl->hsHashes->hashSha256);
-    if (ret != 0) {
-        return ret;
-    }
-#endif
-#ifdef WOLFSSL_SHA384
-    ret = wc_InitSha384(&ssl->hsHashes->hashSha384);
-    if (ret != 0) {
-        return ret;
-    }
-#endif
-#ifdef WOLFSSL_SHA512
-    ret = wc_InitSha512(&ssl->hsHashes->hashSha512);
-    if (ret != 0) {
-        return ret;
-    }
-#endif
-
 #ifdef SINGLE_THREADED
     ssl->rng = ctx->rng;   /* CTX may have one, if so use it */
 #endif
@@ -3620,7 +3670,45 @@ int InitSSL(WOLFSSL* ssl, WOLFSSL_CTX* ctx)
 
         /* FIPS RNG API does not accept a heap hint */
 #ifndef HAVE_FIPS
-        if ( (ret = wc_InitRng_ex(ssl->rng, ssl->heap)) != 0) {
+        if ( (ret = wc_InitRng_ex(ssl->rng, ssl->heap, ssl->devId)) != 0) {
+            WOLFSSL_MSG("RNG Init error");
+            return ret;
+        }
+#else
+        if ( (ret = wc_InitRng(ssl->rng)) != 0) {
+            WOLFSSL_MSG("RNG Init error");
+            return ret;
+        }
+#endif
+    }
+
+    if (writeDup) {
+        /* all done */
+        return 0;
+    }
+
+    /* hsHashes */
+    ret = InitHashes(ssl);
+    if (ret != 0)
+        return ret;
+
+#ifdef SINGLE_THREADED
+    ssl->rng = ctx->rng;   /* CTX may have one, if so use it */
+#endif
+
+    if (ssl->rng == NULL) {
+        /* RNG */
+        ssl->rng = (WC_RNG*)XMALLOC(sizeof(WC_RNG), ssl->heap,DYNAMIC_TYPE_RNG);
+        if (ssl->rng == NULL) {
+            WOLFSSL_MSG("RNG Memory error");
+            return MEMORY_E;
+        }
+        XMEMSET(ssl->rng, 0, sizeof(WC_RNG));
+        ssl->options.weOwnRng = 1;
+
+        /* FIPS RNG API does not accept a heap hint */
+#ifndef HAVE_FIPS
+        if ( (ret = wc_InitRng_ex(ssl->rng, ssl->heap, ssl->devId)) != 0) {
             WOLFSSL_MSG("RNG Init error");
             return ret;
         }
@@ -3657,12 +3745,16 @@ int InitSSL(WOLFSSL* ssl, WOLFSSL_CTX* ctx)
 /* free use of temporary arrays */
 void FreeArrays(WOLFSSL* ssl, int keep)
 {
-    if (ssl->arrays && keep) {
-        /* keeps session id for user retrieval */
-        XMEMCPY(ssl->session.sessionID, ssl->arrays->sessionID, ID_LEN);
-        ssl->session.sessionIDSz = ssl->arrays->sessionIDSz;
-    }
     if (ssl->arrays) {
+        if (keep) {
+            /* keeps session id for user retrieval */
+            XMEMCPY(ssl->session.sessionID, ssl->arrays->sessionID, ID_LEN);
+            ssl->session.sessionIDSz = ssl->arrays->sessionIDSz;
+        }
+        if (ssl->arrays->preMasterSecret) {
+            XFREE(ssl->arrays->preMasterSecret, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            ssl->arrays->preMasterSecret = NULL;
+        }
         XFREE(ssl->arrays->pendingMsg, ssl->heap, DYNAMIC_TYPE_ARRAYS);
         ssl->arrays->pendingMsg = NULL;
         ForceZero(ssl->arrays, sizeof(Arrays)); /* clear arrays struct */
@@ -3671,6 +3763,110 @@ void FreeArrays(WOLFSSL* ssl, int keep)
     ssl->arrays = NULL;
 }
 
+void FreeKey(WOLFSSL* ssl, int type, void** pKey)
+{
+    if (ssl && pKey && *pKey) {
+        switch (type) {
+        #ifndef NO_RSA
+            case DYNAMIC_TYPE_RSA:
+                wc_FreeRsaKey((RsaKey*)*pKey);
+                break;
+        #endif /* ! NO_RSA */
+        #ifdef HAVE_ECC
+            case DYNAMIC_TYPE_ECC:
+                wc_ecc_free((ecc_key*)*pKey);
+                break;
+        #endif /* HAVE_ECC */
+        #ifndef NO_DH
+            case DYNAMIC_TYPE_DH:
+                wc_FreeDhKey((DhKey*)*pKey);
+                break;
+        #endif /* !NO_DH */
+            default:
+                break;
+        }
+        XFREE(*pKey, ssl->heap, type);
+
+        /* Reset pointer */
+        *pKey = NULL;
+    }
+}
+
+int AllocKey(WOLFSSL* ssl, int type, void** pKey)
+{
+    int ret = BAD_FUNC_ARG;
+    int sz = 0;
+
+    if (ssl == NULL || pKey == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* Sanity check key destination */
+    if (*pKey != NULL) {
+        WOLFSSL_MSG("Key already present!");
+        return BAD_STATE_E;
+    }
+
+    /* Determine size */
+    switch (type) {
+        case DYNAMIC_TYPE_RSA:
+        #ifndef NO_RSA
+            sz = sizeof(RsaKey);
+        #endif /* ! NO_RSA */
+            break;
+        case DYNAMIC_TYPE_ECC:
+        #ifdef HAVE_ECC
+            sz = sizeof(ecc_key);
+        #endif /* HAVE_ECC */
+            break;
+        case DYNAMIC_TYPE_DH:
+        #ifndef NO_DH
+            sz = sizeof(DhKey);
+        #endif /* !NO_DH */
+            break;
+        default:
+            return BAD_FUNC_ARG;
+    }
+
+    if (sz == 0) {
+        return NOT_COMPILED_IN;
+    }
+
+    /* Allocate memory for key */
+    *pKey = XMALLOC(sz, ssl->heap, type);
+    if (*pKey == NULL) {
+        return MEMORY_E;
+    }
+
+    /* Initialize key */
+    switch (type) {
+    #ifndef NO_RSA
+        case DYNAMIC_TYPE_RSA:
+            ret = wc_InitRsaKey_ex((RsaKey*)*pKey, ssl->heap, ssl->devId);
+            break;
+    #endif /* ! NO_RSA */
+    #ifdef HAVE_ECC
+        case DYNAMIC_TYPE_ECC:
+            ret = wc_ecc_init_ex((ecc_key*)*pKey, ssl->heap, ssl->devId);
+            break;
+    #endif /* HAVE_ECC */
+    #ifndef NO_DH
+        case DYNAMIC_TYPE_DH:
+            ret = wc_InitDhKey_ex((DhKey*)*pKey, ssl->heap, ssl->devId);
+            break;
+    #endif /* !NO_DH */
+        default:
+            return BAD_FUNC_ARG;
+    }
+
+    /* On error free the newly allocated key */
+    if (ret != 0) {
+        FreeKey(ssl, type, pKey);
+    }
+
+    return ret;
+}
+
 static void FreeKeyExchange(WOLFSSL* ssl)
 {
     /* Cleanup signature buffer */
@@ -3687,33 +3883,21 @@ static void FreeKeyExchange(WOLFSSL* ssl)
         ssl->buffers.digest.length = 0;
     }
 
-    /* Free sigKey */
-    if (ssl->sigKey) {
-        switch (ssl->sigType)
-        {
-        #ifndef NO_RSA
-            case DYNAMIC_TYPE_RSA:
-            {
-                wc_FreeRsaKey((RsaKey*)ssl->sigKey);
-                XFREE(ssl->sigKey, ssl->heap, DYNAMIC_TYPE_RSA);
-                break;
-            }
-        #endif /* ! NO_RSA */
-        #ifdef HAVE_ECC
-            case DYNAMIC_TYPE_ECC:
-            {
-                wc_ecc_free((ecc_key*)ssl->sigKey);
-                XFREE(ssl->sigKey, ssl->heap, DYNAMIC_TYPE_ECC);
-                break;
-            }
-        #endif /* HAVE_ECC */
-            default:
-                break;
-        }
-        /* Reset type and pointer */
-        ssl->sigType = 0;
-        ssl->sigKey = NULL;
+    /* Free handshake key */
+    FreeKey(ssl, ssl->hsType, &ssl->hsKey);
+
+#ifndef NO_DH
+    /* Free temp DH key */
+    FreeKey(ssl, DYNAMIC_TYPE_DH, (void**)&ssl->buffers.serverDH_Key);
+#endif
+
+    /* Cleanup async */
+#ifdef WOLFSSL_ASYNC_CRYPT
+    if (ssl->async.freeArgs) {
+        ssl->async.freeArgs(ssl, ssl->async.args);
+        ssl->async.freeArgs = NULL;
     }
+#endif
 }
 
 /* In case holding SSL object in array and don't want to free actual ssl */
@@ -3733,23 +3917,23 @@ void SSL_ResourceFree(WOLFSSL* ssl)
         XFREE(ssl->rng, ssl->heap, DYNAMIC_TYPE_RNG);
     }
     XFREE(ssl->suites, ssl->heap, DYNAMIC_TYPE_SUITES);
-    XFREE(ssl->hsHashes, ssl->heap, DYNAMIC_TYPE_HASHES);
+    FreeHashes(ssl);
     XFREE(ssl->buffers.domainName.buffer, ssl->heap, DYNAMIC_TYPE_DOMAIN);
 
     /* clear keys struct after session */
-    ForceZero(&(ssl->keys), sizeof(Keys));
+    ForceZero(&ssl->keys, sizeof(Keys));
 
 #ifndef NO_DH
     if (ssl->buffers.serverDH_Priv.buffer) {
         ForceZero(ssl->buffers.serverDH_Priv.buffer,
                                              ssl->buffers.serverDH_Priv.length);
     }
-    XFREE(ssl->buffers.serverDH_Priv.buffer, ssl->heap, DYNAMIC_TYPE_DH);
-    XFREE(ssl->buffers.serverDH_Pub.buffer, ssl->heap, DYNAMIC_TYPE_DH);
+    XFREE(ssl->buffers.serverDH_Priv.buffer, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
+    XFREE(ssl->buffers.serverDH_Pub.buffer, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
     /* parameters (p,g) may be owned by ctx */
     if (ssl->buffers.weOwnDH || ssl->options.side == WOLFSSL_CLIENT_END) {
-        XFREE(ssl->buffers.serverDH_G.buffer, ssl->heap, DYNAMIC_TYPE_DH);
-        XFREE(ssl->buffers.serverDH_P.buffer, ssl->heap, DYNAMIC_TYPE_DH);
+        XFREE(ssl->buffers.serverDH_G.buffer, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
+        XFREE(ssl->buffers.serverDH_P.buffer, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
     }
 #endif /* !NO_DH */
 #ifndef NO_CERTS
@@ -3757,10 +3941,8 @@ void SSL_ResourceFree(WOLFSSL* ssl)
     wolfSSL_UnloadCertsKeys(ssl);
 #endif
 #ifndef NO_RSA
-    if (ssl->peerRsaKey) {
-        wc_FreeRsaKey(ssl->peerRsaKey);
-        XFREE(ssl->peerRsaKey, ssl->heap, DYNAMIC_TYPE_RSA);
-    }
+    FreeKey(ssl, DYNAMIC_TYPE_RSA, (void**)&ssl->peerRsaKey);
+    ssl->peerRsaKeyPresent = 0;
 #endif
     if (ssl->buffers.inputBuffer.dynamicFlag)
         ShrinkInputBuffer(ssl, FORCED_FREE);
@@ -3789,21 +3971,12 @@ void SSL_ResourceFree(WOLFSSL* ssl)
     FreeStreams(ssl);
 #endif
 #ifdef HAVE_ECC
-    if (ssl->peerEccKey) {
-        if (ssl->peerEccKeyPresent)
-            wc_ecc_free(ssl->peerEccKey);
-        XFREE(ssl->peerEccKey, ssl->heap, DYNAMIC_TYPE_ECC);
-    }
-    if (ssl->peerEccDsaKey) {
-        if (ssl->peerEccDsaKeyPresent)
-            wc_ecc_free(ssl->peerEccDsaKey);
-        XFREE(ssl->peerEccDsaKey, ssl->heap, DYNAMIC_TYPE_ECC);
-    }
-    if (ssl->eccTempKey) {
-        if (ssl->eccTempKeyPresent)
-            wc_ecc_free(ssl->eccTempKey);
-        XFREE(ssl->eccTempKey, ssl->heap, DYNAMIC_TYPE_ECC);
-    }
+    FreeKey(ssl, DYNAMIC_TYPE_ECC, (void**)&ssl->peerEccKey);
+    ssl->peerEccKeyPresent = 0;
+    FreeKey(ssl, DYNAMIC_TYPE_ECC, (void**)&ssl->peerEccDsaKey);
+    ssl->peerEccDsaKeyPresent = 0;
+    FreeKey(ssl, DYNAMIC_TYPE_ECC, (void**)&ssl->eccTempKey);
+    ssl->eccTempKeyPresent = 0;
 #endif /* HAVE_ECC */
 #ifdef HAVE_PK_CALLBACKS
     #ifdef HAVE_ECC
@@ -3842,6 +4015,11 @@ void SSL_ResourceFree(WOLFSSL* ssl)
 #ifdef HAVE_EXT_CACHE
     wolfSSL_SESSION_free(ssl->extSession);
 #endif
+#ifdef HAVE_WRITE_DUP
+    if (ssl->dupWrite) {
+        FreeWriteDup(ssl);
+    }
+#endif
 
 #ifdef WOLFSSL_STATIC_MEMORY
     /* check if using fixed io buffers and free them */
@@ -3884,15 +4062,6 @@ void SSL_ResourceFree(WOLFSSL* ssl)
 /* Free any handshake resources no longer needed */
 void FreeHandshakeResources(WOLFSSL* ssl)
 {
-#ifndef NO_MD5
-    wc_Md5Free(&ssl->hsHashes->hashMd5);
-#endif
-#ifndef NO_SHA
-    wc_ShaFree(&ssl->hsHashes->hashSha);
-#endif
-#ifndef NO_SHA256
-    wc_Sha256Free(&ssl->hsHashes->hashSha256);
-#endif
 
 #ifdef HAVE_SECURE_RENEGOTIATION
     if (ssl->secure_renegotiation && ssl->secure_renegotiation->enabled) {
@@ -3910,8 +4079,7 @@ void FreeHandshakeResources(WOLFSSL* ssl)
     ssl->suites = NULL;
 
     /* hsHashes */
-    XFREE(ssl->hsHashes, ssl->heap, DYNAMIC_TYPE_HASHES);
-    ssl->hsHashes = NULL;
+    FreeHashes(ssl);
 
     /* RNG */
     if (ssl->specs.cipher_type == stream || ssl->options.tls1_1 == 0) {
@@ -3939,56 +4107,32 @@ void FreeHandshakeResources(WOLFSSL* ssl)
 
 #ifndef NO_RSA
     /* peerRsaKey */
-    if (ssl->peerRsaKey) {
-        wc_FreeRsaKey(ssl->peerRsaKey);
-        XFREE(ssl->peerRsaKey, ssl->heap, DYNAMIC_TYPE_RSA);
-        ssl->peerRsaKey = NULL;
-    }
+    FreeKey(ssl, DYNAMIC_TYPE_RSA, (void**)&ssl->peerRsaKey);
+    ssl->peerRsaKeyPresent = 0;
 #endif
 
 #ifdef HAVE_ECC
-    if (ssl->peerEccKey)
-    {
-        if (ssl->peerEccKeyPresent) {
-            wc_ecc_free(ssl->peerEccKey);
-            ssl->peerEccKeyPresent = 0;
-        }
-        XFREE(ssl->peerEccKey, ssl->heap, DYNAMIC_TYPE_ECC);
-        ssl->peerEccKey = NULL;
-    }
-    if (ssl->peerEccDsaKey)
-    {
-        if (ssl->peerEccDsaKeyPresent) {
-            wc_ecc_free(ssl->peerEccDsaKey);
-            ssl->peerEccDsaKeyPresent = 0;
-        }
-        XFREE(ssl->peerEccDsaKey, ssl->heap, DYNAMIC_TYPE_ECC);
-        ssl->peerEccDsaKey = NULL;
-    }
-    if (ssl->eccTempKey)
-    {
-        if (ssl->eccTempKeyPresent) {
-            wc_ecc_free(ssl->eccTempKey);
-            ssl->eccTempKeyPresent = 0;
-        }
-        XFREE(ssl->eccTempKey, ssl->heap, DYNAMIC_TYPE_ECC);
-        ssl->eccTempKey = NULL;
-    }
+    FreeKey(ssl, DYNAMIC_TYPE_ECC, (void**)&ssl->peerEccKey);
+    ssl->peerEccKeyPresent = 0;
+    FreeKey(ssl, DYNAMIC_TYPE_ECC, (void**)&ssl->peerEccDsaKey);
+    ssl->peerEccDsaKeyPresent = 0;
+    FreeKey(ssl, DYNAMIC_TYPE_ECC, (void**)&ssl->eccTempKey);
+    ssl->eccTempKeyPresent = 0;
 #endif /* HAVE_ECC */
 #ifndef NO_DH
     if (ssl->buffers.serverDH_Priv.buffer) {
         ForceZero(ssl->buffers.serverDH_Priv.buffer,
                                              ssl->buffers.serverDH_Priv.length);
     }
-    XFREE(ssl->buffers.serverDH_Priv.buffer, ssl->heap, DYNAMIC_TYPE_DH);
+    XFREE(ssl->buffers.serverDH_Priv.buffer, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
     ssl->buffers.serverDH_Priv.buffer = NULL;
-    XFREE(ssl->buffers.serverDH_Pub.buffer, ssl->heap, DYNAMIC_TYPE_DH);
+    XFREE(ssl->buffers.serverDH_Pub.buffer, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
     ssl->buffers.serverDH_Pub.buffer = NULL;
     /* parameters (p,g) may be owned by ctx */
     if (ssl->buffers.weOwnDH || ssl->options.side == WOLFSSL_CLIENT_END) {
-        XFREE(ssl->buffers.serverDH_G.buffer, ssl->heap, DYNAMIC_TYPE_DH);
+        XFREE(ssl->buffers.serverDH_G.buffer, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
         ssl->buffers.serverDH_G.buffer = NULL;
-        XFREE(ssl->buffers.serverDH_P.buffer, ssl->heap, DYNAMIC_TYPE_DH);
+        XFREE(ssl->buffers.serverDH_P.buffer, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
         ssl->buffers.serverDH_P.buffer = NULL;
     }
 #endif /* !NO_DH */
@@ -4557,7 +4701,7 @@ int DtlsMsgPoolSend(WOLFSSL* ssl, int sendOnlyFirstPacket)
                 output = ssl->buffers.outputBuffer.buffer +
                          ssl->buffers.outputBuffer.length;
                 sendSz = BuildMessage(ssl, output, sendSz, input, inputSz,
-                                      handshake, 0, 0);
+                                      handshake, 0, 0, 0);
                 if (sendSz < 0)
                     return BUILD_MSG_ERROR;
 
@@ -4878,7 +5022,9 @@ static int HashOutput(WOLFSSL* ssl, const byte* output, int sz, int ivSz)
 static int HashInput(WOLFSSL* ssl, const byte* input, int sz)
 {
     int ret = 0;
-    const byte* adj = input - HANDSHAKE_HEADER_SZ;
+    const byte* adj;
+
+    adj = input - HANDSHAKE_HEADER_SZ;
     sz += HANDSHAKE_HEADER_SZ;
 
     (void)adj;
@@ -5303,7 +5449,6 @@ int GrowInputBuffer(WOLFSSL* ssl, int size, int usedLength)
 /* check available size into output buffer, make room if needed */
 int CheckAvailableSize(WOLFSSL *ssl, int size)
 {
-
     if (size < 0) {
         WOLFSSL_MSG("CheckAvailableSize() called with negative number");
         return BAD_FUNC_ARG;
@@ -5518,96 +5663,93 @@ static const byte PAD2[PAD_MD5] =
 
 static int BuildMD5(WOLFSSL* ssl, Hashes* hashes, const byte* sender)
 {
-
+    int ret;
     byte md5_result[MD5_DIGEST_SIZE];
-
 #ifdef WOLFSSL_SMALL_STACK
-        Md5* md5   = (Md5*)XMALLOC(sizeof(Md5), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-        Md5* md5_2 = (Md5*)XMALLOC(sizeof(Md5), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-
-        if (md5 == NULL || md5_2 == NULL) {
-            if (md5) {
-                XFREE(md5, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-            }
-            if (md5_2) {
-                XFREE(md5_2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-            }
-            return MEMORY_E;
-        }
+    Md5* md5 = (Md5*)XMALLOC(sizeof(Md5), ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (md5 == NULL)
+        return MEMORY_E;
 #else
-        Md5 md5[1];
-        Md5 md5_2[1];
+    Md5  md5[1];
 #endif
 
     /* make md5 inner */
-    md5[0] = ssl->hsHashes->hashMd5 ; /* Save current position */
-
-    wc_Md5Update(&ssl->hsHashes->hashMd5, sender, SIZEOF_SENDER);
-    wc_Md5Update(&ssl->hsHashes->hashMd5, ssl->arrays->masterSecret,SECRET_LEN);
-    wc_Md5Update(&ssl->hsHashes->hashMd5, PAD1, PAD_MD5);
-    wc_Md5GetHash(&ssl->hsHashes->hashMd5, md5_result);
-    wc_Md5RestorePos(&ssl->hsHashes->hashMd5, md5) ; /* Restore current position */
+    ret = wc_Md5Copy(&ssl->hsHashes->hashMd5, md5);
+    if (ret == 0)
+        ret = wc_Md5Update(md5, sender, SIZEOF_SENDER);
+    if (ret == 0)
+        ret = wc_Md5Update(md5, ssl->arrays->masterSecret,SECRET_LEN);
+    if (ret == 0)
+        ret = wc_Md5Update(md5, PAD1, PAD_MD5);
+    if (ret == 0)
+        ret = wc_Md5Final(md5, md5_result);
 
     /* make md5 outer */
-    wc_InitMd5(md5_2) ;
-    wc_Md5Update(md5_2, ssl->arrays->masterSecret,SECRET_LEN);
-    wc_Md5Update(md5_2, PAD2, PAD_MD5);
-    wc_Md5Update(md5_2, md5_result, MD5_DIGEST_SIZE);
-    wc_Md5Final(md5_2, hashes->md5);
+    if (ret == 0) {
+        ret = wc_InitMd5_ex(md5, ssl->heap, ssl->devId);
+        if (ret == 0) {
+            ret = wc_Md5Update(md5, ssl->arrays->masterSecret,SECRET_LEN);
+            if (ret == 0)
+                ret = wc_Md5Update(md5, PAD2, PAD_MD5);
+            if (ret == 0)
+                ret = wc_Md5Update(md5, md5_result, MD5_DIGEST_SIZE);
+            if (ret == 0)
+                ret = wc_Md5Final(md5, hashes->md5);
+            wc_Md5Free(md5);
+        }
+    }
 
 #ifdef WOLFSSL_SMALL_STACK
-    XFREE(md5, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    XFREE(md5_2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    XFREE(md5, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
 
-    return 0;
+    return ret;
 }
 
 
 /* calculate SHA hash for finished */
 static int BuildSHA(WOLFSSL* ssl, Hashes* hashes, const byte* sender)
 {
+    int ret;
     byte sha_result[SHA_DIGEST_SIZE];
-
 #ifdef WOLFSSL_SMALL_STACK
-        Sha* sha = (Sha*)XMALLOC(sizeof(Sha), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-        Sha* sha2 = (Sha*)XMALLOC(sizeof(Sha), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-
-        if (sha == NULL || sha2 == NULL) {
-            if (sha) {
-                XFREE(sha, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-            }
-            if (sha2) {
-                XFREE(sha2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-            }
-            return MEMORY_E;
-        }
+    Sha* sha = (Sha*)XMALLOC(sizeof(Sha), ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (sha == NULL)
+        return MEMORY_E;
 #else
-        Sha sha[1];
-        Sha sha2[1] ;
+    Sha  sha[1];
 #endif
     /* make sha inner */
-    sha[0] = ssl->hsHashes->hashSha ; /* Save current position */
-
-    wc_ShaUpdate(&ssl->hsHashes->hashSha, sender, SIZEOF_SENDER);
-    wc_ShaUpdate(&ssl->hsHashes->hashSha, ssl->arrays->masterSecret,SECRET_LEN);
-    wc_ShaUpdate(&ssl->hsHashes->hashSha, PAD1, PAD_SHA);
-    wc_ShaGetHash(&ssl->hsHashes->hashSha, sha_result);
-    wc_ShaRestorePos(&ssl->hsHashes->hashSha, sha) ; /* Restore current position */
+    ret = wc_ShaCopy(&ssl->hsHashes->hashSha, sha); /* work on a copy */
+    if (ret == 0)
+        ret = wc_ShaUpdate(sha, sender, SIZEOF_SENDER);
+    if (ret == 0)
+        ret = wc_ShaUpdate(sha, ssl->arrays->masterSecret,SECRET_LEN);
+    if (ret == 0)
+        ret = wc_ShaUpdate(sha, PAD1, PAD_SHA);
+    if (ret == 0)
+        ret = wc_ShaFinal(sha, sha_result);
 
     /* make sha outer */
-    wc_InitSha(sha2) ;
-    wc_ShaUpdate(sha2, ssl->arrays->masterSecret,SECRET_LEN);
-    wc_ShaUpdate(sha2, PAD2, PAD_SHA);
-    wc_ShaUpdate(sha2, sha_result, SHA_DIGEST_SIZE);
-    wc_ShaFinal(sha2, hashes->sha);
+    if (ret == 0) {
+        ret = wc_InitSha_ex(sha, ssl->heap, ssl->devId);
+        if (ret == 0) {
+            ret = wc_ShaUpdate(sha, ssl->arrays->masterSecret,SECRET_LEN);
+            if (ret == 0)
+                ret = wc_ShaUpdate(sha, PAD2, PAD_SHA);
+            if (ret == 0)
+                ret = wc_ShaUpdate(sha, sha_result, SHA_DIGEST_SIZE);
+            if (ret == 0)
+                ret = wc_ShaFinal(sha, hashes->sha);
+            wc_ShaFree(sha);
+        }
+    }
 
 #ifdef WOLFSSL_SMALL_STACK
-    XFREE(sha, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    XFREE(sha2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    XFREE(sha, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
 
-    return 0;
+    return ret;
 }
 #endif
 
@@ -5615,15 +5757,14 @@ static int BuildSHA(WOLFSSL* ssl, Hashes* hashes, const byte* sender)
 static int BuildFinished(WOLFSSL* ssl, Hashes* hashes, const byte* sender)
 {
     int ret = 0;
+#ifdef WOLFSSL_SHA384
 #ifdef WOLFSSL_SMALL_STACK
-    #ifdef WOLFSSL_SHA384
-        Sha384* sha384 = (Sha384*)XMALLOC(sizeof(Sha384), NULL,                                                                        DYNAMIC_TYPE_TMP_BUFFER);
-    #endif
+    Sha384* sha384 = (Sha384*)XMALLOC(sizeof(Sha384), ssl->heap,
+                                                DYNAMIC_TYPE_TMP_BUFFER);
 #else
-    #ifdef WOLFSSL_SHA384
-        Sha384 sha384[1];
-    #endif
-#endif
+    Sha384 sha384[1];
+#endif /* WOLFSSL_SMALL_STACK */
+#endif /* WOLFSSL_SHA384 */
 
 #ifdef WOLFSSL_SMALL_STACK
     if (ssl == NULL
@@ -5632,7 +5773,7 @@ static int BuildFinished(WOLFSSL* ssl, Hashes* hashes, const byte* sender)
     #endif
         ) {
     #ifdef WOLFSSL_SHA384
-        XFREE(sha384, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(sha384, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
     #endif
         return MEMORY_E;
     }
@@ -5664,9 +5805,9 @@ static int BuildFinished(WOLFSSL* ssl, Hashes* hashes, const byte* sender)
     #endif
     }
 
-#ifdef WOLFSSL_SMALL_STACK
 #ifdef WOLFSSL_SHA384
-    XFREE(sha384, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(sha384, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
 #endif
 
@@ -6213,8 +6354,8 @@ static int MatchDomainName(const char* pattern, int len, const char* str)
 
         if (p == '*') {
             while (--len > 0 &&
-                         (p = (char)XTOLOWER((unsigned char)*pattern++)) == '*')
-                ;
+                (p = (char)XTOLOWER((unsigned char)*pattern++)) == '*') {
+            }
 
             if (len == 0)
                 p = '\0';
@@ -6567,721 +6708,952 @@ int CopyDecodedToX509(WOLFSSL_X509* x509, DecodedCert* dCert)
 
 #endif /* KEEP_PEER_CERT || SESSION_CERTS */
 
+typedef struct DoCertArgs { /* working state used by DoCertificate */
+    buffer*      certs;  /* MAX_CHAIN_DEPTH entries; each points into input */
+    DecodedCert* dCert;  /* heap scratch cert, reused for every chain entry */
+    char*  domain;       /* freed by FreeDoCertArgs; NOTE(review): allocation site not shown here */
+    word32 idx;          /* current read offset into input */
+    word32 begin;        /* *inOutIdx on entry; base for the size checks */
+    int    totalCerts; /* number of certs in certs buffer */
+    int    count;        /* certs still to process; starts at totalCerts */
+    int    dCertInit;    /* 1 after InitDecodedCert, 0 after FreeDecodedCert */
+    int    certIdx;      /* index in certs of the cert being parsed */
+#ifdef WOLFSSL_TRUST_PEER_CERT
+    byte haveTrustPeer; /* was cert verified by loaded trusted peer cert */
+#endif
+} DoCertArgs;
+
+static void FreeDoCertArgs(WOLFSSL* ssl, void* pArgs)
+{   /* frees the heap members of a DoCertArgs and NULLs each pointer */
+    DoCertArgs* args = (DoCertArgs*)pArgs; /* void* so it fits async.freeArgs */
+
+    (void)ssl; /* presumably for builds where XFREE ignores its heap arg - confirm */
+
+    if (args->domain) {
+        XFREE(args->domain, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        args->domain = NULL;
+    }
+    if (args->certs) {
+        XFREE(args->certs, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        args->certs = NULL;
+    }
+    if (args->dCert) {
+        if (args->dCertInit) { /* only tear down a cert that is still initialized */
+            FreeDecodedCert(args->dCert);
+            args->dCertInit = 0;
+        }
+        XFREE(args->dCert, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        args->dCert = NULL;
+    }
+}
 
 static int DoCertificate(WOLFSSL* ssl, byte* input, word32* inOutIdx,
                                                                 word32 size)
 {
-    word32 listSz;
-    word32 begin = *inOutIdx;
-    int    ret = 0;
-    int    anyError = 0;
-    int    totalCerts = 0;    /* number of certs in certs buffer */
-    int    count;
-    buffer certs[MAX_CHAIN_DEPTH];
-
-#ifdef WOLFSSL_SMALL_STACK
-    char*                  domain = NULL;
-    DecodedCert*           dCert  = NULL;
-    WOLFSSL_X509_STORE_CTX* store  = NULL;
+    int ret = 0, lastErr = 0;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    DoCertArgs* args = (DoCertArgs*)ssl->async.args;
+    typedef char args_test[sizeof(ssl->async.args) >= sizeof(*args) ? 1 : -1];
+    (void)sizeof(args_test);
 #else
-    char                   domain[ASN_NAME_MAX];
-    DecodedCert            dCert[1];
-    WOLFSSL_X509_STORE_CTX  store[1];
+    DoCertArgs  args[1];
 #endif
 
-#ifdef WOLFSSL_TRUST_PEER_CERT
-    byte haveTrustPeer = 0; /* was cert verified by loaded trusted peer cert */
-#endif
+    WOLFSSL_ENTER("DoCertificate");
 
-    #ifdef WOLFSSL_CALLBACKS
-        if (ssl->hsInfoOn) AddPacketName("Certificate", &ssl->handShakeInfo);
-        if (ssl->toInfoOn) AddLateName("Certificate", &ssl->timeoutInfo);
+#ifdef WOLFSSL_ASYNC_CRYPT
+    ret = wolfSSL_AsyncPop(ssl, &ssl->options.asyncState);
+    if (ret != WC_NOT_PENDING_E) {
+        /* Check for error */
+        if (ret < 0)
+            goto exit_dc;
+    }
+    else
+#endif
+    {
+        /* Reset state */
+        ret = 0;
+        ssl->options.asyncState = TLS_ASYNC_BEGIN;
+        XMEMSET(args, 0, sizeof(DoCertArgs));
+        args->idx = *inOutIdx;
+        args->begin = *inOutIdx;
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        ssl->async.freeArgs = FreeDoCertArgs;
     #endif
-
-    if ((*inOutIdx - begin) + OPAQUE24_LEN > size)
-        return BUFFER_ERROR;
-
-    c24to32(input + *inOutIdx, &listSz);
-    *inOutIdx += OPAQUE24_LEN;
-
-    if (listSz > MAX_RECORD_SIZE)
-        return BUFFER_E;
-
-    if ((*inOutIdx - begin) + listSz != size)
-        return BUFFER_ERROR;
-
-    WOLFSSL_MSG("Loading peer's cert chain");
-    /* first put cert chain into buffer so can verify top down
-       we're sent bottom up */
-    while (listSz) {
-        word32 certSz;
-
-        if (totalCerts >= MAX_CHAIN_DEPTH) {
-        #ifdef OPENSSL_EXTRA
-            ssl->peerVerifyRet = X509_V_ERR_CERT_CHAIN_TOO_LONG;
-        #endif
-            return MAX_CHAIN_ERROR;
-        }
-
-        if ((*inOutIdx - begin) + OPAQUE24_LEN > size)
-            return BUFFER_ERROR;
-
-        c24to32(input + *inOutIdx, &certSz);
-        *inOutIdx += OPAQUE24_LEN;
-
-        if ((*inOutIdx - begin) + certSz > size)
-            return BUFFER_ERROR;
-
-        certs[totalCerts].length = certSz;
-        certs[totalCerts].buffer = input + *inOutIdx;
-
-#ifdef SESSION_CERTS
-        if (ssl->session.chain.count < MAX_CHAIN_DEPTH &&
-                                       certSz < MAX_X509_SIZE) {
-            ssl->session.chain.certs[ssl->session.chain.count].length = certSz;
-            XMEMCPY(ssl->session.chain.certs[ssl->session.chain.count].buffer,
-                    input + *inOutIdx, certSz);
-            ssl->session.chain.count++;
-        } else {
-            WOLFSSL_MSG("Couldn't store chain cert for session");
-        }
-#endif
-
-        *inOutIdx += certSz;
-        listSz -= certSz + CERT_HEADER_SZ;
-
-        totalCerts++;
-        WOLFSSL_MSG("\tPut another cert into chain");
     }
 
-    count = totalCerts;
-
-#ifdef WOLFSSL_SMALL_STACK
-    dCert = (DecodedCert*)XMALLOC(sizeof(DecodedCert), NULL,
-                                                       DYNAMIC_TYPE_TMP_BUFFER);
-    if (dCert == NULL)
-        return MEMORY_E;
-#endif
-
-#ifdef WOLFSSL_TRUST_PEER_CERT
-    /* if using trusted peer certs check before verify chain and CA test */
-    if (count > 0) {
-        TrustedPeerCert* tp = NULL;
-
-        InitDecodedCert(dCert, certs[0].buffer, certs[0].length, ssl->heap);
-        ret = ParseCertRelative(dCert, CERT_TYPE, 0, ssl->ctx->cm);
-        #ifndef NO_SKID
-            if (dCert->extAuthKeyIdSet) {
-                tp = GetTrustedPeer(ssl->ctx->cm, dCert->extSubjKeyId,
-                                                                 WC_MATCH_SKID);
-            }
-            else { /* if the cert has no SKID try to match by name */
-                tp = GetTrustedPeer(ssl->ctx->cm, dCert->subjectHash,
-                                                                 WC_MATCH_NAME);
-            }
-        #else /* NO_SKID */
-            tp = GetTrustedPeer(ssl->ctx->cm, dCert->subjectHash,
-                                                                 WC_MATCH_NAME);
-        #endif /* NO SKID */
-        WOLFSSL_MSG("Checking for trusted peer cert");
-
-        if (tp == NULL) {
-            /* no trusted peer cert */
-            WOLFSSL_MSG("No matching trusted peer cert. Checking CAs");
-            FreeDecodedCert(dCert);
-        } else if (MatchTrustedPeer(tp, dCert)){
-            WOLFSSL_MSG("Found matching trusted peer cert");
-            haveTrustPeer = 1;
-        } else {
-            WOLFSSL_MSG("Trusted peer cert did not match!");
-            FreeDecodedCert(dCert);
-        }
-    }
-    if (!haveTrustPeer) { /* do not verify chain if trusted peer cert found */
-#endif /* WOLFSSL_TRUST_PEER_CERT */
-
-    /* verify up to peer's first */
-    while (count > 1) {
-        buffer myCert = certs[count - 1];
-        byte* subjectHash;
-
-        InitDecodedCert(dCert, myCert.buffer, myCert.length, ssl->heap);
-        ret = ParseCertRelative(dCert, CERT_TYPE, !ssl->options.verifyNone,
-                                ssl->ctx->cm);
-        #ifndef NO_SKID
-            subjectHash = dCert->extSubjKeyId;
-        #else
-            subjectHash = dCert->subjectHash;
-        #endif
-
-        /* Check key sizes for certs. Is redundent check since ProcessBuffer
-           also performs this check. */
-        if (!ssl->options.verifyNone) {
-            switch (dCert->keyOID) {
-                #ifndef NO_RSA
-                case RSAk:
-                    if (ssl->options.minRsaKeySz < 0 ||
-                         dCert->pubKeySize < (word16)ssl->options.minRsaKeySz) {
-                        WOLFSSL_MSG("RSA key size in cert chain error");
-                        ret = RSA_KEY_SIZE_E;
-                    }
-                    break;
-                #endif /* !NO_RSA */
-                #ifdef HAVE_ECC
-                case ECDSAk:
-                    if (ssl->options.minEccKeySz < 0 ||
-                        dCert->pubKeySize < (word16)ssl->options.minEccKeySz) {
-                        WOLFSSL_MSG("ECC key size in cert chain error");
-                        ret = ECC_KEY_SIZE_E;
-                    }
-                    break;
-                #endif /* HAVE_ECC */
-
-                default:
-                    WOLFSSL_MSG("Key size not checked");
-                    break; /* key not being checked for size if not in switch */
-            }
-        }
-
-        if (ret == 0 && dCert->isCA == 0) {
-            WOLFSSL_MSG("Chain cert is not a CA, not adding as one");
-        }
-        else if (ret == 0 && ssl->options.verifyNone) {
-            WOLFSSL_MSG("Chain cert not verified by option, not adding as CA");
-        }
-        else if (ret == 0 && !AlreadySigner(ssl->ctx->cm, subjectHash)) {
-            DerBuffer* add = NULL;
-            ret = AllocDer(&add, myCert.length, CA_TYPE, ssl->heap);
-            if (ret < 0) {
-            #ifdef WOLFSSL_SMALL_STACK
-                XFREE(dCert, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-            #endif
-                return ret;
-            }
-
-            WOLFSSL_MSG("Adding CA from chain");
-
-            XMEMCPY(add->buffer, myCert.buffer, myCert.length);
-
-            /* already verified above */
-            ret = AddCA(ssl->ctx->cm, &add, WOLFSSL_CHAIN_CA, 0);
-            if (ret == 1) ret = 0;   /* SSL_SUCCESS for external */
-        }
-        else if (ret != 0) {
-            WOLFSSL_MSG("Failed to verify CA from chain");
-        #ifdef OPENSSL_EXTRA
-            ssl->peerVerifyRet = X509_V_ERR_INVALID_CA;
-        #endif
-        }
-        else {
-            WOLFSSL_MSG("Verified CA from chain and already had it");
-        }
-
-#if defined(HAVE_OCSP) || defined(HAVE_CRL)
-        if (ret == 0) {
-            int doCrlLookup = 1;
-
-#ifdef HAVE_OCSP
-        #ifdef HAVE_CERTIFICATE_STATUS_REQUEST_V2
-            if (ssl->status_request_v2)
-                ret = TLSX_CSR2_InitRequests(ssl->extensions, dCert, 0,
-                                                                     ssl->heap);
-            else /* skips OCSP and force CRL check */
-        #endif
-            if (ssl->ctx->cm->ocspEnabled && ssl->ctx->cm->ocspCheckAll) {
-                WOLFSSL_MSG("Doing Non Leaf OCSP check");
-                ret = CheckCertOCSP(ssl->ctx->cm->ocsp, dCert, NULL);
-                doCrlLookup = (ret == OCSP_CERT_UNKNOWN);
-                if (ret != 0) {
-                    doCrlLookup = 0;
-                    WOLFSSL_MSG("\tOCSP Lookup not ok");
-                }
-            }
-#endif /* HAVE_OCSP */
-
-#ifdef HAVE_CRL
-            if (ret == 0 && doCrlLookup && ssl->ctx->cm->crlEnabled
-                                                 && ssl->ctx->cm->crlCheckAll) {
-                WOLFSSL_MSG("Doing Non Leaf CRL check");
-                ret = CheckCertCRL(ssl->ctx->cm->crl, dCert);
-
-                if (ret != 0) {
-                    WOLFSSL_MSG("\tCRL check not ok");
-                }
-            }
-#else
-            (void)doCrlLookup;
-#endif /* HAVE_CRL */
-        }
-#endif /* HAVE_OCSP || HAVE_CRL */
-
-        if (ret != 0 && anyError == 0)
-            anyError = ret;   /* save error from last time */
-
-        FreeDecodedCert(dCert);
-        count--;
-    }
-
-#ifdef WOLFSSL_TRUST_PEER_CERT
-    } /* end of if (haveTrustPeer) -- a check for if already verified */
-#endif
-
-    /* peer's, may not have one if blank client cert sent by TLSv1.2 */
-    if (count) {
-        buffer myCert = certs[0];
-        int    fatal  = 0;
-
-        WOLFSSL_MSG("Verifying Peer's cert");
-
-#ifdef WOLFSSL_TRUST_PEER_CERT
-        if (!haveTrustPeer) { /* do not parse again if previously verified */
-#endif
-        InitDecodedCert(dCert, myCert.buffer, myCert.length, ssl->heap);
-        ret = ParseCertRelative(dCert, CERT_TYPE, !ssl->options.verifyNone,
-                                ssl->ctx->cm);
-#ifdef WOLFSSL_TRUST_PEER_CERT
-        }
-#endif
-
-        if (ret == 0) {
-            WOLFSSL_MSG("Verified Peer's cert");
-        #ifdef OPENSSL_EXTRA
-            ssl->peerVerifyRet = X509_V_OK;
-        #endif
-            fatal = 0;
-        }
-        else if (ret == ASN_PARSE_E) {
-            WOLFSSL_MSG("Got Peer cert ASN PARSE ERROR, fatal");
-            fatal = 1;
-        }
-        else {
-            WOLFSSL_MSG("Failed to verify Peer's cert");
-        #ifdef OPENSSL_EXTRA
-            ssl->peerVerifyRet = X509_V_ERR_UNABLE_TO_VERIFY_LEAF_SIGNATURE;
-        #endif
-            if (ssl->verifyCallback) {
-                WOLFSSL_MSG("\tCallback override available, will continue");
-                fatal = 0;
-            }
-            else {
-                WOLFSSL_MSG("\tNo callback override available, fatal");
-                fatal = 1;
-            }
-        }
-
-#ifdef HAVE_SECURE_RENEGOTIATION
-        if (fatal == 0 && ssl->secure_renegotiation
-                       && ssl->secure_renegotiation->enabled) {
-
-            if (IsEncryptionOn(ssl, 0)) {
-                /* compare against previous time */
-                if (XMEMCMP(dCert->subjectHash,
-                            ssl->secure_renegotiation->subject_hash,
-                            SHA_DIGEST_SIZE) != 0) {
-                    WOLFSSL_MSG("Peer sent different cert during scr, fatal");
-                    fatal = 1;
-                    ret   = SCR_DIFFERENT_CERT_E;
-                }
-            }
-
-            /* cache peer's hash */
-            if (fatal == 0) {
-                XMEMCPY(ssl->secure_renegotiation->subject_hash,
-                        dCert->subjectHash, SHA_DIGEST_SIZE);
-            }
-        }
-#endif
-
-#if defined(HAVE_OCSP) || defined(HAVE_CRL)
-        if (fatal == 0) {
-            int doLookup = 1;
-
-            if (ssl->options.side == WOLFSSL_CLIENT_END) {
-#ifdef HAVE_CERTIFICATE_STATUS_REQUEST
-                if (ssl->status_request) {
-                    fatal = TLSX_CSR_InitRequest(ssl->extensions, dCert,
-                                                                     ssl->heap);
-                    doLookup = 0;
-                }
-#endif
-#ifdef HAVE_CERTIFICATE_STATUS_REQUEST_V2
-                if (ssl->status_request_v2) {
-                    fatal = TLSX_CSR2_InitRequests(ssl->extensions, dCert, 1,
-                                                                     ssl->heap);
-                    doLookup = 0;
-                }
-#endif
-            }
-
-#ifdef HAVE_OCSP
-            if (doLookup && ssl->ctx->cm->ocspEnabled) {
-                WOLFSSL_MSG("Doing Leaf OCSP check");
-                ret = CheckCertOCSP(ssl->ctx->cm->ocsp, dCert, NULL);
-                doLookup = (ret == OCSP_CERT_UNKNOWN);
-                if (ret != 0) {
-                    WOLFSSL_MSG("\tOCSP Lookup not ok");
-                    fatal = 0;
-        #ifdef OPENSSL_EXTRA
-                    ssl->peerVerifyRet = X509_V_ERR_CERT_REJECTED;
-        #endif
-                }
-            }
-#endif /* HAVE_OCSP */
-
-#ifdef HAVE_CRL
-            if (doLookup && ssl->ctx->cm->crlEnabled) {
-                WOLFSSL_MSG("Doing Leaf CRL check");
-                ret = CheckCertCRL(ssl->ctx->cm->crl, dCert);
-                if (ret != 0) {
-                    WOLFSSL_MSG("\tCRL check not ok");
-                    fatal = 0;
-        #ifdef OPENSSL_EXTRA
-                    ssl->peerVerifyRet = X509_V_ERR_CERT_REJECTED;
-        #endif
-                }
-            }
-#endif /* HAVE_CRL */
-            (void)doLookup;
-        }
-#endif /* HAVE_OCSP || HAVE_CRL */
-
-#ifdef KEEP_PEER_CERT
+    switch(ssl->options.asyncState)
+    {
+        case TLS_ASYNC_BEGIN:
         {
-            /* set X509 format for peer cert even if fatal */
-            int copyRet = CopyDecodedToX509(&ssl->peerCert, dCert);
-            if (copyRet == MEMORY_E)
-                fatal = 1;
-        }
-#endif
+            word32 listSz;
 
-#ifndef IGNORE_KEY_EXTENSIONS
-        if (dCert->extKeyUsageSet) {
-            if ((ssl->specs.kea == rsa_kea) &&
-                (ssl->options.side == WOLFSSL_CLIENT_END) &&
-                (dCert->extKeyUsage & KEYUSE_KEY_ENCIPHER) == 0) {
-                ret = KEYUSE_ENCIPHER_E;
-            }
-            if ((ssl->specs.sig_algo == rsa_sa_algo ||
-                    (ssl->specs.sig_algo == ecc_dsa_sa_algo &&
-                         !ssl->specs.static_ecdh)) &&
-                (dCert->extKeyUsage & KEYUSE_DIGITAL_SIG) == 0) {
-                WOLFSSL_MSG("KeyUse Digital Sig not set");
-                ret = KEYUSE_SIGNATURE_E;
-            }
-        }
-
-        if (dCert->extExtKeyUsageSet) {
-            if (ssl->options.side == WOLFSSL_CLIENT_END) {
-                if ((dCert->extExtKeyUsage &
-                        (EXTKEYUSE_ANY | EXTKEYUSE_SERVER_AUTH)) == 0) {
-                    WOLFSSL_MSG("ExtKeyUse Server Auth not set");
-                    ret = EXTKEYUSE_AUTH_E;
-                }
-            }
-            else {
-                if ((dCert->extExtKeyUsage &
-                        (EXTKEYUSE_ANY | EXTKEYUSE_CLIENT_AUTH)) == 0) {
-                    WOLFSSL_MSG("ExtKeyUse Client Auth not set");
-                    ret = EXTKEYUSE_AUTH_E;
-                }
-            }
-        }
-#endif /* IGNORE_KEY_EXTENSIONS */
-
-        if (fatal) {
-            FreeDecodedCert(dCert);
-        #ifdef WOLFSSL_SMALL_STACK
-            XFREE(dCert, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        #ifdef WOLFSSL_CALLBACKS
+            if (ssl->hsInfoOn)
+                AddPacketName("Certificate", &ssl->handShakeInfo);
+            if (ssl->toInfoOn)
+                AddLateName("Certificate", &ssl->timeoutInfo);
         #endif
-            ssl->error = ret;
+
+            /* allocate buffer for certs */
+            args->certs = (buffer*)XMALLOC(sizeof(buffer) * MAX_CHAIN_DEPTH,
+                                            ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            if (args->certs == NULL) {
+                ERROR_OUT(MEMORY_E, exit_dc);
+            }
+
+            if ((args->idx - args->begin) + OPAQUE24_LEN > size) {
+                ERROR_OUT(BUFFER_ERROR, exit_dc);
+            }
+
+            c24to32(input + args->idx, &listSz);
+            args->idx += OPAQUE24_LEN;
+
+            if (listSz > MAX_RECORD_SIZE) {
+                ERROR_OUT(BUFFER_ERROR, exit_dc);
+            }
+
+            if ((args->idx - args->begin) + listSz != size) {
+                ERROR_OUT(BUFFER_ERROR, exit_dc);
+            }
+
+            WOLFSSL_MSG("Loading peer's cert chain");
+            /* first put cert chain into buffer so can verify top down
+               we're sent bottom up */
+            while (listSz) {
+                word32 certSz;
+
+                if (args->totalCerts >= MAX_CHAIN_DEPTH) {
+                #ifdef OPENSSL_EXTRA
+                    ssl->peerVerifyRet = X509_V_ERR_CERT_CHAIN_TOO_LONG;
+                #endif
+                    ERROR_OUT(MAX_CHAIN_ERROR, exit_dc);
+                }
+
+                if ((args->idx - args->begin) + OPAQUE24_LEN > size) {
+                    ERROR_OUT(BUFFER_ERROR, exit_dc);
+                }
+
+                c24to32(input + args->idx, &certSz);
+                args->idx += OPAQUE24_LEN;
+
+                if ((args->idx - args->begin) + certSz > size) {
+                    ERROR_OUT(BUFFER_ERROR, exit_dc);
+                }
+
+                args->certs[args->totalCerts].length = certSz;
+                args->certs[args->totalCerts].buffer = input + args->idx;
+
+            #ifdef SESSION_CERTS
+                if (ssl->session.chain.count < MAX_CHAIN_DEPTH &&
+                                               certSz < MAX_X509_SIZE) {
+                    ssl->session.chain.certs[
+                        ssl->session.chain.count].length = certSz;
+                    XMEMCPY(ssl->session.chain.certs[
+                        ssl->session.chain.count].buffer,
+                            input + args->idx, certSz);
+                    ssl->session.chain.count++;
+                }
+                else {
+                    WOLFSSL_MSG("Couldn't store chain cert for session");
+                }
+            #endif /* SESSION_CERTS */
+
+                args->idx += certSz;
+                listSz -= certSz + CERT_HEADER_SZ;
+
+                args->totalCerts++;
+                WOLFSSL_MSG("\tPut another cert into chain");
+            } /* while (listSz) */
+
+            args->count = args->totalCerts;
+            args->certIdx = 0;
+
+            args->dCertInit = 0;
+            args->dCert = (DecodedCert*)XMALLOC(sizeof(DecodedCert), ssl->heap,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+            if (args->dCert == NULL) {
+                ERROR_OUT(MEMORY_E, exit_dc);
+            }
+
+            /* Advance state and proceed */
+            ssl->options.asyncState = TLS_ASYNC_BUILD;
+        } /* case TLS_ASYNC_BEGIN */
+
+        case TLS_ASYNC_BUILD:
+        {
+            if (args->count > 0) {
+            #ifdef WOLFSSL_TRUST_PEER_CERT
+                if (args->certIdx == 0) {
+                    /* if using trusted peer certs check before verify chain
+                       and CA test */
+                    TrustedPeerCert* tp;
+
+                    if (!args->dCertInit) {
+                        InitDecodedCert(args->dCert,
+                            args->certs[args->certIdx].buffer,
+                            args->certs[args->certIdx].length, ssl->heap);
+                        args->dCert->sigCtx.devId = ssl->devId; /* setup async dev */
+                        args->dCertInit = 1;
+                    }
+
+                    ret = ParseCertRelative(args->dCert, CERT_TYPE, 0,
+                                                                ssl->ctx->cm);
+                    if (ret != 0) {
+                    #ifdef WOLFSSL_ASYNC_CRYPT
+                        if (ret == WC_PENDING_E) {
+                            ret = wolfSSL_AsyncPush(ssl,
+                                args->dCert->sigCtx.asyncDev,
+                                WC_ASYNC_FLAG_CALL_AGAIN);
+                        }
+                    #endif
+                        goto exit_dc;
+                    }
+
+                #ifndef NO_SKID
+                    if (args->dCert->extAuthKeyIdSet) {
+                        tp = GetTrustedPeer(ssl->ctx->cm,
+                                    args->dCert->extSubjKeyId, WC_MATCH_SKID);
+                    }
+                    else { /* if the cert has no SKID try to match by name */
+                        tp = GetTrustedPeer(ssl->ctx->cm,
+                                    args->dCert->subjectHash, WC_MATCH_NAME);
+                    }
+                #else /* NO_SKID */
+                    tp = GetTrustedPeer(ssl->ctx->cm, args->dCert->subjectHash,
+                                                                 WC_MATCH_NAME);
+                #endif /* NO_SKID */
+                    WOLFSSL_MSG("Checking for trusted peer cert");
+
+                    if (tp == NULL) {
+                        /* no trusted peer cert */
+                        WOLFSSL_MSG("No matching trusted peer cert. "
+                            "Checking CAs");
+                        FreeDecodedCert(args->dCert);
+                        args->dCertInit = 0;
+                    } else if (MatchTrustedPeer(tp, args->dCert)){
+                        WOLFSSL_MSG("Found matching trusted peer cert");
+                        haveTrustPeer = 1;
+                    } else {
+                        WOLFSSL_MSG("Trusted peer cert did not match!");
+                        FreeDecodedCert(args->dCert);
+                        args->dCertInit = 0;
+                    }
+                }
+            #endif /* WOLFSSL_TRUST_PEER_CERT */
+
+                /* verify up to peer's first */
+                /* do not verify chain if trusted peer cert found */
+                while (args->count > 1
+                #ifdef WOLFSSL_TRUST_PEER_CERT
+                    && !haveTrustPeer
+                #endif /* WOLFSSL_TRUST_PEER_CERT */
+                ) {
+                    byte* subjectHash;
+
+                    args->certIdx = args->count - 1;
+
+                    if (!args->dCertInit) {
+                        InitDecodedCert(args->dCert,
+                            args->certs[args->certIdx].buffer,
+                            args->certs[args->certIdx].length, ssl->heap);
+                        args->dCert->sigCtx.devId = ssl->devId; /* setup async dev */
+                        args->dCertInit = 1;
+                    }
+
+                    ret = ParseCertRelative(args->dCert, CERT_TYPE,
+                                    !ssl->options.verifyNone, ssl->ctx->cm);
+                    if (ret != 0) {
+                    #ifdef WOLFSSL_ASYNC_CRYPT
+                        if (ret == WC_PENDING_E) {
+                            ret = wolfSSL_AsyncPush(ssl,
+                                args->dCert->sigCtx.asyncDev,
+                                WC_ASYNC_FLAG_CALL_AGAIN);
+                        }
+                    #endif
+                        goto exit_dc;
+                    }
+
+                #ifndef NO_SKID
+                    subjectHash = args->dCert->extSubjKeyId;
+                #else
+                    subjectHash = args->dCert->subjectHash;
+                #endif
+
+                    /* Check key sizes for certs. This is a redundant check,
+                       since ProcessBuffer also performs it. */
+                    if (!ssl->options.verifyNone) {
+                        switch (args->dCert->keyOID) {
+                        #ifndef NO_RSA
+                            case RSAk:
+                                if (ssl->options.minRsaKeySz < 0 ||
+                                        args->dCert->pubKeySize <
+                                         (word16)ssl->options.minRsaKeySz) {
+                                    WOLFSSL_MSG(
+                                        "RSA key size in cert chain error");
+                                    ret = RSA_KEY_SIZE_E;
+                                }
+                                break;
+                        #endif /* !NO_RSA */
+                        #ifdef HAVE_ECC
+                            case ECDSAk:
+                                if (ssl->options.minEccKeySz < 0 ||
+                                        args->dCert->pubKeySize <
+                                         (word16)ssl->options.minEccKeySz) {
+                                    WOLFSSL_MSG(
+                                        "ECC key size in cert chain error");
+                                    ret = ECC_KEY_SIZE_E;
+                                }
+                                break;
+                        #endif /* HAVE_ECC */
+                            default:
+                                WOLFSSL_MSG("Key size not checked");
+                                /* key not being checked for size if not in
+                                   switch */
+                                break;
+                        } /* switch (dCert->keyOID) */
+                    } /* if (!ssl->options.verifyNone) */
+
+                    if (ret == 0 && args->dCert->isCA == 0) {
+                        WOLFSSL_MSG("Chain cert is not a CA, not adding as one");
+                    }
+                    else if (ret == 0 && ssl->options.verifyNone) {
+                        WOLFSSL_MSG("Chain cert not verified by option, not adding as CA");
+                    }
+                    else if (ret == 0 && !AlreadySigner(ssl->ctx->cm, subjectHash)) {
+                        DerBuffer* add = NULL;
+                        ret = AllocDer(&add, args->certs[args->certIdx].length,
+                                                            CA_TYPE, ssl->heap);
+                        if (ret < 0)
+                            goto exit_dc;
+
+                        WOLFSSL_MSG("Adding CA from chain");
+
+                        XMEMCPY(add->buffer, args->certs[args->certIdx].buffer,
+                                             args->certs[args->certIdx].length);
+
+                        /* already verified above */
+                        ret = AddCA(ssl->ctx->cm, &add, WOLFSSL_CHAIN_CA, 0);
+                        if (ret == 1) {
+                            ret = 0;   /* SSL_SUCCESS for external */
+                        }
+                    }
+                    else if (ret != 0) {
+                        WOLFSSL_MSG("Failed to verify CA from chain");
+                    #ifdef OPENSSL_EXTRA
+                        ssl->peerVerifyRet = X509_V_ERR_INVALID_CA;
+                    #endif
+                    }
+                    else {
+                        WOLFSSL_MSG("Verified CA from chain and already had it");
+                    }
+
+            #if defined(HAVE_OCSP) || defined(HAVE_CRL)
+                    if (ret == 0) {
+                        int doCrlLookup = 1;
+                #ifdef HAVE_OCSP
+                    #ifdef HAVE_CERTIFICATE_STATUS_REQUEST_V2
+                        if (ssl->status_request_v2) {
+                            ret = TLSX_CSR2_InitRequests(ssl->extensions,
+                                                    args->dCert, 0, ssl->heap);
+                        }
+                        else /* skips OCSP and force CRL check */
+                    #endif /* HAVE_CERTIFICATE_STATUS_REQUEST_V2 */
+                        if (ssl->ctx->cm->ocspEnabled &&
+                                            ssl->ctx->cm->ocspCheckAll) {
+                            WOLFSSL_MSG("Doing Non Leaf OCSP check");
+                            ret = CheckCertOCSP(ssl->ctx->cm->ocsp, args->dCert,
+                                                                          NULL);
+                            doCrlLookup = (ret == OCSP_CERT_UNKNOWN);
+                            if (ret != 0) {
+                                doCrlLookup = 0;
+                                WOLFSSL_MSG("\tOCSP Lookup not ok");
+                            }
+                        }
+                #endif /* HAVE_OCSP */
+
+                #ifdef HAVE_CRL
+                        if (ret == 0 && doCrlLookup &&
+                                    ssl->ctx->cm->crlEnabled &&
+                                                ssl->ctx->cm->crlCheckAll) {
+                            WOLFSSL_MSG("Doing Non Leaf CRL check");
+                            ret = CheckCertCRL(ssl->ctx->cm->crl, args->dCert);
+                            if (ret != 0) {
+                                WOLFSSL_MSG("\tCRL check not ok");
+                            }
+                        }
+                #endif /* HAVE_CRL */
+                        (void)doCrlLookup;
+                    }
+            #endif /* HAVE_OCSP || HAVE_CRL */
+
+                    if (ret != 0 && lastErr == 0) {
+                        lastErr = ret;   /* save error from last time */
+                    }
+
+                    FreeDecodedCert(args->dCert);
+                    args->dCertInit = 0;
+                    args->count--;
+                } /* while (count > 1 && !haveTrustPeer) */
+            } /* if (count > 0) */
+
+            /* Check for error */
+            if (ret != 0) {
+                goto exit_dc;
+            }
+
+            /* Advance state and proceed */
+            ssl->options.asyncState = TLS_ASYNC_DO;
+        } /* case TLS_ASYNC_BUILD */
+
+        case TLS_ASYNC_DO:
+        {
+            /* peer's, may not have one if blank client cert sent by TLSv1.2 */
+            if (args->count > 0) {
+                int fatal  = 0;
+
+                WOLFSSL_MSG("Verifying Peer's cert");
+
+                args->certIdx = 0;
+
+                if (!args->dCertInit) {
+                    InitDecodedCert(args->dCert,
+                        args->certs[args->certIdx].buffer,
+                        args->certs[args->certIdx].length, ssl->heap);
+                    args->dCertInit = 1;
+                }
+
+            #ifdef WOLFSSL_TRUST_PEER_CERT
+                if (!haveTrustPeer)
+            #endif
+                { /* only parse if not already present in dCert from above */
+                    ret = ParseCertRelative(args->dCert, CERT_TYPE,
+                                    !ssl->options.verifyNone, ssl->ctx->cm);
+                    if (ret != 0) {
+                    #ifdef WOLFSSL_ASYNC_CRYPT
+                        if (ret == WC_PENDING_E) {
+                            ret = wolfSSL_AsyncPush(ssl,
+                                args->dCert->sigCtx.asyncDev,
+                                WC_ASYNC_FLAG_CALL_AGAIN);
+                        }
+                    #endif
+                        goto exit_dc;
+                    }
+                }
+
+                if (ret == 0) {
+                    WOLFSSL_MSG("Verified Peer's cert");
+                #ifdef OPENSSL_EXTRA
+                    ssl->peerVerifyRet = X509_V_OK;
+                #endif
+                    fatal = 0;
         #ifdef OPENSSL_EXTRA
-            ssl->peerVerifyRet = X509_V_ERR_CERT_REJECTED;
+                    ssl->peerVerifyRet = X509_V_ERR_CERT_REJECTED;
         #endif
-            return ret;
-        }
-        ssl->options.havePeerCert = 1;
-
-#ifdef WOLFSSL_SMALL_STACK
-        domain = (char*)XMALLOC(ASN_NAME_MAX, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-        if (domain == NULL) {
-            FreeDecodedCert(dCert);
-            XFREE(dCert, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-            return MEMORY_E;
-        }
-#endif
-        /* store for callback use */
-        if (dCert->subjectCNLen < ASN_NAME_MAX) {
-            XMEMCPY(domain, dCert->subjectCN, dCert->subjectCNLen);
-            domain[dCert->subjectCNLen] = '\0';
-        }
-        else
-            domain[0] = '\0';
-
-        if (!ssl->options.verifyNone && ssl->buffers.domainName.buffer) {
-            if (MatchDomainName(dCert->subjectCN, dCert->subjectCNLen,
-                                (char*)ssl->buffers.domainName.buffer) == 0) {
-                WOLFSSL_MSG("DomainName match on common name failed");
-                if (CheckAltNames(dCert,
-                                 (char*)ssl->buffers.domainName.buffer) == 0 ) {
-                    WOLFSSL_MSG("DomainName match on alt names failed too");
-                    ret = DOMAIN_NAME_MISMATCH; /* try to get peer key still */
                 }
+                else if (ret == ASN_PARSE_E) {
+                    WOLFSSL_MSG("Got Peer cert ASN PARSE ERROR, fatal");
+                    fatal = 1;
+                }
+                else {
+                    WOLFSSL_MSG("Failed to verify Peer's cert");
+                #ifdef OPENSSL_EXTRA
+                    ssl->peerVerifyRet = X509_V_ERR_UNABLE_TO_VERIFY_LEAF_SIGNATURE;
+                #endif
+                    if (ssl->verifyCallback) {
+                        WOLFSSL_MSG(
+                            "\tCallback override available, will continue");
+                        fatal = 0;
+                    }
+                    else {
+                        WOLFSSL_MSG("\tNo callback override available, fatal");
+                        fatal = 1;
+                    }
+                }
+
+            #ifdef HAVE_SECURE_RENEGOTIATION
+                if (fatal == 0 && ssl->secure_renegotiation
+                               && ssl->secure_renegotiation->enabled) {
+
+                    if (IsEncryptionOn(ssl, 0)) {
+                        /* compare against previous time */
+                        if (XMEMCMP(args->dCert->subjectHash,
+                                    ssl->secure_renegotiation->subject_hash,
+                                    SHA_DIGEST_SIZE) != 0) {
+                            WOLFSSL_MSG(
+                                "Peer sent different cert during scr, fatal");
+                            fatal = 1;
+                            ret   = SCR_DIFFERENT_CERT_E;
+                        }
+                    }
+
+                    /* cache peer's hash */
+                    if (fatal == 0) {
+                        XMEMCPY(ssl->secure_renegotiation->subject_hash,
+                                args->dCert->subjectHash, SHA_DIGEST_SIZE);
+                    }
+                }
+            #endif /* HAVE_SECURE_RENEGOTIATION */
+
+            #if defined(HAVE_OCSP) || defined(HAVE_CRL)
+                if (fatal == 0) {
+                    int doLookup = 1;
+
+                    if (ssl->options.side == WOLFSSL_CLIENT_END) {
+                #ifdef HAVE_CERTIFICATE_STATUS_REQUEST
+                        if (ssl->status_request) {
+                            fatal = TLSX_CSR_InitRequest(ssl->extensions,
+                                                    args->dCert, ssl->heap);
+                            doLookup = 0;
+                        }
+                #endif /* HAVE_CERTIFICATE_STATUS_REQUEST */
+                #ifdef HAVE_CERTIFICATE_STATUS_REQUEST_V2
+                        if (ssl->status_request_v2) {
+                            fatal = TLSX_CSR2_InitRequests(ssl->extensions,
+                                                    args->dCert, 1, ssl->heap);
+                            doLookup = 0;
+                        }
+                #endif /* HAVE_CERTIFICATE_STATUS_REQUEST_V2 */
+                    }
+
+                #ifdef HAVE_OCSP
+                    if (doLookup && ssl->ctx->cm->ocspEnabled) {
+                        WOLFSSL_MSG("Doing Leaf OCSP check");
+                        ret = CheckCertOCSP(ssl->ctx->cm->ocsp,
+                                                            args->dCert, NULL);
+                        doLookup = (ret == OCSP_CERT_UNKNOWN);
+                        if (ret != 0) {
+                            WOLFSSL_MSG("\tOCSP Lookup not ok");
+                            fatal = 0;
+                        #ifdef OPENSSL_EXTRA
+                            ssl->peerVerifyRet = X509_V_ERR_CERT_REJECTED;
+                        #endif
+                        }
+                    }
+                #endif /* HAVE_OCSP */
+
+                #ifdef HAVE_CRL
+                    if (doLookup && ssl->ctx->cm->crlEnabled) {
+                        WOLFSSL_MSG("Doing Leaf CRL check");
+                        ret = CheckCertCRL(ssl->ctx->cm->crl, args->dCert);
+                        if (ret != 0) {
+                            WOLFSSL_MSG("\tCRL check not ok");
+                            fatal = 0;
+                        #ifdef OPENSSL_EXTRA
+                            ssl->peerVerifyRet = X509_V_ERR_CERT_REJECTED;
+                        #endif
+                        }
+                    }
+                #endif /* HAVE_CRL */
+                    (void)doLookup;
+                }
+            #endif /* HAVE_OCSP || HAVE_CRL */
+
+            #ifdef KEEP_PEER_CERT
+                {
+                    /* set X509 format for peer cert even if fatal */
+                    int copyRet = CopyDecodedToX509(&ssl->peerCert,
+                                                                args->dCert);
+                    if (copyRet == MEMORY_E)
+                        fatal = 1;
+                }
+            #endif /* KEEP_PEER_CERT */
+
+            #ifndef IGNORE_KEY_EXTENSIONS
+                if (args->dCert->extKeyUsageSet) {
+                    if ((ssl->specs.kea == rsa_kea) &&
+                        (ssl->options.side == WOLFSSL_CLIENT_END) &&
+                        (args->dCert->extKeyUsage & KEYUSE_KEY_ENCIPHER) == 0) {
+                        ret = KEYUSE_ENCIPHER_E;
+                    }
+                    if ((ssl->specs.sig_algo == rsa_sa_algo ||
+                            (ssl->specs.sig_algo == ecc_dsa_sa_algo &&
+                                 !ssl->specs.static_ecdh)) &&
+                        (args->dCert->extKeyUsage & KEYUSE_DIGITAL_SIG) == 0) {
+                        WOLFSSL_MSG("KeyUse Digital Sig not set");
+                        ret = KEYUSE_SIGNATURE_E;
+                    }
+                }
+
+                if (args->dCert->extExtKeyUsageSet) {
+                    if (ssl->options.side == WOLFSSL_CLIENT_END) {
+                        if ((args->dCert->extExtKeyUsage &
+                                (EXTKEYUSE_ANY | EXTKEYUSE_SERVER_AUTH)) == 0) {
+                            WOLFSSL_MSG("ExtKeyUse Server Auth not set");
+                            ret = EXTKEYUSE_AUTH_E;
+                        }
+                    }
+                    else {
+                        if ((args->dCert->extExtKeyUsage &
+                                (EXTKEYUSE_ANY | EXTKEYUSE_CLIENT_AUTH)) == 0) {
+                            WOLFSSL_MSG("ExtKeyUse Client Auth not set");
+                            ret = EXTKEYUSE_AUTH_E;
+                        }
+                    }
+                }
+            #endif /* IGNORE_KEY_EXTENSIONS */
+
+                if (fatal) {
+                    ssl->error = ret;
+                #ifdef OPENSSL_EXTRA
+                    ssl->peerVerifyRet = X509_V_ERR_CERT_REJECTED;
+                #endif
+                    goto exit_dc;
+                }
+
+                ssl->options.havePeerCert = 1;
+            } /* if (count > 0) */
+
+            /* Check for error */
+            if (ret != 0) {
+                goto exit_dc;
             }
-        }
 
-        /* decode peer key */
-        switch (dCert->keyOID) {
-        #ifndef NO_RSA
-            case RSAk:
-                {
-                    word32 idx = 0;
-                    int    keyRet = 0;
+            /* Advance state and proceed */
+            ssl->options.asyncState = TLS_ASYNC_VERIFY;
+        } /* case TLS_ASYNC_DO */
+
+        case TLS_ASYNC_VERIFY:
+        {
+            if (args->count > 0) {
+                args->domain = (char*)XMALLOC(ASN_NAME_MAX, ssl->heap,
+                                                    DYNAMIC_TYPE_TMP_BUFFER);
+                if (args->domain == NULL) {
+                    ERROR_OUT(MEMORY_E, exit_dc);
+                }
+
+                /* store for callback use */
+                if (args->dCert->subjectCNLen < ASN_NAME_MAX) {
+                    XMEMCPY(args->domain, args->dCert->subjectCN, args->dCert->subjectCNLen);
+                    args->domain[args->dCert->subjectCNLen] = '\0';
+                }
+                else {
+                    args->domain[0] = '\0';
+                }
+
+                if (!ssl->options.verifyNone && ssl->buffers.domainName.buffer) {
+                    if (MatchDomainName(args->dCert->subjectCN,
+                                args->dCert->subjectCNLen,
+                                (char*)ssl->buffers.domainName.buffer) == 0) {
+                        WOLFSSL_MSG("DomainName match on common name failed");
+                        if (CheckAltNames(args->dCert,
+                                 (char*)ssl->buffers.domainName.buffer) == 0 ) {
+                            WOLFSSL_MSG(
+                                "DomainName match on alt names failed too");
+                            /* try to get peer key still */
+                            ret = DOMAIN_NAME_MISMATCH;
+                        }
+                    }
+                }
+
+                /* decode peer key */
+                switch (args->dCert->keyOID) {
+                #ifndef NO_RSA
+                    case RSAk:
+                    {
+                        word32 keyIdx = 0;
+                        int keyRet = 0;
 
-                    if (ssl->peerRsaKey == NULL) {
-                        ssl->peerRsaKey = (RsaKey*)XMALLOC(sizeof(RsaKey),
-                                                   ssl->heap, DYNAMIC_TYPE_RSA);
                         if (ssl->peerRsaKey == NULL) {
-                            WOLFSSL_MSG("PeerRsaKey Memory error");
-                            keyRet = MEMORY_E;
-                        } else {
+                            keyRet = AllocKey(ssl, DYNAMIC_TYPE_RSA,
+                                                (void**)&ssl->peerRsaKey);
+                        } else if (ssl->peerRsaKeyPresent) {
+                            /* don't leak on reuse */
+                            wc_FreeRsaKey(ssl->peerRsaKey);
+                            ssl->peerRsaKeyPresent = 0;
                             keyRet = wc_InitRsaKey_ex(ssl->peerRsaKey,
-                                                       ssl->heap, ssl->devId);
+                                                    ssl->heap, ssl->devId);
                         }
-                    } else if (ssl->peerRsaKeyPresent) {
-                        /* don't leak on reuse */
-                        wc_FreeRsaKey(ssl->peerRsaKey);
-                        ssl->peerRsaKeyPresent = 0;
-                        keyRet = wc_InitRsaKey_ex(ssl->peerRsaKey, ssl->heap, ssl->devId);
-                    }
 
-                    if (keyRet != 0 || wc_RsaPublicKeyDecode(dCert->publicKey,
-                               &idx, ssl->peerRsaKey, dCert->pubKeySize) != 0) {
-                        ret = PEER_KEY_ERROR;
-                    }
-                    else {
-                        ssl->peerRsaKeyPresent = 1;
-                        #ifdef HAVE_PK_CALLBACKS
-                            #ifndef NO_RSA
-                                ssl->buffers.peerRsaKey.buffer =
-                                       (byte*)XMALLOC(dCert->pubKeySize,
-                                               ssl->heap, DYNAMIC_TYPE_RSA);
-                                if (ssl->buffers.peerRsaKey.buffer == NULL)
-                                    ret = MEMORY_ERROR;
-                                else {
-                                    XMEMCPY(ssl->buffers.peerRsaKey.buffer,
-                                           dCert->publicKey, dCert->pubKeySize);
-                                    ssl->buffers.peerRsaKey.length =
-                                            dCert->pubKeySize;
-                                }
-                            #endif /* NO_RSA */
-                        #endif /*HAVE_PK_CALLBACKS */
-                    }
+                        if (keyRet != 0 || wc_RsaPublicKeyDecode(
+                                args->dCert->publicKey, &keyIdx, ssl->peerRsaKey,
+                                                args->dCert->pubKeySize) != 0) {
+                            ret = PEER_KEY_ERROR;
+                        }
+                        else {
+                            ssl->peerRsaKeyPresent = 1;
+                    #ifdef HAVE_PK_CALLBACKS
+                        #ifndef NO_RSA
+                            ssl->buffers.peerRsaKey.buffer =
+                                   (byte*)XMALLOC(args->dCert->pubKeySize,
+                                                ssl->heap, DYNAMIC_TYPE_RSA);
+                            if (ssl->buffers.peerRsaKey.buffer == NULL) {
+                                ret = MEMORY_ERROR;
+                            }
+                            else {
+                                XMEMCPY(ssl->buffers.peerRsaKey.buffer,
+                                        args->dCert->publicKey,
+                                        args->dCert->pubKeySize);
+                                ssl->buffers.peerRsaKey.length =
+                                    args->dCert->pubKeySize;
+                            }
+                        #endif /* NO_RSA */
+                    #endif /* HAVE_PK_CALLBACKS */
+                        }
 
-                    /* check size of peer RSA key */
-                    if (ret == 0 && ssl->peerRsaKeyPresent &&
-                                              !ssl->options.verifyNone &&
-                                              wc_RsaEncryptSize(ssl->peerRsaKey)
+                        /* check size of peer RSA key */
+                        if (ret == 0 && ssl->peerRsaKeyPresent &&
+                                          !ssl->options.verifyNone &&
+                                          wc_RsaEncryptSize(ssl->peerRsaKey)
                                               < ssl->options.minRsaKeySz) {
-                        ret = RSA_KEY_SIZE_E;
-                        WOLFSSL_MSG("Peer RSA key is too small");
-                    }
-
-                }
-                break;
-        #endif /* NO_RSA */
-        #ifdef HAVE_NTRU
-            case NTRUk:
-                {
-                    if (dCert->pubKeySize > sizeof(ssl->peerNtruKey)) {
-                        ret = PEER_KEY_ERROR;
-                    }
-                    else {
-                        XMEMCPY(ssl->peerNtruKey, dCert->publicKey,
-                                                             dCert->pubKeySize);
-                        ssl->peerNtruKeyLen = (word16)dCert->pubKeySize;
-                        ssl->peerNtruKeyPresent = 1;
-                    }
-                }
-                break;
-        #endif /* HAVE_NTRU */
-        #ifdef HAVE_ECC
-            case ECDSAk:
-                {
-                    int curveId;
-                    if (ssl->peerEccDsaKey == NULL) {
-                        /* alloc/init on demand */
-                        ssl->peerEccDsaKey = (ecc_key*)XMALLOC(sizeof(ecc_key),
-                                              ssl->heap, DYNAMIC_TYPE_ECC);
-                        if (ssl->peerEccDsaKey == NULL) {
-                            WOLFSSL_MSG("PeerEccDsaKey Memory error");
-                            return MEMORY_E;
+                            ret = RSA_KEY_SIZE_E;
+                            WOLFSSL_MSG("Peer RSA key is too small");
                         }
-                        wc_ecc_init_ex(ssl->peerEccDsaKey, ssl->heap,
-                                                                ssl->devId);
-                    } else if (ssl->peerEccDsaKeyPresent) {
-                        /* don't leak on reuse */
-                        wc_ecc_free(ssl->peerEccDsaKey);
-                        ssl->peerEccDsaKeyPresent = 0;
-                        wc_ecc_init_ex(ssl->peerEccDsaKey, ssl->heap,
-                                                                ssl->devId);
+                        break;
                     }
+                #endif /* NO_RSA */
+                #ifdef HAVE_NTRU
+                    case NTRUk:
+                    {
+                        if (args->dCert->pubKeySize > sizeof(ssl->peerNtruKey)) {
+                            ret = PEER_KEY_ERROR;
+                        }
+                        else {
+                            XMEMCPY(ssl->peerNtruKey, args->dCert->publicKey,
+                                                      args->dCert->pubKeySize);
+                            ssl->peerNtruKeyLen =
+                                (word16)args->dCert->pubKeySize;
+                            ssl->peerNtruKeyPresent = 1;
+                        }
+                        break;
+                    }
+                #endif /* HAVE_NTRU */
+                #ifdef HAVE_ECC
+                    case ECDSAk:
+                    {
+                        int curveId;
+                        if (ssl->peerEccDsaKey == NULL) {
+                            /* alloc/init on demand */
+                            ret = AllocKey(ssl, DYNAMIC_TYPE_ECC,
+                                    (void**)&ssl->peerEccDsaKey);
+                        } else if (ssl->peerEccDsaKeyPresent) {
+                            /* don't leak on reuse */
+                            wc_ecc_free(ssl->peerEccDsaKey);
+                            ssl->peerEccDsaKeyPresent = 0;
+                            ret = wc_ecc_init_ex(ssl->peerEccDsaKey,
+                                                    ssl->heap, ssl->devId);
+                        }
+                        if (ret != 0) {
+                            break;
+                        }
 
-                    curveId = wc_ecc_get_oid(dCert->keyOID, NULL, NULL);
-                    if (wc_ecc_import_x963_ex(dCert->publicKey,
-                        dCert->pubKeySize, ssl->peerEccDsaKey, curveId) != 0) {
-                        ret = PEER_KEY_ERROR;
-                    }
-                    else {
-                        ssl->peerEccDsaKeyPresent = 1;
-                        #ifdef HAVE_PK_CALLBACKS
-                            #ifdef HAVE_ECC
-                                ssl->buffers.peerEccDsaKey.buffer =
-                                       (byte*)XMALLOC(dCert->pubKeySize,
-                                               ssl->heap, DYNAMIC_TYPE_ECC);
-                                if (ssl->buffers.peerEccDsaKey.buffer == NULL)
-                                    ret = MEMORY_ERROR;
-                                else {
-                                    XMEMCPY(ssl->buffers.peerEccDsaKey.buffer,
-                                           dCert->publicKey, dCert->pubKeySize);
-                                    ssl->buffers.peerEccDsaKey.length =
-                                            dCert->pubKeySize;
-                                }
-                            #endif /* HAVE_ECC */
-                        #endif /*HAVE_PK_CALLBACKS */
-                    }
+                        curveId = wc_ecc_get_oid(args->dCert->keyOID, NULL, NULL);
+                        if (wc_ecc_import_x963_ex(args->dCert->publicKey,
+                                    args->dCert->pubKeySize, ssl->peerEccDsaKey,
+                                                            curveId) != 0) {
+                            ret = PEER_KEY_ERROR;
+                        }
+                        else {
+                            ssl->peerEccDsaKeyPresent = 1;
+                    #ifdef HAVE_PK_CALLBACKS
+                        #ifdef HAVE_ECC
+                            ssl->buffers.peerEccDsaKey.buffer =
+                                   (byte*)XMALLOC(args->dCert->pubKeySize,
+                                           ssl->heap, DYNAMIC_TYPE_ECC);
+                            if (ssl->buffers.peerEccDsaKey.buffer == NULL)
+                                ret = MEMORY_ERROR;
+                            else {
+                                XMEMCPY(ssl->buffers.peerEccDsaKey.buffer,
+                                        args->dCert->publicKey,
+                                        args->dCert->pubKeySize);
+                                ssl->buffers.peerEccDsaKey.length =
+                                        args->dCert->pubKeySize;
+                            }
+                        #endif /* HAVE_ECC */
+                    #endif /*HAVE_PK_CALLBACKS */
+                        }
 
-                    /* check size of peer ECC key */
-                    if (ret == 0 && ssl->peerEccDsaKeyPresent &&
+                        /* check size of peer ECC key */
+                        if (ret == 0 && ssl->peerEccDsaKeyPresent &&
                                               !ssl->options.verifyNone &&
                                               wc_ecc_size(ssl->peerEccDsaKey)
                                               < ssl->options.minEccKeySz) {
-                        ret = ECC_KEY_SIZE_E;
-                        WOLFSSL_MSG("Peer ECC key is too small");
+                            ret = ECC_KEY_SIZE_E;
+                            WOLFSSL_MSG("Peer ECC key is too small");
+                        }
+                        break;
                     }
-
+                #endif /* HAVE_ECC */
+                    default:
+                        break;
                 }
-                break;
-        #endif /* HAVE_ECC */
-            default:
-                break;
-        }
 
-        FreeDecodedCert(dCert);
-    }
+                FreeDecodedCert(args->dCert);
+                args->dCertInit = 0;
 
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(dCert, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-
-    store = (WOLFSSL_X509_STORE_CTX*)XMALLOC(sizeof(WOLFSSL_X509_STORE_CTX),
-                                                 NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (store == NULL) {
-        XFREE(domain, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-        return MEMORY_E;
-    }
-#endif
-    XMEMSET(store, 0, sizeof(WOLFSSL_X509_STORE_CTX));
-
-    if (anyError != 0 && ret == 0)
-        ret = anyError;
-
-    if (ret != 0) {
-        if (!ssl->options.verifyNone) {
-            int why = bad_certificate;
-
-            if (ret == ASN_AFTER_DATE_E || ret == ASN_BEFORE_DATE_E)
-                why = certificate_expired;
-            if (ssl->verifyCallback) {
-                int ok;
-
-                store->error = ret;
-                store->error_depth = totalCerts;
-                store->discardSessionCerts = 0;
-                store->domain = domain;
-                store->userCtx = ssl->verifyCbCtx;
-                store->certs = certs;
-                store->totalCerts = totalCerts;
-#ifdef KEEP_PEER_CERT
-                store->current_cert = &ssl->peerCert;
-#else
-                store->current_cert = NULL;
-#endif
-#if defined(HAVE_EX_DATA) || defined(HAVE_FORTRESS)
-                store->ex_data = ssl;
-#endif
-                ok = ssl->verifyCallback(0, store);
-                if (ok) {
-                    WOLFSSL_MSG("Verify callback overriding error!");
-                    ret = 0;
+                /* release since we don't need it anymore */
+                if (args->dCert) {
+                    XFREE(args->dCert, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                    args->dCert = NULL;
                 }
-                #ifdef SESSION_CERTS
-                if (store->discardSessionCerts) {
-                    WOLFSSL_MSG("Verify callback requested discard sess certs");
-                    ssl->session.chain.count = 0;
-                }
-                #endif
-            }
+            } /* if (count > 0) */
+
+            /* Check for error */
             if (ret != 0) {
-                SendAlert(ssl, alert_fatal, why);   /* try to send */
-                ssl->options.isClosed = 1;
+                goto exit_dc;
             }
+
+            /* Advance state and proceed */
+            ssl->options.asyncState = TLS_ASYNC_FINALIZE;
+        } /* case TLS_ASYNC_VERIFY */
+
+        case TLS_ASYNC_FINALIZE:
+        {
+        #ifdef WOLFSSL_SMALL_STACK
+            WOLFSSL_X509_STORE_CTX* store = (WOLFSSL_X509_STORE_CTX*)XMALLOC(
+                                    sizeof(WOLFSSL_X509_STORE_CTX), ssl->heap,
+                                                    DYNAMIC_TYPE_TMP_BUFFER);
+            if (store == NULL) {
+                ERROR_OUT(MEMORY_E, exit_dc);
+            }
+        #else
+            WOLFSSL_X509_STORE_CTX  store[1];
+        #endif
+
+            XMEMSET(store, 0, sizeof(WOLFSSL_X509_STORE_CTX));
+
+            /* load last error */
+            if (lastErr != 0 && ret == 0) {
+                ret = lastErr;
+            }
+
+            if (ret != 0) {
+                if (!ssl->options.verifyNone) {
+                    int why = bad_certificate;
+
+                    if (ret == ASN_AFTER_DATE_E || ret == ASN_BEFORE_DATE_E) {
+                        why = certificate_expired;
+                    }
+                    if (ssl->verifyCallback) {
+                        int ok;
+
+                        store->error = ret;
+                        store->error_depth = args->totalCerts;
+                        store->discardSessionCerts = 0;
+                        store->domain = args->domain;
+                        store->userCtx = ssl->verifyCbCtx;
+                        store->certs = args->certs;
+                        store->totalCerts = args->totalCerts;
+                    #ifdef KEEP_PEER_CERT
+                        if (ssl->peerCert.subject.sz > 0)
+                            store->current_cert = &ssl->peerCert;
+                        else
+                            store->current_cert = NULL;
+                    #else
+                        store->current_cert = NULL;
+                    #endif /* KEEP_PEER_CERT */
+                    #if defined(HAVE_EX_DATA) || defined(HAVE_FORTRESS)
+                        store->ex_data = ssl;
+                    #endif
+                        ok = ssl->verifyCallback(0, store);
+                        if (ok) {
+                            WOLFSSL_MSG("Verify callback overriding error!");
+                            ret = 0;
+                        }
+                    #ifdef SESSION_CERTS
+                        if (store->discardSessionCerts) {
+                            WOLFSSL_MSG("Verify callback requested discard sess certs");
+                            ssl->session.chain.count = 0;
+                        }
+                    #endif /* SESSION_CERTS */
+                    }
+                    if (ret != 0) {
+                        SendAlert(ssl, alert_fatal, why);   /* try to send */
+                        ssl->options.isClosed = 1;
+                    }
+                }
+                ssl->error = ret;
+            }
+        #ifdef WOLFSSL_ALWAYS_VERIFY_CB
+            else {
+                if (ssl->verifyCallback) {
+                    int ok;
+
+                    store->error = ret;
+                #ifdef WOLFSSL_WPAS
+                    store->error_depth = 0;
+                #else
+                    store->error_depth = args->totalCerts;
+                #endif
+                    store->discardSessionCerts = 0;
+                    store->domain = args->domain;
+                    store->userCtx = ssl->verifyCbCtx;
+                    store->certs = args->certs;
+                    store->totalCerts = args->totalCerts;
+                #ifdef KEEP_PEER_CERT
+                    if (ssl->peerCert.subject.sz > 0)
+                        store->current_cert = &ssl->peerCert;
+                    else
+                        store->current_cert = NULL;
+                #endif
+                    store->ex_data = ssl;
+
+                    ok = ssl->verifyCallback(1, store);
+                    if (!ok) {
+                        WOLFSSL_MSG("Verify callback overriding valid certificate!");
+                        ret = -1;
+                        SendAlert(ssl, alert_fatal, bad_certificate);
+                        ssl->options.isClosed = 1;
+                    }
+                #ifdef SESSION_CERTS
+                    if (store->discardSessionCerts) {
+                        WOLFSSL_MSG("Verify callback requested discard sess certs");
+                        ssl->session.chain.count = 0;
+                    }
+                #endif /* SESSION_CERTS */
+                }
+            }
+        #endif /* WOLFSSL_ALWAYS_VERIFY_CB */
+
+            if (ssl->options.verifyNone &&
+                                      (ret == CRL_MISSING || ret == CRL_CERT_REVOKED)) {
+                WOLFSSL_MSG("Ignoring CRL problem based on verify setting");
+                ret = ssl->error = 0;
+            }
+
+            if (ret == 0 && ssl->options.side == WOLFSSL_CLIENT_END) {
+                ssl->options.serverState = SERVER_CERT_COMPLETE;
+            }
+
+            if (IsEncryptionOn(ssl, 0)) {
+                args->idx += ssl->keys.padSz;
+            }
+
+        #ifdef WOLFSSL_SMALL_STACK
+            XFREE(store, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        #endif
+            /* Advance state and proceed */
+            ssl->options.asyncState = TLS_ASYNC_END;
+        } /* case TLS_ASYNC_FINALIZE */
+
+        case TLS_ASYNC_END:
+        {
+            /* Set final index */
+            *inOutIdx = args->idx;
+
+            break;
         }
-        ssl->error = ret;
+        default:
+            ret = INPUT_CASE_ERROR;
+            break;
+    } /* switch(ssl->options.asyncState) */
+
+exit_dc:
+
+    WOLFSSL_LEAVE("DoCertificate", ret);
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    /* Handle WC_PENDING_E */
+    if (ret == WC_PENDING_E) {
+        /* Mark message as not received so it can be processed again */
+        ssl->msgsReceived.got_certificate = 0;
+
+        return ret;
     }
-#ifdef WOLFSSL_ALWAYS_VERIFY_CB
-    else {
-        if (ssl->verifyCallback) {
-            int ok;
+#endif /* WOLFSSL_ASYNC_CRYPT */
 
-            store->error = ret;
-#ifdef WOLFSSL_WPAS
-            store->error_depth = 0;
-#else
-            store->error_depth = totalCerts;
-#endif
-            store->discardSessionCerts = 0;
-            store->domain = domain;
-            store->userCtx = ssl->verifyCbCtx;
-            store->certs = certs;
-            store->totalCerts = totalCerts;
-#ifdef KEEP_PEER_CERT
-            store->current_cert = &ssl->peerCert;
-#endif
-            store->ex_data = ssl;
-
-            ok = ssl->verifyCallback(1, store);
-            if (!ok) {
-                WOLFSSL_MSG("Verify callback overriding valid certificate!");
-                ret = -1;
-                SendAlert(ssl, alert_fatal, bad_certificate);
-                ssl->options.isClosed = 1;
-            }
-            #ifdef SESSION_CERTS
-            if (store->discardSessionCerts) {
-                WOLFSSL_MSG("Verify callback requested discard sess certs");
-                ssl->session.chain.count = 0;
-            }
-            #endif
-        }
-    }
-#endif
-
-    if (ssl->options.verifyNone &&
-                              (ret == CRL_MISSING || ret == CRL_CERT_REVOKED)) {
-        WOLFSSL_MSG("Ignoring CRL problem based on verify setting");
-        ret = ssl->error = 0;
-    }
-
-    if (ret == 0 && ssl->options.side == WOLFSSL_CLIENT_END)
-        ssl->options.serverState = SERVER_CERT_COMPLETE;
-
-    if (IsEncryptionOn(ssl, 0)) {
-        *inOutIdx += ssl->keys.padSz;
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(store,  NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    XFREE(domain, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+    FreeDoCertArgs(ssl, args);
+    FreeKeyExchange(ssl);
 
     return ret;
 }
@@ -7348,9 +7720,9 @@ static int DoCertificateStatus(WOLFSSL* ssl, byte* input, word32* inOutIdx,
                 return BAD_CERTIFICATE_STATUS_ERROR; /* not expected */
 
             #ifdef WOLFSSL_SMALL_STACK
-                status = (CertStatus*)XMALLOC(sizeof(CertStatus), NULL,
+                status = (CertStatus*)XMALLOC(sizeof(CertStatus), ssl->heap,
                                                        DYNAMIC_TYPE_TMP_BUFFER);
-                response = (OcspResponse*)XMALLOC(sizeof(OcspResponse), NULL,
+                response = (OcspResponse*)XMALLOC(sizeof(OcspResponse), ssl->heap,
                                                        DYNAMIC_TYPE_TMP_BUFFER);
 
                 if (status == NULL || response == NULL) {
@@ -7379,8 +7751,8 @@ static int DoCertificateStatus(WOLFSSL* ssl, byte* input, word32* inOutIdx,
             *inOutIdx += status_length;
 
             #ifdef WOLFSSL_SMALL_STACK
-                XFREE(status,   NULL, DYNAMIC_TYPE_TMP_BUFFER);
-                XFREE(response, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+                XFREE(status,   ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                XFREE(response, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
             #endif
 
         }
@@ -7413,16 +7785,16 @@ static int DoCertificateStatus(WOLFSSL* ssl, byte* input, word32* inOutIdx,
             } while(0);
 
             #ifdef WOLFSSL_SMALL_STACK
-                status = (CertStatus*)XMALLOC(sizeof(CertStatus), NULL,
+                status = (CertStatus*)XMALLOC(sizeof(CertStatus), ssl->heap,
                                                        DYNAMIC_TYPE_TMP_BUFFER);
-                response = (OcspResponse*)XMALLOC(sizeof(OcspResponse), NULL,
+                response = (OcspResponse*)XMALLOC(sizeof(OcspResponse), ssl->heap,
                                                        DYNAMIC_TYPE_TMP_BUFFER);
 
                 if (status == NULL || response == NULL) {
                     if (status)
-                        XFREE(status, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+                        XFREE(status, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
                     if (response)
-                        XFREE(response, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+                        XFREE(response, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
 
                     return MEMORY_ERROR;
                 }
@@ -8011,12 +8383,12 @@ static int DoHandShakeMsgType(WOLFSSL* ssl, byte* input, word32* inOutIdx,
 
     case server_hello_done:
         WOLFSSL_MSG("processing server hello done");
-        #ifdef WOLFSSL_CALLBACKS
-            if (ssl->hsInfoOn)
-                AddPacketName("ServerHelloDone", &ssl->handShakeInfo);
-            if (ssl->toInfoOn)
-                AddLateName("ServerHelloDone", &ssl->timeoutInfo);
-        #endif
+    #ifdef WOLFSSL_CALLBACKS
+        if (ssl->hsInfoOn)
+            AddPacketName("ServerHelloDone", &ssl->handShakeInfo);
+        if (ssl->toInfoOn)
+            AddLateName("ServerHelloDone", &ssl->timeoutInfo);
+    #endif
         ssl->options.serverState = SERVER_HELLODONE_COMPLETE;
         if (IsEncryptionOn(ssl, 0)) {
             *inOutIdx += ssl->keys.padSz;
@@ -8065,6 +8437,7 @@ static int DoHandShakeMsgType(WOLFSSL* ssl, byte* input, word32* inOutIdx,
         ret = DECODE_E;
     }
 
+#ifdef WOLFSSL_ASYNC_CRYPT
     /* if async, offset index so this msg will be processed again */
     if (ret == WC_PENDING_E) {
         *inOutIdx -= HANDSHAKE_HEADER_SZ;
@@ -8074,6 +8447,7 @@ static int DoHandShakeMsgType(WOLFSSL* ssl, byte* input, word32* inOutIdx,
         }
     #endif
     }
+#endif
 
     WOLFSSL_LEAVE("DoHandShakeMsgType()", ret);
     return ret;
@@ -8769,185 +9143,254 @@ static int ChachaAEADDecrypt(WOLFSSL* ssl, byte* plain, const byte* input,
 #endif /* HAVE_AEAD */
 
 
-static INLINE int Encrypt(WOLFSSL* ssl, byte* out, const byte* input, word16 sz)
+static INLINE int EncryptDo(WOLFSSL* ssl, byte* out, const byte* input,
+    word16 sz, int asyncOkay)
 {
     int ret = 0;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    WC_ASYNC_DEV* asyncDev = NULL;
+    word32 event_flags = WC_ASYNC_FLAG_CALL_AGAIN;
+#else
+    (void)asyncOkay;
+#endif
 
     (void)out;
     (void)input;
     (void)sz;
 
-    if (ssl->encrypt.setup == 0) {
-        WOLFSSL_MSG("Encrypt ciphers not setup");
-        return ENCRYPT_ERROR;
-    }
-
-#ifdef HAVE_FUZZER
-    if (ssl->fuzzerCb)
-        ssl->fuzzerCb(ssl, input, sz, FUZZ_ENCRYPT, ssl->fuzzerCtx);
-#endif
-
     switch (ssl->specs.bulk_cipher_algorithm) {
-        #ifdef BUILD_ARC4
-            case wolfssl_rc4:
-                wc_Arc4Process(ssl->encrypt.arc4, out, input, sz);
-                break;
+    #ifdef BUILD_ARC4
+        case wolfssl_rc4:
+            wc_Arc4Process(ssl->encrypt.arc4, out, input, sz);
+            break;
+    #endif
+
+    #ifdef BUILD_DES3
+        case wolfssl_triple_des:
+            ret = wc_Des3_CbcEncrypt(ssl->encrypt.des3, out, input, sz);
+        #ifdef WOLFSSL_ASYNC_CRYPT
+            if (ret == WC_PENDING_E) {
+                asyncDev = &ssl->encrypt.des3->asyncDev;
+                if (asyncOkay)
+                    ret = wolfSSL_AsyncPush(ssl, asyncDev, event_flags);
+            }
         #endif
+            break;
+    #endif
 
-        #ifdef BUILD_DES3
-            case wolfssl_triple_des:
-                ret = wc_Des3_CbcEncrypt(ssl->encrypt.des3, out, input, sz);
+    #ifdef BUILD_AES
+        case wolfssl_aes:
+            ret = wc_AesCbcEncrypt(ssl->encrypt.aes, out, input, sz);
+        #ifdef WOLFSSL_ASYNC_CRYPT
+            if (ret == WC_PENDING_E) {
+                asyncDev = &ssl->encrypt.aes->asyncDev;
+                if (asyncOkay)
+                    ret = wolfSSL_AsyncPush(ssl, asyncDev, event_flags);
                 break;
+            }
         #endif
+            break;
+    #endif
 
-        #ifdef BUILD_AES
-            case wolfssl_aes:
-                ret = wc_AesCbcEncrypt(ssl->encrypt.aes, out, input, sz);
-                break;
+    #if defined(BUILD_AESGCM) || defined(HAVE_AESCCM)
+        case wolfssl_aes_gcm:
+        case wolfssl_aes_ccm:/* GCM AEAD macros use same size as CCM */
+        {
+            wc_AesAuthEncryptFunc aes_auth_fn;
+        #if defined(BUILD_AESGCM) && defined(HAVE_AESCCM)
+            aes_auth_fn = (ssl->specs.bulk_cipher_algorithm == wolfssl_aes_gcm)
+                            ? wc_AesGcmEncrypt : wc_AesCcmEncrypt;
+        #elif defined(BUILD_AESGCM)
+            aes_auth_fn = wc_AesGcmEncrypt;
+        #else
+            aes_auth_fn = wc_AesCcmEncrypt;
         #endif
+            const byte* additionalSrc = input - 5;
 
-        #ifdef BUILD_AESGCM
-            case wolfssl_aes_gcm:
-                {
-                    byte additional[AEAD_AUTH_DATA_SZ];
-                    byte nonce[AESGCM_NONCE_SZ];
-                    const byte* additionalSrc = input - 5;
+            XMEMSET(ssl->encrypt.additional, 0, AEAD_AUTH_DATA_SZ);
 
-                    XMEMSET(additional, 0, AEAD_AUTH_DATA_SZ);
+            /* sequence number field is 64-bits */
+            WriteSEQ(ssl, CUR_ORDER, ssl->encrypt.additional);
 
-                    /* sequence number field is 64-bits */
-                    WriteSEQ(ssl, CUR_ORDER, additional);
-
-                    /* Store the type, version. Unfortunately, they are in
-                     * the input buffer ahead of the plaintext. */
-                    #ifdef WOLFSSL_DTLS
-                        if (ssl->options.dtls) {
-                            additionalSrc -= DTLS_HANDSHAKE_EXTRA;
-                        }
-                    #endif
-                    XMEMCPY(additional + AEAD_TYPE_OFFSET, additionalSrc, 3);
-
-                    /* Store the length of the plain text minus the explicit
-                     * IV length minus the authentication tag size. */
-                    c16toa(sz - AESGCM_EXP_IV_SZ - ssl->specs.aead_mac_size,
-                                                additional + AEAD_LEN_OFFSET);
-                    XMEMCPY(nonce,
-                                 ssl->keys.aead_enc_imp_IV, AESGCM_IMP_IV_SZ);
-                    XMEMCPY(nonce + AESGCM_IMP_IV_SZ,
-                                     ssl->keys.aead_exp_IV, AESGCM_EXP_IV_SZ);
-                    ret = wc_AesGcmEncrypt(ssl->encrypt.aes,
-                               out + AESGCM_EXP_IV_SZ, input + AESGCM_EXP_IV_SZ,
-                               sz - AESGCM_EXP_IV_SZ - ssl->specs.aead_mac_size,
-                                 nonce, AESGCM_NONCE_SZ,
-                                 out + sz - ssl->specs.aead_mac_size,
-                                 ssl->specs.aead_mac_size,
-                                 additional, AEAD_AUTH_DATA_SZ);
-                    AeadIncrementExpIV(ssl);
-                    ForceZero(nonce, AESGCM_NONCE_SZ);
-                    #ifdef WOLFSSL_DTLS
-                        if (ssl->options.dtls)
-                            DtlsSEQIncrement(ssl, CUR_ORDER);
-                    #endif
-                }
-                break;
+            /* Store the type, version. Unfortunately, they are in
+             * the input buffer ahead of the plaintext. */
+        #ifdef WOLFSSL_DTLS
+            if (ssl->options.dtls) {
+                additionalSrc -= DTLS_HANDSHAKE_EXTRA;
+            }
         #endif
+            XMEMCPY(ssl->encrypt.additional + AEAD_TYPE_OFFSET,
+                                                        additionalSrc, 3);
 
-        #ifdef HAVE_AESCCM
-            /* AEAD CCM uses same size as macros for AESGCM */
-            case wolfssl_aes_ccm:
-                {
-                    byte additional[AEAD_AUTH_DATA_SZ];
-                    byte nonce[AESGCM_NONCE_SZ];
-                    const byte* additionalSrc = input - 5;
-
-                    XMEMSET(additional, 0, AEAD_AUTH_DATA_SZ);
-
-                    /* sequence number field is 64-bits */
-                    WriteSEQ(ssl, CUR_ORDER, additional);
-
-                    /* Store the type, version. Unfortunately, they are in
-                     * the input buffer ahead of the plaintext. */
-                    #ifdef WOLFSSL_DTLS
-                        if (ssl->options.dtls) {
-                            additionalSrc -= DTLS_HANDSHAKE_EXTRA;
-                        }
-                    #endif
-                    XMEMCPY(additional + AEAD_TYPE_OFFSET, additionalSrc, 3);
-
-                    /* Store the length of the plain text minus the explicit
-                     * IV length minus the authentication tag size. */
-                    c16toa(sz - AESGCM_EXP_IV_SZ - ssl->specs.aead_mac_size,
-                                                additional + AEAD_LEN_OFFSET);
-                    XMEMCPY(nonce,
-                                 ssl->keys.aead_enc_imp_IV, AESGCM_IMP_IV_SZ);
-                    XMEMCPY(nonce + AESGCM_IMP_IV_SZ,
-                                     ssl->keys.aead_exp_IV, AESGCM_EXP_IV_SZ);
-                    ret = wc_AesCcmEncrypt(ssl->encrypt.aes,
-                        out + AESGCM_EXP_IV_SZ, input + AESGCM_EXP_IV_SZ,
-                            sz - AESGCM_EXP_IV_SZ - ssl->specs.aead_mac_size,
-                        nonce, AESGCM_NONCE_SZ,
-                        out + sz - ssl->specs.aead_mac_size,
-                        ssl->specs.aead_mac_size,
-                        additional, AEAD_AUTH_DATA_SZ);
-                    AeadIncrementExpIV(ssl);
-                    ForceZero(nonce, AESGCM_NONCE_SZ);
-                    #ifdef WOLFSSL_DTLS
-                        if (ssl->options.dtls)
-                            DtlsSEQIncrement(ssl, CUR_ORDER);
-                    #endif
-                }
-                break;
+            /* Store the length of the plain text minus the explicit
+             * IV length minus the authentication tag size. */
+            c16toa(sz - AESGCM_EXP_IV_SZ - ssl->specs.aead_mac_size,
+                                ssl->encrypt.additional + AEAD_LEN_OFFSET);
+            XMEMCPY(ssl->encrypt.nonce,
+                                ssl->keys.aead_enc_imp_IV, AESGCM_IMP_IV_SZ);
+            XMEMCPY(ssl->encrypt.nonce + AESGCM_IMP_IV_SZ,
+                                ssl->keys.aead_exp_IV, AESGCM_EXP_IV_SZ);
+            ret = aes_auth_fn(ssl->encrypt.aes,
+                    out + AESGCM_EXP_IV_SZ, input + AESGCM_EXP_IV_SZ,
+                    sz - AESGCM_EXP_IV_SZ - ssl->specs.aead_mac_size,
+                    ssl->encrypt.nonce, AESGCM_NONCE_SZ,
+                    out + sz - ssl->specs.aead_mac_size,
+                    ssl->specs.aead_mac_size,
+                    ssl->encrypt.additional, AEAD_AUTH_DATA_SZ);
+        #ifdef WOLFSSL_ASYNC_CRYPT
+            if (ret == WC_PENDING_E) {
+                asyncDev = &ssl->encrypt.aes->asyncDev;
+                if (asyncOkay)
+                    ret = wolfSSL_AsyncPush(ssl, asyncDev, event_flags);
+            }
         #endif
+        }
+        break;
+    #endif /* BUILD_AESGCM || HAVE_AESCCM */
 
-        #ifdef HAVE_CAMELLIA
-            case wolfssl_camellia:
-                wc_CamelliaCbcEncrypt(ssl->encrypt.cam, out, input, sz);
-                break;
-        #endif
+    #ifdef HAVE_CAMELLIA
+        case wolfssl_camellia:
+            wc_CamelliaCbcEncrypt(ssl->encrypt.cam, out, input, sz);
+            break;
+    #endif
 
-        #ifdef HAVE_HC128
-            case wolfssl_hc128:
-                ret = wc_Hc128_Process(ssl->encrypt.hc128, out, input, sz);
-                break;
-        #endif
+    #ifdef HAVE_HC128
+        case wolfssl_hc128:
+            ret = wc_Hc128_Process(ssl->encrypt.hc128, out, input, sz);
+            break;
+    #endif
 
-        #ifdef BUILD_RABBIT
-            case wolfssl_rabbit:
-                ret = wc_RabbitProcess(ssl->encrypt.rabbit, out, input, sz);
-                break;
-        #endif
+    #ifdef BUILD_RABBIT
+        case wolfssl_rabbit:
+            ret = wc_RabbitProcess(ssl->encrypt.rabbit, out, input, sz);
+            break;
+    #endif
 
-        #if defined(HAVE_CHACHA) && defined(HAVE_POLY1305)
-            case wolfssl_chacha:
-                ret = ChachaAEADEncrypt(ssl, out, input, sz);
-                break;
-        #endif
+    #if defined(HAVE_CHACHA) && defined(HAVE_POLY1305)
+        case wolfssl_chacha:
+            ret = ChachaAEADEncrypt(ssl, out, input, sz);
+            break;
+    #endif
 
-        #ifdef HAVE_NULL_CIPHER
-            case wolfssl_cipher_null:
-                if (input != out) {
-                    XMEMMOVE(out, input, sz);
-                }
-                break;
-        #endif
+    #ifdef HAVE_NULL_CIPHER
+        case wolfssl_cipher_null:
+            if (input != out) {
+                XMEMMOVE(out, input, sz);
+            }
+            break;
+    #endif
 
-        #ifdef HAVE_IDEA
-            case wolfssl_idea:
-                ret = wc_IdeaCbcEncrypt(ssl->encrypt.idea, out, input, sz);
-                break;
-        #endif
+    #ifdef HAVE_IDEA
+        case wolfssl_idea:
+            ret = wc_IdeaCbcEncrypt(ssl->encrypt.idea, out, input, sz);
+            break;
+    #endif
 
-            default:
-                WOLFSSL_MSG("wolfSSL Encrypt programming error");
-                ret = ENCRYPT_ERROR;
+        default:
+            WOLFSSL_MSG("wolfSSL Encrypt programming error");
+            ret = ENCRYPT_ERROR;
     }
 
+#ifdef WOLFSSL_ASYNC_CRYPT
+    /* if async is not okay, then block */
+    if (ret == WC_PENDING_E && !asyncOkay) {
+        ret = wc_AsyncWait(ret, asyncDev, event_flags);
+    }
+#endif
+
     return ret;
 }
 
+static INLINE int Encrypt(WOLFSSL* ssl, byte* out, const byte* input, word16 sz,
+    int asyncOkay)
+{
+    int ret = 0;
 
+#ifdef WOLFSSL_ASYNC_CRYPT
+    if (asyncOkay && ssl->error == WC_PENDING_E) {
+        ssl->error = 0; /* clear async */
+    }
+#endif
 
-static INLINE int Decrypt(WOLFSSL* ssl, byte* plain, const byte* input,
+    switch (ssl->encrypt.state) {
+        case CIPHER_STATE_BEGIN:
+        {
+            if (ssl->encrypt.setup == 0) {
+                WOLFSSL_MSG("Encrypt ciphers not setup");
+                return ENCRYPT_ERROR;
+            }
+
+        #ifdef HAVE_FUZZER
+            if (ssl->fuzzerCb)
+                ssl->fuzzerCb(ssl, input, sz, FUZZ_ENCRYPT, ssl->fuzzerCtx);
+        #endif
+
+        #if defined(BUILD_AESGCM) || defined(HAVE_AESCCM)
+            /* make sure AES GCM/CCM memory is allocated */
+            /* free for these happens in FreeCiphers */
+            if (ssl->specs.bulk_cipher_algorithm == wolfssl_aes_ccm ||
+                ssl->specs.bulk_cipher_algorithm == wolfssl_aes_gcm) {
+                /* make sure auth iv and auth are allocated */
+                if (ssl->encrypt.additional == NULL)
+                    ssl->encrypt.additional = (byte*)XMALLOC(AEAD_AUTH_DATA_SZ,
+                                                   ssl->heap, DYNAMIC_TYPE_AES);
+                if (ssl->encrypt.nonce == NULL)
+                    ssl->encrypt.nonce = (byte*)XMALLOC(AESGCM_NONCE_SZ,
+                                                   ssl->heap, DYNAMIC_TYPE_AES);
+                if (ssl->encrypt.additional == NULL ||
+                         ssl->encrypt.nonce == NULL) {
+                    return MEMORY_E;
+                }
+            }
+        #endif /* BUILD_AESGCM || HAVE_AESCCM */
+
+            /* Advance state and proceed */
+            ssl->encrypt.state = CIPHER_STATE_DO;
+        }
+        case CIPHER_STATE_DO:
+        {
+            ret = EncryptDo(ssl, out, input, sz, asyncOkay);
+
+            /* Advance state */
+            ssl->encrypt.state = CIPHER_STATE_END;
+
+        #ifdef WOLFSSL_ASYNC_CRYPT
+            /* If pending, then leave and return will resume below */
+            if (ret == WC_PENDING_E) {
+                return ret;
+            }
+        #endif
+        }
+
+        case CIPHER_STATE_END:
+        {
+        #if defined(BUILD_AESGCM) || defined(HAVE_AESCCM)
+            if (ssl->specs.bulk_cipher_algorithm == wolfssl_aes_ccm ||
+                ssl->specs.bulk_cipher_algorithm == wolfssl_aes_gcm)
+            {
+                /* finalize authentication cipher */
+                AeadIncrementExpIV(ssl);
+
+                if (ssl->encrypt.nonce)
+                    ForceZero(ssl->encrypt.nonce, AESGCM_NONCE_SZ);
+
+            #ifdef WOLFSSL_DTLS
+                if (ssl->options.dtls)
+                    DtlsSEQIncrement(ssl, CUR_ORDER);
+            #endif
+            }
+        #endif /* BUILD_AESGCM || HAVE_AESCCM */
+            break;
+        }
+    }
+
+    /* Reset state */
+    ssl->encrypt.state = CIPHER_STATE_BEGIN;
+
+    return ret;
+}
+
+static INLINE int DecryptDo(WOLFSSL* ssl, byte* plain, const byte* input,
                            word16 sz)
 {
     int ret = 0;
@@ -8956,144 +9399,226 @@ static INLINE int Decrypt(WOLFSSL* ssl, byte* plain, const byte* input,
     (void)input;
     (void)sz;
 
-    if (ssl->decrypt.setup == 0) {
-        WOLFSSL_MSG("Decrypt ciphers not setup");
-        return DECRYPT_ERROR;
+    switch (ssl->specs.bulk_cipher_algorithm)
+    {
+    #ifdef BUILD_ARC4
+        case wolfssl_rc4:
+            wc_Arc4Process(ssl->decrypt.arc4, plain, input, sz);
+            break;
+    #endif
+
+    #ifdef BUILD_DES3
+        case wolfssl_triple_des:
+            ret = wc_Des3_CbcDecrypt(ssl->decrypt.des3, plain, input, sz);
+        #ifdef WOLFSSL_ASYNC_CRYPT
+            if (ret == WC_PENDING_E) {
+                ret = wolfSSL_AsyncPush(ssl, &ssl->decrypt.des3->asyncDev,
+                                                    WC_ASYNC_FLAG_CALL_AGAIN);
+            }
+        #endif
+            break;
+    #endif
+
+    #ifdef BUILD_AES
+        case wolfssl_aes:
+            ret = wc_AesCbcDecrypt(ssl->decrypt.aes, plain, input, sz);
+        #ifdef WOLFSSL_ASYNC_CRYPT
+            if (ret == WC_PENDING_E) {
+                ret = wolfSSL_AsyncPush(ssl, &ssl->decrypt.aes->asyncDev,
+                                                    WC_ASYNC_FLAG_CALL_AGAIN);
+            }
+        #endif
+            break;
+    #endif
+
+    #if defined(BUILD_AESGCM) || defined(HAVE_AESCCM)
+        case wolfssl_aes_gcm:
+        case wolfssl_aes_ccm: /* GCM AEAD macros use same size as CCM */
+        {
+            wc_AesAuthDecryptFunc aes_auth_fn;
+        #if defined(BUILD_AESGCM) && defined(HAVE_AESCCM)
+            aes_auth_fn = (ssl->specs.bulk_cipher_algorithm == wolfssl_aes_gcm)
+                            ? wc_AesGcmDecrypt : wc_AesCcmDecrypt;
+        #elif defined(BUILD_AESGCM)
+            aes_auth_fn = wc_AesGcmDecrypt;
+        #else
+            aes_auth_fn = wc_AesCcmDecrypt;
+        #endif
+
+            XMEMSET(ssl->decrypt.additional, 0, AEAD_AUTH_DATA_SZ);
+
+            /* sequence number field is 64-bits */
+            WriteSEQ(ssl, PEER_ORDER, ssl->decrypt.additional);
+
+            ssl->decrypt.additional[AEAD_TYPE_OFFSET] = ssl->curRL.type;
+            ssl->decrypt.additional[AEAD_VMAJ_OFFSET] = ssl->curRL.pvMajor;
+            ssl->decrypt.additional[AEAD_VMIN_OFFSET] = ssl->curRL.pvMinor;
+
+            c16toa(sz - AESGCM_EXP_IV_SZ - ssl->specs.aead_mac_size,
+                                    ssl->decrypt.additional + AEAD_LEN_OFFSET);
+            XMEMCPY(ssl->decrypt.nonce, ssl->keys.aead_dec_imp_IV,
+                                                            AESGCM_IMP_IV_SZ);
+            XMEMCPY(ssl->decrypt.nonce + AESGCM_IMP_IV_SZ, input,
+                                                            AESGCM_EXP_IV_SZ);
+            if ((ret = aes_auth_fn(ssl->decrypt.aes,
+                        plain + AESGCM_EXP_IV_SZ,
+                        input + AESGCM_EXP_IV_SZ,
+                           sz - AESGCM_EXP_IV_SZ - ssl->specs.aead_mac_size,
+                        ssl->decrypt.nonce, AESGCM_NONCE_SZ,
+                        input + sz - ssl->specs.aead_mac_size,
+                        ssl->specs.aead_mac_size,
+                        ssl->decrypt.additional, AEAD_AUTH_DATA_SZ)) < 0) {
+            #ifdef WOLFSSL_ASYNC_CRYPT
+                if (ret == WC_PENDING_E) {
+                    ret = wolfSSL_AsyncPush(ssl,
+                        &ssl->decrypt.aes->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+                    break;
+                }
+            #endif
+            }
+        }
+        break;
+    #endif /* BUILD_AESGCM || HAVE_AESCCM */
+
+    #ifdef HAVE_CAMELLIA
+        case wolfssl_camellia:
+            wc_CamelliaCbcDecrypt(ssl->decrypt.cam, plain, input, sz);
+            break;
+    #endif
+
+    #ifdef HAVE_HC128
+        case wolfssl_hc128:
+            ret = wc_Hc128_Process(ssl->decrypt.hc128, plain, input, sz);
+            break;
+    #endif
+
+    #ifdef BUILD_RABBIT
+        case wolfssl_rabbit:
+            ret = wc_RabbitProcess(ssl->decrypt.rabbit, plain, input, sz);
+            break;
+    #endif
+
+    #if defined(HAVE_CHACHA) && defined(HAVE_POLY1305)
+        case wolfssl_chacha:
+            ret = ChachaAEADDecrypt(ssl, plain, input, sz);
+            break;
+    #endif
+
+    #ifdef HAVE_NULL_CIPHER
+        case wolfssl_cipher_null:
+            if (input != plain) {
+                XMEMMOVE(plain, input, sz);
+            }
+            break;
+    #endif
+
+    #ifdef HAVE_IDEA
+        case wolfssl_idea:
+            ret = wc_IdeaCbcDecrypt(ssl->decrypt.idea, plain, input, sz);
+            break;
+    #endif
+
+        default:
+            WOLFSSL_MSG("wolfSSL Decrypt programming error");
+            ret = DECRYPT_ERROR;
     }
 
-    switch (ssl->specs.bulk_cipher_algorithm) {
-        #ifdef BUILD_ARC4
-            case wolfssl_rc4:
-                wc_Arc4Process(ssl->decrypt.arc4, plain, input, sz);
-                break;
-        #endif
+    return ret;
+}
 
-        #ifdef BUILD_DES3
-            case wolfssl_triple_des:
-                ret = wc_Des3_CbcDecrypt(ssl->decrypt.des3, plain, input, sz);
-                break;
-        #endif
+static INLINE int Decrypt(WOLFSSL* ssl, byte* plain, const byte* input,
+                           word16 sz)
+{
+    int ret = 0;
 
-        #ifdef BUILD_AES
-            case wolfssl_aes:
-                ret = wc_AesCbcDecrypt(ssl->decrypt.aes, plain, input, sz);
-                break;
-        #endif
+#ifdef WOLFSSL_ASYNC_CRYPT
+    ret = wolfSSL_AsyncPop(ssl, &ssl->decrypt.state);
+    if (ret != WC_NOT_PENDING_E) {
+        /* check for still pending */
+        if (ret == WC_PENDING_E)
+            return ret;
 
-        #ifdef BUILD_AESGCM
-            case wolfssl_aes_gcm:
-            {
-                byte additional[AEAD_AUTH_DATA_SZ];
-                byte nonce[AESGCM_NONCE_SZ];
+        ssl->error = 0; /* clear async */
 
-                XMEMSET(additional, 0, AEAD_AUTH_DATA_SZ);
+        /* let failures through so CIPHER_STATE_END logic is run */
+    }
+    else
+#endif
+    {
+        /* Reset state */
+        ret = 0;
+        ssl->decrypt.state = CIPHER_STATE_BEGIN;
+    }
 
-                /* sequence number field is 64-bits */
-                WriteSEQ(ssl, PEER_ORDER, additional);
-
-                additional[AEAD_TYPE_OFFSET] = ssl->curRL.type;
-                additional[AEAD_VMAJ_OFFSET] = ssl->curRL.pvMajor;
-                additional[AEAD_VMIN_OFFSET] = ssl->curRL.pvMinor;
-
-                c16toa(sz - AESGCM_EXP_IV_SZ - ssl->specs.aead_mac_size,
-                                        additional + AEAD_LEN_OFFSET);
-                XMEMCPY(nonce, ssl->keys.aead_dec_imp_IV, AESGCM_IMP_IV_SZ);
-                XMEMCPY(nonce + AESGCM_IMP_IV_SZ, input, AESGCM_EXP_IV_SZ);
-                if (wc_AesGcmDecrypt(ssl->decrypt.aes,
-                            plain + AESGCM_EXP_IV_SZ,
-                            input + AESGCM_EXP_IV_SZ,
-                               sz - AESGCM_EXP_IV_SZ - ssl->specs.aead_mac_size,
-                            nonce, AESGCM_NONCE_SZ,
-                            input + sz - ssl->specs.aead_mac_size,
-                            ssl->specs.aead_mac_size,
-                            additional, AEAD_AUTH_DATA_SZ) < 0) {
-                    if (!ssl->options.dtls)
-                        SendAlert(ssl, alert_fatal, bad_record_mac);
-                    ret = VERIFY_MAC_ERROR;
-                }
-                ForceZero(nonce, AESGCM_NONCE_SZ);
+    switch (ssl->decrypt.state) {
+        case CIPHER_STATE_BEGIN:
+        {
+            if (ssl->decrypt.setup == 0) {
+                WOLFSSL_MSG("Decrypt ciphers not setup");
+                return DECRYPT_ERROR;
             }
-            break;
-        #endif
 
-        #ifdef HAVE_AESCCM
-            /* AESGCM AEAD macros use same size as AESCCM */
-            case wolfssl_aes_ccm:
-            {
-                byte additional[AEAD_AUTH_DATA_SZ];
-                byte nonce[AESGCM_NONCE_SZ];
-
-                XMEMSET(additional, 0, AEAD_AUTH_DATA_SZ);
-
-                /* sequence number field is 64-bits */
-                WriteSEQ(ssl, PEER_ORDER, additional);
-
-                additional[AEAD_TYPE_OFFSET] = ssl->curRL.type;
-                additional[AEAD_VMAJ_OFFSET] = ssl->curRL.pvMajor;
-                additional[AEAD_VMIN_OFFSET] = ssl->curRL.pvMinor;
-
-                c16toa(sz - AESGCM_EXP_IV_SZ - ssl->specs.aead_mac_size,
-                                        additional + AEAD_LEN_OFFSET);
-                XMEMCPY(nonce, ssl->keys.aead_dec_imp_IV, AESGCM_IMP_IV_SZ);
-                XMEMCPY(nonce + AESGCM_IMP_IV_SZ, input, AESGCM_EXP_IV_SZ);
-                if (wc_AesCcmDecrypt(ssl->decrypt.aes,
-                            plain + AESGCM_EXP_IV_SZ,
-                            input + AESGCM_EXP_IV_SZ,
-                               sz - AESGCM_EXP_IV_SZ - ssl->specs.aead_mac_size,
-                            nonce, AESGCM_NONCE_SZ,
-                            input + sz - ssl->specs.aead_mac_size,
-                            ssl->specs.aead_mac_size,
-                            additional, AEAD_AUTH_DATA_SZ) < 0) {
-                    if (!ssl->options.dtls)
-                        SendAlert(ssl, alert_fatal, bad_record_mac);
-                    ret = VERIFY_MAC_ERROR;
+        #if defined(BUILD_AESGCM) || defined(HAVE_AESCCM)
+            /* make sure AES GCM/CCM memory is allocated */
+            /* free for these happens in FreeCiphers */
+            if (ssl->specs.bulk_cipher_algorithm == wolfssl_aes_ccm ||
+                ssl->specs.bulk_cipher_algorithm == wolfssl_aes_gcm) {
+                /* make sure auth iv and auth are allocated */
+                if (ssl->decrypt.additional == NULL)
+                    ssl->decrypt.additional = (byte*)XMALLOC(AEAD_AUTH_DATA_SZ,
+                                                   ssl->heap, DYNAMIC_TYPE_AES);
+                if (ssl->decrypt.nonce == NULL)
+                    ssl->decrypt.nonce = (byte*)XMALLOC(AESGCM_NONCE_SZ,
+                                                   ssl->heap, DYNAMIC_TYPE_AES);
+                if (ssl->decrypt.additional == NULL ||
+                         ssl->decrypt.nonce == NULL) {
+                    return MEMORY_E;
                 }
-                ForceZero(nonce, AESGCM_NONCE_SZ);
             }
+        #endif /* BUILD_AESGCM || HAVE_AESCCM */
+
+            /* Advance state and proceed */
+            ssl->decrypt.state = CIPHER_STATE_DO;
+        }
+        case CIPHER_STATE_DO:
+        {
+            ret = DecryptDo(ssl, plain, input, sz);
+
+            /* Advance state */
+            ssl->decrypt.state = CIPHER_STATE_END;
+
+        #ifdef WOLFSSL_ASYNC_CRYPT
+            /* If pending, leave and return below */
+            if (ret == WC_PENDING_E) {
+                return ret;
+            }
+        #endif
+        }
+
+        case CIPHER_STATE_END:
+        {
+        #if defined(BUILD_AESGCM) || defined(HAVE_AESCCM)
+            /* make sure AES GCM/CCM nonce is cleared */
+            if (ssl->specs.bulk_cipher_algorithm == wolfssl_aes_ccm ||
+                ssl->specs.bulk_cipher_algorithm == wolfssl_aes_gcm) {
+                if (ssl->decrypt.nonce)
+                    ForceZero(ssl->decrypt.nonce, AESGCM_NONCE_SZ);
+
+                if (ret < 0)
+                    ret = VERIFY_MAC_ERROR;
+            }
+        #endif /* BUILD_AESGCM || HAVE_AESCCM */
             break;
-        #endif
+        }
+    }
 
-        #ifdef HAVE_CAMELLIA
-            case wolfssl_camellia:
-                wc_CamelliaCbcDecrypt(ssl->decrypt.cam, plain, input, sz);
-                break;
-        #endif
+    /* Reset state */
+    ssl->decrypt.state = CIPHER_STATE_BEGIN;
 
-        #ifdef HAVE_HC128
-            case wolfssl_hc128:
-                ret = wc_Hc128_Process(ssl->decrypt.hc128, plain, input, sz);
-                break;
-        #endif
-
-        #ifdef BUILD_RABBIT
-            case wolfssl_rabbit:
-                ret = wc_RabbitProcess(ssl->decrypt.rabbit, plain, input, sz);
-                break;
-        #endif
-
-        #if defined(HAVE_CHACHA) && defined(HAVE_POLY1305)
-            case wolfssl_chacha:
-                ret = ChachaAEADDecrypt(ssl, plain, input, sz);
-                break;
-        #endif
-
-        #ifdef HAVE_NULL_CIPHER
-            case wolfssl_cipher_null:
-                if (input != plain) {
-                    XMEMMOVE(plain, input, sz);
-                }
-                break;
-        #endif
-
-        #ifdef HAVE_IDEA
-            case wolfssl_idea:
-                ret = wc_IdeaCbcDecrypt(ssl->decrypt.idea, plain, input, sz);
-                break;
-        #endif
-
-            default:
-                WOLFSSL_MSG("wolfSSL Decrypt programming error");
-                ret = DECRYPT_ERROR;
+    /* handle mac error case */
+    if (ret == VERIFY_MAC_ERROR) {
+        if (!ssl->options.dtls)
+            SendAlert(ssl, alert_fatal, bad_record_mac);
     }
 
     return ret;
@@ -9146,11 +9671,11 @@ static INLINE void Md5Rounds(int rounds, const byte* data, int sz)
     Md5 md5;
     int i;
 
-    wc_InitMd5(&md5);
+    wc_InitMd5(&md5);   /* no error check on purpose, dummy round */
 
     for (i = 0; i < rounds; i++)
         wc_Md5Update(&md5, data, sz);
-    wc_Md5Free(&md5) ; /* in case needed to release resources */
+    wc_Md5Free(&md5); /* in case needed to release resources */
 }
 
 
@@ -9165,7 +9690,7 @@ static INLINE void ShaRounds(int rounds, const byte* data, int sz)
 
     for (i = 0; i < rounds; i++)
         wc_ShaUpdate(&sha, data, sz);
-    wc_ShaFree(&sha) ; /* in case needed to release resources */
+    wc_ShaFree(&sha); /* in case needed to release resources */
 }
 #endif
 
@@ -9183,7 +9708,7 @@ static INLINE void Sha256Rounds(int rounds, const byte* data, int sz)
         wc_Sha256Update(&sha256, data, sz);
         /* no error check on purpose, dummy round */
     }
-    wc_Sha256Free(&sha256) ; /* in case needed to release resources */
+    wc_Sha256Free(&sha256); /* in case needed to release resources */
 }
 
 #endif
@@ -9202,7 +9727,7 @@ static INLINE void Sha384Rounds(int rounds, const byte* data, int sz)
         wc_Sha384Update(&sha384, data, sz);
         /* no error check on purpose, dummy round */
     }
-    wc_Sha384Free(&sha384) ; /* in case needed to release resources */
+    wc_Sha384Free(&sha384); /* in case needed to release resources */
 }
 
 #endif
@@ -9221,7 +9746,7 @@ static INLINE void Sha512Rounds(int rounds, const byte* data, int sz)
         wc_Sha512Update(&sha512, data, sz);
         /* no error check on purpose, dummy round */
     }
-    wc_Sha512Free(&sha512) ; /* in case needed to release resources */
+    wc_Sha512Free(&sha512); /* in case needed to release resources */
 }
 
 #endif
@@ -9387,9 +9912,11 @@ static int TimingPadVerify(WOLFSSL* ssl, const byte* input, int padLen, int t,
         return VERIFY_MAC_ERROR;
     }
 
+    /* treat any failure as verify MAC error */
     if (ret != 0)
-        return VERIFY_MAC_ERROR;
-    return 0;
+        ret = VERIFY_MAC_ERROR;
+
+    return ret;
 }
 
 
@@ -9465,9 +9992,10 @@ static int DoAlert(WOLFSSL* ssl, byte* input, word32* inOutIdx, int* type,
         if (ssl->hsInfoOn)
             AddPacketName("Alert", &ssl->handShakeInfo);
         if (ssl->toInfoOn)
-            /* add record header back on to info + 2 byte level, data */
+            /* add record header back on to info + alert bytes level/code */
             AddPacketInfo("Alert", &ssl->timeoutInfo, input + *inOutIdx -
-                          RECORD_HEADER_SZ, 2 + RECORD_HEADER_SZ, ssl->heap);
+                          RECORD_HEADER_SZ, RECORD_HEADER_SZ + ALERT_SIZE,
+                          ssl->heap);
     #endif
 
     /* make sure can read the message */
@@ -9665,10 +10193,10 @@ int ProcessReply(WOLFSSL* ssl)
 
             readSz = RECORD_HEADER_SZ;
 
-            #ifdef WOLFSSL_DTLS
-                if (ssl->options.dtls)
-                    readSz = DTLS_RECORD_HEADER_SZ;
-            #endif
+        #ifdef WOLFSSL_DTLS
+            if (ssl->options.dtls)
+                readSz = DTLS_RECORD_HEADER_SZ;
+        #endif
 
             /* get header or return error */
             if (!ssl->options.dtls) {
@@ -9679,9 +10207,10 @@ int ProcessReply(WOLFSSL* ssl)
                 /* read ahead may already have header */
                 used = ssl->buffers.inputBuffer.length -
                        ssl->buffers.inputBuffer.idx;
-                if (used < readSz)
+                if (used < readSz) {
                     if ((ret = GetInputData(ssl, readSz)) < 0)
                         return ret;
+                }
             #endif
             }
 
@@ -9698,15 +10227,15 @@ int ProcessReply(WOLFSSL* ssl)
 
                 /* sanity checks before getting size at front */
                 if (ssl->buffers.inputBuffer.buffer[
-                          ssl->buffers.inputBuffer.idx + 2] != OLD_HELLO_ID) {
+                          ssl->buffers.inputBuffer.idx + OPAQUE16_LEN] != OLD_HELLO_ID) {
                     WOLFSSL_MSG("Not a valid old client hello");
                     return PARSE_ERROR;
                 }
 
                 if (ssl->buffers.inputBuffer.buffer[
-                          ssl->buffers.inputBuffer.idx + 3] != SSLv3_MAJOR &&
+                          ssl->buffers.inputBuffer.idx + OPAQUE24_LEN] != SSLv3_MAJOR &&
                     ssl->buffers.inputBuffer.buffer[
-                          ssl->buffers.inputBuffer.idx + 3] != DTLS_MAJOR) {
+                          ssl->buffers.inputBuffer.idx + OPAQUE24_LEN] != DTLS_MAJOR) {
                     WOLFSSL_MSG("Not a valid version in old client hello");
                     return PARSE_ERROR;
                 }
@@ -9802,14 +10331,13 @@ int ProcessReply(WOLFSSL* ssl)
 #endif
             }
 
-            ssl->options.processReply = runProcessingOneMessage;
+            ssl->options.processReply = decryptMessage;
             startIdx = ssl->buffers.inputBuffer.idx;  /* in case > 1 msg per */
 
-        /* the record layer is here */
-        case runProcessingOneMessage:
+        /* decrypt message */
+        case decryptMessage:
 
-            if (IsEncryptionOn(ssl, 0) && ssl->keys.decryptedCur == 0)
-            {
+            if (IsEncryptionOn(ssl, 0) && ssl->keys.decryptedCur == 0) {
                 ret = SanityCheckCipherText(ssl, ssl->curSize);
                 if (ret < 0)
                     return ret;
@@ -9823,12 +10351,6 @@ int ProcessReply(WOLFSSL* ssl)
                                   ssl->buffers.inputBuffer.idx,
                                   ssl->curSize, ssl->curRL.type, 1,
                                   &ssl->keys.padSz, ssl->DecryptVerifyCtx);
-                    if (ssl->options.tls1_1 && ssl->specs.cipher_type == block)
-                        ssl->buffers.inputBuffer.idx += ssl->specs.block_size;
-                        /* go past TLSv1.1 IV */
-                    if (ssl->specs.cipher_type == aead &&
-                            ssl->specs.bulk_cipher_algorithm != wolfssl_chacha)
-                        ssl->buffers.inputBuffer.idx += AESGCM_EXP_IV_SZ;
                 #endif /* ATOMIC_USER */
                 }
                 else {
@@ -9837,46 +10359,73 @@ int ProcessReply(WOLFSSL* ssl)
                                   ssl->buffers.inputBuffer.buffer +
                                   ssl->buffers.inputBuffer.idx,
                                   ssl->curSize);
-                    if (ret < 0) {
-                        WOLFSSL_MSG("Decrypt failed");
-                        WOLFSSL_ERROR(ret);
-                        #ifdef WOLFSSL_DTLS
-                            /* If in DTLS mode, if the decrypt fails for any
-                             * reason, pretend the datagram never happened. */
-                            if (ssl->options.dtls) {
-                                ssl->options.processReply = doProcessInit;
-                                ssl->buffers.inputBuffer.idx =
-                                                ssl->buffers.inputBuffer.length;
-                            }
-                        #endif /* WOLFSSL_DTLS */
-                        return DECRYPT_ERROR;
-                    }
+                }
+            #ifdef WOLFSSL_ASYNC_CRYPT
+                if (ret == WC_PENDING_E)
+                    return ret;
+            #endif
+
+                if (ret == 0) {
+                    /* handle success */
                     if (ssl->options.tls1_1 && ssl->specs.cipher_type == block)
                         ssl->buffers.inputBuffer.idx += ssl->specs.block_size;
                         /* go past TLSv1.1 IV */
                     if (ssl->specs.cipher_type == aead &&
                             ssl->specs.bulk_cipher_algorithm != wolfssl_chacha)
                         ssl->buffers.inputBuffer.idx += AESGCM_EXP_IV_SZ;
+                }
+                else {
+                    WOLFSSL_MSG("Decrypt failed");
+                    WOLFSSL_ERROR(ret);
+                #ifdef WOLFSSL_DTLS
+                    /* If in DTLS mode, if the decrypt fails for any
+                     * reason, pretend the datagram never happened. */
+                    if (ssl->options.dtls) {
+                        ssl->options.processReply = doProcessInit;
+                        ssl->buffers.inputBuffer.idx =
+                                        ssl->buffers.inputBuffer.length;
+                    }
+                #endif /* WOLFSSL_DTLS */
+                    return DECRYPT_ERROR;
+                }
+            }
 
+            ssl->options.processReply = verifyMessage;
+
+        /* verify digest of message */
+        case verifyMessage:
+
+            if (IsEncryptionOn(ssl, 0) && ssl->keys.decryptedCur == 0) {
+                if (!atomicUser) {
                     ret = VerifyMac(ssl, ssl->buffers.inputBuffer.buffer +
                                     ssl->buffers.inputBuffer.idx,
                                     ssl->curSize, ssl->curRL.type,
                                     &ssl->keys.padSz);
+                #ifdef WOLFSSL_ASYNC_CRYPT
+                    if (ret == WC_PENDING_E)
+                        return ret;
+                #endif
+                    if (ret < 0) {
+                        WOLFSSL_MSG("VerifyMac failed");
+                        WOLFSSL_ERROR(ret);
+                        return DECRYPT_ERROR;
+                    }
                 }
-                if (ret < 0) {
-                    WOLFSSL_MSG("VerifyMac failed");
-                    WOLFSSL_ERROR(ret);
-                    return DECRYPT_ERROR;
-                }
+
                 ssl->keys.encryptSz    = ssl->curSize;
                 ssl->keys.decryptedCur = 1;
             }
 
-            #ifdef WOLFSSL_DTLS
+            ssl->options.processReply = runProcessingOneMessage;
+
+        /* the record layer is here */
+        case runProcessingOneMessage:
+
+        #ifdef WOLFSSL_DTLS
             if (IsDtlsNotSctpMode(ssl)) {
                 DtlsUpdateWindow(ssl);
             }
-            #endif /* WOLFSSL_DTLS */
+        #endif /* WOLFSSL_DTLS */
 
             WOLFSSL_MSG("received record layer msg");
 
@@ -9890,12 +10439,12 @@ int ProcessReply(WOLFSSL* ssl)
                                             ssl->buffers.inputBuffer.length);
                     }
                     else {
-#ifdef WOLFSSL_DTLS
+                    #ifdef WOLFSSL_DTLS
                         ret = DoDtlsHandShakeMsg(ssl,
                                             ssl->buffers.inputBuffer.buffer,
                                             &ssl->buffers.inputBuffer.idx,
                                             ssl->buffers.inputBuffer.length);
-#endif
+                    #endif
                     }
                     if (ret != 0)
                         return ret;
@@ -9922,7 +10471,7 @@ int ProcessReply(WOLFSSL* ssl)
                             return ret;
                         }
                         else {
-#ifdef WOLFSSL_DTLS
+                        #ifdef WOLFSSL_DTLS
                         /* Check for duplicate CCS message in DTLS mode.
                          * DTLS allows for duplicate messages, and it should be
                          * skipped. Also skip if out of order. */
@@ -9942,7 +10491,7 @@ int ProcessReply(WOLFSSL* ssl)
                             }
                             ssl->buffers.inputBuffer.idx++;
                             break;
-#endif /* WOLFSSL_DTLS */
+                        #endif /* WOLFSSL_DTLS */
                         }
                     }
 
@@ -10106,9 +10655,10 @@ int SendChangeCipher(WOLFSSL* ssl)
 
         input[0] = 1;  /* turn it on */
         sendSz = BuildMessage(ssl, output, sendSz, input, inputSz,
-                              change_cipher_spec, 0, 0);
-        if (sendSz < 0)
+                              change_cipher_spec, 0, 0, 0);
+        if (sendSz < 0) {
             return sendSz;
+        }
     }
 
     #ifdef WOLFSSL_DTLS
@@ -10167,109 +10717,174 @@ static int SSL_hmac(WOLFSSL* ssl, byte* digest, const byte* in, word32 sz,
     WriteSEQ(ssl, verify, seq);
 
     if (ssl->specs.mac_algorithm == md5_mac) {
-        wc_InitMd5(&md5);
-        /* inner */
-        wc_Md5Update(&md5, macSecret, digestSz);
-        wc_Md5Update(&md5, PAD1, padSz);
-        wc_Md5Update(&md5, seq, SEQ_SZ);
-        wc_Md5Update(&md5, conLen, sizeof(conLen));
-        /* in buffer */
-        wc_Md5Update(&md5, in, sz);
-        wc_Md5Final(&md5, result);
-        /* outer */
-        wc_Md5Update(&md5, macSecret, digestSz);
-        wc_Md5Update(&md5, PAD2, padSz);
-        wc_Md5Update(&md5, result, digestSz);
-        wc_Md5Final(&md5, digest);
-    }
-    else {
-        ret = wc_InitSha(&sha);
+        ret =  wc_InitMd5_ex(&md5, ssl->heap, ssl->devId);
         if (ret != 0)
             return ret;
+
         /* inner */
-        wc_ShaUpdate(&sha, macSecret, digestSz);
-        wc_ShaUpdate(&sha, PAD1, padSz);
-        wc_ShaUpdate(&sha, seq, SEQ_SZ);
-        wc_ShaUpdate(&sha, conLen, sizeof(conLen));
+        ret =  wc_Md5Update(&md5, macSecret, digestSz);
+        ret |= wc_Md5Update(&md5, PAD1, padSz);
+        ret |= wc_Md5Update(&md5, seq, SEQ_SZ);
+        ret |= wc_Md5Update(&md5, conLen, sizeof(conLen));
         /* in buffer */
-        wc_ShaUpdate(&sha, in, sz);
-        wc_ShaFinal(&sha, result);
+        ret |= wc_Md5Update(&md5, in, sz);
+        if (ret != 0)
+            return VERIFY_MAC_ERROR;
+        ret = wc_Md5Final(&md5, result);
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        /* TODO: Make non-blocking */
+        if (ret == WC_PENDING_E) {
+            ret = wc_AsyncWait(ret, &md5.asyncDev, WC_ASYNC_FLAG_NONE);
+        }
+    #endif
+        if (ret != 0)
+            return VERIFY_MAC_ERROR;
+
         /* outer */
-        wc_ShaUpdate(&sha, macSecret, digestSz);
-        wc_ShaUpdate(&sha, PAD2, padSz);
-        wc_ShaUpdate(&sha, result, digestSz);
-        wc_ShaFinal(&sha, digest);
+        ret =  wc_Md5Update(&md5, macSecret, digestSz);
+        ret |= wc_Md5Update(&md5, PAD2, padSz);
+        ret |= wc_Md5Update(&md5, result, digestSz);
+        if (ret != 0)
+            return VERIFY_MAC_ERROR;
+        ret =  wc_Md5Final(&md5, digest);
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        /* TODO: Make non-blocking */
+        if (ret == WC_PENDING_E) {
+            ret = wc_AsyncWait(ret, &md5.asyncDev, WC_ASYNC_FLAG_NONE);
+        }
+    #endif
+        if (ret != 0)
+            return VERIFY_MAC_ERROR;
+
+        wc_Md5Free(&md5);
+    }
+    else {
+        ret =  wc_InitSha_ex(&sha, ssl->heap, ssl->devId);
+        if (ret != 0)
+            return ret;
+
+        /* inner */
+        ret =  wc_ShaUpdate(&sha, macSecret, digestSz);
+        ret |= wc_ShaUpdate(&sha, PAD1, padSz);
+        ret |= wc_ShaUpdate(&sha, seq, SEQ_SZ);
+        ret |= wc_ShaUpdate(&sha, conLen, sizeof(conLen));
+        /* in buffer */
+        ret |= wc_ShaUpdate(&sha, in, sz);
+        if (ret != 0)
+            return VERIFY_MAC_ERROR;
+        ret = wc_ShaFinal(&sha, result);
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        /* TODO: Make non-blocking */
+        if (ret == WC_PENDING_E) {
+            ret = wc_AsyncWait(ret, &sha.asyncDev, WC_ASYNC_FLAG_NONE);
+        }
+    #endif
+        if (ret != 0)
+            return VERIFY_MAC_ERROR;
+
+        /* outer */
+        ret =  wc_ShaUpdate(&sha, macSecret, digestSz);
+        ret |= wc_ShaUpdate(&sha, PAD2, padSz);
+        ret |= wc_ShaUpdate(&sha, result, digestSz);
+        if (ret != 0)
+            return VERIFY_MAC_ERROR;
+        ret =  wc_ShaFinal(&sha, digest);
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        /* TODO: Make non-blocking */
+        if (ret == WC_PENDING_E) {
+            ret = wc_AsyncWait(ret, &sha.asyncDev, WC_ASYNC_FLAG_NONE);
+        }
+    #endif
+        if (ret != 0)
+            return VERIFY_MAC_ERROR;
+
+        wc_ShaFree(&sha);
     }
     return 0;
 }
 
 #ifndef NO_CERTS
-static void BuildMD5_CertVerify(WOLFSSL* ssl, byte* digest)
+static int BuildMD5_CertVerify(WOLFSSL* ssl, byte* digest)
 {
+    int ret;
     byte md5_result[MD5_DIGEST_SIZE];
-
 #ifdef WOLFSSL_SMALL_STACK
-        Md5* md5   = (Md5*)XMALLOC(sizeof(Md5), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-        Md5* md5_2 = (Md5*)XMALLOC(sizeof(Md5), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    Md5* md5 = (Md5*)XMALLOC(sizeof(Md5), ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
 #else
-        Md5 md5[1];
-        Md5 md5_2[1];
+    Md5  md5[1];
 #endif
 
     /* make md5 inner */
-    md5[0] = ssl->hsHashes->hashMd5 ; /* Save current position */
-    wc_Md5Update(&ssl->hsHashes->hashMd5, ssl->arrays->masterSecret,SECRET_LEN);
-    wc_Md5Update(&ssl->hsHashes->hashMd5, PAD1, PAD_MD5);
-    wc_Md5GetHash(&ssl->hsHashes->hashMd5, md5_result);
-    wc_Md5RestorePos(&ssl->hsHashes->hashMd5, md5) ; /* Restore current position */
+    ret = wc_Md5Copy(&ssl->hsHashes->hashMd5, md5); /* Save current position */
+    if (ret == 0)
+        ret = wc_Md5Update(md5, ssl->arrays->masterSecret,SECRET_LEN);
+    if (ret == 0)
+        ret = wc_Md5Update(md5, PAD1, PAD_MD5);
+    if (ret == 0)
+        ret = wc_Md5Final(md5, md5_result);
 
     /* make md5 outer */
-    wc_InitMd5(md5_2) ;
-    wc_Md5Update(md5_2, ssl->arrays->masterSecret, SECRET_LEN);
-    wc_Md5Update(md5_2, PAD2, PAD_MD5);
-    wc_Md5Update(md5_2, md5_result, MD5_DIGEST_SIZE);
-
-    wc_Md5Final(md5_2, digest);
+    if (ret == 0) {
+        ret = wc_InitMd5_ex(md5, ssl->heap, ssl->devId);
+        if (ret == 0) {
+            ret = wc_Md5Update(md5, ssl->arrays->masterSecret, SECRET_LEN);
+            if (ret == 0)
+                ret = wc_Md5Update(md5, PAD2, PAD_MD5);
+            if (ret == 0)
+                ret = wc_Md5Update(md5, md5_result, MD5_DIGEST_SIZE);
+            if (ret == 0)
+                ret = wc_Md5Final(md5, digest);
+            wc_Md5Free(md5);
+        }
+    }
 
 #ifdef WOLFSSL_SMALL_STACK
-    XFREE(md5, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    XFREE(md5_2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    XFREE(md5, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
+
+    return ret;
 }
 
 
-static void BuildSHA_CertVerify(WOLFSSL* ssl, byte* digest)
+static int BuildSHA_CertVerify(WOLFSSL* ssl, byte* digest)
 {
+    int ret;
     byte sha_result[SHA_DIGEST_SIZE];
-
 #ifdef WOLFSSL_SMALL_STACK
-        Sha* sha   = (Sha*)XMALLOC(sizeof(Sha), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-        Sha* sha2 = (Sha*)XMALLOC(sizeof(Sha), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    Sha* sha = (Sha*)XMALLOC(sizeof(Sha), ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
 #else
-        Sha sha[1];
-        Sha sha2[1];
+    Sha  sha[1];
 #endif
 
     /* make sha inner */
-    sha[0] = ssl->hsHashes->hashSha ; /* Save current position */
-    wc_ShaUpdate(&ssl->hsHashes->hashSha, ssl->arrays->masterSecret,SECRET_LEN);
-    wc_ShaUpdate(&ssl->hsHashes->hashSha, PAD1, PAD_SHA);
-    wc_ShaGetHash(&ssl->hsHashes->hashSha, sha_result);
-    wc_ShaRestorePos(&ssl->hsHashes->hashSha, sha) ; /* Restore current position */
+    ret = wc_ShaCopy(&ssl->hsHashes->hashSha, sha); /* Save current position */
+    if (ret == 0)
+        ret = wc_ShaUpdate(sha, ssl->arrays->masterSecret,SECRET_LEN);
+    if (ret == 0)
+        ret = wc_ShaUpdate(sha, PAD1, PAD_SHA);
+    if (ret == 0)
+        ret = wc_ShaFinal(sha, sha_result);
 
     /* make sha outer */
-    wc_InitSha(sha2) ;
-    wc_ShaUpdate(sha2, ssl->arrays->masterSecret,SECRET_LEN);
-    wc_ShaUpdate(sha2, PAD2, PAD_SHA);
-    wc_ShaUpdate(sha2, sha_result, SHA_DIGEST_SIZE);
-
-    wc_ShaFinal(sha2, digest);
+    if (ret == 0) {
+        ret = wc_InitSha_ex(sha, ssl->heap, ssl->devId);
+        if (ret == 0) {
+            ret = wc_ShaUpdate(sha, ssl->arrays->masterSecret,SECRET_LEN);
+            if (ret == 0)
+                ret = wc_ShaUpdate(sha, PAD2, PAD_SHA);
+            if (ret == 0)
+                ret = wc_ShaUpdate(sha, sha_result, SHA_DIGEST_SIZE);
+            if (ret == 0)
+                ret = wc_ShaFinal(sha, digest);
+            wc_ShaFree(sha);
+        }
+    }
 
 #ifdef WOLFSSL_SMALL_STACK
-    XFREE(sha, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    XFREE(sha2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    XFREE(sha, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
 
+    return ret;
 }
 #endif /* NO_CERTS */
 #endif /* NO_OLD_TLS */
@@ -10284,10 +10899,16 @@ static int BuildCertHashes(WOLFSSL* ssl, Hashes* hashes)
     (void)hashes;
 
     if (ssl->options.tls) {
-#if ! defined( NO_OLD_TLS )
-        wc_Md5GetHash(&ssl->hsHashes->hashMd5, hashes->md5);
-        wc_ShaGetHash(&ssl->hsHashes->hashSha, hashes->sha);
-#endif
+    #if !defined(NO_MD5) && !defined(NO_OLD_TLS)
+        ret = wc_Md5GetHash(&ssl->hsHashes->hashMd5, hashes->md5);
+        if (ret != 0)
+            return ret;
+    #endif
+    #if !defined(NO_SHA)
+        ret = wc_ShaGetHash(&ssl->hsHashes->hashSha, hashes->sha);
+        if (ret != 0)
+            return ret;
+    #endif
         if (IsAtLeastTLSv1_2(ssl)) {
             #ifndef NO_SHA256
                 ret = wc_Sha256GetHash(&ssl->hsHashes->hashSha256,
@@ -10309,145 +10930,219 @@ static int BuildCertHashes(WOLFSSL* ssl, Hashes* hashes)
             #endif
         }
     }
-#if ! defined( NO_OLD_TLS )
     else {
-        BuildMD5_CertVerify(ssl, hashes->md5);
-        BuildSHA_CertVerify(ssl, hashes->sha);
+    #if !defined(NO_MD5) && !defined(NO_OLD_TLS)
+        ret = BuildMD5_CertVerify(ssl, hashes->md5);
+        if (ret != 0)
+            return ret;
+    #endif
+    #if !defined(NO_SHA) && (!defined(NO_OLD_TLS) || \
+                              defined(WOLFSSL_ALLOW_TLS_SHA1))
+        ret = BuildSHA_CertVerify(ssl, hashes->sha);
+        if (ret != 0)
+            return ret;
+    #endif
     }
-#endif
 
     return ret;
 }
 
 #endif /* WOLFSSL_LEANPSK */
 
+/* Persistable BuildMessage arguments */
+typedef struct BuildMsgArgs {
+    word32 digestSz;  /* MAC size: specs.hash_size, or truncated HMAC size */
+    word32 sz;        /* total output size, record header included */
+    word32 pad;       /* block cipher pad length; also the pad byte value */
+    word32 idx;       /* current write offset into the output buffer */
+    word32 headerSz;  /* record header size (plus DTLS extra when DTLS) */
+    word16 size;      /* sz - headerSz, passed to AddRecordHeader */
+    word32 ivSz;      /* explicit IV size: TLSv1.1 block IV or AEAD IV */
+    byte   iv[AES_BLOCK_SIZE]; /* IV staging buffer, max size */
+} BuildMsgArgs;
+
+static void FreeBuildMsgArgs(WOLFSSL* ssl, void* pArgs)
+{
+    BuildMsgArgs* args = (BuildMsgArgs*)pArgs;
+
+    (void)ssl;
+    (void)args;
+    /* Installed by BuildMessage as ssl->async.freeArgs in async builds.
+     * Nothing to release: BuildMsgArgs holds no heap allocations. */
+}
+
 /* Build SSL Message, encrypted */
 int BuildMessage(WOLFSSL* ssl, byte* output, int outSz, const byte* input,
-                 int inSz, int type, int hashOutput, int sizeOnly)
+             int inSz, int type, int hashOutput, int sizeOnly, int asyncOkay)
 {
-    word32 digestSz;
-    word32 sz = RECORD_HEADER_SZ + inSz;
-    word32 pad  = 0, i;
-    word32 idx  = RECORD_HEADER_SZ;
-    word32 ivSz = 0;      /* TLSv1.1  IV */
-    word32 headerSz = RECORD_HEADER_SZ;
-    word16 size;
-    byte               iv[AES_BLOCK_SIZE];                  /* max size */
-    int ret        = 0;
-    int atomicUser = 0;
+    int ret = 0;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    BuildMsgArgs* args = (BuildMsgArgs*)ssl->async.args;
+    typedef char args_test[sizeof(ssl->async.args) >= sizeof(*args) ? 1 : -1];
+    (void)sizeof(args_test);
+#else
+    BuildMsgArgs  args[1];
+#endif
 
-    if (ssl == NULL) {
+    WOLFSSL_ENTER("BuildMessage");
+
+    if (ssl == NULL || output == NULL) {
         return BAD_FUNC_ARG;
     }
 
-    if (!sizeOnly && (output == NULL || input == NULL) ) {
-        return BAD_FUNC_ARG;
-    }
-
-    /* catch mistaken sizeOnly parameter */
-    if (sizeOnly && (output || input) ) {
-        WOLFSSL_MSG("BuildMessage with sizeOnly doesn't need input or output");
-        return BAD_FUNC_ARG;
-    }
-
-    digestSz = ssl->specs.hash_size;
-#ifdef HAVE_TRUNCATED_HMAC
-    if (ssl->truncated_hmac)
-        digestSz = min(TRUNCATED_HMAC_SZ, digestSz);
-#endif
-    sz += digestSz;
-
-#ifdef WOLFSSL_DTLS
-    if (ssl->options.dtls) {
-        sz       += DTLS_RECORD_EXTRA;
-        idx      += DTLS_RECORD_EXTRA;
-        headerSz += DTLS_RECORD_EXTRA;
+    ret = WC_NOT_PENDING_E;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    if (asyncOkay) {
+        ret = wolfSSL_AsyncPop(ssl, &ssl->options.buildMsgState);
+        if (ret != WC_NOT_PENDING_E) {
+            /* Check for error */
+            if (ret < 0)
+                goto exit_buildmsg;
+        }
     }
 #endif
 
-#ifdef ATOMIC_USER
-    if (ssl->ctx->MacEncryptCb)
-        atomicUser = 1;
-#endif
+    /* Reset state */
+    if (ret == WC_NOT_PENDING_E) {
+        ret = 0;
+        ssl->options.buildMsgState = BUILD_MSG_BEGIN;
+        XMEMSET(args, 0, sizeof(BuildMsgArgs));
 
-    if (ssl->specs.cipher_type == block) {
-        word32 blockSz = ssl->specs.block_size;
-        if (ssl->options.tls1_1) {
-            ivSz = blockSz;
-            sz  += ivSz;
+        args->sz = RECORD_HEADER_SZ + inSz;
+        args->idx  = RECORD_HEADER_SZ;
+        args->headerSz = RECORD_HEADER_SZ;
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        ssl->async.freeArgs = FreeBuildMsgArgs;
+    #endif
+    }
 
-            if (ivSz > (word32)sizeof(iv))
-                return BUFFER_E;
-
-            if (!sizeOnly) {
-                ret = wc_RNG_GenerateBlock(ssl->rng, iv, ivSz);
-                if (ret != 0)
-                    return ret;
+    switch (ssl->options.buildMsgState) {
+        case BUILD_MSG_BEGIN:
+        {
+            /* catch mistaken sizeOnly parameter */
+            if (!sizeOnly && (output == NULL || input == NULL) ) {
+                return BAD_FUNC_ARG;
+            }
+            if (sizeOnly && (output || input) ) {
+                WOLFSSL_MSG("BuildMessage w/sizeOnly doesn't need input/output");
+                return BAD_FUNC_ARG;
             }
 
+            ssl->options.buildMsgState = BUILD_MSG_SIZE;
         }
-        sz += 1;       /* pad byte */
-        pad = (sz - headerSz) % blockSz;
-        pad = blockSz - pad;
-        sz += pad;
-    }
 
-#ifdef HAVE_AEAD
-    if (ssl->specs.cipher_type == aead) {
-        if (ssl->specs.bulk_cipher_algorithm != wolfssl_chacha)
-            ivSz = AESGCM_EXP_IV_SZ;
+        case BUILD_MSG_SIZE:
+        {
+            args->digestSz = ssl->specs.hash_size;
+        #ifdef HAVE_TRUNCATED_HMAC
+            if (ssl->truncated_hmac)
+                args->digestSz = min(TRUNCATED_HMAC_SZ, args->digestSz);
+        #endif
+            args->sz += args->digestSz;
 
-        sz += (ivSz + ssl->specs.aead_mac_size - digestSz);
-        if (!sizeOnly) {
-            XMEMCPY(iv, ssl->keys.aead_exp_IV, AESGCM_EXP_IV_SZ);
+        #ifdef WOLFSSL_DTLS
+            if (ssl->options.dtls) {
+                args->sz       += DTLS_RECORD_EXTRA;
+                args->idx      += DTLS_RECORD_EXTRA;
+                args->headerSz += DTLS_RECORD_EXTRA;
+            }
+        #endif
+
+            if (ssl->specs.cipher_type == block) {
+                word32 blockSz = ssl->specs.block_size;
+                if (ssl->options.tls1_1) {
+                    args->ivSz = blockSz;
+                    args->sz  += args->ivSz;
+
+                    if (args->ivSz > (word32)sizeof(args->iv))
+                        ERROR_OUT(BUFFER_E, exit_buildmsg);
+                }
+                args->sz += 1;       /* pad byte */
+                args->pad = (args->sz - args->headerSz) % blockSz;
+                args->pad = blockSz - args->pad;
+                args->sz += args->pad;
+            }
+
+        #ifdef HAVE_AEAD
+            if (ssl->specs.cipher_type == aead) {
+                if (ssl->specs.bulk_cipher_algorithm != wolfssl_chacha)
+                    args->ivSz = AESGCM_EXP_IV_SZ;
+
+                args->sz += (args->ivSz + ssl->specs.aead_mac_size - args->digestSz);
+            }
+        #endif
+
+            /* done with size calculations */
+            if (sizeOnly)
+                goto exit_buildmsg;
+
+            if (args->sz > (word32)outSz) {
+                WOLFSSL_MSG("Oops, want to write past output buffer size");
+                ERROR_OUT(BUFFER_E, exit_buildmsg);
+            }
+
+            if (args->ivSz > 0) {
+                ret = wc_RNG_GenerateBlock(ssl->rng, args->iv, args->ivSz);
+                if (ret != 0)
+                    goto exit_buildmsg;
+
+            }
+
+        #ifdef HAVE_AEAD
+            if (ssl->specs.cipher_type == aead) {
+                if (ssl->specs.bulk_cipher_algorithm != wolfssl_chacha)
+                    XMEMCPY(args->iv, ssl->keys.aead_exp_IV, AESGCM_EXP_IV_SZ);
+            }
+        #endif
+
+            args->size = (word16)(args->sz - args->headerSz);    /* include mac and digest */
+            AddRecordHeader(output, args->size, (byte)type, ssl);
+
+            /* write to output */
+            if (args->ivSz) {
+                XMEMCPY(output + args->idx, args->iv,
+                                        min(args->ivSz, sizeof(args->iv)));
+                args->idx += args->ivSz;
+            }
+            XMEMCPY(output + args->idx, input, inSz);
+            args->idx += inSz;
+
+            ssl->options.buildMsgState = BUILD_MSG_HASH;
         }
-    }
-#endif
-    /* done with size calculations */
-    if (sizeOnly) {
-        return sz;
-    }
-    if (sz > (word32)outSz) {
-        WOLFSSL_MSG("Oops, want to write past output buffer size");
-        return BUFFER_E;
-    }
-    size = (word16)(sz - headerSz);    /* include mac and digest */
-    AddRecordHeader(output, size, (byte)type, ssl);
+        case BUILD_MSG_HASH:
+        {
+            word32 i;
 
-    /* write to output */
-    if (ivSz) {
-        XMEMCPY(output + idx, iv, min(ivSz, sizeof(iv)));
-        idx += ivSz;
-    }
-    XMEMCPY(output + idx, input, inSz);
-    idx += inSz;
+            if (type == handshake && hashOutput) {
+                ret = HashOutput(ssl, output, args->headerSz + inSz, args->ivSz);
+                if (ret != 0)
+                    goto exit_buildmsg;
+            }
+            if (ssl->specs.cipher_type == block) {
+                word32 tmpIdx = args->idx + args->digestSz;
 
-    if (type == handshake && hashOutput) {
-        ret = HashOutput(ssl, output, headerSz + inSz, ivSz);
-        if (ret != 0)
-            return ret;
-    }
+                for (i = 0; i <= args->pad; i++)
+                    output[tmpIdx++] = (byte)args->pad; /* pad byte gets pad value */
+            }
 
-    if (ssl->specs.cipher_type == block) {
-        word32 tmpIdx = idx + digestSz;
+            ssl->options.buildMsgState = BUILD_MSG_VERIFY_MAC;
+        }
+        case BUILD_MSG_VERIFY_MAC:
+        {
+            /* User Record Layer Callback handling */
+        #ifdef ATOMIC_USER
+            if (ssl->ctx->MacEncryptCb) {
+                ret = ssl->ctx->MacEncryptCb(ssl, output + args->idx,
+                                output + args->headerSz + args->ivSz, inSz, type, 0,
+                                output + args->headerSz, output + args->headerSz, args->size,
+                                ssl->MacEncryptCtx);
+                goto exit_buildmsg;
+            }
+        #endif
 
-        for (i = 0; i <= pad; i++)
-            output[tmpIdx++] = (byte)pad; /* pad byte gets pad value too */
-    }
-
-    if (atomicUser) {   /* User Record Layer Callback handling */
-#ifdef ATOMIC_USER
-        if ( (ret = ssl->ctx->MacEncryptCb(ssl, output + idx,
-                        output + headerSz + ivSz, inSz, type, 0,
-                        output + headerSz, output + headerSz, size,
-                        ssl->MacEncryptCtx)) != 0)
-            return ret;
-#endif
-    }
-    else {
-        if (ssl->specs.cipher_type != aead) {
-#ifdef HAVE_TRUNCATED_HMAC
-            if (ssl->truncated_hmac && ssl->specs.hash_size > digestSz) {
+            if (ssl->specs.cipher_type != aead) {
+        #ifdef HAVE_TRUNCATED_HMAC
+            if (ssl->truncated_hmac && ssl->specs.hash_size > args->digestSz) {
             #ifdef WOLFSSL_SMALL_STACK
                 byte* hmac = NULL;
             #else
@@ -10455,36 +11150,63 @@ int BuildMessage(WOLFSSL* ssl, byte* output, int outSz, const byte* input,
             #endif
 
             #ifdef WOLFSSL_SMALL_STACK
-                hmac = (byte*)XMALLOC(MAX_DIGEST_SIZE, NULL,
+                hmac = (byte*)XMALLOC(MAX_DIGEST_SIZE, ssl->heap,
                                                        DYNAMIC_TYPE_TMP_BUFFER);
                 if (hmac == NULL)
-                    return MEMORY_E;
+                    ERROR_OUT(MEMORY_E, exit_buildmsg);
             #endif
 
-                ret = ssl->hmac(ssl, hmac, output + headerSz + ivSz, inSz,
+                ret = ssl->hmac(ssl, hmac, output + args->headerSz + args->ivSz, inSz,
                                                                        type, 0);
-                XMEMCPY(output + idx, hmac, digestSz);
+                XMEMCPY(output + args->idx, hmac, args->digestSz);
 
             #ifdef WOLFSSL_SMALL_STACK
-                XFREE(hmac, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+                XFREE(hmac, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
             #endif
-            } else
-#endif
-                ret = ssl->hmac(ssl, output+idx, output + headerSz + ivSz, inSz,
-                                                                       type, 0);
-                #ifdef WOLFSSL_DTLS
-                    if (ssl->options.dtls)
-                        DtlsSEQIncrement(ssl, CUR_ORDER);
-                #endif
-        }
-        if (ret != 0)
-            return ret;
+            }
+            else
+        #endif
+                ret = ssl->hmac(ssl, output + args->idx, output + args->headerSz + args->ivSz,
+                                                                inSz, type, 0);
+            #ifdef WOLFSSL_DTLS
+                if (ssl->options.dtls)
+                    DtlsSEQIncrement(ssl, CUR_ORDER);
+            #endif
+            }
+            if (ret != 0)
+                goto exit_buildmsg;
 
-        if ( (ret = Encrypt(ssl, output + headerSz, output+headerSz,size)) != 0)
-            return ret;
+            ssl->options.buildMsgState = BUILD_MSG_ENCRYPT;
+        }
+        case BUILD_MSG_ENCRYPT:
+        {
+            ret = Encrypt(ssl, output + args->headerSz, output + args->headerSz, args->size,
+                asyncOkay);
+            break;
+        }
     }
 
-    return sz;
+exit_buildmsg:
+
+    WOLFSSL_LEAVE("BuildMessage", ret);
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    if (ret == WC_PENDING_E) {
+        return ret;
+    }
+#endif
+
+    /* make sure build message state is reset */
+    ssl->options.buildMsgState = BUILD_MSG_BEGIN;
+
+    /* return sz on success */
+    if (ret == 0)
+        ret = args->sz;
+
+    /* Final cleanup */
+    FreeBuildMsgArgs(ssl, args);
+
+    return ret;
 }
 
 
@@ -10553,7 +11275,7 @@ int SendFinished(WOLFSSL* ssl)
     #endif
 
     sendSz = BuildMessage(ssl, output, outputSz, input, headerSz + finishedSz,
-                          handshake, 1, 0);
+                                                          handshake, 1, 0, 0);
     if (sendSz < 0)
         return BUILD_MSG_ERROR;
 
@@ -10783,7 +11505,7 @@ int SendCertificate(WOLFSSL* ssl)
             }
 
             sendSz = BuildMessage(ssl, output, sendSz, input, inputSz,
-                                  handshake, 1, 0);
+                                                          handshake, 1, 0, 0);
 
             if (inputSz > 0)
                 XFREE(input, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
@@ -10792,26 +11514,26 @@ int SendCertificate(WOLFSSL* ssl)
                 return sendSz;
         }
         else {
-            #ifdef WOLFSSL_DTLS
-                if (ssl->options.dtls)
-                    DtlsSEQIncrement(ssl, CUR_ORDER);
-            #endif
+        #ifdef WOLFSSL_DTLS
+            if (ssl->options.dtls)
+                DtlsSEQIncrement(ssl, CUR_ORDER);
+        #endif
         }
 
-        #ifdef WOLFSSL_DTLS
-            if (IsDtlsNotSctpMode(ssl)) {
-                if ((ret = DtlsMsgPoolSave(ssl, output, sendSz)) != 0)
-                    return ret;
-            }
-        #endif
+    #ifdef WOLFSSL_DTLS
+        if (IsDtlsNotSctpMode(ssl)) {
+            if ((ret = DtlsMsgPoolSave(ssl, output, sendSz)) != 0)
+                return ret;
+        }
+    #endif
 
-        #ifdef WOLFSSL_CALLBACKS
-            if (ssl->hsInfoOn)
-                AddPacketName("Certificate", &ssl->handShakeInfo);
-            if (ssl->toInfoOn)
-                AddPacketInfo("Certificate", &ssl->timeoutInfo, output, sendSz,
-                               ssl->heap);
-        #endif
+    #ifdef WOLFSSL_CALLBACKS
+        if (ssl->hsInfoOn)
+            AddPacketName("Certificate", &ssl->handShakeInfo);
+        if (ssl->toInfoOn)
+            AddPacketInfo("Certificate", &ssl->timeoutInfo, output, sendSz,
+                           ssl->heap);
+    #endif
 
         ssl->buffers.outputBuffer.length += sendSz;
         if (!ssl->options.groupMessages)
@@ -10986,7 +11708,7 @@ static int BuildCertificateStatus(WOLFSSL* ssl, byte type, buffer* status,
 
             XMEMCPY(input, output + RECORD_HEADER_SZ, inputSz);
             sendSz = BuildMessage(ssl, output, sendSz, input, inputSz,
-                                                               handshake, 1, 0);
+                                                           handshake, 1, 0, 0);
             XFREE(input, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
 
             if (sendSz < 0)
@@ -11036,13 +11758,13 @@ int SendCertificateStatus(WOLFSSL* ssl)
 
     (void) ssl;
 
-    #ifdef HAVE_CERTIFICATE_STATUS_REQUEST
-        status_type = ssl->status_request;
-    #endif
+#ifdef HAVE_CERTIFICATE_STATUS_REQUEST
+    status_type = ssl->status_request;
+#endif
 
-    #ifdef HAVE_CERTIFICATE_STATUS_REQUEST_V2
-        status_type = status_type ? status_type : ssl->status_request_v2;
-    #endif
+#ifdef HAVE_CERTIFICATE_STATUS_REQUEST_V2
+    status_type = status_type ? status_type : ssl->status_request_v2;
+#endif
 
     switch (status_type) {
 
@@ -11050,7 +11772,8 @@ int SendCertificateStatus(WOLFSSL* ssl)
     #if defined(HAVE_CERTIFICATE_STATUS_REQUEST) \
      || defined(HAVE_CERTIFICATE_STATUS_REQUEST_V2)
         /* case WOLFSSL_CSR_OCSP: */
-        case WOLFSSL_CSR2_OCSP: {
+        case WOLFSSL_CSR2_OCSP:
+        {
             OcspRequest* request = ssl->ctx->certOcspRequest;
             buffer response;
 
@@ -11072,15 +11795,15 @@ int SendCertificateStatus(WOLFSSL* ssl)
                 if (der->buffer == NULL || der->length == 0)
                     return 0;
 
-                #ifdef WOLFSSL_SMALL_STACK
-                    cert = (DecodedCert*)XMALLOC(sizeof(DecodedCert), NULL,
-                                                       DYNAMIC_TYPE_TMP_BUFFER);
-                    if (cert == NULL)
-                        return MEMORY_E;
-                #endif
+            #ifdef WOLFSSL_SMALL_STACK
+                cert = (DecodedCert*)XMALLOC(sizeof(DecodedCert), ssl->heap,
+                                                   DYNAMIC_TYPE_TMP_BUFFER);
+                if (cert == NULL)
+                    return MEMORY_E;
+            #endif
 
                 InitDecodedCert(cert, der->buffer, der->length, ssl->heap);
-
+                /* TODO: Setup async support here */
                 if ((ret = ParseCertRelative(cert, CERT_TYPE, VERIFY,
                                                           ssl->ctx->cm)) != 0) {
                     WOLFSSL_MSG("ParseCert failed");
@@ -11091,9 +11814,9 @@ int SendCertificateStatus(WOLFSSL* ssl)
                     if (request == NULL) {
                         FreeDecodedCert(cert);
 
-                        #ifdef WOLFSSL_SMALL_STACK
-                            XFREE(cert, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-                        #endif
+                    #ifdef WOLFSSL_SMALL_STACK
+                        XFREE(cert, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                    #endif
 
                         return MEMORY_E;
                     }
@@ -11112,9 +11835,9 @@ int SendCertificateStatus(WOLFSSL* ssl)
 
                 FreeDecodedCert(cert);
 
-                #ifdef WOLFSSL_SMALL_STACK
-                    XFREE(cert, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-                #endif
+            #ifdef WOLFSSL_SMALL_STACK
+                XFREE(cert, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            #endif
             }
 
             if (ret == 0) {
@@ -11142,14 +11865,16 @@ int SendCertificateStatus(WOLFSSL* ssl)
 
             if (request != ssl->ctx->certOcspRequest)
                 XFREE(request, ssl->heap, DYNAMIC_TYPE_OCSP_REQUEST);
+
+            break;
         }
-        break;
 
     #endif /* HAVE_CERTIFICATE_STATUS_REQUEST    */
            /* HAVE_CERTIFICATE_STATUS_REQUEST_V2 */
 
     #if defined HAVE_CERTIFICATE_STATUS_REQUEST_V2
-        case WOLFSSL_CSR2_OCSP_MULTI: {
+        case WOLFSSL_CSR2_OCSP_MULTI:
+        {
             OcspRequest* request = ssl->ctx->certOcspRequest;
             buffer responses[1 + MAX_CHAIN_DEPTH];
             int i = 0;
@@ -11172,15 +11897,15 @@ int SendCertificateStatus(WOLFSSL* ssl)
                 if (der->buffer == NULL || der->length == 0)
                     return 0;
 
-                #ifdef WOLFSSL_SMALL_STACK
-                    cert = (DecodedCert*)XMALLOC(sizeof(DecodedCert), NULL,
-                                                   DYNAMIC_TYPE_TMP_BUFFER);
-                    if (cert == NULL)
-                        return MEMORY_E;
-                #endif
+            #ifdef WOLFSSL_SMALL_STACK /* heap hint matches XFREE below */
+                cert = (DecodedCert*)XMALLOC(sizeof(DecodedCert), ssl->heap,
+                                               DYNAMIC_TYPE_TMP_BUFFER);
+                if (cert == NULL)
+                    return MEMORY_E;
+            #endif
 
                 InitDecodedCert(cert, der->buffer, der->length, ssl->heap);
-
+                /* TODO: Setup async support here */
                 if ((ret = ParseCertRelative(cert, CERT_TYPE, VERIFY,
                                                           ssl->ctx->cm)) != 0) {
                     WOLFSSL_MSG("ParseCert failed");
@@ -11191,9 +11916,9 @@ int SendCertificateStatus(WOLFSSL* ssl)
                     if (request == NULL) {
                         FreeDecodedCert(cert);
 
-                        #ifdef WOLFSSL_SMALL_STACK
-                            XFREE(cert, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-                        #endif
+                    #ifdef WOLFSSL_SMALL_STACK
+                        XFREE(cert, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                    #endif
 
                         return MEMORY_E;
                     }
@@ -11213,9 +11938,9 @@ int SendCertificateStatus(WOLFSSL* ssl)
 
                 FreeDecodedCert(cert);
 
-                #ifdef WOLFSSL_SMALL_STACK
-                    XFREE(cert, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-                #endif
+            #ifdef WOLFSSL_SMALL_STACK
+                XFREE(cert, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            #endif
             }
 
             if (ret == 0) {
@@ -11239,11 +11964,11 @@ int SendCertificateStatus(WOLFSSL* ssl)
                                               || ssl->buffers.weOwnCertChain)) {
                 buffer der;
                 word32 idx = 0;
-                #ifdef WOLFSSL_SMALL_STACK
-                    DecodedCert* cert = NULL;
-                #else
-                    DecodedCert  cert[1];
-                #endif
+            #ifdef WOLFSSL_SMALL_STACK
+                DecodedCert* cert = NULL;
+            #else
+                DecodedCert  cert[1];
+            #endif
 
                 XMEMSET(&der, 0, sizeof(buffer));
 
@@ -11265,7 +11990,7 @@ int SendCertificateStatus(WOLFSSL* ssl)
                         break;
 
                     InitDecodedCert(cert, der.buffer, der.length, ssl->heap);
-
+                    /* TODO: Setup async support here */
                     if ((ret = ParseCertRelative(cert, CERT_TYPE, VERIFY,
                                                       ssl->ctx->cm)) != 0) {
                         WOLFSSL_MSG("ParseCert failed");
@@ -11315,9 +12040,9 @@ int SendCertificateStatus(WOLFSSL* ssl)
                     FreeDecodedCert(cert);
                 }
 
-                #ifdef WOLFSSL_SMALL_STACK
-                    XFREE(cert, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-                #endif
+            #ifdef WOLFSSL_SMALL_STACK
+                XFREE(cert, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            #endif
             }
             else {
                 while (ret == 0 &&
@@ -11346,14 +12071,14 @@ int SendCertificateStatus(WOLFSSL* ssl)
                         XFREE(responses[i].buffer, ssl->heap,
                                                        DYNAMIC_TYPE_TMP_BUFFER);
             }
-        }
-        break;
 
+            break;
+        }
     #endif /* HAVE_CERTIFICATE_STATUS_REQUEST_V2 */
     #endif /* NO_WOLFSSL_SERVER */
 
         default:
-        break;
+            break;
     }
 
     return ret;
@@ -11454,9 +12179,14 @@ int SendData(WOLFSSL* ssl, const void* data, int sz)
         }
 #endif
         sendSz = BuildMessage(ssl, out, outputSz, sendBuffer, buffSz,
-                              application_data, 0, 0);
-        if (sendSz < 0)
+                                                  application_data, 0, 0, 1);
+        if (sendSz < 0) {
+        #ifdef WOLFSSL_ASYNC_CRYPT
+            if (sendSz == WC_PENDING_E)
+                ssl->error = sendSz;
+        #endif
             return BUILD_MSG_ERROR;
+        }
 
         ssl->buffers.outputBuffer.length += sendSz;
 
@@ -11490,8 +12220,10 @@ int ReceiveData(WOLFSSL* ssl, byte* output, int sz, int peek)
 
     WOLFSSL_ENTER("ReceiveData()");
 
-    if (ssl->error == WANT_READ || ssl->error == WC_PENDING_E)
+    /* reset error state */
+    if (ssl->error == WANT_READ || ssl->error == WC_PENDING_E) {
         ssl->error = 0;
+    }
 
 #ifdef WOLFSSL_DTLS
     if (ssl->options.dtls) {
@@ -11511,10 +12243,12 @@ int ReceiveData(WOLFSSL* ssl, byte* output, int sz, int peek)
         int err;
         WOLFSSL_MSG("Handshake not complete, trying to finish");
         if ( (err = wolfSSL_negotiate(ssl)) != SSL_SUCCESS) {
+        #ifdef WOLFSSL_ASYNC_CRYPT
             /* if async would block return WANT_WRITE */
             if (ssl->error == WC_PENDING_E) {
                 return WOLFSSL_CBIO_ERR_WANT_READ;
             }
+        #endif
             return  err;
         }
     }
@@ -11586,6 +12320,26 @@ int SendAlert(WOLFSSL* ssl, int severity, int type)
     int  outputSz;
     int  dtlsExtra = 0;
 
+#ifdef HAVE_WRITE_DUP
+    if (ssl->dupWrite && ssl->dupSide == READ_DUP_SIDE) {
+        int notifyErr = 0;
+
+        WOLFSSL_MSG("Read dup side cannot write alerts, notifying sibling");
+
+        if (type == close_notify) {
+            notifyErr = ZERO_RETURN;
+        } else if (severity == alert_fatal) {
+            notifyErr = FATAL_ERROR;
+        }
+
+        if (notifyErr != 0) {
+            return NotifyWriteSide(ssl, notifyErr);
+        }
+
+        return 0;
+    }
+#endif
+
     /* if sendalert is called again for nonblocking */
     if (ssl->options.sendAlertState != 0) {
         ret = SendBuffered(ssl);
@@ -11620,7 +12374,7 @@ int SendAlert(WOLFSSL* ssl, int severity, int type)
        other side may not be able to handle it */
     if (IsEncryptionOn(ssl, 1) && ssl->options.handShakeDone)
         sendSz = BuildMessage(ssl, output, outputSz, input, ALERT_SIZE,
-                              alert, 0, 0);
+                                                          alert, 0, 0, 0);
     else {
 
         AddRecordHeader(output, ALERT_SIZE, alert, ssl);
@@ -12022,6 +12776,12 @@ const char* wolfSSL_ERR_reason_error_string(unsigned long e)
     case DECODE_E:
         return "Decode handshake message error";
 
+    case WRITE_DUP_READ_E:
+        return "Write dup write side can't read error";
+
+    case WRITE_DUP_WRITE_E:
+        return "Write dup read side can't write error";
+
     default :
         return "unknown error number";
     }
@@ -13397,11 +14157,11 @@ Set the enabled cipher suites.
 
 @param [out] suites Suites structure.
 @param [in]  list   List of cipher suites, only supports full name from
-                    cipher_name[] delimited by ':'.
+                    cipher_names[] delimited by ':'.
 
 @return true on success, else false.
 */
-int SetCipherList(Suites* suites, const char* list)
+int SetCipherList(WOLFSSL_CTX* ctx, Suites* suites, const char* list)
 {
     int       ret          = 0;
     int       idx          = 0;
@@ -13435,12 +14195,25 @@ int SetCipherList(Suites* suites, const char* list)
 
         for (i = 0; i < suiteSz; i++) {
             if (XSTRNCMP(name, cipher_names[i], sizeof(name)) == 0) {
+            #ifdef WOLFSSL_DTLS
+                /* don't allow stream ciphers with DTLS */
+                if (ctx->method->version.major == DTLS_MAJOR) {
+                    if (XSTRSTR(name, "RC4") ||
+                        XSTRSTR(name, "HC128") ||
+                        XSTRSTR(name, "RABBIT"))
+                    {
+                        WOLFSSL_MSG("Stream ciphers not supported with DTLS");
+                        continue;
+                    }
+
+                }
+            #endif /* WOLFSSL_DTLS */
+
                 suites->suites[idx++] = (XSTRSTR(name, "CHACHA")) ? CHACHA_BYTE
                                       : (XSTRSTR(name, "QSH"))    ? QSH_BYTE
                                       : (XSTRSTR(name, "EC"))     ? ECC_BYTE
                                       : (XSTRSTR(name, "CCM"))    ? ECC_BYTE
                                       : 0x00; /* normal */
-
                 suites->suites[idx++] = (byte)cipher_name_idx[i];
 
                 /* The suites are either ECDSA, RSA, PSK, or Anon. The RSA
@@ -13465,6 +14238,8 @@ int SetCipherList(Suites* suites, const char* list)
         InitSuitesHashSigAlgo(suites, haveECDSAsig, haveRSAsig, haveAnon);
     }
 
+    (void)ctx;
+
     return ret;
 }
 
@@ -13475,10 +14250,21 @@ static void PickHashSigAlgo(WOLFSSL* ssl,
     word32 i;
 
     ssl->suites->sigAlgo = ssl->specs.sig_algo;
-    ssl->suites->hashAlgo = sha_mac;
+
+    /* set defaults */
+    if (IsAtLeastTLSv1_2(ssl)) {
+    #ifdef WOLFSSL_ALLOW_TLS_SHA1
+        ssl->suites->hashAlgo = sha_mac;
+    #else
+        ssl->suites->hashAlgo = sha256_mac;
+    #endif
+    }
+    else {
+        ssl->suites->hashAlgo = sha_mac;
+    }
 
     /* i+1 since peek a byte ahead for type */
-    for (i = 0; (i+1) < hashSigAlgoSz; i += 2) {
+    for (i = 0; (i+1) < hashSigAlgoSz; i += HELLO_EXT_SIGALGO_SZ) {
         if (hashSigAlgo[i+1] == ssl->specs.sig_algo) {
             if (hashSigAlgo[i] == sha_mac) {
                 break;
@@ -13604,7 +14390,7 @@ static void PickHashSigAlgo(WOLFSSL* ssl,
                 XMEMCPY(info->packets[info->numberPackets].value, data, sz);
             else {
                 info->packets[info->numberPackets].bufferValue =
-                           XMALLOC(sz, heap, DYNAMIC_TYPE_INFO);
+                                    (byte*)XMALLOC(sz, heap, DYNAMIC_TYPE_INFO);
                 if (!info->packets[info->numberPackets].bufferValue)
                     /* let next alloc catch, just don't fill, not fatal here  */
                     info->packets[info->numberPackets].valueSz = 0;
@@ -13741,23 +14527,23 @@ static void PickHashSigAlgo(WOLFSSL* ssl,
         output[idx++] = ssl->version.minor;
         ssl->chVersion = ssl->version;  /* store in case changed */
 
-            /* then random */
+        /* then random */
         if (ssl->options.connectState == CONNECT_BEGIN) {
             ret = wc_RNG_GenerateBlock(ssl->rng, output + idx, RAN_LEN);
             if (ret != 0)
                 return ret;
 
-                /* store random */
+            /* store random */
             XMEMCPY(ssl->arrays->clientRandom, output + idx, RAN_LEN);
         } else {
 #ifdef WOLFSSL_DTLS
-                /* send same random on hello again */
+            /* send same random on hello again */
             XMEMCPY(output + idx, ssl->arrays->clientRandom, RAN_LEN);
 #endif
         }
         idx += RAN_LEN;
 
-            /* then session id */
+        /* then session id */
         output[idx++] = (byte)idSz;
         if (idSz) {
             XMEMCPY(output + idx, ssl->session.sessionID,
@@ -13765,7 +14551,7 @@ static void PickHashSigAlgo(WOLFSSL* ssl,
             idx += ssl->session.sessionIDSz;
         }
 
-            /* then DTLS cookie */
+        /* then DTLS cookie */
 #ifdef WOLFSSL_DTLS
         if (ssl->options.dtls) {
             byte cookieSz = ssl->arrays->cookieSz;
@@ -13777,13 +14563,13 @@ static void PickHashSigAlgo(WOLFSSL* ssl,
             }
         }
 #endif
-            /* then cipher suites */
+        /* then cipher suites */
         c16toa(ssl->suites->suiteSz, output + idx);
-        idx += 2;
+        idx += OPAQUE16_LEN;
         XMEMCPY(output + idx, &ssl->suites->suites, ssl->suites->suiteSz);
         idx += ssl->suites->suiteSz;
 
-            /* last, compression */
+        /* last, compression */
         output[idx++] = COMP_LEN;
         if (ssl->options.usingCompression)
             output[idx++] = ZLIB_COMPRESSION;
@@ -13838,7 +14624,7 @@ static void PickHashSigAlgo(WOLFSSL* ssl,
 
             XMEMCPY(input, output + RECORD_HEADER_SZ, inputSz);
             sendSz = BuildMessage(ssl, output, sendSz, input, inputSz,
-                                  handshake, 1, 0);
+                                  handshake, 1, 0, 0);
             XFREE(input, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
 
             if (sendSz < 0)
@@ -14395,71 +15181,78 @@ static void PickHashSigAlgo(WOLFSSL* ssl,
 #endif /* HAVE_ECC */
 
 
+/* DoServerKeyExchange args; kept in ssl->async.args for async builds */
+typedef struct DskeArgs {
+    byte*  output; /* not allocated */
+#if !defined(NO_DH) || defined(HAVE_ECC)
+    byte*  verifySig; /* released by FreeDskeArgs() using ssl->heap */
+#endif
+    word32 idx;   /* current read offset into input; starts at *inOutIdx */
+    word32 begin; /* *inOutIdx on entry; (idx - begin) is checked vs size */
+#ifndef NO_RSA
+    int    typeH; /* presumably the hash type for RSA verify - TODO confirm */
+#endif
+#if !defined(NO_DH) || defined(HAVE_ECC)
+    word16 verifySigSz; /* presumably length of verifySig - TODO confirm */
+#endif
+    word16 sigSz;   /* NOTE(review): use not visible here - confirm meaning */
+    byte   sigAlgo; /* initialized from ssl->specs.sig_algo */
+} DskeArgs;
+
+static void FreeDskeArgs(WOLFSSL* ssl, void* pArgs)
+{   /* Frees what DskeArgs owns; registered as ssl->async.freeArgs (async) */
+    DskeArgs* args = (DskeArgs*)pArgs;
+
+    (void)ssl;  /* both unused when NO_DH is set and HAVE_ECC is not */
+    (void)args;
+
+#if !defined(NO_DH) || defined(HAVE_ECC)
+    if (args->verifySig) {
+        XFREE(args->verifySig, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        args->verifySig = NULL; /* a repeated call then frees nothing */
+    }
+#endif
+}
+
 static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                                word32* inOutIdx, word32 size)
 {
-    int    ret = 0;
-    word16 length = 0;
-    word32 idx = *inOutIdx, begin = *inOutIdx;
-#ifndef NO_RSA
-    int    typeH = 0;
+    int ret = 0;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    DskeArgs* args = (DskeArgs*)ssl->async.args;
+    typedef char args_test[sizeof(ssl->async.args) >= sizeof(*args) ? 1 : -1];
+    (void)sizeof(args_test);
+#else
+    DskeArgs  args[1];
 #endif
-    byte*  output  = NULL;
-    byte   sigAlgo = ssl->specs.sig_algo;
-    word16 sigSz = 0;
-#if !defined(NO_DH) || defined(HAVE_ECC)
-    byte*  verifySig = NULL;
-#endif
-
-    (void)output;
-    (void)sigAlgo;
-    (void)sigSz;
 
     WOLFSSL_ENTER("DoServerKeyExchange");
 
 #ifdef WOLFSSL_ASYNC_CRYPT
-    ret = wolfAsync_EventPop(&ssl->event, WOLF_EVENT_TYPE_ASYNC_ANY);
+    ret = wolfSSL_AsyncPop(ssl, &ssl->options.asyncState);
     if (ret != WC_NOT_PENDING_E) {
-        WOLF_EVENT_TYPE eType = ssl->event.type;
-
-        /* Clear event */
-        XMEMSET(&ssl->event, 0, sizeof(ssl->event));
-
         /* Check for error */
-        if (ret < 0) {
+        if (ret < 0)
             goto exit_dske;
-        }
-        else  {
-            /* Restore variables needed for async */
-            idx = ssl->async.idx;
-            length = ssl->async.length;
-            output = ssl->async.output;
-            sigSz = ssl->async.sigSz;
-        #ifndef NO_RSA
-            typeH = ssl->async.hashAlgo;
-        #endif
-            sigAlgo = ssl->async.sigAlgo;
-        #if !defined(NO_DH) || defined(HAVE_ECC)
-            verifySig = ssl->async.data;
-        #endif
-
-            /* Advance key share state if not wolfCrypt */
-            if (eType == WOLF_EVENT_TYPE_ASYNC_WOLFSSL) {
-                ssl->options.keyShareState++;
-            }
-        }
     }
     else
 #endif
     {
         /* Reset state */
         ret = 0;
-        ssl->options.keyShareState = KEYSHARE_BEGIN;
+        ssl->options.asyncState = TLS_ASYNC_BEGIN;
+        XMEMSET(args, 0, sizeof(DskeArgs));
+        args->idx = *inOutIdx;
+        args->begin = *inOutIdx;
+        args->sigAlgo = ssl->specs.sig_algo;
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        ssl->async.freeArgs = FreeDskeArgs;
+    #endif
     }
 
-    switch(ssl->options.keyShareState)
+    switch(ssl->options.asyncState)
     {
-        case KEYSHARE_BEGIN:
+        case TLS_ASYNC_BEGIN:
         {
         #ifdef WOLFSSL_CALLBACKS
             if (ssl->hsInfoOn)
@@ -14474,38 +15267,42 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                 case psk_kea:
                 {
                     int srvHintLen;
+                    word16 length;
 
-                    if ((idx - begin) + OPAQUE16_LEN > size) {
+                    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
-                    ato16(input + idx, &length);
-                    idx += OPAQUE16_LEN;
+                    ato16(input + args->idx, &length);
+                    args->idx += OPAQUE16_LEN;
 
-                    if ((idx - begin) + length > size) {
+                    if ((args->idx - args->begin) + length > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
                     /* get PSK server hint from the wire */
                     srvHintLen = min(length, MAX_PSK_ID_LEN - 1);
-                    XMEMCPY(ssl->arrays->server_hint, input + idx, srvHintLen);
+                    XMEMCPY(ssl->arrays->server_hint, input + args->idx,
+                                                                    srvHintLen);
                     ssl->arrays->server_hint[srvHintLen] = 0;
-                    idx += length;
+                    args->idx += length;
                     break;
                 }
             #endif /* !NO_PSK */
             #ifndef NO_DH
                 case diffie_hellman_kea:
                 {
+                    word16 length;
+
                     /* p */
-                    if ((idx - begin) + OPAQUE16_LEN > size) {
+                    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
-                    ato16(input + idx, &length);
-                    idx += OPAQUE16_LEN;
+                    ato16(input + args->idx, &length);
+                    args->idx += OPAQUE16_LEN;
 
-                    if ((idx - begin) + length > size) {
+                    if ((args->idx - args->begin) + length > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
@@ -14516,7 +15313,7 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                     }
 
                     ssl->buffers.serverDH_P.buffer =
-                        (byte*)XMALLOC(length, ssl->heap, DYNAMIC_TYPE_DH);
+                        (byte*)XMALLOC(length, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
                     if (ssl->buffers.serverDH_P.buffer) {
                         ssl->buffers.serverDH_P.length = length;
                     }
@@ -14524,25 +15321,26 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                         ERROR_OUT(MEMORY_ERROR, exit_dske);
                     }
 
-                    XMEMCPY(ssl->buffers.serverDH_P.buffer, input + idx, length);
-                    idx += length;
+                    XMEMCPY(ssl->buffers.serverDH_P.buffer, input + args->idx,
+                                                                        length);
+                    args->idx += length;
 
                     ssl->options.dhKeySz = length;
 
                     /* g */
-                    if ((idx - begin) + OPAQUE16_LEN > size) {
+                    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
-                    ato16(input + idx, &length);
-                    idx += OPAQUE16_LEN;
+                    ato16(input + args->idx, &length);
+                    args->idx += OPAQUE16_LEN;
 
-                    if ((idx - begin) + length > size) {
+                    if ((args->idx - args->begin) + length > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
                     ssl->buffers.serverDH_G.buffer =
-                        (byte*)XMALLOC(length, ssl->heap, DYNAMIC_TYPE_DH);
+                        (byte*)XMALLOC(length, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
                     if (ssl->buffers.serverDH_G.buffer) {
                         ssl->buffers.serverDH_G.length = length;
                     }
@@ -14550,23 +15348,24 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                         ERROR_OUT(MEMORY_ERROR, exit_dske);
                     }
 
-                    XMEMCPY(ssl->buffers.serverDH_G.buffer, input + idx, length);
-                    idx += length;
+                    XMEMCPY(ssl->buffers.serverDH_G.buffer, input + args->idx,
+                                                                        length);
+                    args->idx += length;
 
                     /* pub */
-                    if ((idx - begin) + OPAQUE16_LEN > size) {
+                    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
-                    ato16(input + idx, &length);
-                    idx += OPAQUE16_LEN;
+                    ato16(input + args->idx, &length);
+                    args->idx += OPAQUE16_LEN;
 
-                    if ((idx - begin) + length > size) {
+                    if ((args->idx - args->begin) + length > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
                     ssl->buffers.serverDH_Pub.buffer =
-                        (byte*)XMALLOC(length, ssl->heap, DYNAMIC_TYPE_DH);
+                        (byte*)XMALLOC(length, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
                     if (ssl->buffers.serverDH_Pub.buffer) {
                         ssl->buffers.serverDH_Pub.length = length;
                     }
@@ -14574,8 +15373,9 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                         ERROR_OUT(MEMORY_ERROR, exit_dske);
                     }
 
-                    XMEMCPY(ssl->buffers.serverDH_Pub.buffer, input + idx, length);
-                    idx += length;
+                    XMEMCPY(ssl->buffers.serverDH_Pub.buffer, input + args->idx,
+                                                                        length);
+                    args->idx += length;
                     break;
                 }
             #endif /* !NO_DH */
@@ -14584,25 +15384,27 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                 {
                     byte b;
                     int curveId, curveOid;
+                    word16 length;
 
-                    if ((idx - begin) + ENUM_LEN + OPAQUE16_LEN + OPAQUE8_LEN > size) {
+                    if ((args->idx - args->begin) + ENUM_LEN + OPAQUE16_LEN +
+                                                        OPAQUE8_LEN > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
-                    b = input[idx++];
+                    b = input[args->idx++];
                     if (b != named_curve) {
                         ERROR_OUT(ECC_CURVETYPE_ERROR, exit_dske);
                     }
 
-                    idx += 1;   /* curve type, eat leading 0 */
-                    b = input[idx++];
+                    args->idx += 1;   /* curve type, eat leading 0 */
+                    b = input[args->idx++];
                     if ((curveOid = CheckCurveId(b)) < 0) {
                         ERROR_OUT(ECC_CURVE_ERROR, exit_dske);
                     }
                     ssl->ecdhCurveOID = curveOid;
 
-                    length = input[idx++];
-                    if ((idx - begin) + length > size) {
+                    length = input[args->idx++];
+                    if ((args->idx - args->begin) + length > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
@@ -14629,12 +15431,12 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                     }
 
                     curveId = wc_ecc_get_oid(curveOid, NULL, NULL);
-                    if (wc_ecc_import_x963_ex(input + idx, length,
+                    if (wc_ecc_import_x963_ex(input + args->idx, length,
                                         ssl->peerEccKey, curveId) != 0) {
                         ERROR_OUT(ECC_PEERKEY_ERROR, exit_dske);
                     }
 
-                    idx += length;
+                    args->idx += length;
                     ssl->peerEccKeyPresent = 1;
                     break;
                 }
@@ -14643,33 +15445,35 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                 case dhe_psk_kea:
                 {
                     int srvHintLen;
+                    word16 length;
 
-                    if ((idx - begin) + OPAQUE16_LEN > size) {
+                    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
-                    ato16(input + idx, &length);
-                    idx += OPAQUE16_LEN;
+                    ato16(input + args->idx, &length);
+                    args->idx += OPAQUE16_LEN;
 
-                    if ((idx - begin) + length > size) {
+                    if ((args->idx - args->begin) + length > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
                     /* get PSK server hint from the wire */
                     srvHintLen = min(length, MAX_PSK_ID_LEN - 1);
-                    XMEMCPY(ssl->arrays->server_hint, input + idx, srvHintLen);
+                    XMEMCPY(ssl->arrays->server_hint, input + args->idx,
+                                                                srvHintLen);
                     ssl->arrays->server_hint[srvHintLen] = 0;
-                    idx += length;
+                    args->idx += length;
 
                     /* p */
-                    if ((idx - begin) + OPAQUE16_LEN > size) {
+                    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
-                    ato16(input + idx, &length);
-                    idx += OPAQUE16_LEN;
+                    ato16(input + args->idx, &length);
+                    args->idx += OPAQUE16_LEN;
 
-                    if ((idx - begin) + length > size) {
+                    if ((args->idx - args->begin) + length > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
@@ -14680,7 +15484,7 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                     }
 
                     ssl->buffers.serverDH_P.buffer = (byte*)XMALLOC(length,
-                                                ssl->heap, DYNAMIC_TYPE_DH);
+                                                ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
                     if (ssl->buffers.serverDH_P.buffer) {
                         ssl->buffers.serverDH_P.length = length;
                     }
@@ -14688,25 +15492,26 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                         ERROR_OUT(MEMORY_ERROR, exit_dske);
                     }
 
-                    XMEMCPY(ssl->buffers.serverDH_P.buffer, input + idx, length);
-                    idx += length;
+                    XMEMCPY(ssl->buffers.serverDH_P.buffer, input + args->idx,
+                                                                        length);
+                    args->idx += length;
 
                     ssl->options.dhKeySz = length;
 
                     /* g */
-                    if ((idx - begin) + OPAQUE16_LEN > size) {
+                    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
-                    ato16(input + idx, &length);
-                    idx += OPAQUE16_LEN;
+                    ato16(input + args->idx, &length);
+                    args->idx += OPAQUE16_LEN;
 
-                    if ((idx - begin) + length > size) {
+                    if ((args->idx - args->begin) + length > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
                     ssl->buffers.serverDH_G.buffer = (byte*)XMALLOC(length,
-                                                ssl->heap, DYNAMIC_TYPE_DH);
+                                                ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
                     if (ssl->buffers.serverDH_G.buffer) {
                         ssl->buffers.serverDH_G.length = length;
                     }
@@ -14714,23 +15519,24 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                         ERROR_OUT(MEMORY_ERROR, exit_dske);
                     }
 
-                    XMEMCPY(ssl->buffers.serverDH_G.buffer, input + idx, length);
-                    idx += length;
+                    XMEMCPY(ssl->buffers.serverDH_G.buffer, input + args->idx,
+                                                                        length);
+                    args->idx += length;
 
                     /* pub */
-                    if ((idx - begin) + OPAQUE16_LEN > size) {
+                    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
-                    ato16(input + idx, &length);
-                    idx += OPAQUE16_LEN;
+                    ato16(input + args->idx, &length);
+                    args->idx += OPAQUE16_LEN;
 
-                    if ((idx - begin) + length > size) {
+                    if ((args->idx - args->begin) + length > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
                     ssl->buffers.serverDH_Pub.buffer = (byte*)XMALLOC(length,
-                                                ssl->heap, DYNAMIC_TYPE_DH);
+                                                ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
                     if (ssl->buffers.serverDH_Pub.buffer) {
                         ssl->buffers.serverDH_Pub.length = length;
                     }
@@ -14738,8 +15544,9 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                         ERROR_OUT(MEMORY_ERROR, exit_dske);
                     }
 
-                    XMEMCPY(ssl->buffers.serverDH_Pub.buffer, input + idx, length);
-                    idx += length;
+                    XMEMCPY(ssl->buffers.serverDH_Pub.buffer, input + args->idx,
+                                                                        length);
+                    args->idx += length;
                     break;
                 }
             #endif /* !NO_DH || !NO_PSK */
@@ -14749,75 +15556,78 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                     byte b;
                     int curveOid, curveId;
                     int srvHintLen;
+                    word16 length;
 
-                    if ((idx - begin) + OPAQUE16_LEN > size) {
+                    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
-                    ato16(input + idx, &length);
-                    idx += OPAQUE16_LEN;
+                    ato16(input + args->idx, &length);
+                    args->idx += OPAQUE16_LEN;
 
-                    if ((idx - begin) + length > size) {
+                    if ((args->idx - args->begin) + length > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
                     /* get PSK server hint from the wire */
                     srvHintLen = min(length, MAX_PSK_ID_LEN - 1);
-                    XMEMCPY(ssl->arrays->server_hint, input + idx, srvHintLen);
+                    XMEMCPY(ssl->arrays->server_hint, input + args->idx, srvHintLen);
                     ssl->arrays->server_hint[srvHintLen] = 0;
 
-                    idx += length;
+                    args->idx += length;
 
-                    if ((idx - begin) + ENUM_LEN + OPAQUE16_LEN +
+                    if ((args->idx - args->begin) + ENUM_LEN + OPAQUE16_LEN +
                         OPAQUE8_LEN > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
                     /* Check curve name and ID */
-                    b = input[idx++];
+                    b = input[args->idx++];
                     if (b != named_curve) {
                         ERROR_OUT(ECC_CURVETYPE_ERROR, exit_dske);
                     }
 
-                    idx += 1;   /* curve type, eat leading 0 */
-                    b = input[idx++];
+                    args->idx += 1;   /* curve type, eat leading 0 */
+                    b = input[args->idx++];
                     if ((curveOid = CheckCurveId(b)) < 0) {
                         ERROR_OUT(ECC_CURVE_ERROR, exit_dske);
                     }
 
-                    length = input[idx++];
-                    if ((idx - begin) + length > size) {
+                    length = input[args->idx++];
+                    if ((args->idx - args->begin) + length > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
                     if (ssl->peerEccKey == NULL) {
                         /* alloc/init on demand */
                         ssl->peerEccKey = (ecc_key*)XMALLOC(sizeof(ecc_key),
-                                                     ssl->heap, DYNAMIC_TYPE_ECC);
+                                                   ssl->heap, DYNAMIC_TYPE_ECC);
                         if (ssl->peerEccKey == NULL) {
                             WOLFSSL_MSG("PeerEccKey Memory error");
                             ERROR_OUT(MEMORY_E, exit_dske);
                         }
-                        ret = wc_ecc_init_ex(ssl->peerEccKey, ssl->heap, ssl->devId);
+                        ret = wc_ecc_init_ex(ssl->peerEccKey, ssl->heap,
+                                                                    ssl->devId);
                         if (ret != 0) {
                             goto exit_dske;
                         }
                     } else if (ssl->peerEccKeyPresent) {  /* don't leak on reuse */
                         wc_ecc_free(ssl->peerEccKey);
                         ssl->peerEccKeyPresent = 0;
-                        ret = wc_ecc_init_ex(ssl->peerEccKey, ssl->heap, ssl->devId);
+                        ret = wc_ecc_init_ex(ssl->peerEccKey, ssl->heap,
+                                                                    ssl->devId);
                         if (ret != 0) {
                             goto exit_dske;
                         }
                     }
 
                     curveId = wc_ecc_get_oid(curveOid, NULL, NULL);
-                    if (wc_ecc_import_x963_ex(input + idx, length,
+                    if (wc_ecc_import_x963_ex(input + args->idx, length,
                         ssl->peerEccKey, curveId) != 0) {
                         ERROR_OUT(ECC_PEERKEY_ERROR, exit_dske);
                     }
 
-                    idx += length;
+                    args->idx += length;
                     ssl->peerEccKeyPresent = 1;
                     break;
                 }
@@ -14832,10 +15642,10 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
             }
 
             /* Advance state and proceed */
-            ssl->options.keyShareState = KEYSHARE_BUILD;
-        } /* case KEYSHARE_BEGIN */
+            ssl->options.asyncState = TLS_ASYNC_BUILD;
+        } /* case TLS_ASYNC_BEGIN */
 
-        case KEYSHARE_BUILD:
+        case TLS_ASYNC_BUILD:
         {
             switch(ssl->specs.kea)
             {
@@ -14861,34 +15671,35 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                         break;
                     }
 
-                    verifySz = (word16)(idx - begin);
+                    verifySz = (word16)(args->idx - args->begin);
                     if (verifySz > MAX_DH_SZ) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
                     if (IsAtLeastTLSv1_2(ssl)) {
-                        if ((idx - begin) + ENUM_LEN + ENUM_LEN > size) {
+                        if ((args->idx - args->begin) + ENUM_LEN + ENUM_LEN >
+                                                                        size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dske);
                         }
 
-                        hashAlgo = input[idx++];
-                        sigAlgo  = input[idx++];
+                        hashAlgo = input[args->idx++];
+                        args->sigAlgo  = input[args->idx++];
 
                         switch (hashAlgo) {
                             case sha512_mac:
-                                #ifdef WOLFSSL_SHA512
-                                    hashType = WC_HASH_TYPE_SHA512;
-                                #endif
+                            #ifdef WOLFSSL_SHA512
+                                hashType = WC_HASH_TYPE_SHA512;
+                            #endif
                                 break;
                             case sha384_mac:
-                                #ifdef WOLFSSL_SHA384
-                                    hashType = WC_HASH_TYPE_SHA384;
-                                #endif
+                            #ifdef WOLFSSL_SHA384
+                                hashType = WC_HASH_TYPE_SHA384;
+                            #endif
                                 break;
                             case sha256_mac:
-                                #ifndef NO_SHA256
-                                    hashType = WC_HASH_TYPE_SHA256;
-                                #endif
+                            #ifndef NO_SHA256
+                                hashType = WC_HASH_TYPE_SHA256;
+                            #endif
                                 break;
                             case sha_mac:
                                 #if !defined(NO_SHA) && \
@@ -14909,7 +15720,7 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                         /* only using sha and md5 for rsa */
                         #ifndef NO_OLD_TLS
                             hashType = WC_HASH_TYPE_SHA;
-                            if (sigAlgo == rsa_sa_algo) {
+                            if (args->sigAlgo == rsa_sa_algo) {
                                 hashType = WC_HASH_TYPE_MD5_SHA;
                             }
                         #else
@@ -14917,18 +15728,18 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                         #endif
                     }
                 #ifndef NO_RSA
-                    typeH = wc_HashGetOID(hashType);
+                    args->typeH = wc_HashGetOID(hashType);
                 #endif
 
                     /* signature */
-                    if ((idx - begin) + OPAQUE16_LEN > size) {
+                    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
-                    ato16(input + idx, &length);
-                    idx += OPAQUE16_LEN;
+                    ato16(input + args->idx, &args->verifySigSz);
+                    args->idx += OPAQUE16_LEN;
 
-                    if ((idx - begin) + length > size) {
+                    if ((args->idx - args->begin) + args->verifySigSz > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
@@ -14955,7 +15766,7 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                     XMEMCPY(&ssl->buffers.sig.buffer[RAN_LEN],
                         ssl->arrays->serverRandom, RAN_LEN);
                     XMEMCPY(&ssl->buffers.sig.buffer[RAN_LEN * 2],
-                        input + begin, verifySz); /* message */
+                        input + args->begin, verifySz); /* message */
 
                     /* Perform hash */
                     ret = wc_Hash(hashType,
@@ -14965,7 +15776,7 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                         goto exit_dske;
                     }
 
-                    switch (sigAlgo)
+                    switch (args->sigAlgo)
                     {
                     #ifndef NO_RSA
                         case rsa_sa_algo:
@@ -14989,7 +15800,7 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
 
                     default:
                         ret = ALGO_ID_E;
-                    } /* switch (sigAlgo) */
+                    } /* switch (args->sigAlgo) */
 
             #endif /* NO_DH && !HAVE_ECC */
                     break;
@@ -15004,10 +15815,10 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
             }
 
             /* Advance state and proceed */
-            ssl->options.keyShareState = KEYSHARE_DO;
-        } /* case KEYSHARE_BUILD */
+            ssl->options.asyncState = TLS_ASYNC_DO;
+        } /* case TLS_ASYNC_BUILD */
 
-        case KEYSHARE_DO:
+        case TLS_ASYNC_DO:
         {
             switch(ssl->specs.kea)
             {
@@ -15029,23 +15840,24 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                         break;
                     }
 
-                    if (verifySig == NULL) {
-                        verifySig = (byte*)XMALLOC(length, ssl->heap,
-                                                    DYNAMIC_TYPE_TMP_BUFFER);
-                        if (!verifySig) {
+                    if (args->verifySig == NULL) {
+                        args->verifySig = (byte*)XMALLOC(args->verifySigSz,
+                                            ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                        if (args->verifySig == NULL) {
                             ERROR_OUT(MEMORY_E, exit_dske);
                         }
-                        XMEMCPY(verifySig, input + idx, length);
+                        XMEMCPY(args->verifySig, input + args->idx,
+                                                            args->verifySigSz);
                     }
 
-                    switch (sigAlgo)
+                    switch (args->sigAlgo)
                     {
                     #ifndef NO_RSA
                         case rsa_sa_algo:
                         {
                             ret = RsaVerify(ssl,
-                                verifySig, length,
-                                &output,
+                                args->verifySig, args->verifySigSz,
+                                &args->output,
                                 ssl->peerRsaKey,
                             #ifdef HAVE_PK_CALLBACKS
                                 ssl->buffers.peerRsaKey.buffer,
@@ -15057,7 +15869,7 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                             );
 
                             if (ret >= 0) {
-                                sigSz = (word16)ret;
+                                args->sigSz = (word16)ret;
                                 ret = 0;
                             }
                             break;
@@ -15067,7 +15879,7 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                         case ecc_dsa_sa_algo:
                         {
                             ret = EccVerify(ssl,
-                                verifySig, length,
+                                args->verifySig, args->verifySigSz,
                                 ssl->buffers.digest.buffer,
                                 ssl->buffers.digest.length,
                                 ssl->peerEccDsaKey,
@@ -15079,6 +15891,7 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                                 NULL, 0, NULL
                             #endif
                             );
+
                             break;
                         }
                     #endif /* HAVE_ECC */
@@ -15099,10 +15912,10 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
             }
 
             /* Advance state and proceed */
-            ssl->options.keyShareState = KEYSHARE_VERIFY;
-        } /* case KEYSHARE_DO */
+            ssl->options.asyncState = TLS_ASYNC_VERIFY;
+        } /* case TLS_ASYNC_DO */
 
-        case KEYSHARE_VERIFY:
+        case TLS_ASYNC_VERIFY:
         {
             switch(ssl->specs.kea)
             {
@@ -15125,9 +15938,9 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                     }
 
                     /* increment index after verify is done */
-                    idx += length;
+                    args->idx += args->verifySigSz;
 
-                    switch(sigAlgo)
+                    switch(args->sigAlgo)
                     {
                     #ifndef NO_RSA
                         case rsa_sa_algo:
@@ -15150,9 +15963,9 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
 
                                 encSigSz = wc_EncodeSignature(encodedSig,
                                     ssl->buffers.digest.buffer,
-                                    ssl->buffers.digest.length, typeH);
-                                if (encSigSz != sigSz || !output ||
-                                    XMEMCMP(output, encodedSig,
+                                    ssl->buffers.digest.length, args->typeH);
+                                if (encSigSz != args->sigSz || !args->output ||
+                                    XMEMCMP(args->output, encodedSig,
                                             min(encSigSz, MAX_ENCODED_SIG_SZ)) != 0) {
                                     ret = VERIFY_SIGN_ERROR;
                                 }
@@ -15163,9 +15976,11 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                                     goto exit_dske;
                                 }
                             }
-                            else if (sigSz != FINISHED_SZ || !output ||
-                                XMEMCMP(output, ssl->buffers.digest.buffer,
-                                                        FINISHED_SZ) != 0) {
+                            else if (args->sigSz != FINISHED_SZ ||
+                                    !args->output ||
+                                    XMEMCMP(args->output,
+                                            ssl->buffers.digest.buffer,
+                                            FINISHED_SZ) != 0) {
                                 ERROR_OUT(VERIFY_SIGN_ERROR, exit_dske);
                             }
                             break;
@@ -15192,13 +16007,13 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
             }
 
             /* Advance state and proceed */
-            ssl->options.keyShareState = KEYSHARE_FINALIZE;
-        } /* case KEYSHARE_VERIFY */
+            ssl->options.asyncState = TLS_ASYNC_FINALIZE;
+        } /* case TLS_ASYNC_VERIFY */
 
-        case KEYSHARE_FINALIZE:
+        case TLS_ASYNC_FINALIZE:
         {
             if (IsEncryptionOn(ssl, 0)) {
-                idx += ssl->keys.padSz;
+                args->idx += ssl->keys.padSz;
             }
 
             /* QSH extensions */
@@ -15208,17 +16023,17 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                 int    qshSz;
 
                 /* extension name */
-                ato16(input + idx, &name);
-                idx += OPAQUE16_LEN;
+                ato16(input + args->idx, &name);
+                args->idx += OPAQUE16_LEN;
 
                 if (name == TLSX_QUANTUM_SAFE_HYBRID) {
                     /* if qshSz is larger than 0 it is the length of
                        buffer used */
-                    if ((qshSz = TLSX_QSHCipher_Parse(ssl, input + idx,
+                    if ((qshSz = TLSX_QSHCipher_Parse(ssl, input + args->idx,
                                                        size, 0)) < 0) {
                         ERROR_OUT(qshSz, exit_dske);
                     }
-                    idx += qshSz;
+                    args->idx += qshSz;
                 }
                 else {
                     /* unknown extension sent server ignored handshake */
@@ -15233,63 +16048,37 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
             }
 
             /* Advance state and proceed */
-            ssl->options.keyShareState = KEYSHARE_END;
-        } /* case KEYSHARE_FINALIZE */
+            ssl->options.asyncState = TLS_ASYNC_END;
+        } /* case TLS_ASYNC_FINALIZE */
 
-        case KEYSHARE_END:
+        case TLS_ASYNC_END:
         {
             /* return index */
-            *inOutIdx = idx;
+            *inOutIdx = args->idx;
 
             ssl->options.serverState = SERVER_KEYEXCHANGE_COMPLETE;
             break;
         }
         default:
             ret = INPUT_CASE_ERROR;
-    } /* switch(ssl->options.keyShareState) */
+    } /* switch(ssl->options.asyncState) */
 
 exit_dske:
 
     WOLFSSL_LEAVE("DoServerKeyExchange", ret);
 
-    /* Handle cleanup for stack variables here */
-
 #ifdef WOLFSSL_ASYNC_CRYPT
-    /* Handle WC_PENDING_E */
+    /* Handle async operation */
     if (ret == WC_PENDING_E) {
-        /* Store variables needed for async */
-        XMEMSET(&ssl->async, 0, sizeof(ssl->async));
-        ssl->async.idx = idx;
-        ssl->async.length = length;
-        ssl->async.output = output;
-        ssl->async.sigSz = sigSz;
-    #ifndef NO_RSA
-        ssl->async.hashAlgo = typeH;
-    #endif
-        ssl->async.sigAlgo = sigAlgo;
-    #if !defined(NO_DH) || defined(HAVE_ECC)
-        ssl->async.data = verifySig;
-    #endif
-
         /* Mark message as not recevied so it can process again */
         ssl->msgsReceived.got_server_key_exchange = 0;
 
-        /* Push event to queue */
-        ret = wolfAsync_EventQueuePush(&ssl->ctx->event_queue, &ssl->event);
-        if (ret == 0) {
-            return WC_PENDING_E;
-        }
+        return ret;
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
-#if !defined(NO_DH) || defined(HAVE_ECC)
-    if (verifySig) {
-        XFREE(verifySig, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
-        verifySig = NULL;
-    }
-#endif
-
     /* Final cleanup */
+    FreeDskeArgs(ssl, args);
     FreeKeyExchange(ssl);
 
     return ret;
@@ -15430,6 +16219,11 @@ static int QSH_Encrypt(QSHKey* key, byte* in, word32 szIn,
     int ret = 0;
     word16 size = *szOut;
 
+    (void)in;
+    (void)szIn;
+    (void)out;
+    (void)szOut;
+
     WOLFSSL_MSG("Encrypting QSH key material");
 
     switch (key->name) {
@@ -15452,12 +16246,16 @@ static int QSH_Encrypt(QSHKey* key, byte* in, word32 szIn,
 
 
 /* Decrypt using Quantum Safe Handshake algorithms */
-int QSH_Decrypt(QSHKey* key, byte* in, word32 szIn,
-                                                       byte* out, word16* szOut)
+int QSH_Decrypt(QSHKey* key, byte* in, word32 szIn, byte* out, word16* szOut)
 {
     int ret = 0;
     word16 size = *szOut;
 
+    (void)in;
+    (void)szIn;
+    (void)out;
+    (void)szOut;
+
     WOLFSSL_MSG("Decrypting QSH key material");
 
     switch (key->name) {
@@ -15484,12 +16282,14 @@ int QSH_Decrypt(QSHKey* key, byte* in, word32 szIn,
  */
 static word32 QSH_MaxSecret(QSHKey* key)
 {
+    int ret = 0;
+#ifdef HAVE_NTRU
     byte isNtru = 0;
     word16 inSz = 48;
     word16 outSz;
     DRBG_HANDLE drbg = 0;
     byte bufIn[48];
-    int ret = 0;
+#endif
 
     if (key == NULL || key->pub.length == 0)
         return 0;
@@ -15511,6 +16311,7 @@ static word32 QSH_MaxSecret(QSHKey* key)
             return 0;
     }
 
+#ifdef HAVE_NTRU
     if (isNtru) {
         ret = ntru_crypto_drbg_external_instantiate(GetEntropy, &drbg);
         if (ret != DRBG_OK)
@@ -15521,10 +16322,11 @@ static word32 QSH_MaxSecret(QSHKey* key)
             return NTRU_ENCRYPT_ERROR;
         }
         ntru_crypto_drbg_uninstantiate(drbg);
-        return outSz;
+        ret = outSz;
     }
+#endif
 
-    return 0;
+    return ret;
 }
 
 /* Generate the secret byte material for pms
@@ -15665,59 +16467,67 @@ static word32 QSH_KeyExchangeWrite(WOLFSSL* ssl, byte isServer)
 #endif /* HAVE_QSH */
 
 
+typedef struct SckeArgs { /* working state for SendClientKeyExchange */
+    byte*  output; /* not allocated */
+    byte*  encSecret; /* released by FreeSckeArgs when non-NULL */
+    byte*  input; /* released by FreeSckeArgs when non-NULL */
+    word32 encSz; /* presumably byte count for encSecret - TODO confirm */
+    word32 length;
+    int    sendSz;
+    int    inputSz; /* presumably byte count for input - TODO confirm */
+} SckeArgs;
+
+static void FreeSckeArgs(WOLFSSL* ssl, void* pArgs) /* SckeArgs cleanup */
+{
+    SckeArgs* args = (SckeArgs*)pArgs; /* void*: set as async.freeArgs */
+
+    (void)ssl; /* presumably for builds where XFREE drops its heap arg */
+
+    if (args->encSecret) {
+        XFREE(args->encSecret, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        args->encSecret = NULL; /* a repeated call then skips this free */
+    }
+    if (args->input) {
+        XFREE(args->input, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        args->input = NULL; /* a repeated call then skips this free */
+    }
+}
+
 int SendClientKeyExchange(WOLFSSL* ssl)
 {
     int ret = 0;
-    int sendSz = 0;
-    word32 length_lcl = 0;
-    word32* length = &length_lcl;
-    byte* output = NULL;
-    byte* encSecret = NULL;
-    word32 encSz = 0;
-
-    (void)length;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    SckeArgs* args = (SckeArgs*)ssl->async.args;
+    typedef char args_test[sizeof(ssl->async.args) >= sizeof(*args) ? 1 : -1];
+    (void)sizeof(args_test);
+#else
+    SckeArgs  args[1];
+#endif
 
     WOLFSSL_ENTER("SendClientKeyExchange");
 
 #ifdef WOLFSSL_ASYNC_CRYPT
-    /* use async pointer for length */
-    length = &ssl->async.length;
-
-    ret = wolfAsync_EventPop(&ssl->event, WOLF_EVENT_TYPE_ASYNC_ANY);
+    ret = wolfSSL_AsyncPop(ssl, &ssl->options.asyncState);
     if (ret != WC_NOT_PENDING_E) {
-        WOLF_EVENT_TYPE eType = ssl->event.type;
-
-        /* Clear event */
-        XMEMSET(&ssl->event, 0, sizeof(ssl->event));
-
         /* Check for error */
-        if (ret < 0) {
+        if (ret < 0)
             goto exit_scke;
-        }
-        else {
-            /* Restore variables needed for async */
-            output = ssl->async.output;
-            sendSz = ssl->async.sendSz;
-            encSecret = ssl->async.data;
-            encSz = ssl->async.sigSz;
-
-            /* Advance key share state if not wolfCrypt */
-            if (eType == WOLF_EVENT_TYPE_ASYNC_WOLFSSL) {
-                ssl->options.keyShareState++;
-            }
-        }
     }
     else
 #endif
     {
         /* Reset state */
         ret = 0;
-        ssl->options.keyShareState = KEYSHARE_BEGIN;
+        ssl->options.asyncState = TLS_ASYNC_BEGIN;
+        XMEMSET(args, 0, sizeof(SckeArgs));
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        ssl->async.freeArgs = FreeSckeArgs;
+    #endif
     }
 
-    switch(ssl->options.keyShareState)
+    switch(ssl->options.asyncState)
     {
-        case KEYSHARE_BEGIN:
+        case TLS_ASYNC_BEGIN:
         {
             switch (ssl->specs.kea) {
             #ifndef NO_RSA
@@ -15783,20 +16593,14 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                 #endif
 
                     /* create private key */
-                    ssl->sigKey = XMALLOC(sizeof(ecc_key),
-                                               ssl->heap, DYNAMIC_TYPE_ECC);
-                    if (ssl->sigKey == NULL) {
-                        ERROR_OUT(MEMORY_E, exit_scke);
-                    }
-                    ssl->sigType = DYNAMIC_TYPE_ECC;
-
-                    ret = wc_ecc_init_ex((ecc_key*)ssl->sigKey, ssl->heap,
-                                                                ssl->devId);
+                    ssl->hsType = DYNAMIC_TYPE_ECC;
+                    ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
                     if (ret != 0) {
                         goto exit_scke;
                     }
-                    ret = EccMakeKey(ssl, (ecc_key*)ssl->sigKey,
-                                                            ssl->peerEccKey);
+
+                    ret = EccMakeKey(ssl, (ecc_key*)ssl->hsKey, ssl->peerEccKey);
+
                     break;
             #endif /* HAVE_ECC && !NO_PSK */
             #ifdef HAVE_NTRU
@@ -15839,19 +16643,14 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                     }
 
                     /* create private key */
-                    ssl->sigKey = XMALLOC(sizeof(ecc_key),
-                                               ssl->heap, DYNAMIC_TYPE_ECC);
-                    if (ssl->sigKey == NULL) {
-                        ERROR_OUT(MEMORY_E, exit_scke);
-                    }
-                    ssl->sigType = DYNAMIC_TYPE_ECC;
-
-                    ret = wc_ecc_init_ex((ecc_key*)ssl->sigKey, ssl->heap,
-                                                                ssl->devId);
+                    ssl->hsType = DYNAMIC_TYPE_ECC;
+                    ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
                     if (ret != 0) {
                         goto exit_scke;
                     }
-                    ret = EccMakeKey(ssl, (ecc_key*)ssl->sigKey, peerKey);
+
+                    ret = EccMakeKey(ssl, (ecc_key*)ssl->hsKey, peerKey);
+
                     break;
                 }
             #endif /* HAVE_ECC */
@@ -15866,15 +16665,15 @@ int SendClientKeyExchange(WOLFSSL* ssl)
             }
 
             /* Advance state and proceed */
-            ssl->options.keyShareState = KEYSHARE_BUILD;
-        } /* case KEYSHARE_BEGIN */
+            ssl->options.asyncState = TLS_ASYNC_BUILD;
+        } /* case TLS_ASYNC_BEGIN */
 
-        case KEYSHARE_BUILD:
+        case TLS_ASYNC_BUILD:
         {
-            encSz = MAX_ENCRYPT_SZ;
-            encSecret = (byte*)XMALLOC(MAX_ENCRYPT_SZ, ssl->heap,
-                                                   DYNAMIC_TYPE_TMP_BUFFER);
-            if (encSecret == NULL) {
+            args->encSz = MAX_ENCRYPT_SZ;
+            args->encSecret = (byte*)XMALLOC(args->encSz, ssl->heap,
+                                                    DYNAMIC_TYPE_TMP_BUFFER);
+            if (args->encSecret == NULL) {
                 ERROR_OUT(MEMORY_E, exit_scke);
             }
 
@@ -15904,6 +16703,26 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                     if (ssl->buffers.sig.buffer == NULL) {
                         ERROR_OUT(MEMORY_E, exit_scke);
                     }
+
+                    ret = AllocKey(ssl, DYNAMIC_TYPE_DH,
+                                            (void**)&ssl->buffers.serverDH_Key);
+                    if (ret != 0) {
+                        goto exit_scke;
+                    }
+
+                    ret = wc_DhSetKey(ssl->buffers.serverDH_Key,
+                        ssl->buffers.serverDH_P.buffer,
+                        ssl->buffers.serverDH_P.length,
+                        ssl->buffers.serverDH_G.buffer,
+                        ssl->buffers.serverDH_G.length);
+                    if (ret != 0) {
+                        goto exit_scke;
+                    }
+
+                    /* for DH, encSecret is Yc, agree is pre-master */
+                    ret = DhGenKeyPair(ssl, ssl->buffers.serverDH_Key,
+                        ssl->buffers.sig.buffer, &ssl->buffers.sig.length,
+                        args->encSecret, &args->encSz);
                     break;
                 }
             #endif /* !NO_DH */
@@ -15918,23 +16737,24 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                         ssl->arrays->psk_keySz > MAX_PSK_KEY_LEN) {
                         ERROR_OUT(PSK_KEY_ERROR, exit_scke);
                     }
-                    encSz = (word32)XSTRLEN(ssl->arrays->client_identity);
-                    if (encSz > MAX_PSK_ID_LEN) {
+                    args->encSz = (word32)XSTRLEN(ssl->arrays->client_identity);
+                    if (args->encSz > MAX_PSK_ID_LEN) {
                         ERROR_OUT(CLIENT_ID_ERROR, exit_scke);
                     }
-                    XMEMCPY(encSecret,
-                        ssl->arrays->client_identity, encSz);
+                    XMEMCPY(args->encSecret, ssl->arrays->client_identity,
+                                                                args->encSz);
 
                     /* make psk pre master secret */
                     /* length of key + length 0s + length of key + key */
                     c16toa((word16)ssl->arrays->psk_keySz, pms);
-                    pms += 2;
+                    pms += OPAQUE16_LEN;
                     XMEMSET(pms, 0, ssl->arrays->psk_keySz);
                     pms += ssl->arrays->psk_keySz;
                     c16toa((word16)ssl->arrays->psk_keySz, pms);
-                    pms += 2;
+                    pms += OPAQUE16_LEN;
                     XMEMCPY(pms, ssl->arrays->psk_key, ssl->arrays->psk_keySz);
-                    ssl->arrays->preMasterSz = ssl->arrays->psk_keySz * 2 + 4;
+                    ssl->arrays->preMasterSz = (ssl->arrays->psk_keySz * 2) +
+                        (2 * OPAQUE16_LEN);
                     ForceZero(ssl->arrays->psk_key, ssl->arrays->psk_keySz);
                     ssl->arrays->psk_keySz = 0; /* No further need */
                     break;
@@ -15944,7 +16764,7 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                 case dhe_psk_kea:
                 {
                     word32 esSz = 0;
-                    output = encSecret;
+                    args->output = args->encSecret;
 
                     ssl->arrays->psk_keySz = ssl->options.client_psk_cb(ssl,
                          ssl->arrays->server_hint, ssl->arrays->client_identity,
@@ -15966,13 +16786,33 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                         ERROR_OUT(MEMORY_E, exit_scke);
                     }
 
-                    c16toa((word16)esSz, output);
-                    output += OPAQUE16_LEN;
-                    XMEMCPY(output, ssl->arrays->client_identity, esSz);
-                    output += esSz;
-                    encSz = esSz + OPAQUE16_LEN;
+                    c16toa((word16)esSz, args->output);
+                    args->output += OPAQUE16_LEN;
+                    XMEMCPY(args->output, ssl->arrays->client_identity, esSz);
+                    args->output += esSz;
+                    args->encSz = esSz + OPAQUE16_LEN;
 
-                    *length = 0;
+                    args->length = 0;
+
+                    ret = AllocKey(ssl, DYNAMIC_TYPE_DH,
+                                            (void**)&ssl->buffers.serverDH_Key);
+                    if (ret != 0) {
+                        goto exit_scke;
+                    }
+
+                    ret = wc_DhSetKey(ssl->buffers.serverDH_Key,
+                        ssl->buffers.serverDH_P.buffer,
+                        ssl->buffers.serverDH_P.length,
+                        ssl->buffers.serverDH_G.buffer,
+                        ssl->buffers.serverDH_G.length);
+                    if (ret != 0) {
+                        goto exit_scke;
+                    }
+
+                    /* for DH, encSecret is Yc, agree is pre-master */
+                    ret = DhGenKeyPair(ssl, ssl->buffers.serverDH_Key,
+                        ssl->buffers.sig.buffer, &ssl->buffers.sig.length,
+                        args->output + OPAQUE16_LEN, &args->length);
                     break;
                 }
             #endif /* !NO_DH && !NO_PSK */
@@ -15980,7 +16820,7 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                 case ecdhe_psk_kea:
                 {
                     word32 esSz = 0;
-                    output = encSecret;
+                    args->output = args->encSecret;
 
                     /* Send PSK client identity */
                     ssl->arrays->psk_keySz = ssl->options.client_psk_cb(ssl,
@@ -15997,14 +16837,18 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                     }
 
                     /* place size and identity in output buffer sz:identity */
-                    c16toa((word16)esSz, output);
-                    output += OPAQUE16_LEN;
-                    XMEMCPY(output, ssl->arrays->client_identity, esSz);
-                    output += esSz;
-                    encSz = esSz + OPAQUE16_LEN;
+                    c16toa((word16)esSz, args->output);
+                    args->output += OPAQUE16_LEN;
+                    XMEMCPY(args->output, ssl->arrays->client_identity, esSz);
+                    args->output += esSz;
+                    args->encSz = esSz + OPAQUE16_LEN;
 
                     /* length is used for public key size */
-                    *length = MAX_ENCRYPT_SZ;
+                    args->length = MAX_ENCRYPT_SZ;
+
+                    /* Create shared ECC key leaving room at the beginning
+                       of buffer for size of shared key. */
+                    ssl->arrays->preMasterSz = ENCRYPT_LEN - OPAQUE16_LEN;
 
                 #ifdef HAVE_PK_CALLBACKS
                     /* if callback then use it for shared secret */
@@ -16013,12 +16857,13 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                     }
                 #endif
 
-                    /* Place ECC key in buffer, leaving room for size */
-                    ret = wc_ecc_export_x963((ecc_key*)ssl->sigKey,
-                                            output + OPAQUE8_LEN, length);
+                    /* Place ECC key in output buffer, leaving room for size */
+                    ret = wc_ecc_export_x963((ecc_key*)ssl->hsKey,
+                                    args->output + OPAQUE8_LEN, &args->length);
                     if (ret != 0) {
                         ERROR_OUT(ECC_EXPORT_ERROR, exit_scke);
                     }
+
                     break;
                 }
             #endif /* HAVE_ECC && !NO_PSK */
@@ -16032,14 +16877,16 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                     }
 
                     ssl->arrays->preMasterSz = SECRET_LEN;
-                    encSz = MAX_ENCRYPT_SZ;
+                    args->encSz = MAX_ENCRYPT_SZ;
                     break;
                 }
             #endif /* HAVE_NTRU */
             #ifdef HAVE_ECC
                 case ecc_diffie_hellman_kea:
                 {
-                #ifdef HAVE_PK_CALLBACKS
+                    ssl->arrays->preMasterSz = ENCRYPT_LEN;
+
+                 #ifdef HAVE_PK_CALLBACKS
                     /* if callback then use it for shared secret */
                     if (ssl->ctx->EccSharedSecretCb != NULL) {
                         break;
@@ -16047,8 +16894,8 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                 #endif
 
                     /* Place ECC key in buffer, leaving room for size */
-                    ret = wc_ecc_export_x963((ecc_key*)ssl->sigKey,
-                                        encSecret + OPAQUE8_LEN, &encSz);
+                    ret = wc_ecc_export_x963((ecc_key*)ssl->hsKey,
+                                args->encSecret + OPAQUE8_LEN, &args->encSz);
                     if (ret != 0) {
                         ERROR_OUT(ECC_EXPORT_ERROR, exit_scke);
                     }
@@ -16066,10 +16913,10 @@ int SendClientKeyExchange(WOLFSSL* ssl)
             }
 
             /* Advance state and proceed */
-            ssl->options.keyShareState = KEYSHARE_DO;
-        } /* case KEYSHARE_BUILD */
+            ssl->options.asyncState = TLS_ASYNC_DO;
+        } /* case TLS_ASYNC_BUILD */
 
-        case KEYSHARE_DO:
+        case TLS_ASYNC_DO:
         {
             switch(ssl->specs.kea)
             {
@@ -16078,7 +16925,7 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                 {
                     ret = RsaEnc(ssl,
                         ssl->arrays->preMasterSecret, SECRET_LEN,
-                        encSecret, &encSz,
+                        args->encSecret, &args->encSz,
                         ssl->peerRsaKey,
                     #if defined(HAVE_PK_CALLBACKS)
                         ssl->buffers.peerRsaKey.buffer,
@@ -16088,19 +16935,15 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                         NULL, 0, NULL
                     #endif
                     );
+
                     break;
                 }
             #endif /* !NO_RSA */
             #ifndef NO_DH
                 case diffie_hellman_kea:
                 {
-                    ret = DhAgree(ssl,
-                        ssl->buffers.serverDH_P.buffer,
-                        ssl->buffers.serverDH_P.length,
-                        ssl->buffers.serverDH_G.buffer,
-                        ssl->buffers.serverDH_G.length,
-                        ssl->buffers.sig.buffer, &ssl->buffers.sig.length,
-                        encSecret, &encSz,
+                    ret = DhAgree(ssl, ssl->buffers.serverDH_Key,
+                        ssl->buffers.sig.buffer, ssl->buffers.sig.length,
                         ssl->buffers.serverDH_Pub.buffer,
                         ssl->buffers.serverDH_Pub.length,
                         ssl->arrays->preMasterSecret,
@@ -16117,13 +16960,8 @@ int SendClientKeyExchange(WOLFSSL* ssl)
             #if !defined(NO_DH) && !defined(NO_PSK)
                 case dhe_psk_kea:
                 {
-                    ret = DhAgree(ssl,
-                        ssl->buffers.serverDH_P.buffer,
-                        ssl->buffers.serverDH_P.length,
-                        ssl->buffers.serverDH_G.buffer,
-                        ssl->buffers.serverDH_G.length,
-                        ssl->buffers.sig.buffer, &ssl->buffers.sig.length,
-                        output + OPAQUE16_LEN, length,
+                    ret = DhAgree(ssl, ssl->buffers.serverDH_Key,
+                        ssl->buffers.sig.buffer, ssl->buffers.sig.length,
                         ssl->buffers.serverDH_Pub.buffer,
                         ssl->buffers.serverDH_Pub.length,
                         ssl->arrays->preMasterSecret + OPAQUE16_LEN,
@@ -16134,13 +16972,9 @@ int SendClientKeyExchange(WOLFSSL* ssl)
             #if defined(HAVE_ECC) && !defined(NO_PSK)
                 case ecdhe_psk_kea:
                 {
-                    /* Create shared ECC key leaving room at the begining
-                       of buffer for size of shared key. */
-                    ssl->arrays->preMasterSz = ENCRYPT_LEN - OPAQUE16_LEN;
-
-                    ret = EccSharedSecret(ssl,
-                        (ecc_key*)ssl->sigKey, ssl->peerEccKey,
-                        output + OPAQUE8_LEN, length,
+                    ecc_key* key = (ecc_key*)ssl->hsKey;
+                    ret = EccSharedSecret(ssl, key, ssl->peerEccKey,
+                        args->output + OPAQUE8_LEN, &args->length,
                         ssl->arrays->preMasterSecret + OPAQUE16_LEN,
                         &ssl->arrays->preMasterSz,
                         WOLFSSL_CLIENT_END,
@@ -16167,8 +17001,8 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                                                   ssl->peerNtruKey,
                                                   ssl->arrays->preMasterSz,
                                                   ssl->arrays->preMasterSecret,
-                                                  (word16*)&encSz,
-                                                  encSecret);
+                                                  (word16*)&args->encSz,
+                                                  args->encSecret);
                     ntru_crypto_drbg_uninstantiate(drbg);
                     if (rc != NTRU_OK) {
                         ERROR_OUT(NTRU_ENCRYPT_ERROR, exit_scke);
@@ -16180,14 +17014,13 @@ int SendClientKeyExchange(WOLFSSL* ssl)
             #ifdef HAVE_ECC
                 case ecc_diffie_hellman_kea:
                 {
+                    ecc_key* key = (ecc_key*)ssl->hsKey;
                     ecc_key* peerKey = (ssl->specs.static_ecdh) ?
                                 ssl->peerEccDsaKey : ssl->peerEccKey;
 
-                    ssl->arrays->preMasterSz = ENCRYPT_LEN;
-
                     ret = EccSharedSecret(ssl,
-                        (ecc_key*)ssl->sigKey, peerKey,
-                        encSecret + OPAQUE8_LEN, &encSz,
+                        key, peerKey,
+                        args->encSecret + OPAQUE8_LEN, &args->encSz,
                         ssl->arrays->preMasterSecret,
                         &ssl->arrays->preMasterSz,
                         WOLFSSL_CLIENT_END,
@@ -16197,6 +17030,7 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                         NULL
                     #endif
                     );
+
                     break;
                 }
             #endif /* HAVE_ECC */
@@ -16211,10 +17045,10 @@ int SendClientKeyExchange(WOLFSSL* ssl)
             }
 
             /* Advance state and proceed */
-            ssl->options.keyShareState = KEYSHARE_VERIFY;
-        } /* case KEYSHARE_DO */
+            ssl->options.asyncState = TLS_ASYNC_VERIFY;
+        } /* case TLS_ASYNC_DO */
 
-        case KEYSHARE_VERIFY:
+        case TLS_ASYNC_VERIFY:
         {
             switch(ssl->specs.kea)
             {
@@ -16239,15 +17073,15 @@ int SendClientKeyExchange(WOLFSSL* ssl)
             #if !defined(NO_DH) && !defined(NO_PSK)
                 case dhe_psk_kea:
                 {
-                    byte*  pms = ssl->arrays->preMasterSecret;
+                    byte* pms = ssl->arrays->preMasterSecret;
 
                     /* validate args */
-                    if (output == NULL || *length == 0) {
+                    if (args->output == NULL || args->length == 0) {
                         ERROR_OUT(BAD_FUNC_ARG, exit_scke);
                     }
 
-                    c16toa((word16)*length, output);
-                    encSz += *length + OPAQUE16_LEN;
+                    c16toa((word16)args->length, args->output);
+                    args->encSz += args->length + OPAQUE16_LEN;
                     c16toa((word16)ssl->arrays->preMasterSz, pms);
                     ssl->arrays->preMasterSz += OPAQUE16_LEN;
                     pms += ssl->arrays->preMasterSz;
@@ -16270,13 +17104,13 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                     byte* pms = ssl->arrays->preMasterSecret;
 
                     /* validate args */
-                    if (output == NULL || *length > ENCRYPT_LEN) {
+                    if (args->output == NULL || args->length > ENCRYPT_LEN) {
                         ERROR_OUT(BAD_FUNC_ARG, exit_scke);
                     }
 
                     /* place size of public key in output buffer */
-                    *output = (byte)*length;
-                    encSz += *length + OPAQUE8_LEN;
+                    *args->output = (byte)args->length;
+                    args->encSz += args->length + OPAQUE8_LEN;
 
                     /* Create pre master secret is the concatination of
                        eccSize + eccSharedKey + pskSize + pskKey */
@@ -16305,8 +17139,8 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                 case ecc_diffie_hellman_kea:
                 {
                     /* place size of public key in buffer */
-                    *encSecret = (byte)encSz;
-                    encSz += OPAQUE8_LEN;
+                    *args->encSecret = (byte)args->encSz;
+                    args->encSz += OPAQUE8_LEN;
                     break;
                 }
             #endif /* HAVE_ECC */
@@ -16321,10 +17155,10 @@ int SendClientKeyExchange(WOLFSSL* ssl)
             }
 
             /* Advance state and proceed */
-            ssl->options.keyShareState = KEYSHARE_FINALIZE;
-        } /* case KEYSHARE_VERIFY */
+            ssl->options.asyncState = TLS_ASYNC_FINALIZE;
+        } /* case TLS_ASYNC_VERIFY */
 
-        case KEYSHARE_FINALIZE:
+        case TLS_ASYNC_FINALIZE:
         {
             word32 tlsSz = 0;
             word32 idx = 0;
@@ -16346,50 +17180,50 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                 tlsSz = 0;
             }
 
-            idx    = HANDSHAKE_HEADER_SZ + RECORD_HEADER_SZ;
-            sendSz = encSz + tlsSz + idx;
+            idx = HANDSHAKE_HEADER_SZ + RECORD_HEADER_SZ;
+            args->sendSz = args->encSz + tlsSz + idx;
 
         #ifdef WOLFSSL_DTLS
             if (ssl->options.dtls) {
                 idx    += DTLS_HANDSHAKE_EXTRA + DTLS_RECORD_EXTRA;
-                sendSz += DTLS_HANDSHAKE_EXTRA + DTLS_RECORD_EXTRA;
+                args->sendSz += DTLS_HANDSHAKE_EXTRA + DTLS_RECORD_EXTRA;
             }
         #endif
 
             if (IsEncryptionOn(ssl, 1)) {
-                sendSz += MAX_MSG_EXTRA;
+                args->sendSz += MAX_MSG_EXTRA;
             }
 
         #ifdef HAVE_QSH
-            encSz += qshSz;
-            sendSz += qshSz;
+            args->encSz += qshSz;
+            args->sendSz += qshSz;
         #endif
 
             /* check for available size */
-            if ((ret = CheckAvailableSize(ssl, sendSz)) != 0) {
+            if ((ret = CheckAvailableSize(ssl, args->sendSz)) != 0) {
                 goto exit_scke;
             }
 
             /* get output buffer */
-            output = ssl->buffers.outputBuffer.buffer +
-                     ssl->buffers.outputBuffer.length;
+            args->output = ssl->buffers.outputBuffer.buffer +
+                           ssl->buffers.outputBuffer.length;
 
         #ifdef HAVE_QSH
             if (ssl->peerQSHKeyPresent) {
                 byte idxSave = idx;
-                idx = sendSz - qshSz;
+                idx = args->sendSz - qshSz;
 
                 if (QSH_KeyExchangeWrite(ssl, 0) != 0) {
                     ERROR_OUT(MEMORY_E, exit_scke);
                 }
 
                 /* extension type */
-                c16toa(TLSX_QUANTUM_SAFE_HYBRID, output + idx);
+                c16toa(TLSX_QUANTUM_SAFE_HYBRID, args->output + idx);
                 idx += OPAQUE16_LEN;
 
                 /* write to output and check amount written */
-                if (TLSX_QSHPK_Write(ssl->QSH_secret->list, output + idx)
-                                                     > qshSz - OPAQUE16_LEN) {
+                if (TLSX_QSHPK_Write(ssl->QSH_secret->list,
+                            args->output + idx) > qshSz - OPAQUE16_LEN) {
                     ERROR_OUT(MEMORY_E, exit_scke);
                 }
 
@@ -16397,52 +17231,73 @@ int SendClientKeyExchange(WOLFSSL* ssl)
             }
         #endif
 
-            AddHeaders(output, encSz + tlsSz, client_key_exchange, ssl);
+            AddHeaders(args->output, args->encSz + tlsSz, client_key_exchange, ssl);
 
         #ifdef HAVE_QSH
             if (ssl->peerQSHKeyPresent) {
-                encSz -= qshSz;
+                args->encSz -= qshSz;
             }
         #endif
             if (tlsSz) {
-                c16toa((word16)encSz, &output[idx]);
-                idx += 2;
+                c16toa((word16)args->encSz, &args->output[idx]);
+                idx += OPAQUE16_LEN;
             }
-            XMEMCPY(output + idx, encSecret, encSz);
-            idx += encSz;
+            XMEMCPY(args->output + idx, args->encSecret, args->encSz);
+            idx += args->encSz;
 
             if (IsEncryptionOn(ssl, 1)) {
-                byte* input;
-                int   inputSz = idx-RECORD_HEADER_SZ; /* buildmsg adds rechdr */
-
-                input = (byte*)XMALLOC(inputSz, ssl->heap,
-                                       DYNAMIC_TYPE_TMP_BUFFER);
-                if (input == NULL) {
+                args->inputSz = idx - RECORD_HEADER_SZ; /* buildmsg adds rechdr */
+                args->input = (byte*)XMALLOC(args->inputSz, ssl->heap,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+                if (args->input == NULL) {
                     ERROR_OUT(MEMORY_E, exit_scke);
                 }
 
-                XMEMCPY(input, output + RECORD_HEADER_SZ, inputSz);
-                sendSz = BuildMessage(ssl, output, sendSz, input, inputSz,
-                                      handshake, 1, 0);
-                XFREE(input, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
-                if (sendSz < 0) {
-                    ERROR_OUT(sendSz, exit_scke);
+                XMEMCPY(args->input, args->output + RECORD_HEADER_SZ,
+                                                                args->inputSz);
+            }
+
+            /* Check for error */
+            if (ret != 0) {
+                goto exit_scke;
+            }
+
+            /* Advance state and proceed */
+            ssl->options.asyncState = TLS_ASYNC_END;
+        } /* case TLS_ASYNC_FINALIZE */
+
+        case TLS_ASYNC_END:
+        {
+            if (IsEncryptionOn(ssl, 1)) {
+                ret = BuildMessage(ssl, args->output, args->sendSz,
+                            args->input, args->inputSz, handshake, 1, 0, 1);
+            #ifdef WOLFSSL_ASYNC_CRYPT
+                if (ret == WC_PENDING_E)
+                    goto exit_scke;
+            #endif
+                XFREE(args->input, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                args->input = NULL; /* make sure its not double free'd on cleanup */
+
+                if (ret >= 0) {
+                    args->sendSz = ret;
+                    ret = 0;
                 }
             }
             else {
-                #ifdef WOLFSSL_DTLS
-                    if (ssl->options.dtls)
-                        DtlsSEQIncrement(ssl, CUR_ORDER);
-                #endif
-                ret = HashOutput(ssl, output, sendSz, 0);
-                if (ret != 0) {
-                    goto exit_scke;
-                }
+            #ifdef WOLFSSL_DTLS
+                if (ssl->options.dtls)
+                    DtlsSEQIncrement(ssl, CUR_ORDER);
+            #endif
+                ret = HashOutput(ssl, args->output, args->sendSz, 0);
+            }
+
+            if (ret != 0) {
+                goto exit_scke;
             }
 
         #ifdef WOLFSSL_DTLS
             if (IsDtlsNotSctpMode(ssl)) {
-                if ((ret = DtlsMsgPoolSave(ssl, output, sendSz)) != 0) {
+                if ((ret = DtlsMsgPoolSave(ssl, args->output, args->sendSz)) != 0) {
                     goto exit_scke;
                 }
             }
@@ -16453,21 +17308,10 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                 AddPacketName("ClientKeyExchange", &ssl->handShakeInfo);
             if (ssl->toInfoOn)
                 AddPacketInfo("ClientKeyExchange", &ssl->timeoutInfo,
-                              output, sendSz, ssl->heap);
+                              args->output, args->sendSz, ssl->heap);
         #endif
 
-            /* Check for error */
-            if (ret != 0) {
-                goto exit_scke;
-            }
-
-            /* Advance state and proceed */
-            ssl->options.keyShareState = KEYSHARE_END;
-        } /* case KEYSHARE_FINALIZE */
-
-        case KEYSHARE_END:
-        {
-            ssl->buffers.outputBuffer.length += sendSz;
+            ssl->buffers.outputBuffer.length += args->sendSz;
 
             if (!ssl->options.groupMessages) {
                 ret = SendBuffered(ssl);
@@ -16483,45 +17327,24 @@ int SendClientKeyExchange(WOLFSSL* ssl)
         }
         default:
             ret = INPUT_CASE_ERROR;
-    } /* switch(ssl->options.keyShareState) */
+    } /* switch(ssl->options.asyncState) */
 
 exit_scke:
 
     WOLFSSL_LEAVE("SendClientKeyExchange", ret);
 
-    /* Handle cleanup for stack variables here */
-
-
 #ifdef WOLFSSL_ASYNC_CRYPT
-    /* Handle WC_PENDING_E */
-    if (ret == WC_PENDING_E) {
-        /* Store variables needed for async */
-        length_lcl = ssl->async.length;
-        XMEMSET(&ssl->async, 0, sizeof(ssl->async));
-        ssl->async.output = output;
-        ssl->async.sendSz = sendSz;
-        ssl->async.data = encSecret;
-        ssl->async.sigSz = encSz;
-        ssl->async.length = length_lcl;
-
-        /* Push event to queue */
-        ret = wolfAsync_EventQueuePush(&ssl->ctx->event_queue, &ssl->event);
-        if (ret == 0) {
-            return WC_PENDING_E;
-        }
-    }
+    /* Handle async operation */
+    if (ret == WC_PENDING_E)
+        return ret;
 #endif
 
     /* No further need for PMS */
     ForceZero(ssl->arrays->preMasterSecret, ssl->arrays->preMasterSz);
     ssl->arrays->preMasterSz = 0;
 
-    if (encSecret) {
-        XFREE(encSecret, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
-        encSecret = NULL;
-    }
-
     /* Final cleanup */
+    FreeSckeArgs(ssl, args);
     FreeKeyExchange(ssl);
 
     return ret;
@@ -16530,83 +17353,98 @@ exit_scke:
 
 #ifndef NO_CERTS
 
+typedef struct ScvArgs { /* working state used by SendCertificateVerify */
+    byte*  output; /* not allocated; points into ssl->buffers.outputBuffer */
+#ifndef NO_RSA
+    byte*  verifySig; /* released by FreeScvArgs */
+#endif
+    byte*  verify; /* not allocated */
+    byte*  input; /* released by FreeScvArgs */
+    word32 idx; /* in/out index handed to wc_RsaPrivateKeyDecode */
+    word32 extraSz;
+    word32 sigSz;
+    int    sendSz; /* MAX_CERT_VERIFY_SZ, plus MAX_MSG_EXTRA if IsEncryptionOn */
+    int    length; /* set from wc_RsaEncryptSize() on the RSA path */
+    int    inputSz;
+} ScvArgs;
+
+static void FreeScvArgs(WOLFSSL* ssl, void* pArgs) /* async.freeArgs hook */
+{
+    ScvArgs* args = (ScvArgs*)pArgs; /* pArgs is treated as an ScvArgs */
+
+    (void)ssl; /* NOTE(review): presumably for builds where XFREE drops heap */
+
+#ifndef NO_RSA
+    if (args->verifySig) {
+        XFREE(args->verifySig, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        args->verifySig = NULL; /* cleared so a repeat call skips it */
+    }
+#endif
+    if (args->input) {
+        XFREE(args->input, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        args->input = NULL; /* cleared so a repeat call skips it */
+    }
+}
+
 int SendCertificateVerify(WOLFSSL* ssl)
 {
-    byte*  output = NULL;
-    int    sendSz = 0, length = 0, ret;
-    byte*  verify = NULL;
-    word32 idx = 0;
-    word32 extraSz = 0;
-#ifndef NO_RSA
-    byte*  verifySig = NULL;
+    int ret = 0;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    ScvArgs* args = (ScvArgs*)ssl->async.args;
+    typedef char args_test[sizeof(ssl->async.args) >= sizeof(*args) ? 1 : -1];
+    (void)sizeof(args_test);
+#else
+    ScvArgs  args[1];
 #endif
 
     WOLFSSL_ENTER("SendCertificateVerify");
 
 #ifdef WOLFSSL_ASYNC_CRYPT
-    ret = wolfAsync_EventPop(&ssl->event, WOLF_EVENT_TYPE_ASYNC_ANY);
+    ret = wolfSSL_AsyncPop(ssl, &ssl->options.asyncState);
     if (ret != WC_NOT_PENDING_E) {
-        WOLF_EVENT_TYPE eType = ssl->event.type;
-
-        /* Clear event */
-        XMEMSET(&ssl->event, 0, sizeof(ssl->event));
-
         /* Check for error */
-        if (ret < 0) {
+        if (ret < 0)
             goto exit_scv;
-        }
-        else  {
-            /* Restore variables needed for async */
-            output = ssl->async.output;
-            sendSz = ssl->async.sendSz;
-            extraSz = ssl->async.sigSz;
-            length = ssl->async.length;
-            idx = ssl->async.idx;
-        #ifndef NO_RSA
-            verifySig = ssl->async.data;
-        #endif
-
-            /* Advance key share state if not wolfCrypt */
-            if (eType == WOLF_EVENT_TYPE_ASYNC_WOLFSSL) {
-                ssl->options.keyShareState++;
-            }
-        }
     }
     else
 #endif
     {
         /* Reset state */
         ret = 0;
-        ssl->options.keyShareState = KEYSHARE_BEGIN;
+        ssl->options.asyncState = TLS_ASYNC_BEGIN;
+        XMEMSET(args, 0, sizeof(ScvArgs));
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        ssl->async.freeArgs = FreeScvArgs;
+    #endif
     }
 
-    switch(ssl->options.keyShareState)
+    switch(ssl->options.asyncState)
     {
-        case KEYSHARE_BEGIN:
+        case TLS_ASYNC_BEGIN:
         {
             if (ssl->options.sendVerify == SEND_BLANK_CERT) {
                 return 0;  /* sent blank cert, can't verify */
             }
 
-            sendSz = MAX_CERT_VERIFY_SZ;
+            args->sendSz = MAX_CERT_VERIFY_SZ;
             if (IsEncryptionOn(ssl, 1)) {
-                sendSz += MAX_MSG_EXTRA;
+                args->sendSz += MAX_MSG_EXTRA;
             }
 
             /* check for available size */
-            if ((ret = CheckAvailableSize(ssl, sendSz)) != 0) {
+            if ((ret = CheckAvailableSize(ssl, args->sendSz)) != 0) {
                 goto exit_scv;
             }
 
             /* get output buffer */
-            output = ssl->buffers.outputBuffer.buffer +
-                     ssl->buffers.outputBuffer.length;
+            args->output = ssl->buffers.outputBuffer.buffer +
+                           ssl->buffers.outputBuffer.length;
 
             /* Advance state and proceed */
-            ssl->options.keyShareState = KEYSHARE_BUILD;
-        } /* case KEYSHARE_BEGIN */
+            ssl->options.asyncState = TLS_ASYNC_BUILD;
+        } /* case TLS_ASYNC_BEGIN */
 
-        case KEYSHARE_BUILD:
+        case TLS_ASYNC_BUILD:
         {
             int keySz;
             int typeH = 0;
@@ -16616,30 +17454,30 @@ int SendCertificateVerify(WOLFSSL* ssl)
                 goto exit_scv;
             }
 
-        #ifndef NO_RSA
-            ssl->sigKey = (RsaKey*)XMALLOC(sizeof(RsaKey), ssl->heap,
-                                                            DYNAMIC_TYPE_RSA);
-            if (ssl->sigKey == NULL) {
-                ERROR_OUT(MEMORY_E, exit_scv);
+            /* make sure private key exists */
+            if (ssl->buffers.key == NULL || ssl->buffers.key->buffer == NULL) {
+                WOLFSSL_MSG("Private key missing!");
+                ERROR_OUT(NO_PRIVATE_KEY, exit_scv);
             }
-            ssl->sigType = DYNAMIC_TYPE_RSA;
 
-            ret = wc_InitRsaKey_ex((RsaKey*)ssl->sigKey, ssl->heap, ssl->devId);
+        #ifndef NO_RSA
+            ssl->hsType = DYNAMIC_TYPE_RSA;
+            ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
             if (ret != 0) {
                 goto exit_scv;
             }
 
             WOLFSSL_MSG("Trying RSA client cert");
 
-            ret = wc_RsaPrivateKeyDecode(ssl->buffers.key->buffer, &idx,
-                        (RsaKey*)ssl->sigKey, ssl->buffers.key->length);
+            ret = wc_RsaPrivateKeyDecode(ssl->buffers.key->buffer, &args->idx,
+                                (RsaKey*)ssl->hsKey, ssl->buffers.key->length);
             if (ret == 0) {
-                keySz = wc_RsaEncryptSize((RsaKey*)ssl->sigKey);
+                keySz = wc_RsaEncryptSize((RsaKey*)ssl->hsKey);
                 if (keySz < 0) { /* check if keySz has error case */
                     ERROR_OUT(keySz, exit_scv);
                 }
 
-                length = (word32)keySz;
+                args->length = (word32)keySz;
                 if (keySz < ssl->options.minRsaKeySz) {
                     WOLFSSL_MSG("RSA key size too small");
                     ERROR_OUT(RSA_KEY_SIZE_E, exit_scv);
@@ -16649,41 +17487,31 @@ int SendCertificateVerify(WOLFSSL* ssl)
         #endif /* !NO_RSA */
             {
         #ifdef HAVE_ECC
-                if (ssl->sigKey) {
-                    XFREE(ssl->sigKey, ssl->heap, DYNAMIC_TYPE_RSA);
-                }
-                ssl->sigKey = (ecc_key*)XMALLOC(sizeof(ecc_key), ssl->heap,
-                                                            DYNAMIC_TYPE_ECC);
-                if (ssl->sigKey == NULL) {
-                    ERROR_OUT(MEMORY_E, exit_scv);
-                }
-                ssl->sigType = DYNAMIC_TYPE_ECC;
+            #ifndef NO_RSA
+                FreeKey(ssl, ssl->hsType, (void**)&ssl->hsKey);
+            #endif /* !NO_RSA */
 
-                ret = wc_ecc_init_ex((ecc_key*)ssl->sigKey, ssl->heap, ssl->devId);
+                ssl->hsType = DYNAMIC_TYPE_ECC;
+                ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
                 if (ret != 0) {
                     goto exit_scv;
                 }
 
                 WOLFSSL_MSG("Trying ECC client cert, RSA didn't work");
 
-                if (ssl->buffers.key == NULL) {
-                    WOLFSSL_MSG("ECC Key missing");
-                    ERROR_OUT(NO_PRIVATE_KEY, exit_scv);
-                }
-
-                idx = 0;
-                ret = wc_EccPrivateKeyDecode(ssl->buffers.key->buffer, &idx,
-                            (ecc_key*)ssl->sigKey, ssl->buffers.key->length);
+                args->idx = 0;
+                ret = wc_EccPrivateKeyDecode(ssl->buffers.key->buffer,
+                    &args->idx, (ecc_key*)ssl->hsKey, ssl->buffers.key->length);
                 if (ret != 0) {
                     WOLFSSL_MSG("Bad client cert type");
                     goto exit_scv;
                 }
 
                 WOLFSSL_MSG("Using ECC client cert");
-                length = MAX_ENCODED_SIG_SZ;
+                args->length = MAX_ENCODED_SIG_SZ;
 
                 /* check minimum size of ECC key */
-                keySz = wc_ecc_size((ecc_key*)ssl->sigKey);
+                keySz = wc_ecc_size((ecc_key*)ssl->hsKey);
                 if (keySz < ssl->options.minEccKeySz) {
                     WOLFSSL_MSG("ECC key size too small");
                     ERROR_OUT(ECC_KEY_SIZE_E, exit_scv);
@@ -16691,11 +17519,10 @@ int SendCertificateVerify(WOLFSSL* ssl)
         #endif
             }
 
-
             /* idx is used to track verify pointer offset to output */
-            idx = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
-            verify = &output[RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ];
-            extraSz = 0;  /* tls 1.2 hash/sig */
+            args->idx = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
+            args->verify = &args->output[RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ];
+            args->extraSz = 0;  /* tls 1.2 hash/sig */
 
             /* build encoded signature buffer */
             ssl->buffers.sig.length = MAX_ENCODED_SIG_SZ;
@@ -16707,8 +17534,8 @@ int SendCertificateVerify(WOLFSSL* ssl)
 
         #ifdef WOLFSSL_DTLS
             if (ssl->options.dtls) {
-                idx += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
-                verify += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+                args->idx += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+                args->verify += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
             }
         #endif
 
@@ -16729,37 +17556,41 @@ int SendCertificateVerify(WOLFSSL* ssl)
     #endif /* !NO_OLD_TLS */
 
             if (IsAtLeastTLSv1_2(ssl)) {
-                verify[0] = ssl->suites->hashAlgo;
-                verify[1] = (ssl->sigType == DYNAMIC_TYPE_ECC) ?
+                args->verify[0] = ssl->suites->hashAlgo;
+                args->verify[1] = (ssl->hsType == DYNAMIC_TYPE_ECC) ?
                                                 ecc_dsa_sa_algo : rsa_sa_algo;
-                extraSz = HASH_SIG_SIZE;
+                args->extraSz = HASH_SIG_SIZE;
 
                 switch (ssl->suites->hashAlgo) {
                 #ifndef NO_SHA
                     case sha_mac:
                         ssl->buffers.digest.length = SHA_DIGEST_SIZE;
-                        ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha;
+                        ssl->buffers.digest.buffer =
+                            ssl->hsHashes->certHashes.sha;
                         typeH    = SHAh;
                         break;
                 #endif /* NO_SHA */
                 #ifndef NO_SHA256
                     case sha256_mac:
                         ssl->buffers.digest.length = SHA256_DIGEST_SIZE;
-                        ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha256;
+                        ssl->buffers.digest.buffer =
+                            ssl->hsHashes->certHashes.sha256;
                         typeH    = SHA256h;
                         break;
                 #endif /* !NO_SHA256 */
                 #ifdef WOLFSSL_SHA384
                     case sha384_mac:
                         ssl->buffers.digest.length = SHA384_DIGEST_SIZE;
-                        ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha384;
+                        ssl->buffers.digest.buffer =
+                            ssl->hsHashes->certHashes.sha384;
                         typeH    = SHA384h;
                         break;
                 #endif /* WOLFSSL_SHA384 */
                 #ifdef WOLFSSL_SHA512
                     case sha512_mac:
                         ssl->buffers.digest.length = SHA512_DIGEST_SIZE;
-                        ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha512;
+                        ssl->buffers.digest.buffer =
+                            ssl->hsHashes->certHashes.sha512;
                         typeH    = SHA512h;
                         break;
                 #endif /* WOLFSSL_SHA512 */
@@ -16778,9 +17609,9 @@ int SendCertificateVerify(WOLFSSL* ssl)
             }
 
         #ifndef NO_RSA
-            if (ssl->sigType == DYNAMIC_TYPE_RSA) {
+            if (ssl->hsType == DYNAMIC_TYPE_RSA) {
                 ssl->buffers.sig.length = FINISHED_SZ;
-                ssl->sigLen = ENCRYPT_LEN;
+                args->sigSz = ENCRYPT_LEN;
 
                 if (IsAtLeastTLSv1_2(ssl)) {
                     ssl->buffers.sig.length = wc_EncodeSignature(
@@ -16788,22 +17619,25 @@ int SendCertificateVerify(WOLFSSL* ssl)
                             ssl->buffers.digest.length, typeH);
                 }
 
-                c16toa((word16)length, verify + extraSz); /* prepend hdr */
+                /* prepend hdr */
+                c16toa((word16)args->length, args->verify + args->extraSz);
             }
         #endif /* !NO_RSA */
 
             /* Advance state and proceed */
-            ssl->options.keyShareState = KEYSHARE_DO;
-        } /* case KEYSHARE_BUILD */
+            ssl->options.asyncState = TLS_ASYNC_DO;
+        } /* case TLS_ASYNC_BUILD */
 
-        case KEYSHARE_DO:
+        case TLS_ASYNC_DO:
         {
         #ifdef HAVE_ECC
-           if (ssl->sigType == DYNAMIC_TYPE_ECC) {
+           if (ssl->hsType == DYNAMIC_TYPE_ECC) {
+                ecc_key* key = (ecc_key*)ssl->hsKey;
+
                 ret = EccSign(ssl,
                     ssl->buffers.digest.buffer, ssl->buffers.digest.length,
                     ssl->buffers.sig.buffer, &ssl->buffers.sig.length,
-                    (ecc_key*)ssl->sigKey,
+                    key,
             #if defined(HAVE_PK_CALLBACKS)
                     ssl->buffers.key->buffer,
                     ssl->buffers.key->length,
@@ -16815,14 +17649,16 @@ int SendCertificateVerify(WOLFSSL* ssl)
             }
         #endif /* HAVE_ECC */
         #ifndef NO_RSA
-            if (ssl->sigType == DYNAMIC_TYPE_RSA) {
+            if (ssl->hsType == DYNAMIC_TYPE_RSA) {
+                RsaKey* key = (RsaKey*)ssl->hsKey;
+
                 /* restore verify pointer */
-                verify = &output[idx];
+                args->verify = &args->output[args->idx];
 
                 ret = RsaSign(ssl,
                     ssl->buffers.sig.buffer, ssl->buffers.sig.length,
-                    verify + extraSz + VERIFY_HEADER, &ssl->sigLen,
-                    (RsaKey*)ssl->sigKey,
+                    args->verify + args->extraSz + VERIFY_HEADER, &args->sigSz,
+                    key,
                     ssl->buffers.key->buffer,
                     ssl->buffers.key->length,
                 #ifdef HAVE_PK_CALLBACKS
@@ -16840,39 +17676,44 @@ int SendCertificateVerify(WOLFSSL* ssl)
             }
 
             /* Advance state and proceed */
-            ssl->options.keyShareState = KEYSHARE_VERIFY;
-        } /* case KEYSHARE_DO */
+            ssl->options.asyncState = TLS_ASYNC_VERIFY;
+        } /* case TLS_ASYNC_DO */
 
-        case KEYSHARE_VERIFY:
+        case TLS_ASYNC_VERIFY:
         {
             /* restore verify pointer */
-            verify = &output[idx];
+            args->verify = &args->output[args->idx];
 
         #ifdef HAVE_ECC
-            if (ssl->sigType == DYNAMIC_TYPE_ECC) {
-                length = ssl->buffers.sig.length;
-                c16toa((word16)ssl->buffers.sig.length, verify + extraSz); /* prepend hdr */
-                XMEMCPY(verify + extraSz + VERIFY_HEADER,
+            if (ssl->hsType == DYNAMIC_TYPE_ECC) {
+                args->length = ssl->buffers.sig.length;
+                /* prepend hdr */
+                c16toa((word16)ssl->buffers.sig.length, args->verify +
+                                                                args->extraSz);
+                XMEMCPY(args->verify + args->extraSz + VERIFY_HEADER,
                         ssl->buffers.sig.buffer, ssl->buffers.sig.length);
             }
         #endif /* HAVE_ECC */
         #ifndef NO_RSA
-            if (ssl->sigType == DYNAMIC_TYPE_RSA) {
-                if (verifySig == NULL) {
-                    verifySig = (byte*)XMALLOC(ssl->sigLen, ssl->heap,
+            if (ssl->hsType == DYNAMIC_TYPE_RSA) {
+                RsaKey* key = (RsaKey*)ssl->hsKey;
+
+                if (args->verifySig == NULL) {
+                    args->verifySig = (byte*)XMALLOC(args->sigSz, ssl->heap,
                                       DYNAMIC_TYPE_TMP_BUFFER);
-                    if (verifySig == NULL) {
+                    if (args->verifySig == NULL) {
                         ERROR_OUT(MEMORY_E, exit_scv);
                     }
-                    XMEMCPY(verifySig, verify + extraSz + VERIFY_HEADER,
-                                                                ssl->sigLen);
+                    XMEMCPY(args->verifySig, args->verify + args->extraSz +
+                                                    VERIFY_HEADER, args->sigSz);
                 }
 
                 /* check for signature faults */
                 ret = VerifyRsaSign(ssl,
-                    verifySig, ssl->sigLen,
+                    args->verifySig, args->sigSz,
                     ssl->buffers.sig.buffer, ssl->buffers.sig.length,
-                    (RsaKey*)ssl->sigKey);
+                    key
+                );
             }
         #endif /* !NO_RSA */
 
@@ -16882,56 +17723,38 @@ int SendCertificateVerify(WOLFSSL* ssl)
             }
 
             /* Advance state and proceed */
-            ssl->options.keyShareState = KEYSHARE_FINALIZE;
-        } /* case KEYSHARE_VERIFY */
+            ssl->options.asyncState = TLS_ASYNC_FINALIZE;
+        } /* case TLS_ASYNC_VERIFY */
 
-        case KEYSHARE_FINALIZE:
+        case TLS_ASYNC_FINALIZE:
         {
-            AddHeaders(output, length + extraSz + VERIFY_HEADER,
-                                                   certificate_verify, ssl);
+            if (args->output == NULL) {
+                ERROR_OUT(BUFFER_ERROR, exit_scv);
+            }
+            AddHeaders(args->output, args->length + args->extraSz +
+                                        VERIFY_HEADER, certificate_verify, ssl);
 
-            sendSz = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ + length +
-                                                     extraSz + VERIFY_HEADER;
+            args->sendSz = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ +
+                                    args->length + args->extraSz + VERIFY_HEADER;
 
         #ifdef WOLFSSL_DTLS
             if (ssl->options.dtls) {
-                sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+                args->sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
             }
         #endif
 
             if (IsEncryptionOn(ssl, 1)) {
-                byte* input;
-                int   inputSz = sendSz - RECORD_HEADER_SZ;
+                args->inputSz = args->sendSz - RECORD_HEADER_SZ;
                                 /* build msg adds rec hdr */
-                input = (byte*)XMALLOC(inputSz, ssl->heap,
-                                       DYNAMIC_TYPE_TMP_BUFFER);
-                if (input == NULL) {
+                args->input = (byte*)XMALLOC(args->inputSz, ssl->heap,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+                if (args->input == NULL) {
                     ERROR_OUT(MEMORY_E, exit_scv);
                 }
 
-                XMEMCPY(input, output + RECORD_HEADER_SZ, inputSz);
-                sendSz = BuildMessage(ssl, output,
-                                      MAX_CERT_VERIFY_SZ +MAX_MSG_EXTRA,
-                                      input, inputSz, handshake, 1, 0);
-                XFREE(input, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
-
-                if (sendSz < 0) {
-                    ret = sendSz;
-                }
+                XMEMCPY(args->input, args->output + RECORD_HEADER_SZ,
+                                                                args->inputSz);
             }
-            else {
-                #ifdef WOLFSSL_DTLS
-                    if (ssl->options.dtls)
-                        DtlsSEQIncrement(ssl, CUR_ORDER);
-                #endif
-                ret = HashOutput(ssl, output, sendSz, 0);
-            }
-
-        #ifdef WOLFSSL_DTLS
-            if (IsDtlsNotSctpMode(ssl)) {
-                ret = DtlsMsgPoolSave(ssl, output, sendSz);
-            }
-        #endif
 
             /* Check for error */
             if (ret != 0) {
@@ -16939,20 +17762,57 @@ int SendCertificateVerify(WOLFSSL* ssl)
             }
 
             /* Advance state and proceed */
-            ssl->options.keyShareState = KEYSHARE_END;
-        } /* case KEYSHARE_FINALIZE */
+            ssl->options.asyncState = TLS_ASYNC_END;
+        } /* case TLS_ASYNC_FINALIZE */
 
-        case KEYSHARE_END:
+        case TLS_ASYNC_END:
         {
+            if (IsEncryptionOn(ssl, 1)) {
+                ret = BuildMessage(ssl, args->output,
+                                      MAX_CERT_VERIFY_SZ + MAX_MSG_EXTRA,
+                                      args->input, args->inputSz, handshake,
+                                      1, 0, 1);
+            #ifdef WOLFSSL_ASYNC_CRYPT
+                if (ret == WC_PENDING_E)
+                    goto exit_scv;
+            #endif
+
+                XFREE(args->input, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                args->input = NULL;  /* make sure its not double free'd on cleanup */
+
+                if (ret >= 0) {
+                    args->sendSz = ret;
+                    ret = 0;
+                }
+            }
+            else {
+            #ifdef WOLFSSL_DTLS
+                if (ssl->options.dtls)
+                    DtlsSEQIncrement(ssl, CUR_ORDER);
+            #endif
+                ret = HashOutput(ssl, args->output, args->sendSz, 0);
+            }
+
+            if (ret != 0) {
+                goto exit_scv;
+            }
+
+        #ifdef WOLFSSL_DTLS
+            if (IsDtlsNotSctpMode(ssl)) {
+                ret = DtlsMsgPoolSave(ssl, args->output, args->sendSz);
+            }
+        #endif
+
+
         #ifdef WOLFSSL_CALLBACKS
             if (ssl->hsInfoOn)
                 AddPacketName("CertificateVerify", &ssl->handShakeInfo);
             if (ssl->toInfoOn)
                 AddPacketInfo("CertificateVerify", &ssl->timeoutInfo,
-                              output, sendSz, ssl->heap);
+                              args->output, args->sendSz, ssl->heap);
         #endif
 
-            ssl->buffers.outputBuffer.length += sendSz;
+            ssl->buffers.outputBuffer.length += args->sendSz;
 
             if (!ssl->options.groupMessages) {
                 ret = SendBuffered(ssl);
@@ -16961,49 +17821,25 @@ int SendCertificateVerify(WOLFSSL* ssl)
         }
         default:
             ret = INPUT_CASE_ERROR;
-    } /* switch(ssl->options.keyShareState) */
+    } /* switch(ssl->options.asyncState) */
 
 exit_scv:
 
     WOLFSSL_LEAVE("SendCertificateVerify", ret);
 
-    /* Handle cleanup for stack variables here */
-
-
 #ifdef WOLFSSL_ASYNC_CRYPT
-    /* Handle WC_PENDING_E */
+    /* Handle async operation */
     if (ret == WC_PENDING_E) {
-        /* Store variables needed for async */
-        XMEMSET(&ssl->async, 0, sizeof(ssl->async));
-        ssl->async.output = output;
-        ssl->async.sendSz = sendSz;
-        ssl->async.sigSz = extraSz;
-        ssl->async.length = length;
-        ssl->async.idx = idx;
-    #ifndef NO_RSA
-        ssl->async.data = verifySig;
-    #endif
-
-        /* Push event to queue */
-        ret = wolfAsync_EventQueuePush(&ssl->ctx->event_queue, &ssl->event);
-        if (ret == 0) {
-            return WC_PENDING_E;
-        }
+        return ret;
     }
-#endif
-
-#ifndef NO_RSA
-    if (verifySig) {
-        XFREE(verifySig, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
-        verifySig = NULL;
-    }
-#endif
+#endif /* WOLFSSL_ASYNC_CRYPT */
 
     /* Digest is not allocated, so do this to prevent free */
     ssl->buffers.digest.buffer = NULL;
     ssl->buffers.digest.length = 0;
 
     /* Final cleanup */
+    FreeScvArgs(ssl, args);
     FreeKeyExchange(ssl);
 
     return ret;
@@ -17109,6 +17945,8 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
         int                sendSz;
         int                ret;
         byte               sessIdSz = ID_LEN;
+        byte               echoId   = 0;  /* ticket echo id flag */
+        byte               cacheOff = 0;  /* session cache off flag */
 
         length = VERSION_SZ + RAN_LEN
                + ID_LEN + ENUM_LEN
@@ -17126,6 +17964,7 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 return BUFFER_ERROR;
             }
             length -= (ID_LEN - sessIdSz);  /* adjust ID_LEN assumption */
+            echoId = 1;
         }
     #endif /* HAVE_SESSION_TICKET */
 #else
@@ -17134,6 +17973,22 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
         }
 #endif
 
+        /* is the session cache off at build or runtime */
+#ifdef NO_SESSION_CACHE
+        cacheOff = 1;
+#else
+        if (ssl->options.sessionCacheOff == 1) {
+            cacheOff = 1;
+        }
+#endif
+
+        /* if no session cache don't send a session ID unless we're echoing
+         * an ID as part of session tickets */
+        if (echoId == 0 && cacheOff == 1) {
+            length -= ID_LEN;    /* adjust ID_LEN assumption */
+            sessIdSz = 0;
+        }
+
         /* check for avalaible size */
         if ((ret = CheckAvailableSize(ssl, MAX_HELLO_SZ)) != 0)
             return ret;
@@ -17343,80 +18198,102 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
 #endif /* HAVE_ECC */
 
+    typedef struct SskeArgs { /* working state for SendServerKeyExchange */
+        byte*  output; /* not allocated */
+    #if defined(HAVE_ECC) || (!defined(NO_DH) && !defined(NO_RSA))
+        byte*  sigDataBuf; /* heap buffer, released by FreeSskeArgs */
+    #endif
+    #if defined(HAVE_ECC)
+        byte*  exportBuf;  /* heap buffer, released by FreeSskeArgs */
+    #endif
+    #ifndef NO_RSA
+        byte*  verifySig;  /* heap buffer, released by FreeSskeArgs */
+    #endif
+        word32 idx;    /* offset into output; psk_kea starts it past the hdrs */
+        word32 tmpSigSz;
+        word32 length; /* message length without record/handshake headers */
+        word32 sigSz;
+    #if defined(HAVE_ECC) || (!defined(NO_DH) && !defined(NO_RSA))
+        word32 sigDataSz; /* presumably size of sigDataBuf - TODO confirm */
+    #endif
+    #if defined(HAVE_ECC)
+        word32 exportSz;  /* presumably size of exportBuf - TODO confirm */
+    #endif
+    #ifdef HAVE_QSH
+        word32 qshSz;  /* QSH_KeyGetSize(ssl) if peerQSHKeyPresent, else 0 */
+    #endif
+        int    sendSz; /* length plus header sizes (psk_kea path) */
+    } SskeArgs;
+
+    static void FreeSskeArgs(WOLFSSL* ssl, void* pArgs)
+    {   /* frees the heap buffers held in an SskeArgs and NULLs each pointer */
+        SskeArgs* args = (SskeArgs*)pArgs; /* pArgs is void*: this function is stored in ssl->async.freeArgs */
+
+        (void)ssl; /* unreferenced if none of the sections below are built */
+
+    #if defined(HAVE_ECC)
+        if (args->exportBuf) {
+            XFREE(args->exportBuf, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            args->exportBuf = NULL; /* cleared so a repeat call frees nothing */
+        }
+    #endif
+    #if defined(HAVE_ECC) || (!defined(NO_DH) && !defined(NO_RSA))
+        if (args->sigDataBuf) {
+            XFREE(args->sigDataBuf, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            args->sigDataBuf = NULL; /* cleared so a repeat call frees nothing */
+        }
+    #endif
+    #ifndef NO_RSA
+        if (args->verifySig) {
+            XFREE(args->verifySig, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            args->verifySig = NULL; /* cleared so a repeat call frees nothing */
+        }
+    #endif
+        (void)args; /* unreferenced if none of the sections above are built */
+    }
 
     int SendServerKeyExchange(WOLFSSL* ssl)
     {
         int ret;
-        int sendSz = 0;
-        byte *output = NULL;
-        word32 idx = 0, sigSz = 0, length = 0;
-    #if defined(HAVE_ECC) || (!defined(NO_DH) && !defined(NO_RSA))
-        byte *sigDataBuf = NULL;
-        word32 sigDataSz = 0;
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        SskeArgs* args = (SskeArgs*)ssl->async.args;
+        typedef char args_test[sizeof(ssl->async.args) >= sizeof(*args) ? 1 : -1];
+        (void)sizeof(args_test);
+    #else
+        SskeArgs  args[1];
     #endif
-    #if defined(HAVE_ECC)
-        byte *exportBuf = NULL;
-        word32 exportSz = 0;
-    #endif
-
-    #ifdef HAVE_QSH
-        word32 qshSz = 0;
-        if (ssl->peerQSHKeyPresent) {
-            qshSz = QSH_KeyGetSize(ssl);
-        }
-    #endif
-    #ifndef NO_RSA
-        byte* verifySig = NULL;
-    #endif
-
-        (void)ssl;
-        (void)sigSz;
-        (void)length;
-        (void)idx;
 
         WOLFSSL_ENTER("SendServerKeyExchange");
 
     #ifdef WOLFSSL_ASYNC_CRYPT
-        ret = wolfAsync_EventPop(&ssl->event, WOLF_EVENT_TYPE_ASYNC_ANY);
+        ret = wolfSSL_AsyncPop(ssl, &ssl->options.asyncState);
         if (ret != WC_NOT_PENDING_E) {
-            WOLF_EVENT_TYPE eType = ssl->event.type;
-
-            /* Clear event */
-            XMEMSET(&ssl->event, 0, sizeof(ssl->event));
-
             /* Check for error */
-            if (ret < 0) {
+            if (ret < 0)
                 goto exit_sske;
-            }
-            else  {
-                /* Restore variables needed for async */
-                output = ssl->async.output;
-                sendSz = ssl->async.sendSz;
-                idx = ssl->async.idx;
-                sigSz = ssl->async.sigSz;
-                length = ssl->async.length;
-            #ifndef NO_RSA
-                verifySig = ssl->async.data;
-            #endif
-
-                /* Advance key share state if not wolfCrypt */
-                if (eType == WOLF_EVENT_TYPE_ASYNC_WOLFSSL) {
-                    ssl->options.keyShareState++;
-                }
-            }
         }
         else
     #endif
         {
             /* Reset state */
             ret = 0;
-            ssl->options.keyShareState = KEYSHARE_BEGIN;
+            ssl->options.asyncState = TLS_ASYNC_BEGIN;
+            XMEMSET(args, 0, sizeof(SskeArgs));
+        #ifdef WOLFSSL_ASYNC_CRYPT
+            ssl->async.freeArgs = FreeSskeArgs;
+        #endif
         }
 
-        switch(ssl->options.keyShareState)
+        switch(ssl->options.asyncState)
         {
-            case KEYSHARE_BEGIN:
+            case TLS_ASYNC_BEGIN:
             {
+            #ifdef HAVE_QSH
+                if (ssl->peerQSHKeyPresent) {
+                    args->qshSz = QSH_KeyGetSize(ssl);
+                }
+            #endif
+
                 /* Do some checks / debug msgs */
                 switch(ssl->specs.kea)
                 {
@@ -17435,7 +18312,9 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                             ERROR_OUT(0, exit_sske);
                         }
 
-                        if (!ssl->buffers.key->buffer) {
+                        /* make sure private key exists */
+                        if (ssl->buffers.key == NULL ||
+                                            ssl->buffers.key->buffer == NULL) {
                             ERROR_OUT(NO_PRIVATE_KEY, exit_sske);
                         }
 
@@ -17472,8 +18351,8 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         if (ssl->buffers.serverDH_Pub.buffer == NULL) {
                             /* Free'd in SSL_ResourceFree and FreeHandshakeResources */
                             ssl->buffers.serverDH_Pub.buffer = (byte*)XMALLOC(
-                                    ssl->buffers.serverDH_P.length + 2, ssl->heap,
-                                    DYNAMIC_TYPE_DH);
+                                    ssl->buffers.serverDH_P.length + OPAQUE16_LEN,
+                                    ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
                             if (ssl->buffers.serverDH_Pub.buffer == NULL) {
                                 ERROR_OUT(MEMORY_E, exit_sske);
                             }
@@ -17482,8 +18361,8 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         if (ssl->buffers.serverDH_Priv.buffer == NULL) {
                             /* Free'd in SSL_ResourceFree and FreeHandshakeResources */
                             ssl->buffers.serverDH_Priv.buffer = (byte*)XMALLOC(
-                                    ssl->buffers.serverDH_P.length + 2, ssl->heap,
-                                    DYNAMIC_TYPE_DH);
+                                    ssl->buffers.serverDH_P.length + OPAQUE16_LEN,
+                                    ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
                             if (ssl->buffers.serverDH_Priv.buffer == NULL) {
                                 ERROR_OUT(MEMORY_E, exit_sske);
                             }
@@ -17492,18 +18371,29 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         ssl->options.dhKeySz =
                                 (word16)ssl->buffers.serverDH_P.length;
 
-                        ret = DhGenKeyPair(ssl,
+                        ret = AllocKey(ssl, DYNAMIC_TYPE_DH,
+                                            (void**)&ssl->buffers.serverDH_Key);
+                        if (ret != 0) {
+                            goto exit_sske;
+                        }
+
+                        ret = wc_DhSetKey(ssl->buffers.serverDH_Key,
                             ssl->buffers.serverDH_P.buffer,
                             ssl->buffers.serverDH_P.length,
                             ssl->buffers.serverDH_G.buffer,
-                            ssl->buffers.serverDH_G.length,
+                            ssl->buffers.serverDH_G.length);
+                        if (ret != 0) {
+                            goto exit_sske;
+                        }
+
+                        ret = DhGenKeyPair(ssl, ssl->buffers.serverDH_Key,
                             ssl->buffers.serverDH_Priv.buffer,
                             &ssl->buffers.serverDH_Priv.length,
                             ssl->buffers.serverDH_Pub.buffer,
                             &ssl->buffers.serverDH_Pub.length);
                         break;
                     }
-                #endif /* !defined(NO_DH) && (!defined(NO_PSK) || !defined(NO_RSA)) */
+                #endif /* !NO_DH && (!NO_PSK || !NO_RSA) */
                 #if defined(HAVE_ECC) && !defined(NO_PSK)
                     case ecdhe_psk_kea:
                         /* Fall through to create temp ECC key */
@@ -17514,19 +18404,16 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         /* need ephemeral key now, create it if missing */
                         if (ssl->eccTempKey == NULL) {
                             /* alloc/init on demand */
-                            ssl->eccTempKey = (ecc_key*)XMALLOC(sizeof(ecc_key),
-                                                         ssl->heap, DYNAMIC_TYPE_ECC);
-                            if (ssl->eccTempKey == NULL) {
-                                WOLFSSL_MSG("EccTempKey Memory error");
-                                ERROR_OUT(MEMORY_E, exit_sske);
-                            }
-                            ret = wc_ecc_init_ex(ssl->eccTempKey, ssl->heap, ssl->devId);
-                            if (ret != 0)
+                            ret = AllocKey(ssl, DYNAMIC_TYPE_ECC,
+                                (void**)&ssl->eccTempKey);
+                            if (ret != 0) {
                                 goto exit_sske;
+                            }
                         }
 
                         if (ssl->eccTempKeyPresent == 0) {
-                            /* TODO: Need to first do wc_EccPrivateKeyDecode, then we know curve dp */
+                            /* TODO: Need to first do wc_EccPrivateKeyDecode,
+                                then we know curve dp */
                             ret = EccMakeKey(ssl, ssl->eccTempKey, NULL);
                             if (ret == 0 || ret == WC_PENDING_E) {
                                 ssl->eccTempKeyPresent = 1;
@@ -17546,10 +18433,10 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 }
 
                 /* Advance state and proceed */
-                ssl->options.keyShareState = KEYSHARE_BUILD;
-            } /* case KEYSHARE_BEGIN */
+                ssl->options.asyncState = TLS_ASYNC_BUILD;
+            } /* case TLS_ASYNC_BEGIN */
 
-            case KEYSHARE_BUILD:
+            case TLS_ASYNC_BUILD:
             {
             #if (!defined(NO_DH) && !defined(NO_RSA)) || defined(HAVE_ECC)
                 word32 preSigSz, preSigIdx;
@@ -17560,52 +18447,58 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 #ifndef NO_PSK
                     case psk_kea:
                     {
-                        idx = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
+                        args->idx = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
 
                         if (ssl->arrays->server_hint[0] == 0) {
                             ERROR_OUT(0, exit_sske); /* don't send */
                         }
 
                         /* include size part */
-                        length = (word32)XSTRLEN(ssl->arrays->server_hint);
-                        if (length > MAX_PSK_ID_LEN) {
+                        args->length = (word32)XSTRLEN(ssl->arrays->server_hint);
+                        if (args->length > MAX_PSK_ID_LEN) {
                             ERROR_OUT(SERVER_HINT_ERROR, exit_sske);
                         }
 
-                        length += HINT_LEN_SZ;
-                        sendSz = length + HANDSHAKE_HEADER_SZ + RECORD_HEADER_SZ;
+                        args->length += HINT_LEN_SZ;
+                        args->sendSz = args->length + HANDSHAKE_HEADER_SZ +
+                                                            RECORD_HEADER_SZ;
 
                     #ifdef HAVE_QSH
-                        length += qshSz;
-                        sendSz += qshSz;
+                        args->length += args->qshSz;
+                        args->sendSz += args->qshSz;
                     #endif
 
                     #ifdef WOLFSSL_DTLS
                         if (ssl->options.dtls) {
-                            sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
-                            idx    += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+                            args->sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+                            args->idx    += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
                         }
                     #endif
                         /* check for available size */
-                        if ((ret = CheckAvailableSize(ssl, sendSz)) != 0) {
+                        if ((ret = CheckAvailableSize(ssl, args->sendSz)) != 0) {
                             goto exit_sske;
                         }
 
                         /* get ouput buffer */
-                        output = ssl->buffers.outputBuffer.buffer +
-                                 ssl->buffers.outputBuffer.length;
+                        args->output = ssl->buffers.outputBuffer.buffer +
+                                       ssl->buffers.outputBuffer.length;
 
-                        AddHeaders(output, length, server_key_exchange, ssl);
+                        AddHeaders(args->output, args->length,
+                                                    server_key_exchange, ssl);
 
                         /* key data */
                     #ifdef HAVE_QSH
-                        c16toa((word16)(length - qshSz - HINT_LEN_SZ), output + idx);
+                        c16toa((word16)(args->length - args->qshSz -
+                                        HINT_LEN_SZ), args->output + args->idx);
                     #else
-                        c16toa((word16)(length - HINT_LEN_SZ), output + idx);
+                        c16toa((word16)(args->length - HINT_LEN_SZ),
+                                                      args->output + args->idx);
                     #endif
 
-                        idx += HINT_LEN_SZ;
-                        XMEMCPY(output + idx, ssl->arrays->server_hint, length - HINT_LEN_SZ);
+                        args->idx += HINT_LEN_SZ;
+                        XMEMCPY(args->output + args->idx,
+                                ssl->arrays->server_hint,
+                                args->length - HINT_LEN_SZ);
                         break;
                     }
                 #endif /* !NO_PSK */
@@ -17614,8 +18507,8 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                     {
                         word32 hintLen;
 
-                        idx = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
-                        length = LENGTH_SZ * 3 + /* p, g, pub */
+                        args->idx = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
+                        args->length = LENGTH_SZ * 3 + /* p, g, pub */
                                  ssl->buffers.serverDH_P.length +
                                  ssl->buffers.serverDH_G.length +
                                  ssl->buffers.serverDH_Pub.length;
@@ -17625,58 +18518,67 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         if (hintLen > MAX_PSK_ID_LEN) {
                             ERROR_OUT(SERVER_HINT_ERROR, exit_sske);
                         }
-                        length += hintLen + HINT_LEN_SZ;
-                        sendSz = length + HANDSHAKE_HEADER_SZ + RECORD_HEADER_SZ;
+                        args->length += hintLen + HINT_LEN_SZ;
+                        args->sendSz = args->length + HANDSHAKE_HEADER_SZ +
+                                                            RECORD_HEADER_SZ;
 
                     #ifdef HAVE_QSH
-                        length += qshSz;
-                        sendSz += qshSz;
+                        args->length += args->qshSz;
+                        args->sendSz += args->qshSz;
                     #endif
                     #ifdef WOLFSSL_DTLS
                         if (ssl->options.dtls) {
-                            sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
-                            idx    += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+                            args->sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+                            args->idx    += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
                         }
                     #endif
 
                         /* check for available size */
-                        if ((ret = CheckAvailableSize(ssl, sendSz)) != 0) {
+                        if ((ret = CheckAvailableSize(ssl, args->sendSz)) != 0) {
                             goto exit_sske;
                         }
 
                         /* get ouput buffer */
-                        output = ssl->buffers.outputBuffer.buffer +
-                                 ssl->buffers.outputBuffer.length;
+                        args->output = ssl->buffers.outputBuffer.buffer +
+                                       ssl->buffers.outputBuffer.length;
 
-                        AddHeaders(output, length, server_key_exchange, ssl);
+                        AddHeaders(args->output, args->length,
+                                                    server_key_exchange, ssl);
 
                         /* key data */
-                        c16toa((word16)hintLen, output + idx);
-                        idx += HINT_LEN_SZ;
-                        XMEMCPY(output + idx, ssl->arrays->server_hint, hintLen);
-                        idx += hintLen;
+                        c16toa((word16)hintLen, args->output + args->idx);
+                        args->idx += HINT_LEN_SZ;
+                        XMEMCPY(args->output + args->idx,
+                                            ssl->arrays->server_hint, hintLen);
+                        args->idx += hintLen;
 
                         /* add p, g, pub */
-                        c16toa((word16)ssl->buffers.serverDH_P.length, output + idx);
-                        idx += LENGTH_SZ;
-                        XMEMCPY(output + idx, ssl->buffers.serverDH_P.buffer,
-                                              ssl->buffers.serverDH_P.length);
-                        idx += ssl->buffers.serverDH_P.length;
+                        c16toa((word16)ssl->buffers.serverDH_P.length,
+                            args->output + args->idx);
+                        args->idx += LENGTH_SZ;
+                        XMEMCPY(args->output + args->idx,
+                                ssl->buffers.serverDH_P.buffer,
+                                ssl->buffers.serverDH_P.length);
+                        args->idx += ssl->buffers.serverDH_P.length;
 
                         /*  g */
-                        c16toa((word16)ssl->buffers.serverDH_G.length, output + idx);
-                        idx += LENGTH_SZ;
-                        XMEMCPY(output + idx, ssl->buffers.serverDH_G.buffer,
-                                              ssl->buffers.serverDH_G.length);
-                        idx += ssl->buffers.serverDH_G.length;
+                        c16toa((word16)ssl->buffers.serverDH_G.length,
+                            args->output + args->idx);
+                        args->idx += LENGTH_SZ;
+                        XMEMCPY(args->output + args->idx,
+                                ssl->buffers.serverDH_G.buffer,
+                                ssl->buffers.serverDH_G.length);
+                        args->idx += ssl->buffers.serverDH_G.length;
 
                         /*  pub */
-                        c16toa((word16)ssl->buffers.serverDH_Pub.length, output + idx);
-                        idx += LENGTH_SZ;
-                        XMEMCPY(output + idx, ssl->buffers.serverDH_Pub.buffer,
-                                              ssl->buffers.serverDH_Pub.length);
+                        c16toa((word16)ssl->buffers.serverDH_Pub.length,
+                            args->output + args->idx);
+                        args->idx += LENGTH_SZ;
+                        XMEMCPY(args->output + args->idx,
+                                ssl->buffers.serverDH_Pub.buffer,
+                                ssl->buffers.serverDH_Pub.length);
                         /* No need to update idx, since sizes are already set */
-                        /* idx += ssl->buffers.serverDH_Pub.length; */
+                        /* args->idx += ssl->buffers.serverDH_Pub.length; */
                         break;
                     }
                 #endif /* !defined(NO_DH) && !defined(NO_PSK) */
@@ -17686,59 +18588,62 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         word32 hintLen;
 
                         /* curve type, named curve, length(1) */
-                        idx = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
-                        length = ENUM_LEN + CURVE_LEN + ENUM_LEN;
+                        args->idx = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
+                        args->length = ENUM_LEN + CURVE_LEN + ENUM_LEN;
 
-                        exportSz = MAX_EXPORT_ECC_SZ;
-                        exportBuf = (byte*)XMALLOC(exportSz, ssl->heap,
-                                                      DYNAMIC_TYPE_TMP_BUFFER);
-                        if (exportBuf == NULL) {
+                        args->exportSz = MAX_EXPORT_ECC_SZ;
+                        args->exportBuf = (byte*)XMALLOC(args->exportSz,
+                                            ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                        if (args->exportBuf == NULL) {
                             ERROR_OUT(MEMORY_E, exit_sske);
                         }
-                        if (wc_ecc_export_x963(ssl->eccTempKey, exportBuf, &exportSz) != 0) {
+                        if (wc_ecc_export_x963(ssl->eccTempKey, args->exportBuf,
+                                                      &args->exportSz) != 0) {
                             ERROR_OUT(ECC_EXPORT_ERROR, exit_sske);
                         }
-                        length += exportSz;
+                        args->length += args->exportSz;
 
                         /* include size part */
                         hintLen = (word32)XSTRLEN(ssl->arrays->server_hint);
                         if (hintLen > MAX_PSK_ID_LEN) {
                             ERROR_OUT(SERVER_HINT_ERROR, exit_sske);
                         }
-                        length += hintLen + HINT_LEN_SZ;
-                        sendSz = length + HANDSHAKE_HEADER_SZ + RECORD_HEADER_SZ;
+                        args->length += hintLen + HINT_LEN_SZ;
+                        args->sendSz = args->length + HANDSHAKE_HEADER_SZ + RECORD_HEADER_SZ;
 
                     #ifdef HAVE_QSH
-                        length += qshSz;
-                        sendSz += qshSz;
+                        args->length += args->qshSz;
+                        args->sendSz += args->qshSz;
                     #endif
                     #ifdef WOLFSSL_DTLS
                         if (ssl->options.dtls) {
-                            sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
-                            idx    += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+                            args->sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+                            args->idx    += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
                         }
                     #endif
                         /* check for available size */
-                        if ((ret = CheckAvailableSize(ssl, sendSz)) != 0) {
+                        if ((ret = CheckAvailableSize(ssl, args->sendSz)) != 0) {
                             goto exit_sske;
                         }
 
                         /* get output buffer */
-                        output = ssl->buffers.outputBuffer.buffer +
-                                 ssl->buffers.outputBuffer.length;
+                        args->output = ssl->buffers.outputBuffer.buffer +
+                                       ssl->buffers.outputBuffer.length;
 
                         /* key data */
-                        c16toa((word16)hintLen, output + idx);
-                        idx += HINT_LEN_SZ;
-                        XMEMCPY(output + idx, ssl->arrays->server_hint, hintLen);
-                        idx += hintLen;
+                        c16toa((word16)hintLen, args->output + args->idx);
+                        args->idx += HINT_LEN_SZ;
+                        XMEMCPY(args->output + args->idx,
+                                            ssl->arrays->server_hint, hintLen);
+                        args->idx += hintLen;
 
                         /* ECC key exchange data */
-                        output[idx++] = named_curve;
-                        output[idx++] = 0x00;          /* leading zero */
-                        output[idx++] = SetCurveId(ssl->eccTempKey);
-                        output[idx++] = (byte)exportSz;
-                        XMEMCPY(output + idx, exportBuf, exportSz);
+                        args->output[args->idx++] = named_curve;
+                        args->output[args->idx++] = 0x00;          /* leading zero */
+                        args->output[args->idx++] = SetCurveId(ssl->eccTempKey);
+                        args->output[args->idx++] = (byte)args->exportSz;
+                        XMEMCPY(args->output + args->idx, args->exportBuf,
+                                                                args->exportSz);
                         break;
                     }
                 #endif /* HAVE_ECC && !NO_PSK */
@@ -17748,23 +18653,24 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         enum wc_HashType hashType = WC_HASH_TYPE_NONE;
 
                         /* curve type, named curve, length(1) */
-                        idx = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
-                        length = ENUM_LEN + CURVE_LEN + ENUM_LEN;
+                        args->idx = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
+                        args->length = ENUM_LEN + CURVE_LEN + ENUM_LEN;
 
                         /* Export temp ECC key and add to length */
-                        exportSz = MAX_EXPORT_ECC_SZ;
-                        exportBuf = (byte*)XMALLOC(exportSz, ssl->heap,
-                                                       DYNAMIC_TYPE_TMP_BUFFER);
-                        if (exportBuf == NULL) {
+                        args->exportSz = MAX_EXPORT_ECC_SZ;
+                        args->exportBuf = (byte*)XMALLOC(args->exportSz,
+                                            ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                        if (args->exportBuf == NULL) {
                             ERROR_OUT(MEMORY_E, exit_sske);
                         }
-                        if (wc_ecc_export_x963(ssl->eccTempKey, exportBuf, &exportSz) != 0) {
+                        if (wc_ecc_export_x963(ssl->eccTempKey, args->exportBuf,
+                                                        &args->exportSz) != 0) {
                             ERROR_OUT(ECC_EXPORT_ERROR, exit_sske);
                         }
-                        length += exportSz;
+                        args->length += args->exportSz;
 
-                        preSigSz  = length;
-                        preSigIdx = idx;
+                        preSigSz  = args->length;
+                        preSigIdx = args->idx;
 
                         switch(ssl->specs.sig_algo)
                         {
@@ -17774,31 +18680,26 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                                 word32 i = 0;
                                 int    keySz;
 
-                                ssl->sigKey = XMALLOC(sizeof(RsaKey), ssl->heap,
-                                                              DYNAMIC_TYPE_RSA);
-                                if (ssl->sigKey == NULL) {
-                                    ERROR_OUT(MEMORY_E, exit_sske);
-                                }
-                                ssl->sigType = DYNAMIC_TYPE_RSA;
-
-                                ret = wc_InitRsaKey_ex((RsaKey*)ssl->sigKey,
-                                                     ssl->heap, ssl->devId);
+                                ssl->hsType = DYNAMIC_TYPE_RSA;
+                                ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
                                 if (ret != 0) {
                                     goto exit_sske;
                                 }
 
-                                ret = wc_RsaPrivateKeyDecode(ssl->buffers.key->buffer,
-                                                      &i, (RsaKey*)ssl->sigKey,
-                                                      ssl->buffers.key->length);
+                                ret = wc_RsaPrivateKeyDecode(
+                                    ssl->buffers.key->buffer,
+                                    &i,
+                                    (RsaKey*)ssl->hsKey,
+                                    ssl->buffers.key->length);
                                 if (ret != 0) {
                                     goto exit_sske;
                                 }
-                                keySz = wc_RsaEncryptSize((RsaKey*)ssl->sigKey);
+                                keySz = wc_RsaEncryptSize((RsaKey*)ssl->hsKey);
                                 if (keySz < 0) { /* test if keySz has error */
                                     ERROR_OUT(keySz, exit_sske);
                                 }
 
-                                sigSz = (word32)keySz;
+                                args->tmpSigSz = (word32)keySz;
                                 if (keySz < ssl->options.minRsaKeySz) {
                                     WOLFSSL_MSG("RSA signature key size too small");
                                     ERROR_OUT(RSA_KEY_SIZE_E, exit_sske);
@@ -17809,27 +18710,27 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                             case ecc_dsa_sa_algo:
                             {
                                 word32 i = 0;
-                                ssl->sigKey = XMALLOC(sizeof(ecc_key),
-                                                   ssl->heap, DYNAMIC_TYPE_ECC);
-                                if (ssl->sigKey == NULL) {
-                                    ERROR_OUT(MEMORY_E, exit_sske);
-                                }
-                                ssl->sigType = DYNAMIC_TYPE_ECC;
 
-                                ret = wc_ecc_init_ex((ecc_key*)ssl->sigKey, ssl->heap, ssl->devId);
-                                if (ret != 0)
-                                    goto exit_sske;
-
-                                ret = wc_EccPrivateKeyDecode(ssl->buffers.key->buffer,
-                                                      &i, (ecc_key*)ssl->sigKey,
-                                                      ssl->buffers.key->length);
+                                ssl->hsType = DYNAMIC_TYPE_ECC;
+                                ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
                                 if (ret != 0) {
                                     goto exit_sske;
                                 }
-                                sigSz = wc_ecc_sig_size((ecc_key*)ssl->sigKey);  /* worst case estimate */
+
+                                ret = wc_EccPrivateKeyDecode(
+                                    ssl->buffers.key->buffer,
+                                    &i,
+                                    (ecc_key*)ssl->hsKey,
+                                    ssl->buffers.key->length);
+                                if (ret != 0) {
+                                    goto exit_sske;
+                                }
+                                /* worst case estimate */
+                                args->tmpSigSz = wc_ecc_sig_size(
+                                    (ecc_key*)ssl->hsKey);
 
                                 /* check the minimum ECC key size */
-                                if (wc_ecc_size((ecc_key*)ssl->sigKey) <
+                                if (wc_ecc_size((ecc_key*)ssl->hsKey) <
                                         ssl->options.minEccKeySz) {
                                     WOLFSSL_MSG("ECC key size too small");
                                     ret = ECC_KEY_SIZE_E;
@@ -17842,66 +18743,66 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         } /* switch(ssl->specs.sig_algo) */
 
                         /* sig length */
-                        length += LENGTH_SZ;
-                        length += sigSz;
+                        args->length += LENGTH_SZ;
+                        args->length += args->tmpSigSz;
 
                         if (IsAtLeastTLSv1_2(ssl)) {
-                            length += HASH_SIG_SIZE;
+                            args->length += HASH_SIG_SIZE;
                         }
 
-                        sendSz = length + HANDSHAKE_HEADER_SZ + RECORD_HEADER_SZ;
+                        args->sendSz = args->length + HANDSHAKE_HEADER_SZ + RECORD_HEADER_SZ;
 
                     #ifdef HAVE_QSH
-                        length += qshSz;
-                        sendSz += qshSz;
+                        args->length += args->qshSz;
+                        args->sendSz += args->qshSz;
                     #endif
                     #ifdef WOLFSSL_DTLS
                         if (ssl->options.dtls) {
-                            sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
-                            idx    += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
-                            preSigIdx = idx;
+                            args->sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+                            args->idx    += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+                            preSigIdx = args->idx;
                         }
                     #endif
                         /* check for available size */
-                        if ((ret = CheckAvailableSize(ssl, sendSz)) != 0) {
+                        if ((ret = CheckAvailableSize(ssl, args->sendSz)) != 0) {
                             goto exit_sske;
                         }
 
                         /* get ouput buffer */
-                        output = ssl->buffers.outputBuffer.buffer +
-                                 ssl->buffers.outputBuffer.length;
+                        args->output = ssl->buffers.outputBuffer.buffer +
+                                       ssl->buffers.outputBuffer.length;
 
                         /* record and message headers will be added below, when we're sure
                            of the sig length */
 
                         /* key exchange data */
-                        output[idx++] = named_curve;
-                        output[idx++] = 0x00;          /* leading zero */
-                        output[idx++] = SetCurveId(ssl->eccTempKey);
-                        output[idx++] = (byte)exportSz;
-                        XMEMCPY(output + idx, exportBuf, exportSz);
-                        idx += exportSz;
+                        args->output[args->idx++] = named_curve;
+                        args->output[args->idx++] = 0x00;          /* leading zero */
+                        args->output[args->idx++] = SetCurveId(ssl->eccTempKey);
+                        args->output[args->idx++] = (byte)args->exportSz;
+                        XMEMCPY(args->output + args->idx, args->exportBuf, args->exportSz);
+                        args->idx += args->exportSz;
 
                         /* Determine hash type */
                         if (IsAtLeastTLSv1_2(ssl)) {
-                            output[idx++] = ssl->suites->hashAlgo;
-                            output[idx++] = ssl->suites->sigAlgo;
+                            args->output[args->idx++] = ssl->suites->hashAlgo;
+                            args->output[args->idx++] = ssl->suites->sigAlgo;
 
                             switch (ssl->suites->hashAlgo) {
                                 case sha512_mac:
-                                    #ifdef WOLFSSL_SHA512
-                                        hashType = WC_HASH_TYPE_SHA512;
-                                    #endif
+                                #ifdef WOLFSSL_SHA512
+                                    hashType = WC_HASH_TYPE_SHA512;
+                                #endif
                                     break;
                                 case sha384_mac:
-                                    #ifdef WOLFSSL_SHA384
-                                        hashType = WC_HASH_TYPE_SHA384;
-                                    #endif
+                                #ifdef WOLFSSL_SHA384
+                                    hashType = WC_HASH_TYPE_SHA384;
+                                #endif
                                     break;
                                 case sha256_mac:
-                                    #ifndef NO_SHA256
-                                        hashType = WC_HASH_TYPE_SHA256;
-                                    #endif
+                                #ifndef NO_SHA256
+                                    hashType = WC_HASH_TYPE_SHA256;
+                                #endif
                                     break;
                                 case sha_mac:
                                     #if !defined(NO_SHA) && \
@@ -17935,37 +18836,42 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
                     #ifdef HAVE_FUZZER
                         if (ssl->fuzzerCb) {
-                            ssl->fuzzerCb(ssl, output + preSigIdx, preSigSz,
-                                                           FUZZ_SIGNATURE, ssl->fuzzerCtx);
+                            ssl->fuzzerCb(ssl, args->output + preSigIdx,
+                                preSigSz, FUZZ_SIGNATURE, ssl->fuzzerCtx);
                         }
                     #endif
 
                         /* Assemble buffer to hash for signature */
-                        sigDataSz = RAN_LEN + RAN_LEN + preSigSz;
-                        sigDataBuf = (byte*)XMALLOC(sigDataSz, ssl->heap,
-                                                       DYNAMIC_TYPE_TMP_BUFFER);
-                        if (sigDataBuf == NULL) {
+                        args->sigDataSz = RAN_LEN + RAN_LEN + preSigSz;
+                        args->sigDataBuf = (byte*)XMALLOC(args->sigDataSz,
+                                            ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                        if (args->sigDataBuf == NULL) {
                             ERROR_OUT(MEMORY_E, exit_sske);
                         }
-                        XMEMCPY(sigDataBuf, ssl->arrays->clientRandom, RAN_LEN);
-                        XMEMCPY(sigDataBuf+RAN_LEN, ssl->arrays->serverRandom, RAN_LEN);
-                        XMEMCPY(sigDataBuf+RAN_LEN+RAN_LEN, output + preSigIdx, preSigSz);
+                        XMEMCPY(args->sigDataBuf, ssl->arrays->clientRandom,
+                                                                       RAN_LEN);
+                        XMEMCPY(args->sigDataBuf+RAN_LEN,
+                                            ssl->arrays->serverRandom, RAN_LEN);
+                        XMEMCPY(args->sigDataBuf+RAN_LEN+RAN_LEN,
+                                args->output + preSigIdx, preSigSz);
 
                         ssl->buffers.sig.length = wc_HashGetDigestSize(hashType);
-                        ssl->buffers.sig.buffer = (byte*)XMALLOC(ssl->buffers.sig.length,
+                        ssl->buffers.sig.buffer = (byte*)XMALLOC(
+                                            ssl->buffers.sig.length,
                                             ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
                         if (ssl->buffers.sig.buffer == NULL) {
                             ERROR_OUT(MEMORY_E, exit_sske);
                         }
 
                         /* Perform hash */
-                        ret = wc_Hash(hashType, sigDataBuf, sigDataSz,
+                        ret = wc_Hash(hashType,
+                            args->sigDataBuf, args->sigDataSz,
                             ssl->buffers.sig.buffer, ssl->buffers.sig.length);
                         if (ret != 0) {
                             goto exit_sske;
                         }
 
-                        ssl->sigLen = sigSz;
+                        args->sigSz = args->tmpSigSz;
 
                         /* Sign hash to create signature */
                         switch (ssl->specs.sig_algo)
@@ -17985,19 +18891,19 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
                                     switch (ssl->suites->hashAlgo) {
                                         case sha512_mac:
-                                            #ifdef WOLFSSL_SHA512
-                                                typeH    = SHA512h;
-                                            #endif
+                                        #ifdef WOLFSSL_SHA512
+                                            typeH    = SHA512h;
+                                        #endif
                                             break;
                                         case sha384_mac:
-                                            #ifdef WOLFSSL_SHA384
-                                                typeH    = SHA384h;
-                                            #endif
+                                        #ifdef WOLFSSL_SHA384
+                                            typeH    = SHA384h;
+                                        #endif
                                             break;
                                         case sha256_mac:
-                                            #ifndef NO_SHA256
-                                                typeH    = SHA256h;
-                                            #endif
+                                        #ifndef NO_SHA256
+                                            typeH    = SHA256h;
+                                        #endif
                                             break;
                                         case sha_mac:
                                             #if !defined(NO_SHA) && \
@@ -18010,8 +18916,10 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                                             break;
                                     }
 
-                                    ssl->buffers.sig.length = wc_EncodeSignature(encodedSig,
-                                        ssl->buffers.sig.buffer, ssl->buffers.sig.length, typeH);
+                                    ssl->buffers.sig.length =
+                                        wc_EncodeSignature(encodedSig,
+                                        ssl->buffers.sig.buffer,
+                                        ssl->buffers.sig.length, typeH);
 
                                     /* Replace sig buffer with new one */
                                     XFREE(ssl->buffers.sig.buffer, ssl->heap,
@@ -18020,8 +18928,9 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                                 }
 
                                 /* write sig size here */
-                                c16toa((word16)ssl->sigLen, output + idx);
-                                idx += LENGTH_SZ;
+                                c16toa((word16)args->sigSz,
+                                    args->output + args->idx);
+                                args->idx += LENGTH_SZ;
                                 break;
                             }
                         #endif /* !NO_RSA */
@@ -18038,50 +18947,46 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                     {
                         enum wc_HashType hashType = WC_HASH_TYPE_NONE;
 
-                        idx = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
-                        length = LENGTH_SZ * 3;  /* p, g, pub */
-                        length += ssl->buffers.serverDH_P.length +
-                                  ssl->buffers.serverDH_G.length +
-                                  ssl->buffers.serverDH_Pub.length;
+                        args->idx = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
+                        args->length = LENGTH_SZ * 3;  /* p, g, pub */
+                        args->length += ssl->buffers.serverDH_P.length +
+                                        ssl->buffers.serverDH_G.length +
+                                        ssl->buffers.serverDH_Pub.length;
 
-                        preSigIdx = idx;
-                        preSigSz  = length;
+                        preSigIdx = args->idx;
+                        preSigSz  = args->length;
 
                         if (!ssl->options.usingAnon_cipher) {
                             word32   i = 0;
                             int      keySz;
 
-                            ssl->sigKey = (RsaKey*)XMALLOC(sizeof(RsaKey), ssl->heap,
-                                                              DYNAMIC_TYPE_RSA);
-                            if (ssl->sigKey == NULL) {
-                                ERROR_OUT(MEMORY_E, exit_sske);
+                            /* make sure private key exists */
+                            if (ssl->buffers.key == NULL ||
+                                            ssl->buffers.key->buffer == NULL) {
+                                ERROR_OUT(NO_PRIVATE_KEY, exit_sske);
                             }
-                            ssl->sigType = DYNAMIC_TYPE_RSA;
 
-                            ret = wc_InitRsaKey_ex((RsaKey*)ssl->sigKey,
-                                                        ssl->heap, ssl->devId);
+                            ssl->hsType = DYNAMIC_TYPE_RSA;
+                            ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
                             if (ret != 0) {
                                 goto exit_sske;
                             }
 
                             /* sig length */
-                            length += LENGTH_SZ;
+                            args->length += LENGTH_SZ;
 
-                            if (!ssl->buffers.key->buffer) {
-                                ERROR_OUT(NO_PRIVATE_KEY, exit_sske);
-                            }
-
-                            ret = wc_RsaPrivateKeyDecode(ssl->buffers.key->buffer, &i,
-                                                         (RsaKey*)ssl->sigKey, ssl->buffers.key->length);
+                            ret = wc_RsaPrivateKeyDecode(
+                                ssl->buffers.key->buffer, &i,
+                                (RsaKey*)ssl->hsKey, ssl->buffers.key->length);
                             if (ret != 0) {
                                 goto exit_sske;
                             }
-                            keySz = wc_RsaEncryptSize((RsaKey*)ssl->sigKey);
+                            keySz = wc_RsaEncryptSize((RsaKey*)ssl->hsKey);
                             if (keySz < 0) { /* test if keySz has error */
                                 ERROR_OUT(keySz, exit_sske);
                             }
-                            sigSz = (word32)keySz;
-                            length += sigSz;
+                            args->tmpSigSz = (word32)keySz;
+                            args->length += args->tmpSigSz;
 
                             if (keySz < ssl->options.minRsaKeySz) {
                                 WOLFSSL_MSG("RSA key size too small");
@@ -18089,60 +18994,68 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                             }
 
                             if (IsAtLeastTLSv1_2(ssl)) {
-                                length += HASH_SIG_SIZE;
+                                args->length += HASH_SIG_SIZE;
                             }
                         }
 
-                        sendSz = length + HANDSHAKE_HEADER_SZ + RECORD_HEADER_SZ;
+                        args->sendSz = args->length + HANDSHAKE_HEADER_SZ +
+                                                            RECORD_HEADER_SZ;
 
                     #ifdef HAVE_QSH
-                        length += qshSz;
-                        sendSz += qshSz;
+                        args->length += args->qshSz;
+                        args->sendSz += args->qshSz;
                     #endif
                     #ifdef WOLFSSL_DTLS
                         if (ssl->options.dtls) {
-                            sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
-                            idx    += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
-                            preSigIdx = idx;
+                            args->sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+                            args->idx    += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+                            preSigIdx = args->idx;
                         }
                     #endif
 
                         /* check for available size */
-                        if ((ret = CheckAvailableSize(ssl, sendSz)) != 0) {
+                        if ((ret = CheckAvailableSize(ssl, args->sendSz)) != 0) {
                             goto exit_sske;
                         }
 
                         /* get ouput buffer */
-                        output = ssl->buffers.outputBuffer.buffer +
-                                 ssl->buffers.outputBuffer.length;
+                        args->output = ssl->buffers.outputBuffer.buffer +
+                                       ssl->buffers.outputBuffer.length;
 
-                        AddHeaders(output, length, server_key_exchange, ssl);
+                        AddHeaders(args->output, args->length,
+                                                    server_key_exchange, ssl);
 
                         /* add p, g, pub */
-                        c16toa((word16)ssl->buffers.serverDH_P.length, output + idx);
-                        idx += LENGTH_SZ;
-                        XMEMCPY(output + idx, ssl->buffers.serverDH_P.buffer,
+                        c16toa((word16)ssl->buffers.serverDH_P.length,
+                                                    args->output + args->idx);
+                        args->idx += LENGTH_SZ;
+                        XMEMCPY(args->output + args->idx,
+                                              ssl->buffers.serverDH_P.buffer,
                                               ssl->buffers.serverDH_P.length);
-                        idx += ssl->buffers.serverDH_P.length;
+                        args->idx += ssl->buffers.serverDH_P.length;
 
                         /*  g */
-                        c16toa((word16)ssl->buffers.serverDH_G.length, output + idx);
-                        idx += LENGTH_SZ;
-                        XMEMCPY(output + idx, ssl->buffers.serverDH_G.buffer,
+                        c16toa((word16)ssl->buffers.serverDH_G.length,
+                                                    args->output + args->idx);
+                        args->idx += LENGTH_SZ;
+                        XMEMCPY(args->output + args->idx,
+                                              ssl->buffers.serverDH_G.buffer,
                                               ssl->buffers.serverDH_G.length);
-                        idx += ssl->buffers.serverDH_G.length;
+                        args->idx += ssl->buffers.serverDH_G.length;
 
                         /*  pub */
-                        c16toa((word16)ssl->buffers.serverDH_Pub.length, output + idx);
-                        idx += LENGTH_SZ;
-                        XMEMCPY(output + idx, ssl->buffers.serverDH_Pub.buffer,
+                        c16toa((word16)ssl->buffers.serverDH_Pub.length,
+                                                    args->output + args->idx);
+                        args->idx += LENGTH_SZ;
+                        XMEMCPY(args->output + args->idx,
+                                              ssl->buffers.serverDH_Pub.buffer,
                                               ssl->buffers.serverDH_Pub.length);
-                        idx += ssl->buffers.serverDH_Pub.length;
+                        args->idx += ssl->buffers.serverDH_Pub.length;
 
                     #ifdef HAVE_FUZZER
                         if (ssl->fuzzerCb) {
-                            ssl->fuzzerCb(ssl, output + preSigIdx, preSigSz,
-                                                           FUZZ_SIGNATURE, ssl->fuzzerCtx);
+                            ssl->fuzzerCb(ssl, args->output + preSigIdx,
+                                preSigSz, FUZZ_SIGNATURE, ssl->fuzzerCtx);
                         }
                     #endif
 
@@ -18152,24 +19065,24 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
                         /* Determine hash type */
                         if (IsAtLeastTLSv1_2(ssl)) {
-                            output[idx++] = ssl->suites->hashAlgo;
-                            output[idx++] = ssl->suites->sigAlgo;
+                            args->output[args->idx++] = ssl->suites->hashAlgo;
+                            args->output[args->idx++] = ssl->suites->sigAlgo;
 
                             switch (ssl->suites->hashAlgo) {
                                 case sha512_mac:
-                                    #ifdef WOLFSSL_SHA512
-                                        hashType = WC_HASH_TYPE_SHA512;
-                                    #endif
+                                #ifdef WOLFSSL_SHA512
+                                    hashType = WC_HASH_TYPE_SHA512;
+                                #endif
                                     break;
                                 case sha384_mac:
-                                    #ifdef WOLFSSL_SHA384
-                                        hashType = WC_HASH_TYPE_SHA384;
-                                    #endif
+                                #ifdef WOLFSSL_SHA384
+                                    hashType = WC_HASH_TYPE_SHA384;
+                                #endif
                                     break;
                                 case sha256_mac:
-                                    #ifndef NO_SHA256
-                                        hashType = WC_HASH_TYPE_SHA256;
-                                    #endif
+                                #ifndef NO_SHA256
+                                    hashType = WC_HASH_TYPE_SHA256;
+                                #endif
                                     break;
                                 case sha_mac:
                                     #if !defined(NO_SHA) && \
@@ -18199,19 +19112,22 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         }
 
                         /* signature size */
-                        c16toa((word16)sigSz, output + idx);
-                        idx += LENGTH_SZ;
+                        c16toa((word16)args->tmpSigSz, args->output + args->idx);
+                        args->idx += LENGTH_SZ;
 
                         /* Assemble buffer to hash for signature */
-                        sigDataSz = RAN_LEN + RAN_LEN + preSigSz;
-                        sigDataBuf = (byte*)XMALLOC(sigDataSz, ssl->heap,
-                                                       DYNAMIC_TYPE_TMP_BUFFER);
-                        if (sigDataBuf == NULL) {
+                        args->sigDataSz = RAN_LEN + RAN_LEN + preSigSz;
+                        args->sigDataBuf = (byte*)XMALLOC(args->sigDataSz,
+                                            ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                        if (args->sigDataBuf == NULL) {
                             ERROR_OUT(MEMORY_E, exit_sske);
                         }
-                        XMEMCPY(sigDataBuf, ssl->arrays->clientRandom, RAN_LEN);
-                        XMEMCPY(sigDataBuf+RAN_LEN, ssl->arrays->serverRandom, RAN_LEN);
-                        XMEMCPY(sigDataBuf+RAN_LEN+RAN_LEN, output + preSigIdx, preSigSz);
+                        XMEMCPY(args->sigDataBuf, ssl->arrays->clientRandom,
+                                                                    RAN_LEN);
+                        XMEMCPY(args->sigDataBuf+RAN_LEN,
+                                        ssl->arrays->serverRandom, RAN_LEN);
+                        XMEMCPY(args->sigDataBuf+RAN_LEN+RAN_LEN,
+                            args->output + preSigIdx, preSigSz);
 
                         ssl->buffers.sig.length = wc_HashGetDigestSize(hashType);
                         ssl->buffers.sig.buffer = (byte*)XMALLOC(
@@ -18222,13 +19138,14 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         }
 
                         /* Perform hash */
-                        ret = wc_Hash(hashType, sigDataBuf, sigDataSz,
+                        ret = wc_Hash(hashType,
+                            args->sigDataBuf, args->sigDataSz,
                             ssl->buffers.sig.buffer, ssl->buffers.sig.length);
                         if (ret != 0) {
                             goto exit_sske;
                         }
 
-                        ssl->sigLen = sigSz;
+                        args->sigSz = args->tmpSigSz;
 
                         /* Sign hash to create signature */
                         switch (ssl->suites->sigAlgo)
@@ -18248,19 +19165,19 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
                                     switch (ssl->suites->hashAlgo) {
                                         case sha512_mac:
-                                            #ifdef WOLFSSL_SHA512
-                                                typeH    = SHA512h;
-                                            #endif
+                                        #ifdef WOLFSSL_SHA512
+                                            typeH    = SHA512h;
+                                        #endif
                                             break;
                                         case sha384_mac:
-                                            #ifdef WOLFSSL_SHA384
-                                                typeH    = SHA384h;
-                                            #endif
+                                        #ifdef WOLFSSL_SHA384
+                                            typeH    = SHA384h;
+                                        #endif
                                             break;
                                         case sha256_mac:
-                                            #ifndef NO_SHA256
-                                                typeH    = SHA256h;
-                                            #endif
+                                        #ifndef NO_SHA256
+                                            typeH    = SHA256h;
+                                        #endif
                                             break;
                                         case sha_mac:
                                             #if !defined(NO_SHA) && \
@@ -18273,8 +19190,10 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                                             break;
                                     }
 
-                                    ssl->buffers.sig.length = wc_EncodeSignature(encodedSig,
-                                        ssl->buffers.sig.buffer, ssl->buffers.sig.length, typeH);
+                                    ssl->buffers.sig.length =
+                                    wc_EncodeSignature(encodedSig,
+                                        ssl->buffers.sig.buffer,
+                                        ssl->buffers.sig.length, typeH);
 
                                     /* Replace sig buffer with new one */
                                     XFREE(ssl->buffers.sig.buffer, ssl->heap,
@@ -18296,10 +19215,10 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 }
 
                 /* Advance state and proceed */
-                ssl->options.keyShareState = KEYSHARE_DO;
-            } /* case KEYSHARE_BUILD */
+                ssl->options.asyncState = TLS_ASYNC_DO;
+            } /* case TLS_ASYNC_BUILD */
 
-            case KEYSHARE_DO:
+            case TLS_ASYNC_DO:
             {
                 switch(ssl->specs.kea)
                 {
@@ -18330,12 +19249,14 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         #ifndef NO_RSA
                             case rsa_sa_algo:
                             {
+                                RsaKey* key = (RsaKey*)ssl->hsKey;
+
                                 ret = RsaSign(ssl,
                                     ssl->buffers.sig.buffer,
                                     ssl->buffers.sig.length,
-                                    output + idx,
-                                    &ssl->sigLen,
-                                    (RsaKey*)ssl->sigKey,
+                                    args->output + args->idx,
+                                    &args->sigSz,
+                                    key,
                                     ssl->buffers.key->buffer,
                                     ssl->buffers.key->length,
                             #ifdef HAVE_PK_CALLBACKS
@@ -18349,12 +19270,14 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         #endif /* !NO_RSA */
                             case ecc_dsa_sa_algo:
                             {
+                                ecc_key* key = (ecc_key*)ssl->hsKey;
+
                                 ret = EccSign(ssl,
                                     ssl->buffers.sig.buffer,
                                     ssl->buffers.sig.length,
-                                    output + LENGTH_SZ + idx,
-                                    &ssl->sigLen,
-                                    (ecc_key*)ssl->sigKey,
+                                    args->output + LENGTH_SZ + args->idx,
+                                    &args->sigSz,
+                                    key,
                             #if defined(HAVE_PK_CALLBACKS)
                                     ssl->buffers.key->buffer,
                                     ssl->buffers.key->length,
@@ -18378,6 +19301,8 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         #ifndef NO_RSA
                             case rsa_sa_algo:
                             {
+                                RsaKey* key = (RsaKey*)ssl->hsKey;
+
                                 if (ssl->options.usingAnon_cipher) {
                                     break;
                                 }
@@ -18385,9 +19310,9 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                                 ret = RsaSign(ssl,
                                     ssl->buffers.sig.buffer,
                                     ssl->buffers.sig.length,
-                                    output + idx,
-                                    &ssl->sigLen,
-                                    (RsaKey*)ssl->sigKey,
+                                    args->output + args->idx,
+                                    &args->sigSz,
+                                    key,
                                     ssl->buffers.key->buffer,
                                     ssl->buffers.key->length,
                                 #ifdef HAVE_PK_CALLBACKS
@@ -18412,10 +19337,10 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 }
 
                 /* Advance state and proceed */
-                ssl->options.keyShareState = KEYSHARE_VERIFY;
-            } /* case KEYSHARE_DO */
+                ssl->options.asyncState = TLS_ASYNC_VERIFY;
+            } /* case TLS_ASYNC_DO */
 
-            case KEYSHARE_VERIFY:
+            case TLS_ASYNC_VERIFY:
             {
                 switch(ssl->specs.kea)
                 {
@@ -18448,35 +19373,41 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         #ifndef NO_RSA
                             case rsa_sa_algo:
                             {
-                                if (verifySig == NULL) {
-                                    if (ssl->sigLen == 0) {
+                                RsaKey* key = (RsaKey*)ssl->hsKey;
+
+                                if (args->verifySig == NULL) {
+                                    if (args->sigSz == 0) {
                                         ERROR_OUT(BAD_COND_E, exit_sske);
                                     }
-                                    verifySig = (byte*)XMALLOC(ssl->sigLen, ssl->heap,
-                                                      DYNAMIC_TYPE_TMP_BUFFER);
-                                    if (!verifySig) {
+                                    args->verifySig = (byte*)XMALLOC(
+                                                    args->sigSz, ssl->heap,
+                                                    DYNAMIC_TYPE_TMP_BUFFER);
+                                    if (!args->verifySig) {
                                         ERROR_OUT(MEMORY_E, exit_sske);
                                     }
-                                    XMEMCPY(verifySig, output + idx, ssl->sigLen);
+                                    XMEMCPY(args->verifySig,
+                                        args->output + args->idx, args->sigSz);
                                 }
 
                                 /* check for signature faults */
                                 ret = VerifyRsaSign(ssl,
-                                    verifySig, ssl->sigLen,
+                                    args->verifySig, args->sigSz,
                                     ssl->buffers.sig.buffer,
                                     ssl->buffers.sig.length,
-                                    (RsaKey*)ssl->sigKey);
+                                    key
+                                );
                                 break;
                             }
                         #endif
                             case ecc_dsa_sa_algo:
                             {
                                 /* Now that we know the real sig size, write it. */
-                                c16toa((word16)ssl->sigLen, output + idx);
+                                c16toa((word16)args->sigSz,
+                                                    args->output + args->idx);
 
                                 /* And adjust length and sendSz from estimates */
-                                length += ssl->sigLen - sigSz;
-                                sendSz += ssl->sigLen - sigSz;
+                                args->length += args->sigSz - args->tmpSigSz;
+                                args->sendSz += args->sigSz - args->tmpSigSz;
                                 break;
                             }
                             default:
@@ -18493,28 +19424,33 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         #ifndef NO_RSA
                             case rsa_sa_algo:
                             {
+                                RsaKey* key = (RsaKey*)ssl->hsKey;
+
                                 if (ssl->options.usingAnon_cipher) {
                                     break;
                                 }
 
-                                if (verifySig == NULL) {
-                                    if (ssl->sigLen == 0) {
+                                if (args->verifySig == NULL) {
+                                    if (args->sigSz == 0) {
                                         ERROR_OUT(BAD_COND_E, exit_sske);
                                     }
-                                    verifySig = (byte*)XMALLOC(ssl->sigLen, ssl->heap,
+                                    args->verifySig = (byte*)XMALLOC(
+                                                      args->sigSz, ssl->heap,
                                                       DYNAMIC_TYPE_TMP_BUFFER);
-                                    if (!verifySig) {
+                                    if (!args->verifySig) {
                                         ERROR_OUT(MEMORY_E, exit_sske);
                                     }
-                                    XMEMCPY(verifySig, output + idx, ssl->sigLen);
+                                    XMEMCPY(args->verifySig,
+                                        args->output + args->idx, args->sigSz);
                                 }
 
                                 /* check for signature faults */
                                 ret = VerifyRsaSign(ssl,
-                                    verifySig, ssl->sigLen,
+                                    args->verifySig, args->sigSz,
                                     ssl->buffers.sig.buffer,
                                     ssl->buffers.sig.length,
-                                    (RsaKey*)ssl->sigKey);
+                                    key
+                                );
                                 break;
                             }
                         #endif
@@ -18530,26 +19466,28 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 }
 
                 /* Advance state and proceed */
-                ssl->options.keyShareState = KEYSHARE_FINALIZE;
-            } /* case KEYSHARE_VERIFY */
+                ssl->options.asyncState = TLS_ASYNC_FINALIZE;
+            } /* case TLS_ASYNC_VERIFY */
 
-            case KEYSHARE_FINALIZE:
+            case TLS_ASYNC_FINALIZE:
             {
             #ifdef HAVE_QSH
                 if (ssl->peerQSHKeyPresent) {
-                    if (qshSz > 0) {
-                        idx = sendSz - qshSz;
+                    if (args->qshSz > 0) {
+                        args->idx = args->sendSz - args->qshSz;
                         if (QSH_KeyExchangeWrite(ssl, 1) != 0) {
                             ERROR_OUT(MEMORY_E, exit_sske);
                         }
 
                         /* extension type */
-                        c16toa(TLSX_QUANTUM_SAFE_HYBRID, output + idx);
-                        idx += OPAQUE16_LEN;
+                        c16toa(TLSX_QUANTUM_SAFE_HYBRID,
+                                                    args->output + args->idx);
+                        args->idx += OPAQUE16_LEN;
 
                         /* write to output and check amount written */
-                        if (TLSX_QSHPK_Write(ssl->QSH_secret->list, output + idx)
-                                                          > qshSz - OPAQUE16_LEN) {
+                        if (TLSX_QSHPK_Write(ssl->QSH_secret->list,
+                            args->output + args->idx) >
+                                                args->qshSz - OPAQUE16_LEN) {
                             ERROR_OUT(MEMORY_E, exit_sske);
                         }
                     }
@@ -18560,8 +19498,9 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 if (ssl->specs.kea == ecdhe_psk_kea ||
                     ssl->specs.kea == ecc_diffie_hellman_kea) {
                     /* Check output to make sure it was set */
-                    if (output) {
-                        AddHeaders(output, length, server_key_exchange, ssl);
+                    if (args->output) {
+                        AddHeaders(args->output, args->length,
+                                                    server_key_exchange, ssl);
                     }
                     else {
                         ERROR_OUT(BUFFER_ERROR, exit_sske);
@@ -18571,7 +19510,7 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
             #ifdef WOLFSSL_DTLS
                 if (IsDtlsNotSctpMode(ssl)) {
-                    if ((ret = DtlsMsgPoolSave(ssl, output, sendSz)) != 0) {
+                    if ((ret = DtlsMsgPoolSave(ssl, args->output, args->sendSz)) != 0) {
                         goto exit_sske;
                     }
                 }
@@ -18580,7 +19519,7 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                     DtlsSEQIncrement(ssl, CUR_ORDER);
             #endif
 
-                ret = HashOutput(ssl, output, sendSz, 0);
+                ret = HashOutput(ssl, args->output, args->sendSz, 0);
                 if (ret != 0) {
                     goto exit_sske;
                 }
@@ -18590,8 +19529,8 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                     AddPacketName("ServerKeyExchange", &ssl->handShakeInfo);
                 }
                 if (ssl->toInfoOn) {
-                    AddPacketInfo("ServerKeyExchange", &ssl->timeoutInfo, output,
-                                                                sendSz, ssl->heap);
+                    AddPacketInfo("ServerKeyExchange", &ssl->timeoutInfo,
+                        args->output, args->sendSz, ssl->heap);
                 }
             #endif
 
@@ -18601,12 +19540,12 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 }
 
                 /* Advance state and proceed */
-                ssl->options.keyShareState = KEYSHARE_END;
-            } /* case KEYSHARE_FINALIZE */
+                ssl->options.asyncState = TLS_ASYNC_END;
+            } /* case TLS_ASYNC_FINALIZE */
 
-            case KEYSHARE_END:
+            case TLS_ASYNC_END:
             {
-                ssl->buffers.outputBuffer.length += sendSz;
+                ssl->buffers.outputBuffer.length += args->sendSz;
                 if (!ssl->options.groupMessages) {
                     ret = SendBuffered(ssl);
                 }
@@ -18616,57 +19555,20 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
             }
             default:
                 ret = INPUT_CASE_ERROR;
-        } /* switch(ssl->options.keyShareState) */
+        } /* switch(ssl->options.asyncState) */
 
     exit_sske:
 
         WOLFSSL_LEAVE("SendServerKeyExchange", ret);
 
-        /* Handle cleanup for stack variables here */
-    #if defined(HAVE_ECC)
-        if (exportBuf) {
-            XFREE(exportBuf, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
-            exportBuf = NULL;
-        }
-    #endif
-    #if defined(HAVE_ECC) || (!defined(NO_DH) && !defined(NO_RSA))
-        if (sigDataBuf) {
-            XFREE(sigDataBuf, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
-            sigDataBuf = NULL;
-        }
-    #endif
-
-
     #ifdef WOLFSSL_ASYNC_CRYPT
-        /* Handle WC_PENDING_E */
-        if (ret == WC_PENDING_E) {
-            /* Store variables needed for async */
-            XMEMSET(&ssl->async, 0, sizeof(ssl->async));
-            ssl->async.output = output;
-            ssl->async.sendSz = sendSz;
-            ssl->async.idx = idx;
-            ssl->async.length = length;
-            ssl->async.sigSz = sigSz;
-        #ifndef NO_RSA
-            ssl->async.data = verifySig;
-        #endif
-
-            /* Push event to queue */
-            ret = wolfAsync_EventQueuePush(&ssl->ctx->event_queue, &ssl->event);
-            if (ret == 0) {
-                return WC_PENDING_E;
-            }
-        }
-    #endif
-
-    #ifndef NO_RSA
-        if (verifySig) {
-            XFREE(verifySig, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
-            verifySig = NULL;
-        }
-    #endif
+        /* Handle async operation */
+        if (ret == WC_PENDING_E)
+            return ret;
+    #endif /* WOLFSSL_ASYNC_CRYPT */
 
         /* Final cleanup */
+        FreeSskeArgs(ssl, args);
         FreeKeyExchange(ssl);
 
         return ret;
@@ -18958,7 +19860,7 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
         /* suite size */
         ato16(&input[idx], &clSuites.suiteSz);
-        idx += 2;
+        idx += OPAQUE16_LEN;
 
         if (clSuites.suiteSz > WOLFSSL_MAX_SUITE_SZ)
             return BUFFER_ERROR;
@@ -18966,14 +19868,14 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
         /* session size */
         ato16(&input[idx], &sessionSz);
-        idx += 2;
+        idx += OPAQUE16_LEN;
 
         if (sessionSz > ID_LEN)
             return BUFFER_ERROR;
 
         /* random size */
         ato16(&input[idx], &randomSz);
-        idx += 2;
+        idx += OPAQUE16_LEN;
 
         if (randomSz > RAN_LEN)
             return BUFFER_ERROR;
@@ -18982,10 +19884,10 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
         for (i = 0, j = 0; i < clSuites.suiteSz; i += 3) {
             byte first = input[idx++];
             if (!first) { /* implicit: skip sslv2 type */
-                XMEMCPY(&clSuites.suites[j], &input[idx], 2);
-                j += 2;
+                XMEMCPY(&clSuites.suites[j], &input[idx], SUITE_LEN);
+                j += SUITE_LEN;
             }
-            idx += 2;
+            idx += SUITE_LEN;
         }
         clSuites.suiteSz = j;
 
@@ -19076,6 +19978,8 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
         byte            peerCookieSz = 0;
         byte            cookieType;
         byte            cookieSz = 0;
+
+        XMEMSET(&cookieHmac, 0, sizeof(Hmac));
 #endif /* WOLFSSL_DTLS */
 
 #ifdef WOLFSSL_CALLBACKS
@@ -19519,63 +20423,65 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
 #if !defined(NO_RSA) || defined(HAVE_ECC)
 
+    typedef struct DcvArgs { /* DoCertificateVerify working state; lives in ssl->async.args when WOLFSSL_ASYNC_CRYPT is defined, else on the stack */
+        byte*  output; /* not allocated; filled in by RsaVerify and compared against the expected encoding */
+        word32 sendSz; /* non-negative RsaVerify return value (size of the data at output) */
+        word16 sz; /* signature length read from the message with ato16 */
+        word32 sigSz; /* return value of wc_EncodeSignature (TLS 1.2 RSA path) */
+        word32 idx; /* current read offset into input; starts at *inOutIdx */
+        word32 begin; /* *inOutIdx on entry; (idx - begin) is bounds-checked against size */
+        byte   hashAlgo; /* hash algorithm byte from the message (TLS 1.2+); defaults to sha_mac */
+        byte   sigAlgo; /* signature algorithm byte from the message (TLS 1.2+); defaults to anonymous_sa_algo */
+    } DcvArgs;
+
+    static void FreeDcvArgs(WOLFSSL* ssl, void* pArgs) /* cleanup hook for DcvArgs; also installed as ssl->async.freeArgs in async builds */
+    {
+        DcvArgs* args = (DcvArgs*)pArgs;
+
+        (void)ssl; /* unused */
+        (void)args; /* nothing is freed here: DcvArgs.output is marked "not allocated" */
+    }
+
     static int DoCertificateVerify(WOLFSSL* ssl, byte* input,
                                 word32* inOutIdx, word32 size)
     {
-        int         ret = 0;
-        byte*       output = NULL;
-        word32      sendSz = 0;
-        word16      sz = 0;
-        word32      sigSz = 0;
-        byte        hashAlgo = sha_mac;
-        byte        sigAlgo = anonymous_sa_algo;
-        word32      idx = *inOutIdx, begin = *inOutIdx;
+        int ret = 0;
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        DcvArgs* args = (DcvArgs*)ssl->async.args;
+        typedef char args_test[sizeof(ssl->async.args) >= sizeof(*args) ? 1 : -1];
+        (void)sizeof(args_test);
+    #else
+        DcvArgs  args[1];
+    #endif
 
         WOLFSSL_ENTER("DoCertificateVerify");
 
-        (void)sigSz;
-        (void)output;
-        (void)sendSz;
-
     #ifdef WOLFSSL_ASYNC_CRYPT
-        ret = wolfAsync_EventPop(&ssl->event, WOLF_EVENT_TYPE_ASYNC_ANY);
+        ret = wolfSSL_AsyncPop(ssl, &ssl->options.asyncState);
         if (ret != WC_NOT_PENDING_E) {
-            WOLF_EVENT_TYPE eType = ssl->event.type;
-
-            /* Clear event */
-            XMEMSET(&ssl->event, 0, sizeof(ssl->event));
-
             /* Check for error */
-            if (ret < 0) {
+            if (ret < 0)
                 goto exit_dcv;
-            }
-            else  {
-                /* Restore variables needed for async */
-                output = ssl->async.output;
-                sendSz = ssl->async.sendSz;
-                idx = ssl->async.idx;
-                sigSz = ssl->async.sigSz;
-                sz = ssl->async.length;
-                sigAlgo = ssl->async.sigAlgo;
-                hashAlgo = ssl->async.hashAlgo;
-
-                /* Advance key share state if not wolfCrypt */
-                if (eType == WOLF_EVENT_TYPE_ASYNC_WOLFSSL) {
-                    ssl->options.keyShareState++;
-                }
-            }
         }
         else
     #endif
         {
             /* Reset state */
             ret = 0;
-            ssl->options.keyShareState = KEYSHARE_BEGIN;
+            ssl->options.asyncState = TLS_ASYNC_BEGIN;
+            XMEMSET(args, 0, sizeof(DcvArgs));
+            args->hashAlgo = sha_mac;
+            args->sigAlgo = anonymous_sa_algo;
+            args->idx = *inOutIdx;
+            args->begin = *inOutIdx;
+        #ifdef WOLFSSL_ASYNC_CRYPT
+            ssl->async.freeArgs = FreeDcvArgs;
+        #endif
         }
 
-        switch(ssl->options.keyShareState)
+        switch(ssl->options.asyncState)
         {
-            case KEYSHARE_BEGIN:
+            case TLS_ASYNC_BEGIN:
             {
             #ifdef WOLFSSL_CALLBACKS
                 if (ssl->hsInfoOn)
@@ -19585,79 +20491,97 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
             #endif
 
                 /* Advance state and proceed */
-                ssl->options.keyShareState = KEYSHARE_BUILD;
-            } /* case KEYSHARE_BEGIN */
+                ssl->options.asyncState = TLS_ASYNC_BUILD;
+            } /* case TLS_ASYNC_BEGIN */
 
-            case KEYSHARE_BUILD:
+            case TLS_ASYNC_BUILD:
             {
                 if (IsAtLeastTLSv1_2(ssl)) {
-                    if ((idx - begin) + ENUM_LEN + ENUM_LEN > size) {
+                    if ((args->idx - args->begin) + ENUM_LEN + ENUM_LEN > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dcv);
                     }
 
-                    hashAlgo = input[idx++];
-                    sigAlgo  = input[idx++];
+                    args->hashAlgo = input[args->idx++];
+                    args->sigAlgo  = input[args->idx++];
                 }
 
-                if ((idx - begin) + OPAQUE16_LEN > size) {
+                if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                     ERROR_OUT(BUFFER_ERROR, exit_dcv);
                 }
 
-                ato16(input + idx, &sz);
-                idx += OPAQUE16_LEN;
+                ato16(input + args->idx, &args->sz);
+                args->idx += OPAQUE16_LEN;
 
-                if ((idx - begin) + sz > size || sz > ENCRYPT_LEN) {
+                if ((args->idx - args->begin) + args->sz > size ||
+                                                    args->sz > ENCRYPT_LEN) {
                     ERROR_OUT(BUFFER_ERROR, exit_dcv);
                 }
 
             #ifdef HAVE_ECC
                 if (ssl->peerEccDsaKeyPresent) {
-                    ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha;
-                    ssl->buffers.digest.length = SHA_DIGEST_SIZE;
 
                     WOLFSSL_MSG("Doing ECC peer cert verify");
 
+                /* make sure a default is defined */
+                #if !defined(NO_SHA)
+                    ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha;
+                    ssl->buffers.digest.length = SHA_DIGEST_SIZE;
+                #elif !defined(NO_SHA256)
+                    ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha256;
+                    ssl->buffers.digest.length = SHA256_DIGEST_SIZE;
+                #elif defined(WOLFSSL_SHA384)
+                    ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha384;
+                    ssl->buffers.digest.length = SHA384_DIGEST_SIZE;
+                #elif defined(WOLFSSL_SHA512)
+                    ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha512;
+                    ssl->buffers.digest.length = SHA512_DIGEST_SIZE;
+                #else
+                    #error No digest enabled for ECC sig verify
+                #endif
+
                     if (IsAtLeastTLSv1_2(ssl)) {
-                        if (sigAlgo != ecc_dsa_sa_algo) {
+                        if (args->sigAlgo != ecc_dsa_sa_algo) {
                             WOLFSSL_MSG("Oops, peer sent ECC key but not in verify");
                         }
 
-                        if (hashAlgo == sha256_mac) {
+                        switch (args->hashAlgo) {
+                            case sha256_mac:
                             #ifndef NO_SHA256
                                 ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha256;
                                 ssl->buffers.digest.length = SHA256_DIGEST_SIZE;
                             #endif
-                        }
-                        else if (hashAlgo == sha384_mac) {
+                                break;
+                            case sha384_mac:
                             #ifdef WOLFSSL_SHA384
                                 ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha384;
                                 ssl->buffers.digest.length = SHA384_DIGEST_SIZE;
                             #endif
-                        }
-                        else if (hashAlgo == sha512_mac) {
+                                break;
+                            case sha512_mac:
                             #ifdef WOLFSSL_SHA512
                                 ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha512;
                                 ssl->buffers.digest.length = SHA512_DIGEST_SIZE;
                             #endif
+                                break;
                         }
                     }
                 }
             #endif /* HAVE_ECC */
 
                 /* Advance state and proceed */
-                ssl->options.keyShareState = KEYSHARE_DO;
-            } /* case KEYSHARE_BUILD */
+                ssl->options.asyncState = TLS_ASYNC_DO;
+            } /* case TLS_ASYNC_BUILD */
 
-            case KEYSHARE_DO:
+            case TLS_ASYNC_DO:
             {
             #ifndef NO_RSA
                 if (ssl->peerRsaKey != NULL && ssl->peerRsaKeyPresent != 0) {
                     WOLFSSL_MSG("Doing RSA peer cert verify");
 
                     ret = RsaVerify(ssl,
-                        input + idx,
-                        sz,
-                        &output,
+                        input + args->idx,
+                        args->sz,
+                        &args->output,
                         ssl->peerRsaKey,
                     #ifdef HAVE_PK_CALLBACKS
                         ssl->buffers.peerRsaKey.buffer,
@@ -19668,7 +20592,7 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                     #endif
                     );
                     if (ret >= 0) {
-                        sendSz = ret;
+                        args->sendSz = ret;
                         ret = 0;
                     }
                 }
@@ -19678,7 +20602,7 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                     WOLFSSL_MSG("Doing ECC peer cert verify");
 
                     ret = EccVerify(ssl,
-                        input + idx, sz,
+                        input + args->idx, args->sz,
                         ssl->buffers.digest.buffer, ssl->buffers.digest.length,
                         ssl->peerEccDsaKey,
                     #ifdef HAVE_PK_CALLBACKS
@@ -19698,75 +20622,91 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 }
 
                 /* Advance state and proceed */
-                ssl->options.keyShareState = KEYSHARE_VERIFY;
-            } /* case KEYSHARE_DO */
+                ssl->options.asyncState = TLS_ASYNC_VERIFY;
+            } /* case TLS_ASYNC_DO */
 
-            case KEYSHARE_VERIFY:
+            case TLS_ASYNC_VERIFY:
             {
             #ifndef NO_RSA
                 if (ssl->peerRsaKey != NULL && ssl->peerRsaKeyPresent != 0) {
                     if (IsAtLeastTLSv1_2(ssl)) {
                     #ifdef WOLFSSL_SMALL_STACK
-                        byte*  encodedSig = NULL;
+                        byte* encodedSig = NULL;
                     #else
-                        byte   encodedSig[MAX_ENCODED_SIG_SZ];
+                        byte  encodedSig[MAX_ENCODED_SIG_SZ];
                     #endif
-                        int    typeH = SHAh;
+                        int   typeH = SHAh;
 
+                    /* make sure a default is defined */
+                    #if !defined(NO_SHA)
                         ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha;
                         ssl->buffers.digest.length = SHA_DIGEST_SIZE;
+                    #elif !defined(NO_SHA256)
+                        ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha256;
+                        ssl->buffers.digest.length = SHA256_DIGEST_SIZE;
+                    #elif defined(WOLFSSL_SHA384)
+                        ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha384;
+                        ssl->buffers.digest.length = SHA384_DIGEST_SIZE;
+                    #elif defined(WOLFSSL_SHA512)
+                        ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha512;
+                        ssl->buffers.digest.length = SHA512_DIGEST_SIZE;
+                    #else
+                        #error No digest enabled for RSA sig verify
+                    #endif
 
                     #ifdef WOLFSSL_SMALL_STACK
-                        encodedSig = (byte*)XMALLOC(MAX_ENCODED_SIG_SZ, NULL,
-                                                               DYNAMIC_TYPE_TMP_BUFFER);
+                        encodedSig = (byte*)XMALLOC(MAX_ENCODED_SIG_SZ,
+                                            ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
                         if (encodedSig == NULL) {
                             ERROR_OUT(MEMORY_E, exit_dcv);
                         }
                     #endif
 
-                        if (sigAlgo != rsa_sa_algo) {
+                        if (args->sigAlgo != rsa_sa_algo) {
                             WOLFSSL_MSG("Oops, peer sent RSA key but not in verify");
                         }
 
-                        switch (hashAlgo) {
-                        #ifndef NO_SHA256
+                        switch (args->hashAlgo) {
                             case sha256_mac:
+                            #ifndef NO_SHA256
                                 typeH    = SHA256h;
                                 ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha256;
                                 ssl->buffers.digest.length = SHA256_DIGEST_SIZE;
+                            #endif /* !NO_SHA256 */
                                 break;
-                        #endif /* !NO_SHA256 */
-                        #ifdef WOLFSSL_SHA384
                             case sha384_mac:
+                            #ifdef WOLFSSL_SHA384
                                 typeH    = SHA384h;
                                 ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha384;
                                 ssl->buffers.digest.length = SHA384_DIGEST_SIZE;
+                            #endif /* WOLFSSL_SHA384 */
                                 break;
-                        #endif /* WOLFSSL_SHA384 */
-                        #ifdef WOLFSSL_SHA512
                             case sha512_mac:
+                            #ifdef WOLFSSL_SHA512
                                 typeH    = SHA512h;
                                 ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha512;
                                 ssl->buffers.digest.length = SHA512_DIGEST_SIZE;
+                            #endif /* WOLFSSL_SHA512 */
                                 break;
-                        #endif /* WOLFSSL_SHA512 */
                         } /* switch */
 
-                        sigSz = wc_EncodeSignature(encodedSig,
-                            ssl->buffers.digest.buffer, ssl->buffers.digest.length,
-                                                                            typeH);
+                        args->sigSz = wc_EncodeSignature(encodedSig,
+                            ssl->buffers.digest.buffer,
+                            ssl->buffers.digest.length, typeH);
 
-                        if (sendSz != sigSz || !output || XMEMCMP(output,
-                                encodedSig, min(sigSz, MAX_ENCODED_SIG_SZ)) != 0) {
+                        if (args->sendSz != args->sigSz || !args->output ||
+                            XMEMCMP(args->output, encodedSig,
+                                min(args->sigSz, MAX_ENCODED_SIG_SZ)) != 0) {
                             ret = VERIFY_CERT_ERROR;
                         }
 
                     #ifdef WOLFSSL_SMALL_STACK
-                        XFREE(encodedSig, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+                        XFREE(encodedSig, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
                     #endif
                     }
                     else {
-                        if (sendSz != FINISHED_SZ || !output || XMEMCMP(output,
+                        if (args->sendSz != FINISHED_SZ || !args->output ||
+                            XMEMCMP(args->output,
                                 &ssl->hsHashes->certHashes, FINISHED_SZ) != 0) {
                             ret = VERIFY_CERT_ERROR;
                         }
@@ -19775,57 +20715,40 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
             #endif /* !NO_RSA */
 
                 /* Advance state and proceed */
-                ssl->options.keyShareState = KEYSHARE_FINALIZE;
-            } /* case KEYSHARE_VERIFY */
+                ssl->options.asyncState = TLS_ASYNC_FINALIZE;
+            } /* case TLS_ASYNC_VERIFY */
 
-            case KEYSHARE_FINALIZE:
+            case TLS_ASYNC_FINALIZE:
             {
                 ssl->options.havePeerVerify = 1;
 
                 /* Set final index */
-                idx += sz;
-                *inOutIdx = idx;
+                args->idx += args->sz;
+                *inOutIdx = args->idx;
 
                 /* Advance state and proceed */
-                ssl->options.keyShareState = KEYSHARE_END;
-            } /* case KEYSHARE_FINALIZE */
+                ssl->options.asyncState = TLS_ASYNC_END;
+            } /* case TLS_ASYNC_FINALIZE */
 
-            case KEYSHARE_END:
+            case TLS_ASYNC_END:
             {
                 break;
             }
             default:
                 ret = INPUT_CASE_ERROR;
-        } /* switch(ssl->options.keyShareState) */
+        } /* switch(ssl->options.asyncState) */
 
     exit_dcv:
 
         WOLFSSL_LEAVE("DoCertificateVerify", ret);
 
-        /* Handle cleanup for stack variables here */
-
-
     #ifdef WOLFSSL_ASYNC_CRYPT
-        /* Handle WC_PENDING_E */
+        /* Handle async operation */
         if (ret == WC_PENDING_E) {
-            /* Store variables needed for async */
-            XMEMSET(&ssl->async, 0, sizeof(ssl->async));
-            ssl->async.output = output;
-            ssl->async.sendSz = sendSz;
-            ssl->async.idx = idx;
-            ssl->async.sigSz = sigSz;
-            ssl->async.length = sz;
-            ssl->async.sigAlgo = sigAlgo;
-            ssl->async.hashAlgo = hashAlgo;
-
             /* Mark message as not recevied so it can process again */
             ssl->msgsReceived.got_certificate_verify = 0;
 
-            /* Push event to queue */
-            ret = wolfAsync_EventQueuePush(&ssl->ctx->event_queue, &ssl->event);
-            if (ret == 0) {
-                return WC_PENDING_E;
-            }
+            return ret;
         }
     #endif /* WOLFSSL_ASYNC_CRYPT */
 
@@ -19834,6 +20757,7 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
         ssl->buffers.digest.length = 0;
 
         /* Final cleanup */
+        FreeDcvArgs(ssl, args);
         FreeKeyExchange(ssl);
 
         return ret;
@@ -19843,14 +20767,15 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
     int SendServerHelloDone(WOLFSSL* ssl)
     {
-        byte              *output;
-        int                sendSz = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
-        int                ret;
+        byte* output;
+        int   sendSz = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
+        int   ret;
+
+    #ifdef WOLFSSL_DTLS
+        if (ssl->options.dtls)
+            sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+    #endif
 
-        #ifdef WOLFSSL_DTLS
-            if (ssl->options.dtls)
-                sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
-        #endif
         /* check for available size */
         if ((ret = CheckAvailableSize(ssl, sendSz)) != 0)
             return ret;
@@ -19861,27 +20786,27 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
         AddHeaders(output, 0, server_hello_done, ssl);
 
-        #ifdef WOLFSSL_DTLS
-            if (IsDtlsNotSctpMode(ssl)) {
-                if ((ret = DtlsMsgPoolSave(ssl, output, sendSz)) != 0)
-                    return 0;
-            }
+    #ifdef WOLFSSL_DTLS
+        if (IsDtlsNotSctpMode(ssl)) {
+            if ((ret = DtlsMsgPoolSave(ssl, output, sendSz)) != 0)
+                return 0;
+        }
 
-            if (ssl->options.dtls)
-                DtlsSEQIncrement(ssl, CUR_ORDER);
-        #endif
+        if (ssl->options.dtls)
+            DtlsSEQIncrement(ssl, CUR_ORDER);
+    #endif
 
         ret = HashOutput(ssl, output, sendSz, 0);
             if (ret != 0)
                 return ret;
 
-#ifdef WOLFSSL_CALLBACKS
+    #ifdef WOLFSSL_CALLBACKS
         if (ssl->hsInfoOn)
             AddPacketName("ServerHelloDone", &ssl->handShakeInfo);
         if (ssl->toInfoOn)
             AddPacketInfo("ServerHelloDone", &ssl->timeoutInfo, output, sendSz,
                           ssl->heap);
-#endif
+    #endif
         ssl->options.serverState = SERVER_HELLODONE_COMPLETE;
 
         ssl->buffers.outputBuffer.length += sendSz;
@@ -20154,62 +21079,61 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
     }
 #endif /* WOLFSSL_DTLS */
 
+    typedef struct DckeArgs { /* DoClientKeyExchange working state; lives in ssl->async.args when WOLFSSL_ASYNC_CRYPT is defined, else on the stack */
+        byte*  output; /* not allocated; reset to NULL at the end of the rsa_kea build step */
+        word32 length; /* for rsa_kea: wc_RsaEncryptSize() of the private key, checked against the message */
+        word32 idx; /* current read offset into input; starts at *inOutIdx */
+        word32 begin; /* *inOutIdx on entry; (idx - begin) is bounds-checked against size */
+        word32 sigSz; /* NOTE(review): no use visible in these hunks - confirm it is needed */
+    } DckeArgs;
+
+    static void FreeDckeArgs(WOLFSSL* ssl, void* pArgs) /* cleanup hook for DckeArgs; installed as ssl->async.freeArgs in async builds */
+    {
+        DckeArgs* args = (DckeArgs*)pArgs;
+
+        (void)ssl; /* unused */
+        (void)args; /* nothing is freed here: DckeArgs.output is marked "not allocated" */
+    }
+
     static int DoClientKeyExchange(WOLFSSL* ssl, byte* input, word32* inOutIdx,
                                                                     word32 size)
     {
-        int    ret;
-        word32 length = 0;
-        word32 idx = *inOutIdx, begin = *inOutIdx;
-        byte*  output_lcl = NULL;
-        byte** output = &output_lcl;
-
-        /* suppress possible compiler warnings */
-        (void)input;
-        (void)size;
-        (void)length;
-        (void)idx;
-        (void)output;
+        int ret;
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        DckeArgs* args = (DckeArgs*)ssl->async.args;
+        typedef char args_test[sizeof(ssl->async.args) >= sizeof(*args) ? 1 : -1];
+        (void)sizeof(args_test);
+    #else
+        DckeArgs  args[1];
+    #endif
 
         WOLFSSL_ENTER("DoClientKeyExchange");
 
     #ifdef WOLFSSL_ASYNC_CRYPT
-        /* use async pointer for output */
-        output = &ssl->async.output;
-
-        ret = wolfAsync_EventPop(&ssl->event, WOLF_EVENT_TYPE_ASYNC_ANY);
+        ret = wolfSSL_AsyncPop(ssl, &ssl->options.asyncState);
         if (ret != WC_NOT_PENDING_E) {
-            WOLF_EVENT_TYPE eType = ssl->event.type;
-
-            /* Clear event */
-            XMEMSET(&ssl->event, 0, sizeof(ssl->event));
-
             /* Check for error */
-            if (ret < 0) {
+            if (ret < 0)
                 goto exit_dcke;
-            }
-            else  {
-                /* Restore variables needed for async */
-                idx = ssl->async.idx;
-                length = ssl->async.length;
-
-                /* Advance key share state if not wolfCrypt */
-                if (eType == WOLF_EVENT_TYPE_ASYNC_WOLFSSL) {
-                    ssl->options.keyShareState++;
-                }
-            }
         }
         else
     #endif /* WOLFSSL_ASYNC_CRYPT */
         {
             /* Reset state */
             ret = 0;
-            ssl->options.keyShareState = KEYSHARE_BEGIN;
+            ssl->options.asyncState = TLS_ASYNC_BEGIN;
+            XMEMSET(args, 0, sizeof(DckeArgs));
+            args->idx = *inOutIdx;
+            args->begin = *inOutIdx;
+        #ifdef WOLFSSL_ASYNC_CRYPT
+            ssl->async.freeArgs = FreeDckeArgs;
+        #endif
         }
 
         /* Do Client Key Exchange State Machine */
-        switch(ssl->options.keyShareState)
+        switch(ssl->options.asyncState)
         {
-            case KEYSHARE_BEGIN:
+            case TLS_ASYNC_BEGIN:
             {
                 /* Sanity checks */
                 if (ssl->options.side != WOLFSSL_SERVER_END) {
@@ -20234,7 +21158,7 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
                 if (ssl->options.verifyPeer && ssl->options.failNoCertxPSK) {
                     if (!ssl->options.havePeerCert &&
-                                                 !ssl->options.usingPSK_cipher){
+                                             !ssl->options.usingPSK_cipher) {
                         WOLFSSL_MSG("client didn't present peer cert");
                         return NO_PEER_CERT;
                     }
@@ -20254,7 +21178,9 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 #ifndef NO_RSA
                     case rsa_kea:
                     {
-                        if (!ssl->buffers.key->buffer) {
+                        /* make sure private key exists */
+                        if (ssl->buffers.key == NULL ||
+                                            ssl->buffers.key->buffer == NULL) {
                             ERROR_OUT(NO_PRIVATE_KEY, exit_dcke);
                         }
                         break;
@@ -20274,7 +21200,9 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 #ifdef HAVE_NTRU
                     case ntru_kea:
                     {
-                        if (!ssl->buffers.key->buffer) {
+                        /* make sure private key exists */
+                        if (ssl->buffers.key == NULL ||
+                                            ssl->buffers.key->buffer == NULL) {
                             ERROR_OUT(NO_PRIVATE_KEY, exit_dcke);
                         }
                         break;
@@ -20325,10 +21253,10 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 }
 
                 /* Advance state and proceed */
-                ssl->options.keyShareState = KEYSHARE_BUILD;
-            } /* KEYSHARE_BEGIN */
+                ssl->options.asyncState = TLS_ASYNC_BUILD;
+            } /* TLS_ASYNC_BEGIN */
 
-            case KEYSHARE_BUILD:
+            case TLS_ASYNC_BUILD:
             {
                 switch (ssl->specs.kea) {
                 #ifndef NO_RSA
@@ -20337,29 +21265,22 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         word32 i = 0;
                         int    keySz;
 
-                        ssl->sigKey = XMALLOC(sizeof(RsaKey), ssl->heap,
-                                                              DYNAMIC_TYPE_RSA);
-                        if (ssl->sigKey == NULL) {
-                            ERROR_OUT(MEMORY_E, exit_dcke);
-                        }
-                        ssl->sigType = DYNAMIC_TYPE_RSA;
-
-                        ret = wc_InitRsaKey_ex((RsaKey*)ssl->sigKey, ssl->heap,
-                                                                   ssl->devId);
+                        ssl->hsType = DYNAMIC_TYPE_RSA;
+                        ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
                         if (ret != 0) {
                             goto exit_dcke;
                         }
 
                         ret = wc_RsaPrivateKeyDecode(ssl->buffers.key->buffer,
-                            &i, (RsaKey*)ssl->sigKey, ssl->buffers.key->length);
+                            &i, (RsaKey*)ssl->hsKey, ssl->buffers.key->length);
                         if (ret != 0) {
                             goto exit_dcke;
                         }
-                        keySz = wc_RsaEncryptSize((RsaKey*)ssl->sigKey);
+                        keySz = wc_RsaEncryptSize((RsaKey*)ssl->hsKey);
                         if (keySz < 0) { /* test if keySz has error */
                             ERROR_OUT(keySz, exit_dcke);
                         }
-                        length = (word32)keySz;
+                        args->length = (word32)keySz;
 
                         if (keySz < ssl->options.minRsaKeySz) {
                             WOLFSSL_MSG("Peer RSA key is too small");
@@ -20370,25 +21291,25 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         if (ssl->options.tls) {
                             word16 check;
 
-                            if ((idx - begin) + OPAQUE16_LEN > size) {
+                            if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                                 ERROR_OUT(BUFFER_ERROR, exit_dcke);
                             }
 
-                            ato16(input + idx, &check);
-                            idx += OPAQUE16_LEN;
+                            ato16(input + args->idx, &check);
+                            args->idx += OPAQUE16_LEN;
 
-                            if ((word32)check != length) {
+                            if ((word32)check != args->length) {
                                 WOLFSSL_MSG("RSA explicit size doesn't match");
                                 ERROR_OUT(RSA_PRIVATE_ERROR, exit_dcke);
                             }
                         }
 
-                        if ((idx - begin) + length > size) {
+                        if ((args->idx - args->begin) + args->length > size) {
                             WOLFSSL_MSG("RSA message too big");
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
-                        *output = NULL;
+                        args->output = NULL;
                         break;
                     } /* rsa_kea */
                 #endif /* !NO_RSA */
@@ -20398,25 +21319,27 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         byte* pms = ssl->arrays->preMasterSecret;
                         word16 ci_sz;
 
-                        if ((idx - begin) + OPAQUE16_LEN > size) {
+                        if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
-                        ato16(input + idx, &ci_sz);
-                        idx += OPAQUE16_LEN;
+                        ato16(input + args->idx, &ci_sz);
+                        args->idx += OPAQUE16_LEN;
 
                         if (ci_sz > MAX_PSK_ID_LEN) {
                             ERROR_OUT(CLIENT_ID_ERROR, exit_dcke);
                         }
 
-                        if ((idx - begin) + ci_sz > size) {
+                        if ((args->idx - args->begin) + ci_sz > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
-                        XMEMCPY(ssl->arrays->client_identity, input + idx, ci_sz);
-                        idx += ci_sz;
+                        XMEMCPY(ssl->arrays->client_identity,
+                                                    input + args->idx, ci_sz);
+                        args->idx += ci_sz;
 
-                        ssl->arrays->client_identity[min(ci_sz, MAX_PSK_ID_LEN-1)] = 0;
+                        ssl->arrays->client_identity[
+                                        min(ci_sz, MAX_PSK_ID_LEN-1)] = 0;
                         ssl->arrays->psk_keySz = ssl->options.server_psk_cb(ssl,
                             ssl->arrays->client_identity, ssl->arrays->psk_key,
                             MAX_PSK_KEY_LEN);
@@ -20438,7 +21361,8 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         pms += OPAQUE16_LEN;
 
                         XMEMCPY(pms, ssl->arrays->psk_key, ssl->arrays->psk_keySz);
-                        ssl->arrays->preMasterSz = ssl->arrays->psk_keySz * 2 + 4;
+                        ssl->arrays->preMasterSz =
+                            (ssl->arrays->psk_keySz * 2) + (OPAQUE16_LEN * 2);
                         break;
                     }
                 #endif /* !NO_PSK */
@@ -20446,27 +21370,27 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                     case ntru_kea:
                     {
                         word16 cipherLen;
-                        word16 plainLen = sizeof(ssl->arrays->preMasterSecret);
+                        word16 plainLen = ENCRYPT_LEN;
 
-                        if ((idx - begin) + OPAQUE16_LEN > size) {
+                        if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
-                        ato16(input + idx, &cipherLen);
-                        idx += OPAQUE16_LEN;
+                        ato16(input + args->idx, &cipherLen);
+                        args->idx += OPAQUE16_LEN;
 
                         if (cipherLen > MAX_NTRU_ENCRYPT_SZ) {
                             ERROR_OUT(NTRU_KEY_ERROR, exit_dcke);
                         }
 
-                        if ((idx - begin) + cipherLen > size) {
+                        if ((args->idx - args->begin) + cipherLen > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
                         if (NTRU_OK != ntru_crypto_ntru_decrypt(
                                     (word16) ssl->buffers.key->length,
                                     ssl->buffers.key->buffer, cipherLen,
-                                    input + idx, &plainLen,
+                                    input + args->idx, &plainLen,
                                     ssl->arrays->preMasterSecret)) {
                             ERROR_OUT(NTRU_DECRYPT_ERROR, exit_dcke);
                         }
@@ -20475,7 +21399,7 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                             ERROR_OUT(NTRU_DECRYPT_ERROR, exit_dcke);
                         }
 
-                        idx += cipherLen;
+                        args->idx += cipherLen;
                         ssl->arrays->preMasterSz = plainLen;
                         break;
                     }
@@ -20489,14 +21413,8 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         if (ssl->specs.static_ecdh) {
                             word32 i = 0;
 
-                            ssl->sigKey = XMALLOC(sizeof(ecc_key), ssl->heap,
-                                                              DYNAMIC_TYPE_ECC);
-                            if (ssl->sigKey == NULL) {
-                                ERROR_OUT(MEMORY_E, exit_dcke);
-                            }
-                            ssl->sigType = DYNAMIC_TYPE_ECC;
-
-                            ret = wc_ecc_init_ex((ecc_key*)ssl->sigKey, ssl->heap, ssl->devId);
+                            ssl->hsType = DYNAMIC_TYPE_ECC;
+                            ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
                             if (ret != 0) {
                                 goto exit_dcke;
                             }
@@ -20504,10 +21422,10 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                             ret = wc_EccPrivateKeyDecode(
                                 ssl->buffers.key->buffer,
                                 &i,
-                                (ecc_key*)ssl->sigKey,
+                                (ecc_key*)ssl->hsKey,
                                 ssl->buffers.key->length);
                             if (ret == 0) {
-                                private_key = (ecc_key*)ssl->sigKey;
+                                private_key = (ecc_key*)ssl->hsKey;
                                 if (wc_ecc_size(private_key) <
                                                 ssl->options.minEccKeySz) {
                                     WOLFSSL_MSG("ECC key too small");
@@ -20517,16 +21435,18 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         }
 
                         /* import peer ECC key */
-                        if ((idx - begin) + OPAQUE8_LEN > size) {
+                        if ((args->idx - args->begin) + OPAQUE8_LEN > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
-                        length = input[idx++];
+                        args->length = input[args->idx++];
 
-                        if ((idx - begin) + length > size) {
+                        if ((args->idx - args->begin) + args->length > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
+                        ssl->arrays->preMasterSz = ENCRYPT_LEN;
+
                     #ifdef HAVE_PK_CALLBACKS
                         /* if callback then use it for shared secret */
                         if (ssl->ctx->EccSharedSecretCb != NULL) {
@@ -20542,14 +21462,8 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
                         if (ssl->peerEccKey == NULL) {
                             /* alloc/init on demand */
-                            ssl->peerEccKey = (ecc_key*)XMALLOC(
-                                sizeof(ecc_key), ssl->heap, DYNAMIC_TYPE_ECC);
-                            if (ssl->peerEccKey == NULL) {
-                                WOLFSSL_MSG("PeerEccKey Memory error");
-                                ERROR_OUT(MEMORY_E, exit_dcke);
-                            }
-                            ret = wc_ecc_init_ex(ssl->peerEccKey, ssl->heap,
-                                                                ssl->devId);
+                            ret = AllocKey(ssl, DYNAMIC_TYPE_ECC,
+                                (void**)&ssl->peerEccKey);
                             if (ret != 0) {
                                 goto exit_dcke;
                             }
@@ -20563,12 +21477,16 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                             }
                         }
 
-                        if (wc_ecc_import_x963_ex(input + idx, length,
-                                ssl->peerEccKey, private_key->dp->id)) {
+                        if (wc_ecc_import_x963_ex(input + args->idx, args->length,
+                                        ssl->peerEccKey, private_key->dp->id)) {
                             ERROR_OUT(ECC_PEERKEY_ERROR, exit_dcke);
                         }
 
                         ssl->peerEccKeyPresent = 1;
+
+                        if (ret != 0) {
+                            goto exit_dcke;
+                        }
                         break;
                     }
                 #endif /* HAVE_ECC */
@@ -20577,18 +21495,30 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                     {
                         word16 clientPubSz;
 
-                        if ((idx - begin) + OPAQUE16_LEN > size) {
+                        if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
-                        ato16(input + idx, &clientPubSz);
-                        idx += OPAQUE16_LEN;
+                        ato16(input + args->idx, &clientPubSz);
+                        args->idx += OPAQUE16_LEN;
 
-                        if ((idx - begin) + clientPubSz > size) {
+                        if ((args->idx - args->begin) + clientPubSz > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
-                        ssl->sigLen = clientPubSz;
+                        args->sigSz = clientPubSz;
+
+                        ret = AllocKey(ssl, DYNAMIC_TYPE_DH,
+                                            (void**)&ssl->buffers.serverDH_Key);
+                        if (ret != 0) {
+                            goto exit_dcke;
+                        }
+
+                        ret = wc_DhSetKey(ssl->buffers.serverDH_Key,
+                            ssl->buffers.serverDH_P.buffer,
+                            ssl->buffers.serverDH_P.length,
+                            ssl->buffers.serverDH_G.buffer,
+                            ssl->buffers.serverDH_G.length);
                         break;
                     }
                 #endif /* !NO_DH */
@@ -20598,38 +21528,52 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         word16 clientSz;
 
                         /* Read in the PSK hint */
-                        if ((idx - begin) + OPAQUE16_LEN > size) {
+                        if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
-                        ato16(input + idx, &clientSz);
-                        idx += OPAQUE16_LEN;
+                        ato16(input + args->idx, &clientSz);
+                        args->idx += OPAQUE16_LEN;
                         if (clientSz > MAX_PSK_ID_LEN) {
                             ERROR_OUT(CLIENT_ID_ERROR, exit_dcke);
                         }
 
-                        if ((idx - begin) + clientSz > size) {
+                        if ((args->idx - args->begin) + clientSz > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
-                        XMEMCPY(ssl->arrays->client_identity, input + idx, clientSz);
-                        idx += clientSz;
+                        XMEMCPY(ssl->arrays->client_identity, input + args->idx,
+                                                                    clientSz);
+                        args->idx += clientSz;
                         ssl->arrays->client_identity[
                             min(clientSz, MAX_PSK_ID_LEN-1)] = 0;
 
                         /* Read in the DHE business */
-                        if ((idx - begin) + OPAQUE16_LEN > size) {
+                        if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
-                        ato16(input + idx, &clientSz);
-                        idx += OPAQUE16_LEN;
+                        ato16(input + args->idx, &clientSz);
+                        args->idx += OPAQUE16_LEN;
 
-                        if ((idx - begin) + clientSz > size) {
+                        if ((args->idx - args->begin) + clientSz > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
-                        ssl->sigLen = clientSz;
+                        args->sigSz = clientSz;
+
+                        ret = AllocKey(ssl, DYNAMIC_TYPE_DH,
+                                            (void**)&ssl->buffers.serverDH_Key);
+                        if (ret != 0) {
+                            goto exit_dcke;
+                        }
+
+                        ret = wc_DhSetKey(ssl->buffers.serverDH_Key,
+                            ssl->buffers.serverDH_P.buffer,
+                            ssl->buffers.serverDH_P.length,
+                            ssl->buffers.serverDH_G.buffer,
+                            ssl->buffers.serverDH_G.length);
+
                         break;
                     }
                 #endif /* !NO_DH && !NO_PSK */
@@ -20639,36 +21583,38 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         word16 clientSz;
 
                         /* Read in the PSK hint */
-                        if ((idx - begin) + OPAQUE16_LEN > size) {
+                        if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
-                        ato16(input + idx, &clientSz);
-                        idx += OPAQUE16_LEN;
+                        ato16(input + args->idx, &clientSz);
+                        args->idx += OPAQUE16_LEN;
                         if (clientSz > MAX_PSK_ID_LEN) {
                             ERROR_OUT(CLIENT_ID_ERROR, exit_dcke);
                         }
-                        if ((idx - begin) + clientSz > size) {
+                        if ((args->idx - args->begin) + clientSz > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
                         XMEMCPY(ssl->arrays->client_identity,
-                                                       input + idx, clientSz);
-                        idx += clientSz;
+                                                   input + args->idx, clientSz);
+                        args->idx += clientSz;
                         ssl->arrays->client_identity[
                             min(clientSz, MAX_PSK_ID_LEN-1)] = 0;
 
                         /* import peer ECC key */
-                        if ((idx - begin) + OPAQUE8_LEN > size) {
+                        if ((args->idx - args->begin) + OPAQUE8_LEN > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
-                        length = input[idx++];
+                        args->length = input[args->idx++];
 
-                        if ((idx - begin) + length > size) {
+                        if ((args->idx - args->begin) + args->length > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
+                        args->sigSz = ENCRYPT_LEN - OPAQUE16_LEN;
+
                     #ifdef HAVE_PK_CALLBACKS
                         /* if callback then use it for shared secret */
                         if (ssl->ctx->EccSharedSecretCb != NULL) {
@@ -20683,14 +21629,8 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
                         if (ssl->peerEccKey == NULL) {
                             /* alloc/init on demand */
-                            ssl->peerEccKey = (ecc_key*)XMALLOC(
-                                sizeof(ecc_key), ssl->heap, DYNAMIC_TYPE_ECC);
-                            if (ssl->peerEccKey == NULL) {
-                                WOLFSSL_MSG("PeerEccKey Memory error");
-                                ERROR_OUT(MEMORY_E, exit_dcke);
-                            }
-                            ret = wc_ecc_init_ex(ssl->peerEccKey, ssl->heap,
-                                                                ssl->devId);
+                            ret = AllocKey(ssl, DYNAMIC_TYPE_ECC,
+                                (void**)&ssl->peerEccKey);
                             if (ret != 0) {
                                 goto exit_dcke;
                             }
@@ -20704,9 +21644,8 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                                 goto exit_dcke;
                             }
                         }
-
-                        if (wc_ecc_import_x963_ex(input + idx, length,
-                                ssl->peerEccKey, ssl->eccTempKey->dp->id)) {
+                        if (wc_ecc_import_x963_ex(input + args->idx, args->length,
+                                 ssl->peerEccKey, ssl->eccTempKey->dp->id)) {
                             ERROR_OUT(ECC_PEERKEY_ERROR, exit_dcke);
                         }
 
@@ -20724,21 +21663,22 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 }
 
                 /* Advance state and proceed */
-                ssl->options.keyShareState = KEYSHARE_DO;
-            } /* KEYSHARE_BUILD */
+                ssl->options.asyncState = TLS_ASYNC_DO;
+            } /* TLS_ASYNC_BUILD */
 
-            case KEYSHARE_DO:
+            case TLS_ASYNC_DO:
             {
                 switch (ssl->specs.kea) {
                 #ifndef NO_RSA
                     case rsa_kea:
                     {
+                        RsaKey* key = (RsaKey*)ssl->hsKey;
                         ret = RsaDec(ssl,
-                            input + idx,
-                            length,
-                            output,
-                            &ssl->sigLen,
-                            (RsaKey*)ssl->sigKey,
+                            input + args->idx,
+                            args->length,
+                            &args->output,
+                            &args->sigSz,
+                            key,
                         #if defined(HAVE_PK_CALLBACKS)
                             ssl->buffers.key->buffer,
                             ssl->buffers.key->length,
@@ -20767,15 +21707,13 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                     {
                         ecc_key* private_key = ssl->eccTempKey;
                         if (ssl->specs.static_ecdh) {
-                            private_key = (ecc_key*)ssl->sigKey;
+                            private_key = (ecc_key*)ssl->hsKey;
                         }
 
-                        ssl->arrays->preMasterSz = ENCRYPT_LEN;
-
                         /* Generate shared secret */
                         ret = EccSharedSecret(ssl,
                             private_key, ssl->peerEccKey,
-                            input + idx, &length,
+                            input + args->idx, &args->length,
                             ssl->arrays->preMasterSecret,
                             &ssl->arrays->preMasterSz,
                             WOLFSSL_SERVER_END,
@@ -20791,19 +21729,11 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 #ifndef NO_DH
                     case diffie_hellman_kea:
                     {
-                        word16 clientPubSz = (word16)ssl->sigLen;
-
-                        ret = DhAgree(ssl,
-                            ssl->buffers.serverDH_P.buffer,
-                            ssl->buffers.serverDH_P.length,
-                            ssl->buffers.serverDH_G.buffer,
-                            ssl->buffers.serverDH_G.length,
+                        ret = DhAgree(ssl, ssl->buffers.serverDH_Key,
                             ssl->buffers.serverDH_Priv.buffer,
-                            &ssl->buffers.serverDH_Priv.length,
-                            NULL,
-                            0,
-                            input + idx,
-                            clientPubSz,
+                            ssl->buffers.serverDH_Priv.length,
+                            input + args->idx,
+                            (word16)args->sigSz,
                             ssl->arrays->preMasterSecret,
                             &ssl->arrays->preMasterSz);
                         break;
@@ -20812,21 +21742,12 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 #if !defined(NO_DH) && !defined(NO_PSK)
                     case dhe_psk_kea:
                     {
-                        byte* pms = ssl->arrays->preMasterSecret;
-                        word16 clientSz = ssl->sigLen;
-
-                        ret = DhAgree(ssl,
-                            ssl->buffers.serverDH_P.buffer,
-                            ssl->buffers.serverDH_P.length,
-                            ssl->buffers.serverDH_G.buffer,
-                            ssl->buffers.serverDH_G.length,
+                        ret = DhAgree(ssl, ssl->buffers.serverDH_Key,
                             ssl->buffers.serverDH_Priv.buffer,
-                            &ssl->buffers.serverDH_Priv.length,
-                            NULL,
-                            0,
-                            input + idx,
-                            clientSz,
-                            pms + OPAQUE16_LEN,
+                            ssl->buffers.serverDH_Priv.length,
+                            input + args->idx,
+                            (word16)args->sigSz,
+                            ssl->arrays->preMasterSecret + OPAQUE16_LEN,
                             &ssl->arrays->preMasterSz);
                         break;
                     }
@@ -20834,14 +21755,12 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 #if defined(HAVE_ECC) && !defined(NO_PSK)
                     case ecdhe_psk_kea:
                     {
-                        ssl->sigLen = ENCRYPT_LEN - OPAQUE16_LEN;
-
                         /* Generate shared secret */
                         ret = EccSharedSecret(ssl,
                             ssl->eccTempKey, ssl->peerEccKey,
-                            input + idx, &length,
+                            input + args->idx, &args->length,
                             ssl->arrays->preMasterSecret + OPAQUE16_LEN,
-                            &ssl->sigLen,
+                            &args->sigSz,
                             WOLFSSL_SERVER_END,
                         #ifdef HAVE_PK_CALLBACKS
                             ssl->EccSharedSecretCtx
@@ -20862,20 +21781,20 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 }
 
                 /* Advance state and proceed */
-                ssl->options.keyShareState = KEYSHARE_VERIFY;
-            } /* KEYSHARE_DO */
+                ssl->options.asyncState = TLS_ASYNC_VERIFY;
+            } /* TLS_ASYNC_DO */
 
-            case KEYSHARE_VERIFY:
+            case TLS_ASYNC_VERIFY:
             {
                 switch (ssl->specs.kea) {
                 #ifndef NO_RSA
                     case rsa_kea:
                     {
                         /* Add the signature length to idx */
-                        idx += length;
+                        args->idx += args->length;
 
-                        if (ssl->sigLen == SECRET_LEN && *output != NULL) {
-                            XMEMCPY(ssl->arrays->preMasterSecret, *output, SECRET_LEN);
+                        if (args->sigSz == SECRET_LEN && args->output != NULL) {
+                            XMEMCPY(ssl->arrays->preMasterSecret, args->output, SECRET_LEN);
                             if (ssl->arrays->preMasterSecret[0] != ssl->chVersion.major ||
                                 ssl->arrays->preMasterSecret[1] != ssl->chVersion.minor) {
                                 ERROR_OUT(PMS_VERSION_ERROR, exit_dcke);
@@ -20903,15 +21822,14 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                     case ecc_diffie_hellman_kea:
                     {
                         /* skip past the imported peer key */
-                        idx += length;
+                        args->idx += args->length;
                         break;
                     }
                 #endif /* HAVE_ECC */
                 #ifndef NO_DH
                     case diffie_hellman_kea:
                     {
-                        word16 clientPubSz = (word16)ssl->sigLen;
-                        idx += clientPubSz;
+                        args->idx += (word16)args->sigSz;
                         break;
                     }
                 #endif /* !NO_DH */
@@ -20919,9 +21837,9 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                     case dhe_psk_kea:
                     {
                         byte* pms = ssl->arrays->preMasterSecret;
-                        word16 clientSz = ssl->sigLen;
+                        word16 clientSz = (word16)args->sigSz;
 
-                        idx += clientSz;
+                        args->idx += clientSz;
                         c16toa((word16)ssl->arrays->preMasterSz, pms);
                         ssl->arrays->preMasterSz += OPAQUE16_LEN;
                         pms += ssl->arrays->preMasterSz;
@@ -20940,8 +21858,10 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         c16toa((word16) ssl->arrays->psk_keySz, pms);
                         pms += OPAQUE16_LEN;
 
-                        XMEMCPY(pms, ssl->arrays->psk_key, ssl->arrays->psk_keySz);
-                        ssl->arrays->preMasterSz += ssl->arrays->psk_keySz + OPAQUE16_LEN;
+                        XMEMCPY(pms, ssl->arrays->psk_key,
+                                                    ssl->arrays->psk_keySz);
+                        ssl->arrays->preMasterSz += ssl->arrays->psk_keySz +
+                                                                OPAQUE16_LEN;
                         break;
                     }
                 #endif /* !NO_DH && !NO_PSK */
@@ -20949,13 +21869,14 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                     case ecdhe_psk_kea:
                     {
                         byte* pms = ssl->arrays->preMasterSecret;
+                        word16 clientSz = (word16)args->sigSz;
 
                         /* skip past the imported peer key */
-                        idx += length;
+                        args->idx += args->length;
 
                         /* Add preMasterSecret */
-                        c16toa((word16)ssl->sigLen, pms);
-                        ssl->arrays->preMasterSz += OPAQUE16_LEN + ssl->sigLen;
+                        c16toa(clientSz, pms);
+                        ssl->arrays->preMasterSz += OPAQUE16_LEN + clientSz;
                         pms += ssl->arrays->preMasterSz;
 
                         /* Use the PSK hint to look up the PSK and add it to the
@@ -20988,29 +21909,29 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 }
 
                 /* Advance state and proceed */
-                ssl->options.keyShareState = KEYSHARE_FINALIZE;
-            } /* KEYSHARE_VERIFY */
+                ssl->options.asyncState = TLS_ASYNC_FINALIZE;
+            } /* TLS_ASYNC_VERIFY */
 
-            case KEYSHARE_FINALIZE:
+            case TLS_ASYNC_FINALIZE:
             {
             #ifdef HAVE_QSH
                 word16 name;
 
                 if (ssl->options.haveQSH) {
                     /* extension name */
-                    ato16(input + idx, &name);
-                    idx += OPAQUE16_LEN;
+                    ato16(input + args->idx, &name);
+                    args->idx += OPAQUE16_LEN;
 
                     if (name == TLSX_QUANTUM_SAFE_HYBRID) {
                         int    qshSz;
                         /* if qshSz is larger than 0 it is the
                            length of buffer used */
                         if ((qshSz = TLSX_QSHCipher_Parse(ssl,
-                                input + idx,
-                                size - idx + begin, 1)) < 0) {
+                                input + args->idx,
+                                size - args->idx + args->begin, 1)) < 0) {
                             ERROR_OUT(qshSz, exit_dcke);
                         }
-                        idx += qshSz;
+                        args->idx += qshSz;
                     }
                     else {
                         /* unknown extension sent client ignored handshake */
@@ -21026,13 +21947,13 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 }
 
                 /* Advance state and proceed */
-                ssl->options.keyShareState = KEYSHARE_END;
-            } /* KEYSHARE_FINALIZE */
+                ssl->options.asyncState = TLS_ASYNC_END;
+            } /* TLS_ASYNC_FINALIZE */
 
-            case KEYSHARE_END:
+            case TLS_ASYNC_END:
             {
                 /* Set final index */
-                *inOutIdx = idx;
+                *inOutIdx = args->idx;
 
                 ssl->options.clientState = CLIENT_KEYEXCHANGE_COMPLETE;
             #ifndef NO_CERTS
@@ -21041,36 +21962,22 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 }
             #endif
                 break;
-            } /* KEYSHARE_END */
+            } /* TLS_ASYNC_END */
             default:
                 ret = INPUT_CASE_ERROR;
-        } /* switch(ssl->options.keyShareState) */
+        } /* switch(ssl->options.asyncState) */
 
     exit_dcke:
 
         WOLFSSL_LEAVE("DoClientKeyExchange", ret);
 
-        /* Handle cleanup for stack variables here */
-
-
     #ifdef WOLFSSL_ASYNC_CRYPT
-        /* Handle WC_PENDING_E */
+        /* Handle async operation */
         if (ret == WC_PENDING_E) {
-            /* Store variables needed for async */
-            output_lcl = ssl->async.output;
-            XMEMSET(&ssl->async, 0, sizeof(ssl->async));
-            ssl->async.idx = idx;
-            ssl->async.length = length;
-            ssl->async.output = output_lcl;
-
             /* Mark message as not recevied so it can process again */
             ssl->msgsReceived.got_client_key_exchange = 0;
 
-            /* Push event to queue */
-            ret = wolfAsync_EventQueuePush(&ssl->ctx->event_queue, &ssl->event);
-            if (ret == 0) {
-                return WC_PENDING_E;
-            }
+            return ret;
         }
     #endif /* WOLFSSL_ASYNC_CRYPT */
 
@@ -21079,6 +21986,7 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
         ssl->arrays->preMasterSz = 0;
 
         /* Final cleanup */
+        FreeDckeArgs(ssl, args);
         FreeKeyExchange(ssl);
 
         return ret;
@@ -21104,6 +22012,82 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 #endif /* HAVE_STUNNEL */
 #endif /* NO_WOLFSSL_SERVER */
 
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+int wolfSSL_AsyncPop(WOLFSSL* ssl, byte* state)
+{
+    int ret = 0;
+    WC_ASYNC_DEV* asyncDev;
+    WOLF_EVENT* event;
+    /* Pop ssl's pending async event; state (optional) advances when done */
+    if (ssl == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* check for pending async, no device set means WC_NOT_PENDING_E below */
+    asyncDev = ssl->async.dev;
+    if (asyncDev) {
+        /* grab event pointer */
+        event = &asyncDev->event;
+
+        ret = wolfAsync_EventPop(event, WOLF_EVENT_TYPE_ASYNC_WOLFSSL);
+        if (ret != WC_NOT_PENDING_E && ret != WC_PENDING_E) {
+            /* neither pending code: release the event, ret is passed back */
+            /* advance caller's state unless event is flagged to call again */
+            if (state && (asyncDev->event.flags & WC_ASYNC_FLAG_CALL_AGAIN) == 0) {
+                (*state)++;
+            }
+
+            /* clear event */
+            XMEMSET(&asyncDev->event, 0, sizeof(WOLF_EVENT));
+
+            /* clear async dev so the next call reports WC_NOT_PENDING_E */
+            ssl->async.dev = NULL;
+        }
+    }
+    else {
+        ret = WC_NOT_PENDING_E;
+    }
+
+    WOLFSSL_LEAVE("wolfSSL_AsyncPop", ret);
+
+    return ret;
+}
+
+int wolfSSL_AsyncPush(WOLFSSL* ssl, WC_ASYNC_DEV* asyncDev, word32 flags)
+{
+    int ret;
+    WOLF_EVENT* event;
+    /* Bind asyncDev to ssl and queue its event on ssl->ctx->event_queue */
+    if (ssl == NULL || asyncDev == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* grab event pointer */
+    event = &asyncDev->event;
+
+    /* init event for this ssl with the caller's flags */
+    ret = wolfAsync_EventInit(event, WOLF_EVENT_TYPE_ASYNC_WOLFSSL, ssl, flags);
+    if (ret == 0) {
+        ssl->async.dev = asyncDev;
+
+        /* place event into queue */
+        ret = wolfAsync_EventQueuePush(&ssl->ctx->event_queue, event);
+    }
+
+    /* success means return WC_PENDING_E, errors are returned unchanged */
+    if (ret == 0) {
+        ret = WC_PENDING_E;
+    }
+
+    WOLFSSL_LEAVE("wolfSSL_AsyncPush", ret);
+
+    return ret;
+}
+
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+
 #undef ERROR_OUT
 
 #endif /* WOLFCRYPT_ONLY */
diff --git a/src/keys.c b/src/keys.c
index 240460524..2417b0f23 100644
--- a/src/keys.c
+++ b/src/keys.c
@@ -2070,22 +2070,20 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
             dec->arc4 = (Arc4*)XMALLOC(sizeof(Arc4), heap, DYNAMIC_TYPE_CIPHER);
         if (dec && dec->arc4 == NULL)
             return MEMORY_E;
-#ifdef WOLFSSL_ASYNC_CRYPT
-        if (devId != INVALID_DEVID) {
-            if (enc) {
-                if (wc_Arc4AsyncInit(enc->arc4, devId) != 0) {
-                    WOLFSSL_MSG("Arc4AsyncInit failed in SetKeys");
-                    return ASYNC_INIT_E;
-                }
-            }
-            if (dec) {
-                if (wc_Arc4AsyncInit(dec->arc4, devId) != 0) {
-                    WOLFSSL_MSG("Arc4AsyncInit failed in SetKeys");
-                    return ASYNC_INIT_E;
-                }
+
+        if (enc) {
+            if (wc_Arc4Init(enc->arc4, heap, devId) != 0) {
+                WOLFSSL_MSG("Arc4Init failed in SetKeys");
+                return ASYNC_INIT_E;
             }
         }
-#endif
+        if (dec) {
+            if (wc_Arc4Init(dec->arc4, heap, devId) != 0) {
+                WOLFSSL_MSG("Arc4Init failed in SetKeys");
+                return ASYNC_INIT_E;
+            }
+        }
+
         if (side == WOLFSSL_CLIENT_END) {
             if (enc)
                 wc_Arc4SetKey(enc->arc4, keys->client_write_key, sz);
@@ -2103,7 +2101,7 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
         if (dec)
             dec->setup = 1;
     }
-#endif
+#endif /* BUILD_ARC4 */
 
 
 #if defined(HAVE_CHACHA) && defined(HAVE_POLY1305)
@@ -2165,7 +2163,8 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
         if (dec)
             dec->setup = 1;
     }
-#endif
+#endif /* HAVE_CHACHA && HAVE_POLY1305 */
+
 
 #ifdef HAVE_HC128
     /* check that buffer sizes are sufficient */
@@ -2214,7 +2213,7 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
         if (dec)
             dec->setup = 1;
     }
-#endif
+#endif /* HAVE_HC128 */
 
 #ifdef BUILD_RABBIT
     /* check that buffer sizes are sufficient */
@@ -2263,7 +2262,7 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
         if (dec)
             dec->setup = 1;
     }
-#endif
+#endif /* BUILD_RABBIT */
 
 #ifdef BUILD_DES3
     /* check that buffer sizes are sufficient */
@@ -2274,30 +2273,34 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
     if (specs->bulk_cipher_algorithm == wolfssl_triple_des) {
         int desRet = 0;
 
-        if (enc && enc->des3 == NULL)
-            enc->des3 = (Des3*)XMALLOC(sizeof(Des3), heap, DYNAMIC_TYPE_CIPHER);
-        if (enc && enc->des3 == NULL)
-            return MEMORY_E;
-        if (dec && dec->des3 == NULL)
-            dec->des3 = (Des3*)XMALLOC(sizeof(Des3), heap, DYNAMIC_TYPE_CIPHER);
-        if (dec && dec->des3 == NULL)
-            return MEMORY_E;
-#ifdef WOLFSSL_ASYNC_CRYPT
-        if (devId != INVALID_DEVID) {
-            if (enc) {
-                if (wc_Des3AsyncInit(enc->des3, devId) != 0) {
-                    WOLFSSL_MSG("Des3AsyncInit failed in SetKeys");
-                    return ASYNC_INIT_E;
-                }
-            }
-            if (dec) {
-                if (wc_Des3AsyncInit(dec->des3, devId) != 0) {
-                    WOLFSSL_MSG("Des3AsyncInit failed in SetKeys");
-                    return ASYNC_INIT_E;
-                }
+        if (enc) {
+            if (enc->des3 == NULL)
+                enc->des3 = (Des3*)XMALLOC(sizeof(Des3), heap, DYNAMIC_TYPE_CIPHER);
+            if (enc->des3 == NULL)
+                return MEMORY_E;
+            XMEMSET(enc->des3, 0, sizeof(Des3)); /* size of the Des3 allocation */
+        }
+        if (dec) {
+            if (dec->des3 == NULL)
+                dec->des3 = (Des3*)XMALLOC(sizeof(Des3), heap, DYNAMIC_TYPE_CIPHER);
+            if (dec->des3 == NULL)
+                return MEMORY_E;
+            XMEMSET(dec->des3, 0, sizeof(Des3));
+        }
+
+        if (enc) {
+            if (wc_Des3Init(enc->des3, heap, devId) != 0) {
+                WOLFSSL_MSG("Des3Init failed in SetKeys");
+                return ASYNC_INIT_E;
             }
         }
-#endif
+        if (dec) {
+            if (wc_Des3Init(dec->des3, heap, devId) != 0) {
+                WOLFSSL_MSG("Des3Init failed in SetKeys");
+                return ASYNC_INIT_E;
+            }
+        }
+
         if (side == WOLFSSL_CLIENT_END) {
             if (enc) {
                 desRet = wc_Des3_SetKey(enc->des3, keys->client_write_key,
@@ -2327,7 +2330,7 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
         if (dec)
             dec->setup = 1;
     }
-#endif
+#endif /* BUILD_DES3 */
 
 #ifdef BUILD_AES
     /* check that buffer sizes are sufficient */
@@ -2338,30 +2341,33 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
     if (specs->bulk_cipher_algorithm == wolfssl_aes) {
         int aesRet = 0;
 
-        if (enc && enc->aes == NULL)
-            enc->aes = (Aes*)XMALLOC(sizeof(Aes), heap, DYNAMIC_TYPE_CIPHER);
-        if (enc && enc->aes == NULL)
-            return MEMORY_E;
-        if (dec && dec->aes == NULL)
-            dec->aes = (Aes*)XMALLOC(sizeof(Aes), heap, DYNAMIC_TYPE_CIPHER);
-        if (dec && dec->aes == NULL)
-            return MEMORY_E;
-#ifdef WOLFSSL_ASYNC_CRYPT
-        if (devId != INVALID_DEVID) {
-            if (enc) {
-                if (wc_AesAsyncInit(enc->aes, devId) != 0) {
-                    WOLFSSL_MSG("AesAsyncInit failed in SetKeys");
-                    return ASYNC_INIT_E;
-                }
-            }
-            if (dec) {
-                if (wc_AesAsyncInit(dec->aes, devId) != 0) {
-                    WOLFSSL_MSG("AesAsyncInit failed in SetKeys");
-                    return ASYNC_INIT_E;
-                }
+        if (enc) {
+            if (enc->aes == NULL)
+                enc->aes = (Aes*)XMALLOC(sizeof(Aes), heap, DYNAMIC_TYPE_CIPHER);
+            if (enc->aes == NULL)
+                return MEMORY_E;
+            XMEMSET(enc->aes, 0, sizeof(Aes));
+        }
+        if (dec) {
+            if (dec->aes == NULL)
+                dec->aes = (Aes*)XMALLOC(sizeof(Aes), heap, DYNAMIC_TYPE_CIPHER);
+            if (dec->aes == NULL)
+                return MEMORY_E;
+            XMEMSET(dec->aes, 0, sizeof(Aes));
+        }
+        if (enc) {
+            if (wc_AesInit(enc->aes, heap, devId) != 0) {
+                WOLFSSL_MSG("AesInit failed in SetKeys");
+                return ASYNC_INIT_E;
             }
         }
-#endif
+        if (dec) {
+            if (wc_AesInit(dec->aes, heap, devId) != 0) {
+                WOLFSSL_MSG("AesInit failed in SetKeys");
+                return ASYNC_INIT_E;
+            }
+        }
+
         if (side == WOLFSSL_CLIENT_END) {
             if (enc) {
                 aesRet = wc_AesSetKey(enc->aes, keys->client_write_key,
@@ -2395,7 +2401,7 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
         if (dec)
             dec->setup = 1;
     }
-#endif
+#endif /* BUILD_AES */
 
 #ifdef BUILD_AESGCM
     /* check that buffer sizes are sufficient */
@@ -2412,14 +2418,33 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
     if (specs->bulk_cipher_algorithm == wolfssl_aes_gcm) {
         int gcmRet;
 
-        if (enc && enc->aes == NULL)
-            enc->aes = (Aes*)XMALLOC(sizeof(Aes), heap, DYNAMIC_TYPE_CIPHER);
-        if (enc && enc->aes == NULL)
-            return MEMORY_E;
-        if (dec && dec->aes == NULL)
-            dec->aes = (Aes*)XMALLOC(sizeof(Aes), heap, DYNAMIC_TYPE_CIPHER);
-        if (dec && dec->aes == NULL)
-            return MEMORY_E;
+        if (enc) {
+            if (enc->aes == NULL)
+                enc->aes = (Aes*)XMALLOC(sizeof(Aes), heap, DYNAMIC_TYPE_CIPHER);
+            if (enc->aes == NULL)
+                return MEMORY_E;
+            XMEMSET(enc->aes, 0, sizeof(Aes));
+        }
+        if (dec) {
+            if (dec->aes == NULL)
+                dec->aes = (Aes*)XMALLOC(sizeof(Aes), heap, DYNAMIC_TYPE_CIPHER);
+            if (dec->aes == NULL)
+                return MEMORY_E;
+            XMEMSET(dec->aes, 0, sizeof(Aes));
+        }
+
+        if (enc) {
+            if (wc_AesInit(enc->aes, heap, devId) != 0) {
+                WOLFSSL_MSG("AesInit failed in SetKeys");
+                return ASYNC_INIT_E;
+            }
+        }
+        if (dec) {
+            if (wc_AesInit(dec->aes, heap, devId) != 0) {
+                WOLFSSL_MSG("AesInit failed in SetKeys");
+                return ASYNC_INIT_E;
+            }
+        }
 
         if (side == WOLFSSL_CLIENT_END) {
             if (enc) {
@@ -2458,7 +2483,7 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
         if (dec)
             dec->setup = 1;
     }
-#endif
+#endif /* BUILD_AESGCM */
 
 #ifdef HAVE_AESCCM
     /* check that buffer sizes are sufficient (CCM is same size as GCM) */
@@ -2475,14 +2500,33 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
     if (specs->bulk_cipher_algorithm == wolfssl_aes_ccm) {
         int CcmRet;
 
-        if (enc && enc->aes == NULL)
-            enc->aes = (Aes*)XMALLOC(sizeof(Aes), heap, DYNAMIC_TYPE_CIPHER);
-        if (enc && enc->aes == NULL)
-            return MEMORY_E;
-        if (dec && dec->aes == NULL)
-            dec->aes = (Aes*)XMALLOC(sizeof(Aes), heap, DYNAMIC_TYPE_CIPHER);
-        if (dec && dec->aes == NULL)
-            return MEMORY_E;
+        if (enc) {
+            if (enc->aes == NULL)
+                enc->aes = (Aes*)XMALLOC(sizeof(Aes), heap, DYNAMIC_TYPE_CIPHER);
+            if (enc->aes == NULL)
+                return MEMORY_E;
+            XMEMSET(enc->aes, 0, sizeof(Aes));
+        }
+        if (dec) {
+            if (dec->aes == NULL)
+                dec->aes = (Aes*)XMALLOC(sizeof(Aes), heap, DYNAMIC_TYPE_CIPHER);
+            if (dec->aes == NULL)
+                return MEMORY_E;
+            XMEMSET(dec->aes, 0, sizeof(Aes));
+        }
+
+        if (enc) {
+            if (wc_AesInit(enc->aes, heap, devId) != 0) {
+                WOLFSSL_MSG("AesInit failed in SetKeys");
+                return ASYNC_INIT_E;
+            }
+        }
+        if (dec) {
+            if (wc_AesInit(dec->aes, heap, devId) != 0) {
+                WOLFSSL_MSG("AesInit failed in SetKeys");
+                return ASYNC_INIT_E;
+            }
+        }
 
         if (side == WOLFSSL_CLIENT_END) {
             if (enc) {
@@ -2529,7 +2573,7 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
         if (dec)
             dec->setup = 1;
     }
-#endif
+#endif /* HAVE_AESCCM */
 
 #ifdef HAVE_CAMELLIA
     /* check that buffer sizes are sufficient */
@@ -2581,7 +2625,7 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
         if (dec)
             dec->setup = 1;
     }
-#endif
+#endif /* HAVE_CAMELLIA */
 
 #ifdef HAVE_IDEA
     /* check that buffer sizes are sufficient */
@@ -2635,7 +2679,7 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
         if (dec)
             dec->setup = 1;
     }
-#endif
+#endif /* HAVE_IDEA */
 
 #ifdef HAVE_NULL_CIPHER
     if (specs->bulk_cipher_algorithm == wolfssl_cipher_null) {
diff --git a/src/ssl.c b/src/ssl.c
old mode 100644
new mode 100755
index 98fbe3a38..460bb26d4
--- a/src/ssl.c
+++ b/src/ssl.c
@@ -333,7 +333,7 @@ int wolfSSL_CTX_new_rng(WOLFSSL_CTX* ctx)
     }
 
 #ifndef HAVE_FIPS
-    ret = wc_InitRng_ex(rng, ctx->heap);
+    ret = wc_InitRng_ex(rng, ctx->heap, ctx->devId);
 #else
     ret = wc_InitRng(rng);
 #endif
@@ -361,7 +361,7 @@ WOLFSSL* wolfSSL_new(WOLFSSL_CTX* ctx)
 
     ssl = (WOLFSSL*) XMALLOC(sizeof(WOLFSSL), ctx->heap, DYNAMIC_TYPE_SSL);
     if (ssl)
-        if ( (ret = InitSSL(ssl, ctx)) < 0) {
+        if ( (ret = InitSSL(ssl, ctx, 0)) < 0) {
             FreeSSL(ssl, ctx->heap);
             ssl = 0;
         }
@@ -379,6 +379,167 @@ void wolfSSL_free(WOLFSSL* ssl)
     WOLFSSL_LEAVE("SSL_free", 0);
 }
 
+
+#ifdef HAVE_WRITE_DUP
+
+/*
+ * Release this side's count on the shared WriteDup object, the side that
+ * drops the count to zero frees the mutex and the object
+ *
+ * ssl WOLFSSL object, no return, destruction so make best attempt
+*/
+void FreeWriteDup(WOLFSSL* ssl)
+{
+    int doFree = 0;
+
+    WOLFSSL_ENTER("FreeWriteDup");
+
+    if (ssl->dupWrite) {
+        if (wc_LockMutex(&ssl->dupWrite->dupMutex) == 0) {
+            ssl->dupWrite->dupCount--;
+            if (ssl->dupWrite->dupCount == 0) {
+                doFree = 1;
+            } else {
+                WOLFSSL_MSG("WriteDup count not zero, no full free");
+            }
+            wc_UnLockMutex(&ssl->dupWrite->dupMutex);
+        }
+    }
+
+    if (doFree) {
+        WOLFSSL_MSG("Doing WriteDup full free, count to zero");
+        wc_FreeMutex(&ssl->dupWrite->dupMutex);
+        XFREE(ssl->dupWrite, ssl->heap, DYNAMIC_TYPE_WRITEDUP);
+        ssl->dupWrite = NULL; /* don't leave a dangling pointer behind */
+    }
+}
+
+
+/*
+ * duplicate existing ssl members into dup needed for writing
+ *
+ * dup write only WOLFSSL, receives the write state and encrypt ciphers
+ * ssl existing WOLFSSL, becomes the read side, its encrypt ciphers are zeroed
+ *
+ * 0 on success, MEMORY_E or BAD_MUTEX_E if the shared dupWrite setup fails
+*/
+static int DupSSL(WOLFSSL* dup, WOLFSSL* ssl)
+{
+    /* shared dupWrite setup, allocated from ssl->heap and zeroed */
+    ssl->dupWrite = (WriteDup*)XMALLOC(sizeof(WriteDup), ssl->heap,
+                                       DYNAMIC_TYPE_WRITEDUP);
+    if (ssl->dupWrite == NULL) {
+        return MEMORY_E;
+    }
+    XMEMSET(ssl->dupWrite, 0, sizeof(WriteDup));
+
+    if (wc_InitMutex(&ssl->dupWrite->dupMutex) != 0) {
+        XFREE(ssl->dupWrite, ssl->heap, DYNAMIC_TYPE_WRITEDUP);
+        ssl->dupWrite = NULL;
+        return BAD_MUTEX_E;
+    }
+    ssl->dupWrite->dupCount = 2;    /* both sides have a count to start */
+    dup->dupWrite = ssl->dupWrite ; /* each side uses the same object */
+
+    /* copy write parts over to dup writer */
+    XMEMCPY(&dup->specs,   &ssl->specs,   sizeof(CipherSpecs));
+    XMEMCPY(&dup->options, &ssl->options, sizeof(Options));
+    XMEMCPY(&dup->keys,    &ssl->keys,    sizeof(Keys));
+    XMEMCPY(&dup->encrypt, &ssl->encrypt, sizeof(Ciphers));
+    /* dup side now owns encrypt/write ciphers, ssl keeps no copy of them */
+    XMEMSET(&ssl->encrypt, 0, sizeof(Ciphers));
+
+    dup->IOCB_WriteCtx = ssl->IOCB_WriteCtx;
+    dup->wfd    = ssl->wfd;
+    dup->wflags = ssl->wflags;
+    dup->hmac   = ssl->hmac;
+#ifdef HAVE_TRUNCATED_HMAC
+    dup->truncated_hmac = ssl->truncated_hmac;
+#endif
+
+    /* unique side dup setup, dup writes and ssl reads from here on */
+    dup->dupSide = WRITE_DUP_SIDE;
+    ssl->dupSide = READ_DUP_SIDE;
+
+    return 0;
+}
+
+
+/*
+ * duplicate a WOLFSSL object post handshake for writing only
+ * turn existing object into read only.  Allows concurrent access from two
+ * different threads.
+ *
+ * ssl existing WOLFSSL object, handshake must be done and not yet dup'd
+ *
+ * return dup'd WOLFSSL object on success, NULL on any failure
+*/
+WOLFSSL* wolfSSL_write_dup(WOLFSSL* ssl)
+{
+    WOLFSSL* dup = NULL;
+    int ret = 0;
+
+    (void)ret;
+    WOLFSSL_ENTER("wolfSSL_write_dup");
+
+    if (ssl == NULL) {
+        return NULL;
+    }
+
+    if (ssl->options.handShakeDone == 0) {
+        WOLFSSL_MSG("wolfSSL_write_dup called before handshake complete");
+        return NULL;
+    }
+
+    if (ssl->dupWrite) {
+        WOLFSSL_MSG("wolfSSL_write_dup already called once");
+        return NULL;
+    }
+
+    dup = (WOLFSSL*) XMALLOC(sizeof(WOLFSSL), ssl->ctx->heap, DYNAMIC_TYPE_SSL);
+    if (dup) {
+        if ( (ret = InitSSL(dup, ssl->ctx, 1)) < 0) {
+            FreeSSL(dup, ssl->ctx->heap);
+            dup = NULL;
+        } else if ( (ret = DupSSL(dup, ssl)) < 0) {
+            FreeSSL(dup, ssl->ctx->heap);
+            dup = NULL;
+        }
+    }
+    /* ret holds the failing call's error code (or 0) for the trace below */
+    WOLFSSL_LEAVE("wolfSSL_write_dup", ret);
+
+    return dup;
+}
+
+
+/*
+ * Notify write dup side of fatal error or close notify
+ *
+ * ssl WOLFSSL object, ssl->dupWrite is dereferenced without a NULL check
+ * err Notify err, stored in the shared dupErr while holding dupMutex
+ *
+ * 0 on success, else the failing mutex lock/unlock return value
+*/
+int NotifyWriteSide(WOLFSSL* ssl, int err)
+{
+    int ret;
+
+    WOLFSSL_ENTER("NotifyWriteSide");
+
+    ret = wc_LockMutex(&ssl->dupWrite->dupMutex);
+    if (ret == 0) {
+        ssl->dupWrite->dupErr = err;
+        ret = wc_UnLockMutex(&ssl->dupWrite->dupMutex);
+    }
+
+    return ret;
+}
+
+
+#endif /* HAVE_WRITE_DUP */
+
+
 #ifdef HAVE_POLY1305
 /* set if to use old poly 1 for yes 0 to use new poly */
 int wolfSSL_use_old_poly(WOLFSSL* ssl, int value)
@@ -691,38 +852,38 @@ int wolfSSL_GetObjectSize(void)
     printf("sizeof suites           = %lu\n", sizeof(Suites));
     printf("sizeof ciphers(2)       = %lu\n", sizeof(Ciphers));
 #ifndef NO_RC4
-    printf("    sizeof arc4         = %lu\n", sizeof(Arc4));
+    printf("\tsizeof arc4         = %lu\n", sizeof(Arc4));
 #endif
-    printf("    sizeof aes          = %lu\n", sizeof(Aes));
+    printf("\tsizeof aes          = %lu\n", sizeof(Aes));
 #ifndef NO_DES3
-    printf("    sizeof des3         = %lu\n", sizeof(Des3));
+    printf("\tsizeof des3         = %lu\n", sizeof(Des3));
 #endif
 #ifndef NO_RABBIT
-    printf("    sizeof rabbit       = %lu\n", sizeof(Rabbit));
+    printf("\tsizeof rabbit       = %lu\n", sizeof(Rabbit));
 #endif
 #ifdef HAVE_CHACHA
-    printf("    sizeof chacha       = %lu\n", sizeof(ChaCha));
+    printf("\tsizeof chacha       = %lu\n", sizeof(ChaCha));
 #endif
     printf("sizeof cipher specs     = %lu\n", sizeof(CipherSpecs));
     printf("sizeof keys             = %lu\n", sizeof(Keys));
     printf("sizeof Hashes(2)        = %lu\n", sizeof(Hashes));
 #ifndef NO_MD5
-    printf("    sizeof MD5          = %lu\n", sizeof(Md5));
+    printf("\tsizeof MD5          = %lu\n", sizeof(Md5));
 #endif
 #ifndef NO_SHA
-    printf("    sizeof SHA          = %lu\n", sizeof(Sha));
+    printf("\tsizeof SHA          = %lu\n", sizeof(Sha));
 #endif
 #ifdef WOLFSSL_SHA224
     printf("    sizeof SHA224       = %lu\n", sizeof(Sha224));
 #endif
 #ifndef NO_SHA256
-    printf("    sizeof SHA256       = %lu\n", sizeof(Sha256));
+    printf("\tsizeof SHA256       = %lu\n", sizeof(Sha256));
 #endif
 #ifdef WOLFSSL_SHA384
-    printf("    sizeof SHA384       = %lu\n", sizeof(Sha384));
+    printf("\tsizeof SHA384       = %lu\n", sizeof(Sha384));
 #endif
 #ifdef WOLFSSL_SHA384
-    printf("    sizeof SHA512       = %lu\n", sizeof(Sha512));
+    printf("\tsizeof SHA512       = %lu\n", sizeof(Sha512));
 #endif
     printf("sizeof Buffers          = %lu\n", sizeof(Buffers));
     printf("sizeof Options          = %lu\n", sizeof(Options));
@@ -908,7 +1069,7 @@ int wolfSSL_GetOutputSize(WOLFSSL* ssl, int inSz)
     if (inSz > maxSize)
         return INPUT_SIZE_E;
 
-    return BuildMessage(ssl, NULL, 0, NULL, inSz, application_data, 0, 1);
+    return BuildMessage(ssl, NULL, 0, NULL, inSz, application_data, 0, 1, 0);
 }
 
 
@@ -983,24 +1144,24 @@ int wolfSSL_SetTmpDH(WOLFSSL* ssl, const unsigned char* p, int pSz,
         return SIDE_ERROR;
 
     if (ssl->buffers.serverDH_P.buffer && ssl->buffers.weOwnDH) {
-        XFREE(ssl->buffers.serverDH_P.buffer, ssl->heap, DYNAMIC_TYPE_DH);
+        XFREE(ssl->buffers.serverDH_P.buffer, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
         ssl->buffers.serverDH_P.buffer = NULL;
     }
     if (ssl->buffers.serverDH_G.buffer && ssl->buffers.weOwnDH) {
-        XFREE(ssl->buffers.serverDH_G.buffer, ssl->heap, DYNAMIC_TYPE_DH);
+        XFREE(ssl->buffers.serverDH_G.buffer, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
         ssl->buffers.serverDH_G.buffer = NULL;
     }
 
     ssl->buffers.weOwnDH = 1;  /* SSL owns now */
     ssl->buffers.serverDH_P.buffer = (byte*)XMALLOC(pSz, ssl->heap,
-                                                    DYNAMIC_TYPE_DH);
+                                                    DYNAMIC_TYPE_DH_BUFFER);
     if (ssl->buffers.serverDH_P.buffer == NULL)
         return MEMORY_E;
 
     ssl->buffers.serverDH_G.buffer = (byte*)XMALLOC(gSz, ssl->heap,
-                                                    DYNAMIC_TYPE_DH);
+                                                    DYNAMIC_TYPE_DH_BUFFER);
     if (ssl->buffers.serverDH_G.buffer == NULL) {
-        XFREE(ssl->buffers.serverDH_P.buffer, ssl->heap, DYNAMIC_TYPE_DH);
+        XFREE(ssl->buffers.serverDH_P.buffer, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
         ssl->buffers.serverDH_P.buffer = NULL;
         return MEMORY_E;
     }
@@ -1037,16 +1198,16 @@ int wolfSSL_CTX_SetTmpDH(WOLFSSL_CTX* ctx, const unsigned char* p, int pSz,
     if (pSz < ctx->minDhKeySz)
         return DH_KEY_SIZE_E;
 
-    XFREE(ctx->serverDH_P.buffer, ctx->heap, DYNAMIC_TYPE_DH);
-    XFREE(ctx->serverDH_G.buffer, ctx->heap, DYNAMIC_TYPE_DH);
+    XFREE(ctx->serverDH_P.buffer, ctx->heap, DYNAMIC_TYPE_DH_BUFFER);
+    XFREE(ctx->serverDH_G.buffer, ctx->heap, DYNAMIC_TYPE_DH_BUFFER);
 
-    ctx->serverDH_P.buffer = (byte*)XMALLOC(pSz, ctx->heap, DYNAMIC_TYPE_DH);
+    ctx->serverDH_P.buffer = (byte*)XMALLOC(pSz, ctx->heap, DYNAMIC_TYPE_DH_BUFFER);
     if (ctx->serverDH_P.buffer == NULL)
        return MEMORY_E;
 
-    ctx->serverDH_G.buffer = (byte*)XMALLOC(gSz, ctx->heap, DYNAMIC_TYPE_DH);
+    ctx->serverDH_G.buffer = (byte*)XMALLOC(gSz, ctx->heap, DYNAMIC_TYPE_DH_BUFFER);
     if (ctx->serverDH_G.buffer == NULL) {
-        XFREE(ctx->serverDH_P.buffer, ctx->heap, DYNAMIC_TYPE_DH);
+        XFREE(ctx->serverDH_P.buffer, ctx->heap, DYNAMIC_TYPE_DH_BUFFER);
         return MEMORY_E;
     }
 
@@ -1103,6 +1264,36 @@ int wolfSSL_write(WOLFSSL* ssl, const void* data, int sz)
     if (ssl == NULL || data == NULL || sz < 0)
         return BAD_FUNC_ARG;
 
+#ifdef HAVE_WRITE_DUP
+    { /* local variable scope */
+        int dupErr = 0;   /* local copy */
+
+        ret = 0;
+
+        if (ssl->dupWrite && ssl->dupSide == READ_DUP_SIDE) {
+            WOLFSSL_MSG("Read dup side cannot write");
+            return WRITE_DUP_WRITE_E;
+        }
+        if (ssl->dupWrite) {
+            if (wc_LockMutex(&ssl->dupWrite->dupMutex) != 0) {
+                return BAD_MUTEX_E;
+            }
+            dupErr = ssl->dupWrite->dupErr;
+            ret = wc_UnLockMutex(&ssl->dupWrite->dupMutex);
+        }
+
+        if (ret != 0) {
+            ssl->error = ret;  /* high priority fatal error */
+            return SSL_FATAL_ERROR;
+        }
+        if (dupErr != 0) {
+            WOLFSSL_MSG("Write dup error from other side");
+            ssl->error = dupErr;
+            return SSL_FATAL_ERROR;
+        }
+    }
+#endif
+
 #ifdef HAVE_ERRNO_H
     errno = 0;
 #endif
@@ -1127,6 +1318,13 @@ static int wolfSSL_read_internal(WOLFSSL* ssl, void* data, int sz, int peek)
     if (ssl == NULL || data == NULL || sz < 0)
         return BAD_FUNC_ARG;
 
+#ifdef HAVE_WRITE_DUP
+    if (ssl->dupWrite && ssl->dupSide == WRITE_DUP_SIDE) {
+        WOLFSSL_MSG("Write dup side cannot read");
+        return WRITE_DUP_READ_E;
+    }
+#endif
+
 #ifdef HAVE_ERRNO_H
         errno = 0;
 #endif
@@ -1147,6 +1345,21 @@ static int wolfSSL_read_internal(WOLFSSL* ssl, void* data, int sz, int peek)
 #endif
     ret = ReceiveData(ssl, (byte*)data, sz, peek);
 
+#ifdef HAVE_WRITE_DUP
+    if (ssl->dupWrite) {
+        if (ssl->error != 0 && ssl->error != WANT_READ &&
+                               ssl->error != WC_PENDING_E) {
+            int notifyErr;
+
+            WOLFSSL_MSG("Notifying write side of fatal read error");
+            notifyErr  = NotifyWriteSide(ssl, ssl->error);
+            if (notifyErr < 0) {
+                ret = ssl->error = notifyErr;
+            }
+        }
+    }
+#endif
+
     WOLFSSL_LEAVE("wolfSSL_read_internal()", ret);
 
     if (ret < 0)
@@ -1321,7 +1534,7 @@ int wolfSSL_UseOCSPStapling(WOLFSSL* ssl, byte status_type, byte options)
         return BAD_FUNC_ARG;
 
     return TLSX_UseCertificateStatusRequest(&ssl->extensions, status_type,
-                                                            options, ssl->heap);
+                                                options, ssl->heap, ssl->devId);
 }
 
 
@@ -1332,7 +1545,7 @@ int wolfSSL_CTX_UseOCSPStapling(WOLFSSL_CTX* ctx, byte status_type,
         return BAD_FUNC_ARG;
 
     return TLSX_UseCertificateStatusRequest(&ctx->extensions, status_type,
-                                                            options, ctx->heap);
+                                                options, ctx->heap, ctx->devId);
 }
 
 #endif /* HAVE_CERTIFICATE_STATUS_REQUEST */
@@ -1345,7 +1558,7 @@ int wolfSSL_UseOCSPStaplingV2(WOLFSSL* ssl, byte status_type, byte options)
         return BAD_FUNC_ARG;
 
     return TLSX_UseCertificateStatusRequestV2(&ssl->extensions, status_type,
-                                                            options, ssl->heap);
+                                                options, ssl->heap, ssl->devId);
 }
 
 
@@ -1356,7 +1569,7 @@ int wolfSSL_CTX_UseOCSPStaplingV2(WOLFSSL_CTX* ctx,
         return BAD_FUNC_ARG;
 
     return TLSX_UseCertificateStatusRequestV2(&ctx->extensions, status_type,
-                                                            options, ctx->heap);
+                                                options, ctx->heap, ctx->devId);
 }
 
 #endif /* HAVE_CERTIFICATE_STATUS_REQUEST_V2 */
@@ -1658,26 +1871,28 @@ int wolfSSL_Rehandshake(WOLFSSL* ssl)
 
 #ifndef NO_OLD_TLS
 #ifndef NO_MD5
-    wc_InitMd5(&ssl->hsHashes->hashMd5);
+    ret = wc_InitMd5_ex(&ssl->hsHashes->hashMd5, ssl->heap, ssl->devId);
+    if (ret !=0)
+        return ret;
 #endif
 #ifndef NO_SHA
-    ret = wc_InitSha(&ssl->hsHashes->hashSha);
+    ret = wc_InitSha_ex(&ssl->hsHashes->hashSha, ssl->heap, ssl->devId);
     if (ret !=0)
         return ret;
 #endif
 #endif /* NO_OLD_TLS */
 #ifndef NO_SHA256
-    ret = wc_InitSha256(&ssl->hsHashes->hashSha256);
+    ret = wc_InitSha256_ex(&ssl->hsHashes->hashSha256, ssl->heap, ssl->devId);
     if (ret !=0)
         return ret;
 #endif
 #ifdef WOLFSSL_SHA384
-    ret = wc_InitSha384(&ssl->hsHashes->hashSha384);
+    ret = wc_InitSha384_ex(&ssl->hsHashes->hashSha384, ssl->heap, ssl->devId);
     if (ret !=0)
         return ret;
 #endif
 #ifdef WOLFSSL_SHA512
-    ret = wc_InitSha512(&ssl->hsHashes->hashSha512);
+    ret = wc_InitSha512_ex(&ssl->hsHashes->hashSha512, ssl->heap, ssl->devId);
     if (ret !=0)
         return ret;
 #endif
@@ -2324,6 +2539,7 @@ int AllocDer(DerBuffer** pDer, word32 length, int type, void* heap)
         if (*pDer == NULL) {
             return MEMORY_ERROR;
         }
+        XMEMSET(*pDer, 0, sizeof(DerBuffer) + length);
 
         der = *pDer;
         der->type = type;
@@ -2994,20 +3210,29 @@ int AlreadySigner(WOLFSSL_CERT_MANAGER* cm, byte* hash)
 {
     Signer* signers;
     int     ret = 0;
-    word32  row = HashSigner(hash);
+    word32  row;
 
-    if (wc_LockMutex(&cm->caLock) != 0)
-        return  ret;
+    if (cm == NULL || hash == NULL) {
+        return ret;
+    }
+
+    row = HashSigner(hash);
+
+    if (wc_LockMutex(&cm->caLock) != 0) {
+        return ret;
+    }
     signers = cm->caTable[row];
     while (signers) {
         byte* subjectHash;
-        #ifndef NO_SKID
-            subjectHash = signers->subjectKeyIdHash;
-        #else
-            subjectHash = signers->subjectNameHash;
-        #endif
+
+    #ifndef NO_SKID
+        subjectHash = signers->subjectKeyIdHash;
+    #else
+        subjectHash = signers->subjectNameHash;
+    #endif
+
         if (XMEMCMP(hash, subjectHash, SIGNER_DIGEST_SIZE) == 0) {
-            ret = 1;
+            ret = 1; /* success */
             break;
         }
         signers = signers->next;
@@ -3212,7 +3437,7 @@ int AddTrustedPeer(WOLFSSL_CERT_MANAGER* cm, DerBuffer** pDer, int verify)
         XFREE(cert, NULL, DYNAMIC_TYPE_TMP_BUFFER);
         return ret;
     }
-    WOLFSSL_MSG("    Parsed new trusted peer cert");
+    WOLFSSL_MSG("\tParsed new trusted peer cert");
 
     peerCert = (TrustedPeerCert*)XMALLOC(sizeof(TrustedPeerCert), cm->heap,
                                                              DYNAMIC_TYPE_CERT);
@@ -3242,7 +3467,7 @@ int AddTrustedPeer(WOLFSSL_CERT_MANAGER* cm, DerBuffer** pDer, int verify)
     #endif
 
     if (AlreadyTrustedPeer(cm, subjectHash)) {
-        WOLFSSL_MSG("    Already have this CA, not adding again");
+        WOLFSSL_MSG("\tAlready have this CA, not adding again");
         (void)ret;
     }
     else {
@@ -3297,7 +3522,7 @@ int AddTrustedPeer(WOLFSSL_CERT_MANAGER* cm, DerBuffer** pDer, int verify)
                 wc_UnLockMutex(&cm->tpLock);
             }
             else {
-                WOLFSSL_MSG("    Trusted Peer Cert Mutex Lock failed");
+                WOLFSSL_MSG("\tTrusted Peer Cert Mutex Lock failed");
                 FreeDecodedCert(cert);
                 XFREE(cert, cm->heap, DYNAMIC_TYPE_TMP_BUFFER);
                 FreeTrustedPeer(peerCert, cm->heap);
@@ -3305,12 +3530,12 @@ int AddTrustedPeer(WOLFSSL_CERT_MANAGER* cm, DerBuffer** pDer, int verify)
             }
         }
 
-    WOLFSSL_MSG("    Freeing parsed trusted peer cert");
+    WOLFSSL_MSG("\tFreeing parsed trusted peer cert");
     FreeDecodedCert(cert);
     XFREE(cert, cm->heap, DYNAMIC_TYPE_TMP_BUFFER);
-    WOLFSSL_MSG("    Freeing der trusted peer cert");
+    WOLFSSL_MSG("\tFreeing der trusted peer cert");
     FreeDer(&der);
-    WOLFSSL_MSG("        OK Freeing der trusted peer cert");
+    WOLFSSL_MSG("\t\tOK Freeing der trusted peer cert");
     WOLFSSL_LEAVE("AddTrustedPeer", ret);
 
     return SSL_SUCCESS;
@@ -3345,7 +3570,7 @@ int AddCA(WOLFSSL_CERT_MANAGER* cm, DerBuffer** pDer, int type, int verify)
 
     InitDecodedCert(cert, der->buffer, der->length, cm->heap);
     ret = ParseCert(cert, CA_TYPE, verify, cm);
-    WOLFSSL_MSG("    Parsed new CA");
+    WOLFSSL_MSG("\tParsed new CA");
 
 #ifndef NO_SKID
     subjectHash = cert->extSubjKeyId;
@@ -3361,7 +3586,7 @@ int AddCA(WOLFSSL_CERT_MANAGER* cm, DerBuffer** pDer, int type, int verify)
                 if (cm->minRsaKeySz < 0 ||
                                    cert->pubKeySize < (word16)cm->minRsaKeySz) {
                     ret = RSA_KEY_SIZE_E;
-                    WOLFSSL_MSG("    CA RSA key size error");
+                    WOLFSSL_MSG("\tCA RSA key size error");
                 }
                 break;
             #endif /* !NO_RSA */
@@ -3370,19 +3595,19 @@ int AddCA(WOLFSSL_CERT_MANAGER* cm, DerBuffer** pDer, int type, int verify)
                 if (cm->minEccKeySz < 0 ||
                                    cert->pubKeySize < (word16)cm->minEccKeySz) {
                     ret = ECC_KEY_SIZE_E;
-                    WOLFSSL_MSG("    CA ECC key size error");
+                    WOLFSSL_MSG("\tCA ECC key size error");
                 }
                 break;
             #endif /* HAVE_ECC */
 
             default:
-                WOLFSSL_MSG("    No key size check done on CA");
+                WOLFSSL_MSG("\tNo key size check done on CA");
                 break; /* no size check if key type is not in switch */
         }
     }
 
     if (ret == 0 && cert->isCA == 0 && type != WOLFSSL_USER_CA) {
-        WOLFSSL_MSG("    Can't add as CA if not actually one");
+        WOLFSSL_MSG("\tCan't add as CA if not actually one");
         ret = NOT_CA_ERROR;
     }
 #ifndef ALLOW_INVALID_CERTSIGN
@@ -3390,12 +3615,12 @@ int AddCA(WOLFSSL_CERT_MANAGER* cm, DerBuffer** pDer, int type, int verify)
              (cert->extKeyUsage & KEYUSE_KEY_CERT_SIGN) == 0) {
         /* Intermediate CA certs are required to have the keyCertSign
         * extension set. User loaded root certs are not. */
-        WOLFSSL_MSG("    Doesn't have key usage certificate signing");
+        WOLFSSL_MSG("\tDoesn't have key usage certificate signing");
         ret = NOT_CA_ERROR;
     }
 #endif
     else if (ret == 0 && AlreadySigner(cm, subjectHash)) {
-        WOLFSSL_MSG("    Already have this CA, not adding again");
+        WOLFSSL_MSG("\tAlready have this CA, not adding again");
         (void)ret;
     }
     else if (ret == 0) {
@@ -3449,21 +3674,21 @@ int AddCA(WOLFSSL_CERT_MANAGER* cm, DerBuffer** pDer, int type, int verify)
                     cm->caCacheCallback(der->buffer, (int)der->length, type);
             }
             else {
-                WOLFSSL_MSG("    CA Mutex Lock failed");
+                WOLFSSL_MSG("\tCA Mutex Lock failed");
                 ret = BAD_MUTEX_E;
                 FreeSigner(signer, cm->heap);
             }
         }
     }
 
-    WOLFSSL_MSG("    Freeing Parsed CA");
+    WOLFSSL_MSG("\tFreeing Parsed CA");
     FreeDecodedCert(cert);
 #ifdef WOLFSSL_SMALL_STACK
     XFREE(cert, NULL, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
-    WOLFSSL_MSG("    Freeing der CA");
+    WOLFSSL_MSG("\tFreeing der CA");
     FreeDer(pDer);
-    WOLFSSL_MSG("        OK Freeing der CA");
+    WOLFSSL_MSG("\t\tOK Freeing der CA");
 
     WOLFSSL_LEAVE("AddCA", ret);
 
@@ -4105,6 +4330,7 @@ int ProcessBuffer(WOLFSSL_CTX* ctx, const unsigned char* buff,
     int           rsaKey = 0;
     int           resetSuites = 0;
     void*         heap = ctx ? ctx->heap : ((ssl) ? ssl->heap : NULL);
+    int           devId = ctx ? ctx->devId : ((ssl) ? ssl->devId : INVALID_DEVID);
 #ifdef WOLFSSL_SMALL_STACK
     EncryptedInfo* info = NULL;
 #else
@@ -4131,6 +4357,7 @@ int ProcessBuffer(WOLFSSL_CTX* ctx, const unsigned char* buff,
         return MEMORY_E;
 #endif
 
+    XMEMSET(info, 0, sizeof(EncryptedInfo));
     info->set      = 0;
     info->ctx      = ctx;
     info->consumed = 0;
@@ -4315,7 +4542,7 @@ int ProcessBuffer(WOLFSSL_CTX* ctx, const unsigned char* buff,
                 return MEMORY_E;
         #endif
 
-            ret = wc_InitRsaKey(key, 0);
+            ret = wc_InitRsaKey_ex(key, heap, devId);
             if (ret == 0) {
                 if (wc_RsaPrivateKeyDecode(der->buffer, &idx, key, der->length)
                     != 0) {
@@ -4349,9 +4576,9 @@ int ProcessBuffer(WOLFSSL_CTX* ctx, const unsigned char* buff,
                         resetSuites = 1;
                     }
                 }
-            }
 
-            wc_FreeRsaKey(key);
+                wc_FreeRsaKey(key);
+            }
 
         #ifdef WOLFSSL_SMALL_STACK
             XFREE(key, heap, DYNAMIC_TYPE_TMP_BUFFER);
@@ -4367,7 +4594,11 @@ int ProcessBuffer(WOLFSSL_CTX* ctx, const unsigned char* buff,
             word32  idx = 0;
             ecc_key key;
 
-            wc_ecc_init(&key);
+            ret = wc_ecc_init_ex(&key, heap, devId);
+            if (ret != 0) {
+                return ret;
+            }
+
             if (wc_EccPrivateKeyDecode(der->buffer, &idx, &key,
                                                         der->length) != 0) {
                 wc_ecc_free(&key);
@@ -7561,14 +7792,14 @@ int wolfSSL_CTX_set_cipher_list(WOLFSSL_CTX* ctx, const char* list)
         XMEMSET(ctx->suites, 0, sizeof(Suites));
     }
 
-    return (SetCipherList(ctx->suites, list)) ? SSL_SUCCESS : SSL_FAILURE;
+    return (SetCipherList(ctx, ctx->suites, list)) ? SSL_SUCCESS : SSL_FAILURE;
 }
 
 
 int wolfSSL_set_cipher_list(WOLFSSL* ssl, const char* list)
 {
     WOLFSSL_ENTER("wolfSSL_set_cipher_list");
-    return (SetCipherList(ssl->suites, list)) ? SSL_SUCCESS : SSL_FAILURE;
+    return (SetCipherList(ssl->ctx, ssl->suites, list)) ? SSL_SUCCESS : SSL_FAILURE;
 }
 
 
@@ -7888,31 +8119,38 @@ int wolfSSL_DTLS_SetCookieSecret(WOLFSSL* ssl,
                 if (IsDtlsNotSctpMode(ssl)) {
                     /* re-init hashes, exclude first hello and verify request */
 #ifndef NO_OLD_TLS
-                    wc_InitMd5(&ssl->hsHashes->hashMd5);
-                    if ( (ssl->error = wc_InitSha(&ssl->hsHashes->hashSha))
-                                                                         != 0) {
+                    if ( (ssl->error = wc_InitMd5_ex(&ssl->hsHashes->hashMd5,
+                                                 ssl->heap, ssl->devId)) != 0) {
+                        WOLFSSL_ERROR(ssl->error);
+                        return SSL_FATAL_ERROR;
+                    }
+                    if ( (ssl->error = wc_InitSha_ex(&ssl->hsHashes->hashSha,
+                                                 ssl->heap, ssl->devId)) != 0) {
                         WOLFSSL_ERROR(ssl->error);
                         return SSL_FATAL_ERROR;
                     }
 #endif
                     if (IsAtLeastTLSv1_2(ssl)) {
                         #ifndef NO_SHA256
-                            if ( (ssl->error = wc_InitSha256(
-                                            &ssl->hsHashes->hashSha256)) != 0) {
+                            if ( (ssl->error = wc_InitSha256_ex(
+                                            &ssl->hsHashes->hashSha256,
+                                            ssl->heap, ssl->devId)) != 0) {
                                 WOLFSSL_ERROR(ssl->error);
                                 return SSL_FATAL_ERROR;
                             }
                         #endif
                         #ifdef WOLFSSL_SHA384
-                            if ( (ssl->error = wc_InitSha384(
-                                            &ssl->hsHashes->hashSha384)) != 0) {
+                            if ( (ssl->error = wc_InitSha384_ex(
+                                            &ssl->hsHashes->hashSha384,
+                                            ssl->heap, ssl->devId)) != 0) {
                                 WOLFSSL_ERROR(ssl->error);
                                 return SSL_FATAL_ERROR;
                             }
                         #endif
                         #ifdef WOLFSSL_SHA512
-                            if ( (ssl->error = wc_InitSha512(
-                                            &ssl->hsHashes->hashSha512)) != 0) {
+                            if ( (ssl->error = wc_InitSha512_ex(
+                                            &ssl->hsHashes->hashSha512,
+                                            ssl->heap, ssl->devId)) != 0) {
                                 WOLFSSL_ERROR(ssl->error);
                                 return SSL_FATAL_ERROR;
                             }
@@ -8420,15 +8658,6 @@ int wolfSSL_Cleanup(void)
     if (wc_FreeMutex(&count_mutex) != 0)
         ret = BAD_MUTEX_E;
 
-#ifdef HAVE_ECC
-    #ifdef FP_ECC
-        wc_ecc_fp_free();
-    #endif
-    #ifdef ECC_CACHE_CURVE
-        wc_ecc_curve_cache_free();
-    #endif
-#endif
-
     if (wolfCrypt_Cleanup() != 0) {
         WOLFSSL_MSG("Error with wolfCrypt_Cleanup call");
         ret = WC_CLEANUP_E;
@@ -10244,7 +10473,7 @@ int wolfSSL_set_compression(WOLFSSL* ssl)
                                                     WOLFSSL_X509_STORE_CTX* ctx)
     {
         WOLFSSL_ENTER("wolfSSL_X509_STORE_CTX_get_current_cert");
-        if(ctx)
+        if (ctx)
             return ctx->current_cert;
         return NULL;
     }
@@ -10742,10 +10971,21 @@ int wolfSSL_set_compression(WOLFSSL* ssl)
         (void)type;
 
         WOLFSSL_ENTER("wolfSSL_EVP_BytesToKey");
-        wc_InitMd5(md5);
+
+        if (wc_InitMd5(md5) != 0) {
+        #ifdef WOLFSSL_SMALL_STACK
+            XFREE(md5, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        #endif
+            return 0;
+        }
 
         /* only support MD5 for now */
-        if (XSTRNCMP(md, "MD5", 3) != 0) return 0;
+        if (XSTRNCMP(md, "MD5", 3) != 0) {
+        #ifdef WOLFSSL_SMALL_STACK
+            XFREE(md5, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        #endif
+            return 0;
+        }
 
         /* only support CBC DES and AES for now */
         #ifndef NO_DES3
@@ -10924,11 +11164,13 @@ size_t wolfSSL_get_client_random(const WOLFSSL* ssl, unsigned char* out,
 #ifndef NO_MD5
     void wolfSSL_MD5_Init(WOLFSSL_MD5_CTX* md5)
     {
+        int ret;
         typedef char md5_test[sizeof(MD5_CTX) >= sizeof(Md5) ? 1 : -1];
         (void)sizeof(md5_test);
 
         WOLFSSL_ENTER("MD5_Init");
-        wc_InitMd5((Md5*)md5);
+        ret = wc_InitMd5((Md5*)md5);
+        (void)ret;
     }
 
 
@@ -11279,8 +11521,7 @@ int wolfSSL_EVP_MD_type(const WOLFSSL_EVP_MD *md)
     void wolfSSL_EVP_MD_CTX_init(WOLFSSL_EVP_MD_CTX* ctx)
     {
         WOLFSSL_ENTER("EVP_CIPHER_MD_CTX_init");
-        (void)ctx;
-        /* do nothing */
+        XMEMSET(ctx, 0, sizeof(WOLFSSL_EVP_MD_CTX));
     }
 
     const WOLFSSL_EVP_MD *wolfSSL_EVP_MD_CTX_md(const WOLFSSL_EVP_MD_CTX *ctx)
@@ -12057,6 +12298,14 @@ int wolfSSL_EVP_MD_type(const WOLFSSL_EVP_MD *md)
             return BAD_FUNC_ARG;
         }
 
+
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        /* compile-time validation of WC_ASYNC_DEV_SIZE */
+        typedef char async_test[WC_ASYNC_DEV_SIZE >= sizeof(WC_ASYNC_DEV) ?
+                                                                        1 : -1];
+        (void)sizeof(async_test);
+    #endif
+
         if (XSTRNCMP(type, "SHA256", 6) == 0) {
              ctx->macType = SHA256;
              wolfSSL_SHA256_Init(&(ctx->hash.sha256));
@@ -12220,6 +12469,7 @@ int wolfSSL_EVP_MD_type(const WOLFSSL_EVP_MD *md)
 #else
         Hmac  hmac[1];
 #endif
+        void* heap = NULL;
 
         WOLFSSL_ENTER("HMAC");
         if (!md)
@@ -12233,22 +12483,27 @@ int wolfSSL_EVP_MD_type(const WOLFSSL_EVP_MD *md)
             return NULL;
 
     #ifdef WOLFSSL_SMALL_STACK
-        hmac = (Hmac*)XMALLOC(sizeof(Hmac), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        hmac = (Hmac*)XMALLOC(sizeof(Hmac), heap, DYNAMIC_TYPE_TMP_BUFFER);
         if (hmac == NULL)
             return NULL;
     #endif
 
-        if (wc_HmacSetKey(hmac, type, (const byte*)key, key_len) == 0)
-            if (wc_HmacUpdate(hmac, d, n) == 0)
-                if (wc_HmacFinal(hmac, md) == 0) {
-                    if (md_len)
-                        *md_len = (type == MD5) ? (int)MD5_DIGEST_SIZE
-                                                : (int)SHA_DIGEST_SIZE;
-                    ret = md;
+        if (wc_HmacInit(hmac, heap, INVALID_DEVID) == 0) {
+            if (wc_HmacSetKey(hmac, type, (const byte*)key, key_len) == 0) {
+                if (wc_HmacUpdate(hmac, d, n) == 0) {
+                    if (wc_HmacFinal(hmac, md) == 0) {
+                        if (md_len)
+                            *md_len = (type == MD5) ? (int)MD5_DIGEST_SIZE
+                                                    : (int)SHA_DIGEST_SIZE;
+                        ret = md;
+                    }
                 }
+            }
+            wc_HmacFree(hmac);
+        }
 
     #ifdef WOLFSSL_SMALL_STACK
-        XFREE(hmac, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(hmac, heap, DYNAMIC_TYPE_TMP_BUFFER);
     #endif
 
         return ret;
@@ -12431,6 +12686,7 @@ int wolfSSL_EVP_MD_type(const WOLFSSL_EVP_MD *md)
 
 #ifdef KEEP_PEER_CERT
         FreeX509(&ssl->peerCert);
+        InitX509(&ssl->peerCert, 0, ssl->heap);
 #endif
 
         return SSL_SUCCESS;
@@ -13703,8 +13959,23 @@ int wolfSSL_set_session_id_context(WOLFSSL* ssl, const unsigned char* id,
 
 void wolfSSL_set_connect_state(WOLFSSL* ssl)
 {
-    (void)ssl;
-    /* client by default */
+    word16 haveRSA = 1;
+    word16 havePSK = 0;
+    /* NULL ssl is a no-op; only a server-side object is switched over */
+    if (ssl != NULL && ssl->options.side == WOLFSSL_SERVER_END) {
+        ssl->options.side = WOLFSSL_CLIENT_END;
+
+        #ifdef NO_RSA
+            haveRSA = 0;
+        #endif
+        #ifndef NO_PSK
+            havePSK = ssl->options.havePSK;
+        #endif
+        InitSuites(ssl->suites, ssl->version, haveRSA, havePSK,
+                   ssl->options.haveDH, ssl->options.haveNTRU,
+                   ssl->options.haveECDSAsig, ssl->options.haveECC,
+                   ssl->options.haveStaticECC, ssl->options.side);
+    }
 }
 #endif
 
@@ -15344,13 +15615,13 @@ long wolfSSL_set_tmp_dh(WOLFSSL *ssl, WOLFSSL_DH *dh)
     if (pSz <= 0 || gSz <= 0)
         return SSL_FATAL_ERROR;
 
-    p = (byte*)XMALLOC(pSz, ssl->heap, DYNAMIC_TYPE_DH);
+    p = (byte*)XMALLOC(pSz, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
     if (!p)
         return MEMORY_E;
 
-    g = (byte*)XMALLOC(gSz, ssl->heap, DYNAMIC_TYPE_DH);
+    g = (byte*)XMALLOC(gSz, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
     if (!g) {
-        XFREE(p, ssl->heap, DYNAMIC_TYPE_DH);
+        XFREE(p, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
         return MEMORY_E;
     }
 
@@ -15360,8 +15631,8 @@ long wolfSSL_set_tmp_dh(WOLFSSL *ssl, WOLFSSL_DH *dh)
     if (pSz >= 0 && gSz >= 0) /* Conversion successful */
         ret = wolfSSL_SetTmpDH(ssl, p, pSz, g, gSz);
 
-    XFREE(p, ssl->heap, DYNAMIC_TYPE_DH);
-    XFREE(g, ssl->heap, DYNAMIC_TYPE_DH);
+    XFREE(p, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
+    XFREE(g, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
 
     return pSz > 0 && gSz > 0 ? ret : SSL_FATAL_ERROR;
 }
@@ -16454,7 +16725,7 @@ void wolfSSL_BN_free(WOLFSSL_BIGNUM* bn)
     WOLFSSL_MSG("wolfSSL_BN_free");
     if (bn) {
         if (bn->internal) {
-            mp_clear((mp_int*)bn->internal);
+            mp_forcezero((mp_int*)bn->internal);
             XFREE(bn->internal, NULL, DYNAMIC_TYPE_BIGINT);
             bn->internal = NULL;
         }
@@ -18496,12 +18767,16 @@ int wolfSSL_RSA_sign(int type, const unsigned char* m,
             WOLFSSL_MSG("Bad Encode Signature");
         }
         else {
-            *sigLen = wc_RsaSSL_Sign(encodedSig, signSz, sigRet, outLen,
+            ret = wc_RsaSSL_Sign(encodedSig, signSz, sigRet, outLen,
                                   (RsaKey*)rsa->internal, rng);
-            if (*sigLen <= 0)
+            if (ret <= 0) {
                 WOLFSSL_MSG("Bad Rsa Sign");
-            else
+                ret = 0;
+            }
+            else {
+                *sigLen = ret; /* length, before ret is overwritten */
                 ret = SSL_SUCCESS;
+            }
         }
 
     }
@@ -18645,7 +18920,11 @@ void wolfSSL_HMAC_Init(WOLFSSL_HMAC_CTX* ctx, const void* key, int keylen,
 
     if (key && keylen) {
         WOLFSSL_MSG("keying hmac");
-        wc_HmacSetKey(&ctx->hmac, ctx->type, (const byte*)key, (word32)keylen);
+
+        if (wc_HmacInit(&ctx->hmac, NULL, INVALID_DEVID) == 0) {
+            wc_HmacSetKey(&ctx->hmac, ctx->type, (const byte*)key,
+                                                        (word32)keylen);
+        }
         /* OpenSSL compat, no error */
     }
 }
@@ -18707,9 +18986,10 @@ void wolfSSL_HMAC_Final(WOLFSSL_HMAC_CTX* ctx, unsigned char* hash,
 
 void wolfSSL_HMAC_cleanup(WOLFSSL_HMAC_CTX* ctx)
 {
-    (void)ctx;
-
     WOLFSSL_MSG("wolfSSL_HMAC_cleanup");
+
+    if (ctx)
+        wc_HmacFree(&ctx->hmac);
 }
 
 
@@ -20171,6 +20451,7 @@ int wolfSSL_EC_POINT_mul(const WOLFSSL_EC_GROUP *group, WOLFSSL_EC_POINT *r,
                          const WOLFSSL_BIGNUM *m, WOLFSSL_BN_CTX *ctx)
 {
     mp_int a, prime;
+    int ret;
 
     (void)ctx;
     (void)n;
@@ -20187,43 +20468,41 @@ int wolfSSL_EC_POINT_mul(const WOLFSSL_EC_GROUP *group, WOLFSSL_EC_POINT *r,
         WOLFSSL_MSG("No ECPoint internal set, do it");
 
         if (SetECPointInternal((WOLFSSL_EC_POINT *)q) != SSL_SUCCESS) {
-            WOLFSSL_MSG("SetECPointInternal failed");
+            WOLFSSL_MSG("SetECPointInternal q failed");
             return SSL_FAILURE;
         }
     }
 
     /* read the curve prime and a */
     if (mp_init_multi(&prime, &a, NULL, NULL, NULL, NULL) != MP_OKAY) {
-        WOLFSSL_MSG("wolfSSL_EC_POINT_mul init 'prime/A' failed");
-        return SSL_FAILURE;
-    }
-    if (mp_read_radix(&prime, ecc_sets[group->curve_idx].prime, 16) != MP_OKAY){
-        WOLFSSL_MSG("wolfSSL_EC_POINT_mul read 'prime' curve value failed");
-        return SSL_FAILURE;
-    }
-    if (mp_read_radix(&a, ecc_sets[group->curve_idx].Af, 16) != MP_OKAY){
-        WOLFSSL_MSG("wolfSSL_EC_POINT_mul read 'A' curve value failed");
         return SSL_FAILURE;
     }
 
+    ret = mp_read_radix(&prime, ecc_sets[group->curve_idx].prime, 16);
+    if (ret == MP_OKAY)
+        ret = mp_read_radix(&a, ecc_sets[group->curve_idx].Af, 16);
+
     /* r = q * m % prime */
-    if (wc_ecc_mulmod((mp_int*)m->internal, (ecc_point*)q->internal,
-                      (ecc_point*)r->internal, &a, &prime, 1) != MP_OKAY) {
-        WOLFSSL_MSG("ecc_mulmod failure");
-        mp_clear(&prime);
-        return SSL_FAILURE;
-    }
+    if (ret == MP_OKAY)
+        ret = wc_ecc_mulmod((mp_int*)m->internal, (ecc_point*)q->internal,
+                      (ecc_point*)r->internal, &a, &prime, 1);
 
     mp_clear(&a);
     mp_clear(&prime);
 
-    /* set the external value for the computed point */
-    if (SetECPointInternal(r) != SSL_SUCCESS) {
-        WOLFSSL_MSG("SetECPointInternal failed");
-        return SSL_FAILURE;
+    if (ret != MP_OKAY) {
+        return SSL_FAILURE; /* a and prime were already cleared above */
     }
 
-    return SSL_SUCCESS;
+    /* set the external value for the computed point */
+    if (ret == MP_OKAY) {
+        ret = SetECPointInternal(r);
+        if (ret != SSL_SUCCESS) {
+            WOLFSSL_MSG("SetECPointInternal r failed");
+        }
+    }
+
+    return ret;
 }
 
 void wolfSSL_EC_POINT_clear_free(WOLFSSL_EC_POINT *p)
@@ -20437,8 +20716,8 @@ WOLFSSL_ECDSA_SIG *wolfSSL_ECDSA_do_sign(const unsigned char *d, int dlen,
                 }
 
             }
-            mp_clear(&sig_r);
-            mp_clear(&sig_s);
+            mp_free(&sig_r);
+            mp_free(&sig_s);
         }
     }
 
@@ -22250,13 +22529,13 @@ long wolfSSL_CTX_set_tmp_dh(WOLFSSL_CTX* ctx, WOLFSSL_DH* dh)
     if(pSz <= 0 || gSz <= 0)
         return SSL_FATAL_ERROR;
 
-    p = (byte*)XMALLOC(pSz, ctx->heap, DYNAMIC_TYPE_DH);
+    p = (byte*)XMALLOC(pSz, ctx->heap, DYNAMIC_TYPE_DH_BUFFER);
     if(!p)
         return MEMORY_E;
 
-    g = (byte*)XMALLOC(gSz, ctx->heap, DYNAMIC_TYPE_DH);
+    g = (byte*)XMALLOC(gSz, ctx->heap, DYNAMIC_TYPE_DH_BUFFER);
     if(!g) {
-        XFREE(p, ctx->heap, DYNAMIC_TYPE_DH);
+        XFREE(p, ctx->heap, DYNAMIC_TYPE_DH_BUFFER);
         return MEMORY_E;
     }
 
@@ -22266,8 +22545,8 @@ long wolfSSL_CTX_set_tmp_dh(WOLFSSL_CTX* ctx, WOLFSSL_DH* dh)
     if(pSz >= 0 && gSz >= 0) /* Conversion successful */
         ret = wolfSSL_CTX_SetTmpDH(ctx, p, pSz, g, gSz);
 
-    XFREE(p, ctx->heap, DYNAMIC_TYPE_DH);
-    XFREE(g, ctx->heap, DYNAMIC_TYPE_DH);
+    XFREE(p, ctx->heap, DYNAMIC_TYPE_DH_BUFFER);
+    XFREE(g, ctx->heap, DYNAMIC_TYPE_DH_BUFFER);
 
     return pSz > 0 && gSz > 0 ? ret : SSL_FATAL_ERROR;
 }
@@ -22637,7 +22916,7 @@ const char * wolfSSL_get_servername(WOLFSSL* ssl, byte type)
 
 WOLFSSL_CTX* wolfSSL_set_SSL_CTX(WOLFSSL* ssl, WOLFSSL_CTX* ctx)
 {
-    if (ssl && ctx && SetSSL_CTX(ssl, ctx) == SSL_SUCCESS)
+    if (ssl && ctx && SetSSL_CTX(ssl, ctx, 0) == SSL_SUCCESS)
         return ssl->ctx;
     return NULL;
 }
@@ -23130,15 +23409,15 @@ int wolfSSL_AsyncPoll(WOLFSSL* ssl, WOLF_EVENT_FLAG flags)
         return BAD_FUNC_ARG;
     }
 
-    /* not filtering on "ssl", since its the asyncDev */
-    ret = wolfAsync_EventQueuePoll(&ssl->ctx->event_queue, NULL,
+    ret = wolfAsync_EventQueuePoll(&ssl->ctx->event_queue, ssl,
         events, sizeof(events)/sizeof(events), flags, &eventCount);
-    if (ret == 0 && eventCount > 0) {
-        ret = 1; /* Success */
+    if (ret == 0) {
+        ret = eventCount;
     }
 
     return ret;
 }
+
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
 #ifdef OPENSSL_EXTRA
diff --git a/src/tls.c b/src/tls.c
index 60e9a30ea..162906585 100755
--- a/src/tls.c
+++ b/src/tls.c
@@ -47,6 +47,8 @@
 #ifdef HAVE_QSH
     static int TLSX_AddQSHKey(QSHKey** list, QSHKey* key);
     static byte* TLSX_QSHKeyFind_Pub(QSHKey* qsh, word16* pubLen, word16 name);
+#endif
+#if defined(HAVE_NTRU) || defined(HAVE_QSH)
     static int TLSX_CreateNtruKey(WOLFSSL* ssl, int type);
 #endif
 
@@ -72,6 +74,7 @@
     #define P_HASH_MAX_SIZE SHA256_DIGEST_SIZE
 #endif
 
+
 /* compute p_hash for MD5, SHA-1, SHA-256, or SHA-384 for TLSv1 PRF */
 static int p_hash(byte* result, word32 resLen, const byte* secret,
                    word32 secLen, const byte* seed, word32 seedLen, int hash)
@@ -146,36 +149,41 @@ static int p_hash(byte* result, word32 resLen, const byte* secret,
 
     lastTime = times - 1;
 
-    if ((ret = wc_HmacSetKey(hmac, hash, secret, secLen)) == 0) {
-        if ((ret = wc_HmacUpdate(hmac, seed, seedLen)) == 0) { /* A0 = seed */
-            if ((ret = wc_HmacFinal(hmac, previous)) == 0) {   /* A1 */
-                for (i = 0; i < times; i++) {
+    ret = wc_HmacInit(hmac, NULL, INVALID_DEVID);
+    if (ret == 0) {
+        ret = wc_HmacSetKey(hmac, hash, secret, secLen);
+        if (ret == 0)
+            ret = wc_HmacUpdate(hmac, seed, seedLen); /* A0 = seed */
+        if (ret == 0)
+            ret = wc_HmacFinal(hmac, previous);       /* A1 */
+        if (ret == 0) {
+            for (i = 0; i < times; i++) {
+                ret = wc_HmacUpdate(hmac, previous, len);
+                if (ret != 0)
+                    break;
+                ret = wc_HmacUpdate(hmac, seed, seedLen);
+                if (ret != 0)
+                    break;
+                ret = wc_HmacFinal(hmac, current);
+                if (ret != 0)
+                    break;
+
+                if ((i == lastTime) && lastLen)
+                    XMEMCPY(&result[idx], current,
+                                             min(lastLen, P_HASH_MAX_SIZE));
+                else {
+                    XMEMCPY(&result[idx], current, len);
+                    idx += len;
                     ret = wc_HmacUpdate(hmac, previous, len);
                     if (ret != 0)
                         break;
-                    ret = wc_HmacUpdate(hmac, seed, seedLen);
+                    ret = wc_HmacFinal(hmac, previous);
                     if (ret != 0)
                         break;
-                    ret = wc_HmacFinal(hmac, current);
-                    if (ret != 0)
-                        break;
-
-                    if ((i == lastTime) && lastLen)
-                        XMEMCPY(&result[idx], current,
-                                                 min(lastLen, P_HASH_MAX_SIZE));
-                    else {
-                        XMEMCPY(&result[idx], current, len);
-                        idx += len;
-                        ret = wc_HmacUpdate(hmac, previous, len);
-                        if (ret != 0)
-                            break;
-                        ret = wc_HmacFinal(hmac, previous);
-                        if (ret != 0)
-                            break;
-                    }
                 }
             }
         }
+        wc_HmacFree(hmac);
     }
 
     ForceZero(previous,  P_HASH_MAX_SIZE);
@@ -388,21 +396,29 @@ int BuildTlsFinished(WOLFSSL* ssl, Hashes* hashes, const byte* sender)
 {
     int         ret;
     const byte* side;
-    byte        handshake_hash[HSHASH_SZ];
+    byte*       handshake_hash;
     word32      hashSz = HSHASH_SZ;
 
+    /* dynamically allocate here so async hardware can use the buffer directly */
+    handshake_hash = (byte*)XMALLOC(hashSz, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (handshake_hash == NULL)
+        return MEMORY_E;
+
     ret = BuildTlsHandshakeHash(ssl, handshake_hash, &hashSz);
-    if (ret < 0)
-        return ret;
+    if (ret == 0) {
+        if ( XSTRNCMP((const char*)sender, (const char*)client, SIZEOF_SENDER) == 0)
+            side = tls_client;
+        else
+            side = tls_server;
 
-    if ( XSTRNCMP((const char*)sender, (const char*)client, SIZEOF_SENDER) == 0)
-        side = tls_client;
-    else
-        side = tls_server;
+        ret = PRF((byte*)hashes, TLS_FINISHED_SZ, ssl->arrays->masterSecret,
+                   SECRET_LEN, side, FINISHED_LABEL_SZ, handshake_hash, hashSz,
+                   IsAtLeastTLSv1_2(ssl), ssl->specs.mac_algorithm);
+    }
 
-    return PRF((byte*)hashes, TLS_FINISHED_SZ, ssl->arrays->masterSecret,
-               SECRET_LEN, side, FINISHED_LABEL_SZ, handshake_hash, hashSz,
-               IsAtLeastTLSv1_2(ssl), ssl->specs.mac_algorithm);
+    XFREE(handshake_hash, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+    return ret;
 }
 
 
@@ -533,20 +549,27 @@ int MakeTlsMasterSecret(WOLFSSL* ssl)
 {
     int    ret;
 #ifdef HAVE_EXTENDED_MASTER
-    byte   handshake_hash[HSHASH_SZ];
-    word32 hashSz = HSHASH_SZ;
-
     if (ssl->options.haveEMS) {
+        byte*  handshake_hash;
+        word32 hashSz = HSHASH_SZ;
+
+        handshake_hash = (byte*)XMALLOC(HSHASH_SZ, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        if (handshake_hash == NULL)
+            return MEMORY_E;
 
         ret = BuildTlsHandshakeHash(ssl, handshake_hash, &hashSz);
-        if (ret < 0)
+        if (ret < 0) {
+            XFREE(handshake_hash, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
             return ret;
+        }
 
         ret = wolfSSL_MakeTlsExtendedMasterSecret(
                 ssl->arrays->masterSecret, SECRET_LEN,
                 ssl->arrays->preMasterSecret, ssl->arrays->preMasterSz,
                 handshake_hash, hashSz,
                 IsAtLeastTLSv1_2(ssl), ssl->specs.mac_algorithm);
+
+        XFREE(handshake_hash, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
     } else
 #endif
     ret = wolfSSL_MakeTlsMasterSecret(ssl->arrays->masterSecret, SECRET_LEN,
@@ -777,7 +800,7 @@ int TLS_hmac(WOLFSSL* ssl, byte* digest, const byte* in, word32 sz,
               int content, int verify)
 {
     Hmac hmac;
-    int  ret;
+    int  ret = 0;
     byte myInner[WOLFSSL_TLS_HMAC_INNER_SZ];
 
     if (ssl == NULL)
@@ -790,21 +813,22 @@ int TLS_hmac(WOLFSSL* ssl, byte* digest, const byte* in, word32 sz,
 
     wolfSSL_SetTlsHmacInner(ssl, myInner, sz, content, verify);
 
-    ret = wc_HmacSetKey(&hmac, wolfSSL_GetHmacType(ssl),
-                     wolfSSL_GetMacSecret(ssl, verify), ssl->specs.hash_size);
-    if (ret != 0)
-        return ret;
-    ret = wc_HmacUpdate(&hmac, myInner, sizeof(myInner));
-    if (ret != 0)
-        return ret;
-    ret = wc_HmacUpdate(&hmac, in, sz);                                /* content */
-    if (ret != 0)
-        return ret;
-    ret = wc_HmacFinal(&hmac, digest);
+    ret = wc_HmacInit(&hmac, NULL, ssl->devId);
     if (ret != 0)
         return ret;
 
-    return 0;
+    ret = wc_HmacSetKey(&hmac, wolfSSL_GetHmacType(ssl),
+                     wolfSSL_GetMacSecret(ssl, verify), ssl->specs.hash_size);
+    if (ret == 0) {
+        ret = wc_HmacUpdate(&hmac, myInner, sizeof(myInner));
+        if (ret == 0)
+            ret = wc_HmacUpdate(&hmac, in, sz);                    /* content */
+        if (ret == 0)
+            ret = wc_HmacFinal(&hmac, digest);
+    }
+    wc_HmacFree(&hmac);
+
+    return ret;
 }
 
 #ifdef HAVE_TLS_EXTENSIONS
@@ -2105,7 +2129,8 @@ static int TLSX_CSR_Parse(WOLFSSL* ssl, byte* input, word16 length,
 
             /* enable extension at ssl level */
             ret = TLSX_UseCertificateStatusRequest(&ssl->extensions,
-                                     csr->status_type, csr->options, ssl->heap);
+                                     csr->status_type, csr->options, ssl->heap,
+                                     ssl->devId);
             if (ret != SSL_SUCCESS)
                 return ret;
 
@@ -2181,7 +2206,7 @@ static int TLSX_CSR_Parse(WOLFSSL* ssl, byte* input, word16 length,
 
         /* accept the first good status_type and return */
         ret = TLSX_UseCertificateStatusRequest(&ssl->extensions, status_type,
-                                                                  0, ssl->heap);
+                                                      0, ssl->heap, ssl->devId);
         if (ret != SSL_SUCCESS)
             return ret; /* throw error */
 
@@ -2267,7 +2292,7 @@ int TLSX_CSR_ForceRequest(WOLFSSL* ssl)
 }
 
 int TLSX_UseCertificateStatusRequest(TLSX** extensions, byte status_type,
-                                                       byte options, void* heap)
+                                           byte options, void* heap, int devId)
 {
     CertificateStatusRequest* csr = NULL;
     int ret = 0;
@@ -2290,11 +2315,13 @@ int TLSX_UseCertificateStatusRequest(TLSX** extensions, byte status_type,
             if (options & WOLFSSL_CSR_OCSP_USE_NONCE) {
                 WC_RNG rng;
 
-#ifdef WOLFSSL_STATIC_MEMORY
-                if (wc_InitRng_ex(&rng, heap) == 0) {
-#else
-                if (wc_InitRng(&rng) == 0) {
-#endif
+            #ifndef HAVE_FIPS
+                ret = wc_InitRng_ex(&rng, heap, devId);
+            #else
+                ret = wc_InitRng(&rng);
+                (void)devId;
+            #endif
+                if (ret == 0) {
                     if (wc_RNG_GenerateBlock(&rng, csr->request.ocsp.nonce,
                                                         MAX_OCSP_NONCE_SZ) == 0)
                         csr->request.ocsp.nonceSz = MAX_OCSP_NONCE_SZ;
@@ -2467,7 +2494,7 @@ static int TLSX_CSR2_Parse(WOLFSSL* ssl, byte* input, word16 length,
             /* enable extension at ssl level */
             for (; csr2; csr2 = csr2->next) {
                 ret = TLSX_UseCertificateStatusRequestV2(&ssl->extensions,
-                                   csr2->status_type, csr2->options, ssl->heap);
+                       csr2->status_type, csr2->options, ssl->heap, ssl->devId);
                 if (ret != SSL_SUCCESS)
                     return ret;
 
@@ -2566,7 +2593,7 @@ static int TLSX_CSR2_Parse(WOLFSSL* ssl, byte* input, word16 length,
 
             /* accept the first good status_type and return */
             ret = TLSX_UseCertificateStatusRequestV2(&ssl->extensions,
-                                                     status_type, 0, ssl->heap);
+                                         status_type, 0, ssl->heap, ssl->devId);
             if (ret != SSL_SUCCESS)
                 return ret; /* throw error */
 
@@ -2679,7 +2706,7 @@ int TLSX_CSR2_ForceRequest(WOLFSSL* ssl)
 }
 
 int TLSX_UseCertificateStatusRequestV2(TLSX** extensions, byte status_type,
-                                                       byte options, void* heap)
+                                           byte options, void* heap, int devId)
 {
     TLSX* extension = NULL;
     CertificateStatusRequestItemV2* csr2 = NULL;
@@ -2709,11 +2736,13 @@ int TLSX_UseCertificateStatusRequestV2(TLSX** extensions, byte status_type,
             if (options & WOLFSSL_CSR2_OCSP_USE_NONCE) {
                 WC_RNG rng;
 
-#ifdef WOLFSSL_STATIC_MEMORY
-                if (wc_InitRng_ex(&rng, heap) == 0) {
-#else
-                if (wc_InitRng(&rng) == 0) {
-#endif
+            #ifndef HAVE_FIPS
+                ret = wc_InitRng_ex(&rng, heap, devId);
+            #else
+                ret = wc_InitRng(&rng);
+                (void)devId;
+            #endif
+                if (ret == 0) {
                     if (wc_RNG_GenerateBlock(&rng, csr2->request.ocsp[0].nonce,
                                                         MAX_OCSP_NONCE_SZ) == 0)
                         csr2->request.ocsp[0].nonceSz = MAX_OCSP_NONCE_SZ;
@@ -3399,6 +3428,7 @@ int TLSX_AddEmptyRenegotiationInfo(TLSX** extensions, void* heap)
 
 #ifdef HAVE_SESSION_TICKET
 
+#ifndef NO_WOLFSSL_CLIENT
 static void TLSX_SessionTicket_ValidateRequest(WOLFSSL* ssl)
 {
     TLSX*          extension = TLSX_Find(ssl->extensions, TLSX_SESSION_TICKET);
@@ -3413,6 +3443,7 @@ static void TLSX_SessionTicket_ValidateRequest(WOLFSSL* ssl)
         }
     }
 }
+#endif /* NO_WOLFSSL_CLIENT */
 
 
 static word16 TLSX_SessionTicket_GetSize(SessionTicket* ticket, int isRequest)
@@ -3447,7 +3478,9 @@ static int TLSX_SessionTicket_Parse(WOLFSSL* ssl, byte* input, word16 length,
         if (length != 0)
             return BUFFER_ERROR;
 
+#ifndef NO_WOLFSSL_CLIENT
         ssl->expect_session_ticket = 1;
+#endif
     }
 #ifndef NO_WOLFSSL_SERVER
     else {
@@ -3565,10 +3598,12 @@ int TLSX_UseSessionTicket(TLSX** extensions, SessionTicket* ticket, void* heap)
 /* Quantum-Safe-Hybrid                                                        */
 /******************************************************************************/
 
-#ifdef HAVE_QSH
+#if defined(HAVE_NTRU) && defined(HAVE_QSH)
 static WC_RNG* rng;
 static wolfSSL_Mutex* rngMutex;
+#endif
 
+#ifdef HAVE_QSH
 static void TLSX_QSH_FreeAll(QSHScheme* list, void* heap)
 {
     QSHScheme* current;
@@ -4440,6 +4475,8 @@ static int TLSX_CreateQSHKey(WOLFSSL* ssl, int type)
 {
     int ret;
 
+    (void)ssl;
+
     switch (type) {
 #ifdef HAVE_NTRU
         case WOLFSSL_NTRU_EESS439:
@@ -4488,10 +4525,11 @@ static int TLSX_AddQSHKey(QSHKey** list, QSHKey* key)
 }
 
 
-#ifdef HAVE_NTRU
+#if defined(HAVE_NTRU) || defined(HAVE_QSH)
 int TLSX_CreateNtruKey(WOLFSSL* ssl, int type)
 {
-    int ret;
+    int ret = -1;
+#ifdef HAVE_NTRU
     int ntruType;
 
     /* variable declarations for NTRU*/
@@ -4554,6 +4592,10 @@ int TLSX_CreateNtruKey(WOLFSSL* ssl, int type)
     temp->next = NULL;
 
     TLSX_AddQSHKey(&ssl->QSH_Key, temp);
+#endif
+
+    (void)ssl;
+    (void)type;
 
     return ret;
 }
diff --git a/support/wolfssl.pc b/support/wolfssl.pc
index 476dff764..c05107569 100644
--- a/support/wolfssl.pc
+++ b/support/wolfssl.pc
@@ -5,6 +5,6 @@ includedir=${prefix}/include
 
 Name: wolfssl
 Description: wolfssl C library.
-Version: 3.10.3
+Version: 3.10.4
 Libs: -L${libdir} -lwolfssl
 Cflags: -I${includedir}
diff --git a/tests/api.c b/tests/api.c
index 1297d659a..ee40e5839 100644
--- a/tests/api.c
+++ b/tests/api.c
@@ -112,6 +112,12 @@ static const char* failed = "failed";
     ;
 #endif
 
+enum {
+    TESTING_RSA = 1,
+    TESTING_ECC = 2
+};
+
+
 /*----------------------------------------------------------------------------*
  | Setup
  *----------------------------------------------------------------------------*/
@@ -228,20 +234,20 @@ static void test_wolfSSL_CTX_use_certificate_file(void)
     AssertNotNull(ctx = wolfSSL_CTX_new(wolfSSLv23_server_method()));
 
     /* invalid context */
-    AssertFalse(wolfSSL_CTX_use_certificate_file(NULL, svrCert,
+    AssertFalse(wolfSSL_CTX_use_certificate_file(NULL, svrCertFile,
                                                              SSL_FILETYPE_PEM));
     /* invalid cert file */
     AssertFalse(wolfSSL_CTX_use_certificate_file(ctx, bogusFile,
                                                              SSL_FILETYPE_PEM));
     /* invalid cert type */
-    AssertFalse(wolfSSL_CTX_use_certificate_file(ctx, svrCert, 9999));
+    AssertFalse(wolfSSL_CTX_use_certificate_file(ctx, svrCertFile, 9999));
 
 #ifdef NO_RSA
     /* rsa needed */
-    AssertFalse(wolfSSL_CTX_use_certificate_file(ctx, svrCert,SSL_FILETYPE_PEM));
+    AssertFalse(wolfSSL_CTX_use_certificate_file(ctx, svrCertFile,SSL_FILETYPE_PEM));
 #else
     /* success */
-    AssertTrue(wolfSSL_CTX_use_certificate_file(ctx, svrCert, SSL_FILETYPE_PEM));
+    AssertTrue(wolfSSL_CTX_use_certificate_file(ctx, svrCertFile, SSL_FILETYPE_PEM));
 #endif
 
     wolfSSL_CTX_free(ctx);
@@ -283,21 +289,21 @@ static void test_wolfSSL_CTX_use_PrivateKey_file(void)
     AssertNotNull(ctx = wolfSSL_CTX_new(wolfSSLv23_server_method()));
 
     /* invalid context */
-    AssertFalse(wolfSSL_CTX_use_PrivateKey_file(NULL, svrKey,
+    AssertFalse(wolfSSL_CTX_use_PrivateKey_file(NULL, svrKeyFile,
                                                              SSL_FILETYPE_PEM));
     /* invalid key file */
     AssertFalse(wolfSSL_CTX_use_PrivateKey_file(ctx, bogusFile,
                                                              SSL_FILETYPE_PEM));
     /* invalid key type */
-    AssertFalse(wolfSSL_CTX_use_PrivateKey_file(ctx, svrKey, 9999));
+    AssertFalse(wolfSSL_CTX_use_PrivateKey_file(ctx, svrKeyFile, 9999));
 
     /* success */
 #ifdef NO_RSA
     /* rsa needed */
-    AssertFalse(wolfSSL_CTX_use_PrivateKey_file(ctx, svrKey, SSL_FILETYPE_PEM));
+    AssertFalse(wolfSSL_CTX_use_PrivateKey_file(ctx, svrKeyFile, SSL_FILETYPE_PEM));
 #else
     /* success */
-    AssertTrue(wolfSSL_CTX_use_PrivateKey_file(ctx, svrKey, SSL_FILETYPE_PEM));
+    AssertTrue(wolfSSL_CTX_use_PrivateKey_file(ctx, svrKeyFile, SSL_FILETYPE_PEM));
 #endif
 
     wolfSSL_CTX_free(ctx);
@@ -319,11 +325,11 @@ static void test_wolfSSL_CTX_trust_peer_cert(void)
                                               SSL_FILETYPE_PEM) != SSL_SUCCESS);
     assert(wolfSSL_CTX_trust_peer_cert(ctx, bogusFile,
                                               SSL_FILETYPE_PEM) != SSL_SUCCESS);
-    assert(wolfSSL_CTX_trust_peer_cert(ctx, cliCert,
+    assert(wolfSSL_CTX_trust_peer_cert(ctx, cliCertFile,
                                              SSL_FILETYPE_ASN1) != SSL_SUCCESS);
 
     /* success */
-    assert(wolfSSL_CTX_trust_peer_cert(ctx, cliCert, SSL_FILETYPE_PEM)
+    assert(wolfSSL_CTX_trust_peer_cert(ctx, cliCertFile, SSL_FILETYPE_PEM)
                                                                 == SSL_SUCCESS);
 
     /* unload cert */
@@ -364,7 +370,7 @@ static void test_wolfSSL_CTX_load_verify_locations(void)
     AssertNotNull(ctx = wolfSSL_CTX_new(wolfSSLv23_client_method()));
 
     /* invalid context */
-    AssertFalse(wolfSSL_CTX_load_verify_locations(NULL, caCert, 0));
+    AssertFalse(wolfSSL_CTX_load_verify_locations(NULL, caCertFile, 0));
 
     /* invalid ca file */
     AssertFalse(wolfSSL_CTX_load_verify_locations(ctx, NULL,      0));
@@ -373,11 +379,11 @@ static void test_wolfSSL_CTX_load_verify_locations(void)
 #ifndef WOLFSSL_TIRTOS
     /* invalid path */
     /* not working... investigate! */
-    /* AssertFalse(wolfSSL_CTX_load_verify_locations(ctx, caCert, bogusFile)); */
+    /* AssertFalse(wolfSSL_CTX_load_verify_locations(ctx, caCertFile, bogusFile)); */
 #endif
 
     /* success */
-    AssertTrue(wolfSSL_CTX_load_verify_locations(ctx, caCert, 0));
+    AssertTrue(wolfSSL_CTX_load_verify_locations(ctx, caCertFile, 0));
 
     wolfSSL_CTX_free(ctx);
 #endif
@@ -392,16 +398,16 @@ static void test_wolfSSL_CTX_SetTmpDH_file(void)
 
     /* invalid context */
     AssertIntNE(SSL_SUCCESS, wolfSSL_CTX_SetTmpDH_file(NULL,
-                dhParam, SSL_FILETYPE_PEM));
+                dhParamFile, SSL_FILETYPE_PEM));
 
-    /* invalid dhParam file */
+    /* invalid DH parameter file */
     AssertIntNE(SSL_SUCCESS, wolfSSL_CTX_SetTmpDH_file(ctx,
                 NULL, SSL_FILETYPE_PEM));
     AssertIntNE(SSL_SUCCESS, wolfSSL_CTX_SetTmpDH_file(ctx,
                 bogusFile, SSL_FILETYPE_PEM));
 
     /* success */
-    AssertIntEQ(SSL_SUCCESS, wolfSSL_CTX_SetTmpDH_file(ctx, dhParam,
+    AssertIntEQ(SSL_SUCCESS, wolfSSL_CTX_SetTmpDH_file(ctx, dhParamFile,
                 SSL_FILETYPE_PEM));
 
     wolfSSL_CTX_free(ctx);
@@ -419,7 +425,7 @@ static void test_wolfSSL_CTX_SetTmpDH_buffer(void)
     AssertIntNE(SSL_SUCCESS, wolfSSL_CTX_SetTmpDH_buffer(NULL, dh_key_der_2048,
                 sizeof_dh_key_der_2048, SSL_FILETYPE_ASN1));
 
-    /* invalid dhParam file */
+    /* invalid DH parameter buffer */
     AssertIntNE(SSL_SUCCESS, wolfSSL_CTX_SetTmpDH_buffer(NULL, NULL,
                 0, SSL_FILETYPE_ASN1));
     AssertIntNE(SSL_SUCCESS, wolfSSL_CTX_SetTmpDH_buffer(ctx, dsa_key_der_2048,
@@ -447,8 +453,8 @@ static void test_server_wolfSSL_new(void)
     AssertNotNull(ctx_nocert = wolfSSL_CTX_new(wolfSSLv23_server_method()));
     AssertNotNull(ctx        = wolfSSL_CTX_new(wolfSSLv23_server_method()));
 
-    AssertTrue(wolfSSL_CTX_use_certificate_file(ctx, svrCert, SSL_FILETYPE_PEM));
-    AssertTrue(wolfSSL_CTX_use_PrivateKey_file(ctx, svrKey, SSL_FILETYPE_PEM));
+    AssertTrue(wolfSSL_CTX_use_certificate_file(ctx, svrCertFile, SSL_FILETYPE_PEM));
+    AssertTrue(wolfSSL_CTX_use_PrivateKey_file(ctx, svrKeyFile, SSL_FILETYPE_PEM));
 
     /* invalid context */
     AssertNull(ssl = wolfSSL_new(NULL));
@@ -476,7 +482,7 @@ static void test_client_wolfSSL_new(void)
     AssertNotNull(ctx_nocert = wolfSSL_CTX_new(wolfSSLv23_client_method()));
     AssertNotNull(ctx        = wolfSSL_CTX_new(wolfSSLv23_client_method()));
 
-    AssertTrue(wolfSSL_CTX_load_verify_locations(ctx, caCert, 0));
+    AssertTrue(wolfSSL_CTX_load_verify_locations(ctx, caCertFile, 0));
 
     /* invalid context */
     AssertNull(ssl = wolfSSL_new(NULL));
@@ -502,30 +508,30 @@ static void test_wolfSSL_SetTmpDH_file(void)
 
     AssertNotNull(ctx = wolfSSL_CTX_new(wolfSSLv23_server_method()));
 #ifndef NO_RSA
-    AssertTrue(wolfSSL_CTX_use_certificate_file(ctx, svrCert,
+    AssertTrue(wolfSSL_CTX_use_certificate_file(ctx, svrCertFile,
                 SSL_FILETYPE_PEM));
-    AssertTrue(wolfSSL_CTX_use_PrivateKey_file(ctx, svrKey,
+    AssertTrue(wolfSSL_CTX_use_PrivateKey_file(ctx, svrKeyFile,
                 SSL_FILETYPE_PEM));
 #else
-    AssertTrue(wolfSSL_CTX_use_certificate_file(ctx, eccCert,
+    AssertTrue(wolfSSL_CTX_use_certificate_file(ctx, eccCertFile,
                 SSL_FILETYPE_PEM));
-    AssertTrue(wolfSSL_CTX_use_PrivateKey_file(ctx, eccKey,
+    AssertTrue(wolfSSL_CTX_use_PrivateKey_file(ctx, eccKeyFile,
                 SSL_FILETYPE_PEM));
 #endif
     AssertNotNull(ssl = wolfSSL_new(ctx));
 
     /* invalid ssl */
     AssertIntNE(SSL_SUCCESS, wolfSSL_SetTmpDH_file(NULL,
-                dhParam, SSL_FILETYPE_PEM));
+                dhParamFile, SSL_FILETYPE_PEM));
 
-    /* invalid dhParam file */
+    /* invalid DH parameter file */
     AssertIntNE(SSL_SUCCESS, wolfSSL_SetTmpDH_file(ssl,
                 NULL, SSL_FILETYPE_PEM));
     AssertIntNE(SSL_SUCCESS, wolfSSL_SetTmpDH_file(ssl,
                 bogusFile, SSL_FILETYPE_PEM));
 
     /* success */
-    AssertIntEQ(SSL_SUCCESS, wolfSSL_SetTmpDH_file(ssl, dhParam,
+    AssertIntEQ(SSL_SUCCESS, wolfSSL_SetTmpDH_file(ssl, dhParamFile,
                 SSL_FILETYPE_PEM));
 
     wolfSSL_free(ssl);
@@ -550,7 +556,7 @@ static void test_wolfSSL_SetTmpDH_buffer(void)
     AssertIntNE(SSL_SUCCESS, wolfSSL_SetTmpDH_buffer(NULL, dh_key_der_2048,
                 sizeof_dh_key_der_2048, SSL_FILETYPE_ASN1));
 
-    /* invalid dhParam file */
+    /* invalid DH parameter buffer */
     AssertIntNE(SSL_SUCCESS, wolfSSL_SetTmpDH_buffer(NULL, NULL,
                 0, SSL_FILETYPE_ASN1));
     AssertIntNE(SSL_SUCCESS, wolfSSL_SetTmpDH_buffer(ssl, dsa_key_der_2048,
@@ -692,19 +698,19 @@ static THREAD_RETURN WOLFSSL_THREAD test_server_nofail(void* args)
     wolfSSL_CTX_set_default_passwd_cb(ctx, PasswordCallBack);
 #endif
 
-    if (wolfSSL_CTX_load_verify_locations(ctx, cliCert, 0) != SSL_SUCCESS)
+    if (wolfSSL_CTX_load_verify_locations(ctx, cliCertFile, 0) != SSL_SUCCESS)
     {
         /*err_sys("can't load ca file, Please run from wolfSSL home dir");*/
         goto done;
     }
-    if (wolfSSL_CTX_use_certificate_file(ctx, svrCert, SSL_FILETYPE_PEM)
+    if (wolfSSL_CTX_use_certificate_file(ctx, svrCertFile, SSL_FILETYPE_PEM)
             != SSL_SUCCESS)
     {
         /*err_sys("can't load server cert chain file, "
                 "Please run from wolfSSL home dir");*/
         goto done;
     }
-    if (wolfSSL_CTX_use_PrivateKey_file(ctx, svrKey, SSL_FILETYPE_PEM)
+    if (wolfSSL_CTX_use_PrivateKey_file(ctx, svrKeyFile, SSL_FILETYPE_PEM)
             != SSL_SUCCESS)
     {
         /*err_sys("can't load server key file, "
@@ -723,7 +729,7 @@ static THREAD_RETURN WOLFSSL_THREAD test_server_nofail(void* args)
 
 #ifdef NO_PSK
     #if !defined(NO_FILESYSTEM) && !defined(NO_DH)
-        wolfSSL_SetTmpDH_file(ssl, dhParam, SSL_FILETYPE_PEM);
+        wolfSSL_SetTmpDH_file(ssl, dhParamFile, SSL_FILETYPE_PEM);
     #elif !defined(NO_DH)
         SetDH(ssl);  /* will repick suites with DHE, higher priority than PSK */
     #endif
@@ -826,19 +832,19 @@ static void test_client_nofail(void* args)
     wolfSSL_CTX_set_default_passwd_cb(ctx, PasswordCallBack);
 #endif
 
-    if (wolfSSL_CTX_load_verify_locations(ctx, caCert, 0) != SSL_SUCCESS)
+    if (wolfSSL_CTX_load_verify_locations(ctx, caCertFile, 0) != SSL_SUCCESS)
     {
         /* err_sys("can't load ca file, Please run from wolfSSL home dir");*/
         goto done2;
     }
-    if (wolfSSL_CTX_use_certificate_file(ctx, cliCert, SSL_FILETYPE_PEM)
+    if (wolfSSL_CTX_use_certificate_file(ctx, cliCertFile, SSL_FILETYPE_PEM)
             != SSL_SUCCESS)
     {
         /*err_sys("can't load client cert file, "
                 "Please run from wolfSSL home dir");*/
         goto done2;
     }
-    if (wolfSSL_CTX_use_PrivateKey_file(ctx, cliKey, SSL_FILETYPE_PEM)
+    if (wolfSSL_CTX_use_PrivateKey_file(ctx, cliKeyFile, SSL_FILETYPE_PEM)
             != SSL_SUCCESS)
     {
         /*err_sys("can't load client key file, "
@@ -949,13 +955,13 @@ static THREAD_RETURN WOLFSSL_THREAD run_wolfssl_server(void* args)
 #endif
 
 
-    AssertIntEQ(SSL_SUCCESS, wolfSSL_CTX_load_verify_locations(ctx, cliCert, 0));
+    AssertIntEQ(SSL_SUCCESS, wolfSSL_CTX_load_verify_locations(ctx, cliCertFile, 0));
 
     AssertIntEQ(SSL_SUCCESS,
-               wolfSSL_CTX_use_certificate_file(ctx, svrCert, SSL_FILETYPE_PEM));
+               wolfSSL_CTX_use_certificate_file(ctx, svrCertFile, SSL_FILETYPE_PEM));
 
     AssertIntEQ(SSL_SUCCESS,
-                 wolfSSL_CTX_use_PrivateKey_file(ctx, svrKey, SSL_FILETYPE_PEM));
+                 wolfSSL_CTX_use_PrivateKey_file(ctx, svrKeyFile, SSL_FILETYPE_PEM));
 
     if (callbacks->ctx_ready)
         callbacks->ctx_ready(ctx);
@@ -981,7 +987,7 @@ static THREAD_RETURN WOLFSSL_THREAD run_wolfssl_server(void* args)
 
 #ifdef NO_PSK
     #if !defined(NO_FILESYSTEM) && !defined(NO_DH)
-        wolfSSL_SetTmpDH_file(ssl, dhParam, SSL_FILETYPE_PEM);
+        wolfSSL_SetTmpDH_file(ssl, dhParamFile, SSL_FILETYPE_PEM);
     #elif !defined(NO_DH)
         SetDH(ssl);  /* will repick suites with DHE, higher priority than PSK */
     #endif
@@ -1085,13 +1091,13 @@ static void run_wolfssl_client(void* args)
     wolfSSL_CTX_set_default_passwd_cb(ctx, PasswordCallBack);
 #endif
 
-    AssertIntEQ(SSL_SUCCESS, wolfSSL_CTX_load_verify_locations(ctx, caCert, 0));
+    AssertIntEQ(SSL_SUCCESS, wolfSSL_CTX_load_verify_locations(ctx, caCertFile, 0));
 
     AssertIntEQ(SSL_SUCCESS,
-               wolfSSL_CTX_use_certificate_file(ctx, cliCert, SSL_FILETYPE_PEM));
+               wolfSSL_CTX_use_certificate_file(ctx, cliCertFile, SSL_FILETYPE_PEM));
 
     AssertIntEQ(SSL_SUCCESS,
-                 wolfSSL_CTX_use_PrivateKey_file(ctx, cliKey, SSL_FILETYPE_PEM));
+                 wolfSSL_CTX_use_PrivateKey_file(ctx, cliKeyFile, SSL_FILETYPE_PEM));
 
     if (callbacks->ctx_ready)
         callbacks->ctx_ready(ctx);
@@ -2038,7 +2044,7 @@ static void test_wolfSSL_X509_NAME_get_entry(void)
         int idx;
 
     #ifndef NO_FILESYSTEM
-        x509 = wolfSSL_X509_load_certificate_file(cliCert, SSL_FILETYPE_PEM);
+        x509 = wolfSSL_X509_load_certificate_file(cliCertFile, SSL_FILETYPE_PEM);
         AssertNotNull(x509);
 
         name = X509_get_subject_name(x509);
@@ -3368,8 +3374,8 @@ static void test_wolfSSL_certs(void)
     printf(testingFmt, "wolfSSL_certs()");
 
     AssertNotNull(ctx = SSL_CTX_new(wolfSSLv23_server_method()));
-    AssertTrue(SSL_CTX_use_certificate_file(ctx, svrCert, SSL_FILETYPE_PEM));
-    AssertTrue(SSL_CTX_use_PrivateKey_file(ctx, svrKey, SSL_FILETYPE_PEM));
+    AssertTrue(SSL_CTX_use_certificate_file(ctx, svrCertFile, SSL_FILETYPE_PEM));
+    AssertTrue(SSL_CTX_use_PrivateKey_file(ctx, svrKeyFile, SSL_FILETYPE_PEM));
     AssertNotNull(ssl = SSL_new(ctx));
 
     AssertIntEQ(wolfSSL_check_private_key(ssl), SSL_SUCCESS);
@@ -3379,7 +3385,7 @@ static void test_wolfSSL_certs(void)
     #endif /* HAVE_PK_CALLBACKS */
 
     /* create and use x509 */
-    x509 = wolfSSL_X509_load_certificate_file(cliCert, SSL_FILETYPE_PEM);
+    x509 = wolfSSL_X509_load_certificate_file(cliCertFile, SSL_FILETYPE_PEM);
     AssertNotNull(x509);
     AssertIntEQ(SSL_use_certificate(ssl, x509), SSL_SUCCESS);
 
@@ -3535,8 +3541,8 @@ static void test_wolfSSL_private_keys(void)
     OpenSSL_add_all_algorithms();
 
     AssertNotNull(ctx = SSL_CTX_new(wolfSSLv23_server_method()));
-    AssertTrue(SSL_CTX_use_certificate_file(ctx, svrCert, SSL_FILETYPE_PEM));
-    AssertTrue(SSL_CTX_use_PrivateKey_file(ctx, svrKey, SSL_FILETYPE_PEM));
+    AssertTrue(SSL_CTX_use_certificate_file(ctx, svrCertFile, SSL_FILETYPE_PEM));
+    AssertTrue(SSL_CTX_use_PrivateKey_file(ctx, svrKeyFile, SSL_FILETYPE_PEM));
     AssertNotNull(ssl = SSL_new(ctx));
 
     AssertIntEQ(wolfSSL_check_private_key(ssl), SSL_SUCCESS);
@@ -3629,8 +3635,8 @@ static void test_wolfSSL_tmp_dh(void)
     printf(testingFmt, "wolfSSL_tmp_dh()");
 
     AssertNotNull(ctx = SSL_CTX_new(wolfSSLv23_server_method()));
-    AssertTrue(SSL_CTX_use_certificate_file(ctx, svrCert, SSL_FILETYPE_PEM));
-    AssertTrue(SSL_CTX_use_PrivateKey_file(ctx, svrKey, SSL_FILETYPE_PEM));
+    AssertTrue(SSL_CTX_use_certificate_file(ctx, svrCertFile, SSL_FILETYPE_PEM));
+    AssertTrue(SSL_CTX_use_PrivateKey_file(ctx, svrKeyFile, SSL_FILETYPE_PEM));
     AssertNotNull(ssl = SSL_new(ctx));
 
     f = fopen(file, "rb");
@@ -3727,7 +3733,9 @@ static void test_wolfSSL_ERR_peek_last_error_line(void)
     tcp_ready ready;
     func_args client_args;
     func_args server_args;
+#ifndef SINGLE_THREADED
     THREAD_TYPE serverThread;
+#endif
     callback_functions client_cb;
     callback_functions server_cb;
     int         line = 0;
@@ -3753,10 +3761,12 @@ static void test_wolfSSL_ERR_peek_last_error_line(void)
     client_args.signal    = &ready;
     client_args.callbacks = &client_cb;
 
+#ifndef SINGLE_THREADED
     start_thread(test_server_nofail, &server_args, &serverThread);
     wait_tcp_ready(&server_args);
     test_client_nofail(&client_args);
     join_thread(serverThread);
+#endif
 
     FreeTcpReady(&ready);
 
@@ -3793,7 +3803,7 @@ static void test_wolfSSL_X509_STORE_set_flags(void)
     printf(testingFmt, "wolfSSL_ERR_peek_last_error_line()");
     AssertNotNull((store = wolfSSL_X509_STORE_new()));
     AssertNotNull((x509 =
-                wolfSSL_X509_load_certificate_file(svrCert, SSL_FILETYPE_PEM)));
+                wolfSSL_X509_load_certificate_file(svrCertFile, SSL_FILETYPE_PEM)));
     AssertIntEQ(X509_STORE_add_cert(store, x509), SSL_SUCCESS);
 
 #ifdef HAVE_CRL
@@ -3872,8 +3882,8 @@ static void test_wolfSSL_set_options(void)
     printf(testingFmt, "wolfSSL_set_options()");
 
     AssertNotNull(ctx = SSL_CTX_new(wolfSSLv23_server_method()));
-    AssertTrue(SSL_CTX_use_certificate_file(ctx, svrCert, SSL_FILETYPE_PEM));
-    AssertTrue(SSL_CTX_use_PrivateKey_file(ctx, svrKey, SSL_FILETYPE_PEM));
+    AssertTrue(SSL_CTX_use_certificate_file(ctx, svrCertFile, SSL_FILETYPE_PEM));
+    AssertTrue(SSL_CTX_use_PrivateKey_file(ctx, svrKeyFile, SSL_FILETYPE_PEM));
     AssertNotNull(ssl = SSL_new(ctx));
 
     AssertTrue(SSL_set_options(ssl, SSL_OP_NO_TLSv1) == SSL_OP_NO_TLSv1);
@@ -3909,7 +3919,7 @@ static void test_wolfSSL_PEM_read_bio(void)
 
     printf(testingFmt, "wolfSSL_PEM_read_bio()");
 
-    AssertNotNull(f = fopen(cliCert, "rb"));
+    AssertNotNull(f = fopen(cliCertFile, "rb"));
     bytes = (int)fread(buffer, 1, sizeof(buffer), f);
     fclose(f);
 
@@ -4076,7 +4086,7 @@ static void test_wolfSSL_BIO(void)
         AssertIntEQ((int)BIO_set_mem_eof_return(f_bio1, -1), 0);
         AssertIntEQ((int)BIO_set_mem_eof_return(NULL, -1),   0);
 
-        f1 = XFOPEN(svrCert, "rwb");
+        f1 = XFOPEN(svrCertFile, "rwb");
         AssertIntEQ((int)BIO_set_fp(f_bio1, f1, BIO_CLOSE), SSL_SUCCESS);
         AssertIntEQ(BIO_write_filename(f_bio2, testFile),
                 SSL_SUCCESS);
@@ -4197,6 +4207,207 @@ static void test_wc_ecc_get_curve_id_from_name(void)
 #endif /* HAVE_ECC */
 }
 
+static void test_wc_ecc_get_curve_id_from_params(void)
+{
+#ifdef HAVE_ECC
+    int id;
+
+    const byte prime[] =
+    {
+        0xFF,0xFF,0xFF,0xFF,0x00,0x00,0x00,0x01,
+        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+        0x00,0x00,0x00,0x00,0xFF,0xFF,0xFF,0xFF,
+        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF
+    };
+
+    const byte primeInvalid[] =
+    {
+        0xFF,0xFF,0xFF,0xFF,0x00,0x00,0x00,0x01,
+        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+        0x00,0x00,0x00,0x00,0xFF,0xFF,0xFF,0xFF,
+        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x01,0x01
+    };
+
+    const byte Af[] =
+    {
+        0xFF,0xFF,0xFF,0xFF,0x00,0x00,0x00,0x01,
+        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+        0x00,0x00,0x00,0x00,0xFF,0xFF,0xFF,0xFF,
+        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFC
+    };
+
+    const byte Bf[] =
+    {
+        0x5A,0xC6,0x35,0xD8,0xAA,0x3A,0x93,0xE7,
+        0xB3,0xEB,0xBD,0x55,0x76,0x98,0x86,0xBC,
+        0x65,0x1D,0x06,0xB0,0xCC,0x53,0xB0,0xF6,
+        0x3B,0xCE,0x3C,0x3E,0x27,0xD2,0x60,0x4B
+    };
+
+    const byte order[] =
+    {
+        0xFF,0xFF,0xFF,0xFF,0x00,0x00,0x00,0x00,
+        0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
+        0xBC,0xE6,0xFA,0xAD,0xA7,0x17,0x9E,0x84,
+        0xF3,0xB9,0xCA,0xC2,0xFC,0x63,0x25,0x51
+    };
+
+    const byte Gx[] =
+    {
+        0x6B,0x17,0xD1,0xF2,0xE1,0x2C,0x42,0x47,
+        0xF8,0xBC,0xE6,0xE5,0x63,0xA4,0x40,0xF2,
+        0x77,0x03,0x7D,0x81,0x2D,0xEB,0x33,0xA0,
+        0xF4,0xA1,0x39,0x45,0xD8,0x98,0xC2,0x96
+    };
+
+    const byte Gy[] =
+    {
+        0x4F,0xE3,0x42,0xE2,0xFE,0x1A,0x7F,0x9B,
+        0x8E,0xE7,0xEB,0x4A,0x7C,0x0F,0x9E,0x16,
+        0x2B,0xCE,0x33,0x57,0x6B,0x31,0x5E,0xCE,
+        0xCB,0xB6,0x40,0x68,0x37,0xBF,0x51,0xF5
+    };
+
+    int cofactor = 1;
+    int fieldSize = 256;
+
+    printf(testingFmt, "wc_ecc_get_curve_id_from_params");
+
+    #if !defined(NO_ECC256) && !defined(NO_ECC_SECP)
+        id = wc_ecc_get_curve_id_from_params(fieldSize, prime, sizeof(prime),
+                Af, sizeof(Af), Bf, sizeof(Bf), order, sizeof(order),
+                Gx, sizeof(Gx), Gy, sizeof(Gy), cofactor);
+        AssertIntEQ(id, ECC_SECP256R1);
+    #endif
+
+    /* invalid case, fieldSize = 0 */
+    id = wc_ecc_get_curve_id_from_params(0, prime, sizeof(prime),
+            Af, sizeof(Af), Bf, sizeof(Bf), order, sizeof(order),
+            Gx, sizeof(Gx), Gy, sizeof(Gy), cofactor);
+    AssertIntEQ(id, ECC_CURVE_INVALID);
+
+    /* invalid case, NULL prime */
+    id = wc_ecc_get_curve_id_from_params(fieldSize, NULL, sizeof(prime),
+            Af, sizeof(Af), Bf, sizeof(Bf), order, sizeof(order),
+            Gx, sizeof(Gx), Gy, sizeof(Gy), cofactor);
+    AssertIntEQ(id, BAD_FUNC_ARG);
+
+    /* invalid case, invalid prime */
+    id = wc_ecc_get_curve_id_from_params(fieldSize,
+            primeInvalid, sizeof(primeInvalid),
+            Af, sizeof(Af), Bf, sizeof(Bf), order, sizeof(order),
+            Gx, sizeof(Gx), Gy, sizeof(Gy), cofactor);
+    AssertIntEQ(id, ECC_CURVE_INVALID);
+
+    printf(resultFmt, passed);
+#endif
+}
+
+
+/*----------------------------------------------------------------------------*
+ | Certificate Failure Checks
+ *----------------------------------------------------------------------------*/
+#ifndef NO_CERTS
+    /* Use the Cert Manager(CM) API to generate the error ASN_SIG_CONFIRM_E */
+    static int verify_sig_cm(const char* ca, byte* cert_buf, size_t cert_sz,
+        int type)
+    {
+        int ret;
+        WOLFSSL_CERT_MANAGER* cm = NULL;
+
+        switch (type) {
+            case TESTING_RSA:
+            #ifdef NO_RSA
+                printf("RSA disabled, skipping test\n");
+                return ASN_SIG_CONFIRM_E;
+            #else
+                break;
+            #endif
+            case TESTING_ECC:
+            #ifndef HAVE_ECC
+                printf("ECC disabled, skipping test\n");
+                return ASN_SIG_CONFIRM_E;
+            #else
+                break;
+            #endif
+            default:
+                printf("Bad function argument\n");
+                return BAD_FUNC_ARG;
+        }
+        cm = wolfSSL_CertManagerNew();
+        if (cm == NULL) {
+            printf("wolfSSL_CertManagerNew failed\n");
+            return -1;
+        }
+
+        ret = wolfSSL_CertManagerLoadCA(cm, ca, 0);
+        if (ret != SSL_SUCCESS) {
+            printf("wolfSSL_CertManagerLoadCA failed\n");
+            wolfSSL_CertManagerFree(cm);
+            return ret;
+        }
+
+        ret = wolfSSL_CertManagerVerifyBuffer(cm, cert_buf, cert_sz, SSL_FILETYPE_ASN1);
+        /* Let AssertIntEQ handle return code */
+
+        wolfSSL_CertManagerFree(cm);
+
+        return ret;
+    }
+
+    static int test_RsaSigFailure_cm(void)
+    {
+        int ret = 0;
+        const char* ca_cert = "./certs/ca-cert.pem";
+        const char* server_cert = "./certs/server-cert.der";
+        byte* cert_buf = NULL;
+        size_t cert_sz = 0;
+
+        ret = load_file(server_cert, &cert_buf, &cert_sz);
+        if (ret == 0) {
+            /* corrupt DER: flip the last byte, which is in the signature */
+            cert_buf[cert_sz-1] = ~cert_buf[cert_sz-1];
+
+            /* test bad cert */
+            ret = verify_sig_cm(ca_cert, cert_buf, cert_sz, TESTING_RSA);
+        }
+
+        printf("Signature failure test: RSA: Ret %d\n", ret);
+
+        if (cert_buf)
+            free(cert_buf);
+
+        return ret;
+    }
+
+    static int test_EccSigFailure_cm(void)
+    {
+        int ret = 0;
+        /* self-signed ECC cert, so use server cert as CA */
+        const char* ca_cert = "./certs/server-ecc.pem";
+        const char* server_cert = "./certs/server-ecc.der";
+        byte* cert_buf = NULL;
+        size_t cert_sz = 0;
+
+        ret = load_file(server_cert, &cert_buf, &cert_sz);
+        if (ret == 0) {
+            /* corrupt DER: flip the last byte, which is in the signature */
+            cert_buf[cert_sz-1] = ~cert_buf[cert_sz-1];
+
+            /* test bad cert */
+            ret = verify_sig_cm(ca_cert, cert_buf, cert_sz, TESTING_ECC);
+        }
+
+        printf("Signature failure test: ECC: Ret %d\n", ret);
+
+        if (cert_buf)
+            free(cert_buf);
+
+        return ret;
+    }
+
+#endif /* NO_CERTS */
+
 
 /*----------------------------------------------------------------------------*
  | Main
@@ -4264,6 +4475,13 @@ void ApiTest(void)
     /* wolfCrypt ECC tests */
     test_wc_ecc_get_curve_size_from_name();
     test_wc_ecc_get_curve_id_from_name();
+    test_wc_ecc_get_curve_id_from_params();
+
+#ifndef NO_CERTS
+    /* Bad certificate signature tests */
+    AssertIntEQ(test_EccSigFailure_cm(), ASN_SIG_CONFIRM_E);
+    AssertIntEQ(test_RsaSigFailure_cm(), ASN_SIG_CONFIRM_E);
+#endif /* NO_CERTS */
 
     /*wolfcrypt */
     printf("\n-----------------wolfcrypt unit tests------------------\n");
diff --git a/tests/hash.c b/tests/hash.c
index c32e34efc..0407af600 100644
--- a/tests/hash.c
+++ b/tests/hash.c
@@ -685,6 +685,10 @@ int hmac_md5_test(void)
     test_hmac[1] = b;
     test_hmac[2] = c;
 
+    ret = wc_HmacInit(&hmac, NULL, INVALID_DEVID);
+    if (ret != 0)
+        return -20009;
+
     for (i = 0; i < times; ++i) {
 #if defined(HAVE_FIPS)
         if (i == 1)
@@ -705,6 +709,8 @@ int hmac_md5_test(void)
             return -20 - i;
     }
 
+    wc_HmacFree(&hmac);
+
     return 0;
 }
 #endif
@@ -755,6 +761,10 @@ int hmac_sha_test(void)
     test_hmac[1] = b;
     test_hmac[2] = c;
 
+    ret = wc_HmacInit(&hmac, NULL, INVALID_DEVID);
+    if (ret != 0)
+        return -20009;
+
     for (i = 0; i < times; ++i) {
 #if defined(HAVE_FIPS)
         if (i == 1)
@@ -775,6 +785,8 @@ int hmac_sha_test(void)
             return -20 - i;
     }
 
+    wc_HmacFree(&hmac);
+
     return 0;
 }
 #endif
@@ -825,6 +837,10 @@ int hmac_sha224_test(void)
     test_hmac[1] = b;
     test_hmac[2] = c;
 
+    ret = wc_HmacInit(&hmac, NULL, INVALID_DEVID);
+    if (ret != 0)
+        return -20009;
+
     for (i = 0; i < times; ++i) {
 #if defined(HAVE_FIPS) || defined(HAVE_CAVIUM)
         if (i == 1)
@@ -843,11 +859,10 @@ int hmac_sha224_test(void)
 
         if (XMEMCMP(hash, test_hmac[i].output, SHA224_DIGEST_SIZE) != 0)
             return -20 - i;
-#ifdef WOLFSSL_ASYNC_CRYPT
-        wc_HmacAsyncFree(&hmac);
-#endif
     }
 
+    wc_HmacFree(&hmac);
+
     return 0;
 }
 #endif
@@ -902,6 +917,10 @@ int hmac_sha256_test(void)
     test_hmac[1] = b;
     test_hmac[2] = c;
 
+    ret = wc_HmacInit(&hmac, NULL, INVALID_DEVID);
+    if (ret != 0)
+        return -20009;
+
     for (i = 0; i < times; ++i) {
 #if defined(HAVE_FIPS)
         if (i == 1)
@@ -922,6 +941,8 @@ int hmac_sha256_test(void)
             return -20 - i;
     }
 
+    wc_HmacFree(&hmac);
+
     return 0;
 }
 #endif
@@ -979,6 +1000,10 @@ int hmac_sha384_test(void)
     test_hmac[1] = b;
     test_hmac[2] = c;
 
+    ret = wc_HmacInit(&hmac, NULL, INVALID_DEVID);
+    if (ret != 0)
+        return -20009;
+
     for (i = 0; i < times; ++i) {
 #if defined(HAVE_FIPS)
         if (i == 1)
@@ -999,6 +1024,8 @@ int hmac_sha384_test(void)
             return -20 - i;
     }
 
+    wc_HmacFree(&hmac);
+
     return 0;
 }
 #endif
diff --git a/tests/suites.c b/tests/suites.c
index 2028bca51..443573cc3 100644
--- a/tests/suites.c
+++ b/tests/suites.c
@@ -35,7 +35,7 @@
 
 #define MAX_ARGS 40
 #define MAX_COMMAND_SZ 240
-#define MAX_SUITE_SZ 80 
+#define MAX_SUITE_SZ 80
 #define NOT_BUILT_IN -123
 #if defined(NO_OLD_TLS) || !defined(WOLFSSL_ALLOW_SSLV3)
     #define VERSION_TOO_OLD -124
@@ -54,6 +54,11 @@ static char flagSep[] = " ";
     static char portFlag[] = "-p";
     static char svrPort[] = "0";
 #endif
+static char forceDefCipherListFlag[] = "-H";
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    static int devId = INVALID_DEVID;
+#endif
 
 
 #ifndef WOLFSSL_ALLOW_SSLV3
@@ -155,7 +160,8 @@ static int IsValidCipherSuite(const char* line, char* suite)
 static int execute_test_case(int svr_argc, char** svr_argv,
                               int cli_argc, char** cli_argv,
                               int addNoVerify, int addNonBlocking,
-                              int addDisableEMS)
+                              int addDisableEMS, int forceSrvDefCipherList,
+                              int forceCliDefCipherList)
 {
 #ifdef WOLFSSL_TIRTOS
     func_args cliArgs = {0};
@@ -174,20 +180,22 @@ static int execute_test_case(int svr_argc, char** svr_argv,
     char        commandLine[MAX_COMMAND_SZ];
     char        cipherSuite[MAX_SUITE_SZ+1];
     int         i;
-    size_t      added = 0;
+    size_t      added;
     static      int tests = 1;
 
+    /* Is Valid Cipher and Version Checks */
+    /* build command list for the Is checks below */
     commandLine[0] = '\0';
-    for (i = 0; i < svr_argc; i++) {
+    added = 0;
+    for (i = 0; i < svrArgs.argc; i++) {
         added += XSTRLEN(svr_argv[i]) + 2;
         if (added >= MAX_COMMAND_SZ) {
-            printf("server command line too long\n"); 
+            printf("server command line too long\n");
             break;
         }
         strcat(commandLine, svr_argv[i]);
         strcat(commandLine, flagSep);
     }
-
     if (IsValidCipherSuite(commandLine, cipherSuite) == 0) {
         #ifdef DEBUG_SUITE_TESTS
             printf("cipher suite %s not supported in build\n", cipherSuite);
@@ -203,7 +211,6 @@ static int execute_test_case(int svr_argc, char** svr_argv,
         return VERSION_TOO_OLD;
     }
 #endif
-
 #ifdef NO_OLD_TLS
     if (IsOldTlsVersion(commandLine) == 1) {
         #ifdef DEBUG_SUITE_TESTS
@@ -213,78 +220,52 @@ static int execute_test_case(int svr_argc, char** svr_argv,
     }
 #endif
 
+    /* Build Server Command */
     if (addNoVerify) {
-        printf("repeating test with client cert request off\n"); 
-        added += 4;   /* -d plus space plus terminator */
-        if (added >= MAX_COMMAND_SZ || svr_argc >= MAX_ARGS)
+        printf("repeating test with client cert request off\n");
+        if (svrArgs.argc >= MAX_ARGS)
             printf("server command line too long\n");
-        else {
-            svr_argv[svr_argc++] = noVerifyFlag;
-            svrArgs.argc = svr_argc;
-            strcat(commandLine, noVerifyFlag);
-            strcat(commandLine, flagSep);
-        }
+        else
+            svr_argv[svrArgs.argc++] = noVerifyFlag;
     }
     if (addNonBlocking) {
-        printf("repeating test with non blocking on\n"); 
-        added += 4;   /* -N plus terminator */
-        if (added >= MAX_COMMAND_SZ || svr_argc >= MAX_ARGS)
+        printf("repeating test with non blocking on\n");
+        if (svrArgs.argc >= MAX_ARGS)
             printf("server command line too long\n");
-        else {
-            svr_argv[svr_argc++] = nonblockFlag;
-            svrArgs.argc = svr_argc;
-            strcat(commandLine, nonblockFlag);
-            strcat(commandLine, flagSep);
-        }
+        else
+            svr_argv[svrArgs.argc++] = nonblockFlag;
     }
     #if !defined(USE_WINDOWS_API) && !defined(WOLFSSL_TIRTOS)
-        /* add port 0 */
-        if (svr_argc + 2 > MAX_ARGS)
+        /* add port */
+        if (svrArgs.argc + 2 > MAX_ARGS)
             printf("cannot add the magic port number flag to server\n");
-        else
-        {
-            svr_argv[svr_argc++] = portFlag;
-            svr_argv[svr_argc++] = svrPort;
-            svrArgs.argc = svr_argc;
+        else {
+            svr_argv[svrArgs.argc++] = portFlag;
+            svr_argv[svrArgs.argc++] = svrPort;
         }
     #endif
-    printf("trying server command line[%d]: %s\n", tests, commandLine);
+    if (forceSrvDefCipherList) {
+        if (svrArgs.argc >= MAX_ARGS)
+            printf("cannot add the force def cipher list flag to server\n");
+        else
+            svr_argv[svrArgs.argc++] = forceDefCipherListFlag;
+    }
 
+    /* update server flags list */
     commandLine[0] = '\0';
     added = 0;
-    for (i = 0; i < cli_argc; i++) {
-        added += XSTRLEN(cli_argv[i]) + 2;
+    for (i = 0; i < svrArgs.argc; i++) {
+        added += XSTRLEN(svr_argv[i]) + 2;
         if (added >= MAX_COMMAND_SZ) {
-            printf("client command line too long\n"); 
+            printf("server command line too long\n");
             break;
         }
-        strcat(commandLine, cli_argv[i]);
+        strcat(commandLine, svr_argv[i]);
         strcat(commandLine, flagSep);
     }
-    if (addNonBlocking) {
-        added += 4;   /* -N plus space plus terminator  */
-        if (added >= MAX_COMMAND_SZ)
-            printf("client command line too long\n");
-        else  {
-            cli_argv[cli_argc++] = nonblockFlag;
-            strcat(commandLine, nonblockFlag);
-            strcat(commandLine, flagSep);
-            cliArgs.argc = cli_argc;
-        }
-    }
-    if (addDisableEMS) {
-        printf("repeating test without extended master secret\n");
-        added += 4;   /* -n plus terminator */
-        if (added >= MAX_COMMAND_SZ)
-            printf("client command line too long\n");
-        else {
-            cli_argv[cli_argc++] = disableEMSFlag;
-            strcat(commandLine, disableEMSFlag);
-            strcat(commandLine, flagSep);
-            cliArgs.argc = cli_argc;
-        }
-    }
-    printf("trying client command line[%d]: %s\n", tests++, commandLine);
+    printf("trying server command line[%d]: %s\n", tests, commandLine);
+
+    tests++; /* test count */
 
     InitTcpReady(&ready);
 
@@ -296,31 +277,65 @@ static int execute_test_case(int svr_argc, char** svr_argv,
     svrArgs.signal = &ready;
     start_thread(server_test, &svrArgs, &serverThread);
     wait_tcp_ready(&svrArgs);
-    #if !defined(USE_WINDOWS_API) && !defined(WOLFSSL_TIRTOS)
-        if (ready.port != 0)
-        {
-            if (cli_argc + 2 > MAX_ARGS)
-                printf("cannot add the magic port number flag to client\n");
-            else {
-                char portNumber[8];
-                snprintf(portNumber, sizeof(portNumber), "%d", ready.port);
-                cli_argv[cli_argc++] = portFlag;
-                cli_argv[cli_argc++] = portNumber;
-                cliArgs.argc = cli_argc;
-            }
+
+
+    /* Build Client Command */
+    if (addNonBlocking) {
+        if (cliArgs.argc >= MAX_ARGS)
+            printf("cannot add the non block flag to client\n");
+        else
+            cli_argv[cliArgs.argc++] = nonblockFlag;
+    }
+    if (addDisableEMS) {
+        printf("repeating test without extended master secret\n");
+        if (cliArgs.argc >= MAX_ARGS)
+            printf("cannot add the disable EMS flag to client\n");
+        else
+            cli_argv[cliArgs.argc++] = disableEMSFlag;
+    }
+#if !defined(USE_WINDOWS_API) && !defined(WOLFSSL_TIRTOS)
+    if (ready.port != 0) {
+        if (cliArgs.argc + 2 > MAX_ARGS)
+            printf("cannot add the magic port number flag to client\n");
+        else {
+            char portNumber[8];
+            snprintf(portNumber, sizeof(portNumber), "%d", ready.port);
+            cli_argv[cliArgs.argc++] = portFlag;
+            cli_argv[cliArgs.argc++] = portNumber;
         }
-    #endif
+    }
+#endif
+    if (forceCliDefCipherList) {
+        if (cliArgs.argc >= MAX_ARGS)
+            printf("cannot add the force def cipher list flag to client\n");
+        else
+            cli_argv[cliArgs.argc++] = forceDefCipherListFlag;
+    }
+
+    commandLine[0] = '\0';
+    added = 0;
+    for (i = 0; i < cliArgs.argc; i++) {
+        added += XSTRLEN(cli_argv[i]) + 2;
+        if (added >= MAX_COMMAND_SZ) {
+            printf("client command line too long\n");
+            break;
+        }
+        strcat(commandLine, cli_argv[i]);
+        strcat(commandLine, flagSep);
+    }
+    printf("trying client command line[%d]: %s\n", tests, commandLine);
+
     /* start client */
     client_test(&cliArgs);
 
-    /* verify results */ 
+    /* verify results */
     if (cliArgs.return_code != 0) {
         printf("client_test failed\n");
         exit(EXIT_FAILURE);
     }
 
     join_thread(serverThread);
-    if (svrArgs.return_code != 0) { 
+    if (svrArgs.return_code != 0) {
         printf("server_test failed\n");
         exit(EXIT_FAILURE);
     }
@@ -329,7 +344,7 @@ static int execute_test_case(int svr_argc, char** svr_argv,
     fdCloseSession(Task_self());
 #endif
     FreeTcpReady(&ready);
-    
+
     return 0;
 }
 
@@ -393,7 +408,7 @@ static void test_harness(void* vargs)
         args->return_code = 1;
         return;
     }
-    
+
     fclose(file);
     script[sz] = 0;
 
@@ -442,7 +457,7 @@ static void test_harness(void* vargs)
                 else
                     svrArgs[svrArgsSz++] = strsep(&cursor, "\n");
                 if (*cursor == 0)  /* eof */
-                    do_it = 1; 
+                    do_it = 1;
         }
 
         if (svrArgsSz == MAX_ARGS || cliArgsSz == MAX_ARGS) {
@@ -452,24 +467,31 @@ static void test_harness(void* vargs)
 
         if (do_it) {
             ret = execute_test_case(svrArgsSz, svrArgs,
-                                    cliArgsSz, cliArgs, 0, 0, 0);
+                                    cliArgsSz, cliArgs, 0, 0, 0, 0, 0);
             /* don't repeat if not supported in build */
             if (ret == 0) {
+                /* test with default cipher list on server side */
                 execute_test_case(svrArgsSz, svrArgs,
-                                  cliArgsSz, cliArgs, 0, 1, 0);
+                                  cliArgsSz, cliArgs, 0, 0, 0, 1, 0);
+                /* test with default cipher list on client side */
                 execute_test_case(svrArgsSz, svrArgs,
-                                  cliArgsSz, cliArgs, 1, 0, 0);
+                                  cliArgsSz, cliArgs, 0, 0, 0, 0, 1);
+
                 execute_test_case(svrArgsSz, svrArgs,
-                                  cliArgsSz, cliArgs, 1, 1, 0);
+                                  cliArgsSz, cliArgs, 0, 1, 0, 0, 0);
+                execute_test_case(svrArgsSz, svrArgs,
+                                  cliArgsSz, cliArgs, 1, 0, 0, 0, 0);
+                execute_test_case(svrArgsSz, svrArgs,
+                                  cliArgsSz, cliArgs, 1, 1, 0, 0, 0);
 #ifdef HAVE_EXTENDED_MASTER
                 execute_test_case(svrArgsSz, svrArgs,
-                                  cliArgsSz, cliArgs, 0, 0, 1);
+                                  cliArgsSz, cliArgs, 0, 0, 1, 0, 0);
                 execute_test_case(svrArgsSz, svrArgs,
-                                  cliArgsSz, cliArgs, 0, 1, 1);
+                                  cliArgsSz, cliArgs, 0, 1, 1, 0, 0);
                 execute_test_case(svrArgsSz, svrArgs,
-                                  cliArgsSz, cliArgs, 1, 0, 1);
+                                  cliArgsSz, cliArgs, 1, 0, 1, 0, 0);
                 execute_test_case(svrArgsSz, svrArgs,
-                                  cliArgsSz, cliArgs, 1, 1, 1);
+                                  cliArgsSz, cliArgs, 1, 1, 1, 0, 0);
 #endif
             }
             svrArgsSz = 1;
@@ -515,17 +537,28 @@ int SuiteTest(void)
                                                    memory, sizeof(memory), 0, 1)
             != SSL_SUCCESS) {
         printf("unable to load static memory and create ctx");
-        exit(EXIT_FAILURE);
+        args.return_code = EXIT_FAILURE;
+        goto exit;
     }
 #endif
 
+#ifdef WOLFSSL_ASYNC_CRYPT
+    if (wolfAsync_DevOpen(&devId) < 0) {
+        printf("Async device open failed");
+        args.return_code = EXIT_FAILURE;
+        goto exit;
+    }
+    wolfSSL_CTX_UseAsync(cipherSuiteCtx, devId);
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
     /* default case */
     args.argc = 1;
     printf("starting default cipher suite tests\n");
     test_harness(&args);
     if (args.return_code != 0) {
         printf("error from script %d\n", args.return_code);
-        exit(EXIT_FAILURE);
+        args.return_code = EXIT_FAILURE;
+        goto exit;
     }
 
     /* any extra cases will need another argument */
@@ -538,7 +571,8 @@ int SuiteTest(void)
     test_harness(&args);
     if (args.return_code != 0) {
         printf("error from script %d\n", args.return_code);
-        exit(EXIT_FAILURE);
+        args.return_code = EXIT_FAILURE;
+        goto exit;
     }
 #endif
 #ifdef WOLFSSL_SCTP
@@ -548,7 +582,8 @@ int SuiteTest(void)
     test_harness(&args);
     if (args.return_code != 0) {
         printf("error from script %d\n", args.return_code);
-        exit(EXIT_FAILURE);
+        args.return_code = EXIT_FAILURE;
+        goto exit;
     }
 #endif
 #ifndef WC_STRICT_SIG
@@ -559,7 +594,8 @@ int SuiteTest(void)
     test_harness(&args);
     if (args.return_code != 0) {
         printf("error from script %d\n", args.return_code);
-        exit(EXIT_FAILURE);
+        args.return_code = EXIT_FAILURE;
+        goto exit;
     }
 #endif /* HAVE_RSA and HAVE_ECC */
 #endif /* !WC_STRICT_SIG */
@@ -570,7 +606,8 @@ int SuiteTest(void)
     test_harness(&args);
     if (args.return_code != 0) {
         printf("error from script %d\n", args.return_code);
-        exit(EXIT_FAILURE);
+        args.return_code = EXIT_FAILURE;
+        goto exit;
     }
 #endif
 
@@ -581,15 +618,21 @@ int SuiteTest(void)
     test_harness(&args);
     if (args.return_code != 0) {
         printf("error from script %d\n", args.return_code);
-        exit(EXIT_FAILURE);
+        args.return_code = EXIT_FAILURE;
+        goto exit;
     }
 #endif
 
+exit:
     printf(" End Cipher Suite Tests\n");
 
     wolfSSL_CTX_free(cipherSuiteCtx);
     wolfSSL_Cleanup();
 
+#ifdef WOLFSSL_ASYNC_CRYPT
+    wolfAsync_DevClose(&devId);
+#endif
+
     return args.return_code;
 }
 
diff --git a/tests/test-dtls.conf b/tests/test-dtls.conf
index fb4260e62..5bd76c694 100644
--- a/tests/test-dtls.conf
+++ b/tests/test-dtls.conf
@@ -1,36 +1,3 @@
-# server DTLSv1 DHE-RSA-CHACHA20-POLY1305
--u
--v 2
--l DHE-RSA-CHACHA20-POLY1305
-
-# client DTLSv1 DHE-RSA-CHACHA20-POLY1305
--u
--v 2
--l DHE-RSA-CHACHA20-POLY1305
-
-# server DTLSv1 ECDHE-RSA-CHACHA20-POLY1305
--u
--v 2
--l ECDHE-RSA-CHACHA20-POLY1305
-
-# client DTLSv1 ECDHE-RSA-CHACHA20-POLY1305
--u
--v 2
--l ECDHE-RSA-CHACHA20-POLY1305
-
-# server DTLSv1 ECDHE-EDCSA-CHACHA20-POLY1305
--u
--v 2
--l ECDHE-ECDSA-CHACHA20-POLY1305
--c ./certs/server-ecc.pem
--k ./certs/ecc-key.pem
-
-# client DTLSv1 ECDHE-ECDSA-CHACHA20-POLY1305
--u
--v 2
--l ECDHE-ECDSA-CHACHA20-POLY1305
--A ./certs/server-ecc.pem
-
 # server DTLSv1.2 DHE-RSA-CHACHA20-POLY1305
 -u
 -v 3
@@ -133,26 +100,6 @@
 -l ECDHE-ECDSA-CHACHA20-POLY1305-OLD
 -A ./certs/server-ecc.pem
 
-# server DTLSv1 RC4-SHA
--u
--v 2
--l RC4-SHA
-
-# client DTLSv1 RC4-SHA
--u
--v 2
--l RC4-SHA
-
-# server DTLSv1.2 RC4-SHA
--u
--v 3
--l RC4-SHA
-
-# client DTLSv1.2 RC4-SHA
--u
--v 3
--l RC4-SHA
-
 # server DTLSv1 IDEA-CBC-SHA
 -u
 -v 2
@@ -263,16 +210,6 @@
 -v 3
 -l AES256-SHA256
 
-# server DTLSv1 ECDHE-RSA-RC4
--u
--v 2
--l ECDHE-RSA-RC4-SHA
-
-# client DTLSv1 ECDHE-RSA-RC4
--u
--v 2
--l ECDHE-RSA-RC4-SHA
-
 # server DTLSv1.1 ECDHE-RSA-DES3
 -u
 -v 2
@@ -283,12 +220,12 @@
 -v 2
 -l ECDHE-RSA-DES-CBC3-SHA
 
-# server DTLSv1.1 ECDHE-RSA-AES128 
+# server DTLSv1.1 ECDHE-RSA-AES128
 -u
 -v 2
 -l ECDHE-RSA-AES128-SHA
 
-# client DTLSv1.1 ECDHE-RSA-AES128 
+# client DTLSv1.1 ECDHE-RSA-AES128
 -u
 -v 2
 -l ECDHE-RSA-AES128-SHA
@@ -303,16 +240,6 @@
 -v 2
 -l ECDHE-RSA-AES256-SHA
 
-# server DTLSv1.2 ECDHE-RSA-RC4
--u
--v 3
--l ECDHE-RSA-RC4-SHA
-
-# client DTLSv1.2 ECDHE-RSA-RC4
--u
--v 3
--l ECDHE-RSA-RC4-SHA
-
 # server DTLSv1.2 ECDHE-RSA-DES3
 -u
 -v 3
@@ -323,12 +250,12 @@
 -v 3
 -l ECDHE-RSA-DES-CBC3-SHA
 
-# server DTLSv1.2 ECDHE-RSA-AES128 
+# server DTLSv1.2 ECDHE-RSA-AES128
 -u
 -v 3
 -l ECDHE-RSA-AES128-SHA
 
-# client DTLSv1.2 ECDHE-RSA-AES128 
+# client DTLSv1.2 ECDHE-RSA-AES128
 -u
 -v 3
 -l ECDHE-RSA-AES128-SHA
@@ -338,7 +265,7 @@
 -v 3
 -l ECDHE-RSA-AES128-SHA256
 
-# client DTLSv1.2 ECDHE-RSA-AES128-SHA256 
+# client DTLSv1.2 ECDHE-RSA-AES128-SHA256
 -u
 -v 3
 -l ECDHE-RSA-AES128-SHA256
@@ -392,19 +319,6 @@
 -l ECDHE-ECDSA-NULL-SHA
 -A ./certs/server-ecc.pem
 
-# server DTLSv1.1 ECDHE-EDCSA-RC4
--u
--v 2
--l ECDHE-ECDSA-RC4-SHA
--c ./certs/server-ecc.pem
--k ./certs/ecc-key.pem
-
-# client DTLSv1.1 ECDHE-ECDSA-RC4
--u
--v 2
--l ECDHE-ECDSA-RC4-SHA
--A ./certs/server-ecc.pem
-
 # server DTLSv1.1 ECDHE-ECDSA-DES3
 -u
 -v 2
@@ -418,14 +332,14 @@
 -l ECDHE-ECDSA-DES-CBC3-SHA
 -A ./certs/server-ecc.pem
 
-# server DTLSv1.1 ECDHE-ECDSA-AES128 
+# server DTLSv1.1 ECDHE-ECDSA-AES128
 -u
 -v 2
 -l ECDHE-ECDSA-AES128-SHA
 -c ./certs/server-ecc.pem
 -k ./certs/ecc-key.pem
 
-# client DTLSv1.1 ECDHE-ECDSA-AES128 
+# client DTLSv1.1 ECDHE-ECDSA-AES128
 -u
 -v 2
 -l ECDHE-ECDSA-AES128-SHA
@@ -444,19 +358,6 @@
 -l ECDHE-ECDSA-AES256-SHA
 -A ./certs/server-ecc.pem
 
-# server DTLSv1.2 ECDHE-ECDSA-RC4
--u
--v 3
--l ECDHE-ECDSA-RC4-SHA
--c ./certs/server-ecc.pem
--k ./certs/ecc-key.pem
-
-# client DTLSv1.2 ECDHE-ECDSA-RC4
--u
--v 3
--l ECDHE-ECDSA-RC4-SHA
--A ./certs/server-ecc.pem
-
 # server DTLSv1.2 ECDHE-ECDSA-DES3
 -u
 -v 3
@@ -470,14 +371,14 @@
 -l ECDHE-ECDSA-DES-CBC3-SHA
 -A ./certs/server-ecc.pem
 
-# server DTLSv1.2 ECDHE-ECDSA-AES128 
+# server DTLSv1.2 ECDHE-ECDSA-AES128
 -u
 -v 3
 -l ECDHE-ECDSA-AES128-SHA
 -c ./certs/server-ecc.pem
 -k ./certs/ecc-key.pem
 
-# client DTLSv1.2 ECDHE-ECDSA-AES128 
+# client DTLSv1.2 ECDHE-ECDSA-AES128
 -u
 -v 3
 -l ECDHE-ECDSA-AES128-SHA
@@ -509,18 +410,6 @@
 -l ECDHE-ECDSA-AES256-SHA
 -A ./certs/server-ecc.pem
 
-# server DTLSv1.1 ECDH-RSA-RC4
--u
--v 2
--l ECDH-RSA-RC4-SHA
--c ./certs/server-ecc-rsa.pem
--k ./certs/ecc-key.pem
-
-# client DTLSv1.1 ECDH-RSA-RC4
--u
--v 2
--l ECDH-RSA-RC4-SHA
-
 # server DTLSv1.1 ECDH-RSA-DES3
 -u
 -v 2
@@ -533,14 +422,14 @@
 -v 2
 -l ECDH-RSA-DES-CBC3-SHA
 
-# server DTLSv1.1 ECDH-RSA-AES128 
+# server DTLSv1.1 ECDH-RSA-AES128
 -u
 -v 2
 -l ECDH-RSA-AES128-SHA
 -c ./certs/server-ecc-rsa.pem
 -k ./certs/ecc-key.pem
 
-# client DTLSv1.1 ECDH-RSA-AES128 
+# client DTLSv1.1 ECDH-RSA-AES128
 -u
 -v 2
 -l ECDH-RSA-AES128-SHA
@@ -557,18 +446,6 @@
 -v 2
 -l ECDH-RSA-AES256-SHA
 
-# server DTLSv1.2 ECDH-RSA-RC4
--u
--v 3
--l ECDH-RSA-RC4-SHA
--c ./certs/server-ecc-rsa.pem
--k ./certs/ecc-key.pem
-
-# client DTLSv1.2 ECDH-RSA-RC4
--u
--v 3
--l ECDH-RSA-RC4-SHA
-
 # server DTLSv1.2 ECDH-RSA-DES3
 -u
 -v 3
@@ -581,26 +458,26 @@
 -v 3
 -l ECDH-RSA-DES-CBC3-SHA
 
-# server DTLSv1.2 ECDH-RSA-AES128 
+# server DTLSv1.2 ECDH-RSA-AES128
 -u
 -v 3
 -l ECDH-RSA-AES128-SHA
 -c ./certs/server-ecc-rsa.pem
 -k ./certs/ecc-key.pem
 
-# client DTLSv1.2 ECDH-RSA-AES128 
+# client DTLSv1.2 ECDH-RSA-AES128
 -u
 -v 3
 -l ECDH-RSA-AES128-SHA
 
-# server DTLSv1.2 ECDH-RSA-AES128-SHA256 
+# server DTLSv1.2 ECDH-RSA-AES128-SHA256
 -u
 -v 3
 -l ECDH-RSA-AES128-SHA256
 -c ./certs/server-ecc-rsa.pem
 -k ./certs/ecc-key.pem
 
-# client DTLSv1.2 ECDH-RSA-AES128-SHA256 
+# client DTLSv1.2 ECDH-RSA-AES128-SHA256
 -u
 -v 3
 -l ECDH-RSA-AES128-SHA256
@@ -617,19 +494,6 @@
 -v 3
 -l ECDH-RSA-AES256-SHA
 
-# server DTLSv1.1 ECDH-EDCSA-RC4
--u
--v 2
--l ECDH-ECDSA-RC4-SHA
--c ./certs/server-ecc.pem
--k ./certs/ecc-key.pem
-
-# client DTLSv1.1 ECDH-ECDSA-RC4
--u
--v 2
--l ECDH-ECDSA-RC4-SHA
--A ./certs/server-ecc.pem
-
 # server DTLSv1.1 ECDH-ECDSA-DES3
 -u
 -v 2
@@ -643,14 +507,14 @@
 -l ECDH-ECDSA-DES-CBC3-SHA
 -A ./certs/server-ecc.pem
 
-# server DTLSv1.1 ECDH-ECDSA-AES128 
+# server DTLSv1.1 ECDH-ECDSA-AES128
 -u
 -v 2
 -l ECDH-ECDSA-AES128-SHA
 -c ./certs/server-ecc.pem
 -k ./certs/ecc-key.pem
 
-# client DTLSv1.1 ECDH-ECDSA-AES128 
+# client DTLSv1.1 ECDH-ECDSA-AES128
 -u
 -v 2
 -l ECDH-ECDSA-AES128-SHA
@@ -669,19 +533,6 @@
 -l ECDH-ECDSA-AES256-SHA
 -A ./certs/server-ecc.pem
 
-# server DTLSv1.2 ECDHE-ECDSA-RC4
--u
--v 3
--l ECDH-ECDSA-RC4-SHA
--c ./certs/server-ecc.pem
--k ./certs/ecc-key.pem
-
-# client DTLSv1.2 ECDH-ECDSA-RC4
--u
--v 3
--l ECDH-ECDSA-RC4-SHA
--A ./certs/server-ecc.pem
-
 # server DTLSv1.2 ECDH-ECDSA-DES3
 -u
 -v 3
@@ -695,14 +546,14 @@
 -l ECDH-ECDSA-DES-CBC3-SHA
 -A ./certs/server-ecc.pem
 
-# server DTLSv1.2 ECDH-ECDSA-AES128 
+# server DTLSv1.2 ECDH-ECDSA-AES128
 -u
 -v 3
 -l ECDH-ECDSA-AES128-SHA
 -c ./certs/server-ecc.pem
 -k ./certs/ecc-key.pem
 
-# client DTLSv1.2 ECDH-ECDSA-AES128 
+# client DTLSv1.2 ECDH-ECDSA-AES128
 -u
 -v 3
 -l ECDH-ECDSA-AES128-SHA
@@ -715,7 +566,7 @@
 -c ./certs/server-ecc.pem
 -k ./certs/ecc-key.pem
 
-# client DTLSv1.2 ECDH-ECDSA-AES128-SHA256 
+# client DTLSv1.2 ECDH-ECDSA-AES128-SHA256
 -u
 -v 3
 -l ECDH-ECDSA-AES128-SHA256
@@ -734,12 +585,12 @@
 -l ECDH-ECDSA-AES256-SHA
 -A ./certs/server-ecc.pem
 
-# server DTLSv1.2 ECDHE-RSA-AES256-SHA384 
+# server DTLSv1.2 ECDHE-RSA-AES256-SHA384
 -u
 -v 3
 -l ECDHE-RSA-AES256-SHA384
 
-# client DTLSv1.2 ECDHE-RSA-AES256-SHA384 
+# client DTLSv1.2 ECDHE-RSA-AES256-SHA384
 -u
 -v 3
 -l ECDHE-RSA-AES256-SHA384
@@ -757,14 +608,14 @@
 -l ECDHE-ECDSA-AES256-SHA384
 -A ./certs/server-ecc.pem
 
-# server DTLSv1.2 ECDH-RSA-AES256-SHA384 
+# server DTLSv1.2 ECDH-RSA-AES256-SHA384
 -u
 -v 3
 -l ECDH-RSA-AES256-SHA384
 -c ./certs/server-ecc-rsa.pem
 -k ./certs/ecc-key.pem
 
-# client DTLSv1.2 ECDH-RSA-AES256-SHA384 
+# client DTLSv1.2 ECDH-RSA-AES256-SHA384
 -u
 -v 3
 -l ECDH-RSA-AES256-SHA384
@@ -776,7 +627,7 @@
 -c ./certs/server-ecc.pem
 -k ./certs/ecc-key.pem
 
-# client DTLSv1.2 ECDH-ECDSA-AES256-SHA384 
+# client DTLSv1.2 ECDH-ECDSA-AES256-SHA384
 -u
 -v 3
 -l ECDH-ECDSA-AES256-SHA384
@@ -926,14 +777,14 @@
 -v 3
 -l PSK-AES256-CBC-SHA384
 
-# server DTLSv1.2 ECDHE-ECDSA-AES128-GCM-SHA256 
+# server DTLSv1.2 ECDHE-ECDSA-AES128-GCM-SHA256
 -u
 -v 3
 -l ECDHE-ECDSA-AES128-GCM-SHA256
 -c ./certs/server-ecc.pem
 -k ./certs/ecc-key.pem
 
-# client DTLSv1.2 ECDHE-ECDSA-AES128-GCM-SHA256 
+# client DTLSv1.2 ECDHE-ECDSA-AES128-GCM-SHA256
 -u
 -v 3
 -l ECDHE-ECDSA-AES128-GCM-SHA256
@@ -952,14 +803,14 @@
 -l ECDHE-ECDSA-AES256-GCM-SHA384
 -A ./certs/server-ecc.pem
 
-# server DTLSv1.2 ECDH-ECDSA-AES128-GCM-SHA256 
+# server DTLSv1.2 ECDH-ECDSA-AES128-GCM-SHA256
 -u
 -v 3
 -l ECDH-ECDSA-AES128-GCM-SHA256
 -c ./certs/server-ecc.pem
 -k ./certs/ecc-key.pem
 
-# client DTLSv1.2 ECDH-ECDSA-AES128-GCM-SHA256 
+# client DTLSv1.2 ECDH-ECDSA-AES128-GCM-SHA256
 -u
 -v 3
 -l ECDH-ECDSA-AES128-GCM-SHA256
@@ -978,12 +829,12 @@
 -l ECDH-ECDSA-AES256-GCM-SHA384
 -A ./certs/server-ecc.pem
 
-# server DTLSv1.2 ECDHE-RSA-AES128-GCM-SHA256 
+# server DTLSv1.2 ECDHE-RSA-AES128-GCM-SHA256
 -u
 -v 3
 -l ECDHE-RSA-AES128-GCM-SHA256
 
-# client DTLSv1.2 ECDHE-RSA-AES128-GCM-SHA256 
+# client DTLSv1.2 ECDHE-RSA-AES128-GCM-SHA256
 -u
 -v 3
 -l ECDHE-RSA-AES128-GCM-SHA256
@@ -998,14 +849,14 @@
 -v 3
 -l ECDHE-RSA-AES256-GCM-SHA384
 
-# server DTLSv1.2 ECDH-RSA-AES128-GCM-SHA256 
+# server DTLSv1.2 ECDH-RSA-AES128-GCM-SHA256
 -u
 -v 3
 -l ECDH-RSA-AES128-GCM-SHA256
 -c ./certs/server-ecc-rsa.pem
 -k ./certs/ecc-key.pem
 
-# client DTLSv1.2 ECDH-RSA-AES128-GCM-SHA256 
+# client DTLSv1.2 ECDH-RSA-AES128-GCM-SHA256
 -u
 -v 3
 -l ECDH-RSA-AES128-GCM-SHA256
diff --git a/tests/test.conf b/tests/test.conf
index 37f672ab0..894452a91 100644
--- a/tests/test.conf
+++ b/tests/test.conf
@@ -1,30 +1,3 @@
-# server TLSv1.1 DHE-RSA-CHACHA20-POLY1305
--v 2
--l DHE-RSA-CHACHA20-POLY1305
-
-# client TLSv1.1 DHE-RSA-CHACHA20-POLY1305
--v 2
--l DHE-RSA-CHACHA20-POLY1305
-
-# server TLSv1.1 ECDHE-RSA-CHACHA20-POLY1305
--v 2
--l ECDHE-RSA-CHACHA20-POLY1305
-
-# client TLSv1.1 ECDHE-RSA-CHACHA20-POLY1305
--v 2
--l ECDHE-RSA-CHACHA20-POLY1305
-
-# server TLSv1.1 ECDHE-EDCSA-CHACHA20-POLY1305
--v 2
--l ECDHE-ECDSA-CHACHA20-POLY1305
--c ./certs/server-ecc.pem
--k ./certs/ecc-key.pem
-
-# client TLSv1.1 ECDHE-ECDSA-CHACHA20-POLY1305
--v 2
--l ECDHE-ECDSA-CHACHA20-POLY1305
--A ./certs/server-ecc.pem
-
 # server TLSv1.2 DHE-RSA-CHACHA20-POLY1305
 -v 3
 -l DHE-RSA-CHACHA20-POLY1305
@@ -341,11 +314,11 @@
 -v 1
 -l ECDHE-RSA-DES-CBC3-SHA
 
-# server TLSv1 ECDHE-RSA-AES128 
+# server TLSv1 ECDHE-RSA-AES128
 -v 1
 -l ECDHE-RSA-AES128-SHA
 
-# client TLSv1 ECDHE-RSA-AES128 
+# client TLSv1 ECDHE-RSA-AES128
 -v 1
 -l ECDHE-RSA-AES128-SHA
 
@@ -373,11 +346,11 @@
 -v 2
 -l ECDHE-RSA-DES-CBC3-SHA
 
-# server TLSv1.1 ECDHE-RSA-AES128 
+# server TLSv1.1 ECDHE-RSA-AES128
 -v 2
 -l ECDHE-RSA-AES128-SHA
 
-# client TLSv1.1 ECDHE-RSA-AES128 
+# client TLSv1.1 ECDHE-RSA-AES128
 -v 2
 -l ECDHE-RSA-AES128-SHA
 
@@ -405,11 +378,11 @@
 -v 3
 -l ECDHE-RSA-DES-CBC3-SHA
 
-# server TLSv1.2 ECDHE-RSA-AES128 
+# server TLSv1.2 ECDHE-RSA-AES128
 -v 3
 -l ECDHE-RSA-AES128-SHA
 
-# client TLSv1.2 ECDHE-RSA-AES128 
+# client TLSv1.2 ECDHE-RSA-AES128
 -v 3
 -l ECDHE-RSA-AES128-SHA
 
@@ -417,7 +390,7 @@
 -v 3
 -l ECDHE-RSA-AES128-SHA256
 
-# client TLSv1.2 ECDHE-RSA-AES128-SHA256 
+# client TLSv1.2 ECDHE-RSA-AES128-SHA256
 -v 3
 -l ECDHE-RSA-AES128-SHA256
 
@@ -484,13 +457,13 @@
 -l ECDHE-ECDSA-DES-CBC3-SHA
 -A ./certs/server-ecc.pem
 
-# server TLSv1 ECDHE-ECDSA-AES128 
+# server TLSv1 ECDHE-ECDSA-AES128
 -v 1
 -l ECDHE-ECDSA-AES128-SHA
 -c ./certs/server-ecc.pem
 -k ./certs/ecc-key.pem
 
-# client TLSv1 ECDHE-ECDSA-AES128 
+# client TLSv1 ECDHE-ECDSA-AES128
 -v 1
 -l ECDHE-ECDSA-AES128-SHA
 -A ./certs/server-ecc.pem
@@ -528,13 +501,13 @@
 -l ECDHE-ECDSA-DES-CBC3-SHA
 -A ./certs/server-ecc.pem
 
-# server TLSv1.1 ECDHE-ECDSA-AES128 
+# server TLSv1.1 ECDHE-ECDSA-AES128
 -v 2
 -l ECDHE-ECDSA-AES128-SHA
 -c ./certs/server-ecc.pem
 -k ./certs/ecc-key.pem
 
-# client TLSv1.1 ECDHE-ECDSA-AES128 
+# client TLSv1.1 ECDHE-ECDSA-AES128
 -v 2
 -l ECDHE-ECDSA-AES128-SHA
 -A ./certs/server-ecc.pem
@@ -572,13 +545,13 @@
 -l ECDHE-ECDSA-DES-CBC3-SHA
 -A ./certs/server-ecc.pem
 
-# server TLSv1.2 ECDHE-ECDSA-AES128 
+# server TLSv1.2 ECDHE-ECDSA-AES128
 -v 3
 -l ECDHE-ECDSA-AES128-SHA
 -c ./certs/server-ecc.pem
 -k ./certs/ecc-key.pem
 
-# client TLSv1.2 ECDHE-ECDSA-AES128 
+# client TLSv1.2 ECDHE-ECDSA-AES128
 -v 3
 -l ECDHE-ECDSA-AES128-SHA
 -A ./certs/server-ecc.pem
@@ -625,13 +598,13 @@
 -v 1
 -l ECDH-RSA-DES-CBC3-SHA
 
-# server TLSv1 ECDH-RSA-AES128 
+# server TLSv1 ECDH-RSA-AES128
 -v 1
 -l ECDH-RSA-AES128-SHA
 -c ./certs/server-ecc-rsa.pem
 -k ./certs/ecc-key.pem
 
-# client TLSv1 ECDH-RSA-AES128 
+# client TLSv1 ECDH-RSA-AES128
 -v 1
 -l ECDH-RSA-AES128-SHA
 
@@ -665,13 +638,13 @@
 -v 2
 -l ECDH-RSA-DES-CBC3-SHA
 
-# server TLSv1.1 ECDH-RSA-AES128 
+# server TLSv1.1 ECDH-RSA-AES128
 -v 2
 -l ECDH-RSA-AES128-SHA
 -c ./certs/server-ecc-rsa.pem
 -k ./certs/ecc-key.pem
 
-# client TLSv1.1 ECDH-RSA-AES128 
+# client TLSv1.1 ECDH-RSA-AES128
 -v 2
 -l ECDH-RSA-AES128-SHA
 
@@ -705,23 +678,23 @@
 -v 3
 -l ECDH-RSA-DES-CBC3-SHA
 
-# server TLSv1.2 ECDH-RSA-AES128 
+# server TLSv1.2 ECDH-RSA-AES128
 -v 3
 -l ECDH-RSA-AES128-SHA
 -c ./certs/server-ecc-rsa.pem
 -k ./certs/ecc-key.pem
 
-# client TLSv1.2 ECDH-RSA-AES128 
+# client TLSv1.2 ECDH-RSA-AES128
 -v 3
 -l ECDH-RSA-AES128-SHA
 
-# server TLSv1.2 ECDH-RSA-AES128-SHA256 
+# server TLSv1.2 ECDH-RSA-AES128-SHA256
 -v 3
 -l ECDH-RSA-AES128-SHA256
 -c ./certs/server-ecc-rsa.pem
 -k ./certs/ecc-key.pem
 
-# client TLSv1.2 ECDH-RSA-AES128-SHA256 
+# client TLSv1.2 ECDH-RSA-AES128-SHA256
 -v 3
 -l ECDH-RSA-AES128-SHA256
 
@@ -757,13 +730,13 @@
 -l ECDH-ECDSA-DES-CBC3-SHA
 -A ./certs/server-ecc.pem
 
-# server TLSv1 ECDH-ECDSA-AES128 
+# server TLSv1 ECDH-ECDSA-AES128
 -v 1
 -l ECDH-ECDSA-AES128-SHA
 -c ./certs/server-ecc.pem
 -k ./certs/ecc-key.pem
 
-# client TLSv1 ECDH-ECDSA-AES128 
+# client TLSv1 ECDH-ECDSA-AES128
 -v 1
 -l ECDH-ECDSA-AES128-SHA
 -A ./certs/server-ecc.pem
@@ -801,13 +774,13 @@
 -l ECDH-ECDSA-DES-CBC3-SHA
 -A ./certs/server-ecc.pem
 
-# server TLSv1.1 ECDH-ECDSA-AES128 
+# server TLSv1.1 ECDH-ECDSA-AES128
 -v 2
 -l ECDH-ECDSA-AES128-SHA
 -c ./certs/server-ecc.pem
 -k ./certs/ecc-key.pem
 
-# client TLSv1.1 ECDH-ECDSA-AES128 
+# client TLSv1.1 ECDH-ECDSA-AES128
 -v 2
 -l ECDH-ECDSA-AES128-SHA
 -A ./certs/server-ecc.pem
@@ -845,13 +818,13 @@
 -l ECDH-ECDSA-DES-CBC3-SHA
 -A ./certs/server-ecc.pem
 
-# server TLSv1.2 ECDH-ECDSA-AES128 
+# server TLSv1.2 ECDH-ECDSA-AES128
 -v 3
 -l ECDH-ECDSA-AES128-SHA
 -c ./certs/server-ecc.pem
 -k ./certs/ecc-key.pem
 
-# client TLSv1.2 ECDH-ECDSA-AES128 
+# client TLSv1.2 ECDH-ECDSA-AES128
 -v 3
 -l ECDH-ECDSA-AES128-SHA
 -A ./certs/server-ecc.pem
@@ -862,7 +835,7 @@
 -c ./certs/server-ecc.pem
 -k ./certs/ecc-key.pem
 
-# client TLSv1.2 ECDH-ECDSA-AES128-SHA256 
+# client TLSv1.2 ECDH-ECDSA-AES128-SHA256
 -v 3
 -l ECDH-ECDSA-AES128-SHA256
 -A ./certs/server-ecc.pem
@@ -882,7 +855,7 @@
 -v 3
 -l ECDHE-RSA-AES256-SHA384
 
-# client TLSv1.2 ECDHE-RSA-AES256-SHA384 
+# client TLSv1.2 ECDHE-RSA-AES256-SHA384
 -v 3
 -l ECDHE-RSA-AES256-SHA384
 
@@ -897,13 +870,13 @@
 -l ECDHE-ECDSA-AES256-SHA384
 -A ./certs/server-ecc.pem
 
-# server TLSv1.2 ECDH-RSA-AES256-SHA384 
+# server TLSv1.2 ECDH-RSA-AES256-SHA384
 -v 3
 -l ECDH-RSA-AES256-SHA384
 -c ./certs/server-ecc-rsa.pem
 -k ./certs/ecc-key.pem
 
-# client TLSv1.2 ECDH-RSA-AES256-SHA384 
+# client TLSv1.2 ECDH-RSA-AES256-SHA384
 -v 3
 -l ECDH-RSA-AES256-SHA384
 
@@ -913,7 +886,7 @@
 -c ./certs/server-ecc.pem
 -k ./certs/ecc-key.pem
 
-# client TLSv1.2 ECDH-ECDSA-AES256-SHA384 
+# client TLSv1.2 ECDH-ECDSA-AES256-SHA384
 -v 3
 -l ECDH-ECDSA-AES256-SHA384
 -A ./certs/server-ecc.pem
@@ -1664,11 +1637,11 @@
 -v 3
 -l DHE-RSA-CAMELLIA256-SHA256
 
-# server TLSv1.2 RSA-AES128-GCM-SHA256 
+# server TLSv1.2 RSA-AES128-GCM-SHA256
 -v 3
 -l AES128-GCM-SHA256
 
-# client TLSv1.2 RSA-AES128-GCM-SHA256 
+# client TLSv1.2 RSA-AES128-GCM-SHA256
 -v 3
 -l AES128-GCM-SHA256
 
@@ -1680,13 +1653,13 @@
 -v 3
 -l AES256-GCM-SHA384
 
-# server TLSv1.2 ECDHE-ECDSA-AES128-GCM-SHA256 
+# server TLSv1.2 ECDHE-ECDSA-AES128-GCM-SHA256
 -v 3
 -l ECDHE-ECDSA-AES128-GCM-SHA256
 -c ./certs/server-ecc.pem
 -k ./certs/ecc-key.pem
 
-# client TLSv1.2 ECDHE-ECDSA-AES128-GCM-SHA256 
+# client TLSv1.2 ECDHE-ECDSA-AES128-GCM-SHA256
 -v 3
 -l ECDHE-ECDSA-AES128-GCM-SHA256
 -A ./certs/server-ecc.pem
@@ -1702,13 +1675,13 @@
 -l ECDHE-ECDSA-AES256-GCM-SHA384
 -A ./certs/server-ecc.pem
 
-# server TLSv1.2 ECDH-ECDSA-AES128-GCM-SHA256 
+# server TLSv1.2 ECDH-ECDSA-AES128-GCM-SHA256
 -v 3
 -l ECDH-ECDSA-AES128-GCM-SHA256
 -c ./certs/server-ecc.pem
 -k ./certs/ecc-key.pem
 
-# client TLSv1.2 ECDH-ECDSA-AES128-GCM-SHA256 
+# client TLSv1.2 ECDH-ECDSA-AES128-GCM-SHA256
 -v 3
 -l ECDH-ECDSA-AES128-GCM-SHA256
 -A ./certs/server-ecc.pem
@@ -1724,11 +1697,11 @@
 -l ECDH-ECDSA-AES256-GCM-SHA384
 -A ./certs/server-ecc.pem
 
-# server TLSv1.2 ECDHE-RSA-AES128-GCM-SHA256 
+# server TLSv1.2 ECDHE-RSA-AES128-GCM-SHA256
 -v 3
 -l ECDHE-RSA-AES128-GCM-SHA256
 
-# client TLSv1.2 ECDHE-RSA-AES128-GCM-SHA256 
+# client TLSv1.2 ECDHE-RSA-AES128-GCM-SHA256
 -v 3
 -l ECDHE-RSA-AES128-GCM-SHA256
 
@@ -1740,13 +1713,13 @@
 -v 3
 -l ECDHE-RSA-AES256-GCM-SHA384
 
-# server TLSv1.2 ECDH-RSA-AES128-GCM-SHA256 
+# server TLSv1.2 ECDH-RSA-AES128-GCM-SHA256
 -v 3
 -l ECDH-RSA-AES128-GCM-SHA256
 -c ./certs/server-ecc-rsa.pem
 -k ./certs/ecc-key.pem
 
-# client TLSv1.2 ECDH-RSA-AES128-GCM-SHA256 
+# client TLSv1.2 ECDH-RSA-AES128-GCM-SHA256
 -v 3
 -l ECDH-RSA-AES128-GCM-SHA256
 
@@ -1760,11 +1733,11 @@
 -v 3
 -l ECDH-RSA-AES256-GCM-SHA384
 
-# server TLSv1.2 DHE-RSA-AES128-GCM-SHA256 
+# server TLSv1.2 DHE-RSA-AES128-GCM-SHA256
 -v 3
 -l DHE-RSA-AES128-GCM-SHA256
 
-# client TLSv1.2 DHE-RSA-AES128-GCM-SHA256 
+# client TLSv1.2 DHE-RSA-AES128-GCM-SHA256
 -v 3
 -l DHE-RSA-AES128-GCM-SHA256
 
diff --git a/tests/unit.c b/tests/unit.c
index c007fbb64..39a76ddf2 100644
--- a/tests/unit.c
+++ b/tests/unit.c
@@ -45,16 +45,12 @@ int main(int argc, char** argv)
 
 int unit_test(int argc, char** argv)
 {
-    int ret;
+    int ret = 0;
 
     (void)argc;
     (void)argv;
     printf("starting unit tests...\n");
 
-#if defined(USE_WOLFSSL_MEMORY) && defined(WOLFSSL_TRACK_MEMORY)
-    InitMemoryTracker();
-#endif
-
 #if defined(DEBUG_WOLFSSL) && !defined(HAVE_VALGRIND)
     wolfSSL_Debugging_ON();
 #endif
@@ -72,28 +68,25 @@ int unit_test(int argc, char** argv)
 
     if ( (ret = HashTest()) != 0){
         printf("hash test failed with %d\n", ret);
-        return ret;
+        goto exit;
     }
 
 #ifndef SINGLE_THREADED
     if ( (ret = SuiteTest()) != 0){
         printf("suite test failed with %d\n", ret);
-        return ret;
+        goto exit;
     }
 #endif
 
     SrpTest();
 
+exit:
 #ifdef HAVE_WNR
     if (wc_FreeNetRandom() < 0)
         err_sys("Failed to free netRandom context");
 #endif /* HAVE_WNR */
 
-#if defined(USE_WOLFSSL_MEMORY) && defined(WOLFSSL_TRACK_MEMORY)
-    ShowMemoryTracker();
-#endif
-
-    return 0;
+    return ret;
 }
 
 
diff --git a/testsuite/testsuite.c b/testsuite/testsuite.c
index 60080da5f..144efb140 100644
--- a/testsuite/testsuite.c
+++ b/testsuite/testsuite.c
@@ -210,7 +210,7 @@ int testsuite_test(int argc, char** argv)
 #endif /* HAVE_WNR */
 
     printf("\nAll tests passed!\n");
-    EXIT_TEST(EXIT_SUCCESS);
+    return EXIT_SUCCESS;
 }
 
 void simple_test(func_args* args)
diff --git a/tirtos/README b/tirtos/README
index 6001f5664..dc7fbb114 100644
--- a/tirtos/README
+++ b/tirtos/README
@@ -7,6 +7,9 @@ library and the example applications.
 Also read TI-RTOS Getting Started Guide and TI-RTOS User Guide to learn more
 about TI-RTOS (http://www.ti.com/tool/ti-rtos).
 
+For more information see:
+(https://github.com/wolfSSL/wolfssl-examples/blob/master/tirtos_ccs_examples/README.md)
+
 ## Example Application
 
 A simple "TCP echo server with TLS" example application is provided with TI-RTOS
diff --git a/wolfcrypt/benchmark/benchmark.c b/wolfcrypt/benchmark/benchmark.c
index a4f1e6877..9e7b41ecf 100644
--- a/wolfcrypt/benchmark/benchmark.c
+++ b/wolfcrypt/benchmark/benchmark.c
@@ -32,6 +32,10 @@
 /* Macro to disable benchmark */
 #ifndef NO_CRYPT_BENCHMARK
 
+#ifdef XMALLOC_USER
+    #include <stdlib.h>  /* we're using malloc / free direct here */
+#endif
+
 #ifdef WOLFSSL_STATIC_MEMORY
     #include 
     static WOLFSSL_HEAP_HINT* HEAP_HINT;
@@ -93,40 +97,43 @@
 #include 
 #include 
 
+/* only for stack size check */
+#ifdef HAVE_STACK_SIZE
+    #include 
+    #include 
+#endif
+
 #ifdef WOLFSSL_ASYNC_CRYPT
     #include <wolfssl/wolfcrypt/async.h>
 #endif
-#if defined(WOLFSSL_ASYNC_CRYPT) || defined(HAVE_ECC)
-    static int devId = INVALID_DEVID;
-#endif
 
 #ifdef HAVE_WNR
     const char* wnrConfigFile = "wnr-example.conf";
 #endif
 
 #if defined(WOLFSSL_MDK_ARM)
-    extern FILE * wolfSSL_fopen(const char *fname, const char *mode) ;
+    extern FILE * wolfSSL_fopen(const char *fname, const char *mode);
     #define fopen wolfSSL_fopen
 #endif
 
 #if defined(__GNUC__) && defined(__x86_64__) && !defined(NO_ASM)
     #define HAVE_GET_CYCLES
     static INLINE word64 get_intel_cycles(void);
-    static word64 total_cycles;
+    static THREAD_LS_T word64 total_cycles;
     #define INIT_CYCLE_COUNTER
     #define BEGIN_INTEL_CYCLES total_cycles = get_intel_cycles();
     #define END_INTEL_CYCLES   total_cycles = get_intel_cycles() - total_cycles;
     #define SHOW_INTEL_CYCLES  printf(" Cycles per byte = %6.2f", \
-                               (float)total_cycles / (numBlocks*sizeof(plain)));
+                               (float)total_cycles / (count*BENCH_SIZE));
 #elif defined(LINUX_CYCLE_COUNT)
     #include 
     #include 
     #include 
 
-    static word64 begin_cycles;
-    static word64 total_cycles;
-    static int cycles = -1;
-    static struct perf_event_attr atr;
+    static THREAD_LS_T word64 begin_cycles;
+    static THREAD_LS_T word64 total_cycles;
+    static THREAD_LS_T int cycles = -1;
+    static THREAD_LS_T struct perf_event_attr atr;
 
     #define INIT_CYCLE_COUNTER do { \
         atr.type   = PERF_TYPE_HARDWARE; \
@@ -141,7 +148,7 @@
     } while (0);
 
     #define SHOW_INTEL_CYCLES  printf(" Cycles per byte = %6.2f", \
-                               (float)total_cycles / (numBlocks*sizeof(plain)));
+                               (float)total_cycles / (count*BENCH_SIZE));
 
 #else
     #define INIT_CYCLE_COUNTER
@@ -158,7 +165,7 @@
 #if defined(USE_CERT_BUFFERS_1024) || defined(USE_CERT_BUFFERS_2048) \
                                    || !defined(NO_DH)
     /* include test cert and key buffers for use with NO_FILESYSTEM */
-        #include 
+    #include 
 #endif
 
 
@@ -174,43 +181,36 @@
 
 #include "wolfcrypt/benchmark/benchmark.h"
 
-#ifdef USE_WOLFSSL_MEMORY
-    #include "wolfssl/wolfcrypt/mem_track.h"
-#endif
-
-void bench_des(void);
+void bench_des(int);
 void bench_idea(void);
-void bench_arc4(void);
+void bench_arc4(int);
 void bench_hc128(void);
 void bench_rabbit(void);
 void bench_chacha(void);
 void bench_chacha20_poly1305_aead(void);
-void bench_aes(int);
-void bench_aesgcm(void);
+void bench_aescbc(int);
+void bench_aesgcm(int);
 void bench_aesccm(void);
 void bench_aesctr(void);
 void bench_poly1305(void);
 void bench_camellia(void);
 
-void bench_md5(void);
-void bench_sha(void);
-void bench_sha224(void);
-void bench_sha256(void);
-void bench_sha384(void);
-void bench_sha512(void);
+void bench_md5(int);
+void bench_sha(int);
+void bench_sha224(int);
+void bench_sha256(int);
+void bench_sha384(int);
+void bench_sha512(int);
 void bench_ripemd(void);
 void bench_cmac(void);
 void bench_scrypt(void);
 
-void bench_rsa(void);
-#ifdef WOLFSSL_ASYNC_CRYPT
-    void bench_rsa_async(void);
-#endif
-void bench_rsaKeyGen(void);
-void bench_dh(void);
+void bench_rsaKeyGen(int);
+void bench_rsa(int);
+void bench_dh(int);
 #ifdef HAVE_ECC
-void bench_eccKeyGen(void);
-void bench_eccKeyAgree(void);
+void bench_eccMakeKey(int);
+void bench_ecc(int);
     #ifdef HAVE_ECC_ENCRYPT
     void bench_eccEncrypt(void);
     #endif
@@ -235,38 +235,213 @@ void bench_rng(void);
 
 #ifdef WOLFSSL_CURRTIME_REMAP
     #define current_time WOLFSSL_CURRTIME_REMAP
-#else
+#elif !defined(HAVE_STACK_SIZE)
     double current_time(int);
 #endif
 
-#if defined(DEBUG_WOLFSSL) && !defined(HAVE_VALGRIND)
+#if defined(DEBUG_WOLFSSL) && !defined(HAVE_VALGRIND) && \
+        !defined(HAVE_STACK_SIZE)
     WOLFSSL_API int wolfSSL_Debugging_ON();
+    WOLFSSL_API void wolfSSL_Debugging_OFF(void);
 #endif
 
 #if !defined(NO_RSA) || !defined(NO_DH) \
                         || defined(WOLFSSL_KEYGEN) || defined(HAVE_ECC) \
                         || defined(HAVE_CURVE25519) || defined(HAVE_ED25519)
     #define HAVE_LOCAL_RNG
-    static WC_RNG rng;
+    static THREAD_LS_T WC_RNG rng;
 #endif
 
-/* use kB instead of mB for embedded benchmarking */
-#ifdef BENCH_EMBEDDED
-    static byte plain [1024];
+
+
+/* Asynchronous helper macros */
+static THREAD_LS_T int devId = INVALID_DEVID;
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    static THREAD_LS_T WOLF_EVENT_QUEUE eventQueue;
+    static THREAD_LS_T int asyncPending;
+
+    #define BENCH_ASYNC_GET_DEV(obj)      (&(obj)->asyncDev)
+    #define BENCH_ASYNC_GET_NAME(doAsync) (doAsync) ? "HW" : "SW"
+    #define BENCH_ASYNC_IS_PEND()         (asyncPending > 0)
+    #define BENCH_MAX_PENDING             (WOLF_ASYNC_MAX_PENDING)
+
+#ifndef WC_NO_ASYNC_THREADING
+    typedef struct ThreadData {
+        pthread_t thread_id;
+    } ThreadData;
+    static ThreadData* g_threadData;
+    static int g_threadCount;
+#endif
+
+    static INLINE int bench_async_begin(void) {
+        /* init event queue */
+        asyncPending = 0;
+        return wolfEventQueue_Init(&eventQueue);
+    }
+
+    static INLINE void bench_async_end(void) {
+        /* free event queue */
+        wolfEventQueue_Free(&eventQueue);
+    }
+
+    static INLINE void bench_async_complete(int* ret, WC_ASYNC_DEV* asyncDev,
+        int* times)
+    {
+        *ret = asyncDev->event.ret;
+        if (*ret >= 0) {
+            (*times)++;
+            asyncDev->event.done = 0; /* reset done flag */
+        }
+    }
+
+    static INLINE int bench_async_check(int* ret, WC_ASYNC_DEV* asyncDev,
+        int callAgain, int* times, int limit)
+    {
+        int allowNext = 0;
+
+        /* if algo doesn't require calling again then use this flow */
+        if (!callAgain) {
+            if (asyncDev->event.done) {
+                /* operation completed */
+                bench_async_complete(ret, asyncDev, times);
+            }
+        }
+        /* if algo does require calling again then use this flow */
+        else {
+            if (asyncDev->event.done) {
+                allowNext = 1;
+            }
+        }
+
+        if (asyncDev->event.pending == 0 &&
+                (*times + asyncPending) < limit) {
+            allowNext = 1;
+        }
+
+        return allowNext;
+    }
+
+    static INLINE int bench_async_handle(int* ret, WC_ASYNC_DEV* asyncDev,
+        int callAgain, int* times)
+    {
+        if (*ret == WC_PENDING_E) {
+            *ret = wc_AsyncHandle(asyncDev, &eventQueue,
+                callAgain ? WC_ASYNC_FLAG_CALL_AGAIN : WC_ASYNC_FLAG_NONE);
+            if (*ret == 0)
+                asyncPending++;
+        }
+        else if (*ret >= 0) {
+            /* operation completed */
+            bench_async_complete(ret, asyncDev, times);
+        }
+
+        return (*ret >= 0) ? 1 : 0;
+    }
+
+    static INLINE void bench_async_poll(void)
+    {
+        /* poll until there are events done */
+        if (asyncPending > 0) {
+            int ret, asyncDone = 0;
+            do {
+                ret = wolfAsync_EventQueuePoll(&eventQueue, NULL, NULL, 0,
+                                       WOLF_POLL_FLAG_CHECK_HW, &asyncDone);
+                if (ret != 0) {
+                    printf("Async poll failed %d\n", ret);
+                    return;
+                }
+            } while (asyncDone == 0);
+            asyncPending -= asyncDone;
+        }
+    }
+
 #else
-    static byte plain [1024*1024];
+    #define BENCH_MAX_PENDING             (1)
+    #define BENCH_ASYNC_GET_NAME(doAsync) ""
+    #define BENCH_ASYNC_GET_DEV(obj)      NULL
+    #define BENCH_ASYNC_IS_PEND()         (0)
+
+    #define bench_async_begin()
+    #define bench_async_end()             (void)doAsync;
+
+    static INLINE int bench_async_check(int* ret, void* asyncDev,
+        int callAgain, int* times, int limit)
+    {
+        (void)ret;
+        (void)asyncDev;
+        (void)callAgain;
+        (void)times;
+        (void)limit;
+
+        return 1;
+    }
+
+    static INLINE int bench_async_handle(int* ret, void* asyncDev,
+        int callAgain, int* times)
+    {
+        (void)asyncDev;
+        (void)callAgain;
+
+        if (*ret >= 0) {
+            /* operation completed */
+            (*times)++;
+            return 1;
+        }
+        return 0;
+    }
+    #define bench_async_poll()
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+
+
+/* minimum runtime for each benchmark */
+#define BENCH_MIN_RUNTIME_SEC   1.0f
+
+
+#if defined(HAVE_AESGCM) || defined(HAVE_AESCCM)
+    #define AES_AUTH_ADD_SZ 13
+    #define AES_AUTH_TAG_SZ 16
+    #define BENCH_CIPHER_ADD AES_AUTH_TAG_SZ
+#endif
+#ifndef BENCH_CIPHER_ADD
+    #define BENCH_CIPHER_ADD 0
 #endif
 
 
 /* use kB instead of mB for embedded benchmarking */
 #ifdef BENCH_EMBEDDED
-    static byte cipher[1024];
+    enum BenchmarkBounds {
+        numBlocks  = 25, /* how many kB to test (en/de)cryption */
+        scryptCnt  = 1,
+        ntimes     = 2,
+        genTimes   = BENCH_MAX_PENDING,
+        agreeTimes = 2
+    };
+    static const char blockType[] = "kB";   /* used in printf output */
+    #define BENCH_SIZE (1024ul)
 #else
-    static byte cipher[1024*1024];
+    enum BenchmarkBounds {
+        numBlocks  = 5, /* how many megs to test (en/de)cryption */
+        scryptCnt  = 10,
+        ntimes     = 100,
+        genTimes   = BENCH_MAX_PENDING, /* must be at least BENCH_MAX_PENDING */
+        agreeTimes = 100
+    };
+    static const char blockType[] = "megs"; /* used in printf output */
+    #define BENCH_SIZE (1024*1024ul)
 #endif
 
 
-static const XGEN_ALIGN byte key[] =
+/* globals for cipher tests */
+#ifdef WOLFSSL_ASYNC_CRYPT
+    static byte* bench_plain = NULL;
+    static byte* bench_cipher = NULL;
+#else
+    static byte bench_plain[BENCH_SIZE];
+    static byte bench_cipher[BENCH_SIZE];
+#endif
+static const XGEN_ALIGN byte bench_key_buf[] =
 {
     0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,
     0xfe,0xde,0xba,0x98,0x76,0x54,0x32,0x10,
@@ -274,84 +449,239 @@ static const XGEN_ALIGN byte key[] =
     0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef
 };
 
-static const XGEN_ALIGN byte iv[] =
+static const XGEN_ALIGN byte bench_iv_buf[] =
 {
     0x12,0x34,0x56,0x78,0x90,0xab,0xcd,0xef,
     0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
     0x11,0x21,0x31,0x41,0x51,0x61,0x71,0x81
 };
-
-
-/* so embedded projects can pull in tests on their own */
-#if !defined(NO_MAIN_DRIVER)
-
-int main(int argc, char** argv)
-
-{
-    (void)argc;
-    (void)argv;
-#else
-int benchmark_test(void *args)
-{
-    (void)args;
-#endif
+static byte* bench_key = (byte*)bench_key_buf;
+static byte* bench_iv = (byte*)bench_iv_buf;
 
 #ifdef WOLFSSL_STATIC_MEMORY
     #ifdef BENCH_EMBEDDED
-        byte memory[50000];
+        static byte gBenchMemory[50000];
     #else
-        byte memory[400000];
+        static byte gBenchMemory[400000];
     #endif
+#endif
 
-    if (wc_LoadStaticMemory(&HEAP_HINT, memory, sizeof(memory),
-                                                WOLFMEM_GENERAL, 1) != 0) {
-        printf("unable to load static memory");
-        exit(EXIT_FAILURE);
+
+/******************************************************************************/
+/* Begin Stats Functions */
+/******************************************************************************/
+#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_NO_ASYNC_THREADING)
+    typedef enum bench_stat_type {
+        BENCH_STAT_ASYM,
+        BENCH_STAT_SYM,
+    } bench_stat_type_t;
+    typedef struct bench_stats {
+        struct bench_stats* next;
+        struct bench_stats* prev;
+        const char* algo;
+        const char* desc;
+        double perfsec;
+        int strength;
+        int doAsync;
+        int finishCount;
+        bench_stat_type_t type;
+    } bench_stats_t;
+    static bench_stats_t* bench_stats_head;
+    static bench_stats_t* bench_stats_tail;
+    static pthread_mutex_t bench_lock = PTHREAD_MUTEX_INITIALIZER;
+
+    static bench_stats_t* bench_stats_add(bench_stat_type_t type,
+        const char* algo, int strength, const char* desc, int doAsync,
+        double perfsec)
+    {
+        bench_stats_t* stat;
+
+        pthread_mutex_lock(&bench_lock);
+
+        /* locate existing in list */
+        for (stat = bench_stats_head; stat != NULL; stat = stat->next) {
+            /* match based on algo, strength and desc */
+            if (stat->algo == algo && stat->strength == strength && stat->desc == desc && stat->doAsync == doAsync) {
+                break;
+            }
+        }
+
+        if (stat == NULL) {
+            /* allocate new and put on list */
+            stat = (bench_stats_t*)XMALLOC(sizeof(bench_stats_t), NULL, DYNAMIC_TYPE_INFO);
+            if (stat) {
+                XMEMSET(stat, 0, sizeof(bench_stats_t));
+
+                /* add to list */
+                stat->next = NULL;
+                if (bench_stats_tail == NULL)  {
+                    bench_stats_head = stat;
+                }
+                else {
+                    bench_stats_tail->next = stat;
+                    stat->prev = bench_stats_tail;
+                }
+                bench_stats_tail = stat; /* add to the end either way */
+            }
+        }
+
+        if (stat) {
+            int isLast = 0;
+            stat->type = type;
+            stat->algo = algo;
+            stat->strength = strength;
+            stat->desc = desc;
+            stat->doAsync = doAsync;
+            stat->perfsec += perfsec;
+            stat->finishCount++;
+
+            if (stat->finishCount == g_threadCount) {
+                isLast = 1;
+            }
+
+            pthread_mutex_unlock(&bench_lock);
+
+            /* wait until remaining are complete */
+            while (stat->finishCount < g_threadCount) {
+                wc_AsyncThreadYield();
+            }
+
+            /* print final stat */
+            if (isLast) {
+                if (stat->type == BENCH_STAT_SYM) {
+                    printf("%-8s%s %8.3f MB/s\n", stat->desc,
+                        BENCH_ASYNC_GET_NAME(stat->doAsync), stat->perfsec);
+                }
+                else {
+                    printf("%-5s %4d %-9s %s %.3f ops/sec\n",
+                        stat->algo, stat->strength, stat->desc,
+                        BENCH_ASYNC_GET_NAME(stat->doAsync), stat->perfsec);
+                }
+            }
+
+            (void)blockType;
+        }
+        else {
+            pthread_mutex_unlock(&bench_lock);
+        }
+
+        return stat;
     }
-#endif
+#endif /* WOLFSSL_ASYNC_CRYPT && !WC_NO_ASYNC_THREADING */
 
-#if defined(USE_WOLFSSL_MEMORY) && defined(WOLFSSL_TRACK_MEMORY)
-    InitMemoryTracker();
+static INLINE void bench_stats_init(void)
+{
+#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_NO_ASYNC_THREADING)
+    bench_stats_head = NULL;
+    bench_stats_tail = NULL;
 #endif
-
-    wolfCrypt_Init();
     INIT_CYCLE_COUNTER
+}
 
-#if defined(DEBUG_WOLFSSL) && !defined(HAVE_VALGRIND)
-    wolfSSL_Debugging_ON();
+static INLINE void bench_stats_start(int* count, double* start)
+{
+    *count = 0;
+    *start = current_time(1);
+    BEGIN_INTEL_CYCLES
+}
+
+static INLINE int bench_stats_sym_check(double start)
+{
+    return ((current_time(0) - start) < BENCH_MIN_RUNTIME_SEC);
+}
+
+static void bench_stats_sym_finish(const char* desc, int doAsync, int count, double start)
+{
+    double total, persec;
+
+    END_INTEL_CYCLES
+    total = current_time(0) - start;
+
+    persec = 1 / total * count;
+#ifdef BENCH_EMBEDDED
+    /* since using kB, convert to MB/s */
+    persec = persec / 1024;
 #endif
 
-    (void)plain;
-    (void)cipher;
-    (void)key;
-    (void)iv;
+    printf("%-8s%s %5d %s took %5.3f seconds, %8.3f MB/s",
+        desc, BENCH_ASYNC_GET_NAME(doAsync), count, blockType, total, persec);
+    SHOW_INTEL_CYCLES
+    printf("\n");
+    (void)doAsync;
 
+#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_NO_ASYNC_THREADING)
+    /* Add to thread stats */
+    bench_stats_add(BENCH_STAT_SYM, NULL, 0, desc, doAsync, persec);
+#endif
+}
+
+static void bench_stats_asym_finish(const char* algo, int strength,
+    const char* desc, int doAsync, int count, double start)
+{
+    double total, each, opsSec, milliEach;
+
+    total = current_time(0) - start;
+    each  = total / count;     /* seconds per operation */
+    opsSec = count / total;    /* operations per second */
+    milliEach = each * 1000;   /* milliseconds */
+
+    printf("%-5s %4d %-9s %s %6d ops took %5.3f sec, avg %5.3f ms,"
+        " %.3f ops/sec\n", algo, strength, desc, BENCH_ASYNC_GET_NAME(doAsync),
+        count, total, milliEach, opsSec);
+    (void)doAsync;
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_NO_ASYNC_THREADING)
+    /* Add to thread stats */
+    bench_stats_add(BENCH_STAT_ASYM, algo, strength, desc, doAsync, opsSec);
+#endif
+}
+
+static INLINE void bench_stats_free(void)
+{
+#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_NO_ASYNC_THREADING)
+    bench_stats_t* stat;
+    for (stat = bench_stats_head; stat != NULL; ) {
+        bench_stats_t* next = stat->next;
+        XFREE(stat, NULL, DYNAMIC_TYPE_INFO);
+        stat = next;
+    }
+    bench_stats_head = NULL;
+    bench_stats_tail = NULL;
+#endif
+}
+/******************************************************************************/
+/* End Stats Functions */
+/******************************************************************************/
+
+
+static void* benchmarks_do(void* args)
+{
 #ifdef WOLFSSL_ASYNC_CRYPT
-    if (wolfAsync_DevOpen(&devId) != 0) {
-        printf("Async device open failed\n");
-        exit(-1);
+#ifndef WC_NO_ASYNC_THREADING
+    ThreadData* threadData = (ThreadData*)args;
+
+    if (wolfAsync_DevOpenThread(&devId, &threadData->thread_id) < 0)
+#else
+    if (wolfAsync_DevOpen(&devId) < 0)
+#endif
+    {
+        printf("Async device open failed\nRunning without async\n");
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
-#ifdef HAVE_WNR
-    if (wc_InitNetRandom(wnrConfigFile, NULL, 5000) != 0) {
-        printf("Whitewood netRandom config init failed\n");
-        exit(-1);
-    }
-#endif /* HAVE_WNR */
+    (void)args;
 
 #if defined(HAVE_LOCAL_RNG)
     {
         int rngRet;
 
 #ifndef HAVE_FIPS
-        rngRet = wc_InitRng_ex(&rng, HEAP_HINT);
+        rngRet = wc_InitRng_ex(&rng, HEAP_HINT, INVALID_DEVID);
 #else
         rngRet = wc_InitRng(&rng);
 #endif
         if (rngRet < 0) {
             printf("InitRNG failed\n");
-            return rngRet;
         }
     }
 #endif
@@ -361,11 +691,20 @@ int benchmark_test(void *args)
 #endif /* WC_NO_RNG */
 #ifndef NO_AES
 #ifdef HAVE_AES_CBC
-    bench_aes(0);
-    bench_aes(1);
+    #ifndef NO_SW_BENCH
+        bench_aescbc(0);
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
+        bench_aescbc(1);
+    #endif
 #endif
 #ifdef HAVE_AESGCM
-    bench_aesgcm();
+    #ifndef NO_SW_BENCH
+        bench_aesgcm(0);
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
+        bench_aesgcm(1);
+    #endif
 #endif
 #ifdef WOLFSSL_AES_COUNTER
     bench_aesctr();
@@ -379,7 +718,12 @@ int benchmark_test(void *args)
     bench_camellia();
 #endif
 #ifndef NO_RC4
-    bench_arc4();
+    #ifndef NO_SW_BENCH
+        bench_arc4(0);
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ARC4)
+        bench_arc4(1);
+    #endif
 #endif
 #ifdef HAVE_HC128
     bench_hc128();
@@ -394,34 +738,67 @@ int benchmark_test(void *args)
     bench_chacha20_poly1305_aead();
 #endif
 #ifndef NO_DES3
-    bench_des();
+    #ifndef NO_SW_BENCH
+        bench_des(0);
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES)
+        bench_des(1);
+    #endif
 #endif
 #ifdef HAVE_IDEA
     bench_idea();
 #endif
 
-    printf("\n");
-
 #ifndef NO_MD5
-    bench_md5();
+    #ifndef NO_SW_BENCH
+        bench_md5(0);
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5)
+        bench_md5(1);
+    #endif
 #endif
 #ifdef HAVE_POLY1305
     bench_poly1305();
 #endif
 #ifndef NO_SHA
-    bench_sha();
+    #ifndef NO_SW_BENCH
+        bench_sha(0);
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA)
+        bench_sha(1);
+    #endif
 #endif
 #ifdef WOLFSSL_SHA224
-    bench_sha224();
+    #ifndef NO_SW_BENCH
+        bench_sha224(0);
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224)
+        bench_sha224(1);
+    #endif
 #endif
 #ifndef NO_SHA256
-    bench_sha256();
+    #ifndef NO_SW_BENCH
+        bench_sha256(0);
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
+        bench_sha256(1);
+    #endif
 #endif
 #ifdef WOLFSSL_SHA384
-    bench_sha384();
+    #ifndef NO_SW_BENCH
+        bench_sha384(0);
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384)
+        bench_sha384(1);
+    #endif
 #endif
 #ifdef WOLFSSL_SHA512
-    bench_sha512();
+    #ifndef NO_SW_BENCH
+        bench_sha512(0);
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512)
+        bench_sha512(1);
+    #endif
 #endif
 #ifdef WOLFSSL_RIPEMD
     bench_ripemd();
@@ -433,26 +810,37 @@ int benchmark_test(void *args)
     bench_cmac();
 #endif
 
-    printf("\n");
-
 #ifdef HAVE_SCRYPT
     bench_scrypt();
 #endif
 
-    printf("\n");
-
 #ifndef NO_RSA
-    bench_rsa();
-    #ifdef WOLFSSL_ASYNC_CRYPT
-        bench_rsa_async();
-    #endif
     #ifdef WOLFSSL_KEY_GEN
-        bench_rsaKeyGen();
+        #ifndef NO_SW_BENCH
+            bench_rsaKeyGen(0);
+        #endif
+        #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA)
+            /* async supported in simulator only */
+            #ifdef WOLFSSL_ASYNC_CRYPT_TEST
+                bench_rsaKeyGen(1);
+            #endif
+        #endif
+    #endif
+    #ifndef NO_SW_BENCH
+        bench_rsa(0);
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA)
+        bench_rsa(1);
     #endif
 #endif
 
 #ifndef NO_DH
-    bench_dh();
+    #ifndef NO_SW_BENCH
+        bench_dh(0);
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH)
+        bench_dh(1);
+    #endif
 #endif
 
 #ifdef HAVE_NTRU
@@ -461,18 +849,24 @@ int benchmark_test(void *args)
 #endif
 
 #ifdef HAVE_ECC
-    bench_eccKeyGen();
-    bench_eccKeyAgree();
+    #ifndef NO_SW_BENCH
+        bench_eccMakeKey(0);
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+        /* async supported in simulator only */
+        #ifdef WOLFSSL_ASYNC_CRYPT_TEST
+            bench_eccMakeKey(1);
+        #endif
+    #endif
+    #ifndef NO_SW_BENCH
+        bench_ecc(0);
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+        bench_ecc(1);
+    #endif
     #ifdef HAVE_ECC_ENCRYPT
         bench_eccEncrypt();
     #endif
-
-    #if defined(FP_ECC)
-        wc_ecc_fp_free();
-    #endif
-    #ifdef ECC_CACHE_CURVE
-        wc_ecc_curve_cache_free();
-    #endif
 #endif
 
 #ifdef HAVE_CURVE25519
@@ -495,110 +889,176 @@ int benchmark_test(void *args)
     wolfAsync_DevClose(&devId);
 #endif
 
-#ifdef HAVE_WNR
-    if (wc_FreeNetRandom() < 0) {
-        printf("Failed to free netRandom context\n");
-        exit(-1);
-    }
-#endif
-
-    if (wolfCrypt_Cleanup() != 0) {
-        printf("error with wolfCrypt_Cleanup\n");
-    }
-
-#if defined(USE_WOLFSSL_MEMORY) && defined(WOLFSSL_TRACK_MEMORY)
-    ShowMemoryTracker();
-#endif
-
-    EXIT_TEST(0);
+    return NULL;
 }
 
 
-#ifdef BENCH_EMBEDDED
-enum BenchmarkBounds {
-    numBlocks  = 25, /* how many kB to test (en/de)cryption */
-    scryptCnt  = 1,
-    ntimes     = 1,
-    genTimes   = 5,  /* public key iterations */
-    agreeTimes = 5
-};
-static const char blockType[] = "kB";   /* used in printf output */
+/* so embedded projects can pull in tests on their own */
+#ifdef HAVE_STACK_SIZE
+THREAD_RETURN WOLFSSL_THREAD benchmark_test(void* args)
 #else
-enum BenchmarkBounds {
-    numBlocks  = 50,  /* how many megs to test (en/de)cryption */
-    scryptCnt  = 10,
+int benchmark_test(void *args)
+#endif
+{
+    int ret = 0;
+
+#ifdef WOLFSSL_STATIC_MEMORY
+    ret = wc_LoadStaticMemory(&HEAP_HINT, gBenchMemory, sizeof(gBenchMemory),
+                                                            WOLFMEM_GENERAL, 1);
+    if (ret != 0) {
+        printf("unable to load static memory %d\n", ret);
+        EXIT_TEST(EXIT_FAILURE);
+    }
+#endif /* WOLFSSL_STATIC_MEMORY */
+
+    (void)args;
+
+    wolfCrypt_Init();
+
+    bench_stats_init();
+
+#if defined(DEBUG_WOLFSSL) && !defined(HAVE_VALGRIND)
+    wolfSSL_Debugging_ON();
+#endif
+
+    printf("wolfCrypt Benchmark (min %.1f sec each)\n", BENCH_MIN_RUNTIME_SEC);
+
+#ifdef HAVE_WNR
+    ret = wc_InitNetRandom(wnrConfigFile, NULL, 5000);
+    if (ret != 0) {
+        printf("Whitewood netRandom config init failed %d\n", ret);
+        EXIT_TEST(EXIT_FAILURE);
+    }
+#endif /* HAVE_WNR */
+
+    /* setup bench plain, cipher, key and iv globals */
 #ifdef WOLFSSL_ASYNC_CRYPT
-    ntimes     = 1000,
-    genTimes   = 1000,
-    agreeTimes = 1000
+    bench_plain = (byte*)XMALLOC(BENCH_SIZE+BENCH_CIPHER_ADD, HEAP_HINT, DYNAMIC_TYPE_WOLF_BIGINT);
+    bench_cipher = (byte*)XMALLOC(BENCH_SIZE+BENCH_CIPHER_ADD, HEAP_HINT, DYNAMIC_TYPE_WOLF_BIGINT);
+    bench_key = (byte*)XMALLOC(sizeof(bench_key_buf), HEAP_HINT, DYNAMIC_TYPE_WOLF_BIGINT);
+    bench_iv = (byte*)XMALLOC(sizeof(bench_iv_buf), HEAP_HINT, DYNAMIC_TYPE_WOLF_BIGINT);
+    if (bench_plain == NULL || bench_cipher == NULL || bench_key == NULL || bench_iv == NULL) {
+        printf("Benchmark cipher buffer alloc failed!\n");
+        EXIT_TEST(EXIT_FAILURE);
+    }
+    XMEMCPY(bench_key, bench_key_buf, sizeof(bench_key_buf));
+    XMEMCPY(bench_iv, bench_iv_buf, sizeof(bench_iv_buf));
+    XMEMSET(bench_plain, 0, BENCH_SIZE+BENCH_CIPHER_ADD);
+    XMEMSET(bench_cipher, 0, BENCH_SIZE+BENCH_CIPHER_ADD);
+#endif
+    (void)bench_plain;
+    (void)bench_cipher;
+    (void)bench_key;
+    (void)bench_iv;
+
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_NO_ASYNC_THREADING)
+{
+    int i;
+    int numCpus = wc_AsyncGetNumberOfCpus();
+
+    printf("CPUs: %d\n", numCpus);
+
+    g_threadData = (ThreadData*)XMALLOC(sizeof(ThreadData) * numCpus,
+        HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+    if (g_threadData == NULL) {
+        printf("Thread data alloc failed!\n");
+        EXIT_TEST(EXIT_FAILURE);
+    }
+    g_threadCount = numCpus;
+
+    /* Create threads */
+    for (i = 0; i < numCpus; i++) {
+        ret = wc_AsyncThreadCreate(&g_threadData[i].thread_id,
+            benchmarks_do, &g_threadData[i]);
+        if (ret != 0) {
+            printf("Error creating benchmark thread %d\n", ret);
+            EXIT_TEST(EXIT_FAILURE);
+        }
+    }
+
+    /* Wait for threads to finish */
+    for (i = 0; i < numCpus; i++) {
+        wc_AsyncThreadJoin(&g_threadData[i].thread_id);
+    }
+
+    XFREE(g_threadData, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+}
 #else
-    ntimes     = 100,
-    genTimes   = 100,
-    agreeTimes = 100
+    benchmarks_do(NULL);
 #endif
-};
-static const char blockType[] = "megs"; /* used in printf output */
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    XFREE(bench_plain, HEAP_HINT, DYNAMIC_TYPE_WOLF_BIGINT);
+    XFREE(bench_cipher, HEAP_HINT, DYNAMIC_TYPE_WOLF_BIGINT);
+    XFREE(bench_key, HEAP_HINT, DYNAMIC_TYPE_WOLF_BIGINT);
+    XFREE(bench_iv, HEAP_HINT, DYNAMIC_TYPE_WOLF_BIGINT);
 #endif
 
+#ifdef HAVE_WNR
+    ret = wc_FreeNetRandom();
+    if (ret < 0) {
+        printf("Failed to free netRandom context %d\n", ret);
+        EXIT_TEST(EXIT_FAILURE);
+    }
+#endif
+
+    bench_stats_free();
+
+	if (wolfCrypt_Cleanup() != 0) {
+        printf("error with wolfCrypt_Cleanup\n");
+    }
+
+    EXIT_TEST(ret);
+}
+
+
 #ifndef WC_NO_RNG
 void bench_rng(void)
 {
-    int    ret, i;
-    double start, total, persec;
-    int pos, len, remain;
-#ifndef HAVE_LOCAL_RNG
-    WC_RNG rng;
-#endif
+    int    ret, i, count;
+    double start;
+    int    pos, len, remain;
+    WC_RNG myrng;
 
-#ifndef HAVE_LOCAL_RNG
 #ifndef HAVE_FIPS
-    ret = wc_InitRng_ex(&rng, HEAP_HINT);
+    ret = wc_InitRng_ex(&myrng, HEAP_HINT, devId);
 #else
-    ret = wc_InitRng(&rng);
+    ret = wc_InitRng(&myrng);
 #endif
     if (ret < 0) {
-        printf("InitRNG failed\n");
+        printf("InitRNG failed %d\n", ret);
         return;
     }
-#endif
 
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < numBlocks; i++) {
+            /* Split request to handle large RNG request */
+            pos = 0;
+            remain = (int)BENCH_SIZE;
+            while (remain > 0) {
+                len = remain;
+                if (len > RNG_MAX_BLOCK_LEN)
+                    len = RNG_MAX_BLOCK_LEN;
+                ret = wc_RNG_GenerateBlock(&myrng, &bench_plain[pos], len);
+                if (ret < 0)
+                    goto exit_rng;
 
-    for(i = 0; i < numBlocks; i++) {
-        /* Split request to handle large RNG request */
-        pos = 0;
-        remain = (int)sizeof(plain);
-        while (remain > 0) {
-            len = remain;
-            if (len > RNG_MAX_BLOCK_LEN)
-                len = RNG_MAX_BLOCK_LEN;
-            ret = wc_RNG_GenerateBlock(&rng, &plain[pos], len);
-            if (ret < 0) {
-                printf("wc_RNG_GenerateBlock failed %d\n", ret);
-                break;
+                remain -= len;
+                pos += len;
             }
-            remain -= len;
-            pos += len;
         }
+        count += i;
+    } while (bench_stats_sym_check(start));
+exit_rng:
+    bench_stats_sym_finish("RNG", 0, count, start);
+
+    if (ret < 0) {
+        printf("wc_RNG_GenerateBlock failed %d\n", ret);
     }
 
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-    printf("RNG      %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                                  blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
-
-#ifndef HAVE_LOCAL_RNG
-    wc_FreeRng(&rng);
-#endif
+    wc_FreeRng(&myrng);
 }
 #endif /* WC_NO_RNG */
 
@@ -606,146 +1066,180 @@ void bench_rng(void)
 #ifndef NO_AES
 
 #ifdef HAVE_AES_CBC
-void bench_aes(int show)
+void bench_aescbc(int doAsync)
 {
-    Aes    enc;
-    double start, total, persec;
-    int    i;
-    int    ret;
+    int    ret, i, count = 0, times;
+    Aes    enc[BENCH_MAX_PENDING];
+    double start;
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    if ((ret = wc_AesAsyncInit(&enc, devId)) != 0) {
-        printf("wc_AesAsyncInit failed, ret = %d\n", ret);
-        return;
+    bench_async_begin();
+
+    /* clear for done cleanup */
+    XMEMSET(enc, 0, sizeof(enc));
+
+    /* init keys */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        if ((ret = wc_AesInit(&enc[i], HEAP_HINT,
+                                doAsync ? devId : INVALID_DEVID)) != 0) {
+            printf("AesInit failed, ret = %d\n", ret);
+            goto exit;
+        }
+
+        ret = wc_AesSetKey(&enc[i], bench_key, 16, bench_iv, AES_ENCRYPTION);
+        if (ret != 0) {
+            printf("AesSetKey failed, ret = %d\n", ret);
+            goto exit;
+        }
     }
-#endif
 
-    ret = wc_AesSetKey(&enc, key, 16, iv, AES_ENCRYPTION);
-    if (ret != 0) {
-        printf("AesSetKey failed, ret = %d\n", ret);
-        return;
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < numBlocks || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
+
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times, numBlocks)) {
+                    ret = wc_AesCbcEncrypt(&enc[i], bench_plain, bench_cipher,
+                        BENCH_SIZE);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times)) {
+                        goto exit_aes_enc;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+    } while (bench_stats_sym_check(start));
+exit_aes_enc:
+    bench_stats_sym_finish("AES-Enc", doAsync, count, start);
+
+    if (ret < 0) {
+        goto exit;
     }
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
-
-    for(i = 0; i < numBlocks; i++)
-        wc_AesCbcEncrypt(&enc, plain, cipher, sizeof(plain));
-
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-
-    if (show) {
-        printf("AES enc  %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                                  blockType, total, persec);
-        SHOW_INTEL_CYCLES
-        printf("\n");
-    }
-#ifdef WOLFSSL_ASYNC_CRYPT
-    wc_AesAsyncFree(&enc);
-    if ((ret = wc_AesAsyncInit(&enc, devId)) != 0) {
-        printf("wc_AesAsyncInit failed, ret = %d\n", ret);
-        return;
-    }
-#endif
 
 #ifdef HAVE_AES_DECRYPT
-    ret = wc_AesSetKey(&enc, key, 16, iv, AES_DECRYPTION);
-    if (ret != 0) {
-        printf("AesSetKey failed, ret = %d\n", ret);
-        return;
+    /* init keys */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        ret = wc_AesSetKey(&enc[i], bench_key, 16, bench_iv, AES_DECRYPTION);
+        if (ret != 0) {
+            printf("AesSetKey failed, ret = %d\n", ret);
+            goto exit;
+        }
     }
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
 
-    for(i = 0; i < numBlocks; i++)
-        wc_AesCbcDecrypt(&enc, plain, cipher, sizeof(plain));
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < numBlocks || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
 
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times, numBlocks)) {
+                    ret = wc_AesCbcDecrypt(&enc[i], bench_plain, bench_cipher,
+                        BENCH_SIZE);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times)) {
+                        goto exit_aes_dec;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+    } while (bench_stats_sym_check(start));
+exit_aes_dec:
+    bench_stats_sym_finish("AES-Dec", doAsync, count, start);
 
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-
-    if (show) {
-        printf("AES dec  %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                                  blockType, total, persec);
-        SHOW_INTEL_CYCLES
-        printf("\n");
-    }
 #endif /* HAVE_AES_DECRYPT */
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    wc_AesAsyncFree(&enc);
-#endif
+exit:
+
+    if (ret < 0) {
+        printf("bench_aescbc failed: %d\n", ret);
+    }
+
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        wc_AesFree(&enc[i]);
+    }
+
+    bench_async_end();
 }
 #endif /* HAVE_AES_CBC */
 
-#if defined(HAVE_AESGCM) || defined(HAVE_AESCCM)
-    static byte additional[13];
-    static byte tag[16];
-#endif
-
-
 #ifdef HAVE_AESGCM
-void bench_aesgcm(void)
+void bench_aesgcm(int doAsync)
 {
-    Aes    enc;
-    double start, total, persec;
-    int    i;
+    int    ret, i, count = 0, times;
+    Aes    enc[BENCH_MAX_PENDING];
+    double start;
 
-    wc_AesGcmSetKey(&enc, key, 16);
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
+    DECLARE_VAR(bench_additional, byte, AES_AUTH_ADD_SZ, HEAP_HINT);
+    DECLARE_VAR(bench_tag, byte, AES_AUTH_TAG_SZ, HEAP_HINT);
 
-    for(i = 0; i < numBlocks; i++)
-        wc_AesGcmEncrypt(&enc, cipher, plain, sizeof(plain), iv, 12,
-                        tag, 16, additional, 13);
+    bench_async_begin();
 
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
+    /* clear for done cleanup */
+    XMEMSET(enc, 0, sizeof(enc));
+#ifdef WOLFSSL_ASYNC_CRYPT
+    if (bench_additional)
 #endif
-
-    printf("AES-GCM  %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
-
-#if 0
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
-
-    for(i = 0; i < numBlocks; i++)
-        wc_AesGcmDecrypt(&enc, plain, cipher, sizeof(cipher), iv, 12,
-                        tag, 16, additional, 13);
-
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
+    {   XMEMSET(bench_additional, 0, AES_AUTH_ADD_SZ); }
+#ifdef WOLFSSL_ASYNC_CRYPT
+    if (bench_tag)
 #endif
+    {   XMEMSET(bench_tag, 0, AES_AUTH_TAG_SZ); }
 
-    printf("AES-GCM Decrypt %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
-#endif
+    /* init keys */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        if ((ret = wc_AesInit(&enc[i], HEAP_HINT,
+                        doAsync ? devId : INVALID_DEVID)) != 0) {
+            printf("AesInit failed, ret = %d\n", ret);
+            goto exit;
+        }
+
+        ret = wc_AesGcmSetKey(&enc[i], bench_key, 16);
+        if (ret != 0) {
+            printf("AesGcmSetKey failed, ret = %d\n", ret);
+            goto exit;
+        }
+    }
+
+    /* GCM uses same routine in backend for both encrypt and decrypt */
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < numBlocks || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
+
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times, numBlocks)) {
+                    ret = wc_AesGcmEncrypt(&enc[i], bench_cipher,
+                        bench_plain, BENCH_SIZE,
+                        bench_iv, 12, bench_tag, AES_AUTH_TAG_SZ,
+                        bench_additional, AES_AUTH_ADD_SZ);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times)) {
+                        goto exit_aes_gcm;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+    } while (bench_stats_sym_check(start));
+exit_aes_gcm:
+    bench_stats_sym_finish("AES-GCM", doAsync, count, start);
+
+exit:
+
+    if (ret < 0) {
+        printf("bench_aesgcm failed: %d\n", ret);
+    }
+
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        wc_AesFree(&enc[i]);
+    }
+
+    FREE_VAR(bench_additional, HEAP_HINT);
+    FREE_VAR(bench_tag, HEAP_HINT);
+
+    bench_async_end();
 }
 #endif /* HAVE_AESGCM */
 
@@ -754,29 +1248,19 @@ void bench_aesgcm(void)
 void bench_aesctr(void)
 {
     Aes    enc;
-    double start, total, persec;
-    int    i;
+    double start;
+    int    i, count;
 
-    wc_AesSetKeyDirect(&enc, key, AES_BLOCK_SIZE, iv, AES_ENCRYPTION);
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
+    wc_AesSetKeyDirect(&enc, bench_key, AES_BLOCK_SIZE, bench_iv, AES_ENCRYPTION);
 
-    for(i = 0; i < numBlocks; i++)
-        wc_AesCtrEncrypt(&enc, plain, cipher, sizeof(plain));
-
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-
-    printf("AES-CTR  %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < numBlocks; i++) {
+            wc_AesCtrEncrypt(&enc, bench_plain, bench_cipher, BENCH_SIZE);
+        }
+        count += i;
+    } while (bench_stats_sym_check(start));
+    bench_stats_sym_finish("AES-CTR", 0, count, start);
 }
 #endif /* WOLFSSL_AES_COUNTER */
 
@@ -785,34 +1269,30 @@ void bench_aesctr(void)
 void bench_aesccm(void)
 {
     Aes    enc;
-    double start, total, persec;
-    int    i;
-    int    ret;
+    double start;
+    int    ret, i, count;
 
-    if ((ret = wc_AesCcmSetKey(&enc, key, 16)) != 0) {
+    DECLARE_VAR(bench_additional, byte, AES_AUTH_ADD_SZ, HEAP_HINT);
+    DECLARE_VAR(bench_tag, byte, AES_AUTH_TAG_SZ, HEAP_HINT);
+
+    if ((ret = wc_AesCcmSetKey(&enc, bench_key, 16)) != 0) {
         printf("wc_AesCcmSetKey failed, ret = %d\n", ret);
         return;
     }
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
 
-    for(i = 0; i < numBlocks; i++)
-        wc_AesCcmEncrypt(&enc, cipher, plain, sizeof(plain), iv, 12,
-                        tag, 16, additional, 13);
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < numBlocks; i++) {
+            wc_AesCcmEncrypt(&enc, bench_cipher, bench_plain, BENCH_SIZE,
+                bench_iv, 12, bench_tag, AES_AUTH_TAG_SZ,
+                bench_additional, AES_AUTH_ADD_SZ);
+        }
+        count += i;
+    } while (bench_stats_sym_check(start));
+    bench_stats_sym_finish("AES-CCM", 0, count, start);
 
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-
-    printf("AES-CCM  %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    FREE_VAR(bench_additional, HEAP_HINT);
+    FREE_VAR(bench_tag, HEAP_HINT);
 }
 #endif /* HAVE_AESCCM */
 #endif /* !NO_AES */
@@ -821,38 +1301,26 @@ void bench_aesccm(void)
 #ifdef HAVE_POLY1305
 void bench_poly1305()
 {
-    Poly1305    enc;
-    byte   mac[16];
-    double start, total, persec;
-    int    i;
-    int    ret;
+    Poly1305 enc;
+    byte     mac[16];
+    double   start;
+    int      ret, i, count;
 
-
-    ret = wc_Poly1305SetKey(&enc, key, 32);
+    ret = wc_Poly1305SetKey(&enc, bench_key, 32);
     if (ret != 0) {
         printf("Poly1305SetKey failed, ret = %d\n", ret);
         return;
     }
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
 
-    for(i = 0; i < numBlocks; i++)
-        wc_Poly1305Update(&enc, plain, sizeof(plain));
-
-    wc_Poly1305Final(&enc, mac);
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-
-    printf("POLY1305 %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                                  blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < numBlocks; i++) {
+            wc_Poly1305Update(&enc, bench_plain, BENCH_SIZE);
+        }
+        wc_Poly1305Final(&enc, mac);
+        count += i;
+    } while (bench_stats_sym_check(start));
+    bench_stats_sym_finish("POLY1305", 0, count, start);
 }
 #endif /* HAVE_POLY1305 */
 
@@ -861,178 +1329,197 @@ void bench_poly1305()
 void bench_camellia(void)
 {
     Camellia cam;
-    double start, total, persec;
-    int    i, ret;
+    double   start;
+    int      ret, i, count;
 
-    ret = wc_CamelliaSetKey(&cam, key, 16, iv);
+    ret = wc_CamelliaSetKey(&cam, bench_key, 16, bench_iv);
     if (ret != 0) {
         printf("CamelliaSetKey failed, ret = %d\n", ret);
         return;
     }
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
 
-    for(i = 0; i < numBlocks; i++)
-        wc_CamelliaCbcEncrypt(&cam, plain, cipher, sizeof(plain));
-
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-
-    printf("Camellia %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < numBlocks; i++) {
+            wc_CamelliaCbcEncrypt(&cam, bench_plain, bench_cipher,
+                BENCH_SIZE);
+        }
+        count += i;
+    } while (bench_stats_sym_check(start));
+    bench_stats_sym_finish("Camellia", 0, count, start);
 }
 #endif
 
 
 #ifndef NO_DES3
-void bench_des(void)
+void bench_des(int doAsync)
 {
-    Des3   enc;
-    double start, total, persec;
-    int    i, ret;
+    int    ret, i, count = 0, times;
+    Des3   enc[BENCH_MAX_PENDING];
+    double start;
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    if (wc_Des3AsyncInit(&enc, devId) != 0)
-        printf("des3 async init failed\n");
-#endif
-    ret = wc_Des3_SetKey(&enc, key, iv, DES_ENCRYPTION);
-    if (ret != 0) {
-        printf("Des3_SetKey failed, ret = %d\n", ret);
-        return;
+    bench_async_begin();
+
+    /* clear for done cleanup */
+    XMEMSET(enc, 0, sizeof(enc));
+
+    /* init keys */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        if ((ret = wc_Des3Init(&enc[i], HEAP_HINT,
+                                doAsync ? devId : INVALID_DEVID)) != 0) {
+            printf("Des3Init failed, ret = %d\n", ret);
+            goto exit;
+        }
+
+        ret = wc_Des3_SetKey(&enc[i], bench_key, bench_iv, DES_ENCRYPTION);
+        if (ret != 0) {
+            printf("Des3_SetKey failed, ret = %d\n", ret);
+            goto exit;
+        }
     }
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
 
-    for(i = 0; i < numBlocks; i++)
-        wc_Des3_CbcEncrypt(&enc, plain, cipher, sizeof(plain));
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < numBlocks || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
 
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times, numBlocks)) {
+                    ret = wc_Des3_CbcEncrypt(&enc[i], bench_plain, bench_cipher,
+                        BENCH_SIZE);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times)) {
+                        goto exit_3des;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+    } while (bench_stats_sym_check(start));
+exit_3des:
+    bench_stats_sym_finish("3DES", doAsync, count, start);
 
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
+exit:
 
-    printf("3DES     %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
-#ifdef WOLFSSL_ASYNC_CRYPT
-    wc_Des3AsyncFree(&enc);
-#endif
+    if (ret < 0) {
+        printf("bench_des failed: %d\n", ret);
+    }
+
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        wc_Des3Free(&enc[i]);
+    }
+
+    bench_async_end();
 }
-#endif
+#endif /* !NO_DES3 */
 
 
 #ifdef HAVE_IDEA
 void bench_idea(void)
 {
     Idea   enc;
-    double start, total, persec;
-    int    i, ret;
+    double start;
+    int    ret, i, count;
 
-    ret = wc_IdeaSetKey(&enc, key, IDEA_KEY_SIZE, iv, IDEA_ENCRYPTION);
+    ret = wc_IdeaSetKey(&enc, bench_key, IDEA_KEY_SIZE, bench_iv,
+        IDEA_ENCRYPTION);
     if (ret != 0) {
         printf("Des3_SetKey failed, ret = %d\n", ret);
         return;
     }
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
 
-    for(i = 0; i < numBlocks; i++)
-        wc_IdeaCbcEncrypt(&enc, plain, cipher, sizeof(plain));
-
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-
-    printf("IDEA     %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < numBlocks; i++) {
+            wc_IdeaCbcEncrypt(&enc, bench_plain, bench_cipher, BENCH_SIZE);
+        }
+        count += i;
+    } while (bench_stats_sym_check(start));
+    bench_stats_sym_finish("IDEA", 0, count, start);
 }
 #endif /* HAVE_IDEA */
 
 
 #ifndef NO_RC4
-void bench_arc4(void)
+void bench_arc4(int doAsync)
 {
-    Arc4   enc;
-    double start, total, persec;
-    int    i;
+    int    ret, i, count = 0, times;
+    Arc4   enc[BENCH_MAX_PENDING];
+    double start;
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    if (wc_Arc4AsyncInit(&enc, devId) != 0)
-        printf("arc4 async init failed\n");
-#endif
+    bench_async_begin();
 
-    wc_Arc4SetKey(&enc, key, 16);
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
+    /* clear for done cleanup */
+    XMEMSET(enc, 0, sizeof(enc));
 
-    for(i = 0; i < numBlocks; i++)
-        wc_Arc4Process(&enc, cipher, plain, sizeof(plain));
+    /* init keys */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        if ((ret = wc_Arc4Init(&enc[i], HEAP_HINT,
+                            doAsync ? devId : INVALID_DEVID)) != 0) {
+            printf("Arc4Init failed, ret = %d\n", ret);
+            goto exit;
+        }
 
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
+        ret = wc_Arc4SetKey(&enc[i], bench_key, 16);
+        if (ret != 0) {
+            printf("Arc4SetKey failed, ret = %d\n", ret);
+            goto exit;
+        }
+    }
 
-    printf("ARC4     %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
-#ifdef WOLFSSL_ASYNC_CRYPT
-    wc_Arc4AsyncFree(&enc);
-#endif
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < numBlocks || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
+
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times, numBlocks)) {
+                    ret = wc_Arc4Process(&enc[i], bench_cipher, bench_plain,
+                        BENCH_SIZE);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times)) {
+                        goto exit_arc4;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+    } while (bench_stats_sym_check(start));
+exit_arc4:
+    bench_stats_sym_finish("ARC4", doAsync, count, start);
+
+exit:
+
+    if (ret < 0) {
+        printf("bench_arc4 failed: %d\n", ret);
+    }
+
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        wc_Arc4Free(&enc[i]);
+    }
+
+    bench_async_end();
 }
-#endif
+#endif /* !NO_RC4 */
 
 
 #ifdef HAVE_HC128
 void bench_hc128(void)
 {
     HC128  enc;
-    double start, total, persec;
-    int    i;
+    double start;
+    int    i, count;
 
-    wc_Hc128_SetKey(&enc, key, iv);
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
+    wc_Hc128_SetKey(&enc, bench_key, bench_iv);
 
-    for(i = 0; i < numBlocks; i++)
-        wc_Hc128_Process(&enc, cipher, plain, sizeof(plain));
-
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-
-    printf("HC128    %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < numBlocks; i++) {
+            wc_Hc128_Process(&enc, bench_cipher, bench_plain, BENCH_SIZE);
+        }
+        count += i;
+    } while (bench_stats_sym_check(start));
+    bench_stats_sym_finish("HC128", 0, count, start);
 }
 #endif /* HAVE_HC128 */
 
@@ -1040,29 +1527,20 @@ void bench_hc128(void)
 #ifndef NO_RABBIT
 void bench_rabbit(void)
 {
-    Rabbit  enc;
-    double start, total, persec;
-    int    i;
+    Rabbit enc;
+    double start;
+    int    i, count;
 
-    wc_RabbitSetKey(&enc, key, iv);
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
+    wc_RabbitSetKey(&enc, bench_key, bench_iv);
 
-    for(i = 0; i < numBlocks; i++)
-        wc_RabbitProcess(&enc, cipher, plain, sizeof(plain));
-
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-
-    printf("RABBIT   %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < numBlocks; i++) {
+            wc_RabbitProcess(&enc, bench_cipher, bench_plain, BENCH_SIZE);
+        }
+        count += i;
+    } while (bench_stats_sym_check(start));
+    bench_stats_sym_finish("RABBIT", 0, count, start);
 }
 #endif /* NO_RABBIT */
 
@@ -1071,315 +1549,496 @@ void bench_rabbit(void)
 void bench_chacha(void)
 {
     ChaCha enc;
-    double start, total, persec;
-    int    i;
+    double start;
+    int    i, count;
 
-    wc_Chacha_SetKey(&enc, key, 16);
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
-
-    for (i = 0; i < numBlocks; i++) {
-        wc_Chacha_SetIV(&enc, iv, 0);
-        wc_Chacha_Process(&enc, cipher, plain, sizeof(plain));
-    }
-
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-
-    printf("CHACHA   %d %s took %5.3f seconds, %8.3f MB/s", numBlocks, blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    wc_Chacha_SetKey(&enc, bench_key, 16);
 
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < numBlocks; i++) {
+            wc_Chacha_SetIV(&enc, bench_iv, 0);
+            wc_Chacha_Process(&enc, bench_cipher, bench_plain, BENCH_SIZE);
+        }
+        count += i;
+    } while (bench_stats_sym_check(start));
+    bench_stats_sym_finish("CHACHA", 0, count, start);
 }
 #endif /* HAVE_CHACHA*/
 
 #if defined(HAVE_CHACHA) && defined(HAVE_POLY1305)
 void bench_chacha20_poly1305_aead(void)
 {
-    double start, total, persec;
-    int    i;
+    double start;
+    int    i, count;
 
     byte authTag[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE];
-    XMEMSET( authTag, 0, sizeof( authTag ) );
-
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
-
-    for (i = 0; i < numBlocks; i++)
-    {
-        wc_ChaCha20Poly1305_Encrypt(key, iv, NULL, 0, plain, sizeof(plain),
-                                    cipher, authTag );
-    }
-
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-
-    printf("CHA-POLY %d %s took %5.3f seconds, %8.3f MB/s",
-           numBlocks, blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    XMEMSET(authTag, 0, sizeof(authTag));
 
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < numBlocks; i++) {
+            wc_ChaCha20Poly1305_Encrypt(bench_key, bench_iv, NULL, 0,
+                bench_plain, BENCH_SIZE, bench_cipher, authTag);
+        }
+        count += i;
+    } while (bench_stats_sym_check(start));
+    bench_stats_sym_finish("CHA-POLY", 0, count, start);
 }
 #endif /* HAVE_CHACHA && HAVE_POLY1305 */
 
 
 #ifndef NO_MD5
-void bench_md5(void)
+void bench_md5(int doAsync)
 {
-    Md5    hash;
-    byte   digest[MD5_DIGEST_SIZE];
-    double start, total, persec;
-    int    i;
+    Md5    hash[BENCH_MAX_PENDING];
+    double start;
+    int    ret, i, count = 0, times;
+    DECLARE_ARRAY(digest, byte, BENCH_MAX_PENDING, MD5_DIGEST_SIZE, HEAP_HINT);
 
-    wc_InitMd5(&hash);
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
+    bench_async_begin();
 
-    for(i = 0; i < numBlocks; i++)
-        wc_Md5Update(&hash, plain, sizeof(plain));
+    /* clear for done cleanup */
+    XMEMSET(hash, 0, sizeof(hash));
 
-    wc_Md5Final(&hash, digest);
+    /* init keys */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        ret = wc_InitMd5_ex(&hash[i], HEAP_HINT,
+                    doAsync ? devId : INVALID_DEVID);
+        if (ret != 0) {
+            printf("InitMd5_ex failed, ret = %d\n", ret);
+            goto exit;
+        }
+    }
 
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < numBlocks || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
+
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks)) {
+                    ret = wc_Md5Update(&hash[i], bench_plain,
+                        BENCH_SIZE);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times)) {
+                        goto exit_md5;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+
+        times = 0;
+        do {
+            bench_async_poll();
+
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks)) {
+                    ret = wc_Md5Final(&hash[i], digest[i]);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times)) {
+                        goto exit_md5;
+                    }
+                }
+            } /* for i */
+        } while (BENCH_ASYNC_IS_PEND());
+    } while (bench_stats_sym_check(start));
+exit_md5:
+    bench_stats_sym_finish("MD5", doAsync, count, start);
+
+exit:
+
+    if (ret < 0) {
+        printf("bench_md5 failed: %d\n", ret);
+    }
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        wc_Md5Free(&hash[i]);
+    }
 #endif
 
-    printf("MD5      %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT);
+
+    bench_async_end();
 }
-#endif /* NO_MD5 */
+#endif /* !NO_MD5 */
 
 
 #ifndef NO_SHA
-void bench_sha(void)
+void bench_sha(int doAsync)
 {
-    Sha    hash;
-    byte   digest[SHA_DIGEST_SIZE];
-    double start, total, persec;
-    int    i, ret;
+    Sha    hash[BENCH_MAX_PENDING];
+    double start;
+    int    ret, i, count = 0, times;
+    DECLARE_ARRAY(digest, byte, BENCH_MAX_PENDING, SHA_DIGEST_SIZE, HEAP_HINT);
 
-    ret = wc_InitSha(&hash);
-    if (ret != 0) {
-        printf("InitSha failed, ret = %d\n", ret);
-        return;
+    bench_async_begin();
+
+    /* clear for done cleanup */
+    XMEMSET(hash, 0, sizeof(hash));
+
+    /* init keys */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        ret = wc_InitSha_ex(&hash[i], HEAP_HINT,
+            doAsync ? devId : INVALID_DEVID);
+        if (ret != 0) {
+            printf("InitSha failed, ret = %d\n", ret);
+            goto exit;
+        }
     }
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
 
-    for(i = 0; i < numBlocks; i++)
-        wc_ShaUpdate(&hash, plain, sizeof(plain));
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < numBlocks || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
 
-    wc_ShaFinal(&hash, digest);
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks)) {
+                    ret = wc_ShaUpdate(&hash[i], bench_plain,
+                        BENCH_SIZE);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times)) {
+                        goto exit_sha;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
 
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
+        times = 0;
+        do {
+            bench_async_poll();
+
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks)) {
+                    ret = wc_ShaFinal(&hash[i], digest[i]);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times)) {
+                        goto exit_sha;
+                    }
+                }
+            } /* for i */
+        } while (BENCH_ASYNC_IS_PEND());
+    } while (bench_stats_sym_check(start));
+exit_sha:
+    bench_stats_sym_finish("SHA", doAsync, count, start);
+
+exit:
+
+    if (ret < 0) {
+        printf("bench_sha failed: %d\n", ret);
+    }
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        wc_ShaFree(&hash[i]);
+    }
 #endif
 
-    printf("SHA      %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT);
+
+    bench_async_end();
 }
 #endif /* NO_SHA */
 
 
 #ifdef WOLFSSL_SHA224
-void bench_sha224(void)
+void bench_sha224(int doAsync)
 {
-    Sha224 hash;
-    byte   digest[SHA224_DIGEST_SIZE];
-    double start, total, persec;
-    int    i, ret;
+    Sha224 hash[BENCH_MAX_PENDING];
+    double start;
+    int    ret, i, count = 0, times;
+    DECLARE_ARRAY(digest, byte, BENCH_MAX_PENDING, SHA224_DIGEST_SIZE, HEAP_HINT);
 
-    ret = wc_InitSha224(&hash);
-    if (ret != 0) {
-        printf("InitSha224 failed, ret = %d\n", ret);
-        return;
-    }
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
+    bench_async_begin();
 
-    for(i = 0; i < numBlocks; i++) {
-        ret = wc_Sha224Update(&hash, plain, sizeof(plain));
+    /* zero all contexts up front; the cleanup path frees every slot */
+    XMEMSET(hash, 0, sizeof(hash));
+
+    /* init hash contexts; doAsync selects devId, else INVALID_DEVID */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        ret = wc_InitSha224_ex(&hash[i], HEAP_HINT,
+            doAsync ? devId : INVALID_DEVID);
         if (ret != 0) {
-            printf("Sha224Update failed, ret = %d\n", ret);
-            return;
+            printf("InitSha224_ex failed, ret = %d\n", ret);
+            goto exit;
         }
     }
 
-    ret = wc_Sha224Final(&hash, digest);
-    if (ret != 0) {
-        printf("Sha224Final failed, ret = %d\n", ret);
-        return;
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < numBlocks || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
+
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks)) {
+                    ret = wc_Sha224Update(&hash[i], bench_plain,
+                        BENCH_SIZE);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times)) {
+                        goto exit_sha224;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+
+        times = 0; /* restart the counter for the finalize pass */
+        do {
+            bench_async_poll();
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks)) {
+                    ret = wc_Sha224Final(&hash[i], digest[i]);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times)) {
+                        goto exit_sha224;
+                    }
+                }
+            } /* for i */
+        } while (BENCH_ASYNC_IS_PEND());
+    } while (bench_stats_sym_check(start));
+exit_sha224:
+    bench_stats_sym_finish("SHA-224", doAsync, count, start);
+
+exit:
+
+    if (ret < 0) {
+        printf("bench_sha224 failed: %d\n", ret);
     }
 
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        wc_Sha224Free(&hash[i]);
+    }
 #endif
 
-    printf("SHA-224  %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT);
+
+    bench_async_end();
 }
 #endif
 
 #ifndef NO_SHA256
-void bench_sha256(void)
+void bench_sha256(int doAsync)
 {
-    Sha256 hash;
-    byte   digest[SHA256_DIGEST_SIZE];
-    double start, total, persec;
-    int    i, ret;
+    Sha256 hash[BENCH_MAX_PENDING];
+    double start;
+    int    ret, i, count = 0, times;
+    DECLARE_ARRAY(digest, byte, BENCH_MAX_PENDING, SHA256_DIGEST_SIZE, HEAP_HINT);
 
-    ret = wc_InitSha256(&hash);
-    if (ret != 0) {
-        printf("InitSha256 failed, ret = %d\n", ret);
-        return;
-    }
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
+    bench_async_begin();
 
-    for(i = 0; i < numBlocks; i++) {
-        ret = wc_Sha256Update(&hash, plain, sizeof(plain));
+    /* zero all contexts up front; the cleanup path frees every slot */
+    XMEMSET(hash, 0, sizeof(hash));
+
+    /* init hash contexts; doAsync selects devId, else INVALID_DEVID */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        ret = wc_InitSha256_ex(&hash[i], HEAP_HINT,
+            doAsync ? devId : INVALID_DEVID);
         if (ret != 0) {
-            printf("Sha256Update failed, ret = %d\n", ret);
-            return;
+            printf("InitSha256_ex failed, ret = %d\n", ret);
+            goto exit;
         }
     }
 
-    ret = wc_Sha256Final(&hash, digest);
-    if (ret != 0) {
-        printf("Sha256Final failed, ret = %d\n", ret);
-        return;
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < numBlocks || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
+
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks)) {
+                    ret = wc_Sha256Update(&hash[i], bench_plain,
+                        BENCH_SIZE);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times)) {
+                        goto exit_sha256;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+
+        times = 0; /* restart the counter for the finalize pass */
+        do {
+            bench_async_poll();
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks)) {
+                    ret = wc_Sha256Final(&hash[i], digest[i]);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times)) {
+                        goto exit_sha256;
+                    }
+                }
+            } /* for i */
+        } while (BENCH_ASYNC_IS_PEND());
+    } while (bench_stats_sym_check(start));
+exit_sha256:
+    bench_stats_sym_finish("SHA-256", doAsync, count, start);
+
+exit:
+
+    if (ret < 0) {
+        printf("bench_sha256 failed: %d\n", ret);
     }
 
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        wc_Sha256Free(&hash[i]);
+    }
 #endif
 
-    printf("SHA-256  %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT);
+
+    bench_async_end();
 }
 #endif
 
 #ifdef WOLFSSL_SHA384
-void bench_sha384(void)
+void bench_sha384(int doAsync)
 {
-    Sha384 hash;
-    byte   digest[SHA384_DIGEST_SIZE];
-    double start, total, persec;
-    int    i, ret;
+    Sha384 hash[BENCH_MAX_PENDING];
+    double start;
+    int    ret, i, count = 0, times;
+    DECLARE_ARRAY(digest, byte, BENCH_MAX_PENDING, SHA384_DIGEST_SIZE, HEAP_HINT);
 
-    ret = wc_InitSha384(&hash);
-    if (ret != 0) {
-        printf("InitSha384 failed, ret = %d\n", ret);
-        return;
-    }
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
+    bench_async_begin();
 
-    for(i = 0; i < numBlocks; i++) {
-        ret = wc_Sha384Update(&hash, plain, sizeof(plain));
+    /* zero all contexts up front; the cleanup path frees every slot */
+    XMEMSET(hash, 0, sizeof(hash));
+
+    /* init hash contexts; doAsync selects devId, else INVALID_DEVID */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        ret = wc_InitSha384_ex(&hash[i], HEAP_HINT,
+            doAsync ? devId : INVALID_DEVID);
         if (ret != 0) {
-            printf("Sha384Update failed, ret = %d\n", ret);
-            return;
+            printf("InitSha384_ex failed, ret = %d\n", ret);
+            goto exit;
         }
     }
 
-    ret = wc_Sha384Final(&hash, digest);
-    if (ret != 0) {
-        printf("Sha384Final failed, ret = %d\n", ret);
-        return;
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < numBlocks || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
+
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks)) {
+                    ret = wc_Sha384Update(&hash[i], bench_plain,
+                        BENCH_SIZE);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times)) {
+                        goto exit_sha384;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+
+        times = 0; /* restart the counter for the finalize pass */
+        do {
+            bench_async_poll();
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks)) {
+                    ret = wc_Sha384Final(&hash[i], digest[i]);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times)) {
+                        goto exit_sha384;
+                    }
+                }
+            } /* for i */
+        } while (BENCH_ASYNC_IS_PEND());
+    } while (bench_stats_sym_check(start));
+exit_sha384:
+    bench_stats_sym_finish("SHA-384", doAsync, count, start);
+
+exit:
+
+    if (ret < 0) {
+        printf("bench_sha384 failed: %d\n", ret);
     }
 
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        wc_Sha384Free(&hash[i]);
+    }
 #endif
 
-    printf("SHA-384  %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT);
+
+    bench_async_end();
 }
 #endif
 
 #ifdef WOLFSSL_SHA512
-void bench_sha512(void)
+void bench_sha512(int doAsync)
 {
-    Sha512 hash;
-    byte   digest[SHA512_DIGEST_SIZE];
-    double start, total, persec;
-    int    i, ret;
+    Sha512 hash[BENCH_MAX_PENDING];
+    double start;
+    int    ret, i, count = 0, times;
+    DECLARE_ARRAY(digest, byte, BENCH_MAX_PENDING, SHA512_DIGEST_SIZE, HEAP_HINT);
 
-    ret = wc_InitSha512(&hash);
-    if (ret != 0) {
-        printf("InitSha512 failed, ret = %d\n", ret);
-        return;
-    }
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
+    bench_async_begin();
 
-    for(i = 0; i < numBlocks; i++) {
-        ret = wc_Sha512Update(&hash, plain, sizeof(plain));
+    /* zero all contexts up front; the cleanup path frees every slot */
+    XMEMSET(hash, 0, sizeof(hash));
+
+    /* init hash contexts; doAsync selects devId, else INVALID_DEVID */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        ret = wc_InitSha512_ex(&hash[i], HEAP_HINT,
+            doAsync ? devId : INVALID_DEVID);
         if (ret != 0) {
-            printf("Sha512Update failed, ret = %d\n", ret);
-            return;
+            printf("InitSha512_ex failed, ret = %d\n", ret);
+            goto exit;
         }
     }
 
-    ret = wc_Sha512Final(&hash, digest);
-    if (ret != 0) {
-        printf("Sha512Final failed, ret = %d\n", ret);
-        return;
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < numBlocks || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
+
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks)) {
+                    ret = wc_Sha512Update(&hash[i], bench_plain,
+                        BENCH_SIZE);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times)) {
+                        goto exit_sha512;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+
+        times = 0; /* restart the counter for the finalize pass */
+        do {
+            bench_async_poll();
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks)) {
+                    ret = wc_Sha512Final(&hash[i], digest[i]);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times)) {
+                        goto exit_sha512;
+                    }
+                }
+            } /* for i */
+        } while (BENCH_ASYNC_IS_PEND());
+    } while (bench_stats_sym_check(start));
+exit_sha512:
+    bench_stats_sym_finish("SHA-512", doAsync, count, start);
+
+exit:
+
+    if (ret < 0) {
+        printf("bench_sha512 failed: %d\n", ret);
     }
 
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        wc_Sha512Free(&hash[i]);
+    }
 #endif
 
-    printf("SHA-512  %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT);
+
+    bench_async_end();
 }
 #endif
 
@@ -1388,30 +2047,20 @@ void bench_ripemd(void)
 {
     RipeMd hash;
     byte   digest[RIPEMD_DIGEST_SIZE];
-    double start, total, persec;
-    int    i;
+    double start;
+    int    i, count; /* count is uninitialized here; presumably set by bench_stats_start -- confirm */
 
     wc_InitRipeMd(&hash);
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
 
-    for(i = 0; i < numBlocks; i++)
-        wc_RipeMdUpdate(&hash, plain, sizeof(plain));
-
-    wc_RipeMdFinal(&hash, digest);
-
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-
-    printf("RIPEMD   %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < numBlocks; i++) {
+            wc_RipeMdUpdate(&hash, bench_plain, BENCH_SIZE);
+        }
+        wc_RipeMdFinal(&hash, digest);
+        count += i; /* i = number of Update calls made in this pass */
+    } while (bench_stats_sym_check(start));
+    bench_stats_sym_finish("RIPEMD", 0, count, start); /* 0 fills the slot where the SHA benchmarks pass doAsync */
 }
 #endif
 
@@ -1421,43 +2070,32 @@ void bench_blake2(void)
 {
     Blake2b b2b;
     byte    digest[64];
-    double  start, total, persec;
-    int     i, ret;
+    double  start;
+    int     ret, i, count; /* count is uninitialized here; presumably set by bench_stats_start -- confirm */
 
     ret = wc_InitBlake2b(&b2b, 64);
     if (ret != 0) {
         printf("InitBlake2b failed, ret = %d\n", ret);
         return;
     }
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
 
-    for(i = 0; i < numBlocks; i++) {
-        ret = wc_Blake2bUpdate(&b2b, plain, sizeof(plain));
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < numBlocks; i++) {
+            ret = wc_Blake2bUpdate(&b2b, bench_plain, BENCH_SIZE);
+            if (ret != 0) {
+                printf("Blake2bUpdate failed, ret = %d\n", ret);
+                return;
+            }
+        }
+        ret = wc_Blake2bFinal(&b2b, digest, 64); /* NOTE(review): b2b is reused by the next pass without re-init -- confirm intended */
         if (ret != 0) {
-            printf("Blake2bUpdate failed, ret = %d\n", ret);
+            printf("Blake2bFinal failed, ret = %d\n", ret);
             return;
         }
-    }
-
-    ret = wc_Blake2bFinal(&b2b, digest, 64);
-    if (ret != 0) {
-        printf("Blake2bFinal failed, ret = %d\n", ret);
-        return;
-    }
-
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-
-    printf("BLAKE2b  %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+        count += i; /* i = number of Update calls made in this pass */
+    } while (bench_stats_sym_check(start));
+    bench_stats_sym_finish("BLAKE2b", 0, count, start); /* 0 fills the slot where the SHA benchmarks pass doAsync */
 }
 #endif
 
@@ -1469,43 +2107,32 @@ void bench_cmac(void)
     Cmac    cmac;
     byte    digest[AES_BLOCK_SIZE];
     word32  digestSz = sizeof(digest);
-    double  start, total, persec;
-    int     i, ret;
+    double  start;
+    int     ret, i, count; /* count is uninitialized here; presumably set by bench_stats_start -- confirm */
 
-    ret = wc_InitCmac(&cmac, key, 16, WC_CMAC_AES, NULL);
+    ret = wc_InitCmac(&cmac, bench_key, 16, WC_CMAC_AES, NULL); /* 16 is the key length passed for bench_key */
     if (ret != 0) {
         printf("InitCmac failed, ret = %d\n", ret);
         return;
     }
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
 
-    for(i = 0; i < numBlocks; i++) {
-        ret = wc_CmacUpdate(&cmac, plain, sizeof(plain));
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < numBlocks; i++) {
+            ret = wc_CmacUpdate(&cmac, bench_plain, BENCH_SIZE);
+            if (ret != 0) {
+                printf("CmacUpdate failed, ret = %d\n", ret);
+                return;
+            }
+        }
+        ret = wc_CmacFinal(&cmac, digest, &digestSz); /* NOTE(review): cmac is reused by the next pass without re-init -- confirm wc_CmacFinal leaves it usable */
         if (ret != 0) {
-            printf("CmacUpdate failed, ret = %d\n", ret);
+            printf("CmacFinal failed, ret = %d\n", ret);
             return;
         }
-    }
-
-    ret = wc_CmacFinal(&cmac, digest, &digestSz);
-    if (ret != 0) {
-        printf("CmacFinal failed, ret = %d\n", ret);
-        return;
-    }
-
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-
-    printf("AES-CMAC %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+        count += i; /* i = number of Update calls made in this pass */
+    } while (bench_stats_sym_check(start));
+    bench_stats_sym_finish("AES-CMAC", 0, count, start); /* 0 fills the slot where the SHA benchmarks pass doAsync */
 }
 
 #endif /* WOLFSSL_CMAC */
@@ -1518,27 +2145,86 @@ void bench_scrypt(void)
     double start, total, each, milliEach;
     int    ret, i;
 
-    start = current_time(1);
-    for (i = 0; i < scryptCnt; i++) {
-        ret = wc_scrypt(derived, (byte*)"pleaseletmein", 13,
-                        (byte*)"SodiumChloride", 14, 14, 8, 1, sizeof(derived));
-        if (ret != 0) {
-            printf("scrypt failed, ret = %d\n", ret);
-            return;
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < scryptCnt; i++) {
+            ret = wc_scrypt(derived, (byte*)"pleaseletmein", 13,
+                            (byte*)"SodiumChloride", 14, 14, 8, 1, sizeof(derived));
+            if (ret != 0) {
+                printf("scrypt failed, ret = %d\n", ret);
+                goto exit;
+            }
         }
-    }
-    total = current_time(0) - start;
-    each  = total / scryptCnt;   /* per second   */
-    milliEach = each * 1000; /* milliseconds */
-
-    printf("scrypt   %6.3f milliseconds, avg over %d"
-           " iterations\n", milliEach, scryptCnt);
+        count += i;
+    } while (bench_stats_sym_check(start));
+exit:
+    bench_stats_asym_finish("scrypt", 0, "", 0, count, start);
 }
 
 #endif /* HAVE_SCRYPT */
 
 #ifndef NO_RSA
 
+#if defined(WOLFSSL_KEY_GEN)
+void bench_rsaKeyGen(int doAsync)
+{
+    RsaKey genKey[BENCH_MAX_PENDING];
+    double start;
+    int    ret, i, count = 0, times;
+    int    k, keySz;
+    const int  keySizes[2] = {1024, 2048};
+    const long rsa_e_val = 65537;
+
+    bench_async_begin();
+
+    /* zero all keys up front: wc_FreeRsaKey runs on a slot before its first init */
+    XMEMSET(genKey, 0, sizeof(genKey));
+
+    for (k = 0; k < (int)(sizeof(keySizes)/sizeof(int)); k++) {
+        keySz = keySizes[k];
+
+        bench_stats_start(&count, &start);
+        do {
+            /* while free pending slots in queue, submit ops */
+            for (times = 0; times < genTimes || BENCH_ASYNC_IS_PEND(); ) {
+                bench_async_poll();
+
+                for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                    if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 0, &times, genTimes)) {
+
+                        wc_FreeRsaKey(&genKey[i]);
+                        ret = wc_InitRsaKey_ex(&genKey[i], HEAP_HINT,
+                            doAsync ? devId : INVALID_DEVID);
+                        if (ret < 0) {
+                            goto exit;
+                        }
+
+                        ret = wc_MakeRsaKey(&genKey[i], keySz, rsa_e_val, &rng);
+                        if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 0, &times)) {
+                            goto exit;
+                        }
+                    }
+                } /* for i */
+            } /* for times */
+            count += times;
+        } while (bench_stats_sym_check(start));
+    exit:
+        bench_stats_asym_finish("RSA", keySz, "key gen", doAsync, count, start);
+
+        if (ret < 0) {
+            printf("bench_rsaKeyGen failed: %d\n", ret);
+            break;
+        }
+    }
+
+    /* cleanup */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        wc_FreeRsaKey(&genKey[i]);
+    }
+
+    bench_async_end();
+}
+#endif /* WOLFSSL_KEY_GEN */
 
 #if !defined(USE_CERT_BUFFERS_1024) && !defined(USE_CERT_BUFFERS_2048)
     #if defined(WOLFSSL_MDK_SHELL)
@@ -1552,21 +2238,23 @@ void bench_scrypt(void)
     #endif
 #endif
 
-void bench_rsa(void)
+#define RSA_BUF_SIZE 256  /* for up to 2048 bit */
+
+void bench_rsa(int doAsync)
 {
-    int    i;
-    int    ret;
-    size_t bytes;
-    word32 idx = 0;
+    int         ret, i, times, count = 0;
+    size_t      bytes;
+    word32      idx = 0;
     const byte* tmp;
+    const char* messageStr = "Everyone gets Friday off.";
+    const int   len = (int)XSTRLEN((char*)messageStr);
+    double      start = 0.0f;
+    RsaKey      rsaKey[BENCH_MAX_PENDING];
+    int         rsaKeySz = RSA_BUF_SIZE * 8; /* used in printf and for the ciphertext length */
 
-    const byte message[] = "Everyone gets Friday off.";
-    byte      enc[256];  /* for up to 2048 bit */
-    const int len = (int)strlen((char*)message);
-    double    start, total, each, milliEach;
-
-    RsaKey rsaKey;
-    int    rsaKeySz = 2048; /* used in printf */
+    DECLARE_VAR_INIT(message, byte, len, messageStr, HEAP_HINT);
+    DECLARE_ARRAY(enc, byte, BENCH_MAX_PENDING, RSA_BUF_SIZE, HEAP_HINT);
+    DECLARE_ARRAY(out, byte, BENCH_MAX_PENDING, RSA_BUF_SIZE, HEAP_HINT);
 
 #ifdef USE_CERT_BUFFERS_1024
     tmp = rsa_key_der_1024;
@@ -1579,271 +2267,112 @@ void bench_rsa(void)
     #error "need a cert buffer size"
 #endif /* USE_CERT_BUFFERS */
 
-    if ((ret = wc_InitRsaKey(&rsaKey, HEAP_HINT)) < 0) {
-        printf("InitRsaKey failed! %d\n", ret);
-        return;
-    }
-
-    /* decode the private key */
-    ret = wc_RsaPrivateKeyDecode(tmp, &idx, &rsaKey, (word32)bytes);
-
-    start = current_time(1);
-
-    for (i = 0; i < ntimes; i++) {
-        ret = wc_RsaPublicEncrypt(message, len, enc, sizeof(enc),
-                                                        &rsaKey, &rng);
-        if (ret < 0) {
-            break;
-        }
-    } /* for ntimes */
-
-    total = current_time(0) - start;
-    each  = total / ntimes;   /* per second   */
-    milliEach = each * 1000; /* milliseconds */
-
-    printf("RSA %d public          %6.3f milliseconds, avg over %d"
-           " iterations\n", rsaKeySz, milliEach, ntimes);
-
-    if (ret < 0) {
-        printf("Rsa Public Encrypt failed! %d\n", ret);
-        return;
-    }
-
-#ifdef WC_RSA_BLINDING
-    wc_RsaSetRNG(&rsaKey, &rng);
-#endif
-    start = current_time(1);
-
-    /* capture resulting encrypt length */
-    idx = ret;
-
-    for (i = 0; i < ntimes; i++) {
-        byte  out[256];  /* for up to 2048 bit */
-
-        ret = wc_RsaPrivateDecrypt(enc, idx, out, sizeof(out), &rsaKey);
-        if (ret < 0 && ret != WC_PENDING_E) {
-            break;
-        }
-    } /* for ntimes */
-
-    total = current_time(0) - start;
-    each  = total / ntimes;   /* per second   */
-    milliEach = each * 1000; /* milliseconds */
-
-    printf("RSA %d private         %6.3f milliseconds, avg over %d"
-           " iterations\n", rsaKeySz, milliEach, ntimes);
-
-    wc_FreeRsaKey(&rsaKey);
-}
-
-
-#ifdef WOLFSSL_ASYNC_CRYPT
-void bench_rsa_async(void)
-{
-    int    i;
-    int    ret;
-    size_t bytes;
-    word32 idx = 0;
-    const byte* tmp;
-
-    const byte message[] = "Everyone gets Friday off.";
-    byte      enc[256];  /* for up to 2048 bit */
-    const int len = (int)strlen((char*)message);
-    double    start, total, each, milliEach;
-
-    RsaKey rsaKey[WOLF_ASYNC_MAX_PENDING];
-    int    rsaKeySz = 2048; /* used in printf */
-
-    WOLF_EVENT events[WOLF_ASYNC_MAX_PENDING];
-    WOLF_EVENT_QUEUE eventQueue;
-    int evtNum, asyncDone, asyncPend;
-
-#ifdef USE_CERT_BUFFERS_1024
-    tmp = rsa_key_der_1024;
-    bytes = sizeof_rsa_key_der_1024;
-    rsaKeySz = 1024;
-#elif defined(USE_CERT_BUFFERS_2048)
-    tmp = rsa_key_der_2048;
-    bytes = sizeof_rsa_key_der_2048;
-#else
-    #error "need a cert buffer size"
-#endif /* USE_CERT_BUFFERS */
-
-    /* init event queue */
-    ret = wolfEventQueue_Init(&eventQueue);
-    if (ret != 0) {
-        return;
-    }
+    bench_async_begin();
 
     /* clear for done cleanup */
-    XMEMSET(&events, 0, sizeof(events));
-    XMEMSET(&rsaKey, 0, sizeof(rsaKey));
+    XMEMSET(rsaKey, 0, sizeof(rsaKey));
 
-    /* init events and keys */
-    for (i = 0; i < WOLF_ASYNC_MAX_PENDING; i++) {
+    /* init keys */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
         /* setup an async context for each key */
-        if ((ret = wc_InitRsaKey_ex(&rsaKey[i], HEAP_HINT, devId)) < 0) {
-            goto done;
+        if ((ret = wc_InitRsaKey_ex(&rsaKey[i], HEAP_HINT,
+                                        doAsync ? devId : INVALID_DEVID)) < 0) {
+            goto exit;
         }
+
     #ifdef WC_RSA_BLINDING
         wc_RsaSetRNG(&rsaKey[i], &rng);
     #endif
-        if ((ret = wolfAsync_EventInit(&events[i],
-                WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT, &rsaKey[i].asyncDev)) != 0) {
-            goto done;
-        }
-        events[i].pending = 0; /* Reset pending flag */
 
         /* decode the private key */
         idx = 0;
         if ((ret = wc_RsaPrivateKeyDecode(tmp, &idx, &rsaKey[i],
                                                         (word32)bytes)) != 0) {
             printf("wc_RsaPrivateKeyDecode failed! %d\n", ret);
-            goto done;
+            goto exit;
         }
     }
 
-    /* begin public async RSA */
-    start = current_time(1);
+    /* begin public RSA */
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < ntimes || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
 
-    asyncPend = 0;
-    for (i = 0; i < ntimes; ) {
-
-        /* while free pending slots in queue, submit RSA operations */
-        for (evtNum = 0; evtNum < WOLF_ASYNC_MAX_PENDING; evtNum++) {
-            if (events[evtNum].done || (events[evtNum].pending == 0 &&
-                                                    (i + asyncPend) < ntimes))
-            {
-                /* check for event error */
-                if (events[evtNum].ret != WC_PENDING_E && events[evtNum].ret < 0) {
-                    printf("wc_RsaPublicEncrypt: Async event error: %d\n", events[evtNum].ret);
-                    goto done;
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&rsaKey[i]), 1, &times, ntimes)) {
+                    ret = wc_RsaPublicEncrypt(message, len, enc[i],
+                                            RSA_BUF_SIZE, &rsaKey[i], &rng);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&rsaKey[i]), 1, &times)) {
+                        goto exit_rsa_pub;
+                    }
                 }
-
-                ret = wc_RsaPublicEncrypt(message, len, enc, sizeof(enc),
-                                                        &rsaKey[evtNum], &rng);
-                if (ret == WC_PENDING_E) {
-                    ret = wc_RsaAsyncHandle(&rsaKey[evtNum], &eventQueue,
-                                                            &events[evtNum]);
-                    if (ret != 0) goto done;
-                    asyncPend++;
-                }
-                else if (ret >= 0) {
-                    /* operation completed */
-                    i++;
-                    asyncPend--;
-                    events[evtNum].done = 0;
-                }
-                else {
-                    printf("wc_RsaPublicEncrypt failed: %d\n", ret);
-                    goto done;
-                }
-            }
-        } /* for evtNum */
-
-        /* poll until there are events done */
-        if (asyncPend > 0) {
-            do {
-                ret = wolfAsync_EventQueuePoll(&eventQueue, NULL, NULL, 0,
-                                        WOLF_POLL_FLAG_CHECK_HW, &asyncDone);
-                if (ret != 0) goto done;
-            } while (asyncDone == 0);
-        }
-    } /* for ntimes */
-
-    total = current_time(0) - start;
-    each  = total / ntimes;   /* per second   */
-    milliEach = each * 1000; /* milliseconds */
-
-    printf("RSA %d public async    %6.3f milliseconds, avg over %d"
-           " iterations\n", rsaKeySz, milliEach, ntimes);
+            } /* for i */
+        } /* for times */
+        count += times;
+    } while (bench_stats_sym_check(start));
+exit_rsa_pub:
+    bench_stats_asym_finish("RSA", rsaKeySz, "public", doAsync, count, start);
 
     if (ret < 0) {
-        goto done;
+        goto exit;
     }
 
-
-    /* begin private async RSA */
-    start = current_time(1);
+#ifdef WOLFSSL_ASYNC_CRYPT
+    /* Clear events */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        XMEMSET(&rsaKey[i].asyncDev.event, 0, sizeof(WOLF_EVENT));
+    }
+    asyncPending = 0;
+#endif
 
     /* capture resulting encrypt length */
-    idx = sizeof(enc); /* fixed at 2048 bit */
+    idx = (word32)(rsaKeySz / 8); /* ciphertext is key-size bytes, not always RSA_BUF_SIZE */
 
-    asyncPend = 0;
-    for (i = 0; i < ntimes; ) {
-        byte  out[256];  /* for up to 2048 bit */
+    /* begin private RSA */
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < ntimes || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
 
-        /* while free pending slots in queue, submit RSA operations */
-        for (evtNum = 0; evtNum < WOLF_ASYNC_MAX_PENDING; evtNum++) {
-            if (events[evtNum].done || (events[evtNum].pending == 0 &&
-                                                    (i + asyncPend) < ntimes))
-            {
-                /* check for event error */
-                if (events[evtNum].ret != WC_PENDING_E && events[evtNum].ret < 0) {
-                    printf("wc_RsaPrivateDecrypt: Async event error: %d\n", events[evtNum].ret);
-                    goto done;
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&rsaKey[i]), 1, &times, ntimes)) {
+                    ret = wc_RsaPrivateDecrypt(enc[i], idx, out[i],
+                                                    RSA_BUF_SIZE, &rsaKey[i]);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&rsaKey[i]), 1, &times)) {
+                        goto exit;
+                    }
                 }
-
-                ret = wc_RsaPrivateDecrypt(enc, idx, out, sizeof(out),
-                                                            &rsaKey[evtNum]);
-                if (ret == WC_PENDING_E) {
-                    ret = wc_RsaAsyncHandle(&rsaKey[evtNum], &eventQueue,
-                                                            &events[evtNum]);
-                    if (ret != 0) goto done;
-                    asyncPend++;
-                }
-                else if (ret >= 0) {
-                    /* operation completed */
-                    i++;
-                    asyncPend--;
-                    events[evtNum].done = 0;
-                }
-                else {
-                    printf("wc_RsaPrivateDecrypt failed: %d\n", ret);
-                    goto done;
-                }
-            }
-        } /* for evtNum */
-
-        /* poll until there are events done */
-        if (asyncPend > 0) {
-            do {
-                ret = wolfAsync_EventQueuePoll(&eventQueue, NULL, NULL, 0,
-                                        WOLF_POLL_FLAG_CHECK_HW, &asyncDone);
-                if (ret != 0) goto done;
-            } while (asyncDone == 0);
-        }
-    } /* for ntimes */
-
-    total = current_time(0) - start;
-    each  = total / ntimes;   /* per second   */
-    milliEach = each * 1000; /* milliseconds */
-
-    printf("RSA %d private async   %6.3f milliseconds, avg over %d"
-           " iterations\n", rsaKeySz, milliEach, ntimes);
-
-done:
+            } /* for i */
+        } /* for times */
+        count += times;
+    } while (bench_stats_sym_check(start));
+exit:
+    bench_stats_asym_finish("RSA", rsaKeySz, "private", doAsync, count, start);
 
     if (ret < 0) {
-        printf("bench_rsa_async failed: %d\n", ret);
+        printf("bench_rsa failed: %d\n", ret);
     }
 
     /* cleanup */
-    for (i = 0; i < WOLF_ASYNC_MAX_PENDING; i++) {
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
         wc_FreeRsaKey(&rsaKey[i]);
     }
 
-    /* free event queue */
-    wolfEventQueue_Free(&eventQueue);
+    FREE_ARRAY(enc, BENCH_MAX_PENDING, HEAP_HINT);
+    FREE_ARRAY(out, BENCH_MAX_PENDING, HEAP_HINT);
+    FREE_VAR(message, HEAP_HINT);
+
+    bench_async_end();
 }
-#endif /* WOLFSSL_ASYNC_CRYPT */
 
 #endif /* !NO_RSA */
 
 
 #ifndef NO_DH
 
-
 #if !defined(USE_CERT_BUFFERS_1024) && !defined(USE_CERT_BUFFERS_2048)
     #if defined(WOLFSSL_MDK_SHELL)
         static char *certDHname = "certs/dh2048.der";
@@ -1858,27 +2387,35 @@ done:
     #endif
 #endif
 
-void bench_dh(void)
+#define BENCH_DH_KEY_SIZE  256 /* for 2048 bit */
+#define BENCH_DH_PRIV_SIZE (BENCH_DH_KEY_SIZE/8)
+
+void bench_dh(int doAsync)
 {
-    int    i ;
-    size_t bytes;
-    word32 idx = 0, pubSz, privSz = 0, pubSz2, privSz2, agreeSz;
+    int    ret, i;
+    int    count = 0, times;
     const byte* tmp = NULL;
-
-    byte   pub[256];    /* for 2048 bit */
-    byte   pub2[256];   /* for 2048 bit */
-    byte   agree[256];  /* for 2048 bit */
-    byte   priv[32];    /* for 2048 bit */
-    byte   priv2[32];   /* for 2048 bit */
-
-    double start, total, each, milliEach;
-    DhKey  dhKey;
+    double start = 0.0f;
+    DhKey  dhKey[BENCH_MAX_PENDING];
     int    dhKeySz = 2048; /* used in printf */
+#ifndef NO_ASN
+    size_t bytes;
+    word32 idx;
+#endif
+    word32 pubSz[BENCH_MAX_PENDING];
+    word32 privSz[BENCH_MAX_PENDING];
+    word32 pubSz2;
+    word32 privSz2;
+    word32 agreeSz[BENCH_MAX_PENDING];
+
+    DECLARE_ARRAY(pub, byte, BENCH_MAX_PENDING, BENCH_DH_KEY_SIZE, HEAP_HINT);
+    DECLARE_VAR(pub2, byte, BENCH_DH_KEY_SIZE, HEAP_HINT);
+    DECLARE_ARRAY(agree, byte, BENCH_MAX_PENDING, BENCH_DH_KEY_SIZE, HEAP_HINT);
+    DECLARE_ARRAY(priv, byte, BENCH_MAX_PENDING, BENCH_DH_PRIV_SIZE, HEAP_HINT);
+    DECLARE_VAR(priv2, byte, BENCH_DH_PRIV_SIZE, HEAP_HINT);
 
-    (void)idx;
     (void)tmp;
 
-
 #if defined(NO_ASN)
     dhKeySz = 1024;
     /* do nothing, but don't use default FILE */
@@ -1893,90 +2430,112 @@ void bench_dh(void)
     #error "need to define a cert buffer size"
 #endif /* USE_CERT_BUFFERS */
 
+    bench_async_begin();
 
-    if (wc_InitDhKey(&dhKey) != 0) {
-        printf("InitDhKey failed!\n");
-        return;
+    /* clear for done cleanup */
+    XMEMSET(dhKey, 0, sizeof(dhKey));
+
+    /* init keys */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        /* setup an async context for each key */
+        ret = wc_InitDhKey_ex(&dhKey[i], HEAP_HINT,
+                        doAsync ? devId : INVALID_DEVID);
+        if (ret != 0)
+            goto exit;
+
+        /* setup key */
+    #ifdef NO_ASN
+        ret = wc_DhSetKey(&dhKey[i], dh_p, sizeof(dh_p), dh_g, sizeof(dh_g));
+    #else
+        idx = 0;
+        ret = wc_DhKeyDecode(tmp, &idx, &dhKey[i], (word32)bytes);
+    #endif
+        if (ret != 0) {
+            printf("DhKeyDecode failed %d, can't benchmark\n", ret);
+            goto exit;
+        }
     }
 
-#ifdef NO_ASN
-    bytes = wc_DhSetKey(&dhKey, dh_p, sizeof(dh_p), dh_g, sizeof(dh_g));
-#else
-    bytes = wc_DhKeyDecode(tmp, &idx, &dhKey, (word32)bytes);
-#endif
-    if (bytes != 0) {
-        printf("dhekydecode failed, can't benchmark\n");
-        return;
+    /* Key Gen */
+    bench_stats_start(&count, &start);
+    do {
+        /* while free pending slots in queue, submit ops */
+        for (times = 0; times < genTimes || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
+
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&dhKey[i]), 0, &times, genTimes)) {
+                    privSz[i] = 0;
+                    ret = wc_DhGenerateKeyPair(&dhKey[i], &rng, priv[i], &privSz[i],
+                        pub[i], &pubSz[i]);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&dhKey[i]), 0, &times)) {
+                        goto exit_dh_gen;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+    } while (bench_stats_sym_check(start));
+exit_dh_gen:
+    bench_stats_asym_finish("DH", dhKeySz, "key gen", doAsync, count, start);
+
+    if (ret < 0) {
+        goto exit;
     }
 
-    start = current_time(1);
+    /* Generate key to use as other public */
+    ret = wc_DhGenerateKeyPair(&dhKey[0], &rng, priv2, &privSz2, pub2, &pubSz2);
+#ifdef WOLFSSL_ASYNC_CRYPT
+    ret = wc_AsyncWait(ret, &dhKey[0].asyncDev, WC_ASYNC_FLAG_NONE);
 
-    for (i = 0; i < ntimes; i++)
-        wc_DhGenerateKeyPair(&dhKey, &rng, priv, &privSz, pub, &pubSz);
-
-    total = current_time(0) - start;
-    each  = total / ntimes;   /* per second   */
-    milliEach = each * 1000; /* milliseconds */
-
-    printf("DH  %d key generation  %6.3f milliseconds, avg over %d"
-           " iterations\n", dhKeySz, milliEach, ntimes);
-
-    wc_DhGenerateKeyPair(&dhKey, &rng, priv2, &privSz2, pub2, &pubSz2);
-    start = current_time(1);
-
-    for (i = 0; i < ntimes; i++)
-        wc_DhAgree(&dhKey, agree, &agreeSz, priv, privSz, pub2, pubSz2);
-
-    total = current_time(0) - start;
-    each  = total / ntimes;   /* per second   */
-    milliEach = each * 1000; /* milliseconds */
-
-    printf("DH  %d key agreement   %6.3f milliseconds, avg over %d"
-           " iterations\n", dhKeySz, milliEach, ntimes);
-
-    wc_FreeDhKey(&dhKey);
-}
+    /* Clear events */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        XMEMSET(&dhKey[i].asyncDev.event, 0, sizeof(WOLF_EVENT));
+    }
+    asyncPending = 0;
 #endif
 
-#if defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA)
-void bench_rsaKeyGen(void)
-{
-    RsaKey genKey;
-    double start, total, each, milliEach;
-    int    i;
+    /* Key Agree */
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < agreeTimes || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
 
-    /* 1024 bit */
-    start = current_time(1);
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&dhKey[i]), 0, &times, agreeTimes)) {
+                    ret = wc_DhAgree(&dhKey[i], agree[i], &agreeSz[i], priv[i], privSz[i],
+                        pub2, pubSz2);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&dhKey[i]), 0, &times)) {
+                        goto exit;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+    } while (bench_stats_sym_check(start));
+exit:
+    bench_stats_asym_finish("DH", dhKeySz, "key agree", doAsync, count, start);
 
-    for(i = 0; i < genTimes; i++) {
-        wc_InitRsaKey(&genKey, HEAP_HINT);
-        wc_MakeRsaKey(&genKey, 1024, 65537, &rng);
-        wc_FreeRsaKey(&genKey);
+    if (ret < 0) {
+        printf("bench_dh failed: %d\n", ret);
     }
 
-    total = current_time(0) - start;
-    each  = total / genTimes;  /* per second  */
-    milliEach = each * 1000;   /* milliseconds */
-    printf("\n");
-    printf("RSA 1024 key generation  %6.3f milliseconds, avg over %d"
-           " iterations\n", milliEach, genTimes);
-
-    /* 2048 bit */
-    start = current_time(1);
-
-    for(i = 0; i < genTimes; i++) {
-        wc_InitRsaKey(&genKey, HEAP_HINT);
-        wc_MakeRsaKey(&genKey, 2048, 65537, &rng);
-        wc_FreeRsaKey(&genKey);
+    /* cleanup */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        wc_FreeDhKey(&dhKey[i]);
     }
 
-    total = current_time(0) - start;
-    each  = total / genTimes;  /* per second  */
-    milliEach = each * 1000;   /* milliseconds */
-    printf("RSA 2048 key generation  %6.3f milliseconds, avg over %d"
-           " iterations\n", milliEach, genTimes);
+    FREE_ARRAY(pub, BENCH_MAX_PENDING, HEAP_HINT);
+    FREE_VAR(pub2, HEAP_HINT);
+    FREE_ARRAY(priv, BENCH_MAX_PENDING, HEAP_HINT);
+    FREE_VAR(priv2, HEAP_HINT);
+    FREE_ARRAY(agree, BENCH_MAX_PENDING, HEAP_HINT);
+
+    bench_async_end();
 }
-#endif /* WOLFSSL_KEY_GEN */
+#endif /* !NO_DH */
+
 #ifdef HAVE_NTRU
 byte GetEntropy(ENTROPY_CMD cmd, byte* out);
 
@@ -2002,7 +2561,7 @@ byte GetEntropy(ENTROPY_CMD cmd, byte* out)
 void bench_ntru(void)
 {
     int    i;
-    double start, total, each, milliEach;
+    double start;
 
     byte   public_key[1027];
     word16 public_key_len = sizeof(public_key);
@@ -2027,7 +2586,6 @@ void bench_ntru(void)
         'w', 'o', 'l', 'f', 'S', 'S', 'L', ' ', 'N', 'T', 'R', 'U'
     };
 
-    printf("\n");
     for (ntruBits = 128; ntruBits < 257; ntruBits += 64) {
         switch (ntruBits) {
             case 128:
@@ -2082,8 +2640,8 @@ void bench_ntru(void)
             printf("NTRU error occurred requesting the buffer size needed\n");
             return;
         }
-        start = current_time(1);
 
+        bench_stats_start(&i, &start);
         for (i = 0; i < ntimes; i++) {
             ret = ntru_crypto_ntru_encrypt(drbg, public_key_len, public_key,
                     sizeof(aes_key), aes_key, &ciphertext_len, ciphertext);
@@ -2092,21 +2650,14 @@ void bench_ntru(void)
                 return;
             }
         }
-        ret = ntru_crypto_drbg_uninstantiate(drbg);
+        bench_stats_asym_finish("NTRU", ntruBits, "encryption", 0, i, start);
 
+        ret = ntru_crypto_drbg_uninstantiate(drbg);
         if (ret != DRBG_OK) {
             printf("NTRU error occurred uninstantiating the DRBG\n");
             return;
         }
 
-        total = current_time(0) - start;
-        each  = total / ntimes;   /* per second   */
-        milliEach = each * 1000; /* milliseconds */
-
-        printf("NTRU %d encryption took %6.3f milliseconds, avg over %d"
-           " iterations\n", ntruBits, milliEach, ntimes);
-
-
         ret = ntru_crypto_ntru_decrypt(private_key_len, private_key,
                 ciphertext_len, ciphertext, &plaintext_len, NULL);
 
@@ -2116,8 +2667,8 @@ void bench_ntru(void)
         }
 
         plaintext_len = sizeof(plaintext);
-        start = current_time(1);
 
+        bench_stats_start(&i, &start);
         for (i = 0; i < ntimes; i++) {
             ret = ntru_crypto_ntru_decrypt(private_key_len, private_key,
                                       ciphertext_len, ciphertext,
@@ -2128,20 +2679,14 @@ void bench_ntru(void)
                 return;
             }
         }
-
-        total = current_time(0) - start;
-        each  = total / ntimes;   /* per second   */
-        milliEach = each * 1000; /* milliseconds */
-
-        printf("NTRU %d decryption took %6.3f milliseconds, avg over %d"
-           " iterations\n", ntruBits, milliEach, ntimes);
+        bench_stats_asym_finish("NTRU", ntruBits, "decryption", 0, i, start);
     }
 
 }
 
 void bench_ntruKeyGen(void)
 {
-    double start, total, each, milliEach;
+    double start;
     int    i;
 
     byte   public_key[1027];
@@ -2180,15 +2725,14 @@ void bench_ntruKeyGen(void)
         /* set key sizes */
         ret = ntru_crypto_ntru_encrypt_keygen(drbg, type, &public_key_len,
                                                   NULL, &private_key_len, NULL);
-        start = current_time(1);
 
-        for(i = 0; i < genTimes; i++) {
+        bench_stats_start(&i, &start);
+        for (i = 0; i < genTimes; i++) {
             ret = ntru_crypto_ntru_encrypt_keygen(drbg, type, &public_key_len,
                                          public_key, &private_key_len,
                                          private_key);
         }
-
-        total = current_time(0) - start;
+        bench_stats_asym_finish("NTRU", ntruBits, "key gen", 0, i, start);
 
         if (ret != NTRU_OK) {
             printf("keygen failed\n");
@@ -2201,182 +2745,313 @@ void bench_ntruKeyGen(void)
             printf("NTRU drbg uninstantiate failed\n");
             return;
         }
-
-        each = total / genTimes;
-        milliEach = each * 1000;
-
-        printf("NTRU %d key generation  %6.3f milliseconds, avg over %d"
-            " iterations\n", ntruBits, milliEach, genTimes);
     }
 }
 #endif
 
 #ifdef HAVE_ECC
-void bench_eccKeyGen(void)
+#define BENCH_ECC_SIZE  32
+
+void bench_eccMakeKey(int doAsync)
 {
-    ecc_key genKey;
-    double start, total, each, milliEach;
-    int    i;
+    int ret, i, times, count;
+    const int keySize = BENCH_ECC_SIZE;
+    ecc_key genKey[BENCH_MAX_PENDING];
+    double start;
 
-    /* 256 bit */
-    start = current_time(1);
+    bench_async_begin();
 
-    for(i = 0; i < genTimes; i++) {
-        wc_ecc_init_ex(&genKey, HEAP_HINT, devId);
-        wc_ecc_make_key(&rng, 32, &genKey);
-        wc_ecc_free(&genKey);
+    /* clear for done cleanup */
+    XMEMSET(&genKey, 0, sizeof(genKey));
+
+    /* ECC Make Key */
+    bench_stats_start(&count, &start);
+    do {
+        /* while free pending slots in queue, submit ops */
+        for (times = 0; times < genTimes || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
+
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 0, &times, genTimes)) {
+
+                    wc_ecc_free(&genKey[i]);
+                    ret = wc_ecc_init_ex(&genKey[i], HEAP_HINT, doAsync ? devId : INVALID_DEVID);
+                    if (ret < 0) {
+                        goto exit;
+                    }
+
+                    ret = wc_ecc_make_key(&rng, keySize, &genKey[i]);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 0, &times)) {
+                        goto exit;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+    } while (bench_stats_sym_check(start));
+exit:
+    bench_stats_asym_finish("ECC", keySize * 8, "key gen", doAsync, count, start);
+
+    if (ret < 0) {
+        printf("bench_eccMakeKey failed: %d\n", ret);
     }
 
-    total = current_time(0) - start;
-    each  = total / genTimes;  /* per second  */
-    milliEach = each * 1000;   /* milliseconds */
-    printf("\n");
-    printf("ECC  256 key generation  %6.3f milliseconds, avg over %d"
-           " iterations\n", milliEach, genTimes);
+    /* cleanup */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        wc_ecc_free(&genKey[i]);
+    }
+
+    bench_async_end();
+}
+
+void bench_ecc(int doAsync)
+{
+    int ret, i, times, count;
+    const int keySize = BENCH_ECC_SIZE;
+    ecc_key genKey[BENCH_MAX_PENDING];
+#ifdef HAVE_ECC_DHE
+    ecc_key genKey2[BENCH_MAX_PENDING];
+#endif
+#if !defined(NO_ASN) && defined(HAVE_ECC_SIGN)
+#ifdef HAVE_ECC_VERIFY
+    int    verify[BENCH_MAX_PENDING];
+#endif
+#endif
+    word32 x[BENCH_MAX_PENDING];
+    double start;
+
+#ifdef HAVE_ECC_DHE
+    DECLARE_ARRAY(shared, byte, BENCH_MAX_PENDING, BENCH_ECC_SIZE, HEAP_HINT);
+#endif
+#if !defined(NO_ASN) && defined(HAVE_ECC_SIGN)
+    DECLARE_ARRAY(sig, byte, BENCH_MAX_PENDING, ECC_MAX_SIG_SIZE, HEAP_HINT);
+#endif
+    DECLARE_ARRAY(digest, byte, BENCH_MAX_PENDING, BENCH_ECC_SIZE, HEAP_HINT);
+
+    bench_async_begin();
+
+    /* clear for done cleanup */
+    XMEMSET(&genKey, 0, sizeof(genKey));
+#ifdef HAVE_ECC_DHE
+    XMEMSET(&genKey2, 0, sizeof(genKey2));
+#endif
+
+    /* init keys */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        /* setup a context for each key */
+        if ((ret = wc_ecc_init_ex(&genKey[i], HEAP_HINT,
+                                    doAsync ? devId : INVALID_DEVID)) < 0) {
+            goto exit;
+        }
+        ret = wc_ecc_make_key(&rng, keySize, &genKey[i]);
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        ret = wc_AsyncWait(ret, &genKey[i].asyncDev, WC_ASYNC_FLAG_NONE);
+    #endif
+        if (ret < 0) {
+            goto exit;
+        }
+
+    #ifdef HAVE_ECC_DHE
+        if ((ret = wc_ecc_init_ex(&genKey2[i], HEAP_HINT, INVALID_DEVID)) < 0) {
+            goto exit;
+        }
+        if ((ret = wc_ecc_make_key(&rng, keySize, &genKey2[i])) > 0) {
+            goto exit;
+        }
+    #endif
+    }
+
+#ifdef HAVE_ECC_DHE
+#ifdef WOLFSSL_ASYNC_CRYPT
+    /* Clear events */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        XMEMSET(&genKey[i].asyncDev.event, 0, sizeof(WOLF_EVENT));
+    }
+    asyncPending = 0;
+#endif
+
+    /* ECC Shared Secret */
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < agreeTimes || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
+
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 1, &times, agreeTimes)) {
+                    x[i] = keySize;
+                    ret = wc_ecc_shared_secret(&genKey[i], &genKey2[i], shared[i], &x[i]);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 1, &times)) {
+                        goto exit_ecdhe;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+    } while (bench_stats_sym_check(start));
+exit_ecdhe:
+    bench_stats_asym_finish("ECDHE", keySize * 8, "agree", doAsync, count, start);
+
+    if (ret < 0) {
+        goto exit;
+    }
+#endif /* HAVE_ECC_DHE */
+
+#if !defined(NO_ASN) && defined(HAVE_ECC_SIGN)
+#ifdef WOLFSSL_ASYNC_CRYPT
+    /* Clear events */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        XMEMSET(&genKey[i].asyncDev.event, 0, sizeof(WOLF_EVENT));
+    }
+    asyncPending = 0;
+#endif
+
+    /* Init digest to sign */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        for (count = 0; count < keySize; count++) {
+            digest[i][count] = (byte)count;
+        }
+    }
+
+    /* ECC Sign */
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < agreeTimes || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
+
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 1, &times, agreeTimes)) {
+                    if (genKey[i].state == 0)
+                        x[i] = ECC_MAX_SIG_SIZE;
+                    ret = wc_ecc_sign_hash(digest[i], keySize, sig[i], &x[i],
+                                                            &rng, &genKey[i]);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 1, &times)) {
+                        goto exit_ecdsa_sign;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+    } while (bench_stats_sym_check(start));
+exit_ecdsa_sign:
+    bench_stats_asym_finish("ECDSA", keySize * 8, "sign", doAsync, count, start);
+
+    if (ret < 0) {
+        goto exit;
+    }
+
+#ifdef HAVE_ECC_VERIFY
+#ifdef WOLFSSL_ASYNC_CRYPT
+    /* Clear events */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        XMEMSET(&genKey[i].asyncDev.event, 0, sizeof(WOLF_EVENT));
+    }
+    asyncPending = 0;
+#endif
+
+    /* ECC Verify */
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < agreeTimes || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
+
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 1, &times, agreeTimes)) {
+                    if (genKey[i].state == 0)
+                        verify[i] = 0;
+                    ret = wc_ecc_verify_hash(sig[i], x[i], digest[i],
+                                        keySize, &verify[i], &genKey[i]);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 1, &times)) {
+                        goto exit_ecdsa_verify;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+    } while (bench_stats_sym_check(start));
+exit_ecdsa_verify:
+    bench_stats_asym_finish("ECDSA", keySize * 8, "verify", doAsync, count, start);
+#endif /* HAVE_ECC_VERIFY */
+#endif /* !NO_ASN && HAVE_ECC_SIGN */
+
+exit:
+
+    if (ret < 0) {
+        printf("bench_ecc failed: %d\n", ret);
+    }
+
+    /* cleanup */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        wc_ecc_free(&genKey[i]);
+    #ifdef HAVE_ECC_DHE
+        wc_ecc_free(&genKey2[i]);
+    #endif
+    }
+
+#ifdef HAVE_ECC_DHE
+    FREE_ARRAY(shared, BENCH_MAX_PENDING, HEAP_HINT);
+#endif
+#if !defined(NO_ASN) && defined(HAVE_ECC_SIGN)
+    FREE_ARRAY(sig, BENCH_MAX_PENDING, HEAP_HINT);
+#endif
+    FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT);
+
+    bench_async_end();
 }
 
 
-void bench_eccKeyAgree(void)
-{
-    ecc_key genKey, genKey2;
-    double start, total, each, milliEach;
-    int    i, ret;
-    byte   shared[32];
-#if !defined(NO_ASN) && !defined(NO_ECC_SIGN)
-    byte   sig[64+16];  /* der encoding too */
-#endif
-    byte   digest[32];
-    word32 x = 0;
-
-    wc_ecc_init_ex(&genKey, HEAP_HINT, devId);
-    wc_ecc_init_ex(&genKey2, HEAP_HINT, devId);
-
-    ret = wc_ecc_make_key(&rng, 32, &genKey);
-    if (ret != 0) {
-        printf("ecc_make_key failed\n");
-        return;
-    }
-    ret = wc_ecc_make_key(&rng, 32, &genKey2);
-    if (ret != 0) {
-        printf("ecc_make_key failed\n");
-        return;
-    }
-
-    /* 256 bit */
-    start = current_time(1);
-
-    for(i = 0; i < agreeTimes; i++) {
-        x = sizeof(shared);
-        ret = wc_ecc_shared_secret(&genKey, &genKey2, shared, &x);
-        if (ret != 0) {
-            printf("ecc_shared_secret failed\n");
-            return;
-        }
-    }
-
-    total = current_time(0) - start;
-    each  = total / agreeTimes;  /* per second  */
-    milliEach = each * 1000;   /* milliseconds */
-    printf("EC-DHE   key agreement   %6.3f milliseconds, avg over %d"
-           " iterations\n", milliEach, agreeTimes);
-
-    /* make dummy digest */
-    for (i = 0; i < (int)sizeof(digest); i++)
-        digest[i] = (byte)i;
-
-
-#if !defined(NO_ASN) && !defined(NO_ECC_SIGN)
-    start = current_time(1);
-
-    for(i = 0; i < agreeTimes; i++) {
-        x = sizeof(sig);
-        ret = wc_ecc_sign_hash(digest, sizeof(digest), sig, &x, &rng, &genKey);
-        if (ret != 0) {
-            printf("ecc_sign_hash failed\n");
-            return;
-        }
-    }
-
-    total = current_time(0) - start;
-    each  = total / agreeTimes;  /* per second  */
-    milliEach = each * 1000;   /* milliseconds */
-    printf("EC-DSA   sign   time     %6.3f milliseconds, avg over %d"
-           " iterations\n", milliEach, agreeTimes);
-
-    start = current_time(1);
-
-    for(i = 0; i < agreeTimes; i++) {
-        int verify = 0;
-        ret = wc_ecc_verify_hash(sig, x, digest, sizeof(digest), &verify, &genKey);
-        if (ret != 0) {
-            printf("ecc_verify_hash failed\n");
-            return;
-        }
-    }
-#endif
-
-    total = current_time(0) - start;
-    each  = total / agreeTimes;  /* per second  */
-    milliEach = each * 1000;     /* milliseconds */
-    printf("EC-DSA   verify time     %6.3f milliseconds, avg over %d"
-           " iterations\n", milliEach, agreeTimes);
-
-    wc_ecc_free(&genKey2);
-    wc_ecc_free(&genKey);
-}
 #ifdef HAVE_ECC_ENCRYPT
 void bench_eccEncrypt(void)
 {
     ecc_key userA, userB;
+    const int keySize = BENCH_ECC_SIZE;
     byte    msg[48];
     byte    out[80];
     word32  outSz   = sizeof(out);
-    word32  plainSz = sizeof(plain);
-    int     ret, i;
-    double start, total, each, milliEach;
+    word32  bench_plainSz = BENCH_SIZE;
+    int     ret, i, count;
+    double start;
 
     wc_ecc_init_ex(&userA, HEAP_HINT, devId);
     wc_ecc_init_ex(&userB, HEAP_HINT, devId);
 
-    wc_ecc_make_key(&rng, 32, &userA);
-    wc_ecc_make_key(&rng, 32, &userB);
+    wc_ecc_make_key(&rng, keySize, &userA);
+    wc_ecc_make_key(&rng, keySize, &userB);
 
     for (i = 0; i < (int)sizeof(msg); i++)
         msg[i] = i;
 
-    start = current_time(1);
-
-    for(i = 0; i < ntimes; i++) {
-        /* encrypt msg to B */
-        ret = wc_ecc_encrypt(&userA, &userB, msg, sizeof(msg), out, &outSz, NULL);
-        if (ret != 0) {
-            printf("wc_ecc_encrypt failed! %d\n", ret);
-            return;
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < ntimes; i++) {
+            /* encrypt msg to B */
+            ret = wc_ecc_encrypt(&userA, &userB, msg, sizeof(msg), out, &outSz, NULL);
+            if (ret != 0) {
+                printf("wc_ecc_encrypt failed! %d\n", ret);
+                goto exit_enc;
+            }
         }
-    }
+        count += i;
+    } while (bench_stats_sym_check(start));
+exit_enc:
+    bench_stats_asym_finish("ECC", keySize * 8, "encrypt", 0, count, start);
 
-    total = current_time(0) - start;
-    each  = total / ntimes;  /* per second  */
-    milliEach = each * 1000;   /* milliseconds */
-    printf("ECC      encrypt         %6.3f milliseconds, avg over %d"
-           " iterations\n", milliEach, ntimes);
-
-    start = current_time(1);
-
-    for(i = 0; i < ntimes; i++) {
-        /* decrypt msg from A */
-        ret = wc_ecc_decrypt(&userB, &userA, out, outSz, plain, &plainSz, NULL);
-        if (ret != 0) {
-            printf("wc_ecc_decrypt failed! %d\n", ret);
-            return;
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < ntimes; i++) {
+            /* decrypt msg from A */
+            ret = wc_ecc_decrypt(&userB, &userA, out, outSz, bench_plain, &bench_plainSz, NULL);
+            if (ret != 0) {
+                printf("wc_ecc_decrypt failed! %d\n", ret);
+                goto exit_dec;
+            }
         }
-    }
-
-    total = current_time(0) - start;
-    each  = total / ntimes;  /* per second  */
-    milliEach = each * 1000;   /* milliseconds */
-    printf("ECC      decrypt         %6.3f milliseconds, avg over %d"
-           " iterations\n", milliEach, ntimes);
+        count += i;
+    } while (bench_stats_sym_check(start));
+exit_dec:
+    bench_stats_asym_finish("ECC", keySize * 8, "decrypt", 0, count, start);
 
     /* cleanup */
     wc_ecc_free(&userB);
@@ -2389,31 +3064,27 @@ void bench_eccEncrypt(void)
 void bench_curve25519KeyGen(void)
 {
     curve25519_key genKey;
-    double start, total, each, milliEach;
-    int    i;
+    double start;
+    int    i, count;
 
-    /* 256 bit */
-    start = current_time(1);
-
-    for(i = 0; i < genTimes; i++) {
-        wc_curve25519_make_key(&rng, 32, &genKey);
-        wc_curve25519_free(&genKey);
-    }
-
-    total = current_time(0) - start;
-    each  = total / genTimes;  /* per second  */
-    milliEach = each * 1000;   /* milliseconds */
-    printf("\n");
-    printf("CURVE25519 256 key generation %6.3f milliseconds, avg over %d"
-           " iterations\n", milliEach, genTimes);
+    /* Key Gen */
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < genTimes; i++) {
+            wc_curve25519_make_key(&rng, 32, &genKey);
+            wc_curve25519_free(&genKey);
+        }
+        count += i;
+    } while (bench_stats_sym_check(start));
+    bench_stats_asym_finish("CURVE", 25519, "key gen", 0, count, start);
 }
 
 #ifdef HAVE_CURVE25519_SHARED_SECRET
 void bench_curve25519KeyAgree(void)
 {
     curve25519_key genKey, genKey2;
-    double start, total, each, milliEach;
-    int    i, ret;
+    double start;
+    int    ret, i, count;
     byte   shared[32];
     word32 x = 0;
 
@@ -2431,23 +3102,21 @@ void bench_curve25519KeyAgree(void)
         return;
     }
 
-    /* 256 bit */
-    start = current_time(1);
-
-    for(i = 0; i < agreeTimes; i++) {
-        x = sizeof(shared);
-        ret = wc_curve25519_shared_secret(&genKey, &genKey2, shared, &x);
-        if (ret != 0) {
-            printf("curve25519_shared_secret failed\n");
-            return;
+    /* Shared secret */
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < agreeTimes; i++) {
+            x = sizeof(shared);
+            ret = wc_curve25519_shared_secret(&genKey, &genKey2, shared, &x);
+            if (ret != 0) {
+                printf("curve25519_shared_secret failed\n");
+                goto exit;
+            }
         }
-    }
-
-    total = current_time(0) - start;
-    each  = total / agreeTimes;  /* per second  */
-    milliEach = each * 1000;   /* milliseconds */
-    printf("CURVE25519 key agreement      %6.3f milliseconds, avg over %d"
-           " iterations\n", milliEach, agreeTimes);
+        count += i;
+    } while (bench_stats_sym_check(start));
+exit:
+    bench_stats_asym_finish("CURVE", 25519, "key agree", 0, count, start);
 
     wc_curve25519_free(&genKey2);
     wc_curve25519_free(&genKey);
@@ -2459,24 +3128,20 @@ void bench_curve25519KeyAgree(void)
 void bench_ed25519KeyGen(void)
 {
     ed25519_key genKey;
-    double start, total, each, milliEach;
-    int    i;
+    double start;
+    int    i, count;
 
-    /* 256 bit */
-    start = current_time(1);
-
-    for(i = 0; i < genTimes; i++) {
-        wc_ed25519_init(&genKey);
-        wc_ed25519_make_key(&rng, 32, &genKey);
-        wc_ed25519_free(&genKey);
-    }
-
-    total = current_time(0) - start;
-    each  = total / genTimes;  /* per second  */
-    milliEach = each * 1000;   /* milliseconds */
-    printf("\n");
-    printf("ED25519  key generation  %6.3f milliseconds, avg over %d"
-           " iterations\n", milliEach, genTimes);
+    /* Key Gen */
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < genTimes; i++) {
+            wc_ed25519_init(&genKey);
+            wc_ed25519_make_key(&rng, 32, &genKey);
+            wc_ed25519_free(&genKey);
+        }
+        count += i;
+    } while (bench_stats_sym_check(start));
+    bench_stats_asym_finish("ED", 25519, "key gen", 0, count, start);
 }
 
 
@@ -2485,8 +3150,8 @@ void bench_ed25519KeySign(void)
     int    ret;
     ed25519_key genKey;
 #ifdef HAVE_ED25519_SIGN
-    double start, total, each, milliEach;
-    int    i;
+    double start;
+    int    i, count;
     byte   sig[ED25519_SIG_SIZE];
     byte   msg[512];
     word32 x = 0;
@@ -2505,41 +3170,37 @@ void bench_ed25519KeySign(void)
     for (i = 0; i < (int)sizeof(msg); i++)
         msg[i] = (byte)i;
 
-    start = current_time(1);
-
-    for(i = 0; i < agreeTimes; i++) {
-        x = sizeof(sig);
-        ret = wc_ed25519_sign_msg(msg, sizeof(msg), sig, &x, &genKey);
-        if (ret != 0) {
-            printf("ed25519_sign_msg failed\n");
-            return;
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < agreeTimes; i++) {
+            x = sizeof(sig);
+            ret = wc_ed25519_sign_msg(msg, sizeof(msg), sig, &x, &genKey);
+            if (ret != 0) {
+                printf("ed25519_sign_msg failed\n");
+                goto exit_ed_sign;
+            }
         }
-    }
-
-    total = current_time(0) - start;
-    each  = total / agreeTimes;  /* per second  */
-    milliEach = each * 1000;   /* milliseconds */
-    printf("ED25519  sign   time     %6.3f milliseconds, avg over %d"
-           " iterations\n", milliEach, agreeTimes);
+        count += i;
+    } while (bench_stats_sym_check(start));
+exit_ed_sign:
+    bench_stats_asym_finish("ED", 25519, "sign", 0, count, start);
 
 #ifdef HAVE_ED25519_VERIFY
-    start = current_time(1);
-
-    for(i = 0; i < agreeTimes; i++) {
-        int verify = 0;
-        ret = wc_ed25519_verify_msg(sig, x, msg, sizeof(msg), &verify,
-                                    &genKey);
-        if (ret != 0 || verify != 1) {
-            printf("ed25519_verify_msg failed\n");
-            return;
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < agreeTimes; i++) {
+            int verify = 0;
+            ret = wc_ed25519_verify_msg(sig, x, msg, sizeof(msg), &verify,
+                                        &genKey);
+            if (ret != 0 || verify != 1) {
+                printf("ed25519_verify_msg failed\n");
+                goto exit_ed_verify;
+            }
         }
-    }
-
-    total = current_time(0) - start;
-    each  = total / agreeTimes;  /* per second  */
-    milliEach = each * 1000;     /* milliseconds */
-    printf("ED25519  verify time     %6.3f milliseconds, avg over %d"
-           " iterations\n", milliEach, agreeTimes);
+        count += i;
+    } while (bench_stats_sym_check(start));
+exit_ed_verify:
+    bench_stats_asym_finish("ED", 25519, "verify", 0, count, start);
 #endif /* HAVE_ED25519_VERIFY */
 #endif /* HAVE_ED25519_SIGN */
 
@@ -2547,7 +3208,7 @@ void bench_ed25519KeySign(void)
 }
 #endif /* HAVE_ED25519 */
 
-
+#ifndef HAVE_STACK_SIZE
 #if defined(_WIN32) && !defined(INTIME_RTOS)
 
     #define WIN32_LEAN_AND_MEAN
@@ -2659,6 +3320,7 @@ void bench_ed25519KeySign(void)
     }
 
 #endif /* _WIN32 */
+#endif /* !HAVE_STACK_SIZE */
 
 #if defined(HAVE_GET_CYCLES)
 
@@ -2675,6 +3337,26 @@ static INLINE word64 get_intel_cycles(void)
 }
 
 #endif /* HAVE_GET_CYCLES */
+
+#ifndef NO_MAIN_DRIVER
+
+int main(int argc, char** argv)
+{
+    int ret = 0;
+    /* with HAVE_STACK_SIZE, benchmark_test is handed to StackSizeCheck() */
+#ifdef HAVE_STACK_SIZE
+    ret = StackSizeCheck(NULL, benchmark_test);
+#else
+    ret = benchmark_test(NULL);
+#endif
+    /* command line arguments are not used by the benchmark driver */
+    (void)argc;
+    (void)argv;
+
+    return ret;
+}
+#endif /* !NO_MAIN_DRIVER */
+
 #else
     #ifndef NO_MAIN_DRIVER
         int main() { return 0; }
diff --git a/wolfcrypt/benchmark/benchmark.h b/wolfcrypt/benchmark/benchmark.h
index 20feeb45d..4c67bd12d 100644
--- a/wolfcrypt/benchmark/benchmark.h
+++ b/wolfcrypt/benchmark/benchmark.h
@@ -28,7 +28,11 @@
     extern "C" {
 #endif
 
-int benchmark_test(void* args);
+#ifdef HAVE_STACK_SIZE
+THREAD_RETURN WOLFSSL_THREAD benchmark_test(void* args);
+#else
+int benchmark_test(void *args);
+#endif
 
 #ifdef __cplusplus
     }  /* extern "C" */
diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c
old mode 100644
new mode 100755
index 7b8c4b40a..5d41c89c9
--- a/wolfcrypt/src/aes.c
+++ b/wolfcrypt/src/aes.c
@@ -30,176 +30,175 @@
 
 #include 
 
+/* fips wrapper calls, user can call direct */
 #ifdef HAVE_FIPS
-int wc_AesSetKey(Aes* aes, const byte* key, word32 len, const byte* iv,
-                          int dir)
-{
-    return AesSetKey_fips(aes, key, len, iv, dir);
-}
+    int wc_AesSetKey(Aes* aes, const byte* key, word32 len, const byte* iv,
+                              int dir)
+    {
+        return AesSetKey_fips(aes, key, len, iv, dir);
+    }
+    int wc_AesSetIV(Aes* aes, const byte* iv)
+    {
+        return AesSetIV_fips(aes, iv);
+    }
+    #ifdef HAVE_AES_CBC
+        int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+        {
+            return AesCbcEncrypt_fips(aes, out, in, sz);
+        }
+        #ifdef HAVE_AES_DECRYPT
+            int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+            {
+                return AesCbcDecrypt_fips(aes, out, in, sz);
+            }
+        #endif /* HAVE_AES_DECRYPT */
+    #endif /* HAVE_AES_CBC */
 
+    /* AES-CTR */
+    #ifdef WOLFSSL_AES_COUNTER
+        void wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+        {
+            AesCtrEncrypt(aes, out, in, sz);
+        }
+    #endif
 
-int wc_AesSetIV(Aes* aes, const byte* iv)
-{
-    return AesSetIV_fips(aes, iv);
-}
+    /* AES-DIRECT */
+    #if defined(WOLFSSL_AES_DIRECT)
+        void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in)
+        {
+            AesEncryptDirect(aes, out, in);
+        }
 
+        #ifdef HAVE_AES_DECRYPT
+            void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in)
+            {
+                AesDecryptDirect(aes, out, in);
+            }
+        #endif /* HAVE_AES_DECRYPT */
 
-#ifdef HAVE_AES_CBC
-int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
-{
-    return AesCbcEncrypt_fips(aes, out, in, sz);
-}
+        int wc_AesSetKeyDirect(Aes* aes, const byte* key, word32 len,
+                                        const byte* iv, int dir)
+        {
+            return AesSetKeyDirect(aes, key, len, iv, dir);
+        }
+    #endif /* WOLFSSL_AES_DIRECT */
 
-#ifdef HAVE_AES_DECRYPT
-int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
-{
-    return AesCbcDecrypt_fips(aes, out, in, sz);
-}
-#endif /* HAVE_AES_DECRYPT */
-#endif /* HAVE_AES_CBC */
+    /* AES-GCM */
+    #ifdef HAVE_AESGCM
+        int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
+        {
+            return AesGcmSetKey_fips(aes, key, len);
+        }
+        int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
+                                      const byte* iv, word32 ivSz,
+                                      byte* authTag, word32 authTagSz,
+                                      const byte* authIn, word32 authInSz)
+        {
+            return AesGcmEncrypt_fips(aes, out, in, sz, iv, ivSz, authTag,
+                authTagSz, authIn, authInSz);
+        }
 
-/* AES-CTR */
-#ifdef WOLFSSL_AES_COUNTER
-void wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
-{
-    AesCtrEncrypt(aes, out, in, sz);
-}
-#endif
+        #ifdef HAVE_AES_DECRYPT
+            int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
+                                          const byte* iv, word32 ivSz,
+                                          const byte* authTag, word32 authTagSz,
+                                          const byte* authIn, word32 authInSz)
+            {
+                return AesGcmDecrypt_fips(aes, out, in, sz, iv, ivSz, authTag,
+                    authTagSz, authIn, authInSz);
+            }
+        #endif /* HAVE_AES_DECRYPT */
 
-/* AES-DIRECT */
-#if defined(WOLFSSL_AES_DIRECT)
-void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in)
-{
-    AesEncryptDirect(aes, out, in);
-}
+        int wc_GmacSetKey(Gmac* gmac, const byte* key, word32 len)
+        {
+            return GmacSetKey(gmac, key, len);
+        }
+        int wc_GmacUpdate(Gmac* gmac, const byte* iv, word32 ivSz,
+                                      const byte* authIn, word32 authInSz,
+                                      byte* authTag, word32 authTagSz)
+        {
+            return GmacUpdate(gmac, iv, ivSz, authIn, authInSz,
+                              authTag, authTagSz);
+        }
+    #endif /* HAVE_AESGCM */
 
-#ifdef HAVE_AES_DECRYPT
-void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in)
-{
-    AesDecryptDirect(aes, out, in);
-}
-#endif /* HAVE_AES_DECRYPT */
+    /* AES-CCM */
+    #ifdef HAVE_AESCCM
+        void wc_AesCcmSetKey(Aes* aes, const byte* key, word32 keySz)
+        {
+            AesCcmSetKey(aes, key, keySz);
+        }
+        int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
+                                      const byte* nonce, word32 nonceSz,
+                                      byte* authTag, word32 authTagSz,
+                                      const byte* authIn, word32 authInSz)
+        {
+            /* sanity check on arguments */
+            if (aes == NULL || out == NULL || in == NULL || nonce == NULL
+                    || authTag == NULL || nonceSz < 7 || nonceSz > 13)
+                return BAD_FUNC_ARG;
 
-int wc_AesSetKeyDirect(Aes* aes, const byte* key, word32 len,
-                                const byte* iv, int dir)
-{
-    return AesSetKeyDirect(aes, key, len, iv, dir);
-}
-#endif
+            AesCcmEncrypt(aes, out, in, inSz, nonce, nonceSz, authTag,
+                authTagSz, authIn, authInSz);
+            return 0;
+        }
 
+        #ifdef HAVE_AES_DECRYPT
+            int  wc_AesCcmDecrypt(Aes* aes, byte* out,
+                const byte* in, word32 inSz,
+                const byte* nonce, word32 nonceSz,
+                const byte* authTag, word32 authTagSz,
+                const byte* authIn, word32 authInSz)
+            {
+                return AesCcmDecrypt(aes, out, in, inSz, nonce, nonceSz,
+                    authTag, authTagSz, authIn, authInSz);
+            }
+        #endif /* HAVE_AES_DECRYPT */
+    #endif /* HAVE_AESCCM */
 
-#ifdef HAVE_AESGCM
-int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
-{
-    return AesGcmSetKey_fips(aes, key, len);
-}
+    int  wc_AesInit(Aes* aes, void* h, int i)
+    {
+        (void)aes;
+        (void)h;
+        (void)i;
+        /* Stub: every argument is ignored and 0 is returned. FIPS doesn't
+            support: return AesInit(aes, h, i); */
+        return 0;
+    }
+    void wc_AesFree(Aes* aes)
+    {
+        (void)aes;
+        /* Stub: does nothing with aes. FIPS doesn't support:
+            AesFree(aes); */
+    }
 
-
-int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
-                              const byte* iv, word32 ivSz,
-                              byte* authTag, word32 authTagSz,
-                              const byte* authIn, word32 authInSz)
-{
-    return AesGcmEncrypt_fips(aes, out, in, sz, iv, ivSz, authTag, authTagSz,
-                              authIn, authInSz);
-}
-
-#ifdef HAVE_AES_DECRYPT
-int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
-                              const byte* iv, word32 ivSz,
-                              const byte* authTag, word32 authTagSz,
-                              const byte* authIn, word32 authInSz)
-{
-    return AesGcmDecrypt_fips(aes, out, in, sz, iv, ivSz, authTag, authTagSz,
-                              authIn, authInSz);
-}
-#endif /* HAVE_AES_DECRYPT */
-
-int wc_GmacSetKey(Gmac* gmac, const byte* key, word32 len)
-{
-    return GmacSetKey(gmac, key, len);
-}
-
-
-int wc_GmacUpdate(Gmac* gmac, const byte* iv, word32 ivSz,
-                              const byte* authIn, word32 authInSz,
-                              byte* authTag, word32 authTagSz)
-{
-    return GmacUpdate(gmac, iv, ivSz, authIn, authInSz,
-                      authTag, authTagSz);
-}
-
-#endif /* HAVE_AESGCM */
-#ifdef HAVE_AESCCM
-int wc_AesCcmSetKey(Aes* aes, const byte* key, word32 keySz)
-{
-    AesCcmSetKey(aes, key, keySz);
-    return 0;
-}
-
-
-int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
-                              const byte* nonce, word32 nonceSz,
-                              byte* authTag, word32 authTagSz,
-                              const byte* authIn, word32 authInSz)
-{
-    /* sanity check on arguments */
-    if (aes == NULL || out == NULL || in == NULL || nonce == NULL
-            || authTag == NULL || nonceSz < 7 || nonceSz > 13)
-        return BAD_FUNC_ARG;
-
-    AesCcmEncrypt(aes, out, in, inSz, nonce, nonceSz, authTag, authTagSz,
-                  authIn, authInSz);
-    return 0;
-}
-
-#ifdef HAVE_AES_DECRYPT
-int  wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
-                              const byte* nonce, word32 nonceSz,
-                              const byte* authTag, word32 authTagSz,
-                              const byte* authIn, word32 authInSz)
-{
-    return AesCcmDecrypt(aes, out, in, inSz, nonce, nonceSz, authTag, authTagSz,
-                         authIn, authInSz);
-}
-#endif /* HAVE_AES_DECRYPT */
-#endif /* HAVE_AESCCM */
-
-#ifdef WOLFSSL_ASYNC_CRYPT
-int  wc_AesAsyncInit(Aes* aes, int i)
-{
-    return AesAsyncInit(aes, i);
-}
-
-void wc_AesAsyncFree(Aes* aes)
-{
-    AesAsyncFree(aes);
-}
-#endif
 #else /* HAVE_FIPS */
 
-#ifdef WOLFSSL_TI_CRYPT
-#include 
+
+#if defined(WOLFSSL_TI_CRYPT)
+    #include 
 #else
 
 #include 
 #include 
+
 #ifdef NO_INLINE
     #include 
 #else
     #define WOLFSSL_MISC_INCLUDED
     #include 
 #endif
+
 #ifdef DEBUG_AESNI
     #include 
 #endif
 
-
 #ifdef _MSC_VER
     /* 4127 warning constant while(1)  */
     #pragma warning(disable: 4127)
 #endif
 
+
 /* Define AES implementation includes and functions */
 #if defined(STM32F2_CRYPTO) || defined(STM32F4_CRYPTO)
      /* STM32F2/F4 hardware AES support for CBC, CTR modes */
@@ -416,13 +415,12 @@ void wc_AesAsyncFree(Aes* aes)
         return ret;
     }
     #endif /* HAVE_AES_DECRYPT */
+
 #elif defined(WOLFSSL_PIC32MZ_CRYPT)
     /* NOTE: no support for AES-CCM/Direct */
     #define DEBUG_WOLFSSL
     #include "wolfssl/wolfcrypt/port/pic32/pic32mz-crypt.h"
-#elif defined(HAVE_CAVIUM)
-    /* still leave SW crypto available */
-    #define NEED_AES_TABLES
+
 #elif defined(WOLFSSL_NRF51_AES)
     /* Use built-in AES hardware - AES 128 ECB Encrypt Only */
     #include "wolfssl/wolfcrypt/port/nrf51.h"
@@ -431,9 +429,176 @@ void wc_AesAsyncFree(Aes* aes)
     {
         return nrf51_aes_encrypt(inBlock, (byte*)aes->key, aes->rounds, outBlock);
     }
+
     #ifdef HAVE_AES_DECRYPT
         #error nRF51 AES Hardware does not support decrypt
     #endif /* HAVE_AES_DECRYPT */
+
+
+#elif defined(WOLFSSL_AESNI)
+
+    #define NEED_AES_TABLES
+
+    /* Each platform needs to query info type 1 from cpuid to see if AES-NI is
+     * supported. Also, set up a macro for proper linkage without ABI conflicts.
+     */
+
+    #ifndef AESNI_ALIGN
+        #define AESNI_ALIGN 16
+    #endif
+
+    #ifndef _MSC_VER
+        #define cpuid(reg, func)\
+            __asm__ __volatile__ ("cpuid":\
+                 "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\
+                 "a" (func));
+
+        #define XASM_LINK(f) asm(f)
+    #else
+
+        #include 
+        #define cpuid(a,b) __cpuid((int*)a,b)
+
+        #define XASM_LINK(f)
+    #endif /* _MSC_VER */
+
+
+    static int Check_CPU_support_AES(void)
+    {
+        unsigned int reg[4];  /* cpuid outputs a,b,c,d land in 0,1,2,3 */
+        cpuid(reg, 1);        /* query info type 1 */
+        /* 0x2000000 is bit 25 of the c register; set means return 1 */
+        if (reg[2] & 0x2000000)
+            return 1;
+        /* bit clear: report that the AES instructions are not available */
+        return 0;
+    }
+
+    static int checkAESNI = 0;
+    static int haveAESNI  = 0;
+
+
+    /* Tell the C compiler the asm symbol names of these functions, in case the
+       ABI underscore prefix differs between clang/gcc/llvm etc. */
+    #ifdef HAVE_AES_CBC
+        void AES_CBC_encrypt(const unsigned char* in, unsigned char* out,
+                             unsigned char* ivec, unsigned long length,
+                             const unsigned char* KS, int nr)
+                             XASM_LINK("AES_CBC_encrypt");
+
+        #ifdef HAVE_AES_DECRYPT
+            #if defined(WOLFSSL_AESNI_BY4)
+                void AES_CBC_decrypt_by4(const unsigned char* in, unsigned char* out,
+                                         unsigned char* ivec, unsigned long length,
+                                         const unsigned char* KS, int nr)
+                                         XASM_LINK("AES_CBC_decrypt_by4");
+            #elif defined(WOLFSSL_AESNI_BY6)
+                void AES_CBC_decrypt_by6(const unsigned char* in, unsigned char* out,
+                                         unsigned char* ivec, unsigned long length,
+                                         const unsigned char* KS, int nr)
+                                         XASM_LINK("AES_CBC_decrypt_by6");
+            #else /* WOLFSSL_AESNI_BYx */
+                void AES_CBC_decrypt_by8(const unsigned char* in, unsigned char* out,
+                                         unsigned char* ivec, unsigned long length,
+                                         const unsigned char* KS, int nr)
+                                         XASM_LINK("AES_CBC_decrypt_by8");
+            #endif /* WOLFSSL_AESNI_BYx */
+        #endif /* HAVE_AES_DECRYPT */
+    #endif /* HAVE_AES_CBC */
+
+    void AES_ECB_encrypt(const unsigned char* in, unsigned char* out,
+                         unsigned long length, const unsigned char* KS, int nr)
+                         XASM_LINK("AES_ECB_encrypt");
+
+    #ifdef HAVE_AES_DECRYPT
+        void AES_ECB_decrypt(const unsigned char* in, unsigned char* out,
+                             unsigned long length, const unsigned char* KS, int nr)
+                             XASM_LINK("AES_ECB_decrypt");
+    #endif
+
+    void AES_128_Key_Expansion(const unsigned char* userkey,
+                               unsigned char* key_schedule)
+                               XASM_LINK("AES_128_Key_Expansion");
+
+    void AES_192_Key_Expansion(const unsigned char* userkey,
+                               unsigned char* key_schedule)
+                               XASM_LINK("AES_192_Key_Expansion");
+
+    void AES_256_Key_Expansion(const unsigned char* userkey,
+                               unsigned char* key_schedule)
+                               XASM_LINK("AES_256_Key_Expansion");
+
+
+    /* Expand userKey into aes->key and set aes->rounds for a 128, 192 or
+     * 256 bit key. Returns 0, or BAD_FUNC_ARG on NULL input / other sizes. */
+    static int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
+                                   Aes* aes)
+    {
+        if (userKey == NULL || aes == NULL)
+            return BAD_FUNC_ARG;
+
+        if (bits == 128) {
+            AES_128_Key_Expansion(userKey, (byte*)aes->key);
+            aes->rounds = 10;
+        }
+        else if (bits == 192) {
+            AES_192_Key_Expansion(userKey, (byte*)aes->key);
+            aes->rounds = 12;
+        }
+        else if (bits == 256) {
+            AES_256_Key_Expansion(userKey, (byte*)aes->key);
+            aes->rounds = 14;
+        }
+        else
+            return BAD_FUNC_ARG;
+        return 0;
+    }
+
+    #ifdef HAVE_AES_DECRYPT
+        /* Build the AES-NI decryption key schedule for userKey in aes->key.
+         * The encryption schedule is expanded into a temporary Aes and copied
+         * to aes->key in reverse round order; every round key except the
+         * first and last passes through _mm_aesimc_si128() on the way.
+         * userKey  raw key bytes
+         * bits     key size passed on to AES_set_encrypt_key (128, 192, 256)
+         * aes      receives the schedule (aes->key) and count (aes->rounds)
+         *
+         * Returns 0 on success, or BAD_FUNC_ARG when userKey or aes is NULL
+         * or AES_set_encrypt_key rejects the key size. */
+        static int AES_set_decrypt_key(const unsigned char* userKey,
+                                                    const int bits, Aes* aes)
+        {
+            int i;
+            int nr;
+            Aes temp_key;
+            __m128i *Key_Schedule;
+            __m128i *Temp_Key_Schedule = (__m128i*)temp_key.key;
+            volatile byte* wipe = (volatile byte*)&temp_key;
+
+            if (!userKey || !aes)
+                return BAD_FUNC_ARG;
+
+            if (AES_set_encrypt_key(userKey,bits,&temp_key) == BAD_FUNC_ARG)
+                return BAD_FUNC_ARG;
+
+            /* aes is known to be non-NULL from here on */
+            Key_Schedule = (__m128i*)aes->key;
+            nr = temp_key.rounds;
+            aes->rounds = nr;
+
+            Key_Schedule[nr] = Temp_Key_Schedule[0];
+            for (i = 1; i < nr; i++)
+                Key_Schedule[nr-i] = _mm_aesimc_si128(Temp_Key_Schedule[i]);
+            Key_Schedule[0] = Temp_Key_Schedule[nr];
+
+            /* temp_key holds expanded key material; clear it before return */
+            for (i = 0; i < (int)sizeof(temp_key); i++)
+                wipe[i] = 0;
+
+            return 0;
+        }
+    #endif /* HAVE_AES_DECRYPT */
+
 #else
 
     /* using wolfCrypt software AES implementation */
@@ -441,6 +606,7 @@ void wc_AesAsyncFree(Aes* aes)
 #endif
 
 
+
 #ifdef NEED_AES_TABLES
 
 static const word32 rcon[] = {
@@ -1027,166 +1193,8 @@ static const byte Td4[256] =
 #define GETBYTE(x, y) (word32)((byte)((x) >> (8 * (y))))
 
 
-#ifdef WOLFSSL_AESNI
-
-/* Each platform needs to query info type 1 from cpuid to see if aesni is
- * supported. Also, let's setup a macro for proper linkage w/o ABI conflicts
- */
-
-#ifndef _MSC_VER
-
-    #define cpuid(reg, func)\
-        __asm__ __volatile__ ("cpuid":\
-             "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\
-             "a" (func));
-
-    #define XASM_LINK(f) asm(f)
-#else
-
-    #include 
-    #define cpuid(a,b) __cpuid((int*)a,b)
-
-    #define XASM_LINK(f)
-
-#endif /* _MSC_VER */
-
-
-static int Check_CPU_support_AES(void)
-{
-    unsigned int reg[4];  /* put a,b,c,d into 0,1,2,3 */
-    cpuid(reg, 1);        /* query info 1 */
-
-    if (reg[2] & 0x2000000)
-        return 1;
-
-    return 0;
-}
-
-static int checkAESNI = 0;
-static int haveAESNI  = 0;
-
-
-/* tell C compiler these are asm functions in case any mix up of ABI underscore
-   prefix between clang/gcc/llvm etc */
-#ifdef HAVE_AES_CBC
-void AES_CBC_encrypt(const unsigned char* in, unsigned char* out,
-                     unsigned char* ivec, unsigned long length,
-                     const unsigned char* KS, int nr)
-                     XASM_LINK("AES_CBC_encrypt");
-
-#ifdef HAVE_AES_DECRYPT
-    #if defined(WOLFSSL_AESNI_BY4)
-    void AES_CBC_decrypt_by4(const unsigned char* in, unsigned char* out,
-                             unsigned char* ivec, unsigned long length,
-                             const unsigned char* KS, int nr)
-                             XASM_LINK("AES_CBC_decrypt_by4");
-    #elif defined(WOLFSSL_AESNI_BY6)
-    void AES_CBC_decrypt_by6(const unsigned char* in, unsigned char* out,
-                             unsigned char* ivec, unsigned long length,
-                             const unsigned char* KS, int nr)
-                             XASM_LINK("AES_CBC_decrypt_by6");
-    #else /* WOLFSSL_AESNI_BYx */
-    void AES_CBC_decrypt_by8(const unsigned char* in, unsigned char* out,
-                             unsigned char* ivec, unsigned long length,
-                             const unsigned char* KS, int nr)
-                             XASM_LINK("AES_CBC_decrypt_by8");
-    #endif /* WOLFSSL_AESNI_BYx */
-#endif /* HAVE_AES_DECRYPT */
-#endif /* HAVE_AES_CBC */
-
-void AES_ECB_encrypt(const unsigned char* in, unsigned char* out,
-                     unsigned long length, const unsigned char* KS, int nr)
-                     XASM_LINK("AES_ECB_encrypt");
-
-#ifdef HAVE_AES_DECRYPT
-void AES_ECB_decrypt(const unsigned char* in, unsigned char* out,
-                     unsigned long length, const unsigned char* KS, int nr)
-                     XASM_LINK("AES_ECB_decrypt");
-#endif
-
-void AES_128_Key_Expansion(const unsigned char* userkey,
-                           unsigned char* key_schedule)
-                           XASM_LINK("AES_128_Key_Expansion");
-
-void AES_192_Key_Expansion(const unsigned char* userkey,
-                           unsigned char* key_schedule)
-                           XASM_LINK("AES_192_Key_Expansion");
-
-void AES_256_Key_Expansion(const unsigned char* userkey,
-                           unsigned char* key_schedule)
-                           XASM_LINK("AES_256_Key_Expansion");
-
-
-static int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
-                               Aes* aes)
-{
-    if (!userKey || !aes)
-        return BAD_FUNC_ARG;
-
-    if (bits == 128) {
-       AES_128_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 10;
-       return 0;
-    }
-    else if (bits == 192) {
-       AES_192_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 12;
-       return 0;
-    }
-    else if (bits == 256) {
-       AES_256_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 14;
-       return 0;
-    }
-    return BAD_FUNC_ARG;
-}
-
-#ifdef HAVE_AES_DECRYPT
-static int AES_set_decrypt_key(const unsigned char* userKey, const int bits,
-                               Aes* aes)
-{
-    int nr;
-    Aes temp_key;
-    __m128i *Key_Schedule = (__m128i*)aes->key;
-    __m128i *Temp_Key_Schedule = (__m128i*)temp_key.key;
-
-    if (!userKey || !aes)
-        return BAD_FUNC_ARG;
-
-    if (AES_set_encrypt_key(userKey,bits,&temp_key) == BAD_FUNC_ARG)
-        return BAD_FUNC_ARG;
-
-    nr = temp_key.rounds;
-    aes->rounds = nr;
-
-    Key_Schedule[nr] = Temp_Key_Schedule[0];
-    Key_Schedule[nr-1] = _mm_aesimc_si128(Temp_Key_Schedule[1]);
-    Key_Schedule[nr-2] = _mm_aesimc_si128(Temp_Key_Schedule[2]);
-    Key_Schedule[nr-3] = _mm_aesimc_si128(Temp_Key_Schedule[3]);
-    Key_Schedule[nr-4] = _mm_aesimc_si128(Temp_Key_Schedule[4]);
-    Key_Schedule[nr-5] = _mm_aesimc_si128(Temp_Key_Schedule[5]);
-    Key_Schedule[nr-6] = _mm_aesimc_si128(Temp_Key_Schedule[6]);
-    Key_Schedule[nr-7] = _mm_aesimc_si128(Temp_Key_Schedule[7]);
-    Key_Schedule[nr-8] = _mm_aesimc_si128(Temp_Key_Schedule[8]);
-    Key_Schedule[nr-9] = _mm_aesimc_si128(Temp_Key_Schedule[9]);
-
-    if(nr>10) {
-        Key_Schedule[nr-10] = _mm_aesimc_si128(Temp_Key_Schedule[10]);
-        Key_Schedule[nr-11] = _mm_aesimc_si128(Temp_Key_Schedule[11]);
-    }
-
-    if(nr>12) {
-        Key_Schedule[nr-12] = _mm_aesimc_si128(Temp_Key_Schedule[12]);
-        Key_Schedule[nr-13] = _mm_aesimc_si128(Temp_Key_Schedule[13]);
-    }
-
-    Key_Schedule[0] = Temp_Key_Schedule[nr];
-
-    return 0;
-}
-#endif /* HAVE_AES_DECRYPT */
-#endif /* WOLFSSL_AESNI */
-
-#if defined(HAVE_AES_CBC) || defined(WOLFSSL_AES_DIRECT) ||\
-    defined(HAVE_AESGCM)
 
+#if defined(HAVE_AES_CBC) || defined(WOLFSSL_AES_DIRECT) || defined(HAVE_AESGCM)
 
 #ifndef WC_CACHE_LINE_SZ
     #if defined(__x86_64__) || defined(_M_X64) || \
@@ -1220,12 +1228,13 @@ static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
     word32 s0, s1, s2, s3;
     word32 t0, t1, t2, t3;
     word32 r = aes->rounds >> 1;
-
     const word32* rk = aes->key;
+
     if (r > 7 || r == 0) {
         WOLFSSL_MSG("AesEncrypt encountered improper key, set it up");
         return;  /* stop instead of segfaulting, set up your keys! */
     }
+
 #ifdef WOLFSSL_AESNI
     if (haveAESNI && aes->use_aesni) {
         #ifdef DEBUG_AESNI
@@ -1238,16 +1247,19 @@ static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
         #endif
 
         /* check alignment, decrypt doesn't need alignment */
-        if ((wolfssl_word)inBlock % 16) {
+        if ((wolfssl_word)inBlock % AESNI_ALIGN) {
         #ifndef NO_WOLFSSL_ALLOC_ALIGN
             byte* tmp = (byte*)XMALLOC(AES_BLOCK_SIZE, aes->heap,
                                                       DYNAMIC_TYPE_TMP_BUFFER);
+            byte* tmp_align;
             if (tmp == NULL) return;
 
-            XMEMCPY(tmp, inBlock, AES_BLOCK_SIZE);
-            AES_ECB_encrypt(tmp, tmp, AES_BLOCK_SIZE, (byte*)aes->key,
+            tmp_align = tmp + (AESNI_ALIGN - ((size_t)tmp % AESNI_ALIGN));
+
+            XMEMCPY(tmp_align, inBlock, AES_BLOCK_SIZE);
+            AES_ECB_encrypt(tmp_align, tmp_align, AES_BLOCK_SIZE, (byte*)aes->key,
                             aes->rounds);
-            XMEMCPY(outBlock, tmp, AES_BLOCK_SIZE);
+            XMEMCPY(outBlock, tmp_align, AES_BLOCK_SIZE);
             XFREE(tmp, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
             return;
         #else
@@ -1277,12 +1289,12 @@ static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
     XMEMCPY(&s2, inBlock + 2 * sizeof(s0), sizeof(s2));
     XMEMCPY(&s3, inBlock + 3 * sizeof(s0), sizeof(s3));
 
-    #ifdef LITTLE_ENDIAN_ORDER
-        s0 = ByteReverseWord32(s0);
-        s1 = ByteReverseWord32(s1);
-        s2 = ByteReverseWord32(s2);
-        s3 = ByteReverseWord32(s3);
-    #endif
+#ifdef LITTLE_ENDIAN_ORDER
+    s0 = ByteReverseWord32(s0);
+    s1 = ByteReverseWord32(s1);
+    s2 = ByteReverseWord32(s2);
+    s3 = ByteReverseWord32(s3);
+#endif
 
     s0 ^= rk[0];
     s1 ^= rk[1];
@@ -1383,12 +1395,12 @@ static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
         rk[3];
 
     /* write out */
-    #ifdef LITTLE_ENDIAN_ORDER
-        s0 = ByteReverseWord32(s0);
-        s1 = ByteReverseWord32(s1);
-        s2 = ByteReverseWord32(s2);
-        s3 = ByteReverseWord32(s3);
-    #endif
+#ifdef LITTLE_ENDIAN_ORDER
+    s0 = ByteReverseWord32(s0);
+    s1 = ByteReverseWord32(s1);
+    s2 = ByteReverseWord32(s2);
+    s3 = ByteReverseWord32(s3);
+#endif
 
     XMEMCPY(outBlock,                  &s0, sizeof(s0));
     XMEMCPY(outBlock + sizeof(s0),     &s1, sizeof(s1));
@@ -1398,6 +1410,7 @@ static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
 }
 #endif /* HAVE_AES_CBC || WOLFSSL_AES_DIRECT || HAVE_AESGCM */
 
+
 #ifdef HAVE_AES_DECRYPT
 #if defined(HAVE_AES_CBC) || defined(WOLFSSL_AES_DIRECT)
 
@@ -1416,7 +1429,6 @@ static INLINE word32 PreFetchTd(void)
     return x;
 }
 
-
 /* load Td Table4 into cache by cache line stride */
 static INLINE word32 PreFetchTd4(void)
 {
@@ -1429,7 +1441,6 @@ static INLINE word32 PreFetchTd4(void)
     return x;
 }
 
-
 static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
 {
     word32 s0, s1, s2, s3;
@@ -1463,7 +1474,7 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
             printf("Skipping AES-NI\n");
         #endif
     }
-#endif
+#endif /* WOLFSSL_AESNI */
 
     /*
      * map byte array block to cipher state
@@ -1474,12 +1485,12 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
     XMEMCPY(&s2, inBlock + 2 * sizeof(s0), sizeof(s2));
     XMEMCPY(&s3, inBlock + 3 * sizeof(s0), sizeof(s3));
 
-    #ifdef LITTLE_ENDIAN_ORDER
-        s0 = ByteReverseWord32(s0);
-        s1 = ByteReverseWord32(s1);
-        s2 = ByteReverseWord32(s2);
-        s3 = ByteReverseWord32(s3);
-    #endif
+#ifdef LITTLE_ENDIAN_ORDER
+    s0 = ByteReverseWord32(s0);
+    s1 = ByteReverseWord32(s1);
+    s2 = ByteReverseWord32(s2);
+    s3 = ByteReverseWord32(s3);
+#endif
 
     s0 ^= rk[0];
     s1 ^= rk[1];
@@ -1581,12 +1592,12 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
         rk[3];
 
     /* write out */
-    #ifdef LITTLE_ENDIAN_ORDER
-        s0 = ByteReverseWord32(s0);
-        s1 = ByteReverseWord32(s1);
-        s2 = ByteReverseWord32(s2);
-        s3 = ByteReverseWord32(s3);
-    #endif
+#ifdef LITTLE_ENDIAN_ORDER
+    s0 = ByteReverseWord32(s0);
+    s1 = ByteReverseWord32(s1);
+    s2 = ByteReverseWord32(s2);
+    s3 = ByteReverseWord32(s3);
+#endif
 
     XMEMCPY(outBlock,                  &s0, sizeof(s0));
     XMEMCPY(outBlock + sizeof(s0),     &s1, sizeof(s1));
@@ -1598,10 +1609,11 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
 #endif /* NEED_AES_TABLES */
 
 
+
 /* wc_AesSetKey */
 #if defined(STM32F2_CRYPTO) || defined(STM32F4_CRYPTO)
 
-int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
+    int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
             const byte* iv, int dir)
     {
         word32 *rk = aes->key;
@@ -1611,6 +1623,7 @@ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
         if (!((keylen == 16) || (keylen == 24) || (keylen == 32)))
             return BAD_FUNC_ARG;
 
+        aes->keylen = keylen;
         aes->rounds = keylen/4 + 6;
         XMEMCPY(rk, userKey, keylen);
     #ifndef WOLFSSL_STM32_CUBEMX
@@ -1647,28 +1660,28 @@ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
 
     extern volatile unsigned char __MBAR[];
 
-    int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv,
-                  int dir)
+    int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
+        const byte* iv, int dir)
     {
         if (AESBuffIn == NULL) {
-            #if defined (HAVE_THREADX)
-                int s1, s2, s3, s4, s5 ;
-                s5 = tx_byte_allocate(&mp_ncached,(void *)&secDesc,
-                                      sizeof(SECdescriptorType), TX_NO_WAIT);
-                s1 = tx_byte_allocate(&mp_ncached, (void *)&AESBuffIn,
-                                      AES_BUFFER_SIZE, TX_NO_WAIT);
-                s2 = tx_byte_allocate(&mp_ncached, (void *)&AESBuffOut,
-                                      AES_BUFFER_SIZE, TX_NO_WAIT);
-                s3 = tx_byte_allocate(&mp_ncached, (void *)&secKey,
-                                      AES_BLOCK_SIZE*2, TX_NO_WAIT);
-                s4 = tx_byte_allocate(&mp_ncached, (void *)&secReg,
-                                      AES_BLOCK_SIZE, TX_NO_WAIT);
+        #if defined (HAVE_THREADX)
+            int s1, s2, s3, s4, s5;
+            s5 = tx_byte_allocate(&mp_ncached,(void *)&secDesc,
+                                  sizeof(SECdescriptorType), TX_NO_WAIT);
+            s1 = tx_byte_allocate(&mp_ncached, (void *)&AESBuffIn,
+                                  AES_BUFFER_SIZE, TX_NO_WAIT);
+            s2 = tx_byte_allocate(&mp_ncached, (void *)&AESBuffOut,
+                                  AES_BUFFER_SIZE, TX_NO_WAIT);
+            s3 = tx_byte_allocate(&mp_ncached, (void *)&secKey,
+                                  AES_BLOCK_SIZE*2, TX_NO_WAIT);
+            s4 = tx_byte_allocate(&mp_ncached, (void *)&secReg,
+                                  AES_BLOCK_SIZE, TX_NO_WAIT);
 
-                if(s1 || s2 || s3 || s4 || s5)
-                    return BAD_FUNC_ARG;
-            #else
-                #warning "Allocate non-Cache buffers"
-            #endif
+            if (s1 || s2 || s3 || s4 || s5)
+                return BAD_FUNC_ARG;
+        #else
+            #warning "Allocate non-Cache buffers"
+        #endif
 
             wc_InitMutex(&Mutex_AesSEC);
         }
@@ -1679,6 +1692,7 @@ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
         if (aes == NULL)
             return BAD_FUNC_ARG;
 
+        aes->keylen = keylen;
         aes->rounds = keylen/4 + 6;
         XMEMCPY(aes->key, userKey, keylen);
 
@@ -1710,8 +1724,8 @@ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
         return wc_AesSetKey(aes, userKey, keylen, iv, dir);
     }
 #elif defined(FREESCALE_MMCAU)
-    int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv,
-                  int dir)
+    int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
+        const byte* iv, int dir)
     {
         int ret;
         byte *rk = (byte*)aes->key;
@@ -1728,6 +1742,7 @@ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
             aes->left = 0;
         #endif /* WOLFSSL_AES_COUNTER */
 
+        aes->keylen = keylen;
         aes->rounds = keylen/4 + 6;
 
         ret = wolfSSL_CryptHwMutexLock();
@@ -1746,9 +1761,10 @@ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
     {
         return wc_AesSetKey(aes, userKey, keylen, iv, dir);
     }
+
 #elif defined(WOLFSSL_NRF51_AES)
-    int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv,
-                  int dir)
+    int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
+        const byte* iv, int dir)
     {
         int ret;
 
@@ -1758,6 +1774,7 @@ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
         if (keylen != 16)
             return BAD_FUNC_ARG;
 
+        aes->keylen = keylen;
         aes->rounds = keylen/4 + 6;
         ret = nrf51_aes_set_key(userKey);
 
@@ -1769,6 +1786,7 @@ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
     {
         return wc_AesSetKey(aes, userKey, keylen, iv, dir);
     }
+
 #else
     static int wc_AesSetKeyLocal(Aes* aes, const byte* userKey, word32 keylen,
                 const byte* iv, int dir)
@@ -1776,25 +1794,25 @@ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
         word32 temp, *rk = aes->key;
         unsigned int i = 0;
 
-        #ifdef WOLFSSL_AESNI
-            aes->use_aesni = 0;
-        #endif /* WOLFSSL_AESNI */
-        #ifdef WOLFSSL_AES_COUNTER
-            aes->left = 0;
-        #endif /* WOLFSSL_AES_COUNTER */
+    #ifdef WOLFSSL_AESNI
+        aes->use_aesni = 0;
+    #endif /* WOLFSSL_AESNI */
+    #ifdef WOLFSSL_AES_COUNTER
+        aes->left = 0;
+    #endif /* WOLFSSL_AES_COUNTER */
 
+        aes->keylen = keylen;
         aes->rounds = keylen/4 + 6;
 
         XMEMCPY(rk, userKey, keylen);
-        #ifdef LITTLE_ENDIAN_ORDER
-            ByteReverseWords(rk, rk, keylen);
-        #endif
+    #ifdef LITTLE_ENDIAN_ORDER
+        ByteReverseWords(rk, rk, keylen);
+    #endif
 
         #ifdef WOLFSSL_PIC32MZ_CRYPT
         {
             word32 *akey1 = aes->key_ce;
-            word32 *areg = aes->iv_ce ;
-            aes->keylen = keylen ;
+            word32 *areg = aes->iv_ce;
             XMEMCPY(akey1, userKey, keylen);
             if (iv)
                 XMEMCPY(areg, iv, AES_BLOCK_SIZE);
@@ -1931,17 +1949,17 @@ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
         return wc_AesSetIV(aes, iv);
     }
 
-    int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv,
-                  int dir)
+    int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
+        const byte* iv, int dir)
     {
     #if defined(AES_MAX_KEY_SIZE)
         const word32 max_key_len = (AES_MAX_KEY_SIZE / 8);
     #endif
 
-        if (aes == NULL)
-            return BAD_FUNC_ARG;
-        if (!((keylen == 16) || (keylen == 24) || (keylen == 32)))
+        if (aes == NULL ||
+                !((keylen == 16) || (keylen == 24) || (keylen == 32))) {
             return BAD_FUNC_ARG;
+        }
 
     #if defined(AES_MAX_KEY_SIZE)
         /* Check key length */
@@ -1949,12 +1967,15 @@ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
             return BAD_FUNC_ARG;
         }
     #endif
+        aes->keylen = keylen;
+        aes->rounds = keylen/4 + 6;
 
-    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
         if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES) {
-            return NitroxAesSetKey(aes, userKey, keylen, iv);
+            aes->asyncKey = userKey;
+            aes->asyncIv = iv;
         }
-    #endif
+    #endif /* WOLFSSL_ASYNC_CRYPT */
 
     #ifdef WOLFSSL_AESNI
         if (checkAESNI == 0) {
@@ -1981,14 +2002,12 @@ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
     }
 
     #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
-
-    /* AES-CTR and AES-DIRECT need to use this for key setup, no aesni yet */
-    int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen,
-                        const byte* iv, int dir)
-    {
-        return wc_AesSetKeyLocal(aes, userKey, keylen, iv, dir);
-    }
-
+        /* AES-CTR and AES-DIRECT need to use this for key setup, no aesni yet */
+        int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen,
+                            const byte* iv, int dir)
+        {
+            return wc_AesSetKeyLocal(aes, userKey, keylen, iv, dir);
+        }
     #endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
 #endif /* wc_AesSetKey block */
 
@@ -2007,21 +2026,6 @@ int wc_AesSetIV(Aes* aes, const byte* iv)
     return 0;
 }
 
-
-/* set the heap hint for aes struct */
-int wc_InitAes_h(Aes* aes, void* h)
-{
-    if (aes == NULL)
-        return BAD_FUNC_ARG;
-
-    aes->heap = h;
-
-    return 0;
-}
-
-
-
-
 /* AES-DIRECT */
 #if defined(WOLFSSL_AES_DIRECT)
     #if defined(HAVE_COLDFIRE_SEC)
@@ -2404,7 +2408,7 @@ int wc_InitAes_h(Aes* aes, void* h)
 
 #elif defined(HAVE_COLDFIRE_SEC)
     static int wc_AesCbcCrypt(Aes* aes, byte* po, const byte* pi, word32 sz,
-                           word32 descHeader)
+        word32 descHeader)
     {
         #ifdef DEBUG_WOLFSSL
             int i; int stat1, stat2; int ret;
@@ -2426,9 +2430,9 @@ int wc_InitAes_h(Aes* aes, void* h)
         secDesc->pointer2 = (byte *)secReg; /* Initial Vector */
 
         switch(aes->rounds) {
-            case 10: secDesc->length3 = 16 ; break ;
-            case 12: secDesc->length3 = 24 ; break ;
-            case 14: secDesc->length3 = 32 ; break ;
+            case 10: secDesc->length3 = 16; break;
+            case 12: secDesc->length3 = 24; break;
+            case 14: secDesc->length3 = 32; break;
         }
         XMEMCPY(secKey, aes->key, secDesc->length3);
 
@@ -2613,21 +2617,22 @@ int wc_InitAes_h(Aes* aes, void* h)
         return 0;
     }
     #endif /* HAVE_AES_DECRYPT */
+
 #elif defined(WOLFSSL_PIC32MZ_CRYPT)
     /* core hardware crypt engine driver */
     static void wc_AesCrypt(Aes *aes, byte* out, const byte* in, word32 sz,
                                             int dir, int algo, int cryptoalgo)
     {
-        securityAssociation *sa_p ;
-        bufferDescriptor *bd_p ;
+        securityAssociation *sa_p;
+        bufferDescriptor *bd_p;
 
         volatile securityAssociation sa __attribute__((aligned (8)));
         volatile bufferDescriptor bd __attribute__((aligned (8)));
-        volatile int k ;
+        volatile int k;
 
         /* get uncached address */
-        sa_p = KVA0_TO_KVA1(&sa) ;
-        bd_p = KVA0_TO_KVA1(&bd) ;
+        sa_p = KVA0_TO_KVA1(&sa);
+        bd_p = KVA0_TO_KVA1(&bd);
 
         /* Sync cache and physical memory */
         if(PIC32MZ_IF_RAM(in)) {
@@ -2636,27 +2641,27 @@ int wc_InitAes_h(Aes* aes, void* h)
         XMEMSET((void *)KVA0_TO_KVA1(out), 0, sz);
         /* Set up the Security Association */
         XMEMSET((byte *)KVA0_TO_KVA1(&sa), 0, sizeof(sa));
-        sa_p->SA_CTRL.ALGO = algo ; /* AES */
+        sa_p->SA_CTRL.ALGO = algo; /* AES */
         sa_p->SA_CTRL.LNC = 1;
         sa_p->SA_CTRL.LOADIV = 1;
         sa_p->SA_CTRL.FB = 1;
-        sa_p->SA_CTRL.ENCTYPE = dir ; /* Encryption/Decryption */
+        sa_p->SA_CTRL.ENCTYPE = dir; /* Encryption/Decryption */
         sa_p->SA_CTRL.CRYPTOALGO = cryptoalgo;
 
         if(cryptoalgo == PIC32_CRYPTOALGO_AES_GCM){
             switch(aes->keylen) {
             case 32:
-                sa_p->SA_CTRL.KEYSIZE = PIC32_AES_KEYSIZE_256 ;
-                break ;
+                sa_p->SA_CTRL.KEYSIZE = PIC32_AES_KEYSIZE_256;
+                break;
             case 24:
-                sa_p->SA_CTRL.KEYSIZE = PIC32_AES_KEYSIZE_192 ;
-                break ;
+                sa_p->SA_CTRL.KEYSIZE = PIC32_AES_KEYSIZE_192;
+                break;
             case 16:
-                sa_p->SA_CTRL.KEYSIZE = PIC32_AES_KEYSIZE_128 ;
-                break ;
+                sa_p->SA_CTRL.KEYSIZE = PIC32_AES_KEYSIZE_128;
+                break;
             }
         } else
-            sa_p->SA_CTRL.KEYSIZE = PIC32_AES_KEYSIZE_128 ;
+            sa_p->SA_CTRL.KEYSIZE = PIC32_AES_KEYSIZE_128;
 
         ByteReverseWords(
         (word32 *)KVA0_TO_KVA1(sa.SA_ENCKEY + 8 - aes->keylen/sizeof(word32)),
@@ -2669,27 +2674,27 @@ int wc_InitAes_h(Aes* aes, void* h)
         bd_p->BD_CTRL.BUFLEN = sz;
         if(cryptoalgo == PIC32_CRYPTOALGO_AES_GCM) {
             if(sz % 0x10)
-                bd_p->BD_CTRL.BUFLEN = (sz/0x10 + 1) * 0x10 ;
+                bd_p->BD_CTRL.BUFLEN = (sz/0x10 + 1) * 0x10;
         }
         bd_p->BD_CTRL.LIFM = 1;
         bd_p->BD_CTRL.SA_FETCH_EN = 1;
         bd_p->BD_CTRL.LAST_BD = 1;
         bd_p->BD_CTRL.DESC_EN = 1;
 
-        bd_p->SA_ADDR = (unsigned int)KVA_TO_PA(&sa) ;
-        bd_p->SRCADDR = (unsigned int)KVA_TO_PA(in) ;
+        bd_p->SA_ADDR = (unsigned int)KVA_TO_PA(&sa);
+        bd_p->SRCADDR = (unsigned int)KVA_TO_PA(in);
         bd_p->DSTADDR = (unsigned int)KVA_TO_PA(out);
-        bd_p->MSGLEN = sz ;
+        bd_p->MSGLEN = sz;
 
         CECON = 1 << 6;
         while (CECON);
 
         /* Run the engine */
-        CEBDPADDR = (unsigned int)KVA_TO_PA(&bd) ;
+        CEBDPADDR = (unsigned int)KVA_TO_PA(&bd);
         CEINTEN = 0x07;
         CECON = 0x27;
 
-        WAIT_ENGINE ;
+        WAIT_ENGINE;
 
         if((cryptoalgo == PIC32_CRYPTOALGO_CBC) ||
            (cryptoalgo == PIC32_CRYPTOALGO_TCBC)||
@@ -2698,14 +2703,14 @@ int wc_InitAes_h(Aes* aes, void* h)
             if(dir == PIC32_ENCRYPTION) {
                 XMEMCPY((void *)aes->iv_ce,
                         (void*)KVA0_TO_KVA1(out + sz - AES_BLOCK_SIZE),
-                        AES_BLOCK_SIZE) ;
+                        AES_BLOCK_SIZE);
             } else {
                 ByteReverseWords((word32*)aes->iv_ce,
                         (word32 *)KVA0_TO_KVA1(in + sz - AES_BLOCK_SIZE),
                         AES_BLOCK_SIZE);
             }
         }
-        XMEMCPY((byte *)out, (byte *)KVA0_TO_KVA1(out), sz) ;
+        XMEMCPY((byte *)out, (byte *)KVA0_TO_KVA1(out), sz);
         ByteReverseWords((word32*)out, (word32 *)out, sz);
     }
 
@@ -2713,14 +2718,14 @@ int wc_InitAes_h(Aes* aes, void* h)
     {
         wc_AesCrypt(aes, out, in, sz, PIC32_ENCRYPTION, PIC32_ALGO_AES,
                                                       PIC32_CRYPTOALGO_RCBC );
-        return 0 ;
+        return 0;
     }
     #ifdef HAVE_AES_DECRYPT
     int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
     {
         wc_AesCrypt(aes, out, in, sz, PIC32_DECRYPTION, PIC32_ALGO_AES,
                                                       PIC32_CRYPTOALGO_RCBC);
-        return 0 ;
+        return 0;
     }
     #endif /* HAVE_AES_DECRYPT */
 
@@ -2729,10 +2734,28 @@ int wc_InitAes_h(Aes* aes, void* h)
     {
         word32 blocks = sz / AES_BLOCK_SIZE;
 
-    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
-        if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES)
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
+        /* if async and byte count is at or above threshold */
+        if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES &&
+                                                sz >= WC_ASYNC_THRESH_AES_CBC) {
+        #if defined(HAVE_CAVIUM)
             return NitroxAesCbcEncrypt(aes, out, in, sz);
-    #endif
+        #elif defined(HAVE_INTEL_QA)
+            return IntelQaSymAesCbcEncrypt(&aes->asyncDev, out, in, sz,
+                aes->asyncKey, aes->keylen, aes->asyncIv, AES_BLOCK_SIZE);
+        #else /* WOLFSSL_ASYNC_CRYPT_TEST */
+            WC_ASYNC_TEST* testDev = &aes->asyncDev.test;
+            if (testDev->type == ASYNC_TEST_NONE) {
+                testDev->type = ASYNC_TEST_AES_CBC_ENCRYPT;
+                testDev->aes.aes = aes;
+                testDev->aes.out = out;
+                testDev->aes.in = in;
+                testDev->aes.sz = sz;
+                return WC_PENDING_E;
+            }
+        #endif
+        }
+    #endif /* WOLFSSL_ASYNC_CRYPT */
 
     #ifdef WOLFSSL_AESNI
         if (haveAESNI) {
@@ -2747,22 +2770,25 @@ int wc_InitAes_h(Aes* aes, void* h)
             #endif
 
             /* check alignment, decrypt doesn't need alignment */
-            if ((wolfssl_word)in % 16) {
+            if ((wolfssl_word)in % AESNI_ALIGN) {
             #ifndef NO_WOLFSSL_ALLOC_ALIGN
-                byte* tmp = (byte*)XMALLOC(sz, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
-                WOLFSSL_MSG("AES-CBC encrypt with bad alignment");
+                byte* tmp = (byte*)XMALLOC(sz + AESNI_ALIGN, aes->heap,
+                                                    DYNAMIC_TYPE_TMP_BUFFER);
+                byte* tmp_align;
                 if (tmp == NULL) return MEMORY_E;
 
-                XMEMCPY(tmp, in, sz);
-                AES_CBC_encrypt(tmp, tmp, (byte*)aes->reg, sz, (byte*)aes->key,
+                tmp_align = tmp + (AESNI_ALIGN - ((size_t)tmp % AESNI_ALIGN));
+                XMEMCPY(tmp_align, in, sz);
+                AES_CBC_encrypt(tmp_align, tmp_align, (byte*)aes->reg, sz, (byte*)aes->key,
                             aes->rounds);
                 /* store iv for next call */
-                XMEMCPY(aes->reg, tmp + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+                XMEMCPY(aes->reg, tmp_align + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
 
-                XMEMCPY(out, tmp, sz);
+                XMEMCPY(out, tmp_align, sz);
                 XFREE(tmp, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
                 return 0;
             #else
+                WOLFSSL_MSG("AES-CBC encrypt with bad alignment");
                 return BAD_ALIGN_E;
             #endif
             }
@@ -2791,11 +2817,28 @@ int wc_InitAes_h(Aes* aes, void* h)
     #ifdef HAVE_AES_DECRYPT
     int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
     {
-        word32 blocks = sz / AES_BLOCK_SIZE;
+        word32 blocks;
 
-    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
-        if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES) {
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
+        /* if async and byte count is at or above threshold */
+        if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES &&
+                                                sz >= WC_ASYNC_THRESH_AES_CBC) {
+        #if defined(HAVE_CAVIUM)
             return NitroxAesCbcDecrypt(aes, out, in, sz);
+        #elif defined(HAVE_INTEL_QA)
+            return IntelQaSymAesCbcDecrypt(&aes->asyncDev, out, in, sz,
+                aes->asyncKey, aes->keylen, aes->asyncIv, AES_BLOCK_SIZE);
+        #else /* WOLFSSL_ASYNC_CRYPT_TEST */
+            WC_ASYNC_TEST* testDev = &aes->asyncDev.test;
+            if (testDev->type == ASYNC_TEST_NONE) {
+                testDev->type = ASYNC_TEST_AES_CBC_DECRYPT;
+                testDev->aes.aes = aes;
+                testDev->aes.out = out;
+                testDev->aes.in = in;
+                testDev->aes.sz = sz;
+                return WC_PENDING_E;
+            }
+        #endif
         }
     #endif
 
@@ -2829,6 +2872,7 @@ int wc_InitAes_h(Aes* aes, void* h)
         }
     #endif
 
+        blocks = sz / AES_BLOCK_SIZE;
         while (blocks--) {
             XMEMCPY(aes->tmp, in, AES_BLOCK_SIZE);
             wc_AesDecrypt(aes, (byte*)aes->tmp, out);
@@ -3016,58 +3060,58 @@ int wc_AesEcbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
     #elif defined(WOLFSSL_PIC32MZ_CRYPT)
         void wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
         {
-            int i ;
-            char out_block[AES_BLOCK_SIZE] ;
-            int odd ;
-            int even ;
-            char *tmp ; /* (char *)aes->tmp, for short */
+            int i;
+            char out_block[AES_BLOCK_SIZE];
+            int odd;
+            int even;
+            char *tmp; /* (char *)aes->tmp, for short */
 
-            tmp = (char *)aes->tmp ;
+            tmp = (char *)aes->tmp;
             if(aes->left) {
                 if((aes->left + sz) >= AES_BLOCK_SIZE){
-                    odd = AES_BLOCK_SIZE - aes->left ;
+                    odd = AES_BLOCK_SIZE - aes->left;
                 } else {
-                    odd = sz ;
+                    odd = sz;
                 }
-                XMEMCPY(tmp+aes->left, in, odd) ;
+                XMEMCPY(tmp+aes->left, in, odd);
                 if((odd+aes->left) == AES_BLOCK_SIZE){
                     wc_AesCrypt(aes, out_block, tmp, AES_BLOCK_SIZE,
                         PIC32_ENCRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_RCTR);
-                    XMEMCPY(out, out_block+aes->left, odd) ;
-                    aes->left = 0 ;
-                    XMEMSET(tmp, 0x0, AES_BLOCK_SIZE) ;
+                    XMEMCPY(out, out_block+aes->left, odd);
+                    aes->left = 0;
+                    XMEMSET(tmp, 0x0, AES_BLOCK_SIZE);
                     /* Increment IV */
                     for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) {
                         if (++((byte *)aes->iv_ce)[i])
-                            break ;
+                            break;
                     }
                 }
-                in += odd ;
-                out+= odd ;
-                sz -= odd ;
+                in += odd;
+                out+= odd;
+                sz -= odd;
             }
-            odd = sz % AES_BLOCK_SIZE ;  /* if there is tail fragment */
+            odd = sz % AES_BLOCK_SIZE;  /* if there is tail fragment */
             if(sz / AES_BLOCK_SIZE) {
-                even = (sz/AES_BLOCK_SIZE)*AES_BLOCK_SIZE ;
+                even = (sz/AES_BLOCK_SIZE)*AES_BLOCK_SIZE;
                 wc_AesCrypt(aes, out, in, even, PIC32_ENCRYPTION, PIC32_ALGO_AES,
                                                         PIC32_CRYPTOALGO_RCTR);
-                out += even ;
-                in  += even ;
+                out += even;
+                in  += even;
                 do {  /* Increment IV */
                     for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) {
                         if (++((byte *)aes->iv_ce)[i])
-                            break ;
+                            break;
                     }
-                    even -= AES_BLOCK_SIZE ;
-                } while((int)even > 0) ;
+                    even -= AES_BLOCK_SIZE;
+                } while((int)even > 0);
             }
             if(odd) {
-                XMEMSET(tmp+aes->left, 0x0, AES_BLOCK_SIZE - aes->left) ;
-                XMEMCPY(tmp+aes->left, in, odd) ;
+                XMEMSET(tmp+aes->left, 0x0, AES_BLOCK_SIZE - aes->left);
+                XMEMCPY(tmp+aes->left, in, odd);
                 wc_AesCrypt(aes, out_block, tmp, AES_BLOCK_SIZE,
                         PIC32_ENCRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_RCTR);
-                XMEMCPY(out, out_block+aes->left,odd) ;
-                aes->left += odd ;
+                XMEMCPY(out, out_block+aes->left,odd);
+                aes->left += odd;
             }
         }
 
@@ -3155,6 +3199,7 @@ int wc_AesEcbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
 
 #endif /* WOLFSSL_AES_COUNTER */
 
+
 #ifdef HAVE_AESGCM
 
 /*
@@ -3751,8 +3796,8 @@ static void GMULT(byte* X, byte* Y)
 }
 
 
-static void GHASH(Aes* aes, const byte* a, word32 aSz,
-                                const byte* c, word32 cSz, byte* s, word32 sSz)
+static void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c,
+    word32 cSz, byte* s, word32 sSz)
 {
     byte x[AES_BLOCK_SIZE];
     byte scratch[AES_BLOCK_SIZE];
@@ -3900,8 +3945,8 @@ static void GMULT(byte *x, byte m[256][AES_BLOCK_SIZE])
 }
 
 
-static void GHASH(Aes* aes, const byte* a, word32 aSz,
-                                const byte* c, word32 cSz, byte* s, word32 sSz)
+static void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c,
+    word32 cSz, byte* s, word32 sSz)
 {
     byte x[AES_BLOCK_SIZE];
     byte scratch[AES_BLOCK_SIZE];
@@ -3960,9 +4005,9 @@ static void GHASH(Aes* aes, const byte* a, word32 aSz,
 static void GMULT(word64* X, word64* Y)
 {
     word64 Z[2] = {0,0};
-    word64 V[2] ;
+    word64 V[2];
     int i, j;
-    V[0] = X[0] ;  V[1] = X[1] ;
+    V[0] = X[0];  V[1] = X[1];
 
     for (i = 0; i < 2; i++)
     {
@@ -3976,13 +4021,15 @@ static void GMULT(word64* X, word64* Y)
 
             if (V[1] & 0x0000000000000001) {
                 V[1] >>= 1;
-                V[1] |= ((V[0] & 0x0000000000000001) ? 0x8000000000000000ULL : 0);
+                V[1] |= ((V[0] & 0x0000000000000001) ?
+                    0x8000000000000000ULL : 0);
                 V[0] >>= 1;
                 V[0] ^= 0xE100000000000000ULL;
             }
             else {
                 V[1] >>= 1;
-                V[1] |= ((V[0] & 0x0000000000000001) ? 0x8000000000000000ULL : 0);
+                V[1] |= ((V[0] & 0x0000000000000001) ?
+                    0x8000000000000000ULL : 0);
                 V[0] >>= 1;
             }
             y <<= 1;
@@ -3992,8 +4039,9 @@ static void GMULT(word64* X, word64* Y)
     X[1] = Z[1];
 }
 
-static void GHASH(Aes* aes, const byte* a, word32 aSz,
-                                const byte* c, word32 cSz, byte* s, word32 sSz)
+
+static void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c,
+    word32 cSz, byte* s, word32 sSz)
 {
     word64 x[2] = {0,0};
     word32 blocks, partial;
@@ -4060,8 +4108,8 @@ static void GHASH(Aes* aes, const byte* a, word32 aSz,
 
     /* Hash in the lengths in bits of A and C */
     {
-        word64 len[2] ;
-        len[0] = aSz ; len[1] = cSz;
+        word64 len[2];
+        len[0] = aSz; len[1] = cSz;
 
         /* Lengths are in bytes. Convert to bits. */
         len[0] *= 8;
@@ -4084,7 +4132,7 @@ static void GHASH(Aes* aes, const byte* a, word32 aSz,
 static void GMULT(word32* X, word32* Y)
 {
     word32 Z[4] = {0,0,0,0};
-    word32 V[4] ;
+    word32 V[4];
     int i, j;
 
     V[0] = X[0];  V[1] = X[1]; V[2] =  X[2]; V[3] =  X[3];
@@ -4129,8 +4177,8 @@ static void GMULT(word32* X, word32* Y)
 }
 
 
-static void GHASH(Aes* aes, const byte* a, word32 aSz,
-                                const byte* c, word32 cSz, byte* s, word32 sSz)
+static void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c,
+    word32 cSz, byte* s, word32 sSz)
 {
     word32 x[4] = {0,0,0,0};
     word32 blocks, partial;
@@ -4263,7 +4311,7 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
     byte* c = out;
     byte counter[AES_BLOCK_SIZE];
     byte initialCounter[AES_BLOCK_SIZE];
-    byte *ctr ;
+    byte *ctr;
     byte scratch[AES_BLOCK_SIZE];
 
     /* Sanity check for XMEMCPY in GHASH function and local xorbuf call */
@@ -4275,6 +4323,36 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
         return BAD_FUNC_ARG;
     }
 
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
+    /* if async and byte count is at or above threshold */
+    if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES &&
+                                                sz >= WC_ASYNC_THRESH_AES_GCM) {
+    #if defined(HAVE_CAVIUM)
+        /* Not yet supported, contact wolfSSL if interested in using */
+    #elif defined(HAVE_INTEL_QA)
+        return IntelQaSymAesGcmEncrypt(&aes->asyncDev, out, in, sz,
+            aes->asyncKey, aes->keylen, iv, ivSz,
+            authTag, authTagSz, authIn, authInSz);
+    #else /* WOLFSSL_ASYNC_CRYPT_TEST */
+        WC_ASYNC_TEST* testDev = &aes->asyncDev.test;
+        if (testDev->type == ASYNC_TEST_NONE) {
+            testDev->type = ASYNC_TEST_AES_GCM_ENCRYPT;
+            testDev->aes.aes = aes;
+            testDev->aes.out = out;
+            testDev->aes.in = in;
+            testDev->aes.sz = sz;
+            testDev->aes.iv = iv;
+            testDev->aes.ivSz = ivSz;
+            testDev->aes.authTag = authTag;
+            testDev->aes.authTagSz = authTagSz;
+            testDev->aes.authIn = authIn;
+            testDev->aes.authInSz = authInSz;
+            return WC_PENDING_E; /* as in the GCM decrypt test path */
+        }
+    #endif
+    }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
 #ifdef WOLFSSL_AESNI
     if (haveAESNI) {
         AES_GCM_encrypt(in, out, authIn, iv, authTag,
@@ -4284,9 +4361,9 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
 #endif
 
 #ifdef WOLFSSL_PIC32MZ_CRYPT
-    ctr = (char *)aes->iv_ce ;
+    ctr = (char *)aes->iv_ce;
 #else
-    ctr = counter ;
+    ctr = counter;
 #endif
 
     XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
@@ -4363,12 +4440,44 @@ int  wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
     byte* p = out;
     byte counter[AES_BLOCK_SIZE];
     byte initialCounter[AES_BLOCK_SIZE];
-    byte *ctr ;
+    byte *ctr;
     byte scratch[AES_BLOCK_SIZE];
 
-    /* Sanity check for local ConstantCompare call */
-    if (authTagSz > AES_BLOCK_SIZE)
+    /* argument checks */
+    if (aes == NULL || out == NULL || in == NULL || sz == 0 || iv == NULL ||
+        authTag == NULL || authIn == NULL || authTagSz > AES_BLOCK_SIZE) {
         return BAD_FUNC_ARG;
+    }
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
+    /* if async and byte count is at or above threshold */
+    if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES &&
+                                                sz >= WC_ASYNC_THRESH_AES_GCM) {
+    #if defined(HAVE_CAVIUM)
+        /* Not yet supported, contact wolfSSL if interested in using */
+    #elif defined(HAVE_INTEL_QA)
+        return IntelQaSymAesGcmDecrypt(&aes->asyncDev, out, in, sz,
+            aes->asyncKey, aes->keylen, iv, ivSz,
+            authTag, authTagSz, authIn, authInSz);
+    #else /* WOLFSSL_ASYNC_CRYPT_TEST */
+        WC_ASYNC_TEST* testDev = &aes->asyncDev.test;
+        if (testDev->type == ASYNC_TEST_NONE) {
+            testDev->type = ASYNC_TEST_AES_GCM_DECRYPT;
+            testDev->aes.aes = aes;
+            testDev->aes.out = out;
+            testDev->aes.in = in;
+            testDev->aes.sz = sz;
+            testDev->aes.iv = iv;
+            testDev->aes.ivSz = ivSz;
+            testDev->aes.authTag = (byte*)authTag;
+            testDev->aes.authTagSz = authTagSz;
+            testDev->aes.authIn = authIn;
+            testDev->aes.authInSz = authInSz;
+            return WC_PENDING_E;
+        }
+    #endif
+    }
+#endif /* WOLFSSL_ASYNC_CRYPT */
 
 #ifdef WOLFSSL_AESNI
     if (haveAESNI) {
@@ -4380,9 +4489,9 @@ int  wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
 #endif
 
 #ifdef WOLFSSL_PIC32MZ_CRYPT
-    ctr = (char *)aes->iv_ce ;
+    ctr = (char *)aes->iv_ce;
 #else
-    ctr = counter ;
+    ctr = counter;
 #endif
 
     XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
@@ -4926,28 +5035,36 @@ int wc_AesKeyUnWrap(const byte* key, word32 keySz, const byte* in, word32 inSz,
 #endif /* HAVE_AES_KEYWRAP */
 
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-
-/* Initialize Aes for use with Nitrox device */
-int wc_AesAsyncInit(Aes* aes, int devId)
+/* Set Aes heap hint and, if async AES is enabled, init its async device */
+int wc_AesInit(Aes* aes, void* heap, int devId)
 {
+    int ret = 0;
+
     if (aes == NULL)
         return BAD_FUNC_ARG;
 
-    return wolfAsync_DevCtxInit(&aes->asyncDev, WOLFSSL_ASYNC_MARKER_AES, devId);
+    aes->heap = heap;
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
+    ret = wolfAsync_DevCtxInit(&aes->asyncDev, WOLFSSL_ASYNC_MARKER_AES,
+                                                        aes->heap, devId);
+#else
+    (void)devId;
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+    return ret;
 }
 
-
-/* Free Aes from use with Nitrox device */
-void wc_AesAsyncFree(Aes* aes)
+/* Release the Aes async device context; no-op unless async AES is enabled */
+void wc_AesFree(Aes* aes)
 {
     if (aes == NULL)
         return;
 
-    wolfAsync_DevCtxFree(&aes->asyncDev);
-}
-
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
+    wolfAsync_DevCtxFree(&aes->asyncDev, WOLFSSL_ASYNC_MARKER_AES);
 #endif /* WOLFSSL_ASYNC_CRYPT */
+}
 
 
 int wc_AesGetKeySize(Aes* aes, word32* keySize)
diff --git a/wolfcrypt/src/arc4.c b/wolfcrypt/src/arc4.c
index 6922089de..160c36a91 100644
--- a/wolfcrypt/src/arc4.c
+++ b/wolfcrypt/src/arc4.c
@@ -32,12 +32,14 @@
 #include 
 
 
-void wc_Arc4SetKey(Arc4* arc4, const byte* key, word32 length)
+int wc_Arc4SetKey(Arc4* arc4, const byte* key, word32 length)
 {
+    int ret = 0;
     word32 i;
     word32 keyIndex = 0, stateIndex = 0;
 
-#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM) && !defined(HAVE_CAVIUM_V)
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ARC4) && \
+        defined(HAVE_CAVIUM) && !defined(HAVE_CAVIUM_V)
     if (arc4->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ARC4) {
         return NitroxArc4SetKey(arc4, key, length);
     }
@@ -59,6 +61,8 @@ void wc_Arc4SetKey(Arc4* arc4, const byte* key, word32 length)
         if (++keyIndex >= length)
             keyIndex = 0;
     }
+
+    return ret;
 }
 
 
@@ -76,12 +80,14 @@ static INLINE byte MakeByte(word32* x, word32* y, byte* s)
 }
 
 
-void wc_Arc4Process(Arc4* arc4, byte* out, const byte* in, word32 length)
+int wc_Arc4Process(Arc4* arc4, byte* out, const byte* in, word32 length)
 {
+    int ret = 0;
     word32 x;
     word32 y;
 
-#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM) && !defined(HAVE_CAVIUM_V)
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ARC4) && \
+        defined(HAVE_CAVIUM) && !defined(HAVE_CAVIUM_V)
     if (arc4->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ARC4) {
         return NitroxArc4Process(arc4, out, in, length);
     }
@@ -95,31 +101,41 @@ void wc_Arc4Process(Arc4* arc4, byte* out, const byte* in, word32 length)
 
     arc4->x = (byte)x;
     arc4->y = (byte)y;
+
+    return ret;
 }
 
-
-#ifdef WOLFSSL_ASYNC_CRYPT
-
-/* Initialize Arc4 for use with Nitrox device */
-int wc_Arc4AsyncInit(Arc4* arc4, int devId)
+/* Set Arc4 heap hint and, if async ARC4 is enabled, init its async device */
+int wc_Arc4Init(Arc4* arc4, void* heap, int devId)
 {
+    int ret = 0;
+
     if (arc4 == NULL)
         return BAD_FUNC_ARG;
 
-    return wolfAsync_DevCtxInit(&arc4->asyncDev, WOLFSSL_ASYNC_MARKER_ARC4, devId);
+    arc4->heap = heap;
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ARC4)
+    ret = wolfAsync_DevCtxInit(&arc4->asyncDev, WOLFSSL_ASYNC_MARKER_ARC4,
+        arc4->heap, devId);
+#else
+    (void)devId;
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+    return ret;
 }
 
 
-/* Free Arc4 from use with Nitrox device */
-void wc_Arc4AsyncFree(Arc4* arc4)
+/* Release the Arc4 async device context; no-op unless async ARC4 is enabled */
+void wc_Arc4Free(Arc4* arc4)
 {
     if (arc4 == NULL)
         return;
 
-    wolfAsync_DevCtxFree(&arc4->asyncDev);
-}
-
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ARC4)
+    wolfAsync_DevCtxFree(&arc4->asyncDev, WOLFSSL_ASYNC_MARKER_ARC4);
 #endif /* WOLFSSL_ASYNC_CRYPT */
+}
 
 #endif /* NO_RC4 */
 
diff --git a/wolfcrypt/src/asn.c b/wolfcrypt/src/asn.c
index d68a4220b..1b287cc57 100644
--- a/wolfcrypt/src/asn.c
+++ b/wolfcrypt/src/asn.c
@@ -81,6 +81,10 @@ ASN Options:
     #include 
 #endif
 
+#ifndef NO_RSA
+    #include 
+#endif
+
 #ifdef WOLFSSL_DEBUG_ENCODING
     #if defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
         #if MQX_USE_IO_OLD
@@ -547,54 +551,316 @@ WOLFSSL_LOCAL int GetLength(const byte* input, word32* inOutIdx, int* len,
 }
 
 
-WOLFSSL_LOCAL int GetSequence(const byte* input, word32* inOutIdx, int* len,
-                           word32 maxIdx)
+/* Get the DER/BER encoding of an ASN.1 header.
+ *
+ * input     Buffer holding DER/BER encoded data.
+ * tag       ASN.1 tag value expected in header.
+ * inOutIdx  Current index into buffer to parse.
+ * len       The number of bytes in the ASN.1 data.
+ * maxIdx    Length of data in buffer.
+ * returns BUFFER_E when there is not enough data to parse.
+ *         ASN_PARSE_E when the expected tag is not found or length is invalid.
+ *         Otherwise, the number of bytes in the ASN.1 data.
+ */
+static int GetASNHeader(const byte* input, byte tag, word32* inOutIdx, int* len,
+                        word32 maxIdx)
 {
-    int    length = -1;
-    word32 idx    = *inOutIdx;
+    word32 idx = *inOutIdx;
+    byte   b;
+    int    length;
 
     if ((idx + 1) > maxIdx)
         return BUFFER_E;
 
-    if (input[idx++] != (ASN_SEQUENCE | ASN_CONSTRUCTED) ||
-            GetLength(input, &idx, &length, maxIdx) < 0) {
+    b = input[idx++];
+    if (b != tag)
         return ASN_PARSE_E;
-    }
 
-    /* make sure length exists in buffer */
-    if ((idx + length) > maxIdx)
-        return BUFFER_E;
+    if (GetLength(input, &idx, &length, maxIdx) < 0)
+        return ASN_PARSE_E;
 
     *len      = length;
     *inOutIdx = idx;
-
     return length;
 }
 
+WOLFSSL_LOCAL int GetSequence(const byte* input, word32* inOutIdx, int* len,
+                           word32 maxIdx)
+{   /* thin wrapper: parse a header whose tag is the constructed SEQUENCE */
+    return GetASNHeader(input, ASN_SEQUENCE | ASN_CONSTRUCTED, inOutIdx, len,
+                        maxIdx);
+}
+
 
 WOLFSSL_LOCAL int GetSet(const byte* input, word32* inOutIdx, int* len,
                         word32 maxIdx)
 {
-    int    length = -1;
-    word32 idx    = *inOutIdx;
-
-    if ((idx + 1) > maxIdx)
-        return BUFFER_E;
-
-    if (input[idx++] != (ASN_SET | ASN_CONSTRUCTED) ||
-            GetLength(input, &idx, &length, maxIdx) < 0)
-        return ASN_PARSE_E;
-
-    /* make sure length exists in buffer */
-    if ((idx + length) > maxIdx)
-        return BUFFER_E;
-
-    *len      = length;
-    *inOutIdx = idx;
-
-    return length;
+    return GetASNHeader(input, ASN_SET | ASN_CONSTRUCTED, inOutIdx, len,
+                        maxIdx);
 }
 
+/* Get the DER/BER encoded ASN.1 NULL element.
+ * Ensure that all fields are as expected and move index past the element.
+ *
+ * input     Buffer holding DER/BER encoded data.
+ * inOutIdx  Current index into buffer to parse.
+ * maxIdx    Length of data in buffer.
+ * returns BUFFER_E when there is not enough data to parse.
+ *         ASN_TAG_NULL_E when the NULL tag is not found.
+ *         ASN_EXPECT_0_E when the length is not zero.
+ *         Otherwise, 0 to indicate success.
+ */
+static int GetASNNull(const byte* input, word32* inOutIdx, word32 maxIdx)
+{
+    word32 idx = *inOutIdx;
+    byte   b;
+
+    if ((idx + 2) > maxIdx) /* two bytes are read: the tag and the length */
+        return BUFFER_E;
+
+    b = input[idx++];
+    if (b != ASN_TAG_NULL)
+        return ASN_TAG_NULL_E;
+
+    if (input[idx++] != 0)
+        return ASN_EXPECT_0_E;
+
+    *inOutIdx = idx; /* caller's index is only advanced on success */
+    return 0;
+}
+
+/* Set the DER/BER encoding of the ASN.1 NULL element.
+ *
+ * output  Buffer to write into; two bytes are written, no bounds check here.
+ * returns the number of bytes added to the buffer.
+ */
+static int SetASNNull(byte* output)
+{
+    output[0] = ASN_TAG_NULL;
+    output[1] = 0;            /* length byte: NULL carries no content */
+
+    return 2;
+}
+
+/* Get the DER/BER encoding of an ASN.1 BOOLEAN.
+ *
+ * input     Buffer holding DER/BER encoded data.
+ * inOutIdx  Current index into buffer to parse.
+ * maxIdx    Length of data in buffer.
+ * returns BUFFER_E when there is not enough data to parse.
+ *         ASN_PARSE_E when the BOOLEAN tag is not found or length is not 1.
+ *         Otherwise, 0 to indicate the value was false and 1 to indicate true.
+ */
+static int GetBoolean(const byte* input, word32* inOutIdx, word32 maxIdx)
+{
+    word32 idx = *inOutIdx;
+    byte   b;
+
+    if ((idx + 3) > maxIdx) /* three bytes are read: tag, length, value */
+        return BUFFER_E;
+
+    b = input[idx++];
+    if (b != ASN_BOOLEAN)
+        return ASN_PARSE_E;
+
+    if (input[idx++] != 1)
+        return ASN_PARSE_E;
+
+    b = input[idx++] != 0;  /* any non-zero value byte is reported as true */
+
+    *inOutIdx = idx;        /* caller's index is only advanced on success */
+    return b;
+}
+
+#ifdef ASN1_SET_BOOLEAN
+/* Set the DER/BER encoding of the ASN.1 BOOLEAN element.
+ * Note: Function not required as yet.
+ *
+ * val     Boolean value to encode.
+ * output  Buffer to write into.
+ * returns the number of bytes added to the buffer.
+ */
+static int SetBoolean(int val, byte* output)
+{
+    output[0] = ASN_BOOLEAN;
+    output[1] = 1;
+    output[2] = val ? -1 : 0; /* -1 narrows to all bits set if byte is 8-bit */
+
+    return 3;
+}
+#endif
+
+/* Get the DER/BER encoding of an ASN.1 OCTET_STRING header.
+ *
+ * input     Buffer holding DER/BER encoded data.
+ * inOutIdx  Index into buffer to parse; moved past the header on success.
+ * len       The number of bytes in the ASN.1 data.
+ * maxIdx    Length of data in buffer.
+ * returns BUFFER_E when there is not enough data to parse.
+ *         ASN_PARSE_E when the OCTET_STRING tag is not found or length is
+ *         invalid.
+ *         Otherwise, the number of bytes in the ASN.1 data.
+ */
+static int GetOctetString(const byte* input, word32* inOutIdx, int* len,
+                          word32 maxIdx)
+{   /* thin wrapper: all parsing is done by GetASNHeader */
+    return GetASNHeader(input, ASN_OCTET_STRING, inOutIdx, len, maxIdx);
+}
+
+/* Get the DER/BER encoding of an ASN.1 INTEGER header.
+ * Removes the leading zero byte when found.
+ *
+ * input     Buffer holding DER/BER encoded data.
+ * inOutIdx  Current index into buffer to parse.
+ * len       The number of bytes in the ASN.1 data (excluding any leading zero).
+ * maxIdx    Length of data in buffer.
+ * returns BUFFER_E when there is not enough data to parse.
+ *         ASN_PARSE_E when the INTEGER tag is not found, length is invalid,
+ *         or invalid use of or missing leading zero.
+ *         Otherwise, 0 to indicate success.
+ */
+static int GetASNInt(const byte* input, word32* inOutIdx, int* len,
+                     word32 maxIdx)
+{
+    int    ret;
+
+    ret = GetASNHeader(input, ASN_INTEGER, inOutIdx, len, maxIdx);
+    if (ret < 0)
+        return ret;
+
+    if (*len > 0) {
+        if (input[*inOutIdx] == 0x00) {
+            (*inOutIdx)++;  /* caller's index/len change even if rejected below */
+            (*len)--;
+
+            if (*len > 0 && (input[*inOutIdx] & 0x80) == 0) /* needless zero */
+                return ASN_PARSE_E;
+        }
+        else if ((input[*inOutIdx] & 0x80) == 0x80) /* top bit set, no zero */
+            return ASN_PARSE_E;
+    }
+
+    return 0;
+}
+
+/* Get the DER/BER encoding of an ASN.1 INTEGER that has a value of no more than
+ * 7 bits.
+ *
+ * input     Buffer holding DER/BER encoded data.
+ * inOutIdx  Current index into buffer to parse.
+ * maxIdx    Length of data in buffer.
+ * returns BUFFER_E when there is not enough data to parse.
+ *         ASN_PARSE_E when the INTEGER tag is not found or length is invalid.
+ *         Otherwise, the 7-bit value.
+ */
+static int GetInteger7Bit(const byte* input, word32* inOutIdx, word32 maxIdx)
+{
+    word32 idx = *inOutIdx;
+    byte   b;
+
+    if ((idx + 3) > maxIdx) /* three bytes are read: tag, length, value */
+        return BUFFER_E;
+
+    if (input[idx++] != ASN_INTEGER)
+        return ASN_PARSE_E;
+    if (input[idx++] != 1)  /* only a one-byte INTEGER is accepted */
+        return ASN_PARSE_E;
+    b = input[idx++];       /* NOTE(review): returned as-is; top bit unchecked */
+
+    *inOutIdx = idx;
+    return b;
+}
+
+#if !defined(NO_DSA) || defined(HAVE_ECC) || (!defined(NO_RSA) && (defined(WOLFSSL_CERT_GEN) || (defined(WOLFSSL_KEY_GEN) && !defined(HAVE_USER_RSA))))
+/* Set the DER/BER encoding of the ASN.1 INTEGER header.
+ *
+ * len        Length of data to encode.
+ * firstByte  First byte of data, most significant byte of integer, to encode.
+ * output     Buffer to write into.
+ * returns the number of bytes added to the buffer.
+ */
+static int SetASNInt(int len, byte firstByte, byte* output)
+{
+    word32 idx = 0;
+
+    output[idx++] = ASN_INTEGER;
+    if (firstByte & 0x80)   /* top bit set: encoded length grows by one */
+        len++;
+    idx += SetLength(len, output + idx);
+    if (firstByte & 0x80)   /* ...for the 0x00 byte emitted ahead of the data */
+        output[idx++] = 0x00;
+
+    return idx;             /* header only; the caller appends the data bytes */
+}
+#endif
+
+#if !defined(NO_DSA) || defined(HAVE_ECC) || (defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA) && !defined(HAVE_USER_RSA))
+/* Set the DER/BER encoding of the ASN.1 INTEGER element with an mp_int.
+ * The number is assumed to be positive.
+ *
+ * n       Multi-precision integer to encode.
+ * maxSz   Maximum size of the encoded integer.
+ *         A negative value indicates no check of length requested.
+ * output  Buffer to write into.
+ * returns BUFFER_E when the data is too long for the buffer.
+ *         MP_TO_E when encoding the integer fails.
+ *         Otherwise, the number of bytes added to the buffer.
+ */
+static int SetASNIntMP(mp_int* n, int maxSz, byte* output)
+{
+    int idx = 0;
+    int leadingBit;
+    int length;
+    int err;
+
+    leadingBit = mp_leading_bit(n);
+    length = mp_unsigned_bin_size(n);
+    idx = SetASNInt(length, leadingBit ? 0x80 : 0x00, output);
+    if (maxSz >= 0 && (idx + length) > maxSz) /* header is already written */
+        return BUFFER_E;
+
+    err = mp_to_unsigned_bin(n, output + idx);
+    if (err != MP_OKAY)
+        return MP_TO_E;
+    idx += length;
+
+    return idx; /* header bytes plus magnitude bytes */
+}
+#endif
+
+#if !defined(NO_RSA) && defined(HAVE_USER_RSA) && defined(WOLFSSL_CERT_GEN)
+/* Set the DER/BER encoding of the ASN.1 INTEGER element with an mp_int from
+ * an RSA key.
+ * The number is assumed to be positive.
+ *
+ * n       Multi-precision integer to encode.
+ * output  Buffer to write into.
+ * returns BUFFER_E when the data is too long for the buffer.
+ *         MP_TO_E when encoding the integer fails.
+ *         Otherwise, the number of bytes added to the buffer.
+ */
+static int SetASNIntRSA(mp_int* n, byte* output)
+{
+    int idx = 0;
+    int leadingBit;
+    int length;
+    int err;
+
+    leadingBit = wc_Rsa_leading_bit(n);
+    length = wc_Rsa_unsigned_bin_size(n);
+    idx = SetASNInt(length, leadingBit ? 0x80 : 0x00, output);
+    if ((idx + length) > MAX_RSA_INT_SZ) /* header is already written */
+        return BUFFER_E;
+
+    err = wc_Rsa_to_unsigned_bin(n, output + idx, length);
+    if (err != MP_OKAY)
+        return MP_TO_E;
+    idx += length;
+
+    return idx; /* header bytes plus magnitude bytes */
+}
+#endif /* !NO_RSA && HAVE_USER_RSA &&
+          WOLFSSL_CERT_GEN */
 
 /* Windows header clash for WinCE using GetVersion */
 WOLFSSL_LOCAL int GetMyVersion(const byte* input, word32* inOutIdx,
@@ -602,8 +868,6 @@ WOLFSSL_LOCAL int GetMyVersion(const byte* input, word32* inOutIdx,
 {
     word32 idx = *inOutIdx;
 
-    WOLFSSL_ENTER("GetMyVersion");
-
     if ((idx + MIN_VERSION_SZ) > maxIdx)
         return ASN_PARSE_E;
 
@@ -675,30 +939,15 @@ static int GetExplicitVersion(const byte* input, word32* inOutIdx, int* version,
     return 0;
 }
 
-int GetInt(mp_int* mpi, const byte* input, word32* inOutIdx,
-                  word32 maxIdx)
+int GetInt(mp_int* mpi, const byte* input, word32* inOutIdx, word32 maxIdx)
 {
     word32 idx = *inOutIdx;
-    byte   b;
+    int    ret;
     int    length;
 
-    if ((idx + 1) > maxIdx)
-        return BUFFER_E;
-
-    b = input[idx++];
-    if (b != ASN_INTEGER)
-        return ASN_PARSE_E;
-
-    if (GetLength(input, &idx, &length, maxIdx) < 0)
-        return ASN_PARSE_E;
-
-    if (length > 0) {
-        /* remove leading zero */
-        if ( (b = input[idx++]) == 0x00)
-            length--;
-        else
-            idx--;
-    }
+    ret = GetASNInt(input, &idx, &length, maxIdx);
+    if (ret != 0)
+        return ret;
 
     if (mp_init(mpi) != MP_OKAY)
         return MP_INIT_E;
@@ -708,72 +957,118 @@ int GetInt(mp_int* mpi, const byte* input, word32* inOutIdx,
         return ASN_GETINT_E;
     }
 
+#ifdef HAVE_WOLF_BIGINT
+    if (wc_bigint_from_unsigned_bin(&mpi->raw, input + idx, length) != 0) {
+        mp_clear(mpi);
+        return ASN_GETINT_E;
+    }
+#endif /* HAVE_WOLF_BIGINT */
+
     *inOutIdx = idx + length;
+
     return 0;
 }
 
-#if !defined(NO_RSA) && !defined(HAVE_USER_RSA)
-static int GetIntRsa(RsaKey* key, mp_int* mpi, const byte* input,
-                        word32* inOutIdx, word32 maxIdx)
+static int CheckBitString(const byte* input, word32* inOutIdx, int* len,
+                          word32 maxIdx, int zeroBits, byte* unusedBits)
 {
     word32 idx = *inOutIdx;
-    byte   b;
     int    length;
-
-    (void)key;
+    byte   b;       /* unused-bits count: first content byte of the BIT STRING */
 
     if ((idx + 1) > maxIdx)
         return BUFFER_E;
 
-    b = input[idx++];
-    if (b != ASN_INTEGER)
-        return ASN_PARSE_E;
+    if (input[idx++] != ASN_BIT_STRING)
+        return ASN_BITSTR_E;
 
     if (GetLength(input, &idx, &length, maxIdx) < 0)
         return ASN_PARSE_E;
 
-    if (length > 0) {
-        /* remove leading zero */
-        if ( (b = input[idx++]) == 0x00)
-            length--;
-        else
-            idx--;
+    if (length <= 0)    /* must hold the unused-bits byte; avoids over-read */
+        return ASN_PARSE_E;
+    b = input[idx];
+    if (zeroBits && b != 0x00)  /* caller requires a whole number of bytes */
+        return ASN_EXPECT_0_E;
+    if (b >= 0x08)
+        return ASN_PARSE_E;
+    if (b != 0 && ((byte)(input[idx + length - 1] << (8 - b)) != 0 ||
+                   ((input[idx + length - 1] >> b) & 0x01) != 0x01)) {
+        return ASN_PARSE_E; /* low b bits must be 0 and the next bit up 1 */
     }
+    idx++;
+    length--;
 
-#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
-    if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA) {
-        XMEMSET(mpi, 0, sizeof(mp_int));
-        mpi->used = length;
-    #ifdef USE_FAST_MATH
-        if (length > (FP_SIZE * (int)sizeof(fp_digit))) {
-            return MEMORY_E;
-        }
-        mpi->dpraw = (byte*)mpi->dp;
-    #else
-        mpi->dpraw = (byte*)XMALLOC(length, key->heap, DYNAMIC_TYPE_ASYNC_RSA);
-    #endif
-        if (mpi->dpraw == NULL) {
-            return MEMORY_E;
-        }
+    *inOutIdx = idx;            /* index of the first data byte */
+    if (len != NULL)            /* optional: data length without count byte */
+        *len = length;
+    if (unusedBits != NULL)     /* optional: the unused-bits count itself */
+        *unusedBits = b;
 
-        XMEMCPY(mpi->dpraw, input + idx, length);
-    }
-    else
-#endif /* WOLFSSL_ASYNC_CRYPT && HAVE_CAVIUM */
-    {
-        if (mp_init(mpi) != MP_OKAY)
-            return MP_INIT_E;
-
-        if (mp_read_unsigned_bin(mpi, (byte*)input + idx, length) != 0) {
-            mp_clear(mpi);
-            return ASN_GETINT_E;
-        }
-    }
-
-    *inOutIdx = idx + length;
     return 0;
 }
-#endif /* !NO_RSA && !HAVE_USER_RSA */
+
+#if (!defined(NO_RSA) && (defined(WOLFSSL_CERT_GEN) || \
+                          (defined(WOLFSSL_KEY_GEN) && \
+                           !defined(HAVE_USER_RSA)))) || \
+    (defined(HAVE_ECC) && (defined(WOLFSSL_CERT_GEN) || \
+                           defined(WOLFSSL_KEY_GEN)))
+/* Set the DER/BER encoding of the ASN.1 BIT_STRING header.
+ *
+ * len         Length of data to encode.
+ * unusedBits  The number of unused bits in the last byte of data.
+ *             That is, the number of least significant zero bits before a one.
+ *             The last byte is the most-significant non-zero byte of a number.
+ * output      Buffer to write into.
+ * returns the number of bytes added to the buffer.
+ */
+static word32 SetBitString(word32 len, byte unusedBits, byte* output)
+{
+    word32 idx = 0;
+
+    output[idx++] = ASN_BIT_STRING;
+    idx += SetLength(len + 1, output + idx); /* +1 for the unused-bits byte */
+    output[idx++] = unusedBits;
+
+    return idx; /* header only; the caller appends the len data bytes */
+}
+
+#ifdef WOLFSSL_CERT_EXT
+/* Set the DER/BER encoding of the ASN.1 BIT_STRING with a 16-bit value.
+ * The low byte of val is written first; the high byte only when non-zero.
+ * val         16-bit value to encode.
+ * output      Buffer to write into.
+ * returns the number of bytes added to the buffer.
+ */
+static word32 SetBitString16Bit(word16 val, byte* output)
+{
+    word32 idx;
+    int    len;
+    byte   lastByte;
+    byte   unusedBits = 0;
+
+    if ((val >> 8) != 0) {
+        len = 2;
+        lastByte = (byte)(val >> 8);
+    }
+    else {
+        len = 1;
+        lastByte = (byte)val;
+    }
+
+    while (unusedBits < 7 && ((lastByte >> unusedBits) & 0x01) == 0x00)
+        unusedBits++;   /* bounded so val == 0 cannot shift out of range */
+
+    idx = SetBitString(len, unusedBits, output);
+    output[idx++] = (byte)val;
+    if (len > 1)        /* emit exactly the len bytes the header declares */
+        output[idx++] = (byte)(val >> 8);
+    return idx;
+}
+#endif /* WOLFSSL_CERT_EXT */
+#endif /* (!NO_RSA && (WOLFSSL_CERT_GEN || (WOLFSSL_KEY_GEN && !HAVE_USER_RSA)))
+          || (HAVE_ECC && (WOLFSSL_CERT_GEN || WOLFSSL_KEY_GEN)) */
+
 
 
 /* hashType */
@@ -1342,6 +1637,55 @@ int DecodeObjectId(const byte* in, word32 inSz, word16* out, word32* outSz)
 }
 #endif /* HAVE_OID_DECODING */
 
+/* Get the DER/BER encoding of an ASN.1 OBJECT_ID header.
+ *
+ * input     Buffer holding DER/BER encoded data.
+ * inOutIdx  Current index into buffer to parse.
+ * len       The number of bytes in the ASN.1 data.
+ * maxIdx    Length of data in buffer.
+ * returns BUFFER_E when there is not enough data to parse.
+ *         ASN_OBJECT_ID_E when the OBJECT_ID tag is not found.
+ *         ASN_PARSE_E when length is invalid.
+ *         Otherwise, 0 to indicate success.
+ */
+static int GetASNObjectId(const byte* input, word32* inOutIdx, int* len,
+                          word32 maxIdx)
+{
+    word32 idx = *inOutIdx;
+    byte   b;
+    int    length;
+
+    if ((idx + 1) > maxIdx)
+        return BUFFER_E;
+
+    b = input[idx++];
+    if (b != ASN_OBJECT_ID)
+        return ASN_OBJECT_ID_E;
+
+    if (GetLength(input, &idx, &length, maxIdx) < 0)
+        return ASN_PARSE_E;
+
+    *len = length;
+    *inOutIdx = idx; /* left at the first OID data byte; data is not consumed */
+    return 0;
+}
+
+/* Set the DER/BER encoding of the ASN.1 OBJECT_ID header.
+ *
+ * len         Length of the OBJECT_ID data.
+ * output      Buffer to write into.
+ * returns the number of bytes added to the buffer.
+ */
+static int SetObjectId(int len, byte* output)
+{
+    int idx = 0;
+
+    output[idx++] = ASN_OBJECT_ID;
+    idx += SetLength(len, output + idx);
+
+    return idx; /* header only; the caller appends the len OID bytes */
+}
+
 int GetObjectId(const byte* input, word32* inOutIdx, word32* oid,
                                   word32 oidType, word32 maxIdx)
 {
@@ -1351,18 +1695,14 @@ int GetObjectId(const byte* input, word32* inOutIdx, word32* oid,
     word32 actualOidSz = 0;
     const byte* actualOid;
 #endif /* NO_VERIFY_OID */
-    byte   b;
 
     (void)oidType;
     WOLFSSL_ENTER("GetObjectId()");
     *oid = 0;
 
-    b = input[idx++];
-    if (b != ASN_OBJECT_ID)
-        return ASN_OBJECT_ID_E;
-
-    if (GetLength(input, &idx, &length, maxIdx) < 0)
-        return ASN_PARSE_E;
+    ret = GetASNObjectId(input, &idx, &length, maxIdx);
+    if (ret != 0)
+        return ret;
 
 #ifndef NO_VERIFY_OID
     actualOid = &input[idx];
@@ -1430,38 +1770,30 @@ int GetObjectId(const byte* input, word32* inOutIdx, word32* oid,
 }
 
 
-#ifndef NO_RSA
-#ifndef HAVE_USER_RSA
-#if defined(OPENSSL_EXTRA) || defined(RSA_DECODE_EXTRA)
+#if defined(HAVE_ECC) || (!defined(NO_RSA) && !defined(HAVE_USER_RSA) && (defined(OPENSSL_EXTRA) || defined(RSA_DECODE_EXTRA)))
 static int SkipObjectId(const byte* input, word32* inOutIdx, word32 maxIdx)
 {
     word32 idx = *inOutIdx;
     int    length;
+    int ret;
 
-    if ((idx + 1) > maxIdx)
-        return BUFFER_E;
-
-    if (input[idx++] != ASN_OBJECT_ID)
-        return ASN_OBJECT_ID_E;
-
-    if (GetLength(input, &idx, &length, maxIdx) < 0)
-        return ASN_PARSE_E;
+    ret = GetASNObjectId(input, &idx, &length, maxIdx);
+    if (ret != 0)
+        return ret;
 
     idx += length;
     *inOutIdx = idx;
 
     return 0;
 }
-#endif /* OPENSSL_EXTRA || RSA_DECODE_EXTRA */
-#endif /* !HAVE_USER_RSA */
-#endif /* !NO_RSA */
+#endif
 
 WOLFSSL_LOCAL int GetAlgoId(const byte* input, word32* inOutIdx, word32* oid,
                      word32 oidType, word32 maxIdx)
 {
     int    length;
     word32 idx = *inOutIdx;
-    byte   b;
+    int    ret;
     *oid = 0;
 
     WOLFSSL_ENTER("GetAlgoId");
@@ -1473,16 +1805,10 @@ WOLFSSL_LOCAL int GetAlgoId(const byte* input, word32* inOutIdx, word32* oid,
         return ASN_OBJECT_ID_E;
 
     /* could have NULL tag and 0 terminator, but may not */
-    b = input[idx];
-
-    if (b == ASN_TAG_NULL) {
-        if ((idx + 1) > maxIdx)
-            return BUFFER_E;
-
-        idx++;
-        b = input[idx++];
-        if (b != 0)
-            return ASN_EXPECT_0_E;
+    if (input[idx] == ASN_TAG_NULL) {
+        ret = GetASNNull(input, &idx, maxIdx);
+        if (ret != 0)
+            return ret;
     }
 
     *inOutIdx = idx;
@@ -1506,14 +1832,14 @@ int wc_RsaPrivateKeyDecode(const byte* input, word32* inOutIdx, RsaKey* key,
 
     key->type = RSA_PRIVATE;
 
-    if (GetIntRsa(key, &key->n,  input, inOutIdx, inSz) < 0 ||
-        GetIntRsa(key, &key->e,  input, inOutIdx, inSz) < 0 ||
-        GetIntRsa(key, &key->d,  input, inOutIdx, inSz) < 0 ||
-        GetIntRsa(key, &key->p,  input, inOutIdx, inSz) < 0 ||
-        GetIntRsa(key, &key->q,  input, inOutIdx, inSz) < 0 ||
-        GetIntRsa(key, &key->dP, input, inOutIdx, inSz) < 0 ||
-        GetIntRsa(key, &key->dQ, input, inOutIdx, inSz) < 0 ||
-        GetIntRsa(key, &key->u,  input, inOutIdx, inSz) < 0 )  return ASN_RSA_KEY_E;
+    if (GetInt(&key->n,  input, inOutIdx, inSz) < 0 ||
+        GetInt(&key->e,  input, inOutIdx, inSz) < 0 ||
+        GetInt(&key->d,  input, inOutIdx, inSz) < 0 ||
+        GetInt(&key->p,  input, inOutIdx, inSz) < 0 ||
+        GetInt(&key->q,  input, inOutIdx, inSz) < 0 ||
+        GetInt(&key->dP, input, inOutIdx, inSz) < 0 ||
+        GetInt(&key->dQ, input, inOutIdx, inSz) < 0 ||
+        GetInt(&key->u,  input, inOutIdx, inSz) < 0 )  return ASN_RSA_KEY_E;
 
     return 0;
 }
@@ -1526,6 +1852,7 @@ int ToTraditionalInline(const byte* input, word32* inOutIdx, word32 sz)
 {
     word32 idx, oid;
     int    version, length;
+    int    ret;
 
     if (input == NULL || inOutIdx == NULL)
         return BAD_FUNC_ARG;
@@ -1542,18 +1869,13 @@ int ToTraditionalInline(const byte* input, word32* inOutIdx, word32 sz)
         return ASN_PARSE_E;
 
     if (input[idx] == ASN_OBJECT_ID) {
-        /* pkcs8 ecc uses slightly different format */
-        idx++;  /* past id */
-        if (GetLength(input, &idx, &length, sz) < 0)
+        if (SkipObjectId(input, &idx, sz) < 0)
             return ASN_PARSE_E;
-        idx += length;  /* over sub id, key input will verify */
     }
 
-    if (input[idx++] != ASN_OCTET_STRING)
-        return ASN_PARSE_E;
-
-    if (GetLength(input, &idx, &length, sz) < 0)
-        return ASN_PARSE_E;
+    ret = GetOctetString(input, &idx, &length, sz);
+    if (ret < 0)
+        return ret;
 
     *inOutIdx = idx;
 
@@ -1685,17 +2007,13 @@ int wc_CreatePKCS8Key(byte* out, word32* outSz, byte* key, word32 keySz,
          * pkcs8 ecc uses slightly different format. Places curve oid in
          * buffer */
         if (curveOID != NULL && oidSz > 0) {
-            out[keyIdx++] = ASN_OBJECT_ID; tmpSz++;
-            sz = SetLength(oidSz, out + keyIdx);
+            sz = SetObjectId(oidSz, out + keyIdx);
             keyIdx += sz; tmpSz += sz;
             XMEMCPY(out + keyIdx, curveOID, oidSz);
             keyIdx += oidSz; tmpSz += oidSz;
         }
 
-        out[keyIdx] = ASN_OCTET_STRING;
-        keyIdx++; tmpSz++;
-
-        sz = SetLength(keySz, out + keyIdx);
+        sz = SetOctetString(keySz, out + keyIdx);
         keyIdx += sz; tmpSz += sz;
         XMEMCPY(out + keyIdx, key, keySz);
         tmpSz += keySz;
@@ -2166,13 +2484,9 @@ int ToTraditionalEnc(byte* input, word32 sz,const char* password,int passwordSz)
         ERROR_OUT(ASN_PARSE_E, exit_tte);
     }
 
-    if (input[inOutIdx++] != ASN_OCTET_STRING) {
-        ERROR_OUT(ASN_PARSE_E, exit_tte);
-    }
-
-    if (GetLength(input, &inOutIdx, &saltSz, sz) < 0) {
-        ERROR_OUT(ASN_PARSE_E, exit_tte);
-    }
+    ret = GetOctetString(input, &inOutIdx, &saltSz, sz);
+    if (ret < 0)
+        goto exit_tte;
 
     if (saltSz > MAX_SALT_SIZE) {
         ERROR_OUT(ASN_PARSE_E, exit_tte);
@@ -2210,13 +2524,9 @@ int ToTraditionalEnc(byte* input, word32 sz,const char* password,int passwordSz)
             ERROR_OUT(ASN_PARSE_E, exit_tte); /* PKCS v2 algo id error */
         }
 
-        if (input[inOutIdx++] != ASN_OCTET_STRING) {
-            ERROR_OUT(ASN_PARSE_E, exit_tte);
-        }
-
-        if (GetLength(input, &inOutIdx, &length, sz) < 0) {
-            ERROR_OUT(ASN_PARSE_E, exit_tte);
-        }
+        ret = GetOctetString(input, &inOutIdx, &length, sz);
+        if (ret < 0)
+            goto exit_tte;
 
         if (length > MAX_IV_SIZE) {
             ERROR_OUT(ASN_PARSE_E, exit_tte);
@@ -2226,13 +2536,9 @@ int ToTraditionalEnc(byte* input, word32 sz,const char* password,int passwordSz)
         inOutIdx += length;
     }
 
-    if (input[inOutIdx++] != ASN_OCTET_STRING) {
-        ERROR_OUT(ASN_PARSE_E, exit_tte);
-    }
-
-    if (GetLength(input, &inOutIdx, &length, sz) < 0) {
-        ERROR_OUT(ASN_PARSE_E, exit_tte);
-    }
+    ret = GetOctetString(input, &inOutIdx, &length, sz);
+    if (ret < 0)
+        goto exit_tte;
 
     ret = DecryptKey(password, passwordSz, salt, saltSz, iterations, id,
                                    input + inOutIdx, length, version, cbcIv);
@@ -2295,13 +2601,9 @@ int DecryptContent(byte* input, word32 sz,const char* password,int passwordSz)
         ERROR_OUT(ASN_PARSE_E, exit_dc);
     }
 
-    if (input[inOutIdx++] != ASN_OCTET_STRING) {
-        ERROR_OUT(ASN_PARSE_E, exit_dc);
-    }
-
-    if (GetLength(input, &inOutIdx, &saltSz, sz) < 0) {
-        ERROR_OUT(ASN_PARSE_E, exit_dc);
-    }
+    ret = GetOctetString(input, &inOutIdx, &saltSz, sz);
+    if (ret < 0)
+        goto exit_dc;
 
     if (saltSz > MAX_SALT_SIZE) {
         ERROR_OUT(ASN_PARSE_E, exit_dc);
@@ -2339,23 +2641,15 @@ int DecryptContent(byte* input, word32 sz,const char* password,int passwordSz)
             ERROR_OUT(ASN_PARSE_E, exit_dc); /* PKCS v2 algo id error */
         }
 
-        if ((inOutIdx + 1) > sz) {
-            ERROR_OUT(BUFFER_E, exit_dc);
-        }
-
-        if (input[inOutIdx++] != ASN_OCTET_STRING) {
-            ERROR_OUT(ASN_PARSE_E, exit_dc);
-        }
-
-        if (GetLength(input, &inOutIdx, &length, sz) < 0) {
-            ERROR_OUT(ASN_PARSE_E, exit_dc);
-        }
+        ret = GetOctetString(input, &inOutIdx, &length, sz);
+        if (ret < 0)
+            goto exit_dc;
 
         XMEMCPY(cbcIv, &input[inOutIdx], length);
         inOutIdx += length;
     }
 
-    if (input[inOutIdx++] != ASN_LONG_LENGTH) {
+    if (input[inOutIdx++] != (ASN_CONTEXT_SPECIFIC | 0)) {
         ERROR_OUT(ASN_PARSE_E, exit_dc);
     }
 
@@ -2392,6 +2686,7 @@ int wc_RsaPublicKeyDecode(const byte* input, word32* inOutIdx, RsaKey* key,
 #if defined(OPENSSL_EXTRA) || defined(RSA_DECODE_EXTRA)
     byte b;
 #endif
+    int ret;
 
     if (input == NULL || inOutIdx == NULL || key == NULL)
         return BAD_FUNC_ARG;
@@ -2414,31 +2709,17 @@ int wc_RsaPublicKeyDecode(const byte* input, word32* inOutIdx, RsaKey* key,
         if (SkipObjectId(input, inOutIdx, inSz) < 0)
             return ASN_PARSE_E;
 
-        /* could have NULL tag and 0 terminator, but may not */
-        b = input[(*inOutIdx)++];
-
-        if (b == ASN_TAG_NULL) {
-            b = input[(*inOutIdx)++];
-            if (b != 0)
-                return ASN_EXPECT_0_E;
-        }
-        else {
-            /* go back, didn't have it */
-            (*inOutIdx)--;
+        /* Optional NULL ASN.1 tag */
+        if (input[*inOutIdx] == ASN_TAG_NULL) {
+            ret = GetASNNull(input, inOutIdx, inSz);
+            if (ret != 0)
+                return ret;
         }
 
         /* should have bit tag length and seq next */
-        b = input[(*inOutIdx)++];
-        if (b != ASN_BIT_STRING)
-            return ASN_BITSTR_E;
-
-        if (GetLength(input, inOutIdx, &length, inSz) <= 0)
-            return ASN_PARSE_E;
-
-        /* could have 0 */
-        b = input[(*inOutIdx)++];
-        if (b != 0)
-            (*inOutIdx)--;
+        ret = CheckBitString(input, inOutIdx, NULL, inSz, 1, NULL);
+        if (ret != 0)
+            return ret;
 
         if (GetSequence(input, inOutIdx, &length, inSz) < 0)
             return ASN_PARSE_E;
@@ -2510,26 +2791,15 @@ int wc_DhParamsLoad(const byte* input, word32 inSz, byte* p, word32* pInOutSz,
                  byte* g, word32* gInOutSz)
 {
     word32 idx = 0;
-    byte   b;
+    int    ret;
     int    length;
 
     if (GetSequence(input, &idx, &length, inSz) <= 0)
         return ASN_PARSE_E;
 
-    b = input[idx++];
-    if (b != ASN_INTEGER)
-        return ASN_PARSE_E;
-
-    if (GetLength(input, &idx, &length, inSz) < 0)
-        return ASN_PARSE_E;
-
-    if (length > 0) {
-        /* remove leading zero */
-        if ((b = input[idx++]) == 0x00)
-            length--;
-        else
-            idx--;
-    }
+    ret = GetASNInt(input, &idx, &length, inSz);
+    if (ret != 0)
+        return ret;
 
     if (length <= (int)*pInOutSz) {
         XMEMCPY(p, &input[idx], length);
@@ -2540,15 +2810,9 @@ int wc_DhParamsLoad(const byte* input, word32 inSz, byte* p, word32* pInOutSz,
     }
     idx += length;
 
-    if ((idx + 1) > inSz)
-        return BUFFER_E;
-
-    b = input[idx++];
-    if (b != ASN_INTEGER)
-        return ASN_PARSE_E;
-
-    if (GetLength(input, &idx, &length, inSz) < 0)
-        return ASN_PARSE_E;
+    ret = GetASNInt(input, &idx, &length, inSz);
+    if (ret != 0)
+        return ret;
 
     if (length <= (int)*gInOutSz) {
         XMEMCPY(g, &input[idx], length);
@@ -2640,7 +2904,7 @@ int wc_DsaKeyToDer(DsaKey* key, byte* output, word32 inLen)
 {
     word32 seqSz, verSz, rawLen, intTotalLen = 0;
     word32 sizes[DSA_INTS];
-    int    i, j, outLen, ret = 0, lbit;
+    int    i, j, outLen, ret = 0, mpSz;
 
     byte  seq[MAX_SEQ_SZ];
     byte  ver[MAX_VERSION_SZ];
@@ -2659,10 +2923,7 @@ int wc_DsaKeyToDer(DsaKey* key, byte* output, word32 inLen)
     for (i = 0; i < DSA_INTS; i++) {
         mp_int* keyInt = GetDsaInt(key, i);
 
-        /* leading zero */
-        lbit = mp_leading_bit(keyInt);
-        rawLen = mp_unsigned_bin_size(keyInt) + lbit;
-
+        rawLen = mp_unsigned_bin_size(keyInt) + 1;
         tmps[i] = (byte*)XMALLOC(rawLen + MAX_SEQ_SZ, key->heap,
                                                               DYNAMIC_TYPE_DSA);
         if (tmps[i] == NULL) {
@@ -2670,30 +2931,12 @@ int wc_DsaKeyToDer(DsaKey* key, byte* output, word32 inLen)
             break;
         }
 
-        tmps[i][0] = ASN_INTEGER;
-        sizes[i] = SetLength(rawLen, tmps[i] + 1) + 1 + lbit; /* tag & lbit */
-
-        if (sizes[i] <= MAX_SEQ_SZ) {
-            int err;
-
-            /* leading zero */
-            if (lbit)
-                tmps[i][sizes[i]-1] = 0x00;
-
-            err = mp_to_unsigned_bin(keyInt, tmps[i] + sizes[i]);
-            if (err == MP_OKAY) {
-                sizes[i] += (rawLen-lbit); /* lbit included in rawLen */
-                intTotalLen += sizes[i];
-            }
-            else {
-                ret = err;
-                break;
-            }
-        }
-        else {
-            ret = ASN_INPUT_E;
+        mpSz = SetASNIntMP(keyInt, -1, tmps[i]);
+        if (mpSz < 0) {
+            ret = mpSz;
             break;
         }
+        intTotalLen += (sizes[i] = mpSz);
     }
 
     if (ret != 0) {
@@ -2847,6 +3090,9 @@ void InitDecodedCert(DecodedCert* cert, byte* source, word32 inSz, void* heap)
     XMEMSET(cert->extCertPolicies, 0, MAX_CERTPOL_NB*MAX_CERTPOL_SZ);
     cert->extCertPoliciesNb = 0;
 #endif
+
+    cert->ca = NULL;
+    InitSignatureCtx(&cert->sigCtx, heap, INVALID_DEVID);
 }
 
 
@@ -2907,6 +3153,7 @@ void FreeDecodedCert(DecodedCert* cert)
     if (cert->subjectName.fullName != NULL)
         XFREE(cert->subjectName.fullName, cert->heap, DYNAMIC_TYPE_X509);
 #endif /* OPENSSL_EXTRA */
+    FreeSignatureCtx(&cert->sigCtx);
 }
 
 static int GetCertHeader(DecodedCert* cert)
@@ -2994,18 +3241,11 @@ static int GetKey(DecodedCert* cert)
    #ifndef NO_RSA
         case RSAk:
         {
-            byte b = cert->source[cert->srcIdx++];
-            if (b != ASN_BIT_STRING)
-                return ASN_BITSTR_E;
-
-            if (GetLength(cert->source, &cert->srcIdx, &length,
-                                                           cert->maxIdx) <= 0) {
-                return ASN_PARSE_E;
-            }
-
-            b = cert->source[cert->srcIdx++];
-            if (b != 0x00)
-                return ASN_EXPECT_0_E;
+            int ret;
+            ret = CheckBitString(cert->source, &cert->srcIdx, NULL,
+                                 cert->maxIdx, 1, NULL);
+            if (ret != 0)
+                return ret;
 
             return StoreRsaKey(cert);
         }
@@ -3078,7 +3318,7 @@ static int GetKey(DecodedCert* cert)
     #ifdef HAVE_ECC
         case ECDSAk:
         {
-            byte b;
+            int ret;
 
             if (GetObjectId(cert->source, &cert->srcIdx,
                             &cert->pkCurveOID, oidCurveType, cert->maxIdx) < 0)
@@ -3088,21 +3328,10 @@ static int GetKey(DecodedCert* cert)
                 return ECC_CURVE_OID_E;
 
             /* key header */
-            b = cert->source[cert->srcIdx++];
-            if (b != ASN_BIT_STRING)
-                return ASN_BITSTR_E;
-
-            if (GetLength(cert->source, &cert->srcIdx, &length,
-                                                           cert->maxIdx) <= 0) {
-                return ASN_PARSE_E;
-            }
-
-            b = cert->source[cert->srcIdx++];
-            if (b != 0x00)
-                return ASN_EXPECT_0_E;
-
-            /* actual key, use length - 1 since ate preceding 0 */
-            length -= 1;
+            ret = CheckBitString(cert->source, &cert->srcIdx, &length,
+                                 cert->maxIdx, 1, NULL);
+            if (ret != 0)
+                return ret;
 
             cert->publicKey = (byte*) XMALLOC(length, cert->heap,
                                               DYNAMIC_TYPE_PUBLIC_KEY);
@@ -3150,10 +3379,8 @@ static int GetName(DecodedCert* cert, int nameType)
     if (cert->source[cert->srcIdx] == ASN_OBJECT_ID) {
         WOLFSSL_MSG("Trying optional prefix...");
 
-        if (GetLength(cert->source, &cert->srcIdx, &length, cert->maxIdx) < 0)
+        if (SkipObjectId(cert->source, &cert->srcIdx, cert->maxIdx) < 0)
             return ASN_PARSE_E;
-
-        cert->srcIdx += length;
         WOLFSSL_MSG("Got optional prefix");
     }
 
@@ -3202,12 +3429,9 @@ static int GetName(DecodedCert* cert, int nameType)
         if (GetSequence(cert->source, &cert->srcIdx, &dummy, cert->maxIdx) <= 0)
             return ASN_PARSE_E;
 
-        b = cert->source[cert->srcIdx++];
-        if (b != ASN_OBJECT_ID)
-            return ASN_OBJECT_ID_E;
-
-        if (GetLength(cert->source, &cert->srcIdx, &oidSz, cert->maxIdx) < 0)
-            return ASN_PARSE_E;
+        ret = GetASNObjectId(cert->source, &cert->srcIdx, &oidSz, cert->maxIdx);
+        if (ret != 0)
+            return ret;
 
         /* make sure there is room for joint */
         if ((cert->srcIdx + sizeof(joint)) > cert->maxIdx)
@@ -3919,37 +4143,33 @@ int DecodeToKey(DecodedCert* cert, int verify)
 
 static int GetSignature(DecodedCert* cert)
 {
-    int    length;
-    byte   b = cert->source[cert->srcIdx++];
-
-    if (b != ASN_BIT_STRING)
-        return ASN_BITSTR_E;
-
-    if (GetLength(cert->source, &cert->srcIdx, &length, cert->maxIdx) < 0)
-        return ASN_PARSE_E;
+    int length;
+    int ret;
+    ret = CheckBitString(cert->source, &cert->srcIdx, &length, cert->maxIdx, 1,
+                         NULL);
+    if (ret != 0)
+        return ret;
 
     cert->sigLength = length;
-
-    if (length > 0) {
-        b = cert->source[cert->srcIdx++];
-        if (b != 0x00)
-            return ASN_EXPECT_0_E;
-        cert->sigLength--;
-    }
-
     cert->signature = &cert->source[cert->srcIdx];
     cert->srcIdx += cert->sigLength;
 
     return 0;
 }
 
-static word32 SetDigest(const byte* digest, word32 digSz, byte* output)
+static word32 SetOctetString8Bit(word32 len, byte* output)
 {
     output[0] = ASN_OCTET_STRING;
-    output[1] = (byte)digSz;
-    XMEMCPY(&output[2], digest, digSz);
+    output[1] = (byte)len;
+    return 2;
+}
 
-    return digSz + 2;
+static word32 SetDigest(const byte* digest, word32 digSz, byte* output)
+{
+    word32 idx = SetOctetString8Bit(digSz, output);
+    XMEMCPY(&output[idx], digest, digSz);
+
+    return idx + digSz;
 }
 
 
@@ -4021,7 +4241,9 @@ WOLFSSL_LOCAL word32 SetExplicit(byte number, word32 len, byte* output)
 
 static int SetCurve(ecc_key* key, byte* output)
 {
+#ifdef HAVE_OID_ENCODING
     int ret;
+#endif
     int idx = 0;
     word32 oidSz = 0;
 
@@ -4039,11 +4261,7 @@ static int SetCurve(ecc_key* key, byte* output)
     oidSz = key->dp->oidSz;
 #endif
 
-    output[0] = ASN_OBJECT_ID;
-    idx++;
-
-    ret = SetLength(oidSz, output+idx);
-    idx += ret;
+    idx += SetObjectId(oidSz, output);
 
 #ifdef HAVE_OID_ENCODING
     ret = EncodeObjectId(key->dp->oid, key->dp->oidSz, output+idx, &oidSz);
@@ -4076,7 +4294,7 @@ WOLFSSL_LOCAL word32 SetAlgoID(int algoOID, byte* output, int type, int curveSz)
 {
     word32 tagSz, idSz, seqSz, algoSz = 0;
     const  byte* algoName = 0;
-    byte   ID_Length[MAX_LENGTH_SZ];
+    byte   ID_Length[1 + MAX_LENGTH_SZ];
     byte   seqArray[MAX_SEQ_SZ + 1];  /* add object_id to end */
 
     tagSz = (type == oidHashType ||
@@ -4090,18 +4308,14 @@ WOLFSSL_LOCAL word32 SetAlgoID(int algoOID, byte* output, int type, int curveSz)
         return 0;
     }
 
-    idSz  = SetLength(algoSz, ID_Length);
-    seqSz = SetSequence(idSz + algoSz + 1 + tagSz + curveSz, seqArray);
-                 /* +1 for object id, curveID of curveSz follows for ecc */
-    seqArray[seqSz++] = ASN_OBJECT_ID;
+    idSz  = SetObjectId(algoSz, ID_Length);
+    seqSz = SetSequence(idSz + algoSz + tagSz + curveSz, seqArray);
 
     XMEMCPY(output, seqArray, seqSz);
     XMEMCPY(output + seqSz, ID_Length, idSz);
     XMEMCPY(output + seqSz + idSz, algoName, algoSz);
-    if (tagSz == 2) {
-        output[seqSz + idSz + algoSz] = ASN_TAG_NULL;
-        output[seqSz + idSz + algoSz + 1] = 0;
-    }
+    if (tagSz == 2)
+        SetASNNull(&output[seqSz + idSz + algoSz]);
 
     return seqSz + idSz + algoSz + tagSz;
 
@@ -4164,274 +4378,347 @@ int wc_GetCTC_HashOID(int type)
     };
 }
 
-/* return true (1) or false (0) for Confirmation */
-static int ConfirmSignature(const byte* buf, word32 bufSz,
-    const byte* key, word32 keySz, word32 keyOID,
-    const byte* sig, word32 sigSz, word32 sigOID,
-    void* heap)
+void InitSignatureCtx(SignatureCtx* sigCtx, void* heap, int devId)
 {
-    int  typeH = 0, digestSz = 0, ret = 0;
-#ifdef WOLFSSL_SMALL_STACK
-    byte* digest;
-#else
-    byte digest[WC_MAX_DIGEST_SIZE];
-#endif
+    if (sigCtx) {
+        XMEMSET(sigCtx, 0, sizeof(SignatureCtx));
+        sigCtx->devId = devId;
+        sigCtx->heap = heap;
+    }
+}
 
-#ifdef WOLFSSL_SMALL_STACK
-    digest = (byte*)XMALLOC(WC_MAX_DIGEST_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (digest == NULL)
-        return 0; /* not confirmed */
+void FreeSignatureCtx(SignatureCtx* sigCtx)
+{
+    if (sigCtx == NULL)
+        return;
+
+    if (sigCtx->digest) {
+        XFREE(sigCtx->digest, sigCtx->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        sigCtx->digest = NULL;
+    }
+#ifndef NO_RSA
+    if (sigCtx->plain) {
+        XFREE(sigCtx->plain, sigCtx->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        sigCtx->plain = NULL;
+    }
 #endif
+    if (sigCtx->key.ptr) {
+        switch (sigCtx->keyOID) {
+        #ifndef NO_RSA
+            case RSAk:
+                wc_FreeRsaKey(sigCtx->key.rsa);
+                XFREE(sigCtx->key.ptr, sigCtx->heap, DYNAMIC_TYPE_RSA);
+                break;
+        #endif /* !NO_RSA */
+        #ifdef HAVE_ECC
+            case ECDSAk:
+                wc_ecc_free(sigCtx->key.ecc);
+                XFREE(sigCtx->key.ecc, sigCtx->heap, DYNAMIC_TYPE_ECC);
+                break;
+        #endif /* HAVE_ECC */
+            default:
+                break;
+        } /* switch (keyOID) */
+        sigCtx->key.ptr = NULL;
+    }
+
+    /* reset state, we are done */
+    sigCtx->state = SIG_STATE_BEGIN;
+}
+
+/* Return codes: 0=Success, Negative (see error-crypt.h), ASN_SIG_CONFIRM_E */
+static int ConfirmSignature(SignatureCtx* sigCtx,
+    const byte* buf, word32 bufSz,
+    const byte* key, word32 keySz, word32 keyOID,
+    const byte* sig, word32 sigSz, word32 sigOID)
+{
+    int ret = 0;
+
+    if (sigCtx == NULL || buf == NULL || bufSz == 0 || key == NULL ||
+        keySz == 0 || sig == NULL || sigSz == 0) {
+        return BAD_FUNC_ARG;
+    }
 
     (void)key;
     (void)keySz;
     (void)sig;
     (void)sigSz;
-    (void)heap;
 
-    switch (sigOID) {
-    #ifndef NO_MD5
-        case CTC_MD5wRSA:
-        if (wc_Md5Hash(buf, bufSz, digest) == 0) {
-            typeH    = MD5h;
-            digestSz = MD5_DIGEST_SIZE;
-        }
-        break;
-    #endif
-    #if defined(WOLFSSL_MD2)
-        case CTC_MD2wRSA:
-        if (wc_Md2Hash(buf, bufSz, digest) == 0) {
-            typeH    = MD2h;
-            digestSz = MD2_DIGEST_SIZE;
-        }
-        break;
-    #endif
-    #ifndef NO_SHA
-        case CTC_SHAwRSA:
-        case CTC_SHAwDSA:
-        case CTC_SHAwECDSA:
-        if (wc_ShaHash(buf, bufSz, digest) == 0) {
-            typeH    = SHAh;
-            digestSz = SHA_DIGEST_SIZE;
-        }
-        break;
-    #endif
-    #ifdef WOLFSSL_SHA224
-        case CTC_SHA224wRSA:
-        case CTC_SHA224wECDSA:
-        if (wc_Sha224Hash(buf, bufSz, digest) == 0) {
-            typeH    = SHA224h;
-            digestSz = SHA224_DIGEST_SIZE;
-        }
-        break;
-    #endif
-    #ifndef NO_SHA256
-        case CTC_SHA256wRSA:
-        case CTC_SHA256wECDSA:
-        if (wc_Sha256Hash(buf, bufSz, digest) == 0) {
-            typeH    = SHA256h;
-            digestSz = SHA256_DIGEST_SIZE;
-        }
-        break;
-    #endif
-    #ifdef WOLFSSL_SHA512
-        case CTC_SHA512wRSA:
-        case CTC_SHA512wECDSA:
-        if (wc_Sha512Hash(buf, bufSz, digest) == 0) {
-            typeH    = SHA512h;
-            digestSz = SHA512_DIGEST_SIZE;
-        }
-        break;
-    #endif
-    #ifdef WOLFSSL_SHA384
-        case CTC_SHA384wRSA:
-        case CTC_SHA384wECDSA:
-        if (wc_Sha384Hash(buf, bufSz, digest) == 0) {
-            typeH    = SHA384h;
-            digestSz = SHA384_DIGEST_SIZE;
-        }
-        break;
-    #endif
-        default:
-            WOLFSSL_MSG("Verify Signature has unsupported type");
-    }
+    WOLFSSL_ENTER("ConfirmSignature");
 
-    if (typeH == 0) {
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(digest, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-        return 0; /* not confirmed */
-    }
-
-    switch (keyOID) {
-    #ifndef NO_RSA
-        case RSAk:
+    switch (sigCtx->state) {
+        case SIG_STATE_BEGIN:
         {
-            word32 idx = 0;
-            int    encodedSigSz, verifySz;
-            byte*  out;
-#ifdef WOLFSSL_SMALL_STACK
-            RsaKey* pubKey;
-            byte* plain;
-            byte* encodedSig;
-#else
-            RsaKey pubKey[1];
-            byte plain[MAX_ENCODED_SIG_SZ];
-            byte encodedSig[MAX_ENCODED_SIG_SZ];
-#endif
-
-#ifdef WOLFSSL_SMALL_STACK
-            pubKey = (RsaKey*)XMALLOC(sizeof(RsaKey), NULL,
-                                                       DYNAMIC_TYPE_TMP_BUFFER);
-            plain = (byte*)XMALLOC(MAX_ENCODED_SIG_SZ, NULL,
-                                                       DYNAMIC_TYPE_TMP_BUFFER);
-            encodedSig = (byte*)XMALLOC(MAX_ENCODED_SIG_SZ, NULL,
-                                                       DYNAMIC_TYPE_TMP_BUFFER);
-
-            if (pubKey == NULL || plain == NULL || encodedSig == NULL) {
-                WOLFSSL_MSG("Failed to allocate memory at ConfirmSignature");
-
-                if (pubKey)
-                    XFREE(pubKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-                if (plain)
-                    XFREE(plain, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-                if (encodedSig)
-                    XFREE(encodedSig, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-
-                break; /* not confirmed */
+            sigCtx->digest = (byte*)XMALLOC(WC_MAX_DIGEST_SIZE, sigCtx->heap,
+                                                    DYNAMIC_TYPE_TMP_BUFFER);
+            if (sigCtx->digest == NULL) {
+                ERROR_OUT(MEMORY_E, exit_cs);
             }
-#endif
-            if (wc_InitRsaKey(pubKey, heap) != 0) {
-                WOLFSSL_MSG("InitRsaKey failed");
-            }
-            else if (sigSz > MAX_ENCODED_SIG_SZ) {
-                WOLFSSL_MSG("Verify Signature is too big");
-            }
-            else if (wc_RsaPublicKeyDecode(key, &idx, pubKey, keySz) < 0) {
-                WOLFSSL_MSG("ASN Key decode error RSA");
-            }
-            else {
-                XMEMCPY(plain, sig, sigSz);
 
-                ret = 0;
-                do {
-                #if defined(WOLFSSL_ASYNC_CRYPT)
-                    ret = wc_RsaAsyncWait(ret, pubKey);
+            /* fall through */
+            sigCtx->state = SIG_STATE_HASH;
+        } /* SIG_STATE_BEGIN */
+
+        case SIG_STATE_HASH:
+        {
+            switch (sigOID) {
+            #ifndef NO_MD5
+                case CTC_MD5wRSA:
+                if ((ret = wc_Md5Hash(buf, bufSz, sigCtx->digest)) == 0) {
+                    sigCtx->typeH    = MD5h;
+                    sigCtx->digestSz = MD5_DIGEST_SIZE;
+                }
+                break;
+            #endif
+            #if defined(WOLFSSL_MD2)
+                case CTC_MD2wRSA:
+                if ((ret = wc_Md2Hash(buf, bufSz, sigCtx->digest)) == 0) {
+                    sigCtx->typeH    = MD2h;
+                    sigCtx->digestSz = MD2_DIGEST_SIZE;
+                }
+                break;
+            #endif
+            #ifndef NO_SHA
+                case CTC_SHAwRSA:
+                case CTC_SHAwDSA:
+                case CTC_SHAwECDSA:
+                if ((ret = wc_ShaHash(buf, bufSz, sigCtx->digest)) == 0) {
+                    sigCtx->typeH    = SHAh;
+                    sigCtx->digestSz = SHA_DIGEST_SIZE;
+                }
+                break;
+            #endif
+            #ifdef WOLFSSL_SHA224
+                case CTC_SHA224wRSA:
+                case CTC_SHA224wECDSA:
+                if ((ret = wc_Sha224Hash(buf, bufSz, sigCtx->digest)) == 0) {
+                    sigCtx->typeH    = SHA224h;
+                    sigCtx->digestSz = SHA224_DIGEST_SIZE;
+                }
+                break;
+            #endif
+            #ifndef NO_SHA256
+                case CTC_SHA256wRSA:
+                case CTC_SHA256wECDSA:
+                if ((ret = wc_Sha256Hash(buf, bufSz, sigCtx->digest)) == 0) {
+                    sigCtx->typeH    = SHA256h;
+                    sigCtx->digestSz = SHA256_DIGEST_SIZE;
+                }
+                break;
+            #endif
+            #ifdef WOLFSSL_SHA512
+                case CTC_SHA512wRSA:
+                case CTC_SHA512wECDSA:
+                if ((ret = wc_Sha512Hash(buf, bufSz, sigCtx->digest)) == 0) {
+                    sigCtx->typeH    = SHA512h;
+                    sigCtx->digestSz = SHA512_DIGEST_SIZE;
+                }
+                break;
+            #endif
+            #ifdef WOLFSSL_SHA384
+                case CTC_SHA384wRSA:
+                case CTC_SHA384wECDSA:
+                if ((ret = wc_Sha384Hash(buf, bufSz, sigCtx->digest)) == 0) {
+                    sigCtx->typeH    = SHA384h;
+                    sigCtx->digestSz = SHA384_DIGEST_SIZE;
+                }
+                break;
+            #endif
+                default:
+                    ret = HASH_TYPE_E;
+                    WOLFSSL_MSG("Verify Signature has unsupported type");
+            }
+
+            if (ret != 0) {
+                goto exit_cs;
+            }
+
+            /* fall through */
+            sigCtx->state = SIG_STATE_KEY;
+        } /* SIG_STATE_HASH */
+
+        case SIG_STATE_KEY:
+        {
+            sigCtx->keyOID = keyOID;
+
+            switch (keyOID) {
+            #ifndef NO_RSA
+                case RSAk:
+                {
+                    word32 idx = 0;
+
+                    sigCtx->key.rsa = (RsaKey*)XMALLOC(sizeof(RsaKey),
+                                                sigCtx->heap, DYNAMIC_TYPE_RSA);
+                    sigCtx->plain = (byte*)XMALLOC(MAX_ENCODED_SIG_SZ,
+                                         sigCtx->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                    if (sigCtx->key.rsa == NULL || sigCtx->plain == NULL) {
+                        ERROR_OUT(MEMORY_E, exit_cs);
+                    } /* NOTE(review): key.rsa may reach cleanup un-inited; confirm */
+
+                    if ((ret = wc_InitRsaKey_ex(sigCtx->key.rsa, sigCtx->heap,
+                                                        sigCtx->devId)) != 0) {
+                        goto exit_cs;
+                    }
+
+                    if (sigSz > MAX_ENCODED_SIG_SZ) {
+                        WOLFSSL_MSG("Verify Signature is too big");
+                        ERROR_OUT(BUFFER_E, exit_cs);
+                    }
+
+                    if ((ret = wc_RsaPublicKeyDecode(key, &idx, sigCtx->key.rsa,
+                                                                 keySz)) != 0) {
+                        WOLFSSL_MSG("ASN Key decode error RSA");
+                        goto exit_cs;
+                    }
+
+                    XMEMCPY(sigCtx->plain, sig, sigSz);
+                    sigCtx->out = NULL;
+                    break;
+                }
+            #endif /* !NO_RSA */
+            #ifdef HAVE_ECC
+                case ECDSAk:
+                {
+                    sigCtx->verify = 0;
+                    sigCtx->key.ecc = (ecc_key*)XMALLOC(sizeof(ecc_key),
+                                                sigCtx->heap, DYNAMIC_TYPE_ECC);
+                    if (sigCtx->key.ecc == NULL) {
+                        ERROR_OUT(MEMORY_E, exit_cs);
+                    }
+
+                    if ((ret = wc_ecc_init_ex(sigCtx->key.ecc, sigCtx->heap,
+                                                          sigCtx->devId)) < 0) {
+                        goto exit_cs;
+                    }
+                    if ((ret = wc_ecc_import_x963(key, keySz,
+                                                        sigCtx->key.ecc)) < 0) {
+                        WOLFSSL_MSG("ASN Key import error ECC");
+                        goto exit_cs;
+                    }
+                    break;
+                }
+            #endif /* HAVE_ECC */
+                default:
+                    WOLFSSL_MSG("Verify Key type unknown");
+                    ret = ASN_UNKNOWN_OID_E;
+                    break;
+            } /* switch (keyOID) */
+
+            if (ret != 0) {
+                goto exit_cs;
+            }
+
+            /* fall through */
+            sigCtx->state = SIG_STATE_DO;
+        } /* SIG_STATE_KEY */
+
+        case SIG_STATE_DO:
+        {
+            switch (keyOID) {
+            #ifndef NO_RSA
+                case RSAk:
+                {
+                    ret = wc_RsaSSL_VerifyInline(sigCtx->plain, sigSz,
+                                                &sigCtx->out, sigCtx->key.rsa);
+                #ifdef WOLFSSL_ASYNC_CRYPT
+                    if (ret == WC_PENDING_E)
+                        sigCtx->asyncDev = &sigCtx->key.rsa->asyncDev;
                 #endif
-                    if (ret >= 0) {
-                        ret = wc_RsaSSL_VerifyInline(plain, sigSz, &out,
-                                                                    pubKey);
-                    }
-                } while (ret == WC_PENDING_E);
-
-                if (ret < 0) {
-                    WOLFSSL_MSG("Rsa SSL verify error");
+                    break;
                 }
-                else {
-                    verifySz = ret;
-                    /* make sure we're right justified */
-                    encodedSigSz =
-                        wc_EncodeSignature(encodedSig, digest, digestSz, typeH);
-                    if (encodedSigSz != verifySz ||
-                                XMEMCMP(out, encodedSig, encodedSigSz) != 0) {
-                        WOLFSSL_MSG("Rsa SSL verify match encode error");
-                    }
-                    else
-                        ret = 1; /* match */
-
-                    #ifdef WOLFSSL_DEBUG_ENCODING
-                    {
-                        int x;
-
-                        printf("wolfssl encodedSig:\n");
-
-                        for (x = 0; x < encodedSigSz; x++) {
-                            printf("%02x ", encodedSig[x]);
-                            if ( (x % 16) == 15)
-                                printf("\n");
-                        }
-
-                        printf("\n");
-                        printf("actual digest:\n");
-
-                        for (x = 0; x < verifySz; x++) {
-                            printf("%02x ", out[x]);
-                            if ( (x % 16) == 15)
-                                printf("\n");
-                        }
-
-                        printf("\n");
-                    }
-                    #endif /* WOLFSSL_DEBUG_ENCODING */
-
+            #endif /* !NO_RSA */
+            #ifdef HAVE_ECC
+                case ECDSAk:
+                {
+                    ret = wc_ecc_verify_hash(sig, sigSz, sigCtx->digest,
+                        sigCtx->digestSz, &sigCtx->verify, sigCtx->key.ecc);
+                #ifdef WOLFSSL_ASYNC_CRYPT
+                    if (ret == WC_PENDING_E)
+                        sigCtx->asyncDev = &sigCtx->key.ecc->asyncDev;
+                #endif
+                    break;
                 }
+            #endif /* HAVE_ECC */
+                default:
+                    break;
+            }  /* switch (keyOID) */
 
+            if (ret < 0) {
+                /* RSA or ECC: non-pending verify errors -> ASN_SIG_CONFIRM_E */
+                if (ret != WC_PENDING_E)
+                    ret = ASN_SIG_CONFIRM_E;
+                goto exit_cs;
+            }
 
-            wc_FreeRsaKey(pubKey);
+            /* fall through */
+            sigCtx->state = SIG_STATE_CHECK;
+        } /* SIG_STATE_DO */
 
-#ifdef WOLFSSL_SMALL_STACK
-            XFREE(pubKey,     NULL, DYNAMIC_TYPE_TMP_BUFFER);
-            XFREE(plain,      NULL, DYNAMIC_TYPE_TMP_BUFFER);
-            XFREE(encodedSig, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-            break;
-        }
-
-    #endif /* NO_RSA */
-    #ifdef HAVE_ECC
-        case ECDSAk:
+        case SIG_STATE_CHECK:
         {
-            int verify = 0;
-#ifdef WOLFSSL_SMALL_STACK
-            ecc_key* pubKey;
-#else
-            ecc_key pubKey[1];
-#endif
+            switch (keyOID) {
+            #ifndef NO_RSA
+                case RSAk:
+                {
+                    int encodedSigSz, verifySz;
+                #ifdef WOLFSSL_SMALL_STACK
+                    byte* encodedSig = (byte*)XMALLOC(MAX_ENCODED_SIG_SZ,
+                                        sigCtx->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                    if (encodedSig == NULL) {
+                        ERROR_OUT(MEMORY_E, exit_cs);
+                    }
+                #else
+                    byte encodedSig[MAX_ENCODED_SIG_SZ];
+                #endif
 
-#ifdef WOLFSSL_SMALL_STACK
-            pubKey = (ecc_key*)XMALLOC(sizeof(ecc_key), NULL,
-                                                       DYNAMIC_TYPE_TMP_BUFFER);
-            if (pubKey == NULL) {
-                WOLFSSL_MSG("Failed to allocate pubKey");
-                break; /* not confirmed */
-            }
-#endif
+                    verifySz = ret;
 
-            if (wc_ecc_init(pubKey) < 0) {
-                WOLFSSL_MSG("Failed to initialize key");
-                break; /* not confirmed */
-            }
-            if (wc_ecc_import_x963(key, keySz, pubKey) < 0) {
-                WOLFSSL_MSG("ASN Key import error ECC");
-            }
-            else {
-                if (wc_ecc_verify_hash(sig, sigSz, digest, digestSz, &verify,
-                                                                pubKey) != 0) {
-                    WOLFSSL_MSG("ECC verify hash error");
+                    /* make sure we're right justified */
+                    encodedSigSz = wc_EncodeSignature(encodedSig,
+                            sigCtx->digest, sigCtx->digestSz, sigCtx->typeH);
+                    if (encodedSigSz == verifySz &&
+                        XMEMCMP(sigCtx->out, encodedSig, encodedSigSz) == 0) {
+                        ret = 0;
+                    }
+                    else {
+                        WOLFSSL_MSG("RSA SSL verify match encode error");
+                        ret = ASN_SIG_CONFIRM_E;
+                    }
+
+                #ifdef WOLFSSL_SMALL_STACK
+                    XFREE(encodedSig, sigCtx->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                #endif /* WOLFSSL_SMALL_STACK: heap hint matches the XMALLOC */
+                    break;
                 }
-                else if (1 != verify) {
-                    WOLFSSL_MSG("ECC Verify didn't match");
-                } else
-                    ret = 1; /* match */
+            #endif /* NO_RSA */
+            #ifdef HAVE_ECC
+                case ECDSAk:
+                {
+                    if (sigCtx->verify == 1) {
+                        ret = 0;
+                    }
+                    else {
+                        WOLFSSL_MSG("ECC Verify didn't match");
+                        ret = ASN_SIG_CONFIRM_E;
+                    }
+                    break;
+                }
+            #endif /* HAVE_ECC */
+                default:
+                    break;
+            }  /* switch (keyOID) */
 
-            }
-            wc_ecc_free(pubKey);
-
-#ifdef WOLFSSL_SMALL_STACK
-            XFREE(pubKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
             break;
-        }
-    #endif /* HAVE_ECC */
-        default:
-            WOLFSSL_MSG("Verify Key type unknown");
-    }
+        } /* SIG_STATE_CHECK */
+    } /* switch (sigCtx->state) */
 
-    (void)digestSz;
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(digest, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+exit_cs:
+
+    WOLFSSL_LEAVE("ConfirmSignature", ret);
+
+    if (ret != WC_PENDING_E) {
+        FreeSignatureCtx(sigCtx);
+    }
 
     return ret;
 }
@@ -4661,7 +4948,7 @@ static int DecodeAltNames(byte* input, int sz, DecodedCert* cert)
             length -= strLen;
             idx    += strLen;
         }
-#ifndef IGNORE_NAME_CONSTRAINTS
+    #ifndef IGNORE_NAME_CONSTRAINTS
         else if (b == (ASN_CONTEXT_SPECIFIC | ASN_RFC822_TYPE)) {
             DNS_entry* emailEntry;
             int strLen;
@@ -4697,13 +4984,14 @@ static int DecodeAltNames(byte* input, int sz, DecodedCert* cert)
             length -= strLen;
             idx    += strLen;
         }
-#endif /* IGNORE_NAME_CONSTRAINTS */
-#ifdef WOLFSSL_SEP
+    #endif /* IGNORE_NAME_CONSTRAINTS */
+    #ifdef WOLFSSL_SEP
         else if (b == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | ASN_OTHER_TYPE))
         {
             int strLen;
             word32 lenStartIdx = idx;
             word32 oid = 0;
+            int    ret;
 
             if (GetLength(input, &idx, &strLen, sz) < 0) {
                 WOLFSSL_MSG("\tfail: other name length");
@@ -4737,14 +5025,10 @@ static int DecodeAltNames(byte* input, int sz, DecodedCert* cert)
                 return ASN_PARSE_E;
             }
 
-            if (input[idx++] != ASN_OBJECT_ID) {
-                WOLFSSL_MSG("\texpected OID");
-                return ASN_PARSE_E;
-            }
-
-            if (GetLength(input, &idx, &strLen, sz) <= 0) {
-                WOLFSSL_MSG("\tfailed: str len");
-                return ASN_PARSE_E;
+            ret = GetASNObjectId(input, &idx, &strLen, sz);
+            if (ret != 0) {
+                WOLFSSL_MSG("\tbad OID");
+                return ret;
             }
 
             cert->hwType = (byte*)XMALLOC(strLen, cert->heap,
@@ -4758,15 +5042,9 @@ static int DecodeAltNames(byte* input, int sz, DecodedCert* cert)
             cert->hwTypeSz = strLen;
             idx += strLen;
 
-            if (input[idx++] != ASN_OCTET_STRING) {
-                WOLFSSL_MSG("\texpected Octet String");
-                return ASN_PARSE_E;
-            }
-
-            if (GetLength(input, &idx, &strLen, sz) < 0) {
-                WOLFSSL_MSG("\tfailed: str len");
-                return ASN_PARSE_E;
-            }
+            ret = GetOctetString(input, &idx, &strLen, sz);
+            if (ret < 0)
+                return ret;
 
             cert->hwSerialNum = (byte*)XMALLOC(strLen + 1, cert->heap,
                                                DYNAMIC_TYPE_X509_EXT);
@@ -4780,7 +5058,7 @@ static int DecodeAltNames(byte* input, int sz, DecodedCert* cert)
             cert->hwSerialNumSz = strLen;
             idx += strLen;
         }
-#endif /* WOLFSSL_SEP */
+    #endif /* WOLFSSL_SEP */
         else {
             int strLen;
             word32 lenStartIdx = idx;
@@ -4802,6 +5080,7 @@ static int DecodeBasicCaConstraint(byte* input, int sz, DecodedCert* cert)
 {
     word32 idx = 0;
     int length = 0;
+    int ret;
 
     WOLFSSL_ENTER("DecodeBasicCaConstraint");
 
@@ -4816,35 +5095,23 @@ static int DecodeBasicCaConstraint(byte* input, int sz, DecodedCert* cert)
     /* If the basic ca constraint is false, this extension may be named, but
      * left empty. So, if the length is 0, just return. */
 
-    if (input[idx++] != ASN_BOOLEAN) {
-        WOLFSSL_MSG("\tfail: constraint not BOOLEAN");
-        return ASN_PARSE_E;
+    ret = GetBoolean(input, &idx, sz);
+    if (ret < 0) {
+        WOLFSSL_MSG("\tfail: constraint not valid BOOLEAN");
+        return ret;
     }
 
-    if (GetLength(input, &idx, &length, sz) <= 0) {
-        WOLFSSL_MSG("\tfail: length");
-        return ASN_PARSE_E;
-    }
-
-    if (input[idx++])
-        cert->isCA = 1;
+    cert->isCA = (byte)ret;
 
     /* If there isn't any more data, return. */
     if (idx >= (word32)sz)
         return 0;
 
-    /* Anything left should be the optional pathlength */
-    if (input[idx++] != ASN_INTEGER) {
-        WOLFSSL_MSG("\tfail: pathlen not INTEGER");
-        return ASN_PARSE_E;
-    }
+    ret = GetInteger7Bit(input, &idx, sz);
+    if (ret < 0)
+        return ret;
 
-    if (input[idx++] != 1) {
-        WOLFSSL_MSG("\tfail: pathlen too long");
-        return ASN_PATHLEN_SIZE_E;
-    }
-
-    cert->pathLength = input[idx];
+    cert->pathLength = (byte)ret;
     cert->pathLengthSet = 1;
 
     return 0;
@@ -5005,10 +5272,10 @@ static int DecodeAuthKeyId(byte* input, int sz, DecodedCert* cert)
         return ASN_PARSE_E;
     }
 
-    #ifdef OPENSSL_EXTRA
-        cert->extAuthKeyIdSrc = &input[idx];
-        cert->extAuthKeyIdSz = length;
-    #endif /* OPENSSL_EXTRA */
+#ifdef OPENSSL_EXTRA
+    cert->extAuthKeyIdSrc = &input[idx];
+    cert->extAuthKeyIdSz = length;
+#endif /* OPENSSL_EXTRA */
 
     if (length == KEYID_SIZE) {
         XMEMCPY(cert->extAuthKeyId, input + idx, length);
@@ -5035,15 +5302,9 @@ static int DecodeSubjKeyId(byte* input, int sz, DecodedCert* cert)
     if (sz <= 0)
         return ASN_PARSE_E;
 
-    if (input[idx++] != ASN_OCTET_STRING) {
-        WOLFSSL_MSG("\tfail: should be an OCTET STRING");
-        return ASN_PARSE_E;
-    }
-
-    if (GetLength(input, &idx, &length, sz) <= 0) {
-        WOLFSSL_MSG("\tfail: extension data length");
-        return ASN_PARSE_E;
-    }
+    ret = GetOctetString(input, &idx, &length, sz);
+    if (ret < 0)
+        return ret;
 
     #ifdef OPENSSL_EXTRA
         cert->extSubjKeyIdSrc = &input[idx];
@@ -5069,23 +5330,12 @@ static int DecodeKeyUsage(byte* input, int sz, DecodedCert* cert)
 {
     word32 idx = 0;
     int length;
+    int ret;
     WOLFSSL_ENTER("DecodeKeyUsage");
 
-    if (sz <= 0)
-        return ASN_PARSE_E;
-
-    if (input[idx++] != ASN_BIT_STRING) {
-        WOLFSSL_MSG("\tfail: key usage expected bit string");
-        return ASN_PARSE_E;
-    }
-
-    if (GetLength(input, &idx, &length, sz) <= 0) {
-        WOLFSSL_MSG("\tfail: key usage bad length");
-        return ASN_PARSE_E;
-    }
-
-    /* pass the unusedBits value */
-    idx++; length--;
+    ret = CheckBitString(input, &idx, &length, sz, 0, NULL);
+    if (ret != 0)
+        return ret;
 
     cert->extKeyUsage = (word16)(input[idx]);
     if (length == 2)
@@ -5107,10 +5357,10 @@ static int DecodeExtKeyUsage(byte* input, int sz, DecodedCert* cert)
         return ASN_PARSE_E;
     }
 
-    #ifdef OPENSSL_EXTRA
-        cert->extExtKeyUsageSrc = input + idx;
-        cert->extExtKeyUsageSz = length;
-    #endif
+#ifdef OPENSSL_EXTRA
+    cert->extExtKeyUsageSrc = input + idx;
+    cert->extExtKeyUsageSz = length;
+#endif
 
     while (idx < (word32)sz) {
         if (GetObjectId(input, &idx, &oid, oidCertKeyUseType, sz) < 0)
@@ -5131,9 +5381,9 @@ static int DecodeExtKeyUsage(byte* input, int sz, DecodedCert* cert)
                 break;
         }
 
-        #ifdef OPENSSL_EXTRA
-            cert->extExtKeyUsageCount++;
-        #endif
+    #ifdef OPENSSL_EXTRA
+        cert->extExtKeyUsageCount++;
+    #endif
     }
 
     return 0;
@@ -5237,6 +5487,7 @@ static int DecodeNameConstraints(byte* input, int sz, DecodedCert* cert)
 }
 #endif /* IGNORE_NAME_CONSTRAINTS */
 
+
 #if defined(WOLFSSL_CERT_EXT) && !defined(WOLFSSL_SEP)
 
 static int Word32ToString(char* d, word32 number)
@@ -5329,6 +5580,8 @@ static int DecodePolicyOID(char *out, word32 outSz, byte *in, word32 inSz)
     static int DecodeCertPolicy(byte* input, int sz, DecodedCert* cert)
     {
         word32 idx = 0;
+        word32 oldIdx;
+        int ret;
         int total_length = 0, policy_length = 0, length = 0;
     #if !defined(WOLFSSL_SEP) && defined(WOLFSSL_CERT_EXT) && \
         !defined(WOLFSSL_DUP_CERTPOL)
@@ -5355,17 +5608,11 @@ static int DecodePolicyOID(char *out, word32 outSz, byte *in, word32 inSz)
                 return ASN_PARSE_E;
             }
 
-            if (input[idx++] != ASN_OBJECT_ID) {
-                WOLFSSL_MSG("\tCertPolicy isn't OID");
-                return ASN_PARSE_E;
-            }
-            policy_length--;
-
-            if (GetLength(input, &idx, &length, sz) < 0) {
-                WOLFSSL_MSG("\tGet CertPolicy length failed");
-                return ASN_PARSE_E;
-            }
-            policy_length--;
+            oldIdx = idx;
+            ret = GetASNObjectId(input, &idx, &length, sz);
+            if (ret != 0)
+                return ret;
+            policy_length -= idx - oldIdx;
 
             if (length > 0) {
                 /* Verify length won't overrun buffer */
@@ -5475,25 +5722,20 @@ static int DecodeCertExtensions(DecodedCert* cert)
         /* check for critical flag */
         critical = 0;
         if (input[idx] == ASN_BOOLEAN) {
-            int boolLength = 0;
-            idx++;
-            if (GetLength(input, &idx, &boolLength, sz) < 0) {
-                WOLFSSL_MSG("\tfail: critical boolean length");
-                return ASN_PARSE_E;
+            ret = GetBoolean(input, &idx, sz);
+            if (ret < 0) {
+                WOLFSSL_MSG("\tfail: critical boolean");
+                return ret;
             }
-            if (input[idx++])
-                critical = 1;
+
+            critical = (byte)ret;
         }
 
         /* process the extension based on the OID */
-        if (input[idx++] != ASN_OCTET_STRING) {
-            WOLFSSL_MSG("\tfail: should be an OCTET STRING");
-            return ASN_PARSE_E;
-        }
-
-        if (GetLength(input, &idx, &length, sz) < 0) {
-            WOLFSSL_MSG("\tfail: extension data length");
-            return ASN_PARSE_E;
+        ret = GetOctetString(input, &idx, &length, sz);
+        if (ret < 0) {
+            WOLFSSL_MSG("\tfail: bad OCTET STRING");
+            return ret;
         }
 
         switch (oid) {
@@ -5697,127 +5939,141 @@ Signer* GetCAByName(void* signers, byte* hash)
 
 int ParseCertRelative(DecodedCert* cert, int type, int verify, void* cm)
 {
-    word32 confirmOID;
-    int    ret;
-    int    badDate     = 0;
+    int    ret = 0;
+    int    badDate = 0;
     int    criticalExt = 0;
+    word32 confirmOID;
 
-    if ((ret = DecodeToKey(cert, verify)) < 0) {
-        if (ret == ASN_BEFORE_DATE_E || ret == ASN_AFTER_DATE_E)
-            badDate = ret;
-        else
-            return ret;
+    if (cert == NULL) {
+        return BAD_FUNC_ARG;
     }
 
-    WOLFSSL_MSG("Parsed Past Key");
-
-    if (cert->srcIdx < cert->sigIndex) {
-        #ifndef ALLOW_V1_EXTENSIONS
-            if (cert->version < 2) {
-                WOLFSSL_MSG("    v1 and v2 certs not allowed extensions");
-                return ASN_VERSION_E;
-            }
-        #endif
-        /* save extensions */
-        cert->extensions    = &cert->source[cert->srcIdx];
-        cert->extensionsSz  =  cert->sigIndex - cert->srcIdx;
-        cert->extensionsIdx = cert->srcIdx;   /* for potential later use */
-
-        if ((ret = DecodeCertExtensions(cert)) < 0) {
-            if (ret == ASN_CRIT_EXT_E)
-                criticalExt = ret;
+    if (cert->sigCtx.state == SIG_STATE_BEGIN) {
+        if ((ret = DecodeToKey(cert, verify)) < 0) {
+            if (ret == ASN_BEFORE_DATE_E || ret == ASN_AFTER_DATE_E)
+                badDate = ret;
             else
                 return ret;
         }
 
-        /* advance past extensions */
-        cert->srcIdx =  cert->sigIndex;
-    }
+        WOLFSSL_MSG("Parsed Past Key");
 
-    if ((ret = GetAlgoId(cert->source, &cert->srcIdx, &confirmOID,
-                         oidSigType, cert->maxIdx)) < 0)
-        return ret;
+        if (cert->srcIdx < cert->sigIndex) {
+        #ifndef ALLOW_V1_EXTENSIONS
+            if (cert->version < 2) {
+                WOLFSSL_MSG("\tv1 and v2 certs not allowed extensions");
+                return ASN_VERSION_E;
+            }
+        #endif
 
-    if ((ret = GetSignature(cert)) < 0)
-        return ret;
+            /* save extensions */
+            cert->extensions    = &cert->source[cert->srcIdx];
+            cert->extensionsSz  =  cert->sigIndex - cert->srcIdx;
+            cert->extensionsIdx = cert->srcIdx;   /* for potential later use */
 
-    if (confirmOID != cert->signatureOID)
-        return ASN_SIG_OID_E;
+            if ((ret = DecodeCertExtensions(cert)) < 0) {
+                if (ret == ASN_CRIT_EXT_E)
+                    criticalExt = ret;
+                else
+                    return ret;
+            }
+
+            /* advance past extensions */
+            cert->srcIdx = cert->sigIndex;
+        }
+
+        if ((ret = GetAlgoId(cert->source, &cert->srcIdx, &confirmOID,
+                             oidSigType, cert->maxIdx)) < 0)
+            return ret;
+
+        if ((ret = GetSignature(cert)) < 0)
+            return ret;
+
+        if (confirmOID != cert->signatureOID)
+            return ASN_SIG_OID_E;
 
     #ifndef NO_SKID
-        if (cert->extSubjKeyIdSet == 0
-                          && cert->publicKey != NULL && cert->pubKeySize > 0) {
+        if (cert->extSubjKeyIdSet == 0 && cert->publicKey != NULL &&
+                                                        cert->pubKeySize > 0) {
         #ifdef NO_SHA
             ret = wc_Sha256Hash(cert->publicKey, cert->pubKeySize,
                                                             cert->extSubjKeyId);
         #else
             ret = wc_ShaHash(cert->publicKey, cert->pubKeySize,
                                                             cert->extSubjKeyId);
-        #endif
+        #endif /* NO_SHA */
             if (ret != 0)
                 return ret;
         }
-    #endif
+    #endif /* !NO_SKID */
 
-   if (verify != NO_VERIFY && type != CA_TYPE && type != TRUSTED_PEER_TYPE) {
-        Signer* ca = NULL;
+        if (verify != NO_VERIFY && type != CA_TYPE && type != TRUSTED_PEER_TYPE) {
+            cert->ca = NULL;
         #ifndef NO_SKID
             if (cert->extAuthKeyIdSet)
-                ca = GetCA(cm, cert->extAuthKeyId);
-            if (ca == NULL)
-                ca = GetCAByName(cm, cert->issuerHash);
-        #else /* NO_SKID */
-            ca = GetCA(cm, cert->issuerHash);
-        #endif /* NO SKID */
-        WOLFSSL_MSG("About to verify certificate signature");
+                cert->ca = GetCA(cm, cert->extAuthKeyId);
+            if (cert->ca == NULL)
+                cert->ca = GetCAByName(cm, cert->issuerHash);
+        #else
+            cert->ca = GetCA(cm, cert->issuerHash);
+        #endif /* !NO_SKID */
 
-        if (ca) {
-            if (cert->isCA) {
-                if (ca->pathLengthSet) {
-                    if (ca->pathLength == 0) {
-                        WOLFSSL_MSG("CA with path length 0 signing a CA");
-                        return ASN_PATHLEN_INV_E;
-                    }
-                    if (cert->pathLengthSet &&
-                        cert->pathLength >= ca->pathLength) {
+            WOLFSSL_MSG("About to verify certificate signature");
+            if (cert->ca) {
+                if (cert->isCA) {
+                    if (cert->ca->pathLengthSet) {
+                        if (cert->ca->pathLength == 0) {
+                            WOLFSSL_MSG("CA with path length 0 signing a CA");
+                            return ASN_PATHLEN_INV_E;
+                        }
+                        if (cert->pathLengthSet &&
+                            cert->pathLength >= cert->ca->pathLength) {
 
-                        WOLFSSL_MSG("CA signing CA with longer path length");
-                        return ASN_PATHLEN_INV_E;
+                            WOLFSSL_MSG("CA signing CA with longer path length");
+                            return ASN_PATHLEN_INV_E;
+                        }
                     }
                 }
+
+        #ifdef HAVE_OCSP
+                /* Need the CA's public key hash for OCSP */
+            #ifdef NO_SHA
+                ret = wc_Sha256Hash(cert->ca->publicKey, cert->ca->pubKeySize,
+                                                            cert->issuerKeyHash);
+            #else
+                ret = wc_ShaHash(cert->ca->publicKey, cert->ca->pubKeySize,
+                                                            cert->issuerKeyHash);
+            #endif /* NO_SHA */
+                if (ret != 0)
+                    return ret;
+        #endif /* HAVE_OCSP */
             }
+        }
+    }
 
-#ifdef HAVE_OCSP
-            /* Need the ca's public key hash for OCSP */
-    #ifdef NO_SHA
-            ret = wc_Sha256Hash(ca->publicKey, ca->pubKeySize,
-                                cert->issuerKeyHash);
-    #else /* NO_SHA */
-            ret = wc_ShaHash(ca->publicKey, ca->pubKeySize,
-                                cert->issuerKeyHash);
-    #endif /* NO_SHA */
-            if (ret != 0)
-                return ret;
-#endif /* HAVE_OCSP */
-
+    if (verify != NO_VERIFY && type != CA_TYPE && type != TRUSTED_PEER_TYPE) {
+        if (cert->ca) {
             if (verify == VERIFY) {
                 /* try to confirm/verify signature */
-                if (!ConfirmSignature(cert->source + cert->certBegin,
-                            cert->sigIndex - cert->certBegin,
-                        ca->publicKey, ca->pubKeySize, ca->keyOID,
-                        cert->signature, cert->sigLength, cert->signatureOID,
-                        cert->heap)) {
-                    WOLFSSL_MSG("Confirm signature failed");
-                    return ASN_SIG_CONFIRM_E;
+                if ((ret = ConfirmSignature(&cert->sigCtx,
+                        cert->source + cert->certBegin,
+                        cert->sigIndex - cert->certBegin,
+                        cert->ca->publicKey, cert->ca->pubKeySize,
+                        cert->ca->keyOID, cert->signature,
+                        cert->sigLength, cert->signatureOID)) != 0) {
+                    if (ret != WC_PENDING_E) {
+                        WOLFSSL_MSG("Confirm signature failed");
+                    }
+                    return ret;
                 }
-                #ifndef IGNORE_NAME_CONSTRAINTS
+            #ifndef IGNORE_NAME_CONSTRAINTS
                 /* check that this cert's name is permitted by the signer's
                  * name constraints */
-                if (!ConfirmNameConstraints(ca, cert)) {
+                if (!ConfirmNameConstraints(cert->ca, cert)) {
                     WOLFSSL_MSG("Confirm name constraint failed");
                     return ASN_NAME_INVALID_E;
                 }
-                #endif /* IGNORE_NAME_CONSTRAINTS */
+            #endif /* IGNORE_NAME_CONSTRAINTS */
             }
         }
         else {
@@ -5833,7 +6089,7 @@ int ParseCertRelative(DecodedCert* cert, int type, int verify, void* cm)
     if (criticalExt != 0)
         return criticalExt;
 
-    return 0;
+    return ret;
 }
 
 /* Create and init an new signer */
@@ -5946,7 +6202,7 @@ WOLFSSL_LOCAL int SetMyVersion(word32 version, byte* output, int header)
 
     if (header) {
         output[i++] = ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED;
-        output[i++] = ASN_BIT_STRING;
+        output[i++] = 3;
     }
     output[i++] = ASN_INTEGER;
     output[i++] = 0x01;
@@ -5989,7 +6245,7 @@ WOLFSSL_LOCAL int GetSerialNumber(const byte* input, word32* inOutIdx,
     byte* serial, int* serialSz, word32 maxIdx)
 {
     int result = 0;
-    byte b;
+    int ret;
 
     WOLFSSL_ENTER("GetSerialNumber");
 
@@ -6002,42 +6258,19 @@ WOLFSSL_LOCAL int GetSerialNumber(const byte* input, word32* inOutIdx,
         WOLFSSL_MSG("Bad idx first");
         return BUFFER_E;
     }
-    b = input[*inOutIdx];
-    *inOutIdx += 1;
 
-    if (b != ASN_INTEGER) {
-        WOLFSSL_MSG("Expecting Integer");
-        return ASN_PARSE_E;
-    }
+    ret = GetASNInt(input, inOutIdx, serialSz, maxIdx);
+    if (ret != 0)
+        return ret;
 
-    if (GetLength(input, inOutIdx, serialSz, maxIdx) < 0) {
-        return ASN_PARSE_E;
-    }
-
-    /* serial size check */
-    if (*serialSz < 0 || *serialSz > EXTERNAL_SERIAL_SIZE) {
+    if (*serialSz > EXTERNAL_SERIAL_SIZE) {
         WOLFSSL_MSG("Serial size bad");
         return ASN_PARSE_E;
     }
 
-    /* serial size check against max index */
-    if ((*inOutIdx + *serialSz) > maxIdx) {
-        WOLFSSL_MSG("Bad idx serial");
-        return BUFFER_E;
-    }
-
-    /* only check padding and return serial if length is greater than 1 */
-    if (*serialSz > 0) {
-        /* skip padding */
-        if (input[*inOutIdx] == 0x00) {
-            *serialSz -= 1;
-            *inOutIdx += 1;
-        }
-
-        /* return serial */
-        XMEMCPY(serial, &input[*inOutIdx], *serialSz);
-        *inOutIdx += *serialSz;
-    }
+    /* return serial */
+    XMEMCPY(serial, &input[*inOutIdx], *serialSz);
+    *inOutIdx += *serialSz;
 
     return result;
 }
@@ -6262,15 +6495,12 @@ static int SetRsaPublicKey(byte* output, RsaKey* key,
     byte e[MAX_RSA_E_SZ];
 #endif
     byte seq[MAX_SEQ_SZ];
-    byte len[MAX_LENGTH_SZ + 1];  /* trailing 0 */
+    byte bitString[1 + MAX_LENGTH_SZ + 1];
     int  nSz;
     int  eSz;
     int  seqSz;
-    int  lenSz;
+    int  bitStringSz;
     int  idx;
-    int  rawLen;
-    int  leadingBit;
-    int  err;
 
     if (output == NULL || key == NULL || outLen < MAX_SEQ_SZ)
         return BAD_FUNC_ARG;
@@ -6283,37 +6513,15 @@ static int SetRsaPublicKey(byte* output, RsaKey* key,
 #endif
 
 #ifdef HAVE_USER_RSA
-    leadingBit = wc_Rsa_leading_bit(key->n);
-    rawLen = wc_Rsa_unsigned_bin_size(key->n) + leadingBit;
+    nSz = SetASNIntRSA(key->n, n);
 #else
-    leadingBit = mp_leading_bit(&key->n);
-    rawLen = mp_unsigned_bin_size(&key->n) + leadingBit;
+    nSz = SetASNIntMP(&key->n, MAX_RSA_INT_SZ, n);
 #endif
-    n[0] = ASN_INTEGER;
-    nSz  = SetLength(rawLen, n + 1) + 1;  /* int tag */
-
-    if ( (nSz + rawLen) <= MAX_RSA_INT_SZ) {
-        if (leadingBit)
-            n[nSz] = 0;
-#ifdef HAVE_USER_RSA
-        err = wc_Rsa_to_unsigned_bin(key->n, n + nSz, rawLen);
-#else
-        err = mp_to_unsigned_bin(&key->n, n + nSz + leadingBit);
-#endif
-        if (err == MP_OKAY)
-            nSz += rawLen;
-        else {
-#ifdef WOLFSSL_SMALL_STACK
-            XFREE(n, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-            return MP_TO_E;
-        }
-    }
-    else {
+    if (nSz < 0) {
 #ifdef WOLFSSL_SMALL_STACK
         XFREE(n, NULL, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
-        return BUFFER_E;
+        return nSz;
     }
 
     /* e */
@@ -6328,39 +6536,16 @@ static int SetRsaPublicKey(byte* output, RsaKey* key,
 #endif
 
 #ifdef HAVE_USER_RSA
-    leadingBit = wc_Rsa_leading_bit(key->e);
-    rawLen = wc_Rsa_unsigned_bin_size(key->e) + leadingBit;
+    eSz = SetASNIntRSA(key->e, e);
 #else
-    leadingBit = mp_leading_bit(&key->e);
-    rawLen = mp_unsigned_bin_size(&key->e) + leadingBit;
+    eSz = SetASNIntMP(&key->e, MAX_RSA_E_SZ, e); /* cap at e[] size */
 #endif
-    e[0] = ASN_INTEGER;
-    eSz  = SetLength(rawLen, e + 1) + 1;  /* int tag */
-
-    if ( (eSz + rawLen) < MAX_RSA_E_SZ) {
-        if (leadingBit)
-            e[eSz] = 0;
-#ifdef HAVE_USER_RSA
-        err = wc_Rsa_to_unsigned_bin(key->e, e + eSz, rawLen);
-#else
-        err = mp_to_unsigned_bin(&key->e, e + eSz + leadingBit);
-#endif
-        if (err == MP_OKAY)
-            eSz += rawLen;
-        else {
-#ifdef WOLFSSL_SMALL_STACK
-            XFREE(n, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-            XFREE(e, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-            return MP_TO_E;
-        }
-    }
-    else {
+    if (eSz < 0) {
 #ifdef WOLFSSL_SMALL_STACK
         XFREE(n, NULL, DYNAMIC_TYPE_TMP_BUFFER);
         XFREE(e, NULL, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
-        return BUFFER_E;
+        return eSz;
     }
 
     seqSz  = SetSequence(nSz + eSz, seq);
@@ -6390,14 +6575,12 @@ static int SetRsaPublicKey(byte* output, RsaKey* key,
         byte algo[MAX_ALGO_SZ];
 #endif
         algoSz = SetAlgoID(RSAk, algo, oidKeyType, 0);
-        lenSz  = SetLength(seqSz + nSz + eSz + 1, len);
-        len[lenSz++] = 0;   /* trailing 0 */
+        bitStringSz  = SetBitString(seqSz + nSz + eSz, 0, bitString);
 
-        /* write, 1 is for ASN_BIT_STRING */
-        idx = SetSequence(nSz + eSz + seqSz + lenSz + 1 + algoSz, output);
+        idx = SetSequence(nSz + eSz + seqSz + bitStringSz + algoSz, output);
 
         /* check output size */
-        if ( (idx + algoSz + 1 + lenSz + seqSz + nSz + eSz) > outLen) {
+        if ( (idx + algoSz + bitStringSz + seqSz + nSz + eSz) > outLen) {
             #ifdef WOLFSSL_SMALL_STACK
                 XFREE(n,    NULL, DYNAMIC_TYPE_TMP_BUFFER);
                 XFREE(e,    NULL, DYNAMIC_TYPE_TMP_BUFFER);
@@ -6411,10 +6594,8 @@ static int SetRsaPublicKey(byte* output, RsaKey* key,
         XMEMCPY(output + idx, algo, algoSz);
         idx += algoSz;
         /* bit string */
-        output[idx++] = ASN_BIT_STRING;
-        /* length */
-        XMEMCPY(output + idx, len, lenSz);
-        idx += lenSz;
+        XMEMCPY(output + idx, bitString, bitStringSz);
+        idx += bitStringSz;
 #ifdef WOLFSSL_SMALL_STACK
         XFREE(algo, NULL, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
@@ -6439,8 +6620,8 @@ static int SetRsaPublicKey(byte* output, RsaKey* key,
 
     return idx;
 }
-#endif /* !defined(NO_RSA) && (defined(WOLFSSL_CERT_GEN) ||
-                               defined(WOLFSSL_KEY_GEN)) */
+#endif /* !NO_RSA && (WOLFSSL_CERT_GEN || (WOLFSSL_KEY_GEN &&
+                                           !HAVE_USER_RSA)) */
 
 
 #if defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA) && !defined(HAVE_USER_RSA)
@@ -6487,7 +6668,7 @@ int wc_RsaKeyToDer(RsaKey* key, byte* output, word32 inLen)
 {
     word32 seqSz, verSz, rawLen, intTotalLen = 0;
     word32 sizes[RSA_INTS];
-    int    i, j, outLen, ret = 0, lbit;
+    int    i, j, outLen, ret = 0, mpSz;
 
     byte  seq[MAX_SEQ_SZ];
     byte  ver[MAX_VERSION_SZ];
@@ -6506,10 +6687,7 @@ int wc_RsaKeyToDer(RsaKey* key, byte* output, word32 inLen)
     for (i = 0; i < RSA_INTS; i++) {
         mp_int* keyInt = GetRsaInt(key, i);
 
-        /* leading zero */
-        lbit = mp_leading_bit(keyInt);
-        rawLen = mp_unsigned_bin_size(keyInt) + lbit;
-
+        rawLen = mp_unsigned_bin_size(keyInt) + 1;
         tmps[i] = (byte*)XMALLOC(rawLen + MAX_SEQ_SZ, key->heap,
                                  DYNAMIC_TYPE_RSA);
         if (tmps[i] == NULL) {
@@ -6517,30 +6695,12 @@ int wc_RsaKeyToDer(RsaKey* key, byte* output, word32 inLen)
             break;
         }
 
-        tmps[i][0] = ASN_INTEGER;
-        sizes[i] = SetLength(rawLen, tmps[i] + 1) + 1 + lbit; /* tag & lbit */
-
-        if (sizes[i] <= MAX_SEQ_SZ) {
-            int err;
-
-            /* leading zero */
-            if (lbit)
-                tmps[i][sizes[i]-1] = 0x00;
-
-            err = mp_to_unsigned_bin(keyInt, tmps[i] + sizes[i]);
-            if (err == MP_OKAY) {
-                sizes[i] += (rawLen-lbit); /* lbit included in rawLen */
-                intTotalLen += sizes[i];
-            }
-            else {
-                ret = err;
-                break;
-            }
-        }
-        else {
-            ret = ASN_INPUT_E;
+        mpSz = SetASNIntMP(keyInt, MAX_RSA_INT_SZ, tmps[i]);
+        if (mpSz < 0) {
+            ret = mpSz;
             break;
         }
+        intTotalLen += (sizes[i] = mpSz);
     }
 
     if (ret != 0) {
@@ -6579,7 +6739,7 @@ int wc_RsaKeyToPublicDer(RsaKey* key, byte* output, word32 inLen)
     return SetRsaPublicKey(output, key, inLen, 1);
 }
 
-#endif /* WOLFSSL_KEY_GEN && !NO_RSA */
+#endif /* WOLFSSL_KEY_GEN && !NO_RSA && !HAVE_USER_RSA */
 
 
 #if defined(WOLFSSL_CERT_GEN) && !defined(NO_RSA)
@@ -6741,10 +6901,10 @@ static int SetSerial(const byte* serial, byte* output)
 /* Write a public ECC key to output */
 static int SetEccPublicKey(byte* output, ecc_key* key, int with_header)
 {
-    byte len[MAX_LENGTH_SZ + TRAILING_ZERO];
+    byte bitString[1 + MAX_LENGTH_SZ + 1];
     int  algoSz;
     int  curveSz;
-    int  lenSz;
+    int  bitStringSz;
     int  idx;
     word32 pubSz = ECC_BUFSIZE;
 #ifdef WOLFSSL_SMALL_STACK
@@ -6799,11 +6959,9 @@ static int SetEccPublicKey(byte* output, ecc_key* key, int with_header)
 #endif
         algoSz  = SetAlgoID(ECDSAk, algo, oidKeyType, curveSz);
 
-        lenSz   = SetLength(pubSz + TRAILING_ZERO, len);
-        len[lenSz++] = 0;   /* trailing 0 */
+        bitStringSz = SetBitString(pubSz, 0, bitString);
 
-        /* write, 1 is for ASN_BIT_STRING */
-        idx = SetSequence(pubSz + curveSz + lenSz + 1 + algoSz, output);
+        idx = SetSequence(pubSz + curveSz + bitStringSz + algoSz, output);
         /* algo */
         XMEMCPY(output + idx, algo, algoSz);
         idx += algoSz;
@@ -6811,10 +6969,8 @@ static int SetEccPublicKey(byte* output, ecc_key* key, int with_header)
         XMEMCPY(output + idx, curve, curveSz);
         idx += curveSz;
         /* bit string */
-        output[idx++] = ASN_BIT_STRING;
-        /* length */
-        XMEMCPY(output + idx, len, lenSz);
-        idx += lenSz;
+        XMEMCPY(output + idx, bitString, bitStringSz);
+        idx += bitStringSz;
     }
     else
         idx = 0;
@@ -7232,7 +7388,7 @@ static int SetOidValue(byte* out, word32 outSz, const byte *oid, word32 oidSz,
  * RFC5280 : non-critical */
 static int SetSKID(byte* output, word32 outSz, byte *input, word32 length)
 {
-    byte skid_len[MAX_LENGTH_SZ];
+    byte skid_len[1 + MAX_LENGTH_SZ];
     byte skid_enc_len[MAX_LENGTH_SZ];
     int idx = 0, skid_lenSz, skid_enc_lenSz;
     static const byte skid_oid[] = { 0x06, 0x03, 0x55, 0x1d, 0x0e, 0x04 };
@@ -7240,20 +7396,19 @@ static int SetSKID(byte* output, word32 outSz, byte *input, word32 length)
     if (output == NULL || input == NULL)
         return BAD_FUNC_ARG;
 
-    /* length of value */
-    skid_lenSz = SetLength(length, skid_len);
+    /* Octet String header */
+    skid_lenSz = SetOctetString(length, skid_len);
 
     /* length of encoded value */
-    skid_enc_lenSz = SetLength(length + skid_lenSz + 1, skid_enc_len);
+    skid_enc_lenSz = SetLength(length + skid_lenSz, skid_enc_len);
 
     if (outSz < 3)
         return BUFFER_E;
 
-    /* sequence, + 1 => byte to put type size */
-    idx = SetSequence(length + sizeof(skid_oid) + skid_lenSz + skid_enc_lenSz+1,
+    idx = SetSequence(length + sizeof(skid_oid) + skid_lenSz + skid_enc_lenSz,
                       output);
 
-    if ((length + sizeof(skid_oid) + skid_lenSz + skid_enc_lenSz + 1) > outSz)
+    if ((length + sizeof(skid_oid) + skid_lenSz + skid_enc_lenSz) > outSz)
         return BUFFER_E;
 
     /* put oid */
@@ -7264,10 +7419,7 @@ static int SetSKID(byte* output, word32 outSz, byte *input, word32 length)
     XMEMCPY(output+idx, skid_enc_len, skid_enc_lenSz);
     idx += skid_enc_lenSz;
 
-    /* put type */
-    output[idx++] = ASN_OCTET_STRING;
-
-    /* put value len */
+    /* put octet header */
     XMEMCPY(output+idx, skid_len, skid_lenSz);
     idx += skid_lenSz;
 
@@ -7316,42 +7468,16 @@ static int SetAKID(byte* output, word32 outSz,
 static int SetKeyUsage(byte* output, word32 outSz, word16 input)
 {
     byte ku[5];
-    int unusedBits = 0;
+    int  idx;
     static const byte keyusage_oid[] = { 0x06, 0x03, 0x55, 0x1d, 0x0f,
                                          0x01, 0x01, 0xff, 0x04};
 
     if (output == NULL)
         return BAD_FUNC_ARG;
 
-    /* Key Usage is a BitString */
-    ku[0] = ASN_BIT_STRING;
-
-    /* put the Bit String size */
-    if (input > 255) {
-        ku[1] = (byte)3;
-
-        /* compute unused bits */
-        while (((((input >> 8) & 0xff) >> unusedBits) & 0x01) == 0)
-            unusedBits++;
-    }
-    else {
-        ku[1] = (byte)2;
-
-        /* compute unused bits */
-        while (((input >> unusedBits) & 0x01) == 0)
-            unusedBits++;
-    }
-
-    /* put unused bits value */
-    ku[2] = (byte)unusedBits;
-
-    /* compute byte value */
-    ku[3] = (byte)(input & 0xff);
-    if (input > 255)
-        ku[4] = (byte)((input >> 8) & 0xff);
-
+    idx = SetBitString16Bit(input, ku);
     return SetOidValue(output, outSz, keyusage_oid, sizeof(keyusage_oid),
-                       ku, (int)ku[1]+2);
+                       ku, idx);
 }
 
 /* Encode OID string representation to ITU-T X.690 format */
@@ -7537,7 +7663,7 @@ int SetName(byte* output, word32 outputSz, CertName* name)
         const char* nameStr = GetOneName(name, i);
         if (nameStr) {
             /* bottom up */
-            byte firstLen[MAX_LENGTH_SZ];
+            byte firstLen[1 + MAX_LENGTH_SZ];
             byte secondLen[MAX_LENGTH_SZ];
             byte sequence[MAX_SEQ_SZ];
             byte set[MAX_SET_SZ];
@@ -7565,13 +7691,13 @@ int SetName(byte* output, word32 outputSz, CertName* name)
             if (email) {
                 thisLen += EMAIL_JOINT_LEN;
                 thisLen ++;                               /* id type */
-                firstSz  = SetLength(EMAIL_JOINT_LEN, firstLen);
+                firstSz  = SetObjectId(EMAIL_JOINT_LEN, firstLen);
             }
             else {
                 thisLen++;                                 /* str type */
                 thisLen++;                                 /* id  type */
                 thisLen += JOINT_LEN;
-                firstSz = SetLength(JOINT_LEN + 1, firstLen);
+                firstSz  = SetObjectId(JOINT_LEN + 1, firstLen);
             }
             thisLen += firstSz;
             thisLen++;                                /* object id */
@@ -7597,8 +7723,6 @@ int SetName(byte* output, word32 outputSz, CertName* name)
             XMEMCPY(names[i].encoded + idx, sequence, seqSz);
             idx += seqSz;
             /* asn object id */
-            names[i].encoded[idx++] = ASN_OBJECT_ID;
-            /* first length */
             XMEMCPY(names[i].encoded + idx, firstLen, firstSz);
             idx += firstSz;
             if (email) {
@@ -7971,28 +8095,14 @@ static int WriteCertBody(DerCert* der, byte* buffer)
 
 
 /* Make RSA signature from buffer (sz), write to sig (sigSz) */
-static int MakeSignature(const byte* buffer, int sz, byte* sig, int sigSz,
-                         RsaKey* rsaKey, ecc_key* eccKey, WC_RNG* rng,
-                         int sigAlgoType)
+static int MakeSignature(CertSignCtx* certSignCtx, const byte* buffer, int sz,
+    byte* sig, int sigSz, RsaKey* rsaKey, ecc_key* eccKey, WC_RNG* rng,
+    int sigAlgoType, void* heap)
 {
-    int encSigSz, digestSz, typeH = 0, ret = 0;
-#ifdef WOLFSSL_SMALL_STACK
-    byte* digest;
-#else
-    byte digest[WC_MAX_DIGEST_SIZE]; /* max size */
-#endif
-#ifdef WOLFSSL_SMALL_STACK
-    byte* encSig;
-#else
-    byte encSig[MAX_DER_DIGEST_SZ];
-#endif
+    int digestSz = 0, typeH = 0, ret = 0;
 
-    (void)digest;
     (void)digestSz;
-    (void)encSig;
-    (void)encSigSz;
     (void)typeH;
-
     (void)buffer;
     (void)sz;
     (void)sig;
@@ -8001,119 +8111,141 @@ static int MakeSignature(const byte* buffer, int sz, byte* sig, int sigSz,
     (void)eccKey;
     (void)rng;
 
-#ifdef WOLFSSL_SMALL_STACK
-    digest = (byte*)XMALLOC(WC_MAX_DIGEST_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (digest == NULL)
-        return 0; /* not confirmed */
-#endif
+    switch (certSignCtx->state) {
+    case CERTSIGN_STATE_BEGIN:
+    case CERTSIGN_STATE_DIGEST:
 
-    switch (sigAlgoType) {
-    #ifndef NO_MD5
-        case CTC_MD5wRSA:
-        if ((ret = wc_Md5Hash(buffer, sz, digest)) == 0) {
-            typeH    = MD5h;
-            digestSz = MD5_DIGEST_SIZE;
+        certSignCtx->state = CERTSIGN_STATE_DIGEST;
+        certSignCtx->digest = (byte*)XMALLOC(WC_MAX_DIGEST_SIZE, heap,
+            DYNAMIC_TYPE_TMP_BUFFER);
+        if (certSignCtx->digest == NULL) {
+            ret = MEMORY_E; goto exit_ms;
         }
-        break;
-    #endif
-    #ifndef NO_SHA
-        case CTC_SHAwRSA:
-        case CTC_SHAwECDSA:
-        if ((ret = wc_ShaHash(buffer, sz, digest)) == 0) {
-            typeH    = SHAh;
-            digestSz = SHA_DIGEST_SIZE;
+
+        switch (sigAlgoType) {
+        #ifndef NO_MD5
+            case CTC_MD5wRSA:
+            if ((ret = wc_Md5Hash(buffer, sz, certSignCtx->digest)) == 0) {
+                typeH    = MD5h;
+                digestSz = MD5_DIGEST_SIZE;
+            }
+            break;
+        #endif
+        #ifndef NO_SHA
+            case CTC_SHAwRSA:
+            case CTC_SHAwECDSA:
+            if ((ret = wc_ShaHash(buffer, sz, certSignCtx->digest)) == 0) {
+                typeH    = SHAh;
+                digestSz = SHA_DIGEST_SIZE;
+            }
+            break;
+        #endif
+        #ifdef WOLFSSL_SHA224
+            case CTC_SHA224wRSA:
+            case CTC_SHA224wECDSA:
+            if ((ret = wc_Sha224Hash(buffer, sz, certSignCtx->digest)) == 0) {
+                typeH    = SHA224h;
+                digestSz = SHA224_DIGEST_SIZE;
+            }
+            break;
+        #endif
+        #ifndef NO_SHA256
+            case CTC_SHA256wRSA:
+            case CTC_SHA256wECDSA:
+            if ((ret = wc_Sha256Hash(buffer, sz, certSignCtx->digest)) == 0) {
+                typeH    = SHA256h;
+                digestSz = SHA256_DIGEST_SIZE;
+            }
+            break;
+        #endif
+        #ifdef WOLFSSL_SHA384
+            case CTC_SHA384wRSA:
+            case CTC_SHA384wECDSA:
+            if ((ret = wc_Sha384Hash(buffer, sz, certSignCtx->digest)) == 0) {
+                typeH    = SHA384h;
+                digestSz = SHA384_DIGEST_SIZE;
+            }
+            break;
+        #endif
+        #ifdef WOLFSSL_SHA512
+            case CTC_SHA512wRSA:
+            case CTC_SHA512wECDSA:
+            if ((ret = wc_Sha512Hash(buffer, sz, certSignCtx->digest)) == 0) {
+                typeH    = SHA512h;
+                digestSz = SHA512_DIGEST_SIZE;
+            }
+            break;
+        #endif
+            default:
+                WOLFSSL_MSG("MakeSignautre called with unsupported type");
+                ret = ALGO_ID_E;
         }
-        break;
-    #endif
-    #ifdef WOLFSSL_SHA224
-        case CTC_SHA224wRSA:
-        case CTC_SHA224wECDSA:
-        if ((ret = wc_Sha224Hash(buffer, sz, digest)) == 0) {
-            typeH    = SHA224h;
-            digestSz = SHA224_DIGEST_SIZE;
+
+        /* set next state, since WC_PENDING re-entry for these is not "call again" */
+        certSignCtx->state = CERTSIGN_STATE_ENCODE;
+        if (ret != 0) {
+            goto exit_ms;
         }
-        break;
-    #endif
-    #ifndef NO_SHA256
-        case CTC_SHA256wRSA:
-        case CTC_SHA256wECDSA:
-        if ((ret = wc_Sha256Hash(buffer, sz, digest)) == 0) {
-            typeH    = SHA256h;
-            digestSz = SHA256_DIGEST_SIZE;
+
+        /* fall-through */
+    case CERTSIGN_STATE_ENCODE:
+    #ifndef NO_RSA
+        if (rsaKey) {
+            certSignCtx->encSig = (byte*)XMALLOC(MAX_DER_DIGEST_SZ, heap,
+                DYNAMIC_TYPE_TMP_BUFFER);
+            if (certSignCtx->encSig == NULL) {
+                ret = MEMORY_E; goto exit_ms;
+            }
+
+            /* signature */
+            certSignCtx->encSigSz = wc_EncodeSignature(certSignCtx->encSig,
+                                          certSignCtx->digest, digestSz, typeH);
         }
-        break;
-    #endif
-    #ifdef WOLFSSL_SHA384
-        case CTC_SHA384wRSA:
-        case CTC_SHA384wECDSA:
-        if ((ret = wc_Sha384Hash(buffer, sz, digest)) == 0) {
-            typeH    = SHA384h;
-            digestSz = SHA384_DIGEST_SIZE;
+    #endif /* !NO_RSA */
+
+        /* fall-through */
+    case CERTSIGN_STATE_DO:
+        certSignCtx->state = CERTSIGN_STATE_DO;
+        ret = ALGO_ID_E; /* default to error */
+
+    #ifndef NO_RSA
+        if (rsaKey) {
+            /* signature */
+            ret = wc_RsaSSL_Sign(certSignCtx->encSig, certSignCtx->encSigSz,
+                                 sig, sigSz, rsaKey, rng);
         }
-        break;
-    #endif
-    #ifdef WOLFSSL_SHA512
-        case CTC_SHA512wRSA:
-        case CTC_SHA512wECDSA:
-        if ((ret = wc_Sha512Hash(buffer, sz, digest)) == 0) {
-            typeH    = SHA512h;
-            digestSz = SHA512_DIGEST_SIZE;
+    #endif /* !NO_RSA */
+
+    #ifdef HAVE_ECC
+        if (!rsaKey && eccKey) {
+            word32 outSz = sigSz;
+
+            ret = wc_ecc_sign_hash(certSignCtx->digest, digestSz,
+                                   sig, &outSz, rng, eccKey);
+            if (ret == 0)
+                ret = outSz;
         }
+    #endif /* HAVE_ECC */
         break;
-    #endif
-        default:
-            WOLFSSL_MSG("MakeSignautre called with unsupported type");
-            ret = ALGO_ID_E;
     }
 
-    if (ret != 0) {
-    #ifdef WOLFSSL_SMALL_STACK
-        XFREE(digest, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    #endif
+exit_ms:
+
+    if (ret == WC_PENDING_E) {
         return ret;
     }
 
-#ifdef WOLFSSL_SMALL_STACK
-    encSig = (byte*)XMALLOC(MAX_DER_DIGEST_SZ,
-                                                 NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (encSig == NULL) {
-        XFREE(digest, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-        return MEMORY_E;
-    }
-#endif
-
-    ret = ALGO_ID_E;
-
 #ifndef NO_RSA
     if (rsaKey) {
-        /* signature */
-        encSigSz = wc_EncodeSignature(encSig, digest, digestSz, typeH);
-        ret = 0;
-        do {
-#if defined(WOLFSSL_ASYNC_CRYPT)
-            ret = wc_RsaAsyncWait(ret, rsaKey);
-#endif
-            if (ret >= 0) {
-                ret = wc_RsaSSL_Sign(encSig, encSigSz, sig, sigSz, rsaKey, rng);
-            }
-        } while (ret == WC_PENDING_E);
+        XFREE(certSignCtx->encSig, heap, DYNAMIC_TYPE_TMP_BUFFER);
     }
-#endif
+#endif /* !NO_RSA */
 
-#ifdef HAVE_ECC
-    if (!rsaKey && eccKey) {
-        word32 outSz = sigSz;
-        ret = wc_ecc_sign_hash(digest, digestSz, sig, &outSz, rng, eccKey);
+    XFREE(certSignCtx->digest, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    certSignCtx->digest = NULL;
 
-        if (ret == 0)
-            ret = outSz;
-    }
-#endif
-
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(digest, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    XFREE(encSig, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+    /* reset state */
+    certSignCtx->state = CERTSIGN_STATE_BEGIN;
 
     return ret;
 }
@@ -8130,10 +8262,7 @@ static int AddSignature(byte* buffer, int bodySz, const byte* sig, int sigSz,
     /* algo */
     idx += SetAlgoID(sigAlgoType, buffer + idx, oidSigType, 0);
     /* bit string */
-    buffer[idx++] = ASN_BIT_STRING;
-    /* length */
-    idx += SetLength(sigSz + 1, buffer + idx);
-    buffer[idx++] = 0;   /* trailing 0 */
+    idx += SetBitString(sigSz, 0, buffer + idx);
     /* signature */
     XMEMCPY(buffer + idx, sig, sigSz);
     idx += sigSz;
@@ -8490,35 +8619,57 @@ int wc_MakeCertReq(Cert* cert, byte* derBuffer, word32 derSz,
 int wc_SignCert(int requestSz, int sType, byte* buffer, word32 buffSz,
              RsaKey* rsaKey, ecc_key* eccKey, WC_RNG* rng)
 {
-    int sigSz;
-#ifdef WOLFSSL_SMALL_STACK
-    byte* sig;
-#else
-    byte sig[MAX_ENCODED_SIG_SZ];
+    int sigSz = 0;
+    void* heap = NULL;
+    CertSignCtx* certSignCtx = NULL;
+#ifndef WOLFSSL_ASYNC_CRYPT
+    CertSignCtx  certSignCtx_lcl;
+    certSignCtx = &certSignCtx_lcl;
+    XMEMSET(certSignCtx, 0, sizeof(CertSignCtx));
 #endif
 
     if (requestSz < 0)
         return requestSz;
 
-#ifdef WOLFSSL_SMALL_STACK
-    sig = (byte*)XMALLOC(MAX_ENCODED_SIG_SZ, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (sig == NULL)
-        return MEMORY_E;
-#endif
+    /* locate ctx */
+    if (rsaKey) {
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        certSignCtx = &rsaKey->certSignCtx;
+    #endif
+        heap = rsaKey->heap;
+    }
+    else if (eccKey) {
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        certSignCtx = &eccKey->certSignCtx;
+    #endif
+        heap = eccKey->heap;
+    }
 
-    sigSz = MakeSignature(buffer, requestSz, sig, MAX_ENCODED_SIG_SZ, rsaKey,
-                          eccKey, rng, sType);
+    if (certSignCtx == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    if (certSignCtx->sig == NULL) {
+        certSignCtx->sig = (byte*)XMALLOC(MAX_ENCODED_SIG_SZ, heap,
+            DYNAMIC_TYPE_TMP_BUFFER);
+        if (certSignCtx->sig == NULL)
+            return MEMORY_E;
+    }
+
+    sigSz = MakeSignature(certSignCtx, buffer, requestSz, certSignCtx->sig,
+        MAX_ENCODED_SIG_SZ, rsaKey, eccKey, rng, sType, heap);
+    if (sigSz == WC_PENDING_E)
+        return sigSz;
 
     if (sigSz >= 0) {
         if (requestSz + MAX_SEQ_SZ * 2 + sigSz > (int)buffSz)
             sigSz = BUFFER_E;
         else
-            sigSz = AddSignature(buffer, requestSz, sig, sigSz, sType);
+            sigSz = AddSignature(buffer, requestSz, certSignCtx->sig, sigSz, sType);
     }
 
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(sig, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+    XFREE(certSignCtx->sig, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    certSignCtx->sig = NULL;
 
     return sigSz;
 }
@@ -8756,7 +8907,7 @@ int wc_SetAuthKeyIdFromCert(Cert *cert, const byte *der, int derSz)
 #endif
 
     /* decode certificate and get SKID that will be AKID of current cert */
-    InitDecodedCert(decoded, (byte*)der, derSz, 0);
+    InitDecodedCert(decoded, (byte*)der, derSz, NULL);
     ret = ParseCert(decoded, CERT_TYPE, NO_VERIFY, 0);
     if (ret != 0) {
         FreeDecodedCert(decoded);
@@ -8908,7 +9059,7 @@ static int SetAltNamesFromCert(Cert* cert, const byte* der, int derSz)
         return MEMORY_E;
 #endif
 
-    InitDecodedCert(decoded, (byte*)der, derSz, 0);
+    InitDecodedCert(decoded, (byte*)der, derSz, NULL);
     ret = ParseCertRelative(decoded, CA_TYPE, NO_VERIFY, 0);
 
     if (ret < 0) {
@@ -9004,7 +9155,7 @@ static int SetDatesFromCert(Cert* cert, const byte* der, int derSz)
         return MEMORY_E;
 #endif
 
-    InitDecodedCert(decoded, (byte*)der, derSz, 0);
+    InitDecodedCert(decoded, (byte*)der, derSz, NULL);
     ret = ParseCertRelative(decoded, CA_TYPE, NO_VERIFY, 0);
 
     if (ret < 0) {
@@ -9060,7 +9211,7 @@ static int SetNameFromCert(CertName* cn, const byte* der, int derSz)
         return MEMORY_E;
 #endif
 
-    InitDecodedCert(decoded, (byte*)der, derSz, 0);
+    InitDecodedCert(decoded, (byte*)der, derSz, NULL);
     ret = ParseCertRelative(decoded, CA_TYPE, NO_VERIFY, 0);
 
     if (ret < 0) {
@@ -9239,8 +9390,8 @@ int wc_SetDatesBuffer(Cert* cert, const byte* der, int derSz)
 int StoreECC_DSA_Sig(byte* out, word32* outLen, mp_int* r, mp_int* s)
 {
     word32 idx = 0;
-    word32 rSz;                           /* encoding size */
-    word32 sSz;
+    int    rSz;                           /* encoding size */
+    int    sSz;
     word32 headerSz = 4;   /* 2*ASN_TAG + 2*LEN(ENUM) */
 
     /* If the leading bit on the INTEGER is a 1, add a leading zero */
@@ -9248,33 +9399,24 @@ int StoreECC_DSA_Sig(byte* out, word32* outLen, mp_int* r, mp_int* s)
     int sLeadingZero = mp_leading_bit(s);
     int rLen = mp_unsigned_bin_size(r);   /* big int size */
     int sLen = mp_unsigned_bin_size(s);
-    int err;
 
     if (*outLen < (rLen + rLeadingZero + sLen + sLeadingZero +
                    headerSz + 2))  /* SEQ_TAG + LEN(ENUM) */
         return BUFFER_E;
 
-    idx = SetSequence(rLen+rLeadingZero+sLen+sLeadingZero+headerSz, out);
+    idx = SetSequence(rLen + rLeadingZero + sLen+sLeadingZero + headerSz, out);
 
     /* store r */
-    out[idx++] = ASN_INTEGER;
-    rSz = SetLength(rLen + rLeadingZero, &out[idx]);
+    rSz = SetASNIntMP(r, -1, &out[idx]);
+    if (rSz < 0)
+        return rSz;
     idx += rSz;
-    if (rLeadingZero)
-        out[idx++] = 0;
-    err = mp_to_unsigned_bin(r, &out[idx]);
-    if (err != MP_OKAY) return err;
-    idx += rLen;
 
     /* store s */
-    out[idx++] = ASN_INTEGER;
-    sSz = SetLength(sLen + sLeadingZero, &out[idx]);
+    sSz = SetASNIntMP(s, -1, &out[idx]);
+    if (sSz < 0)
+        return sSz;
     idx += sSz;
-    if (sLeadingZero)
-        out[idx++] = 0;
-    err = mp_to_unsigned_bin(s, &out[idx]);
-    if (err != MP_OKAY) return err;
-    idx += sLen;
 
     *outLen = idx;
 
@@ -9288,17 +9430,21 @@ int DecodeECC_DSA_Sig(const byte* sig, word32 sigLen, mp_int* r, mp_int* s)
     word32 idx = 0;
     int    len = 0;
 
-    if (GetSequence(sig, &idx, &len, sigLen) < 0)
+    if (GetSequence(sig, &idx, &len, sigLen) < 0) {
         return ASN_ECC_KEY_E;
+    }
 
-    if ((word32)len > (sigLen - idx))
+    if ((word32)len > (sigLen - idx)) {
         return ASN_ECC_KEY_E;
+    }
 
-    if (GetInt(r, sig, &idx, sigLen) < 0)
+    if (GetInt(r, sig, &idx, sigLen) < 0) {
         return ASN_ECC_KEY_E;
+    }
 
-    if (GetInt(s, sig, &idx, sigLen) < 0)
+    if (GetInt(s, sig, &idx, sigLen) < 0) {
         return ASN_ECC_KEY_E;
+    }
 
     return 0;
 }
@@ -9307,7 +9453,7 @@ int DecodeECC_DSA_Sig(const byte* sig, word32 sigLen, mp_int* r, mp_int* s)
 int wc_EccPrivateKeyDecode(const byte* input, word32* inOutIdx, ecc_key* key,
                         word32 inSz)
 {
-    word32 oidSum = 0;
+    word32 oidSum;
     int    version, length;
     int    privSz, pubSz;
     byte   b;
@@ -9371,24 +9517,10 @@ int wc_EccPrivateKeyDecode(const byte* input, word32* inOutIdx, ecc_key* key,
         if (GetLength(input, inOutIdx, &length, inSz) <= 0)
             ret = ASN_PARSE_E;
         else {
-            /* object id */
-            b = input[*inOutIdx];
-            *inOutIdx += 1;
-
-            if (b != ASN_OBJECT_ID) {
-                ret = ASN_OBJECT_ID_E;
-            }
-            else if (GetLength(input, inOutIdx, &length, inSz) <= 0) {
-                ret = ASN_PARSE_E;
-            }
-            else {
-                while(length--) {
-                    oidSum += input[*inOutIdx];
-                    *inOutIdx += 1;
-                }
-                if ((ret = CheckCurve(oidSum)) < 0) {
+            ret = GetObjectId(input, inOutIdx, &oidSum, oidIgnoreType, inSz);
+            if (ret == 0) {
+                if ((ret = CheckCurve(oidSum)) < 0)
                     ret = ECC_CURVE_OID_E;
-                }
                 else {
                     curve_id = ret;
                     ret = 0;
@@ -9410,33 +9542,18 @@ int wc_EccPrivateKeyDecode(const byte* input, word32* inOutIdx, ecc_key* key,
         }
         else {
             /* key header */
-            b = input[*inOutIdx];
-            *inOutIdx += 1;
-
-            if (b != ASN_BIT_STRING) {
-                ret = ASN_BITSTR_E;
-            }
-            else if (GetLength(input, inOutIdx, &length, inSz) <= 0) {
-                ret = ASN_PARSE_E;
-            }
-            else {
-                b = input[*inOutIdx];
-                *inOutIdx += 1;
-
-                if (b != 0x00) {
-                    ret = ASN_EXPECT_0_E;
-                }
-                else {
-                    /* pub key */
-                    pubSz = length - 1;  /* null prefix */
-                    if (pubSz < 2*(ECC_MAXSIZE+1)) {
-                        XMEMCPY(pub, &input[*inOutIdx], pubSz);
-                        *inOutIdx += length;
-                        ret = wc_ecc_import_private_key_ex(priv, privSz, pub,
-                                                        pubSz, key, curve_id);
-                    } else
-                        ret = BUFFER_E;
+            ret = CheckBitString(input, inOutIdx, &length, inSz, 0, NULL);
+            if (ret == 0) {
+                /* pub key */
+                pubSz = length;
+                if (pubSz < 2*(ECC_MAXSIZE+1)) {
+                    XMEMCPY(pub, &input[*inOutIdx], pubSz);
+                    *inOutIdx += length;
+                    ret = wc_ecc_import_private_key_ex(priv, privSz, pub,
+                                                    pubSz, key, curve_id);
                 }
+                else
+                    ret = BUFFER_E;
             }
         }
     }
@@ -9454,7 +9571,10 @@ int wc_EccPublicKeyDecode(const byte* input, word32* inOutIdx,
                           ecc_key* key, word32 inSz)
 {
     int    length;
-    byte   b;
+    int    ret;
+#ifdef ECC_CHECK_PUBLIC_KEY_OID
+    word32 oidSum;
+#endif
 
     if (input == NULL || inOutIdx == NULL || key == NULL || inSz == 0)
         return BAD_FUNC_ARG;
@@ -9465,40 +9585,30 @@ int wc_EccPublicKeyDecode(const byte* input, word32* inOutIdx,
     if (GetSequence(input, inOutIdx, &length, inSz) < 0)
         return ASN_PARSE_E;
 
-    b = input[(*inOutIdx)++];
-    if (b != ASN_OBJECT_ID)
-        return ASN_OBJECT_ID_E;
-
-    if (GetLength(input, inOutIdx, &length, inSz) < 0)
-        return ASN_PARSE_E;
-
-    *inOutIdx += length;   /* skip past */
+    ret = SkipObjectId(input, inOutIdx, inSz);
+    if (ret != 0)
+        return ret;
 
     /* ecc params information */
-    b = input[(*inOutIdx)++];
-    if (b != ASN_OBJECT_ID)
-        return ASN_OBJECT_ID_E;
-
-    if (GetLength(input, inOutIdx, &length, inSz) <= 0)
-        return ASN_PARSE_E;
-
-    *inOutIdx += length;   /* skip past */
+#ifdef ECC_CHECK_PUBLIC_KEY_OID
+    ret = GetObjectId(input, inOutIdx, &oidSum, oidIgnoreType, inSz);
+    if (ret != 0)
+        return ret;
+    if (CheckCurve(oidSum) < 0)
+        return ECC_CURVE_OID_E;
+#else
+    ret = SkipObjectId(input, inOutIdx, inSz);
+    if (ret != 0)
+        return ret;
+#endif
 
     /* key header */
-    b = input[*inOutIdx];
-    *inOutIdx += 1;
-
-    if (b != ASN_BIT_STRING)
-        return ASN_BITSTR_E;
-    if (GetLength(input, inOutIdx, &length, inSz) <= 0)
-        return ASN_PARSE_E;
-
-    b = input[(*inOutIdx)++];
-    if (b != 0x00)
-        return ASN_EXPECT_0_E;
+    ret = CheckBitString(input, inOutIdx, NULL, inSz, 1, NULL);
+    if (ret != 0)
+        return ret;
 
     /* This is the raw point data compressed or uncompressed. */
-    if (wc_ecc_import_x963(input+*inOutIdx, inSz - *inOutIdx, key) != 0)
+    if (wc_ecc_import_x963(input + *inOutIdx, inSz - *inOutIdx, key) != 0)
         return ASN_ECC_KEY_E;
 
     return 0;
@@ -9543,8 +9653,7 @@ static int wc_BuildEccKeyDer(ecc_key* key, byte* output, word32 inLen,
     if (prv == NULL) {
         return MEMORY_E;
     }
-    prv[prvidx++] = ASN_OCTET_STRING;
-    prv[prvidx++] = (byte)key->dp->size;
+    prvidx += SetOctetString8Bit(key->dp->size, &prv[prvidx]);
     ret = wc_ecc_export_private_only(key, prv + prvidx, &privSz);
     if (ret < 0) {
         XFREE(prv, key->heap, DYNAMIC_TYPE_TMP_BUFFER);
@@ -9572,8 +9681,7 @@ static int wc_BuildEccKeyDer(ecc_key* key, byte* output, word32 inLen,
             pubidx += SetLength(pubSz + ASN_ECC_CONTEXT_SZ + 2, pub+pubidx);
         else /* leading zero */
             pubidx += SetLength(pubSz + ASN_ECC_CONTEXT_SZ + 1, pub+pubidx);
-        pub[pubidx++] = ASN_BIT_STRING;
-        pubidx += SetLength(pubSz + 1, pub+pubidx);
+        pubidx += SetBitString(pubSz, 0, pub + pubidx);
         pub[pubidx++] = (byte)0; /* leading zero */
         ret = wc_ecc_export_x963(key, pub + pubidx, &pubSz);
         if (ret != 0) {
@@ -9710,6 +9818,7 @@ static int DecodeSingleResponse(byte* source,
     word32 idx = *ioIndex, prevIndex, oid;
     int length, wrapperSz;
     CertStatus* cs = resp->status;
+    int ret;
 
     WOLFSSL_ENTER("DecodeSingleResponse");
 
@@ -9733,17 +9842,15 @@ static int DecodeSingleResponse(byte* source,
     if (GetAlgoId(source, &idx, &oid, oidIgnoreType, size) < 0)
         return ASN_PARSE_E;
     /* Save reference to the hash of CN */
-    if (source[idx++] != ASN_OCTET_STRING)
-        return ASN_PARSE_E;
-    if (GetLength(source, &idx, &length, size) < 0)
-        return ASN_PARSE_E;
+    ret = GetOctetString(source, &idx, &length, size);
+    if (ret < 0)
+        return ret;
     resp->issuerHash = source + idx;
     idx += length;
     /* Save reference to the hash of the issuer public key */
-    if (source[idx++] != ASN_OCTET_STRING)
-        return ASN_PARSE_E;
-    if (GetLength(source, &idx, &length, size) < 0)
-        return ASN_PARSE_E;
+    ret = GetOctetString(source, &idx, &length, size);
+    if (ret < 0)
+        return ret;
     resp->issuerKeyHash = source + idx;
     idx += length;
 
@@ -9826,6 +9933,7 @@ static int DecodeOcspRespExtensions(byte* source,
     int length;
     int ext_bound; /* boundary index for the sequence of extensions */
     word32 oid;
+    int ret;
 
     WOLFSSL_ENTER("DecodeOcspRespExtensions");
 
@@ -9858,31 +9966,20 @@ static int DecodeOcspRespExtensions(byte* source,
         /* check for critical flag */
         if (source[idx] == ASN_BOOLEAN) {
             WOLFSSL_MSG("\tfound optional critical flag, moving past");
-            idx += (ASN_BOOL_SIZE + 1);
+            ret = GetBoolean(source, &idx, sz);
+            if (ret < 0)
+                return ret;
         }
 
-        /* process the extension based on the OID */
-        if (source[idx++] != ASN_OCTET_STRING) {
-            WOLFSSL_MSG("\tfail: should be an OCTET STRING");
-            return ASN_PARSE_E;
-        }
-
-        if (GetLength(source, &idx, &length, sz) < 0) {
-            WOLFSSL_MSG("\tfail: extension data length");
-            return ASN_PARSE_E;
-        }
+        ret = GetOctetString(source, &idx, &length, sz);
+        if (ret < 0)
+            return ret;
 
         if (oid == OCSP_NONCE_OID) {
             /* get data inside extra OCTET_STRING */
-            if (source[idx++] != ASN_OCTET_STRING) {
-                WOLFSSL_MSG("\tfail: should be an OCTET STRING");
-                return ASN_PARSE_E;
-            }
-
-            if (GetLength(source, &idx, &length, sz) < 0) {
-                WOLFSSL_MSG("\tfail: extension data length");
-                return ASN_PARSE_E;
-            }
+            ret = GetOctetString(source, &idx, &length, sz);
+            if (ret < 0)
+                return ret;
 
             resp->nonce = source + idx;
             resp->nonceSz = length;
@@ -9990,9 +10087,11 @@ static int DecodeCerts(byte* source,
 static int DecodeBasicOcspResponse(byte* source, word32* ioIndex,
             OcspResponse* resp, word32 size, void* cm, void* heap, int noVerify)
 {
-    int length;
+    int    length;
     word32 idx = *ioIndex;
     word32 end_index;
+    int    ret;
+    int    sigLength;
 
     WOLFSSL_ENTER("DecodeBasicOcspResponse");
     (void)heap;
@@ -10011,25 +10110,13 @@ static int DecodeBasicOcspResponse(byte* source, word32* ioIndex,
     if (GetAlgoId(source, &idx, &resp->sigOID, oidSigType, size) < 0)
         return ASN_PARSE_E;
 
-    /* Obtain pointer to the start of the signature, and save the size */
-    if (source[idx++] == ASN_BIT_STRING)
-    {
-        int  sigLength = 0;
-        byte b;
+    ret = CheckBitString(source, &idx, &sigLength, size, 1, NULL);
+    if (ret != 0)
+        return ret;
 
-        if (GetLength(source, &idx, &sigLength, size) <= 0)
-            return ASN_PARSE_E;
-
-        b = source[idx++];
-        if (b != 0x00) {
-            return ASN_EXPECT_0_E;
-        }
-
-        sigLength--;
-        resp->sigSz = sigLength;
-        resp->sig = source + idx;
-        idx += sigLength;
-    }
+    resp->sigSz = sigLength;
+    resp->sig = source + idx;
+    idx += sigLength;
 
     /*
      * Check the length of the BasicOcspResponse against the current index to
@@ -10039,7 +10126,6 @@ static int DecodeBasicOcspResponse(byte* source, word32* ioIndex,
     if (idx < end_index)
     {
         DecodedCert cert;
-        int         ret;
 
         if (DecodeCerts(source, &idx, resp, size) < 0)
             return ASN_PARSE_E;
@@ -10054,13 +10140,14 @@ static int DecodeBasicOcspResponse(byte* source, word32* ioIndex,
             return ret;
         }
 
-        ret = ConfirmSignature(resp->response, resp->responseSz,
-                            cert.publicKey, cert.pubKeySize, cert.keyOID,
-                            resp->sig, resp->sigSz, resp->sigOID, NULL);
+        /* ConfirmSignature is blocking here */
+        ret = ConfirmSignature(&cert.sigCtx,
+            resp->response, resp->responseSz,
+            cert.publicKey, cert.pubKeySize, cert.keyOID,
+            resp->sig, resp->sigSz, resp->sigOID);
         FreeDecodedCert(&cert);
 
-        if (ret == 0)
-        {
+        if (ret != 0) {
             WOLFSSL_MSG("\tOCSP Confirm signature failed");
             return ASN_OCSP_CONFIRM_E;
         }
@@ -10069,6 +10156,7 @@ static int DecodeBasicOcspResponse(byte* source, word32* ioIndex,
 #endif /* WOLFSSL_NO_OCSP_OPTIONAL_CERTS */
     {
         Signer* ca = NULL;
+        int sigValid = -1;
 
         #ifndef NO_SKID
             ca = GetCA(cm, resp->issuerKeyHash);
@@ -10076,9 +10164,16 @@ static int DecodeBasicOcspResponse(byte* source, word32* ioIndex,
             ca = GetCA(cm, resp->issuerHash);
         #endif
 
-        if (!ca || !ConfirmSignature(resp->response, resp->responseSz,
-                                     ca->publicKey, ca->pubKeySize, ca->keyOID,
-                                  resp->sig, resp->sigSz, resp->sigOID, NULL)) {
+        if (ca) {
+            SignatureCtx sigCtx;
+            InitSignatureCtx(&sigCtx, heap, INVALID_DEVID);
+
+            /* ConfirmSignature is blocking here */
+            sigValid = ConfirmSignature(&sigCtx, resp->response,
+                resp->responseSz, ca->publicKey, ca->pubKeySize, ca->keyOID,
+                                resp->sig, resp->sigSz, resp->sigOID);
+        }
+        if (ca == NULL || sigValid != 0) {
             WOLFSSL_MSG("\tOCSP Confirm signature failed");
             return ASN_OCSP_CONFIRM_E;
         }
@@ -10143,11 +10238,9 @@ int OcspResponseDecode(OcspResponse* resp, void* cm, void* heap, int noVerify)
         return ASN_PARSE_E;
     if (oid != OCSP_BASIC_OID)
         return ASN_PARSE_E;
-    if (source[idx++] != ASN_OCTET_STRING)
-        return ASN_PARSE_E;
-
-    if (GetLength(source, &idx, &length, size) < 0)
-        return ASN_PARSE_E;
+    ret = GetOctetString(source, &idx, &length, size);
+    if (ret < 0)
+        return ret;
 
     ret = DecodeBasicOcspResponse(source, &idx, resp, size, cm, heap, noVerify);
     if (ret < 0)
@@ -10172,8 +10265,7 @@ word32 EncodeOcspRequestExtensions(OcspRequest* req, byte* output, word32 size)
     totalSz += req->nonceSz;
     totalSz += seqSz[0] = SetOctetString(req->nonceSz, seqArray[0]);
     totalSz += seqSz[1] = SetOctetString(req->nonceSz + seqSz[0], seqArray[1]);
-    seqArray[2][0] = ASN_OBJECT_ID;
-    totalSz += seqSz[2] = 1 + SetLength(sizeof(NonceObjId), &seqArray[2][1]);
+    totalSz += seqSz[2] = SetObjectId(sizeof(NonceObjId), seqArray[2]);
     totalSz += seqSz[3] = SetSequence(totalSz, seqArray[3]);
     totalSz += seqSz[4] = SetSequence(totalSz, seqArray[4]);
 
@@ -10283,6 +10375,8 @@ int EncodeOcspRequest(OcspRequest* req, byte* output, word32 size)
 int InitOcspRequest(OcspRequest* req, DecodedCert* cert, byte useNonce,
                                                                      void* heap)
 {
+    int ret;
+
     WOLFSSL_ENTER("InitOcspRequest");
 
     if (req == NULL)
@@ -10314,17 +10408,17 @@ int InitOcspRequest(OcspRequest* req, DecodedCert* cert, byte useNonce,
             XMEMCPY(req->url, cert->extAuthInfo, cert->extAuthInfoSz);
             req->urlSz = cert->extAuthInfoSz;
         }
-
     }
 
     if (useNonce) {
         WC_RNG rng;
 
-#ifdef WOLFSSL_STATIC_MEMORY
-        if (wc_InitRng_ex(&rng, req->heap) != 0) {
-#else
-        if (wc_InitRng(&rng) != 0) {
-#endif
+    #ifndef HAVE_FIPS
+        ret = wc_InitRng_ex(&rng, req->heap, INVALID_DEVID);
+    #else
+        ret = wc_InitRng(&rng);
+    #endif
+        if (ret != 0) {
             WOLFSSL_MSG("\tCannot initialize RNG. Skipping the OSCP Nonce.");
         } else {
             if (wc_RNG_GenerateBlock(&rng, req->nonce, MAX_OCSP_NONCE_SZ) != 0)
@@ -10559,29 +10653,15 @@ static int GetCRL_Signature(const byte* source, word32* idx, DecodedCRL* dcrl,
                             int maxIdx)
 {
     int    length;
-    byte   b;
+    int    ret;
 
     WOLFSSL_ENTER("GetCRL_Signature");
 
-    b = source[*idx];
-    *idx += 1;
-    if (b != ASN_BIT_STRING)
-        return ASN_BITSTR_E;
-
-    if (GetLength(source, idx, &length, maxIdx) < 0)
-        return ASN_PARSE_E;
-
+    ret = CheckBitString(source, idx, &length, maxIdx, 1, NULL);
+    if (ret != 0)
+        return ret;
     dcrl->sigLength = length;
 
-    if (length > 0) {
-        b = source[*idx];
-        *idx += 1;
-        if (b != 0x00)
-            return ASN_EXPECT_0_E;
-
-        dcrl->sigLength--;
-    }
-
     dcrl->signature = (byte*)&source[*idx];
     *idx += dcrl->sigLength;
 
@@ -10592,7 +10672,7 @@ static int GetCRL_Signature(const byte* source, word32* idx, DecodedCRL* dcrl,
 /* prase crl buffer into decoded state, 0 on success */
 int ParseCRL(DecodedCRL* dcrl, const byte* buff, word32 sz, void* cm)
 {
-    int     version, len, doNextDate = 1;
+    int     ret = 0, version, len, doNextDate = 1;
     word32  oid, idx = 0, dateIdx;
     Signer* ca = NULL;
 
@@ -10675,29 +10755,33 @@ int ParseCRL(DecodedCRL* dcrl, const byte* buff, word32 sz, void* cm)
 
     /* openssl doesn't add skid by default for CRLs cause firefox chokes
        we're not assuming it's available yet */
-    #if !defined(NO_SKID) && defined(CRL_SKID_READY)
-        if (dcrl->extAuthKeyIdSet)
-            ca = GetCA(cm, dcrl->extAuthKeyId);
-        if (ca == NULL)
-            ca = GetCAByName(cm, dcrl->issuerHash);
-    #else /* NO_SKID */
-        ca = GetCA(cm, dcrl->issuerHash);
-    #endif /* NO_SKID */
+#if !defined(NO_SKID) && defined(CRL_SKID_READY)
+    if (dcrl->extAuthKeyIdSet)
+        ca = GetCA(cm, dcrl->extAuthKeyId);
+    if (ca == NULL)
+        ca = GetCAByName(cm, dcrl->issuerHash);
+#else
+    ca = GetCA(cm, dcrl->issuerHash);
+#endif /* !NO_SKID && CRL_SKID_READY */
     WOLFSSL_MSG("About to verify CRL signature");
 
     if (ca) {
+        SignatureCtx sigCtx;
+
         WOLFSSL_MSG("Found CRL issuer CA");
         /* try to confirm/verify signature */
-        #ifndef IGNORE_KEY_EXTENSIONS
-            if ((ca->keyUsage & KEYUSE_CRL_SIGN) == 0) {
-                WOLFSSL_MSG("CA cannot sign CRLs");
-                return ASN_CRL_NO_SIGNER_E;
-            }
-        #endif /* IGNORE_KEY_EXTENSIONS */
-        if (!ConfirmSignature(buff + dcrl->certBegin,
+    #ifndef IGNORE_KEY_EXTENSIONS
+        if ((ca->keyUsage & KEYUSE_CRL_SIGN) == 0) {
+            WOLFSSL_MSG("CA cannot sign CRLs");
+            return ASN_CRL_NO_SIGNER_E;
+        }
+    #endif /* IGNORE_KEY_EXTENSIONS */
+
+        InitSignatureCtx(&sigCtx, dcrl->heap, INVALID_DEVID);
+        if (ConfirmSignature(&sigCtx, buff + dcrl->certBegin,
                 dcrl->sigIndex - dcrl->certBegin,
                 ca->publicKey, ca->pubKeySize, ca->keyOID,
-                dcrl->signature, dcrl->sigLength, dcrl->signatureOID, NULL)) {
+                dcrl->signature, dcrl->sigLength, dcrl->signatureOID) != 0) {
             WOLFSSL_MSG("CRL Confirm signature failed");
             return ASN_CRL_CONFIRM_E;
         }
@@ -10707,7 +10791,7 @@ int ParseCRL(DecodedCRL* dcrl, const byte* buff, word32 sz, void* cm)
         return ASN_CRL_NO_SIGNER_E;
     }
 
-    return 0;
+    return ret;
 }
 
 #endif /* HAVE_CRL */
diff --git a/wolfcrypt/src/des3.c b/wolfcrypt/src/des3.c
old mode 100644
new mode 100755
index 005b03f33..739fb62d6
--- a/wolfcrypt/src/des3.c
+++ b/wolfcrypt/src/des3.c
@@ -26,95 +26,78 @@
 
 #include <wolfssl/wolfcrypt/settings.h>
 
+
 #ifndef NO_DES3
 
 #include <wolfssl/wolfcrypt/des3.h>
 
+/* fips wrapper calls, user can call direct */
 #ifdef HAVE_FIPS
+    int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir)
+    {
+        return Des_SetKey(des, key, iv, dir);
+    }
+    int wc_Des3_SetKey(Des3* des, const byte* key, const byte* iv, int dir)
+    {
+        return Des3_SetKey_fips(des, key, iv, dir);
+    }
+    int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz)
+    {
+        return Des_CbcEncrypt(des, out, in, sz);
+    }
+    int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz)
+    {
+        return Des_CbcDecrypt(des, out, in, sz);
+    }
+    int wc_Des3_CbcEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
+    {
+        return Des3_CbcEncrypt_fips(des, out, in, sz);
+    }
+    int wc_Des3_CbcDecrypt(Des3* des, byte* out, const byte* in, word32 sz)
+    {
+        return Des3_CbcDecrypt_fips(des, out, in, sz);
+    }
 
-int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir)
-{
-    return Des_SetKey(des, key, iv, dir);
-}
+    #ifdef WOLFSSL_DES_ECB
+        /* One block, compatibility only */
+        int wc_Des_EcbEncrypt(Des* des, byte* out, const byte* in, word32 sz)
+        {
+            return Des_EcbEncrypt(des, out, in, sz);
+        }
+        int wc_Des3_EcbEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
+        {
+            return Des3_EcbEncrypt(des, out, in, sz);
+        }
+    #endif /* WOLFSSL_DES_ECB */
 
+    void wc_Des_SetIV(Des* des, const byte* iv)
+    {
+        Des_SetIV(des, iv);
+    }
+    int wc_Des3_SetIV(Des3* des, const byte* iv)
+    {
+        return Des3_SetIV_fips(des, iv);
+    }
 
-int wc_Des3_SetKey(Des3* des, const byte* key, const byte* iv, int dir)
-{
-    return Des3_SetKey_fips(des, key, iv, dir);
-}
-
-
-int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz)
-{
-    return Des_CbcEncrypt(des, out, in, sz);
-}
-
-
-int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz)
-{
-    return Des_CbcDecrypt(des, out, in, sz);
-}
-
-
-int wc_Des3_CbcEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
-{
-    return Des3_CbcEncrypt_fips(des, out, in, sz);
-}
-
-
-int wc_Des3_CbcDecrypt(Des3* des, byte* out, const byte* in, word32 sz)
-{
-    return Des3_CbcDecrypt_fips(des, out, in, sz);
-}
-
-
-#ifdef WOLFSSL_DES_ECB
-
-/* One block, compatibility only */
-int wc_Des_EcbEncrypt(Des* des, byte* out, const byte* in, word32 sz)
-{
-    return Des_EcbEncrypt(des, out, in, sz);
-}
-
-int wc_Des3_EcbEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
-{
-    return Des3_EcbEncrypt(des, out, in, sz);
-}
-#endif /* WOLFSSL_DES_ECB */
-
-
-void wc_Des_SetIV(Des* des, const byte* iv)
-{
-    Des_SetIV(des, iv);
-}
-
-
-int wc_Des3_SetIV(Des3* des, const byte* iv)
-{
-    return Des3_SetIV_fips(des, iv);
-}
-
-
-#ifdef WOLFSSL_ASYNC_CRYPT
-
-/* Initialize Des3 for use with Nitrox device */
-int wc_Des3AsyncInit(Des3* des3, int devId)
-{
-    return Des3AsyncInit(des3, devId);
-}
-
-
-/* Free Des3 from use with Nitrox device */
-void wc_Des3AsyncFree(Des3* des3)
-{
-    Des3AsyncFree(des3);
-}
-
-
-#endif /* WOLFSSL_ASYNC_CRYPT */
+    int wc_Des3Init(Des3* des3, void* heap, int devId)
+    {
+        (void)des3;
+        (void)heap;
+        (void)devId;
+        /* FIPS doesn't support:
+            return Des3Init(des3, heap, devId); */
+        return 0;
+    }
+    void wc_Des3Free(Des3* des3)
+    {
+        (void)des3;
+        /* FIPS doesn't support:
+            Des3Free(des3); */
+    }
 
 #else /* build without fips */
 
+
 #if defined(WOLFSSL_TI_CRYPT)
     #include <wolfcrypt/src/port/ti/ti-des3.c>
 #else
@@ -130,6 +113,7 @@ void wc_Des3AsyncFree(Des3* des3)
 #endif
 
 
+/* Hardware Acceleration */
 #if defined(STM32F2_CRYPTO) || defined(STM32F4_CRYPTO)
 
     /*
@@ -446,227 +430,224 @@ void wc_Des3AsyncFree(Des3* des3)
 
 #elif defined(HAVE_COLDFIRE_SEC)
 
-#include 
+    #include 
 
-#include "sec.h"
-#include "mcf5475_sec.h"
-#include "mcf5475_siu.h"
+    #include "sec.h"
+    #include "mcf5475_sec.h"
+    #include "mcf5475_siu.h"
 
-#if defined (HAVE_THREADX)
-#include "memory_pools.h"
-extern TX_BYTE_POOL mp_ncached;  /* Non Cached memory pool */
-#endif
-
-#define DES_BUFFER_SIZE (DES_BLOCK_SIZE * 64)
-static unsigned char *desBuffIn = NULL ;
-static unsigned char *desBuffOut = NULL ;
-static byte *secIV ;
-static byte *secKey ;
-static volatile SECdescriptorType *secDesc ;
-
-static wolfSSL_Mutex Mutex_DesSEC ;
-
-#define SEC_DESC_DES_CBC_ENCRYPT  0x20500010
-#define SEC_DESC_DES_CBC_DECRYPT  0x20400010
-#define SEC_DESC_DES3_CBC_ENCRYPT 0x20700010
-#define SEC_DESC_DES3_CBC_DECRYPT 0x20600010
-
-#define DES_IVLEN 8
-#define DES_KEYLEN 8
-#define DES3_IVLEN 8
-#define DES3_KEYLEN 24
-
-extern volatile unsigned char __MBAR[];
-
-static void wc_Des_Cbc(byte* out, const byte* in, word32 sz,
-                    byte *key, byte *iv, word32 desc)
-{
-    #ifdef DEBUG_WOLFSSL
-    int ret ;  int stat1,stat2 ;
+    #if defined (HAVE_THREADX)
+    #include "memory_pools.h"
+    extern TX_BYTE_POOL mp_ncached;  /* Non Cached memory pool */
     #endif
-    int size ;
-    volatile int v ;
 
-    wc_LockMutex(&Mutex_DesSEC) ;
+    #define DES_BUFFER_SIZE (DES_BLOCK_SIZE * 64)
+    static unsigned char *desBuffIn = NULL;
+    static unsigned char *desBuffOut = NULL;
+    static byte *secIV;
+    static byte *secKey;
+    static volatile SECdescriptorType *secDesc;
 
-    secDesc->length1 = 0x0;
-    secDesc->pointer1 = NULL;
-    if((desc==SEC_DESC_DES_CBC_ENCRYPT)||(desc==SEC_DESC_DES_CBC_DECRYPT)){
-        secDesc->length2 = DES_IVLEN ;
-        secDesc->length3 = DES_KEYLEN ;
-    } else {
-        secDesc->length2 = DES3_IVLEN ;
-        secDesc->length3 = DES3_KEYLEN ;
-    }
-    secDesc->pointer2 = secIV ;
-    secDesc->pointer3 = secKey;
-    secDesc->pointer4 = desBuffIn ;
-    secDesc->pointer5 = desBuffOut ;
-    secDesc->length6 = 0;
-    secDesc->pointer6 = NULL;
-    secDesc->length7 = 0x0;
-    secDesc->pointer7 = NULL;
-    secDesc->nextDescriptorPtr = NULL ;
+    static wolfSSL_Mutex Mutex_DesSEC;
 
-    while(sz) {
-        XMEMCPY(secIV, iv, secDesc->length2) ;
-        if((sz%DES_BUFFER_SIZE) == sz) {
-            size = sz ;
-            sz = 0 ;
-        } else {
-            size = DES_BUFFER_SIZE ;
-            sz -= DES_BUFFER_SIZE ;
-        }
+    #define SEC_DESC_DES_CBC_ENCRYPT  0x20500010
+    #define SEC_DESC_DES_CBC_DECRYPT  0x20400010
+    #define SEC_DESC_DES3_CBC_ENCRYPT 0x20700010
+    #define SEC_DESC_DES3_CBC_DECRYPT 0x20600010
 
-        XMEMCPY(desBuffIn, in, size) ;
-        XMEMCPY(secKey, key, secDesc->length3) ;
+    #define DES_IVLEN 8
+    #define DES_KEYLEN 8
+    #define DES3_IVLEN 8
+    #define DES3_KEYLEN 24
 
-        secDesc->header = desc ;
-        secDesc->length4 = size;
-        secDesc->length5 = size;
-        /* Point SEC to the location of the descriptor */
-        MCF_SEC_FR0 = (uint32)secDesc;
-        /* Initialize SEC and wait for encryption to complete */
-        MCF_SEC_CCCR0 = 0x0000001a;
-        /* poll SISR to determine when channel is complete */
-        v=0 ;
-        while((secDesc->header>> 24) != 0xff) {
-            if(v++ > 1000)break ;
-        }
+    extern volatile unsigned char __MBAR[];
 
-#ifdef DEBUG_WOLFSSL
-        ret = MCF_SEC_SISRH;
-        stat1 = MCF_SEC_DSR ;
-        stat2 = MCF_SEC_DISR ;
-        if(ret & 0xe0000000) {
-            /* db_printf("Des_Cbc(%x):ISRH=%08x, DSR=%08x, DISR=%08x\n", desc, ret, stat1, stat2) ; */
-        }
-#endif
-
-        XMEMCPY(out, desBuffOut, size) ;
-
-        if((desc==SEC_DESC_DES3_CBC_ENCRYPT)||(desc==SEC_DESC_DES_CBC_ENCRYPT)) {
-            XMEMCPY((void*)iv, (void*)&(out[size-secDesc->length2]), secDesc->length2) ;
-        } else {
-            XMEMCPY((void*)iv, (void*)&(in[size-secDesc->length2]), secDesc->length2) ;
-        }
-
-        in  += size ;
-        out += size ;
-
-    }
-    wc_UnLockMutex(&Mutex_DesSEC) ;
-
-}
-
-
-int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz)
-{
-    wc_Des_Cbc(out, in, sz,  (byte *)des->key,  (byte *)des->reg, SEC_DESC_DES_CBC_ENCRYPT) ;
-    return 0;
-}
-
-int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz)
-{
-    wc_Des_Cbc(out, in, sz,   (byte *)des->key,  (byte *)des->reg, SEC_DESC_DES_CBC_DECRYPT) ;
-    return 0;
-}
-
-int wc_Des3_CbcEncrypt(Des3* des3, byte* out, const byte* in, word32 sz)
-{
-    wc_Des_Cbc(out, in, sz,  (byte *)des3->key,  (byte *)des3->reg, SEC_DESC_DES3_CBC_ENCRYPT) ;
-    return 0;
-}
-
-
-int wc_Des3_CbcDecrypt(Des3* des3, byte* out, const byte* in, word32 sz)
-{
-    wc_Des_Cbc(out, in, sz,   (byte *)des3->key,  (byte *)des3->reg, SEC_DESC_DES3_CBC_DECRYPT) ;
-    return 0;
-}
-
-static void setParity(byte *buf, int len)
-{
-    int i, j ;
-    byte v ;
-    int bits ;
-
-    for(i=0; i<len; i++)
-    {
-        v = buf[i] >> 1 ;
-        buf[i] = v << 1 ;
-        bits = 0 ;
-        for(j=0; j<7; j++)
-        {
-            bits += (v&0x1) ;
-            v = v >> 1 ;
+        #ifdef DEBUG_WOLFSSL
+        int ret;  int stat1,stat2;
+    	  #endif
+        int size;
+        volatile int v;
+
+        wc_LockMutex(&Mutex_DesSEC) ;
+
+        secDesc->length1 = 0x0;
+        secDesc->pointer1 = NULL;
+        if((desc==SEC_DESC_DES_CBC_ENCRYPT)||(desc==SEC_DESC_DES_CBC_DECRYPT)){
+            secDesc->length2 = DES_IVLEN;
+            secDesc->length3 = DES_KEYLEN;
+        } else {
+            secDesc->length2 = DES3_IVLEN;
+            secDesc->length3 = DES3_KEYLEN;
         }
-        buf[i] |= (1 - (bits&0x1)) ;
-    }
+        secDesc->pointer2 = secIV;
+        secDesc->pointer3 = secKey;
+        secDesc->pointer4 = desBuffIn;
+        secDesc->pointer5 = desBuffOut;
+        secDesc->length6 = 0;
+        secDesc->pointer6 = NULL;
+        secDesc->length7 = 0x0;
+        secDesc->pointer7 = NULL;
+        secDesc->nextDescriptorPtr = NULL;
 
-}
+        while(sz) {
+            XMEMCPY(secIV, iv, secDesc->length2);
+            if((sz%DES_BUFFER_SIZE) == sz) {
+                size = sz;
+                sz = 0;
+            } else {
+                size = DES_BUFFER_SIZE;
+                sz -= DES_BUFFER_SIZE;
+            }
 
+            XMEMCPY(desBuffIn, in, size);
+            XMEMCPY(secKey, key, secDesc->length3);
 
-int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir)
-{
-    if(desBuffIn == NULL) {
-        #if defined (HAVE_THREADX)
-        int s1, s2, s3, s4, s5 ;
-        s5 = tx_byte_allocate(&mp_ncached,(void *)&secDesc,
-                                                     sizeof(SECdescriptorType), TX_NO_WAIT);
-        s1 = tx_byte_allocate(&mp_ncached,(void *)&desBuffIn,  DES_BUFFER_SIZE, TX_NO_WAIT);
-        s2 = tx_byte_allocate(&mp_ncached,(void *)&desBuffOut, DES_BUFFER_SIZE, TX_NO_WAIT);
-        /* Don't know des or des3 to be used. Allocate larger buffers */
-        s3 = tx_byte_allocate(&mp_ncached,(void *)&secKey,     DES3_KEYLEN,TX_NO_WAIT);
-        s4 = tx_byte_allocate(&mp_ncached,(void *)&secIV,      DES3_IVLEN,  TX_NO_WAIT);
-        #else
-        #warning "Allocate non-Cache buffers"
+            secDesc->header = desc;
+            secDesc->length4 = size;
+            secDesc->length5 = size;
+            /* Point SEC to the location of the descriptor */
+            MCF_SEC_FR0 = (uint32)secDesc;
+            /* Initialize SEC and wait for encryption to complete */
+            MCF_SEC_CCCR0 = 0x0000001a;
+            /* poll SISR to determine when channel is complete */
+            v=0;
+            while((secDesc->header>> 24) != 0xff) {
+                if(v++ > 1000)break;
+            }
+
+        #ifdef DEBUG_WOLFSSL
+            ret = MCF_SEC_SISRH;
+            stat1 = MCF_SEC_DSR;
+            stat2 = MCF_SEC_DISR;
+            if(ret & 0xe0000000) {
+                /* db_printf("Des_Cbc(%x):ISRH=%08x, DSR=%08x, DISR=%08x\n", desc, ret, stat1, stat2); */
+            }
         #endif
 
-        wc_InitMutex(&Mutex_DesSEC) ;
+            XMEMCPY(out, desBuffOut, size);
+
+            if ((desc==SEC_DESC_DES3_CBC_ENCRYPT)||(desc==SEC_DESC_DES_CBC_ENCRYPT)) {
+                XMEMCPY((void*)iv, (void*)&(out[size-secDesc->length2]), secDesc->length2);
+            } else {
+                XMEMCPY((void*)iv, (void*)&(in[size-secDesc->length2]), secDesc->length2);
+            }
+
+            in  += size;
+            out += size;
+
+        }
+        wc_UnLockMutex(&Mutex_DesSEC) ;
+
     }
 
-    XMEMCPY(des->key, key, DES_KEYLEN);
-    setParity((byte *)des->key, DES_KEYLEN) ;
 
-    if (iv) {
-        XMEMCPY(des->reg, iv, DES_IVLEN);
-    }   else {
-        XMEMSET(des->reg, 0x0, DES_IVLEN) ;
+    int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz)
+    {
+        wc_Des_Cbc(out, in, sz,  (byte *)des->key,  (byte *)des->reg, SEC_DESC_DES_CBC_ENCRYPT);
+        return 0;
     }
-    return 0;
-}
 
-int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir)
-{
+    int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz)
+    {
+        wc_Des_Cbc(out, in, sz,   (byte *)des->key,  (byte *)des->reg, SEC_DESC_DES_CBC_DECRYPT);
+        return 0;
+    }
 
-    if(desBuffIn == NULL) {
+    int wc_Des3_CbcEncrypt(Des3* des3, byte* out, const byte* in, word32 sz)
+    {
+        wc_Des_Cbc(out, in, sz,  (byte *)des3->key,  (byte *)des3->reg, SEC_DESC_DES3_CBC_ENCRYPT);
+    	  return 0;
+    }
+
+
+    int wc_Des3_CbcDecrypt(Des3* des3, byte* out, const byte* in, word32 sz)
+    {
+        wc_Des_Cbc(out, in, sz,   (byte *)des3->key,  (byte *)des3->reg, SEC_DESC_DES3_CBC_DECRYPT);
+    	  return 0;
+    }
+
+    static void setParity(byte *buf, int len)
+    {
+        int i, j;
+        byte v;
+        int bits;
+
+        for (i=0; i<len; i++) {
+            v = buf[i] >> 1;
+            buf[i] = v << 1;
+            bits = 0;
+            for (j=0; j<7; j++) {
+                bits += (v&0x1);
+                v = v >> 1;
+            }
+            buf[i] |= (1 - (bits&0x1));
+        }
+
+    }
+
+    int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir)
+    {
+        if(desBuffIn == NULL) {
         #if defined (HAVE_THREADX)
-        int s1, s2, s3, s4, s5 ;
-        s5 = tx_byte_allocate(&mp_ncached,(void *)&secDesc,
-                                                     sizeof(SECdescriptorType), TX_NO_WAIT);
-        s1 = tx_byte_allocate(&mp_ncached,(void *)&desBuffIn,  DES_BUFFER_SIZE, TX_NO_WAIT);
-        s2 = tx_byte_allocate(&mp_ncached,(void *)&desBuffOut, DES_BUFFER_SIZE, TX_NO_WAIT);
-        s3 = tx_byte_allocate(&mp_ncached,(void *)&secKey,     DES3_KEYLEN,TX_NO_WAIT);
-        s4 = tx_byte_allocate(&mp_ncached,(void *)&secIV,      DES3_IVLEN,  TX_NO_WAIT);
+    			  int s1, s2, s3, s4, s5;
+            s5 = tx_byte_allocate(&mp_ncached,(void *)&secDesc,
+                                                         sizeof(SECdescriptorType), TX_NO_WAIT);
+            s1 = tx_byte_allocate(&mp_ncached,(void *)&desBuffIn,  DES_BUFFER_SIZE, TX_NO_WAIT);
+            s2 = tx_byte_allocate(&mp_ncached,(void *)&desBuffOut, DES_BUFFER_SIZE, TX_NO_WAIT);
+            /* Don't know des or des3 to be used. Allocate larger buffers */
+            s3 = tx_byte_allocate(&mp_ncached,(void *)&secKey,     DES3_KEYLEN,TX_NO_WAIT);
+            s4 = tx_byte_allocate(&mp_ncached,(void *)&secIV,      DES3_IVLEN,  TX_NO_WAIT);
         #else
-        #warning "Allocate non-Cache buffers"
+            #warning "Allocate non-Cache buffers"
         #endif
 
-        wc_InitMutex(&Mutex_DesSEC) ;
+            wc_InitMutex(&Mutex_DesSEC);
+        }
+
+        XMEMCPY(des->key, key, DES_KEYLEN);
+        setParity((byte *)des->key, DES_KEYLEN);
+
+        if (iv) {
+            XMEMCPY(des->reg, iv, DES_IVLEN);
+        }   else {
+            XMEMSET(des->reg, 0x0, DES_IVLEN);
+        }
+    		return 0;
     }
 
-    XMEMCPY(des3->key[0], key, DES3_KEYLEN);
-    setParity((byte *)des3->key[0], DES3_KEYLEN) ;
+    int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir)
+    {
+
+        if(desBuffIn == NULL) {
+        #if defined (HAVE_THREADX)
+    			  int s1, s2, s3, s4, s5;
+            s5 = tx_byte_allocate(&mp_ncached,(void *)&secDesc,
+                                                         sizeof(SECdescriptorType), TX_NO_WAIT);
+            s1 = tx_byte_allocate(&mp_ncached,(void *)&desBuffIn,  DES_BUFFER_SIZE, TX_NO_WAIT);
+            s2 = tx_byte_allocate(&mp_ncached,(void *)&desBuffOut, DES_BUFFER_SIZE, TX_NO_WAIT);
+            s3 = tx_byte_allocate(&mp_ncached,(void *)&secKey,     DES3_KEYLEN,TX_NO_WAIT);
+            s4 = tx_byte_allocate(&mp_ncached,(void *)&secIV,      DES3_IVLEN,  TX_NO_WAIT);
+        #else
+            #warning "Allocate non-Cache buffers"
+        #endif
+
+            wc_InitMutex(&Mutex_DesSEC);
+        }
+
+        XMEMCPY(des3->key[0], key, DES3_KEYLEN);
+        setParity((byte *)des3->key[0], DES3_KEYLEN);
+
+        if (iv) {
+            XMEMCPY(des3->reg, iv, DES3_IVLEN);
+        }   else {
+            XMEMSET(des3->reg, 0x0, DES3_IVLEN);
+        }
+        return 0;
 
-    if (iv) {
-        XMEMCPY(des3->reg, iv, DES3_IVLEN);
-    }   else {
-        XMEMSET(des3->reg, 0x0, DES3_IVLEN) ;
     }
-    return 0;
-
-}
 #elif (defined FREESCALE_LTC_DES)
 
     #include "fsl_ltc.h"
@@ -753,7 +734,7 @@ int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir)
             return -1;
 
     }
-#elif defined FREESCALE_MMCAU
+#elif defined(FREESCALE_MMCAU)
     /*
      * Freescale mmCAU hardware DES/3DES support through the CAU/mmCAU library.
      * Documentation located in ColdFire/ColdFire+ CAU and Kinetis mmCAU
@@ -761,8 +742,7 @@ int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir)
      */
     #include "fsl_mmcau.h"
 
-    const unsigned char parityLookup[128] =
-    {
+    const unsigned char parityLookup[128] = {
         1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
         0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
         0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
@@ -824,7 +804,7 @@ int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir)
         byte temp_block[DES_BLOCK_SIZE];
 
         iv = (byte*)des->reg;
-        
+
         while (len > 0)
         {
             XMEMCPY(temp_block, in + offset, DES_BLOCK_SIZE);
@@ -897,7 +877,7 @@ int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir)
         byte temp_block[DES_BLOCK_SIZE];
 
         iv = (byte*)des->reg;
-        
+
         while (len > 0)
         {
             XMEMCPY(temp_block, in + offset, DES_BLOCK_SIZE);
@@ -969,13 +949,10 @@ int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir)
 
     #include "wolfssl/wolfcrypt/port/pic32/pic32mz-crypt.h"
 
-void wc_Des_SetIV(Des* des, const byte* iv);
-int  wc_Des3_SetIV(Des3* des, const byte* iv);
-
     int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir)
     {
-        word32 *dkey = des->key ;
-        word32 *dreg = des->reg ;
+        word32 *dkey = des->key;
+        word32 *dreg = des->reg;
 
         XMEMCPY((byte *)dkey, (byte *)key, 8);
         ByteReverseWords(dkey, dkey, 8);
@@ -988,12 +965,12 @@ int  wc_Des3_SetIV(Des3* des, const byte* iv);
     int wc_Des3_SetKey(Des3* des, const byte* key, const byte* iv, int dir)
     {
         word32 *dkey1 = des->key[0];
-        word32 *dreg = des->reg ;
+        word32 *dreg = des->reg;
 
         XMEMCPY(dkey1, key, 24);
         ByteReverseWords(dkey1, dkey1, 24);
         XMEMCPY(dreg, iv, 8);
-        ByteReverseWords(dreg, dreg, 8) ;
+        ByteReverseWords(dreg, dreg, 8);
 
         return 0;
     }
@@ -1001,21 +978,21 @@ int  wc_Des3_SetIV(Des3* des, const byte* iv);
     void DesCrypt(word32 *key, word32 *iv, byte* out, const byte* in, word32 sz,
                   int dir, int algo, int cryptoalgo)
     {
-        securityAssociation *sa_p ;
-        bufferDescriptor *bd_p ;
-        const byte *in_p, *in_l ;
-        byte *out_p, *out_l ;
+        securityAssociation *sa_p;
+        bufferDescriptor *bd_p;
+        const byte *in_p, *in_l;
+        byte *out_p, *out_l;
         volatile securityAssociation sa __attribute__((aligned (8)));
         volatile bufferDescriptor bd __attribute__((aligned (8)));
-        volatile int k ;
+        volatile int k;
 
         /* get uncached address */
 
         in_l = in;
-        out_l = out ;
-        sa_p = KVA0_TO_KVA1(&sa) ;
-        bd_p = KVA0_TO_KVA1(&bd) ;
-        in_p = KVA0_TO_KVA1(in_l) ;
+        out_l = out;
+        sa_p = KVA0_TO_KVA1(&sa);
+        bd_p = KVA0_TO_KVA1(&bd);
+        in_p = KVA0_TO_KVA1(in_l);
         out_p= KVA0_TO_KVA1(out_l);
 
         if(PIC32MZ_IF_RAM(in_p))
@@ -1024,13 +1001,13 @@ int  wc_Des3_SetIV(Des3* des, const byte* iv);
 
         /* Set up the Security Association */
         XMEMSET((byte *)KVA0_TO_KVA1(&sa), 0, sizeof(sa));
-        sa_p->SA_CTRL.ALGO = algo ;
+        sa_p->SA_CTRL.ALGO = algo;
         sa_p->SA_CTRL.LNC = 1;
         sa_p->SA_CTRL.LOADIV = 1;
         sa_p->SA_CTRL.FB = 1;
-        sa_p->SA_CTRL.ENCTYPE = dir ; /* Encryption/Decryption */
+        sa_p->SA_CTRL.ENCTYPE = dir; /* Encryption/Decryption */
         sa_p->SA_CTRL.CRYPTOALGO = cryptoalgo;
-        sa_p->SA_CTRL.KEYSIZE = 1 ; /* KEY is 192 bits */
+        sa_p->SA_CTRL.KEYSIZE = 1; /* KEY is 192 bits */
         XMEMCPY((byte *)KVA0_TO_KVA1(&sa.SA_ENCKEY[algo==PIC32_ALGO_TDES ? 2 : 6]),
                 (byte *)key, algo==PIC32_ALGO_TDES ? 24 : 8);
         XMEMCPY((byte *)KVA0_TO_KVA1(&sa.SA_ENCIV[2]), (byte *)iv, 8);
@@ -1043,30 +1020,30 @@ int  wc_Des3_SetIV(Des3* des, const byte* iv);
         bd_p->BD_CTRL.LAST_BD = 1;
         bd_p->BD_CTRL.DESC_EN = 1;
 
-        bd_p->SA_ADDR = (unsigned int)KVA_TO_PA(&sa) ; /* (unsigned int)sa_p; */
-        bd_p->SRCADDR = (unsigned int)KVA_TO_PA(in) ; /* (unsigned int)in_p; */
+        bd_p->SA_ADDR = (unsigned int)KVA_TO_PA(&sa); /* (unsigned int)sa_p; */
+        bd_p->SRCADDR = (unsigned int)KVA_TO_PA(in); /* (unsigned int)in_p; */
         bd_p->DSTADDR = (unsigned int)KVA_TO_PA(out); /* (unsigned int)out_p; */
         bd_p->NXTPTR = (unsigned int)KVA_TO_PA(&bd);
-        bd_p->MSGLEN = sz ;
+        bd_p->MSGLEN = sz;
 
         /* Fire in the hole! */
         CECON = 1 << 6;
         while (CECON);
 
         /* Run the engine */
-        CEBDPADDR = (unsigned int)KVA_TO_PA(&bd) ; /* (unsigned int)bd_p ; */
+        CEBDPADDR = (unsigned int)KVA_TO_PA(&bd); /* (unsigned int)bd_p; */
         CEINTEN = 0x07;
         CECON = 0x27;
 
-        WAIT_ENGINE ;
+        WAIT_ENGINE;
 
         if((cryptoalgo == PIC32_CRYPTOALGO_CBC) ||
            (cryptoalgo == PIC32_CRYPTOALGO_TCBC)||
            (cryptoalgo == PIC32_CRYPTOALGO_RCBC)) {
             /* set iv for the next call */
             if(dir == PIC32_ENCRYPTION) {
-                XMEMCPY((void *)iv, (void*)&(out_p[sz-DES_IVLEN]), DES_IVLEN) ;
-            } else {
+	            XMEMCPY((void *)iv, (void*)&(out_p[sz-DES_IVLEN]), DES_IVLEN);
+	        } else {
                 ByteReverseWords((word32*)iv, (word32 *)&(in_p[sz-DES_IVLEN]),
                                  DES_IVLEN);
             }
@@ -1103,559 +1080,575 @@ int  wc_Des3_SetIV(Des3* des, const byte* iv);
         return 0;
     }
 
-#else /* Begin wolfCrypt software implementation */
-
-/* permuted choice table (key) */
-static const byte pc1[] = {
-       57, 49, 41, 33, 25, 17,  9,
-        1, 58, 50, 42, 34, 26, 18,
-       10,  2, 59, 51, 43, 35, 27,
-       19, 11,  3, 60, 52, 44, 36,
-
-       63, 55, 47, 39, 31, 23, 15,
-        7, 62, 54, 46, 38, 30, 22,
-       14,  6, 61, 53, 45, 37, 29,
-       21, 13,  5, 28, 20, 12,  4
-};
-
-/* number left rotations of pc1 */
-static const byte totrot[] = {
-       1,2,4,6,8,10,12,14,15,17,19,21,23,25,27,28
-};
-
-/* permuted choice key (table) */
-static const byte pc2[] = {
-       14, 17, 11, 24,  1,  5,
-        3, 28, 15,  6, 21, 10,
-       23, 19, 12,  4, 26,  8,
-       16,  7, 27, 20, 13,  2,
-       41, 52, 31, 37, 47, 55,
-       30, 40, 51, 45, 33, 48,
-       44, 49, 39, 56, 34, 53,
-       46, 42, 50, 36, 29, 32
-};
-
-/* End of DES-defined tables */
-
-/* bit 0 is left-most in byte */
-static const int bytebit[] = {
-       0200,0100,040,020,010,04,02,01
-};
-
-static const word32 Spbox[8][64] = {
-{
-0x01010400,0x00000000,0x00010000,0x01010404,
-0x01010004,0x00010404,0x00000004,0x00010000,
-0x00000400,0x01010400,0x01010404,0x00000400,
-0x01000404,0x01010004,0x01000000,0x00000004,
-0x00000404,0x01000400,0x01000400,0x00010400,
-0x00010400,0x01010000,0x01010000,0x01000404,
-0x00010004,0x01000004,0x01000004,0x00010004,
-0x00000000,0x00000404,0x00010404,0x01000000,
-0x00010000,0x01010404,0x00000004,0x01010000,
-0x01010400,0x01000000,0x01000000,0x00000400,
-0x01010004,0x00010000,0x00010400,0x01000004,
-0x00000400,0x00000004,0x01000404,0x00010404,
-0x01010404,0x00010004,0x01010000,0x01000404,
-0x01000004,0x00000404,0x00010404,0x01010400,
-0x00000404,0x01000400,0x01000400,0x00000000,
-0x00010004,0x00010400,0x00000000,0x01010004},
-{
-0x80108020,0x80008000,0x00008000,0x00108020,
-0x00100000,0x00000020,0x80100020,0x80008020,
-0x80000020,0x80108020,0x80108000,0x80000000,
-0x80008000,0x00100000,0x00000020,0x80100020,
-0x00108000,0x00100020,0x80008020,0x00000000,
-0x80000000,0x00008000,0x00108020,0x80100000,
-0x00100020,0x80000020,0x00000000,0x00108000,
-0x00008020,0x80108000,0x80100000,0x00008020,
-0x00000000,0x00108020,0x80100020,0x00100000,
-0x80008020,0x80100000,0x80108000,0x00008000,
-0x80100000,0x80008000,0x00000020,0x80108020,
-0x00108020,0x00000020,0x00008000,0x80000000,
-0x00008020,0x80108000,0x00100000,0x80000020,
-0x00100020,0x80008020,0x80000020,0x00100020,
-0x00108000,0x00000000,0x80008000,0x00008020,
-0x80000000,0x80100020,0x80108020,0x00108000},
-{
-0x00000208,0x08020200,0x00000000,0x08020008,
-0x08000200,0x00000000,0x00020208,0x08000200,
-0x00020008,0x08000008,0x08000008,0x00020000,
-0x08020208,0x00020008,0x08020000,0x00000208,
-0x08000000,0x00000008,0x08020200,0x00000200,
-0x00020200,0x08020000,0x08020008,0x00020208,
-0x08000208,0x00020200,0x00020000,0x08000208,
-0x00000008,0x08020208,0x00000200,0x08000000,
-0x08020200,0x08000000,0x00020008,0x00000208,
-0x00020000,0x08020200,0x08000200,0x00000000,
-0x00000200,0x00020008,0x08020208,0x08000200,
-0x08000008,0x00000200,0x00000000,0x08020008,
-0x08000208,0x00020000,0x08000000,0x08020208,
-0x00000008,0x00020208,0x00020200,0x08000008,
-0x08020000,0x08000208,0x00000208,0x08020000,
-0x00020208,0x00000008,0x08020008,0x00020200},
-{
-0x00802001,0x00002081,0x00002081,0x00000080,
-0x00802080,0x00800081,0x00800001,0x00002001,
-0x00000000,0x00802000,0x00802000,0x00802081,
-0x00000081,0x00000000,0x00800080,0x00800001,
-0x00000001,0x00002000,0x00800000,0x00802001,
-0x00000080,0x00800000,0x00002001,0x00002080,
-0x00800081,0x00000001,0x00002080,0x00800080,
-0x00002000,0x00802080,0x00802081,0x00000081,
-0x00800080,0x00800001,0x00802000,0x00802081,
-0x00000081,0x00000000,0x00000000,0x00802000,
-0x00002080,0x00800080,0x00800081,0x00000001,
-0x00802001,0x00002081,0x00002081,0x00000080,
-0x00802081,0x00000081,0x00000001,0x00002000,
-0x00800001,0x00002001,0x00802080,0x00800081,
-0x00002001,0x00002080,0x00800000,0x00802001,
-0x00000080,0x00800000,0x00002000,0x00802080},
-{
-0x00000100,0x02080100,0x02080000,0x42000100,
-0x00080000,0x00000100,0x40000000,0x02080000,
-0x40080100,0x00080000,0x02000100,0x40080100,
-0x42000100,0x42080000,0x00080100,0x40000000,
-0x02000000,0x40080000,0x40080000,0x00000000,
-0x40000100,0x42080100,0x42080100,0x02000100,
-0x42080000,0x40000100,0x00000000,0x42000000,
-0x02080100,0x02000000,0x42000000,0x00080100,
-0x00080000,0x42000100,0x00000100,0x02000000,
-0x40000000,0x02080000,0x42000100,0x40080100,
-0x02000100,0x40000000,0x42080000,0x02080100,
-0x40080100,0x00000100,0x02000000,0x42080000,
-0x42080100,0x00080100,0x42000000,0x42080100,
-0x02080000,0x00000000,0x40080000,0x42000000,
-0x00080100,0x02000100,0x40000100,0x00080000,
-0x00000000,0x40080000,0x02080100,0x40000100},
-{
-0x20000010,0x20400000,0x00004000,0x20404010,
-0x20400000,0x00000010,0x20404010,0x00400000,
-0x20004000,0x00404010,0x00400000,0x20000010,
-0x00400010,0x20004000,0x20000000,0x00004010,
-0x00000000,0x00400010,0x20004010,0x00004000,
-0x00404000,0x20004010,0x00000010,0x20400010,
-0x20400010,0x00000000,0x00404010,0x20404000,
-0x00004010,0x00404000,0x20404000,0x20000000,
-0x20004000,0x00000010,0x20400010,0x00404000,
-0x20404010,0x00400000,0x00004010,0x20000010,
-0x00400000,0x20004000,0x20000000,0x00004010,
-0x20000010,0x20404010,0x00404000,0x20400000,
-0x00404010,0x20404000,0x00000000,0x20400010,
-0x00000010,0x00004000,0x20400000,0x00404010,
-0x00004000,0x00400010,0x20004010,0x00000000,
-0x20404000,0x20000000,0x00400010,0x20004010},
-{
-0x00200000,0x04200002,0x04000802,0x00000000,
-0x00000800,0x04000802,0x00200802,0x04200800,
-0x04200802,0x00200000,0x00000000,0x04000002,
-0x00000002,0x04000000,0x04200002,0x00000802,
-0x04000800,0x00200802,0x00200002,0x04000800,
-0x04000002,0x04200000,0x04200800,0x00200002,
-0x04200000,0x00000800,0x00000802,0x04200802,
-0x00200800,0x00000002,0x04000000,0x00200800,
-0x04000000,0x00200800,0x00200000,0x04000802,
-0x04000802,0x04200002,0x04200002,0x00000002,
-0x00200002,0x04000000,0x04000800,0x00200000,
-0x04200800,0x00000802,0x00200802,0x04200800,
-0x00000802,0x04000002,0x04200802,0x04200000,
-0x00200800,0x00000000,0x00000002,0x04200802,
-0x00000000,0x00200802,0x04200000,0x00000800,
-0x04000002,0x04000800,0x00000800,0x00200002},
-{
-0x10001040,0x00001000,0x00040000,0x10041040,
-0x10000000,0x10001040,0x00000040,0x10000000,
-0x00040040,0x10040000,0x10041040,0x00041000,
-0x10041000,0x00041040,0x00001000,0x00000040,
-0x10040000,0x10000040,0x10001000,0x00001040,
-0x00041000,0x00040040,0x10040040,0x10041000,
-0x00001040,0x00000000,0x00000000,0x10040040,
-0x10000040,0x10001000,0x00041040,0x00040000,
-0x00041040,0x00040000,0x10041000,0x00001000,
-0x00000040,0x10040040,0x00001000,0x00041040,
-0x10001000,0x00000040,0x10000040,0x10040000,
-0x10040040,0x10000000,0x00040000,0x10001040,
-0x00000000,0x10041040,0x00040040,0x10000040,
-0x10040000,0x10001000,0x10001040,0x00000000,
-0x10041040,0x00041000,0x00041000,0x00001040,
-0x00001040,0x00040040,0x10000000,0x10041000}
-};
-
-
-static INLINE void IPERM(word32* left, word32* right)
-{
-    word32 work;
-
-    *right = rotlFixed(*right, 4U);
-    work = (*left ^ *right) & 0xf0f0f0f0;
-    *left ^= work;
-
-    *right = rotrFixed(*right^work, 20U);
-    work = (*left ^ *right) & 0xffff0000;
-    *left ^= work;
-
-    *right = rotrFixed(*right^work, 18U);
-    work = (*left ^ *right) & 0x33333333;
-    *left ^= work;
-
-    *right = rotrFixed(*right^work, 6U);
-    work = (*left ^ *right) & 0x00ff00ff;
-    *left ^= work;
-
-    *right = rotlFixed(*right^work, 9U);
-    work = (*left ^ *right) & 0xaaaaaaaa;
-    *left = rotlFixed(*left^work, 1U);
-    *right ^= work;
-}
-
-
-static INLINE void FPERM(word32* left, word32* right)
-{
-    word32 work;
-
-    *right = rotrFixed(*right, 1U);
-    work = (*left ^ *right) & 0xaaaaaaaa;
-    *right ^= work;
-
-    *left = rotrFixed(*left^work, 9U);
-    work = (*left ^ *right) & 0x00ff00ff;
-    *right ^= work;
-
-    *left = rotlFixed(*left^work, 6U);
-    work = (*left ^ *right) & 0x33333333;
-    *right ^= work;
-
-    *left = rotlFixed(*left^work, 18U);
-    work = (*left ^ *right) & 0xffff0000;
-    *right ^= work;
-
-    *left = rotlFixed(*left^work, 20U);
-    work = (*left ^ *right) & 0xf0f0f0f0;
-    *right ^= work;
-
-    *left = rotrFixed(*left^work, 4U);
-}
-
-
-static int DesSetKey(const byte* key, int dir, word32* out)
-{
-#ifdef WOLFSSL_SMALL_STACK
-    byte* buffer = (byte*)XMALLOC(56+56+8, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-
-    if (buffer == NULL)
-        return MEMORY_E;
 #else
-    byte buffer[56+56+8];
+    #define NEED_SOFT_DES
+
 #endif
 
+
+#ifdef NEED_SOFT_DES
+
+    /* permuted choice table (key) */
+    static const byte pc1[] = {
+           57, 49, 41, 33, 25, 17,  9,
+            1, 58, 50, 42, 34, 26, 18,
+           10,  2, 59, 51, 43, 35, 27,
+           19, 11,  3, 60, 52, 44, 36,
+
+           63, 55, 47, 39, 31, 23, 15,
+            7, 62, 54, 46, 38, 30, 22,
+           14,  6, 61, 53, 45, 37, 29,
+           21, 13,  5, 28, 20, 12,  4
+    };
+
+    /* number left rotations of pc1 */
+    static const byte totrot[] = {
+           1,2,4,6,8,10,12,14,15,17,19,21,23,25,27,28
+    };
+
+    /* permuted choice key (table) */
+    static const byte pc2[] = {
+           14, 17, 11, 24,  1,  5,
+            3, 28, 15,  6, 21, 10,
+           23, 19, 12,  4, 26,  8,
+           16,  7, 27, 20, 13,  2,
+           41, 52, 31, 37, 47, 55,
+           30, 40, 51, 45, 33, 48,
+           44, 49, 39, 56, 34, 53,
+           46, 42, 50, 36, 29, 32
+    };
+
+    /* End of DES-defined tables */
+
+    /* bit 0 is left-most in byte */
+    static const int bytebit[] = {
+        0200,0100,040,020,010,04,02,01
+    };
+
+    static const word32 Spbox[8][64] = {
+    {   0x01010400,0x00000000,0x00010000,0x01010404,
+        0x01010004,0x00010404,0x00000004,0x00010000,
+        0x00000400,0x01010400,0x01010404,0x00000400,
+        0x01000404,0x01010004,0x01000000,0x00000004,
+        0x00000404,0x01000400,0x01000400,0x00010400,
+        0x00010400,0x01010000,0x01010000,0x01000404,
+        0x00010004,0x01000004,0x01000004,0x00010004,
+        0x00000000,0x00000404,0x00010404,0x01000000,
+        0x00010000,0x01010404,0x00000004,0x01010000,
+        0x01010400,0x01000000,0x01000000,0x00000400,
+        0x01010004,0x00010000,0x00010400,0x01000004,
+        0x00000400,0x00000004,0x01000404,0x00010404,
+        0x01010404,0x00010004,0x01010000,0x01000404,
+        0x01000004,0x00000404,0x00010404,0x01010400,
+        0x00000404,0x01000400,0x01000400,0x00000000,
+        0x00010004,0x00010400,0x00000000,0x01010004},
+    {   0x80108020,0x80008000,0x00008000,0x00108020,
+        0x00100000,0x00000020,0x80100020,0x80008020,
+        0x80000020,0x80108020,0x80108000,0x80000000,
+        0x80008000,0x00100000,0x00000020,0x80100020,
+        0x00108000,0x00100020,0x80008020,0x00000000,
+        0x80000000,0x00008000,0x00108020,0x80100000,
+        0x00100020,0x80000020,0x00000000,0x00108000,
+        0x00008020,0x80108000,0x80100000,0x00008020,
+        0x00000000,0x00108020,0x80100020,0x00100000,
+        0x80008020,0x80100000,0x80108000,0x00008000,
+        0x80100000,0x80008000,0x00000020,0x80108020,
+        0x00108020,0x00000020,0x00008000,0x80000000,
+        0x00008020,0x80108000,0x00100000,0x80000020,
+        0x00100020,0x80008020,0x80000020,0x00100020,
+        0x00108000,0x00000000,0x80008000,0x00008020,
+        0x80000000,0x80100020,0x80108020,0x00108000},
+    {   0x00000208,0x08020200,0x00000000,0x08020008,
+        0x08000200,0x00000000,0x00020208,0x08000200,
+        0x00020008,0x08000008,0x08000008,0x00020000,
+        0x08020208,0x00020008,0x08020000,0x00000208,
+        0x08000000,0x00000008,0x08020200,0x00000200,
+        0x00020200,0x08020000,0x08020008,0x00020208,
+        0x08000208,0x00020200,0x00020000,0x08000208,
+        0x00000008,0x08020208,0x00000200,0x08000000,
+        0x08020200,0x08000000,0x00020008,0x00000208,
+        0x00020000,0x08020200,0x08000200,0x00000000,
+        0x00000200,0x00020008,0x08020208,0x08000200,
+        0x08000008,0x00000200,0x00000000,0x08020008,
+        0x08000208,0x00020000,0x08000000,0x08020208,
+        0x00000008,0x00020208,0x00020200,0x08000008,
+        0x08020000,0x08000208,0x00000208,0x08020000,
+        0x00020208,0x00000008,0x08020008,0x00020200},
+    {   0x00802001,0x00002081,0x00002081,0x00000080,
+        0x00802080,0x00800081,0x00800001,0x00002001,
+        0x00000000,0x00802000,0x00802000,0x00802081,
+        0x00000081,0x00000000,0x00800080,0x00800001,
+        0x00000001,0x00002000,0x00800000,0x00802001,
+        0x00000080,0x00800000,0x00002001,0x00002080,
+        0x00800081,0x00000001,0x00002080,0x00800080,
+        0x00002000,0x00802080,0x00802081,0x00000081,
+        0x00800080,0x00800001,0x00802000,0x00802081,
+        0x00000081,0x00000000,0x00000000,0x00802000,
+        0x00002080,0x00800080,0x00800081,0x00000001,
+        0x00802001,0x00002081,0x00002081,0x00000080,
+        0x00802081,0x00000081,0x00000001,0x00002000,
+        0x00800001,0x00002001,0x00802080,0x00800081,
+        0x00002001,0x00002080,0x00800000,0x00802001,
+        0x00000080,0x00800000,0x00002000,0x00802080},
+    {   0x00000100,0x02080100,0x02080000,0x42000100,
+        0x00080000,0x00000100,0x40000000,0x02080000,
+        0x40080100,0x00080000,0x02000100,0x40080100,
+        0x42000100,0x42080000,0x00080100,0x40000000,
+        0x02000000,0x40080000,0x40080000,0x00000000,
+        0x40000100,0x42080100,0x42080100,0x02000100,
+        0x42080000,0x40000100,0x00000000,0x42000000,
+        0x02080100,0x02000000,0x42000000,0x00080100,
+        0x00080000,0x42000100,0x00000100,0x02000000,
+        0x40000000,0x02080000,0x42000100,0x40080100,
+        0x02000100,0x40000000,0x42080000,0x02080100,
+        0x40080100,0x00000100,0x02000000,0x42080000,
+        0x42080100,0x00080100,0x42000000,0x42080100,
+        0x02080000,0x00000000,0x40080000,0x42000000,
+        0x00080100,0x02000100,0x40000100,0x00080000,
+        0x00000000,0x40080000,0x02080100,0x40000100},
+    {   0x20000010,0x20400000,0x00004000,0x20404010,
+        0x20400000,0x00000010,0x20404010,0x00400000,
+        0x20004000,0x00404010,0x00400000,0x20000010,
+        0x00400010,0x20004000,0x20000000,0x00004010,
+        0x00000000,0x00400010,0x20004010,0x00004000,
+        0x00404000,0x20004010,0x00000010,0x20400010,
+        0x20400010,0x00000000,0x00404010,0x20404000,
+        0x00004010,0x00404000,0x20404000,0x20000000,
+        0x20004000,0x00000010,0x20400010,0x00404000,
+        0x20404010,0x00400000,0x00004010,0x20000010,
+        0x00400000,0x20004000,0x20000000,0x00004010,
+        0x20000010,0x20404010,0x00404000,0x20400000,
+        0x00404010,0x20404000,0x00000000,0x20400010,
+        0x00000010,0x00004000,0x20400000,0x00404010,
+        0x00004000,0x00400010,0x20004010,0x00000000,
+        0x20404000,0x20000000,0x00400010,0x20004010},
+    {   0x00200000,0x04200002,0x04000802,0x00000000,
+        0x00000800,0x04000802,0x00200802,0x04200800,
+        0x04200802,0x00200000,0x00000000,0x04000002,
+        0x00000002,0x04000000,0x04200002,0x00000802,
+        0x04000800,0x00200802,0x00200002,0x04000800,
+        0x04000002,0x04200000,0x04200800,0x00200002,
+        0x04200000,0x00000800,0x00000802,0x04200802,
+        0x00200800,0x00000002,0x04000000,0x00200800,
+        0x04000000,0x00200800,0x00200000,0x04000802,
+        0x04000802,0x04200002,0x04200002,0x00000002,
+        0x00200002,0x04000000,0x04000800,0x00200000,
+        0x04200800,0x00000802,0x00200802,0x04200800,
+        0x00000802,0x04000002,0x04200802,0x04200000,
+        0x00200800,0x00000000,0x00000002,0x04200802,
+        0x00000000,0x00200802,0x04200000,0x00000800,
+        0x04000002,0x04000800,0x00000800,0x00200002},
+    {   0x10001040,0x00001000,0x00040000,0x10041040,
+        0x10000000,0x10001040,0x00000040,0x10000000,
+        0x00040040,0x10040000,0x10041040,0x00041000,
+        0x10041000,0x00041040,0x00001000,0x00000040,
+        0x10040000,0x10000040,0x10001000,0x00001040,
+        0x00041000,0x00040040,0x10040040,0x10041000,
+        0x00001040,0x00000000,0x00000000,0x10040040,
+        0x10000040,0x10001000,0x00041040,0x00040000,
+        0x00041040,0x00040000,0x10041000,0x00001000,
+        0x00000040,0x10040040,0x00001000,0x00041040,
+        0x10001000,0x00000040,0x10000040,0x10040000,
+        0x10040040,0x10000000,0x00040000,0x10001040,
+        0x00000000,0x10041040,0x00040040,0x10000040,
+        0x10040000,0x10001000,0x10001040,0x00000000,
+        0x10041040,0x00041000,0x00041000,0x00001040,
+        0x00001040,0x00040040,0x10000000,0x10041000}
+    };
+
+    static INLINE void IPERM(word32* left, word32* right)
     {
-        byte* const  pc1m = buffer;               /* place to modify pc1 into */
-        byte* const  pcr  = pc1m + 56;            /* place to rotate pc1 into */
-        byte* const  ks   = pcr  + 56;
-        register int i, j, l;
-        int          m;
+        word32 work;
 
-        for (j = 0; j < 56; j++) {             /* convert pc1 to bits of key  */
-            l = pc1[j] - 1;                    /* integer bit location        */
-            m = l & 07;                        /* find bit                    */
-            pc1m[j] = (key[l >> 3] &           /* find which key byte l is in */
-                bytebit[m])                    /* and which bit of that byte  */
-                ? 1 : 0;                       /* and store 1-bit result      */
-        }
+        *right = rotlFixed(*right, 4U);
+        work = (*left ^ *right) & 0xf0f0f0f0;
+        *left ^= work;
 
-        for (i = 0; i < 16; i++) {            /* key chunk for each iteration */
-            XMEMSET(ks, 0, 8);                /* Clear key schedule */
+        *right = rotrFixed(*right^work, 20U);
+        work = (*left ^ *right) & 0xffff0000;
+        *left ^= work;
 
-            for (j = 0; j < 56; j++)          /* rotate pc1 the right amount  */
-                pcr[j] =
-                      pc1m[(l = j + totrot[i]) < (j < 28 ? 28 : 56) ? l : l-28];
+        *right = rotrFixed(*right^work, 18U);
+        work = (*left ^ *right) & 0x33333333;
+        *left ^= work;
 
-            /* rotate left and right halves independently */
-            for (j = 0; j < 48; j++) {        /* select bits individually     */
-                if (pcr[pc2[j] - 1]) {        /* check bit that goes to ks[j] */
-                    l= j % 6;                 /* mask it in if it's there     */
-                    ks[j/6] |= bytebit[l] >> 2;
+        *right = rotrFixed(*right^work, 6U);
+        work = (*left ^ *right) & 0x00ff00ff;
+        *left ^= work;
+
+        *right = rotlFixed(*right^work, 9U);
+        work = (*left ^ *right) & 0xaaaaaaaa;
+        *left = rotlFixed(*left^work, 1U);
+        *right ^= work;
+    }
+
+    static INLINE void FPERM(word32* left, word32* right)
+    {
+        word32 work;
+
+        *right = rotrFixed(*right, 1U);
+        work = (*left ^ *right) & 0xaaaaaaaa;
+        *right ^= work;
+
+        *left = rotrFixed(*left^work, 9U);
+        work = (*left ^ *right) & 0x00ff00ff;
+        *right ^= work;
+
+        *left = rotlFixed(*left^work, 6U);
+        work = (*left ^ *right) & 0x33333333;
+        *right ^= work;
+
+        *left = rotlFixed(*left^work, 18U);
+        work = (*left ^ *right) & 0xffff0000;
+        *right ^= work;
+
+        *left = rotlFixed(*left^work, 20U);
+        work = (*left ^ *right) & 0xf0f0f0f0;
+        *right ^= work;
+
+        *left = rotrFixed(*left^work, 4U);
+    }
+
+    static int DesSetKey(const byte* key, int dir, word32* out)
+    {
+        #define DES_KEY_BUFFER_SIZE (56+56+8)
+    #ifdef WOLFSSL_SMALL_STACK
+        byte* buffer = (byte*)XMALLOC(DES_KEY_BUFFER_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+
+        if (buffer == NULL)
+            return MEMORY_E;
+    #else
+        byte buffer[DES_KEY_BUFFER_SIZE];
+    #endif
+
+        {
+            byte* const  pc1m = buffer;            /* place to modify pc1 into */
+            byte* const  pcr  = pc1m + 56;         /* place to rotate pc1 into */
+            byte* const  ks   = pcr  + 56;
+            register int i, j, l;
+            int          m;
+
+            for (j = 0; j < 56; j++) {             /* convert pc1 to bits of key  */
+                l = pc1[j] - 1;                    /* integer bit location        */
+                m = l & 07;                        /* find bit                    */
+                pc1m[j] = (key[l >> 3] &           /* find which key byte l is in */
+                    bytebit[m])                    /* and which bit of that byte  */
+                    ? 1 : 0;                       /* and store 1-bit result      */
+            }
+
+            for (i = 0; i < 16; i++) {            /* key chunk for each iteration */
+                XMEMSET(ks, 0, 8);                /* Clear key schedule */
+
+                for (j = 0; j < 56; j++)          /* rotate pc1 the right amount  */
+                    pcr[j] =
+                          pc1m[(l = j + totrot[i]) < (j < 28 ? 28 : 56) ? l : l-28];
+
+                /* rotate left and right halves independently */
+                for (j = 0; j < 48; j++) {        /* select bits individually     */
+                    if (pcr[pc2[j] - 1]) {        /* check bit that goes to ks[j] */
+                        l= j % 6;                 /* mask it in if it's there     */
+                        ks[j/6] |= bytebit[l] >> 2;
+                    }
+                }
+
+                /* Now convert to odd/even interleaved form for use in F */
+                out[2*i] = ((word32) ks[0] << 24)
+                         | ((word32) ks[2] << 16)
+                         | ((word32) ks[4] << 8)
+                         | ((word32) ks[6]);
+
+                out[2*i + 1] = ((word32) ks[1] << 24)
+                             | ((word32) ks[3] << 16)
+                             | ((word32) ks[5] << 8)
+                             | ((word32) ks[7]);
+            }
+
+            /* reverse key schedule order */
+            if (dir == DES_DECRYPTION) {
+                for (i = 0; i < 16; i += 2) {
+                    word32 swap = out[i];
+                    out[i] = out[DES_KS_SIZE - 2 - i];
+                    out[DES_KS_SIZE - 2 - i] = swap;
+
+                    swap = out[i + 1];
+                    out[i + 1] = out[DES_KS_SIZE - 1 - i];
+                    out[DES_KS_SIZE - 1 - i] = swap;
                 }
             }
 
-            /* Now convert to odd/even interleaved form for use in F */
-            out[2*i] = ((word32) ks[0] << 24)
-                     | ((word32) ks[2] << 16)
-                     | ((word32) ks[4] << 8)
-                     | ((word32) ks[6]);
-
-            out[2*i + 1] = ((word32) ks[1] << 24)
-                         | ((word32) ks[3] << 16)
-                         | ((word32) ks[5] << 8)
-                         | ((word32) ks[7]);
+    #ifdef WOLFSSL_SMALL_STACK
+            XFREE(buffer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
         }
 
-        /* reverse key schedule order */
-        if (dir == DES_DECRYPTION) {
-            for (i = 0; i < 16; i += 2) {
-                word32 swap = out[i];
-                out[i] = out[DES_KS_SIZE - 2 - i];
-                out[DES_KS_SIZE - 2 - i] = swap;
-
-                swap = out[i + 1];
-                out[i + 1] = out[DES_KS_SIZE - 1 - i];
-                out[DES_KS_SIZE - 1 - i] = swap;
-            }
-        }
-
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(buffer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+        return 0;
     }
 
-    return 0;
-}
-
-
-static INLINE int Reverse(int dir)
-{
-    return !dir;
-}
-
-
-int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir)
-{
-    wc_Des_SetIV(des, iv);
-
-    return DesSetKey(key, dir, des->key);
-}
-
-
-int wc_Des3_SetKey(Des3* des, const byte* key, const byte* iv, int dir)
-{
-    int ret;
-
-#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
-    if (des->asyncDev.marker == WOLFSSL_ASYNC_MARKER_3DES) {
-        return NitroxDes3SetKey(des, key, iv);
-    }
-#endif
-
-    ret = DesSetKey(key + (dir == DES_ENCRYPTION ? 0:16), dir, des->key[0]);
-    if (ret != 0)
-        return ret;
-
-    ret = DesSetKey(key + 8, Reverse(dir), des->key[1]);
-    if (ret != 0)
-        return ret;
-
-    ret = DesSetKey(key + (dir == DES_DECRYPTION ? 0:16), dir, des->key[2]);
-    if (ret != 0)
-        return ret;
-
-    return wc_Des3_SetIV(des, iv);
-}
-
-
-static void DesRawProcessBlock(word32* lIn, word32* rIn, const word32* kptr)
-{
-    word32 l = *lIn, r = *rIn, i;
-
-    for (i=0; i<8; i++)
+    int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir)
     {
-        word32 work = rotrFixed(r, 4U) ^ kptr[4*i+0];
-        l ^= Spbox[6][(work) & 0x3f]
-          ^  Spbox[4][(work >> 8) & 0x3f]
-          ^  Spbox[2][(work >> 16) & 0x3f]
-          ^  Spbox[0][(work >> 24) & 0x3f];
-        work = r ^ kptr[4*i+1];
-        l ^= Spbox[7][(work) & 0x3f]
-          ^  Spbox[5][(work >> 8) & 0x3f]
-          ^  Spbox[3][(work >> 16) & 0x3f]
-          ^  Spbox[1][(work >> 24) & 0x3f];
+        wc_Des_SetIV(des, iv);
 
-        work = rotrFixed(l, 4U) ^ kptr[4*i+2];
-        r ^= Spbox[6][(work) & 0x3f]
-          ^  Spbox[4][(work >> 8) & 0x3f]
-          ^  Spbox[2][(work >> 16) & 0x3f]
-          ^  Spbox[0][(work >> 24) & 0x3f];
-        work = l ^ kptr[4*i+3];
-        r ^= Spbox[7][(work) & 0x3f]
-          ^  Spbox[5][(work >> 8) & 0x3f]
-          ^  Spbox[3][(work >> 16) & 0x3f]
-          ^  Spbox[1][(work >> 24) & 0x3f];
+        return DesSetKey(key, dir, des->key);
     }
 
-    *lIn = l; *rIn = r;
-}
+    int wc_Des3_SetKey(Des3* des, const byte* key, const byte* iv, int dir)
+    {
+        int ret;
 
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES)
+        if (des->asyncDev.marker == WOLFSSL_ASYNC_MARKER_3DES) {
+            /* key_raw/iv_raw point at the caller's original key and IV */
+            des->key_raw = key;
+            des->iv_raw = iv;
 
-static void DesProcessBlock(Des* des, const byte* in, byte* out)
-{
-    word32 l, r;
+            /* continue on to set normal key for smaller DES operations */
+        }
+    #endif /* WOLFSSL_ASYNC_CRYPT */
 
-    XMEMCPY(&l, in, sizeof(l));
-    XMEMCPY(&r, in + sizeof(l), sizeof(r));
-    #ifdef LITTLE_ENDIAN_ORDER
-        l = ByteReverseWord32(l);
-        r = ByteReverseWord32(r);
-    #endif
-    IPERM(&l,&r);
+        ret = DesSetKey(key + (dir == DES_ENCRYPTION ? 0:16), dir, des->key[0]);
+        if (ret != 0)
+            return ret;
 
-    DesRawProcessBlock(&l, &r, des->key);
+        ret = DesSetKey(key + 8, !dir, des->key[1]);
+        if (ret != 0)
+            return ret;
 
-    FPERM(&l,&r);
-    #ifdef LITTLE_ENDIAN_ORDER
-        l = ByteReverseWord32(l);
-        r = ByteReverseWord32(r);
-    #endif
-    XMEMCPY(out, &r, sizeof(r));
-    XMEMCPY(out + sizeof(r), &l, sizeof(l));
-}
+        ret = DesSetKey(key + (dir == DES_DECRYPTION ? 0:16), dir, des->key[2]);
+        if (ret != 0)
+            return ret;
 
-
-static void Des3ProcessBlock(Des3* des, const byte* in, byte* out)
-{
-    word32 l, r;
-
-    XMEMCPY(&l, in, sizeof(l));
-    XMEMCPY(&r, in + sizeof(l), sizeof(r));
-    #ifdef LITTLE_ENDIAN_ORDER
-        l = ByteReverseWord32(l);
-        r = ByteReverseWord32(r);
-    #endif
-    IPERM(&l,&r);
-
-    DesRawProcessBlock(&l, &r, des->key[0]);
-    DesRawProcessBlock(&r, &l, des->key[1]);
-    DesRawProcessBlock(&l, &r, des->key[2]);
-
-    FPERM(&l,&r);
-    #ifdef LITTLE_ENDIAN_ORDER
-        l = ByteReverseWord32(l);
-        r = ByteReverseWord32(r);
-    #endif
-    XMEMCPY(out, &r, sizeof(r));
-    XMEMCPY(out + sizeof(r), &l, sizeof(l));
-}
-
-
-int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz)
-{
-    word32 blocks = sz / DES_BLOCK_SIZE;
-
-    while (blocks--) {
-        xorbuf((byte*)des->reg, in, DES_BLOCK_SIZE);
-        DesProcessBlock(des, (byte*)des->reg, (byte*)des->reg);
-        XMEMCPY(out, des->reg, DES_BLOCK_SIZE);
-
-        out += DES_BLOCK_SIZE;
-        in  += DES_BLOCK_SIZE;
-    }
-    return 0;
-}
-
-
-int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz)
-{
-    word32 blocks = sz / DES_BLOCK_SIZE;
-
-    while (blocks--) {
-        XMEMCPY(des->tmp, in, DES_BLOCK_SIZE);
-        DesProcessBlock(des, (byte*)des->tmp, out);
-        xorbuf(out, (byte*)des->reg, DES_BLOCK_SIZE);
-        XMEMCPY(des->reg, des->tmp, DES_BLOCK_SIZE);
-
-        out += DES_BLOCK_SIZE;
-        in  += DES_BLOCK_SIZE;
-    }
-    return 0;
-}
-
-
-int wc_Des3_CbcEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
-{
-    word32 blocks;
-
-#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
-    if (des->asyncDev.marker == WOLFSSL_ASYNC_MARKER_3DES) {
-        return NitroxDes3CbcEncrypt(des, out, in, sz);
-    }
-#endif
-
-    blocks = sz / DES_BLOCK_SIZE;
-    while (blocks--) {
-        xorbuf((byte*)des->reg, in, DES_BLOCK_SIZE);
-        Des3ProcessBlock(des, (byte*)des->reg, (byte*)des->reg);
-        XMEMCPY(out, des->reg, DES_BLOCK_SIZE);
-
-        out += DES_BLOCK_SIZE;
-        in  += DES_BLOCK_SIZE;
-    }
-    return 0;
-}
-
-
-int wc_Des3_CbcDecrypt(Des3* des, byte* out, const byte* in, word32 sz)
-{
-    word32 blocks;
-
-#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
-    if (des->asyncDev.marker == WOLFSSL_ASYNC_MARKER_3DES) {
-        return NitroxDes3CbcDecrypt(des, out, in, sz);
-    }
-#endif
-
-    blocks = sz / DES_BLOCK_SIZE;
-    while (blocks--) {
-        XMEMCPY(des->tmp, in, DES_BLOCK_SIZE);
-        Des3ProcessBlock(des, (byte*)des->tmp, out);
-        xorbuf(out, (byte*)des->reg, DES_BLOCK_SIZE);
-        XMEMCPY(des->reg, des->tmp, DES_BLOCK_SIZE);
-
-        out += DES_BLOCK_SIZE;
-        in  += DES_BLOCK_SIZE;
-    }
-    return 0;
-}
-
-#ifdef WOLFSSL_DES_ECB
-
-/* One block, compatibility only */
-int wc_Des_EcbEncrypt(Des* des, byte* out, const byte* in, word32 sz)
-{
-    word32 blocks = sz / DES_BLOCK_SIZE;
-
-    if (des == NULL || out == NULL || in == NULL) {
-        return BAD_FUNC_ARG;
+        return wc_Des3_SetIV(des, iv);
     }
 
-    while (blocks--) {
-        DesProcessBlock(des, in, out);
+    static void DesRawProcessBlock(word32* lIn, word32* rIn, const word32* kptr)
+    {
+        word32 l = *lIn, r = *rIn, i;
 
-        out += DES_BLOCK_SIZE;
-        in  += DES_BLOCK_SIZE;
-    }
-    return 0;
-}
+        for (i=0; i<8; i++)
+        {
+            word32 work = rotrFixed(r, 4U) ^ kptr[4*i+0];
+            l ^= Spbox[6][(work) & 0x3f]
+              ^  Spbox[4][(work >> 8) & 0x3f]
+              ^  Spbox[2][(work >> 16) & 0x3f]
+              ^  Spbox[0][(work >> 24) & 0x3f];
+            work = r ^ kptr[4*i+1];
+            l ^= Spbox[7][(work) & 0x3f]
+              ^  Spbox[5][(work >> 8) & 0x3f]
+              ^  Spbox[3][(work >> 16) & 0x3f]
+              ^  Spbox[1][(work >> 24) & 0x3f];
 
-int wc_Des3_EcbEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
-{
-    word32 blocks = sz / DES_BLOCK_SIZE;
-    /* printf("wc_Des3_EcbEncrypt(%016x, %016x, %d)\n",
-        *(unsigned long *)in, *(unsigned long *)out, sz) ; */
+            work = rotrFixed(l, 4U) ^ kptr[4*i+2];
+            r ^= Spbox[6][(work) & 0x3f]
+              ^  Spbox[4][(work >> 8) & 0x3f]
+              ^  Spbox[2][(work >> 16) & 0x3f]
+              ^  Spbox[0][(work >> 24) & 0x3f];
+            work = l ^ kptr[4*i+3];
+            r ^= Spbox[7][(work) & 0x3f]
+              ^  Spbox[5][(work >> 8) & 0x3f]
+              ^  Spbox[3][(work >> 16) & 0x3f]
+              ^  Spbox[1][(work >> 24) & 0x3f];
+        }
 
-    if (des == NULL || out == NULL || in == NULL) {
-        return BAD_FUNC_ARG;
+        *lIn = l; *rIn = r;
     }
 
-    while (blocks--) {
-        Des3ProcessBlock(des, in, out);
+    static void DesProcessBlock(Des* des, const byte* in, byte* out)
+    {
+        word32 l, r;
 
-        out += DES_BLOCK_SIZE;
-        in  += DES_BLOCK_SIZE;
+        XMEMCPY(&l, in, sizeof(l));
+        XMEMCPY(&r, in + sizeof(l), sizeof(r));
+        #ifdef LITTLE_ENDIAN_ORDER
+            l = ByteReverseWord32(l);
+            r = ByteReverseWord32(r);
+        #endif
+        IPERM(&l,&r);
+
+        DesRawProcessBlock(&l, &r, des->key);
+
+        FPERM(&l,&r);
+        #ifdef LITTLE_ENDIAN_ORDER
+            l = ByteReverseWord32(l);
+            r = ByteReverseWord32(r);
+        #endif
+        XMEMCPY(out, &r, sizeof(r));
+        XMEMCPY(out + sizeof(r), &l, sizeof(l));
     }
-    return 0;
-}
 
-#endif /* WOLFSSL_DES_ECB */
+    static void Des3ProcessBlock(Des3* des, const byte* in, byte* out)
+    {
+        word32 l, r;
 
-#endif /* End wolfCrypt software implementation */
+        XMEMCPY(&l, in, sizeof(l));
+        XMEMCPY(&r, in + sizeof(l), sizeof(r));
+        #ifdef LITTLE_ENDIAN_ORDER
+            l = ByteReverseWord32(l);
+            r = ByteReverseWord32(r);
+        #endif
+        IPERM(&l,&r);
+
+        DesRawProcessBlock(&l, &r, des->key[0]);
+        DesRawProcessBlock(&r, &l, des->key[1]);
+        DesRawProcessBlock(&l, &r, des->key[2]);
+
+        FPERM(&l,&r);
+        #ifdef LITTLE_ENDIAN_ORDER
+            l = ByteReverseWord32(l);
+            r = ByteReverseWord32(r);
+        #endif
+        XMEMCPY(out, &r, sizeof(r));
+        XMEMCPY(out + sizeof(r), &l, sizeof(l));
+    }
+
+    int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz)
+    {
+        word32 blocks = sz / DES_BLOCK_SIZE;
+
+        while (blocks--) {
+            xorbuf((byte*)des->reg, in, DES_BLOCK_SIZE);
+            DesProcessBlock(des, (byte*)des->reg, (byte*)des->reg);
+            XMEMCPY(out, des->reg, DES_BLOCK_SIZE);
+
+            out += DES_BLOCK_SIZE;
+            in  += DES_BLOCK_SIZE;
+        }
+        return 0;
+    }
+
+    int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz)
+    {
+        word32 blocks = sz / DES_BLOCK_SIZE;
+
+        while (blocks--) {
+            XMEMCPY(des->tmp, in, DES_BLOCK_SIZE);
+            DesProcessBlock(des, (byte*)des->tmp, out);
+            xorbuf(out, (byte*)des->reg, DES_BLOCK_SIZE);
+            XMEMCPY(des->reg, des->tmp, DES_BLOCK_SIZE);
+
+            out += DES_BLOCK_SIZE;
+            in  += DES_BLOCK_SIZE;
+        }
+        return 0;
+    }
+
+    int wc_Des3_CbcEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
+    {
+        word32 blocks;
+
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES)
+        if (des->asyncDev.marker == WOLFSSL_ASYNC_MARKER_3DES &&
+                                            sz >= WC_ASYNC_THRESH_DES3_CBC) {
+        #if defined(HAVE_CAVIUM)
+            return NitroxDes3CbcEncrypt(des, out, in, sz);
+        #elif defined(HAVE_INTEL_QA)
+            return IntelQaSymDes3CbcEncrypt(&des->asyncDev, out, in, sz,
+                des->key_raw, DES3_KEYLEN, (byte*)des->iv_raw, DES3_IVLEN);
+        #else /* WOLFSSL_ASYNC_CRYPT_TEST */
+            WC_ASYNC_TEST* testDev = &des->asyncDev.test;
+            if (testDev->type == ASYNC_TEST_NONE) {
+                testDev->type = ASYNC_TEST_DES3_CBC_ENCRYPT;
+                testDev->des.des = des;
+                testDev->des.out = out;
+                testDev->des.in = in;
+                testDev->des.sz = sz;
+                return WC_PENDING_E;
+            }
+        #endif
+        }
+    #endif /* WOLFSSL_ASYNC_CRYPT */
+
+        blocks = sz / DES_BLOCK_SIZE;
+        while (blocks--) {
+            xorbuf((byte*)des->reg, in, DES_BLOCK_SIZE);
+            Des3ProcessBlock(des, (byte*)des->reg, (byte*)des->reg);
+            XMEMCPY(out, des->reg, DES_BLOCK_SIZE);
+
+            out += DES_BLOCK_SIZE;
+            in  += DES_BLOCK_SIZE;
+        }
+        return 0;
+    }
+
+    /* 3DES-CBC decrypt sz/DES_BLOCK_SIZE whole blocks of in into out. */
+    int wc_Des3_CbcDecrypt(Des3* des, byte* out, const byte* in, word32 sz)
+    {
+        word32 blocks;
+
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES)
+        if (des->asyncDev.marker == WOLFSSL_ASYNC_MARKER_3DES &&
+                                            sz >= WC_ASYNC_THRESH_DES3_CBC) {
+        #if defined(HAVE_CAVIUM)
+            return NitroxDes3CbcDecrypt(des, out, in, sz);
+        #elif defined(HAVE_INTEL_QA)
+            return IntelQaSymDes3CbcDecrypt(&des->asyncDev, out, in, sz,
+                des->key_raw, DES3_KEYLEN, (byte*)des->iv_raw, DES3_IVLEN);
+        #else /* WOLFSSL_ASYNC_CRYPT_TEST */
+            WC_ASYNC_TEST* testDev = &des->asyncDev.test;
+            if (testDev->type == ASYNC_TEST_NONE) {
+                testDev->type = ASYNC_TEST_DES3_CBC_DECRYPT;
+                testDev->des.des = des;
+                testDev->des.out = out;
+                testDev->des.in = in;
+                testDev->des.sz = sz;
+                return WC_PENDING_E;
+            }
+        #endif
+        }
+    #endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_3DES (same as encrypt) */
+
+        blocks = sz / DES_BLOCK_SIZE;
+        while (blocks--) {
+            XMEMCPY(des->tmp, in, DES_BLOCK_SIZE); /* save ciphertext block */
+            Des3ProcessBlock(des, (byte*)des->tmp, out);
+            xorbuf(out, (byte*)des->reg, DES_BLOCK_SIZE); /* undo chaining */
+            XMEMCPY(des->reg, des->tmp, DES_BLOCK_SIZE); /* next chain value */
+
+            out += DES_BLOCK_SIZE;
+            in  += DES_BLOCK_SIZE;
+        }
+        return 0;
+    }
+
+    #ifdef WOLFSSL_DES_ECB
+        /* One block, compatibility only */
+        int wc_Des_EcbEncrypt(Des* des, byte* out, const byte* in, word32 sz)
+        {
+            word32 blocks = sz / DES_BLOCK_SIZE;
+
+            if (des == NULL || out == NULL || in == NULL) {
+                return BAD_FUNC_ARG;
+            }
+
+            while (blocks--) {
+                DesProcessBlock(des, in, out);
+
+                out += DES_BLOCK_SIZE;
+                in  += DES_BLOCK_SIZE;
+            }
+            return 0;
+        }
+
+        int wc_Des3_EcbEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
+        {
+            word32 blocks = sz / DES_BLOCK_SIZE;
+            /* printf("wc_Des3_EcbEncrypt(%016x, %016x, %d)\n",
+                *(unsigned long *)in, *(unsigned long *)out, sz) ; */
+
+            if (des == NULL || out == NULL || in == NULL) {
+                return BAD_FUNC_ARG;
+            }
+
+            while (blocks--) {
+                Des3ProcessBlock(des, in, out);
+
+                out += DES_BLOCK_SIZE;
+                in  += DES_BLOCK_SIZE;
+            }
+            return 0;
+        }
+    #endif /* WOLFSSL_DES_ECB */
+
+#endif /* NEED_SOFT_DES */
 
 
 void wc_Des_SetIV(Des* des, const byte* iv)
@@ -1666,7 +1659,6 @@ void wc_Des_SetIV(Des* des, const byte* iv)
         XMEMSET(des->reg,  0, DES_BLOCK_SIZE);
 }
 
-
 int wc_Des3_SetIV(Des3* des, const byte* iv)
 {
     if (des && iv)
@@ -1678,28 +1670,35 @@ int wc_Des3_SetIV(Des3* des, const byte* iv)
 }
 
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-
-/* Initialize Des3 for use with Nitrox device */
-int wc_Des3AsyncInit(Des3* des3, int devId)
+/* Initialize Des3 for use with async device */
+int wc_Des3Init(Des3* des3, void* heap, int devId)
 {
+    int ret = 0;
     if (des3 == NULL)
         return BAD_FUNC_ARG;
 
-    return wolfAsync_DevCtxInit(&des3->asyncDev, WOLFSSL_ASYNC_MARKER_3DES, devId);
+    des3->heap = heap;
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES)
+    ret = wolfAsync_DevCtxInit(&des3->asyncDev, WOLFSSL_ASYNC_MARKER_3DES,
+                                                        des3->heap, devId);
+#else
+    (void)devId;
+#endif
+
+    return ret;
 }
 
-
-/* Free Des3 from use with Nitrox device */
-void wc_Des3AsyncFree(Des3* des3)
+/* Free Des3 from use with async device */
+void wc_Des3Free(Des3* des3)
 {
     if (des3 == NULL)
         return;
 
-    wolfAsync_DevCtxFree(&des3->asyncDev);
-}
-
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES)
+    wolfAsync_DevCtxFree(&des3->asyncDev, WOLFSSL_ASYNC_MARKER_3DES);
 #endif /* WOLFSSL_ASYNC_CRYPT */
+}
 
 #endif /* WOLFSSL_TI_CRYPT */
 #endif /* HAVE_FIPS */
diff --git a/wolfcrypt/src/dh.c b/wolfcrypt/src/dh.c
old mode 100644
new mode 100755
index a3e2e4619..f67e404d6
--- a/wolfcrypt/src/dh.c
+++ b/wolfcrypt/src/dh.c
@@ -49,26 +49,43 @@
 #endif
 
 
-int wc_InitDhKey(DhKey* key)
+int wc_InitDhKey_ex(DhKey* key, void* heap, int devId)
 {
     int ret = 0;
 
     if (key == NULL)
         return BAD_FUNC_ARG;
 
+    key->heap = heap; /* for XMALLOC/XFREE in future */
+
     if (mp_init_multi(&key->p, &key->g, NULL, NULL, NULL, NULL) != MP_OKAY)
-        ret = MEMORY_E;
+        return MEMORY_E;
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH)
+    /* handle as async */
+    ret = wolfAsync_DevCtxInit(&key->asyncDev, WOLFSSL_ASYNC_MARKER_DH,
+        key->heap, devId);
+#else
+    (void)devId;
+#endif
 
     return ret;
 }
 
+/* Same as wc_InitDhKey_ex() with a NULL heap and INVALID_DEVID as devId. */
+int wc_InitDhKey(DhKey* key)
+{
+    return wc_InitDhKey_ex(key, NULL, INVALID_DEVID);
+}
 
 void wc_FreeDhKey(DhKey* key)
 {
     if (key) {
-    #ifndef USE_FAST_MATH
         mp_clear(&key->p);
         mp_clear(&key->g);
+
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH)
+        wolfAsync_DevCtxFree(&key->asyncDev, WOLFSSL_ASYNC_MARKER_DH);
     #endif
     }
 }
@@ -76,15 +93,15 @@ void wc_FreeDhKey(DhKey* key)
 
 /* if defined to not use floating point values do not compile in */
 #ifndef WOLFSSL_DH_CONST
-static word32 DiscreteLogWorkFactor(word32 n)
-{
-    /* assuming discrete log takes about the same time as factoring */
-    if (n<5)
-        return 0;
-    else
-        return (word32)(2.4 * XPOW((double)n, 1.0/3.0) *
-                XPOW(XLOG((double)n), 2.0/3.0) - 5);
-}
+    static word32 DiscreteLogWorkFactor(word32 n)
+    {
+        /* assuming discrete log takes about the same time as factoring */
+        if (n < 5)
+            return 0;
+        else
+            return (word32)(2.4 * XPOW((double)n, 1.0/3.0) *
+                    XPOW(XLOG((double)n), 2.0/3.0) - 5);
+    }
 #endif /* WOLFSSL_DH_CONST*/
 
 
@@ -104,9 +121,9 @@ static word32 DiscreteLogWorkFactor(word32 n)
 #endif
 
 
-static int GeneratePrivate(DhKey* key, WC_RNG* rng, byte* priv, word32* privSz)
+static int GeneratePrivateDh(DhKey* key, WC_RNG* rng, byte* priv, word32* privSz)
 {
-    int ret;
+    int ret = 0;
     word32 sz = mp_unsigned_bin_size(&key->p);
 
     /* Table of predetermined values from the operation
@@ -124,33 +141,31 @@ static int GeneratePrivate(DhKey* key, WC_RNG* rng, byte* priv, word32* privSz)
         case 896:  sz = 49; break;
         case 1024: sz = 52; break;
         default:
-            #ifndef WOLFSSL_DH_CONST
-                /* if using floating points and size of p is not in table */
-                sz = min(sz, 2 * DiscreteLogWorkFactor(sz * WOLFSSL_BIT_SIZE) /
-                                           WOLFSSL_BIT_SIZE + 1);
-                break;
-            #else
-                return BAD_FUNC_ARG;
-            #endif
+        #ifndef WOLFSSL_DH_CONST
+            /* if using floating points and size of p is not in table */
+            sz = min(sz, 2 * DiscreteLogWorkFactor(sz * WOLFSSL_BIT_SIZE) /
+                                       WOLFSSL_BIT_SIZE + 1);
+            break;
+        #else
+            return BAD_FUNC_ARG;
+        #endif
     }
 
     ret = wc_RNG_GenerateBlock(rng, priv, sz);
-    if (ret != 0)
-        return ret;
 
-    priv[0] |= 0x0C;
+    if (ret == 0) {
+        priv[0] |= 0x0C;
+        *privSz = sz;
+    }
 
-    *privSz = sz;
-
-    return 0;
+    return ret;
 }
 
 
-static int GeneratePublic(DhKey* key, const byte* priv, word32 privSz,
-                          byte* pub, word32* pubSz)
+static int GeneratePublicDh(DhKey* key, byte* priv, word32 privSz,
+    byte* pub, word32* pubSz)
 {
     int ret = 0;
-
     mp_int x;
     mp_int y;
 
@@ -175,22 +190,75 @@ static int GeneratePublic(DhKey* key, const byte* priv, word32 privSz,
     return ret;
 }
 
-
-int wc_DhGenerateKeyPair(DhKey* key, WC_RNG* rng, byte* priv, word32* privSz,
-                      byte* pub, word32* pubSz)
+static int wc_DhGenerateKeyPair_Sync(DhKey* key, WC_RNG* rng,
+    byte* priv, word32* privSz, byte* pub, word32* pubSz)
 {
     int ret;
 
     if (key == NULL || rng == NULL || priv == NULL || privSz == NULL ||
-            pub == NULL || pubSz == NULL) {
+        pub == NULL || pubSz == NULL) {
         return BAD_FUNC_ARG;
     }
 
-    ret = GeneratePrivate(key, rng, priv, privSz);
+    ret = GeneratePrivateDh(key, rng, priv, privSz);
 
-    return (ret != 0) ? ret : GeneratePublic(key, priv, *privSz, pub, pubSz);
+    return (ret != 0) ? ret : GeneratePublicDh(key, priv, *privSz, pub, pubSz);
 }
 
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH)
+/* Async DH key-pair generation; pointer arguments are not checked here. */
+static int wc_DhGenerateKeyPair_Async(DhKey* key, WC_RNG* rng,
+    byte* priv, word32* privSz, byte* pub, word32* pubSz)
+{
+    int ret;
+#if defined(HAVE_INTEL_QA)
+    mp_int x;
+
+    ret = mp_init(&x);
+    if (ret != MP_OKAY)
+        return ret;
+    /* generate priv, then convert x, p and g to .raw for IntelQaDhKeyGen */
+    ret = GeneratePrivateDh(key, rng, priv, privSz);
+    if (ret == 0)
+        ret = mp_read_unsigned_bin(&x, priv, *privSz);
+    if (ret == MP_OKAY)
+        ret = wc_mp_to_bigint(&x, &x.raw);
+    if (ret == MP_OKAY)
+        ret = wc_mp_to_bigint(&key->p, &key->p.raw);
+    if (ret == MP_OKAY)
+        ret = wc_mp_to_bigint(&key->g, &key->g.raw);
+    if (ret == MP_OKAY)
+        ret = IntelQaDhKeyGen(&key->asyncDev, &key->p.raw, &key->g.raw,
+            &x.raw, pub, pubSz);
+    mp_clear(&x);
+
+#else
+
+    #if defined(HAVE_CAVIUM)
+        /* TODO: Not implemented - use software for now */
+
+    #else /* WOLFSSL_ASYNC_CRYPT_TEST */
+        WC_ASYNC_TEST* testDev = &key->asyncDev.test;
+        if (testDev->type == ASYNC_TEST_NONE) {
+            testDev->type = ASYNC_TEST_DH_GEN;
+            testDev->dhGen.key = key;
+            testDev->dhGen.rng = rng;
+            testDev->dhGen.priv = priv;
+            testDev->dhGen.privSz = privSz;
+            testDev->dhGen.pub = pub;
+            testDev->dhGen.pubSz = pubSz;
+            return WC_PENDING_E; /* request recorded in testDev only */
+        }
+    #endif
+
+    ret = wc_DhGenerateKeyPair_Sync(key, rng, priv, privSz, pub, pubSz);
+
+#endif /* HAVE_INTEL_QA */
+
+    return ret;
+}
+#endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_DH */
+
 
 /* Check DH Public Key for invalid numbers
  *
@@ -242,11 +310,34 @@ int wc_DhCheckPubKey(DhKey* key, const byte* pub, word32 pubSz)
 }
 
 
-int wc_DhAgree(DhKey* key, byte* agree, word32* agreeSz, const byte* priv,
-            word32 privSz, const byte* otherPub, word32 pubSz)
+/* Generate a DH private/public key pair.
+ *
+ * All six pointers must be non-NULL, otherwise BAD_FUNC_ARG is returned.
+ * When async DH is compiled in and the key carries the DH async marker the
+ * request goes to wc_DhGenerateKeyPair_Async(); in every other case it is
+ * handled by wc_DhGenerateKeyPair_Sync(). The chosen path's result is
+ * returned unchanged. */
+int wc_DhGenerateKeyPair(DhKey* key, WC_RNG* rng,
+    byte* priv, word32* privSz, byte* pub, word32* pubSz)
+{
+    if (key == NULL || rng == NULL || priv == NULL || privSz == NULL ||
+        pub == NULL || pubSz == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH)
+    if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_DH)
+        return wc_DhGenerateKeyPair_Async(key, rng, priv, privSz, pub, pubSz);
+#endif
+
+    return wc_DhGenerateKeyPair_Sync(key, rng, priv, privSz, pub, pubSz);
+}
+
+
+static int wc_DhAgree_Sync(DhKey* key, byte* agree, word32* agreeSz,
+    const byte* priv, word32 privSz, const byte* otherPub, word32 pubSz)
 {
     int ret = 0;
-
     mp_int x;
     mp_int y;
     mp_int z;
@@ -276,7 +367,65 @@ int wc_DhAgree(DhKey* key, byte* agree, word32* agreeSz, const byte* priv,
 
     mp_clear(&z);
     mp_clear(&y);
-    mp_clear(&x);
+    mp_forcezero(&x);
+
+    return ret;
+}
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH)
+/* Async DH agree; pointer arguments are not checked here. */
+static int wc_DhAgree_Async(DhKey* key, byte* agree, word32* agreeSz,
+    const byte* priv, word32 privSz, const byte* otherPub, word32 pubSz)
+{
+    int ret;
+#ifdef HAVE_CAVIUM
+    /* TODO: Not implemented - use software for now */
+    ret = wc_DhAgree_Sync(key, agree, agreeSz, priv, privSz, otherPub, pubSz);
+
+#elif defined(HAVE_INTEL_QA)
+    ret = wc_mp_to_bigint(&key->p, &key->p.raw); /* fill p.raw, used below */
+    if (ret == MP_OKAY)
+        ret = IntelQaDhAgree(&key->asyncDev, &key->p.raw,
+            agree, agreeSz, priv, privSz, otherPub, pubSz);
+#else /* WOLFSSL_ASYNC_CRYPT_TEST */
+    WC_ASYNC_TEST* testDev = &key->asyncDev.test;
+    if (testDev->type == ASYNC_TEST_NONE) {
+        testDev->type = ASYNC_TEST_DH_AGREE;
+        testDev->dhAgree.key = key;
+        testDev->dhAgree.agree = agree;
+        testDev->dhAgree.agreeSz = agreeSz;
+        testDev->dhAgree.priv = priv;
+        testDev->dhAgree.privSz = privSz;
+        testDev->dhAgree.otherPub = otherPub;
+        testDev->dhAgree.pubSz = pubSz;
+        return WC_PENDING_E; /* request recorded in testDev only */
+    }
+    ret = wc_DhAgree_Sync(key, agree, agreeSz, priv, privSz, otherPub, pubSz);
+#endif
+
+    return ret;
+}
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+int wc_DhAgree(DhKey* key, byte* agree, word32* agreeSz, const byte* priv,
+            word32 privSz, const byte* otherPub, word32 pubSz)
+{
+    int ret = 0;
+
+    if (key == NULL || agree == NULL || agreeSz == NULL || priv == NULL ||
+                                                            otherPub == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH)
+    if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_DH) {
+        ret = wc_DhAgree_Async(key, agree, agreeSz, priv, privSz, otherPub, pubSz);
+    }
+    else
+#endif
+    {
+        ret = wc_DhAgree_Sync(key, agree, agreeSz, priv, privSz, otherPub, pubSz);
+    }
 
     return ret;
 }
@@ -286,8 +435,9 @@ int wc_DhAgree(DhKey* key, byte* agree, word32* agreeSz, const byte* priv,
 int wc_DhSetKey(DhKey* key, const byte* p, word32 pSz, const byte* g,
                 word32 gSz)
 {
-    if (key == NULL || p == NULL || g == NULL || pSz == 0 || gSz == 0)
+    if (key == NULL || p == NULL || g == NULL || pSz == 0 || gSz == 0) {
         return BAD_FUNC_ARG;
+    }
 
     /* may have leading 0 */
     if (p[0] == 0) {
@@ -318,6 +468,4 @@ int wc_DhSetKey(DhKey* key, const byte* p, word32 pSz, const byte* g,
     return 0;
 }
 
-
 #endif /* NO_DH */
-
diff --git a/wolfcrypt/src/ecc.c b/wolfcrypt/src/ecc.c
old mode 100644
new mode 100755
index 3ba711b28..e256d72b4
--- a/wolfcrypt/src/ecc.c
+++ b/wolfcrypt/src/ecc.c
@@ -1016,6 +1016,9 @@ enum ecc_curve_load_mask {
 #ifdef ECC_CACHE_CURVE
     /* cache (mp_int) of the curve parameters */
     static ecc_curve_spec* ecc_curve_spec_cache[ECC_SET_COUNT];
+    #ifndef SINGLE_THREADED
+        static wolfSSL_Mutex ecc_curve_cache_mutex;
+    #endif
 
     #define DECLARE_CURVE_SPECS(intcount) ecc_curve_spec* curve = NULL;
 #else
@@ -1034,8 +1037,6 @@ static void _wc_ecc_curve_free(ecc_curve_spec* curve)
         return;
     }
 
-    /* don't clear fast math (only normal math uses alloc's) */
-#if !defined(USE_FAST_MATH)
     if (curve->load_mask & ECC_CURVE_FIELD_PRIME)
         mp_clear(curve->prime);
     if (curve->load_mask & ECC_CURVE_FIELD_AF)
@@ -1050,7 +1051,7 @@ static void _wc_ecc_curve_free(ecc_curve_spec* curve)
         mp_clear(curve->Gx);
     if (curve->load_mask & ECC_CURVE_FIELD_GY)
         mp_clear(curve->Gy);
-#endif
+
     curve->load_mask = 0;
 }
 
@@ -1082,6 +1083,11 @@ static int wc_ecc_curve_load_item(const char* src, mp_int** dst,
         curve->load_mask |= mask;
 
         err = mp_read_radix(*dst, src, 16);
+
+    #ifdef HAVE_WOLF_BIGINT
+        if (err == MP_OKAY)
+            err = wc_mp_to_bigint(*dst, &(*dst)->raw);
+    #endif
     }
     return err;
 }
@@ -1091,7 +1097,7 @@ static int wc_ecc_curve_load(const ecc_set_type* dp, ecc_curve_spec** pCurve,
 {
     int ret = 0, x;
     ecc_curve_spec* curve;
-    byte load_items; /* mask of items to load */
+    byte load_items = 0; /* mask of items to load */
 
     if (dp == NULL || pCurve == NULL)
         return BAD_FUNC_ARG;
@@ -1133,8 +1139,16 @@ static int wc_ecc_curve_load(const ecc_set_type* dp, ecc_curve_spec** pCurve,
     }
     curve->dp = dp; /* set dp info */
 
+#if defined(ECC_CACHE_CURVE) && !defined(SINGLE_THREADED)
+    ret = wc_LockMutex(&ecc_curve_cache_mutex);
+    if (ret != 0) {
+        return MEMORY_E;
+    }
+#endif
+
     /* determine items to load */
     load_items = (~curve->load_mask & load_mask);
+    curve->load_mask |= load_items;
 
     /* load items */
     x = 0;
@@ -1165,10 +1179,23 @@ static int wc_ecc_curve_load(const ecc_set_type* dp, ecc_curve_spec** pCurve,
         ret = MP_READ_E;
     }
 
+#if defined(ECC_CACHE_CURVE) && !defined(SINGLE_THREADED)
+    wc_UnLockMutex(&ecc_curve_cache_mutex);
+#endif
+
     return ret;
 }
 
 #ifdef ECC_CACHE_CURVE
+int wc_ecc_curve_cache_init(void)
+{
+#if defined(ECC_CACHE_CURVE) && !defined(SINGLE_THREADED)
+    return wc_InitMutex(&ecc_curve_cache_mutex); /* set up the cache lock */
+#else
+    return 0; /* no cache mutex is declared in this configuration */
+#endif
+}
+
 void wc_ecc_curve_cache_free(void)
 {
     int x;
@@ -1181,6 +1208,10 @@ void wc_ecc_curve_cache_free(void)
             ecc_curve_spec_cache[x] = NULL;
         }
     }
+
+#if defined(ECC_CACHE_CURVE) && !defined(SINGLE_THREADED)
+    wc_FreeMutex(&ecc_curve_cache_mutex);
+#endif
 }
 #endif /* ECC_CACHE_CURVE */
 
@@ -1225,7 +1256,8 @@ int wc_ecc_set_curve(ecc_key* key, int keysize, int curve_id)
             }
         }
         if (ecc_sets[x].size == 0) {
-            return ECC_BAD_ARG_E;
+            WOLFSSL_MSG("ECC Curve not found");
+            return ECC_CURVE_OID_E;
         }
 
         key->idx = x;
@@ -1279,10 +1311,8 @@ int ecc_projective_add_point(ecc_point* P, ecc_point* Q, ecc_point* R,
        if ( (mp_cmp(P->x, Q->x) == MP_EQ) &&
             (get_digit_count(Q->z) && mp_cmp(P->z, Q->z) == MP_EQ) &&
             (mp_cmp(P->y, Q->y) == MP_EQ || mp_cmp(P->y, &t1) == MP_EQ)) {
-       #ifndef USE_FAST_MATH
            mp_clear(&t1);
            mp_clear(&t2);
-       #endif
           return ecc_projective_dbl_point(P, R, a, modulus, mp);
        }
    }
@@ -1508,11 +1538,10 @@ int ecc_projective_add_point(ecc_point* P, ecc_point* Q, ecc_point* R,
 #endif
 
 done:
-#ifndef USE_FAST_MATH
+
    /* clean up */
    mp_clear(&t1);
    mp_clear(&t2);
-#endif
 
    return err;
 }
@@ -1570,10 +1599,8 @@ int ecc_projective_dbl_point(ecc_point *P, ecc_point *R, mp_int* a,
    z = &rz;
 
    if ((err = mp_init_multi(x, y, z, NULL, NULL, NULL)) != MP_OKAY) {
-   #ifndef USE_FAST_MATH
        mp_clear(&t1);
        mp_clear(&t2);
-   #endif
        return err;
    }
 #else
@@ -1780,11 +1807,9 @@ int ecc_projective_dbl_point(ecc_point *P, ecc_point *R, mp_int* a,
        err = mp_copy(z, R->z);
 #endif
 
-#ifndef USE_FAST_MATH
    /* clean up */
    mp_clear(&t1);
    mp_clear(&t2);
-#endif
 
    return err;
 }
@@ -1892,10 +1917,8 @@ int ecc_map(ecc_point* P, mp_int* modulus, mp_digit mp)
 
 done:
   /* clean up */
-#ifndef USE_FAST_MATH
    mp_clear(&t1);
    mp_clear(&t2);
-#endif
 
    return err;
 }
@@ -1907,7 +1930,7 @@ done:
     !defined(__cplusplus)
     /* let's use the one we already have */
     extern const wolfssl_word wc_off_on_addr[2];
-#elif defined(ECC_TIMING_RESISTANT)
+#else
     static const wolfssl_word wc_off_on_addr[2] =
     {
     #if defined(WC_64BIT_CPU)
@@ -1979,9 +2002,7 @@ int wc_ecc_mulmod_ex(mp_int* k, ecc_point *G, ecc_point *R,
        return err;
    }
    if ((err = mp_montgomery_calc_normalization(&mu, modulus)) != MP_OKAY) {
-   #ifndef USE_FAST_MATH
        mp_clear(&mu);
-   #endif
        return err;
    }
 
@@ -1989,9 +2010,7 @@ int wc_ecc_mulmod_ex(mp_int* k, ecc_point *G, ecc_point *R,
   for (i = 0; i < M_POINTS; i++) {
       M[i] = wc_ecc_new_point_h(heap);
       if (M[i] == NULL) {
-      #ifndef USE_FAST_MATH
          mp_clear(&mu);
-      #endif
          err = MEMORY_E; goto exit;
       }
   }
@@ -2018,10 +2037,8 @@ int wc_ecc_mulmod_ex(mp_int* k, ecc_point *G, ecc_point *R,
        }
    }
 
-#ifndef USE_FAST_MATH
    /* done with mu */
    mp_clear(&mu);
-#endif
 
 #ifndef ECC_TIMING_RESISTANT
 
@@ -2558,6 +2575,100 @@ int wc_ecc_get_curve_id_from_name(const char* curveName)
     return ecc_sets[curve_idx].id;
 }
 
+/* Compares a curve parameter (hex string, from ecc_sets[]) to a given input
+ * parameter (unsigned byte array) for equality.
+ *
+ * curveParam  curve parameter as a base-16 string
+ * param       value to compare against
+ * paramSz     size of param in octets
+ *
+ * Returns MP_EQ when the values match, -1 when they differ, BAD_FUNC_ARG for
+ * a NULL argument, or the error from mp_init_multi / the mp_read_* calls */
+static int wc_ecc_cmp_param(const char* curveParam,
+                            const byte* param, word32 paramSz)
+{
+    int err;
+    mp_int a, b;
+
+    if (param == NULL || curveParam == NULL)
+        return BAD_FUNC_ARG;
+
+    if ((err = mp_init_multi(&a, &b, NULL, NULL, NULL, NULL)) != MP_OKAY)
+        return err;
+
+    /* a holds the caller's value, b the value from the curve table */
+    err = mp_read_unsigned_bin(&a, param, paramSz);
+    if (err == MP_OKAY)
+        err = mp_read_radix(&b, curveParam, 16);
+
+    /* collapse the three-way mp_cmp() result to equal / not equal */
+    if (err == MP_OKAY)
+        err = (mp_cmp(&a, &b) == MP_EQ) ? MP_EQ : -1;
+
+    /* always release the temporaries; mp_clear() is no longer skipped for
+     * USE_FAST_MATH builds */
+    mp_clear(&a);
+    mp_clear(&b);
+
+    return err;
+}
+
+/* Returns the curve id in ecc_sets[] that corresponds to a given set of
+ * curve parameters.
+ *
+ * fieldSize  the field size in bits
+ * prime      prime of the finite field
+ * primeSz    size of prime in octets
+ * Af         first coefficient a of the curve
+ * AfSz       size of Af in octets
+ * Bf         second coefficient b of the curve
+ * BfSz       size of Bf in octets
+ * order      curve order
+ * orderSz    size of order in octets
+ * Gx         affine x coordinate of base point
+ * GxSz       size of Gx in octets
+ * Gy         affine y coordinate of base point
+ * GySz       size of Gy in octets
+ * cofactor   curve cofactor
+ *
+ * return curve id from ecc_sets[] on success, BAD_FUNC_ARG for a NULL
+ * parameter, ECC_CURVE_INVALID when no curve matches */
+int wc_ecc_get_curve_id_from_params(int fieldSize,
+        const byte* prime, word32 primeSz, const byte* Af, word32 AfSz,
+        const byte* Bf, word32 BfSz, const byte* order, word32 orderSz,
+        const byte* Gx, word32 GxSz, const byte* Gy, word32 GySz, int cofactor)
+{
+    int idx;
+    int curveSz;
+
+    if (prime == NULL || Af == NULL || Bf == NULL || order == NULL ||
+        Gx == NULL || Gy == NULL)
+        return BAD_FUNC_ARG;
+
+    curveSz = (fieldSize + 7) / 8;    /* round up: 521 bits -> 66 octets */
+
+    for (idx = 0; ecc_sets[idx].size != 0; idx++) {
+        if (curveSz == ecc_sets[idx].size) {
+            if ((wc_ecc_cmp_param(ecc_sets[idx].prime, prime,
+                            primeSz) == MP_EQ) &&
+                (wc_ecc_cmp_param(ecc_sets[idx].Af, Af, AfSz) == MP_EQ) &&
+                (wc_ecc_cmp_param(ecc_sets[idx].Bf, Bf, BfSz) == MP_EQ) &&
+                (wc_ecc_cmp_param(ecc_sets[idx].order, order,
+                                  orderSz) == MP_EQ) &&
+                (wc_ecc_cmp_param(ecc_sets[idx].Gx, Gx, GxSz) == MP_EQ) &&
+                (wc_ecc_cmp_param(ecc_sets[idx].Gy, Gy, GySz) == MP_EQ) &&
+                (cofactor == ecc_sets[idx].cofactor)) {
+                    break;
+            }
+        }
+    }
+
+    if (ecc_sets[idx].size == 0)
+        return ECC_CURVE_INVALID;
+
+    return ecc_sets[idx].id;
+}
+
 
 #ifdef HAVE_ECC_DHE
 /**
@@ -2595,31 +2706,12 @@ int wc_ecc_shared_secret(ecc_key* private_key, ecc_key* public_key, byte* out,
       return ECC_BAD_ARG_E;
    }
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    if (private_key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
-    #ifdef HAVE_CAVIUM
-        /* TODO: Not implemented */
-    #else
-        AsyncCryptTestDev* testDev = &private_key->asyncDev.dev;
-        if (testDev->type == ASYNC_TEST_NONE) {
-            testDev->type = ASYNC_TEST_ECC_SHARED_SEC;
-            testDev->eccSharedSec.private_key = private_key;
-            testDev->eccSharedSec.public_key = public_key;
-            testDev->eccSharedSec.out = out;
-            testDev->eccSharedSec.outLen = outlen;
-            return WC_PENDING_E;
-        }
-    #endif
-    }
-#endif
-
 #ifdef WOLFSSL_ATECC508A
    err = atcatls_ecdh(private_key->slot, public_key->pubkey, out);
    if (err != ATCA_SUCCESS) {
       err = BAD_COND_E;
    }
    *outlen = private_key->dp->size;
-
 #else
    err = wc_ecc_shared_secret_ex(private_key, &public_key->pubkey, out, outlen);
 #endif /* WOLFSSL_ATECC508A */
@@ -2693,6 +2785,48 @@ static int wc_ecc_shared_secret_gen_sync(ecc_key* private_key, ecc_point* point,
     return err;
 }
 
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+/* Async ECDH shared secret; arguments are not validated in this helper. */
+static int wc_ecc_shared_secret_gen_async(ecc_key* private_key,
+            ecc_point* point, byte* out, word32 *outlen,
+            ecc_curve_spec* curve)
+{
+    int err;
+#ifdef HAVE_CAVIUM
+    /* TODO: Not implemented - use software for now */
+    err = wc_ecc_shared_secret_gen_sync(private_key, point, out, outlen, curve);
+
+#elif defined(HAVE_INTEL_QA)
+    /* load curve field BF, then convert key k and point x/y to .raw */
+    err = wc_ecc_curve_load(private_key->dp, &curve, ECC_CURVE_FIELD_BF);
+    if (err == MP_OKAY)
+        err = wc_mp_to_bigint(&private_key->k, &private_key->k.raw);
+    if (err == MP_OKAY)
+        err = wc_mp_to_bigint(point->x, &point->x->raw);
+    if (err == MP_OKAY)
+        err = wc_mp_to_bigint(point->y, &point->y->raw);
+    if (err == MP_OKAY)
+        err = IntelQaEcdh(&private_key->asyncDev,
+            &private_key->k.raw, &point->x->raw, &point->y->raw,
+            out, outlen,
+            &curve->Af->raw, &curve->Bf->raw, &curve->prime->raw,
+            private_key->dp->cofactor);
+#else /* WOLFSSL_ASYNC_CRYPT_TEST */
+    WC_ASYNC_TEST* testDev = &private_key->asyncDev.test;
+    if (testDev->type == ASYNC_TEST_NONE) {
+        testDev->type = ASYNC_TEST_ECC_SHARED_SEC;
+        testDev->eccSharedSec.private_key = private_key;
+        testDev->eccSharedSec.public_point = point;
+        testDev->eccSharedSec.out = out;
+        testDev->eccSharedSec.outLen = outlen;
+        return WC_PENDING_E; /* request recorded in testDev only */
+    }
+    err = wc_ecc_shared_secret_gen_sync(private_key, point, out, outlen, curve);
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_ASYNC_CRYPT */
 
 int wc_ecc_shared_secret_gen(ecc_key* private_key, ecc_point* point,
                                                     byte* out, word32 *outlen)
@@ -2711,8 +2845,17 @@ int wc_ecc_shared_secret_gen(ecc_key* private_key, ecc_point* point,
     if (err != MP_OKAY)
         return err;
 
-    err = wc_ecc_shared_secret_gen_sync(private_key, point,
-        out, outlen, curve);
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+    if (private_key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
+        err = wc_ecc_shared_secret_gen_async(private_key, point,
+            out, outlen, curve);
+    }
+    else
+#endif
+    {
+        err = wc_ecc_shared_secret_gen_sync(private_key, point,
+            out, outlen, curve);
+    }
 
     wc_ecc_curve_free(curve);
 
@@ -2761,6 +2904,13 @@ int wc_ecc_shared_secret_ex(ecc_key* private_key, ecc_point* point,
         case ECC_STATE_SHARED_SEC_RES:
             private_key->state = ECC_STATE_SHARED_SEC_RES;
             err = 0;
+        #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+            if (private_key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
+            #if defined(HAVE_CAVIUM) || defined(HAVE_INTEL_QA)
+                err = private_key->asyncDev.event.ret;
+            #endif
+            }
+        #endif
             break;
 
         default:
@@ -2834,11 +2984,6 @@ static int wc_ecc_gen_k(WC_RNG* rng, int size, mp_int* k, mp_int* order)
         }
     }
 
-#ifdef HAVE_WOLF_BIGINT
-    if (err == MP_OKAY)
-        err = wc_mp_to_bigint(k, &k->raw);
-#endif /* HAVE_WOLF_BIGINT */
-
     ForceZero(buf, ECC_MAXSIZE);
 #ifdef WOLFSSL_SMALL_STACK
     XFREE(buf, NULL, DYNAMIC_TYPE_TMP_BUFFER);
@@ -2860,17 +3005,24 @@ int wc_ecc_make_key_ex(WC_RNG* rng, int keysize, ecc_key* key, int curve_id)
         return BAD_FUNC_ARG;
     }
 
+    /* make sure required key variables are reset */
+    key->state = ECC_STATE_NONE;
+    key->idx = 0;
+    key->dp = NULL;
+
     err = wc_ecc_set_curve(key, keysize, curve_id);
     if (err != 0) {
         return err;
     }
 
-#ifdef WOLFSSL_ASYNC_CRYPT
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
     if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
     #ifdef HAVE_CAVIUM
         /* TODO: Not implemented */
+    #elif defined(HAVE_INTEL_QA)
+        /* TODO: Not implemented */
     #else
-        AsyncCryptTestDev* testDev = &key->asyncDev.dev;
+        WC_ASYNC_TEST* testDev = &key->asyncDev.test;
         if (testDev->type == ASYNC_TEST_NONE) {
             testDev->type = ASYNC_TEST_ECC_MAKE;
             testDev->eccMake.rng = rng;
@@ -2947,7 +3099,7 @@ int wc_ecc_make_key_ex(WC_RNG* rng, int keysize, ecc_key* key, int curve_id)
     /* cleanup these on failure case only */
     if (err != MP_OKAY) {
         /* clean up */
-    #if !defined(USE_FAST_MATH) && !defined(ALT_ECC_SIZE)
+    #ifndef ALT_ECC_SIZE
         mp_clear(key->pubkey.x);
         mp_clear(key->pubkey.y);
         mp_clear(key->pubkey.z);
@@ -3034,12 +3186,48 @@ int wc_ecc_make_key(WC_RNG* rng, int keysize, ecc_key* key)
     return wc_ecc_make_key_ex(rng, keysize, key, ECC_CURVE_DEF);
 }
 
+/* Prepare the r/s integers: in async builds a NULL *r / *s is allocated from
+ * key->heap and stored in key->r / key->s; non-NULL values are zero-filled.
+ * Returns 0, or MEMORY_E if an allocation fails (*r is then released). */
+static INLINE int wc_ecc_alloc_rs(ecc_key* key, mp_int** r, mp_int** s)
+{
+    (void)key; /* only referenced for WOLFSSL_ASYNC_CRYPT */
+
+    if (*r == NULL) {
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        *r = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_BIGINT);
+        if (*r == NULL)
+            return MEMORY_E;
+        key->r = *r;
+    #endif
+    }
+    if (*s == NULL) {
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        *s = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_BIGINT);
+        if (*s == NULL) {
+            XFREE(*r, key->heap, DYNAMIC_TYPE_BIGINT);
+            key->r = NULL; /* forget r so cleanup cannot free it again */
+            *r = NULL;
+            return MEMORY_E;
+        }
+        key->s = *s;
+    #endif
+    }
+
+    /* initialize mp_int */
+    if (*r)
+        XMEMSET(*r, 0, sizeof(mp_int));
+    if (*s)
+        XMEMSET(*s, 0, sizeof(mp_int));
+
+    return 0;
+}
+
 static INLINE void wc_ecc_free_rs(ecc_key* key, mp_int** r, mp_int** s)
 {
     if (*r) {
-    #ifndef USE_FAST_MATH
         mp_clear(*r);
-    #endif
+
     #ifdef WOLFSSL_ASYNC_CRYPT
         XFREE(*r, key->heap, DYNAMIC_TYPE_BIGINT);
         key->r = NULL;
@@ -3047,9 +3235,8 @@ static INLINE void wc_ecc_free_rs(ecc_key* key, mp_int** r, mp_int** s)
         *r = NULL;
     }
     if (*s) {
-    #ifndef USE_FAST_MATH
         mp_clear(*s);
-    #endif
+
     #ifdef WOLFSSL_ASYNC_CRYPT
         XFREE(*s, key->heap, DYNAMIC_TYPE_BIGINT);
         key->s = NULL;
@@ -3073,6 +3260,7 @@ int wc_ecc_init_ex(ecc_key* key, void* heap, int devId)
 #endif
 
     XMEMSET(key, 0, sizeof(ecc_key));
+    key->state = ECC_STATE_NONE;
 
 #ifdef WOLFSSL_ATECC508A
     key->slot = atmel_ecc_alloc();
@@ -3103,12 +3291,10 @@ int wc_ecc_init_ex(ecc_key* key, void* heap, int devId)
     key->heap = heap;
 #endif
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    if (devId != INVALID_DEVID) {
-        /* handle as async */
-        ret = wolfAsync_DevCtxInit(&key->asyncDev, WOLFSSL_ASYNC_MARKER_ECC,
-                                                                        devId);
-    }
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+    /* handle as async */
+    ret = wolfAsync_DevCtxInit(&key->asyncDev, WOLFSSL_ASYNC_MARKER_ECC,
+                                                            key->heap, devId);
 #else
     (void)devId;
 #endif
@@ -3158,49 +3344,16 @@ int wc_ecc_sign_hash(const byte* in, word32 inlen, byte* out, word32 *outlen,
         return ECC_BAD_ARG_E;
     }
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
-    #ifdef HAVE_CAVIUM
-        /* TODO: Not implemented */
-    #else
-        AsyncCryptTestDev* testDev = &key->asyncDev.dev;
-        if (testDev->type == ASYNC_TEST_NONE) {
-            testDev->type = ASYNC_TEST_ECC_SIGN;
-            testDev->eccSign.in = in;
-            testDev->eccSign.inSz = inlen;
-            testDev->eccSign.out = out;
-            testDev->eccSign.outSz = outlen;
-            testDev->eccSign.rng = rng;
-            testDev->eccSign.key = key;
-            return WC_PENDING_E;
-        }
-    #endif
-    }
-#endif
-
     switch(key->state) {
         case ECC_STATE_NONE:
         case ECC_STATE_SIGN_DO:
             key->state = ECC_STATE_SIGN_DO;
 
-        #ifdef WOLFSSL_ASYNC_CRYPT
-            if (r == NULL)
-                r = (mp_int*)XMALLOC(sizeof(mp_int), key->heap,
-                                                           DYNAMIC_TYPE_BIGINT);
-            if (s == NULL)
-                s = (mp_int*)XMALLOC(sizeof(mp_int), key->heap,
-                                                           DYNAMIC_TYPE_BIGINT);
-            if (r == NULL || s == NULL) {
-                err = MEMORY_E; break;
-            }
-            key->r = r;
-            key->s = s;
-        #endif
-            XMEMSET(r, 0, sizeof(mp_int));
-            XMEMSET(s, 0, sizeof(mp_int));
+            err = wc_ecc_alloc_rs(key, &r, &s);
+            if (err != 0)
+                break;
 
-            if ((err = mp_init_multi(r, s, NULL, NULL, NULL, NULL))
-                                                                   != MP_OKAY) {
+            if ((err = mp_init_multi(r, s, NULL, NULL, NULL, NULL)) != MP_OKAY){
                 break;
             }
 
@@ -3244,13 +3397,26 @@ int wc_ecc_sign_hash(const byte* in, word32 inlen, byte* out, word32 *outlen,
         case ECC_STATE_SIGN_ENCODE:
             key->state = ECC_STATE_SIGN_ENCODE;
 
-        #ifdef WOLFSSL_ASYNC_CRYPT
+        #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+            /* restore r/s */
             r = key->r;
             s = key->s;
-        #endif
+
+            if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
+                /* only do this if not simulator, since it overwrites result */
+                #ifndef WOLFSSL_ASYNC_CRYPT_TEST
+                    wc_bigint_to_mp(&r->raw, r);
+                    wc_bigint_to_mp(&s->raw, s);
+                #endif
+            }
+        #endif /* WOLFSSL_ASYNC_CRYPT */
 
             /* encoded with DSA header */
             err = StoreECC_DSA_Sig(out, outlen, r, s);
+
+            /* always free r/s */
+            mp_clear(r);
+            mp_clear(s);
             break;
 
         default:
@@ -3263,8 +3429,8 @@ int wc_ecc_sign_hash(const byte* in, word32 inlen, byte* out, word32 *outlen,
         return err;
     }
 
+    /* cleanup */
     wc_ecc_free_rs(key, &r, &s);
-
     key->state = ECC_STATE_NONE;
 
     return err;
@@ -3301,6 +3467,23 @@ int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng,
       return ECC_BAD_ARG_E;
    }
 
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) && \
+       defined(WOLFSSL_ASYNC_CRYPT_TEST)
+    if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
+        WC_ASYNC_TEST* testDev = &key->asyncDev.test;
+        if (testDev->type == ASYNC_TEST_NONE) {
+            testDev->type = ASYNC_TEST_ECC_SIGN;
+            testDev->eccSign.in = in;
+            testDev->eccSign.inSz = inlen;
+            testDev->eccSign.rng = rng;
+            testDev->eccSign.key = key;
+            testDev->eccSign.r = r;
+            testDev->eccSign.s = s;
+            return WC_PENDING_E;
+        }
+    }
+#endif
+
    /* get the hash and load it as a bignum into 'e' */
    /* init the bignums */
    if ((err = mp_init(&e)) != MP_OKAY) {
@@ -3329,6 +3512,47 @@ int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng,
    if (err == MP_OKAY) {
        int loop_check = 0;
        ecc_key pubkey;
+
+   #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+        if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
+        #ifdef HAVE_CAVIUM
+            /* TODO: Not implemented */
+        #elif defined(HAVE_INTEL_QA)
+           mp_int k;
+
+           err = mp_init(&k);
+           /* make sure r and s are allocated */
+           if (err == MP_OKAY)
+               err = wc_bigint_alloc(&key->r->raw, key->dp->size);
+           if (err == MP_OKAY)
+               err = wc_bigint_alloc(&key->s->raw, key->dp->size);
+           /* load e and k */
+           if (err == MP_OKAY)
+               err = wc_mp_to_bigint(&e, &e.raw);
+           if (err == MP_OKAY)
+               err = wc_mp_to_bigint(&key->k, &key->k.raw);
+           if (err == MP_OKAY)
+               err = wc_ecc_curve_load(key->dp, &curve, ECC_CURVE_FIELD_ALL);
+           if (err == MP_OKAY)
+               err = wc_ecc_gen_k(rng, key->dp->size, &k, curve->order);
+           if (err == MP_OKAY)
+               err = wc_mp_to_bigint(&k, &k.raw);
+           if (err == MP_OKAY)
+               err = IntelQaEcdsaSign(&key->asyncDev, &e.raw, &key->k.raw,
+                  &k.raw, &r->raw, &s->raw, &curve->Af->raw, &curve->Bf->raw,
+                  &curve->prime->raw, &curve->order->raw, &curve->Gx->raw,
+                  &curve->Gy->raw);
+
+           mp_clear(&e);
+           mp_clear(&k);
+           wc_ecc_curve_free(curve);
+
+           return err;
+       #endif
+       }
+   #endif /* WOLFSSL_ASYNC_CRYPT */
+
+       /* don't use async for key, since we don't support async return here */
        if (wc_ecc_init_ex(&pubkey, key->heap, INVALID_DEVID) == MP_OKAY) {
            for (;;) {
                if (++loop_check > 64) {
@@ -3344,12 +3568,12 @@ int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng,
                if (err != MP_OKAY) break;
 
                if (mp_iszero(r) == MP_YES) {
-               #ifndef USE_FAST_MATH
+                #ifndef ALT_ECC_SIZE
                    mp_clear(pubkey.pubkey.x);
                    mp_clear(pubkey.pubkey.y);
                    mp_clear(pubkey.pubkey.z);
-                   mp_clear(&pubkey.k);
-               #endif
+                #endif
+                   mp_forcezero(&pubkey.k);
                }
                else {
                    /* find s = (e + xr)/k */
@@ -3379,9 +3603,7 @@ int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng,
        }
    }
 
-#ifndef USE_FAST_MATH
    mp_clear(&e);
-#endif
    wc_ecc_curve_free(curve);
 
    return err;
@@ -3399,10 +3621,8 @@ void wc_ecc_free(ecc_key* key)
         return;
     }
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA) {
-        wolfAsync_DevCtxFree(&key->asyncDev);
-    }
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+    wolfAsync_DevCtxFree(&key->asyncDev, WOLFSSL_ASYNC_MARKER_ECC);
     wc_ecc_free_rs(key, &key->r, &key->s);
 #endif
 
@@ -3411,11 +3631,10 @@ void wc_ecc_free(ecc_key* key)
    key->slot = -1;
 #else
 
-#ifndef USE_FAST_MATH
     mp_clear(key->pubkey.x);
     mp_clear(key->pubkey.y);
     mp_clear(key->pubkey.z);
-#endif
+
     mp_forcezero(&key->k);
 #endif /* WOLFSSL_ATECC508A */
 }
@@ -3529,10 +3748,8 @@ static int ecc_mul2add(ecc_point* A, mp_int* kA,
       if (err == MP_OKAY)
         err = mp_mulmod(B->z, &mu, modulus, precomp[1<<2]->z);
 
-    #ifndef USE_FAST_MATH
       /* done with mu */
       mp_clear(&mu);
-    #endif
     }
   }
 
@@ -3685,26 +3902,6 @@ int wc_ecc_verify_hash(const byte* sig, word32 siglen, const byte* hash,
         return ECC_BAD_ARG_E;
     }
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
-    #ifdef HAVE_CAVIUM
-        /* TODO: Not implemented */
-    #else
-        AsyncCryptTestDev* testDev = &key->asyncDev.dev;
-        if (testDev->type == ASYNC_TEST_NONE) {
-            testDev->type = ASYNC_TEST_ECC_VERIFY;
-            testDev->eccVerify.in = sig;
-            testDev->eccVerify.inSz = siglen;
-            testDev->eccVerify.out = hash;
-            testDev->eccVerify.outSz = hashlen;
-            testDev->eccVerify.stat = stat;
-            testDev->eccVerify.key = key;
-            return WC_PENDING_E;
-        }
-    #endif
-    }
-#endif
-
     switch(key->state) {
         case ECC_STATE_NONE:
         case ECC_STATE_VERIFY_DECODE:
@@ -3717,21 +3914,9 @@ int wc_ecc_verify_hash(const byte* sig, word32 siglen, const byte* hash,
              * If either of those don't allocate correctly, none of
              * the rest of this function will execute, and everything
              * gets cleaned up at the end. */
-        #ifdef WOLFSSL_ASYNC_CRYPT
-            if (r == NULL)
-                r = (mp_int*)XMALLOC(sizeof(mp_int), key->heap,
-                                                           DYNAMIC_TYPE_BIGINT);
-            if (s == NULL)
-                s = (mp_int*)XMALLOC(sizeof(mp_int), key->heap,
-                                                           DYNAMIC_TYPE_BIGINT);
-            if (r == NULL || s == NULL) {
-                err = MEMORY_E; break;
-            }
-            key->r = r;
-            key->s = s;
-        #endif
-            XMEMSET(r, 0, sizeof(mp_int));
-            XMEMSET(s, 0, sizeof(mp_int));
+            err = wc_ecc_alloc_rs(key, &r, &s);
+            if (err != 0)
+                break;
 
             /* decode DSA header */
             err = DecodeECC_DSA_Sig(sig, siglen, r, s);
@@ -3743,13 +3928,7 @@ int wc_ecc_verify_hash(const byte* sig, word32 siglen, const byte* hash,
         case ECC_STATE_VERIFY_DO:
             key->state = ECC_STATE_VERIFY_DO;
 
-        #ifdef WOLFSSL_ASYNC_CRYPT
-            r = key->r;
-            s = key->s;
-        #endif
-
-            err = wc_ecc_verify_hash_ex(r, s, hash, hashlen, stat,
-                                                                           key);
+            err = wc_ecc_verify_hash_ex(r, s, hash, hashlen, stat, key);
             if (err < 0) {
                 break;
             }
@@ -3758,6 +3937,16 @@ int wc_ecc_verify_hash(const byte* sig, word32 siglen, const byte* hash,
         case ECC_STATE_VERIFY_RES:
             key->state = ECC_STATE_VERIFY_RES;
             err = 0;
+
+        #ifdef WOLFSSL_ASYNC_CRYPT
+            /* restore r/s */
+            r = key->r;
+            s = key->s;
+        #endif
+
+            /* done with R/S */
+            mp_clear(r);
+            mp_clear(s);
             break;
 
         default:
@@ -3770,8 +3959,8 @@ int wc_ecc_verify_hash(const byte* sig, word32 siglen, const byte* hash,
         return err;
     }
 
+    /* cleanup */
     wc_ecc_free_rs(key, &r, &s);
-
     key->state = ECC_STATE_NONE;
 
     return err;
@@ -3794,6 +3983,7 @@ int wc_ecc_verify_hash_ex(mp_int *r, mp_int *s, const byte* hash,
 {
    int           err;
 #ifndef WOLFSSL_ATECC508A
+   int          did_init = 0;
    ecc_point    *mG = NULL, *mQ = NULL;
    mp_int        v;
    mp_int        w;
@@ -3816,6 +4006,23 @@ int wc_ecc_verify_hash_ex(mp_int *r, mp_int *s, const byte* hash,
       return ECC_BAD_ARG_E;
    }
 
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) && \
+       defined(WOLFSSL_ASYNC_CRYPT_TEST)
+    if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
+        WC_ASYNC_TEST* testDev = &key->asyncDev.test;
+        if (testDev->type == ASYNC_TEST_NONE) {
+            testDev->type = ASYNC_TEST_ECC_VERIFY;
+            testDev->eccVerify.r = r;
+            testDev->eccVerify.s = s;
+            testDev->eccVerify.hash = hash;
+            testDev->eccVerify.hashlen = hashlen;
+            testDev->eccVerify.stat = stat;
+            testDev->eccVerify.key = key;
+            return WC_PENDING_E;
+        }
+    }
+#endif
+
 #ifdef WOLFSSL_ATECC508A
     /* Extract R and S */
     err = mp_to_unsigned_bin(r, &sigRS[0]);
@@ -3865,9 +4072,38 @@ int wc_ecc_verify_hash_ex(mp_int *r, mp_int *s, const byte* hash,
            mp_rshb(&e, WOLFSSL_BIT_SIZE - (orderBits & 0x7));
    }
 
+   /* check for async hardware acceleration */
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+   if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
+   #ifdef HAVE_CAVIUM
+      /* TODO: Not implemented */
+   #elif defined(HAVE_INTEL_QA)
+      err = wc_mp_to_bigint(&e, &e.raw);
+      if (err == MP_OKAY)
+          err = wc_mp_to_bigint(key->pubkey.x, &key->pubkey.x->raw);
+      if (err == MP_OKAY)
+          err = wc_mp_to_bigint(key->pubkey.y, &key->pubkey.y->raw);
+      if (err == MP_OKAY)
+          err = IntelQaEcdsaVerify(&key->asyncDev, &e.raw, &key->pubkey.x->raw,
+                &key->pubkey.y->raw, &r->raw, &s->raw, &curve->Af->raw,
+                &curve->Bf->raw, &curve->prime->raw, &curve->order->raw,
+                &curve->Gx->raw, &curve->Gy->raw, stat);
+
+      mp_clear(&e);
+
+      wc_ecc_curve_free(curve);
+
+      return err;
+   #endif
+   }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
    /* allocate ints */
-   if ((err = mp_init_multi(&v, &w, &u1, &u2, NULL, NULL)) != MP_OKAY) {
-      err = MEMORY_E;
+   if (err == MP_OKAY) {
+       if ((err = mp_init_multi(&v, &w, &u1, &u2, NULL, NULL)) != MP_OKAY) {
+          err = MEMORY_E;
+       }
+       did_init = 1;
    }
 
    /* allocate points */
@@ -3958,13 +4194,13 @@ int wc_ecc_verify_hash_ex(mp_int *r, mp_int *s, const byte* hash,
    wc_ecc_del_point_h(mG, key->heap);
    wc_ecc_del_point_h(mQ, key->heap);
 
-#ifndef USE_FAST_MATH
    mp_clear(&e);
-   mp_clear(&v);
-   mp_clear(&w);
-   mp_clear(&u1);
-   mp_clear(&u2);
-#endif
+   if (did_init) {
+       mp_clear(&v);
+       mp_clear(&w);
+       mp_clear(&u1);
+       mp_clear(&u2);
+   }
 
    wc_ecc_curve_free(curve);
 
@@ -4025,9 +4261,8 @@ int wc_ecc_import_point_der(byte* in, word32 inLen, const int curve_idx,
 
 #ifdef HAVE_COMP_KEY
     if (err == MP_OKAY && compressed == 1) {   /* build y */
-        mp_int t1, t2;
         int did_init = 0;
-
+        mp_int t1, t2;
         DECLARE_CURVE_SPECS(3)
 
         if (mp_init_multi(&t1, &t2, NULL, NULL, NULL, NULL) != MP_OKAY)
@@ -4038,7 +4273,8 @@ int wc_ecc_import_point_der(byte* in, word32 inLen, const int curve_idx,
         /* load curve info */
         if (err == MP_OKAY)
             err = wc_ecc_curve_load(&ecc_sets[curve_idx], &curve,
-                (ECC_CURVE_FIELD_PRIME | ECC_CURVE_FIELD_AF | ECC_CURVE_FIELD_BF));
+                (ECC_CURVE_FIELD_PRIME | ECC_CURVE_FIELD_AF |
+                    ECC_CURVE_FIELD_BF));
 
         /* compute x^3 */
         if (err == MP_OKAY)
@@ -4072,10 +4308,8 @@ int wc_ecc_import_point_der(byte* in, word32 inLen, const int curve_idx,
         }
 
         if (did_init) {
-        #ifndef USE_FAST_MATH
             mp_clear(&t2);
             mp_clear(&t1);
-        #endif
         }
 
         wc_ecc_curve_free(curve);
@@ -4355,10 +4589,8 @@ int wc_ecc_is_point(ecc_point* ecp, mp_int* a, mp_int* b, mp_int* prime)
        }
    }
 
-#ifndef USE_FAST_MATH
    mp_clear(&t1);
    mp_clear(&t2);
-#endif
 
    return err;
 }
@@ -4596,8 +4828,8 @@ int wc_ecc_import_x963_ex(const byte* in, word32 inLen, ecc_key* key,
         alt_fp_init(key->pubkey.z);
         err = mp_init(&key->k);
     #else
-        err = mp_init_multi(key->pubkey.x, key->pubkey.y, key->pubkey.z, &key->k,
-            NULL, NULL);
+        err = mp_init_multi(&key->k,
+                    key->pubkey.x, key->pubkey.y, key->pubkey.z, NULL, NULL);
     #endif
     if (err != MP_OKAY)
         return MEMORY_E;
@@ -4683,10 +4915,8 @@ int wc_ecc_import_x963_ex(const byte* in, word32 inLen, ecc_key* key,
         }
 
         if (did_init) {
-    #ifndef USE_FAST_MATH
             mp_clear(&t2);
             mp_clear(&t1);
-    #endif
         }
 
         wc_ecc_curve_free(curve);
@@ -4859,7 +5089,6 @@ int wc_ecc_import_private_key_ex(const byte* priv, word32 privSz,
                                  int curve_id)
 {
     int ret;
-    void* heap;
 
     /* public optional, NULL if only importing private */
     if (pub != NULL) {
@@ -4871,18 +5100,13 @@ int wc_ecc_import_private_key_ex(const byte* priv, word32 privSz,
         if (key == NULL || priv == NULL)
             return BAD_FUNC_ARG;
 
-        /* init key */
-        heap = key->heap;
-        ret = wc_ecc_init_ex(key, NULL, INVALID_DEVID);
-        key->heap = heap;
-
+        /* make sure required key variables are reset */
         key->state = ECC_STATE_NONE;
-
-        if (ret != 0)
-            return ret;
+        key->idx = 0;
+        key->dp = NULL;
 
         /* set key size */
-        ret = wc_ecc_set_curve(key, privSz-1, curve_id);
+        ret = wc_ecc_set_curve(key, privSz, curve_id);
     }
 
     if (ret != 0)
@@ -4952,10 +5176,8 @@ int wc_ecc_rs_to_sig(const char* r, const char* s, byte* out, word32* outlen)
             err = MP_ZERO_E;
     }
 
-#ifndef USE_FAST_MATH
     mp_clear(&rtmp);
     mp_clear(&stmp);
-#endif
 
     return err;
 }
@@ -5008,10 +5230,8 @@ int wc_ecc_sig_to_rs(const byte* sig, word32 sigLen, byte* r, word32* rLen,
         }
     }
 
-#ifndef USE_FAST_MATH
     mp_clear(&rtmp);
     mp_clear(&stmp);
-#endif
 
     return err;
 }
@@ -5055,8 +5275,8 @@ static int wc_ecc_import_raw_private(ecc_key* key, const char* qx,
     alt_fp_init(key->pubkey.z);
     err = mp_init(&key->k);
 #else
-    err = mp_init_multi(key->pubkey.x, key->pubkey.y, key->pubkey.z, &key->k,
-                      NULL, NULL);
+    err = mp_init_multi(&key->k, key->pubkey.x, key->pubkey.y, key->pubkey.z,
+                                                                  NULL, NULL);
 #endif
     if (err != MP_OKAY)
         return MEMORY_E;
@@ -5962,9 +6182,7 @@ static int build_lut(int idx, mp_int* a, mp_int* modulus, mp_digit mp,
          mp_clear(fp_cache[idx].LUT[x]->z);
    }
 
-#ifndef USE_FAST_MATH
    mp_clear(&tmp);
-#endif
 
    if (err == MP_OKAY)
      return MP_OKAY;
@@ -6118,10 +6336,8 @@ static int accel_fp_mul(int idx, mp_int* k, ecc_point *R, mp_int* a,
 
 done:
    /* cleanup */
-#ifndef USE_FAST_MATH
    mp_clear(&order);
    mp_clear(&tk);
-#endif
 
 #ifdef WOLFSSL_SMALL_STACK
    XFREE(kb, NULL, DYNAMIC_TYPE_TMP_BUFFER);
@@ -6342,11 +6558,9 @@ static int accel_fp_mul2add(int idx1, int idx2,
 
 done:
    /* cleanup */
-#ifndef USE_FAST_MATH
    mp_clear(&tkb);
    mp_clear(&tka);
    mp_clear(&order);
-#endif
 
    if (kb[0])
       ForceZero(kb[0], KB_SIZE);
@@ -6485,9 +6699,7 @@ int ecc_mul2add(ecc_point* A, mp_int* kA,
 #ifndef HAVE_THREAD_LS
     wc_UnLockMutex(&ecc_fp_lock);
 #endif /* HAVE_THREAD_LS */
-#ifndef USE_FAST_MATH
     mp_clear(&mu);
-#endif
 
     return err;
 }
@@ -6575,9 +6787,7 @@ int wc_ecc_mulmod_ex(mp_int* k, ecc_point *G, ecc_point *R, mp_int* a,
 #ifndef HAVE_THREAD_LS
     wc_UnLockMutex(&ecc_fp_lock);
 #endif /* HAVE_THREAD_LS */
-#ifndef USE_FAST_MATH
     mp_clear(&mu);
-#endif
 
     return err;
 }
@@ -6994,16 +7204,17 @@ int wc_ecc_encrypt(ecc_key* privKey, ecc_key* pubKey, const byte* msg,
            case ecHMAC_SHA256:
                {
                    Hmac hmac;
-                   ret = wc_HmacSetKey(&hmac, SHA256, macKey, SHA256_DIGEST_SIZE);
-                   if (ret != 0)
-                       break;
-                   ret = wc_HmacUpdate(&hmac, out, msgSz);
-                   if (ret != 0)
-                       break;
-                   ret = wc_HmacUpdate(&hmac, ctx->macSalt, ctx->macSaltSz);
-                   if (ret != 0)
-                       break;
-                   ret = wc_HmacFinal(&hmac, out+msgSz);
+                   ret = wc_HmacInit(&hmac, NULL, INVALID_DEVID);
+                   if (ret == 0) {
+                       ret = wc_HmacSetKey(&hmac, SHA256, macKey, SHA256_DIGEST_SIZE);
+                       if (ret == 0)
+                           ret = wc_HmacUpdate(&hmac, out, msgSz);
+                       if (ret == 0)
+                           ret = wc_HmacUpdate(&hmac, ctx->macSalt, ctx->macSaltSz);
+                       if (ret == 0)
+                           ret = wc_HmacFinal(&hmac, out+msgSz);
+                       wc_HmacFree(&hmac);
+                   }
                }
                break;
 
@@ -7125,25 +7336,28 @@ int wc_ecc_decrypt(ecc_key* privKey, ecc_key* pubKey, const byte* msg,
 
        switch (ctx->macAlgo) {
            case ecHMAC_SHA256:
-               {
-                   byte verify[SHA256_DIGEST_SIZE];
-                   Hmac hmac;
+           {
+               byte verify[SHA256_DIGEST_SIZE];
+               Hmac hmac;
+
+               ret = wc_HmacInit(&hmac, NULL, INVALID_DEVID);
+               if (ret == 0) {
                    ret = wc_HmacSetKey(&hmac, SHA256, macKey, SHA256_DIGEST_SIZE);
-                   if (ret != 0)
-                       break;
-                   ret = wc_HmacUpdate(&hmac, msg, msgSz-digestSz);
-                   if (ret != 0)
-                       break;
-                   ret = wc_HmacUpdate(&hmac, ctx->macSalt, ctx->macSaltSz);
-                   if (ret != 0)
-                       break;
-                   ret = wc_HmacFinal(&hmac, verify);
-                   if (ret != 0)
-                       break;
-                   if (XMEMCMP(verify, msg + msgSz - digestSz, digestSz) != 0)
-                       ret = -1;
+                   if (ret == 0)
+                       ret = wc_HmacUpdate(&hmac, msg, msgSz-digestSz);
+                   if (ret == 0)
+                       ret = wc_HmacUpdate(&hmac, ctx->macSalt, ctx->macSaltSz);
+                   if (ret == 0)
+                       ret = wc_HmacFinal(&hmac, verify);
+                   if (ret == 0) {
+                      if (XMEMCMP(verify, msg + msgSz - digestSz, digestSz) != 0)
+                          ret = -1;
+                   }
+
+                   wc_HmacFree(&hmac);
                }
                break;
+           }
 
            default:
                ret = BAD_FUNC_ARG;
@@ -7298,10 +7512,8 @@ int mp_jacobi(mp_int* a, mp_int* n, int* c)
 
 done:
   /* cleanup */
-#ifndef USE_FAST_MATH
   mp_clear(&n1);
   mp_clear(&a1);
-#endif
 
   return res;
 }
@@ -7480,7 +7692,6 @@ int mp_sqrtmod_prime(mp_int* n, mp_int* prime, mp_int* ret)
     }
   }
 
-#ifndef USE_FAST_MATH
   /* done */
   mp_clear(&t1);
   mp_clear(&C);
@@ -7491,7 +7702,6 @@ int mp_sqrtmod_prime(mp_int* n, mp_int* prime, mp_int* ret)
   mp_clear(&T);
   mp_clear(&R);
   mp_clear(&two);
-#endif
 
   return res;
 }
@@ -7713,51 +7923,4 @@ int wc_X963_KDF(enum wc_HashType type, const byte* secret, word32 secretSz,
 }
 #endif /* HAVE_X963_KDF */
 
-
-#ifdef WOLFSSL_ASYNC_CRYPT
-
-int wc_ecc_async_handle(ecc_key* key, WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event)
-{
-    int ret;
-
-    if (key == NULL || queue == NULL || event == NULL) {
-        return BAD_FUNC_ARG;
-    }
-
-    /* make sure this ECC context had "wc_EccAsyncInit" called on it */
-    if (key->asyncDev.marker != WOLFSSL_ASYNC_MARKER_ECC) {
-        return ASYNC_INIT_E;
-    }
-
-    /* setup the event and push to queue */
-    ret = wolfAsync_EventInit(event, WOLF_EVENT_TYPE_ASYNC_WOLFSSL, &key->asyncDev);
-    if (ret == 0) {
-        ret = wolfEventQueue_Push(queue, event);
-    }
-
-    /* check for error (helps with debugging) */
-    if (ret != 0) {
-        WOLFSSL_MSG("wc_EccAsyncHandle failed");
-    }
-    return ret;
-}
-
-int wc_ecc_async_wait(int ret, ecc_key* key)
-{
-    if (ret == WC_PENDING_E) {
-        WOLF_EVENT event;
-        XMEMSET(&event, 0, sizeof(event));
-        ret = wolfAsync_EventInit(&event, WOLF_EVENT_TYPE_ASYNC_WOLFSSL, &key->asyncDev);
-        if (ret == 0) {
-            ret = wolfAsync_EventWait(&event);
-            if (ret == 0 && event.ret >= 0) {
-                ret = event.ret;
-            }
-        }
-    }
-    return ret;
-}
-
-#endif /* WOLFSSL_ASYNC_CRYPT */
-
 #endif /* HAVE_ECC */
diff --git a/wolfcrypt/src/error.c b/wolfcrypt/src/error.c
index 796d9e553..b5b578d5a 100644
--- a/wolfcrypt/src/error.c
+++ b/wolfcrypt/src/error.c
@@ -422,6 +422,9 @@ const char* wc_GetErrorString(int error)
     case BAD_PATH_ERROR:
         return "Bad path for opendir error";
 
+    case ASYNC_OP_E:
+        return "Async operation error";
+
     default:
         return "unknown error number";
 
diff --git a/wolfcrypt/src/fe_operations.c b/wolfcrypt/src/fe_operations.c
index a47ff3cfb..285f6c0cf 100755
--- a/wolfcrypt/src/fe_operations.c
+++ b/wolfcrypt/src/fe_operations.c
@@ -41,6 +41,9 @@
     #include 
 #endif
 
+#ifdef HAVE___UINT128_T
+#include "fe_x25519_128.i"
+#else
 /*
 fe means field element.
 Here the field is \Z/(2^255-19).
@@ -1407,6 +1410,7 @@ void fe_cmov(fe f, const fe g, int b)
   f[8] = f8 ^ x8;
   f[9] = f9 ^ x9;
 }
+#endif
 #endif /* HAVE ED25519 or CURVE25519 */
 #endif /* not defined CURVED25519_SMALL */
 
diff --git a/wolfcrypt/src/fe_x25519_128.i b/wolfcrypt/src/fe_x25519_128.i
new file mode 100644
index 000000000..d7297a260
--- /dev/null
+++ b/wolfcrypt/src/fe_x25519_128.i
@@ -0,0 +1,612 @@
+/* fe_x25519_128.i
+ *
+ * Copyright (C) 2006-2017 wolfSSL Inc.
+ *
+ * This file is part of wolfSSL.
+ *
+ * wolfSSL is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * wolfSSL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Convert a little-endian array of 32 bytes into an array of five words with
+ * 51-bits of data in each word. The top bit of in[31] is masked off.
+ *
+ * in   An array of 32 bytes; in[0] supplies the least significant bits.
+ * out  An array of five words that receives the result.
+ */
+void fe_frombytes(fe out, const unsigned char *in)
+{
+    out[0] = (((int64_t)((in[ 0]      )       ))      )
+           | (((int64_t)((in[ 1]      )       )) <<  8)
+           | (((int64_t)((in[ 2]      )       )) << 16)
+           | (((int64_t)((in[ 3]      )       )) << 24)
+           | (((int64_t)((in[ 4]      )       )) << 32)
+           | (((int64_t)((in[ 5]      )       )) << 40)
+           | (((int64_t)((in[ 6]      ) & 0x07)) << 48);
+    out[1] = (((int64_t)((in[ 6] >>  3) & 0x1f))      )
+           | (((int64_t)((in[ 7]      )       )) <<  5)
+           | (((int64_t)((in[ 8]      )       )) << 13)
+           | (((int64_t)((in[ 9]      )       )) << 21)
+           | (((int64_t)((in[10]      )       )) << 29)
+           | (((int64_t)((in[11]      )       )) << 37)
+           | (((int64_t)((in[12]      ) & 0x3f)) << 45);
+    out[2] = (((int64_t)((in[12] >>  6) & 0x03))      )
+           | (((int64_t)((in[13]      )       )) <<  2)
+           | (((int64_t)((in[14]      )       )) << 10)
+           | (((int64_t)((in[15]      )       )) << 18)
+           | (((int64_t)((in[16]      )       )) << 26)
+           | (((int64_t)((in[17]      )       )) << 34)
+           | (((int64_t)((in[18]      )       )) << 42)
+           | (((int64_t)((in[19]      ) & 0x01)) << 50);
+    out[3] = (((int64_t)((in[19] >>  1) & 0x7f))      )
+           | (((int64_t)((in[20]      )       )) <<  7)
+           | (((int64_t)((in[21]      )       )) << 15)
+           | (((int64_t)((in[22]      )       )) << 23)
+           | (((int64_t)((in[23]      )       )) << 31)
+           | (((int64_t)((in[24]      )       )) << 39)
+           | (((int64_t)((in[25]      ) & 0x0f)) << 47);
+    out[4] = (((int64_t)((in[25] >>  4) & 0x0f))      )
+           | (((int64_t)((in[26]      )       )) <<  4)
+           | (((int64_t)((in[27]      )       )) << 12)
+           | (((int64_t)((in[28]      )       )) << 20)
+           | (((int64_t)((in[29]      )       )) << 28)
+           | (((int64_t)((in[30]      )       )) << 36)
+           | (((int64_t)((in[31]      ) & 0x7f)) << 44);
+}
+
+/* Convert a number represented as an array of words to an array of bytes.
+ * The array of words is normalized to an array of 51-bit data words and, if
+ * not less than the modulus, reduced modulo the prime 2^255 - 19.
+ *
+ * n    An array of five words; it is copied and not modified.
+ * out  An array of 32 bytes that receives the little-endian encoding.
+ */
+void fe_tobytes(unsigned char *out, const fe n)
+{
+    fe      in;
+    int64_t c;
+
+    in[0] = n[0];
+    in[1] = n[1];
+    in[2] = n[2];
+    in[3] = n[3];
+    in[4] = n[4];
+
+    /* Normalize to 51-bits of data per word. */
+    in[0] += (in[4] >> 51) * 19; in[4] &= 0x7ffffffffffff;
+
+    in[1] += in[0] >> 51; in[0] &= 0x7ffffffffffff;
+    in[2] += in[1] >> 51; in[1] &= 0x7ffffffffffff;
+    in[3] += in[2] >> 51; in[2] &= 0x7ffffffffffff;
+    in[4] += in[3] >> 51; in[3] &= 0x7ffffffffffff;
+    in[0] += (in[4] >> 51) * 19;
+    in[4] &= 0x7ffffffffffff;
+
+    c = (in[0] + 19) >> 51;
+    c = (in[1] + c) >> 51;
+    c = (in[2] + c) >> 51;
+    c = (in[3] + c) >> 51;
+    c = (in[4] + c) >> 51;
+    in[0] += c * 19;
+    in[1] += in[0] >> 51; in[0] &= 0x7ffffffffffff;
+    in[2] += in[1] >> 51; in[1] &= 0x7ffffffffffff;
+    in[3] += in[2] >> 51; in[2] &= 0x7ffffffffffff;
+    in[4] += in[3] >> 51; in[3] &= 0x7ffffffffffff;
+    in[4] &= 0x7ffffffffffff;
+
+    out[ 0] = (((byte)((in[0]      )       ))      );
+    out[ 1] = (((byte)((in[0] >>  8)       ))      );
+    out[ 2] = (((byte)((in[0] >> 16)       ))      );
+    out[ 3] = (((byte)((in[0] >> 24)       ))      );
+    out[ 4] = (((byte)((in[0] >> 32)       ))      );
+    out[ 5] = (((byte)((in[0] >> 40)       ))      );
+    out[ 6] = (((byte)((in[0] >> 48) & 0x07))      )
+            | (((byte)((in[1]      ) & 0x1f)) <<  3);
+    out[ 7] = (((byte)((in[1] >>  5)       ))      );
+    out[ 8] = (((byte)((in[1] >> 13)       ))      );
+    out[ 9] = (((byte)((in[1] >> 21)       ))      );
+    out[10] = (((byte)((in[1] >> 29)       ))      );
+    out[11] = (((byte)((in[1] >> 37)       ))      );
+    out[12] = (((byte)((in[1] >> 45) & 0x3f))      )
+            | (((byte)((in[2]      ) & 0x03)) <<  6);
+    out[13] = (((byte)((in[2] >>  2)       ))      );
+    out[14] = (((byte)((in[2] >> 10)       ))      );
+    out[15] = (((byte)((in[2] >> 18)       ))      );
+    out[16] = (((byte)((in[2] >> 26)       ))      );
+    out[17] = (((byte)((in[2] >> 34)       ))      );
+    out[18] = (((byte)((in[2] >> 42)       ))      );
+    out[19] = (((byte)((in[2] >> 50) & 0x01))      )
+            | (((byte)((in[3]      ) & 0x7f)) <<  1);
+    out[20] = (((byte)((in[3] >>  7)       ))      );
+    out[21] = (((byte)((in[3] >> 15)       ))      );
+    out[22] = (((byte)((in[3] >> 23)       ))      );
+    out[23] = (((byte)((in[3] >> 31)       ))      );
+    out[24] = (((byte)((in[3] >> 39)       ))      );
+    out[25] = (((byte)((in[3] >> 47) & 0x0f))      )
+            | (((byte)((in[4]      ) & 0x0f)) <<  4);
+    out[26] = (((byte)((in[4] >>  4)       ))      );
+    out[27] = (((byte)((in[4] >> 12)       ))      );
+    out[28] = (((byte)((in[4] >> 20)       ))      );
+    out[29] = (((byte)((in[4] >> 28)       ))      );
+    out[30] = (((byte)((in[4] >> 36)       ))      );
+    out[31] = (((byte)((in[4] >> 44) & 0x7f))      );
+}
+
+/* Set the field element to 1: word 0 becomes 1 and words 1 to 4 become 0.
+ *
+ * n  The field element to set; all five words are overwritten.
+ */
+void fe_1(fe n)
+{
+    n[0] = 0x0000000000001;
+    n[1] = 0x0000000000000;
+    n[2] = 0x0000000000000;
+    n[3] = 0x0000000000000;
+    n[4] = 0x0000000000000;
+}
+
+/* Set the field element to 0 by clearing each of its five words.
+ *
+ * n  The field element to set; all five words are overwritten.
+ */
+void fe_0(fe n)
+{
+    n[0] = 0x0000000000000;
+    n[1] = 0x0000000000000;
+    n[2] = 0x0000000000000;
+    n[3] = 0x0000000000000;
+    n[4] = 0x0000000000000;
+}
+
+/* Copy the five words of field element a into field element r.
+ *
+ * r  Field element to copy into.
+ * a  Field element to copy; not modified.
+ */
+void fe_copy(fe r, const fe a)
+{
+    r[0] = a[0];
+    r[1] = a[1];
+    r[2] = a[2];
+    r[3] = a[3];
+    r[4] = a[4];
+}
+
+/* Constant time, conditional swap of field elements a and b.
+ *
+ * a  A field element; holds b's original words afterwards when c is 1.
+ * b  A field element; holds a's original words afterwards when c is 1.
+ * c  If 1 then swap and if 0 then don't swap; must be exactly 0 or 1.
+ */
+void fe_cswap(fe a, fe b, int c)
+{
+    int64_t m = c;
+    int64_t t0, t1, t2, t3, t4;
+
+    /* Convert conditional into mask: 0 stays 0, 1 becomes all ones. */
+    m = -m;
+    t0 = m & (a[0] ^ b[0]);
+    t1 = m & (a[1] ^ b[1]);
+    t2 = m & (a[2] ^ b[2]);
+    t3 = m & (a[3] ^ b[3]);
+    t4 = m & (a[4] ^ b[4]);
+
+    a[0] ^= t0;
+    a[1] ^= t1;
+    a[2] ^= t2;
+    a[3] ^= t3;
+    a[4] ^= t4;
+
+    b[0] ^= t0;
+    b[1] ^= t1;
+    b[2] ^= t2;
+    b[3] ^= t3;
+    b[4] ^= t4;
+}
+
+/* Subtract b from a into r, word by word with no reduction. (r = a - b)
+ *
+ * r  A field element that receives the difference.
+ * a  A field element to subtract from.
+ * b  A field element to subtract.
+ */
+void fe_sub(fe r, const fe a, const fe b)
+{
+    r[0] = a[0] - b[0];
+    r[1] = a[1] - b[1];
+    r[2] = a[2] - b[2];
+    r[3] = a[3] - b[3];
+    r[4] = a[4] - b[4];
+}
+
+/* Add b to a into r, word by word with no reduction. (r = a + b)
+ *
+ * r  A field element that receives the sum.
+ * a  A field element to add to.
+ * b  A field element to add.
+ */
+void fe_add(fe r, const fe a, const fe b)
+{
+    r[0] = a[0] + b[0];
+    r[1] = a[1] + b[1];
+    r[2] = a[2] + b[2];
+    r[3] = a[3] + b[3];
+    r[4] = a[4] + b[4];
+}
+
+/* Multiply a and b into r. (r = a * b)
+ *
+ * r  Receives the product; written only after a and b are fully read.
+ * a  A field element to multiply.
+ * b  A field element to multiply by.
+ */
+void fe_mul(fe r, const fe a, const fe b)
+{
+    __int128_t t0 = ((__int128_t)a[0]) * b[0];
+    __int128_t t1 = ((__int128_t)a[0]) * b[1]
+                  + ((__int128_t)a[1]) * b[0];
+    __int128_t t2 = ((__int128_t)a[0]) * b[2]
+                  + ((__int128_t)a[1]) * b[1]
+                  + ((__int128_t)a[2]) * b[0];
+    __int128_t t3 = ((__int128_t)a[0]) * b[3]
+                  + ((__int128_t)a[1]) * b[2]
+                  + ((__int128_t)a[2]) * b[1]
+                  + ((__int128_t)a[3]) * b[0];
+    __int128_t t4 = ((__int128_t)a[0]) * b[4]
+                  + ((__int128_t)a[1]) * b[3]
+                  + ((__int128_t)a[2]) * b[2]
+                  + ((__int128_t)a[3]) * b[1]
+                  + ((__int128_t)a[4]) * b[0];
+    __int128_t t5 = ((__int128_t)a[1]) * b[4]
+                  + ((__int128_t)a[2]) * b[3]
+                  + ((__int128_t)a[3]) * b[2]
+                  + ((__int128_t)a[4]) * b[1];
+    __int128_t t6 = ((__int128_t)a[2]) * b[4]
+                  + ((__int128_t)a[3]) * b[3]
+                  + ((__int128_t)a[4]) * b[2];
+    __int128_t t7 = ((__int128_t)a[3]) * b[4]
+                  + ((__int128_t)a[4]) * b[3];
+    __int128_t t8 = ((__int128_t)a[4]) * b[4];
+
+    /* Modulo reduce double long word: 2^255 = 19 mod (2^255 - 19). */
+    t0 += t5 * 19;
+    t1 += t6 * 19;
+    t2 += t7 * 19;
+    t3 += t8 * 19;
+
+    /* Normalize to 51-bits of data per word. */
+    t0 += (t4 >> 51) * 19; t4 &= 0x7ffffffffffff;
+
+    t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
+    t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
+    t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
+    t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
+    r[0] += (t4 >> 51) * 19;
+    r[4] = t4 & 0x7ffffffffffff;
+}
+
+/* Square a and put result in r. (r = a * a)
+ *
+ * r  A field element that receives the square.
+ * a  A field element to square. All words of a are read before r is
+ *    written, so r may be the same element as a.
+ */
+void fe_sq(fe r, const fe a)
+{
+    __int128_t t0 = ((__int128_t)a[0]) * a[0];
+    __int128_t t1 = ((__int128_t)a[0]) * a[1] * 2;
+    __int128_t t2 = ((__int128_t)a[0]) * a[2] * 2
+                  + ((__int128_t)a[1]) * a[1];
+    __int128_t t3 = ((__int128_t)a[0]) * a[3] * 2
+                  + ((__int128_t)a[1]) * a[2] * 2;
+    __int128_t t4 = ((__int128_t)a[0]) * a[4] * 2
+                  + ((__int128_t)a[1]) * a[3] * 2
+                  + ((__int128_t)a[2]) * a[2];
+    __int128_t t5 = ((__int128_t)a[1]) * a[4] * 2
+                  + ((__int128_t)a[2]) * a[3] * 2;
+    __int128_t t6 = ((__int128_t)a[2]) * a[4] * 2
+                  + ((__int128_t)a[3]) * a[3];
+    __int128_t t7 = ((__int128_t)a[3]) * a[4] * 2;
+    __int128_t t8 = ((__int128_t)a[4]) * a[4];
+
+    /* Modulo reduce double long word: 2^255 = 19 mod (2^255 - 19). */
+    t0 += t5 * 19;
+    t1 += t6 * 19;
+    t2 += t7 * 19;
+    t3 += t8 * 19;
+
+    /* Normalize to 51-bits of data per word. */
+    t0 += (t4 >> 51) * 19; t4 &= 0x7ffffffffffff;
+
+    t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
+    t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
+    t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
+    t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
+    r[0] += (t4 >> 51) * 19;
+    r[4] = t4 & 0x7ffffffffffff;
+}
+
+/* Multiply a by 121666 and put result in r. (r = 121666 * a)
+ *
+ * r  A field element that receives the result.
+ * a  A field element to scale (only read, never written).
+ * Products are held in 128-bit words before being normalized.
+ */
+void fe_mul121666(fe r, fe a)
+{
+    __int128_t t0 = ((__int128_t)a[0]) * (int64_t)121666;
+    __int128_t t1 = ((__int128_t)a[1]) * (int64_t)121666;
+    __int128_t t2 = ((__int128_t)a[2]) * (int64_t)121666;
+    __int128_t t3 = ((__int128_t)a[3]) * (int64_t)121666;
+    __int128_t t4 = ((__int128_t)a[4]) * (int64_t)121666;
+
+    /* Normalize to 51-bits of data per word; top carry wraps times 19. */
+    t0 += (t4 >> 51) * 19; t4 &= 0x7ffffffffffff;
+
+    t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
+    t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
+    t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
+    t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
+    r[0] += (t4 >> 51) * 19;
+    r[4] = t4 & 0x7ffffffffffff;
+}
+
+/* Find the inverse of a modulo 2^255 - 19 and put result in r.
+ * (r * a) mod (2^255 - 19) = 1
+ * Implementation is constant time.
+ * Each fe_sq/for pair below squares as many times as the loop bound.
+ * r  A field element.
+ * a  A field element.
+ */
+void fe_invert(fe r, const fe a)
+{
+    fe  t0, t1, t2, t3;
+    int i;
+
+    /* r = a ^ (2^255 - 21), i.e. a ^ (p - 2) with p = 2^255 - 19 */
+    fe_sq(t0,  a); for (i = 1; i <   1; ++i) fe_sq(t0, t0);
+    fe_sq(t1, t0); for (i = 1; i <   2; ++i) fe_sq(t1, t1); fe_mul(t1,  a, t1);
+    fe_mul(t0, t0, t1);
+    fe_sq(t2, t0); for (i = 1; i <   1; ++i) fe_sq(t2, t2); fe_mul(t1, t1, t2);
+    fe_sq(t2, t1); for (i = 1; i <   5; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
+    fe_sq(t2, t1); for (i = 1; i <  10; ++i) fe_sq(t2, t2); fe_mul(t2, t2, t1);
+    fe_sq(t3, t2); for (i = 1; i <  20; ++i) fe_sq(t3, t3); fe_mul(t2, t3, t2);
+    fe_sq(t2, t2); for (i = 1; i <  10; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
+    fe_sq(t2, t1); for (i = 1; i <  50; ++i) fe_sq(t2, t2); fe_mul(t2, t2, t1);
+    fe_sq(t3, t2); for (i = 1; i < 100; ++i) fe_sq(t3, t3); fe_mul(t2, t3, t2);
+    fe_sq(t2, t2); for (i = 1; i <  50; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
+    fe_sq(t1, t1); for (i = 1; i <   5; ++i) fe_sq(t1, t1); fe_mul( r, t1, t0);
+}
+
+/* Scalar multiply the field element a by n using Montgomery Ladder and places
+ * result in r. Bits 254..0 of n are used exactly as given. Always returns 0.
+ *
+ * r  A field element as an array of bytes.
+ * n  The scalar as an array of bytes.
+ * a  A field element as an array of bytes.
+ */
+int curve25519(byte* r, byte* n, byte* a)
+{
+    fe           x1, x2, z2, x3, z3;
+    fe           t0, t1;
+    int          pos;
+    unsigned int swap;
+    unsigned int b;
+
+    fe_frombytes(x1, a);
+    fe_1(x2);
+    fe_0(z2);
+    fe_copy(x3, x1);
+    fe_1(z3);
+
+    swap = 0;
+    for (pos = 254;pos >= 0;--pos) {
+        b = n[pos / 8] >> (pos & 7);
+        b &= 1;
+        swap ^= b;                  /* 1 only when bit differs from last bit */
+        fe_cswap(x2, x3, swap);
+        fe_cswap(z2, z3, swap);
+        swap = b;                   /* remember this bit for the next round */
+
+        fe_sub(t0, x3, z3);
+        fe_sub(t1, x2, z2);
+        fe_add(x2, x2, z2);
+        fe_add(z2, x3, z3);
+        fe_mul(z3, t0, x2);
+        fe_mul(z2, z2, t1);
+        fe_sq(t0, t1);
+        fe_sq(t1, x2);
+        fe_add(x3, z3, z2);
+        fe_sub(z2, z3, z2);
+        fe_mul(x2, t1, t0);
+        fe_sub(t1, t1, t0);
+        fe_sq(z2, z2);
+        fe_mul121666(z3, t1);
+        fe_sq(x3, x3);
+        fe_add(t0, t0, z3);
+        fe_mul(z3, x1, z2);
+        fe_mul(z2, t1, t0);
+    }
+    fe_cswap(x2, x3, swap);
+    fe_cswap(z2, z3, swap);
+    /* r = x2 / z2: multiply x2 by the inverse of z2, then encode as bytes. */
+    fe_invert(z2, z2);
+    fe_mul(x2, x2, z2);
+    fe_tobytes(r, x2);
+
+    return 0;
+}
+
+/* The field element value 0 as an array of bytes. */
+static const unsigned char zero[32] = {0};
+
+/* Constant time check as to whether a is not 0.
+ * Returns what ConstantCompare gives for a's 32 bytes against zero[].
+ * a  A field element.
+ */
+int fe_isnonzero(const fe a)
+{
+    unsigned char s[32];
+    fe_tobytes(s, a);
+    return ConstantCompare(s, zero, 32);
+}
+
+/* Checks whether a is negative.
+ * Returns bit 0 of the first byte written by fe_tobytes (1 = negative).
+ * a  A field element.
+ */
+int fe_isnegative(const fe a)
+{
+    unsigned char s[32];
+    fe_tobytes(s, a);
+    return s[0] & 1;
+}
+
+/* Negates field element a and stores the result in r.
+ * Each of the 5 words is negated on its own; no carry or reduction is done.
+ * r  A field element.
+ * a  A field element.
+ */
+void fe_neg(fe r, const fe a)
+{
+    r[0] = -a[0];
+    r[1] = -a[1];
+    r[2] = -a[2];
+    r[3] = -a[3];
+    r[4] = -a[4];
+}
+
+/* Constant time, conditional move of b into a.
+ * a is not changed if the condition is 0.
+ * c must be exactly 0 or 1; any other value yields a partial mask.
+ * a  A field element.
+ * b  A field element.
+ * c  If 1 then copy and if 0 then don't copy.
+ */
+void fe_cmov(fe a, const fe b, int c)
+{
+    int64_t m = c;
+    int64_t t0, t1, t2, t3, t4;
+
+    /* Convert conditional into mask: 0 stays 0, 1 becomes all ones. */
+    m = -m;
+    t0 = m & (a[0] ^ b[0]);
+    t1 = m & (a[1] ^ b[1]);
+    t2 = m & (a[2] ^ b[2]);
+    t3 = m & (a[3] ^ b[3]);
+    t4 = m & (a[4] ^ b[4]);
+
+    a[0] ^= t0;
+    a[1] ^= t1;
+    a[2] ^= t2;
+    a[3] ^= t3;
+    a[4] ^= t4;
+}
+
+void fe_pow22523(fe r, const fe a)
+{
+    fe t0, t1, t2;
+    int i;
+
+    /* r = a ^ (2^252 - 3), i.e. a ^ ((p - 5) / 8) with p = 2^255 - 19 */
+    fe_sq(t0,  a); for (i = 1; i <   1; ++i) fe_sq(t0, t0);
+    fe_sq(t1, t0); for (i = 1; i <   2; ++i) fe_sq(t1, t1); fe_mul(t1,  a, t1);
+    fe_mul(t0, t0, t1);
+    fe_sq(t0, t0); for (i = 1; i <   1; ++i) fe_sq(t0, t0); fe_mul(t0, t1, t0);
+    fe_sq(t1, t0); for (i = 1; i <   5; ++i) fe_sq(t1, t1); fe_mul(t0, t1, t0);
+    fe_sq(t1, t0); for (i = 1; i <  10; ++i) fe_sq(t1, t1); fe_mul(t1, t1, t0);
+    fe_sq(t2, t1); for (i = 1; i <  20; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
+    fe_sq(t1, t1); for (i = 1; i <  10; ++i) fe_sq(t1, t1); fe_mul(t0, t1, t0);
+    fe_sq(t1, t0); for (i = 1; i <  50; ++i) fe_sq(t1, t1); fe_mul(t1, t1, t0);
+    fe_sq(t2, t1); for (i = 1; i < 100; ++i) fe_sq(t2, t2); fe_mul(t1, t2, t1);
+    fe_sq(t1, t1); for (i = 1; i <  50; ++i) fe_sq(t1, t1); fe_mul(t0, t1, t0);
+    fe_sq(t0, t0); for (i = 1; i <   2; ++i) fe_sq(t0, t0); fe_mul( r, t0, a);
+
+    return;
+}
+
+/* Double the square of a and put result in r. (r = 2 * a * a)
+ *
+ * r  A field element that receives the result.
+ * a  A field element to square.
+ * Every partial sum is doubled in 128-bit words before being reduced.
+ */
+void fe_sq2(fe r, const fe a)
+{
+    __int128_t t0 = 2 * (((__int128_t)a[0]) * a[0]);
+    __int128_t t1 = 2 * (((__int128_t)a[0]) * a[1] * 2);
+    __int128_t t2 = 2 * (((__int128_t)a[0]) * a[2] * 2
+                  + ((__int128_t)a[1]) * a[1]);
+    __int128_t t3 = 2 * (((__int128_t)a[0]) * a[3] * 2
+                  + ((__int128_t)a[1]) * a[2] * 2);
+    __int128_t t4 = 2 * (((__int128_t)a[0]) * a[4] * 2
+                  + ((__int128_t)a[1]) * a[3] * 2
+                  + ((__int128_t)a[2]) * a[2]);
+    __int128_t t5 = 2 * (((__int128_t)a[1]) * a[4] * 2
+                  + ((__int128_t)a[2]) * a[3] * 2);
+    __int128_t t6 = 2 * (((__int128_t)a[2]) * a[4] * 2
+                  + ((__int128_t)a[3]) * a[3]);
+    __int128_t t7 = 2 * (((__int128_t)a[3]) * a[4] * 2);
+    __int128_t t8 = 2 * (((__int128_t)a[4]) * a[4]);
+
+    /* Modulo reduce double long word: t5..t8 fold onto t0..t3 times 19. */
+    t0 += t5 * 19;
+    t1 += t6 * 19;
+    t2 += t7 * 19;
+    t3 += t8 * 19;
+
+    /* Normalize to 51-bits of data per word; top carry wraps times 19. */
+    t0 += (t4 >> 51) * 19; t4 &= 0x7ffffffffffff;
+
+    t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff;
+    t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff;
+    t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff;
+    t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff;
+    r[0] += (t4 >> 51) * 19;
+    r[4] = t4 & 0x7ffffffffffff;
+}
+
+/* Load 3 little endian bytes into a 64-bit word.
+ *
+ * in  An array of at least 3 bytes; in[0] is the least significant byte.
+ * returns a 64-bit word holding the 24-bit value; upper bits are zero.
+ */
+uint64_t load_3(const unsigned char *in)
+{
+    uint64_t result;
+
+    result = ((((uint64_t)in[0])      ) |
+              (((uint64_t)in[1]) <<  8) |
+              (((uint64_t)in[2]) << 16));
+
+    return result;
+}
+
+/* Load 4 little endian bytes into a 64-bit word.
+ *
+ * in  An array of at least 4 bytes; in[0] is the least significant byte.
+ * returns a 64-bit word holding the 32-bit value; upper bits are zero.
+ */
+uint64_t load_4(const unsigned char *in)
+{
+    uint64_t result;
+
+    result = ((((uint64_t)in[0])      ) |
+              (((uint64_t)in[1]) <<  8) |
+              (((uint64_t)in[2]) << 16) |
+              (((uint64_t)in[3]) << 24));
+
+    return result;
+}
+
diff --git a/wolfcrypt/src/ge_operations.c b/wolfcrypt/src/ge_operations.c
index 109b77c82..bf9b2eee4 100644
--- a/wolfcrypt/src/ge_operations.c
+++ b/wolfcrypt/src/ge_operations.c
@@ -765,7 +765,1354 @@ static void cmov(ge_precomp *t,const ge_precomp *u,unsigned char b)
   fe_cmov(t->xy2d,u->xy2d,b);
 }
 
-
+#ifdef HAVE___UINT128_T
+static const ge_precomp base[32][8] = {
+{
+    {
+        { 0x493c6f58c3b85, 0x0df7181c325f7, 0x0f50b0b3e4cb7, 0x5329385a44c32, 0x07cf9d3a33d4b },
+        { 0x03905d740913e, 0x0ba2817d673a2, 0x23e2827f4e67c, 0x133d2e0c21a34, 0x44fd2f9298f81 },
+        { 0x11205877aaa68, 0x479955893d579, 0x50d66309b67a0, 0x2d42d0dbee5ee, 0x6f117b689f0c6 },
+    },
+    {
+        { 0x4e7fc933c71d7, 0x2cf41feb6b244, 0x7581c0a7d1a76, 0x7172d534d32f0, 0x590c063fa87d2 },
+        { 0x1a56042b4d5a8, 0x189cc159ed153, 0x5b8deaa3cae04, 0x2aaf04f11b5d8, 0x6bb595a669c92 },
+        { 0x2a8b3a59b7a5f, 0x3abb359ef087f, 0x4f5a8c4db05af, 0x5b9a807d04205, 0x701af5b13ea50 },
+    },
+    {
+        { 0x5b0a84cee9730, 0x61d10c97155e4, 0x4059cc8096a10, 0x47a608da8014f, 0x7a164e1b9a80f },
+        { 0x11fe8a4fcd265, 0x7bcb8374faacc, 0x52f5af4ef4d4f, 0x5314098f98d10, 0x2ab91587555bd },
+        { 0x6933f0dd0d889, 0x44386bb4c4295, 0x3cb6d3162508c, 0x26368b872a2c6, 0x5a2826af12b9b },
+    },
+    {
+        { 0x351b98efc099f, 0x68fbfa4a7050e, 0x42a49959d971b, 0x393e51a469efd, 0x680e910321e58 },
+        { 0x6050a056818bf, 0x62acc1f5532bf, 0x28141ccc9fa25, 0x24d61f471e683, 0x27933f4c7445a },
+        { 0x3fbe9c476ff09, 0x0af6b982e4b42, 0x0ad1251ba78e5, 0x715aeedee7c88, 0x7f9d0cbf63553 },
+    },
+    {
+        { 0x2bc4408a5bb33, 0x078ebdda05442, 0x2ffb112354123, 0x375ee8df5862d, 0x2945ccf146e20 },
+        { 0x182c3a447d6ba, 0x22964e536eff2, 0x192821f540053, 0x2f9f19e788e5c, 0x154a7e73eb1b5 },
+        { 0x3dbf1812a8285, 0x0fa17ba3f9797, 0x6f69cb49c3820, 0x34d5a0db3858d, 0x43aabe696b3bb },
+    },
+    {
+        { 0x4eeeb77157131, 0x1201915f10741, 0x1669cda6c9c56, 0x45ec032db346d, 0x51e57bb6a2cc3 },
+        { 0x006b67b7d8ca4, 0x084fa44e72933, 0x1154ee55d6f8a, 0x4425d842e7390, 0x38b64c41ae417 },
+        { 0x4326702ea4b71, 0x06834376030b5, 0x0ef0512f9c380, 0x0f1a9f2512584, 0x10b8e91a9f0d6 },
+    },
+    {
+        { 0x25cd0944ea3bf, 0x75673b81a4d63, 0x150b925d1c0d4, 0x13f38d9294114, 0x461bea69283c9 },
+        { 0x72c9aaa3221b1, 0x267774474f74d, 0x064b0e9b28085, 0x3f04ef53b27c9, 0x1d6edd5d2e531 },
+        { 0x36dc801b8b3a2, 0x0e0a7d4935e30, 0x1deb7cecc0d7d, 0x053a94e20dd2c, 0x7a9fbb1c6a0f9 },
+    },
+    {
+        { 0x7596604dd3e8f, 0x6fc510e058b36, 0x3670c8db2cc0d, 0x297d899ce332f, 0x0915e76061bce },
+        { 0x75dedf39234d9, 0x01c36ab1f3c54, 0x0f08fee58f5da, 0x0e19613a0d637, 0x3a9024a1320e0 },
+        { 0x1f5d9c9a2911a, 0x7117994fafcf8, 0x2d8a8cae28dc5, 0x74ab1b2090c87, 0x26907c5c2ecc4 },
+    },
+},
+{
+    {
+        { 0x4dd0e632f9c1d, 0x2ced12622a5d9, 0x18de9614742da, 0x79ca96fdbb5d4, 0x6dd37d49a00ee },
+        { 0x3635449aa515e, 0x3e178d0475dab, 0x50b4712a19712, 0x2dcc2860ff4ad, 0x30d76d6f03d31 },
+        { 0x444172106e4c7, 0x01251afed2d88, 0x534fc9bed4f5a, 0x5d85a39cf5234, 0x10c697112e864 },
+    },
+    {
+        { 0x62aa08358c805, 0x46f440848e194, 0x447b771a8f52b, 0x377ba3269d31d, 0x03bf9baf55080 },
+        { 0x3c4277dbe5fde, 0x5a335afd44c92, 0x0c1164099753e, 0x70487006fe423, 0x25e61cabed66f },
+        { 0x3e128cc586604, 0x5968b2e8fc7e2, 0x049a3d5bd61cf, 0x116505b1ef6e6, 0x566d78634586e },
+    },
+    {
+        { 0x54285c65a2fd0, 0x55e62ccf87420, 0x46bb961b19044, 0x1153405712039, 0x14fba5f34793b },
+        { 0x7a49f9cc10834, 0x2b513788a22c6, 0x5ff4b6ef2395b, 0x2ec8e5af607bf, 0x33975bca5ecc3 },
+        { 0x746166985f7d4, 0x09939000ae79a, 0x5844c7964f97a, 0x13617e1f95b3d, 0x14829cea83fc5 },
+    },
+    {
+        { 0x70b2f4e71ecb8, 0x728148efc643c, 0x0753e03995b76, 0x5bf5fb2ab6767, 0x05fc3bc4535d7 },
+        { 0x37b8497dd95c2, 0x61549d6b4ffe8, 0x217a22db1d138, 0x0b9cf062eb09e, 0x2fd9c71e5f758 },
+        { 0x0b3ae52afdedd, 0x19da76619e497, 0x6fa0654d2558e, 0x78219d25e41d4, 0x373767475c651 },
+    },
+    {
+        { 0x095cb14246590, 0x002d82aa6ac68, 0x442f183bc4851, 0x6464f1c0a0644, 0x6bf5905730907 },
+        { 0x299fd40d1add9, 0x5f2de9a04e5f7, 0x7c0eebacc1c59, 0x4cca1b1f8290a, 0x1fbea56c3b18f },
+        { 0x778f1e1415b8a, 0x6f75874efc1f4, 0x28a694019027f, 0x52b37a96bdc4d, 0x02521cf67a635 },
+    },
+    {
+        { 0x46720772f5ee4, 0x632c0f359d622, 0x2b2092ba3e252, 0x662257c112680, 0x001753d9f7cd6 },
+        { 0x7ee0b0a9d5294, 0x381fbeb4cca27, 0x7841f3a3e639d, 0x676ea30c3445f, 0x3fa00a7e71382 },
+        { 0x1232d963ddb34, 0x35692e70b078d, 0x247ca14777a1f, 0x6db556be8fcd0, 0x12b5fe2fa048e },
+    },
+    {
+        { 0x37c26ad6f1e92, 0x46a0971227be5, 0x4722f0d2d9b4c, 0x3dc46204ee03a, 0x6f7e93c20796c },
+        { 0x0fbc496fce34d, 0x575be6b7dae3e, 0x4a31585cee609, 0x037e9023930ff, 0x749b76f96fb12 },
+        { 0x2f604aea6ae05, 0x637dc939323eb, 0x3fdad9b048d47, 0x0a8b0d4045af7, 0x0fcec10f01e02 },
+    },
+    {
+        { 0x2d29dc4244e45, 0x6927b1bc147be, 0x0308534ac0839, 0x4853664033f41, 0x413779166feab },
+        { 0x558a649fe1e44, 0x44635aeefcc89, 0x1ff434887f2ba, 0x0f981220e2d44, 0x4901aa7183c51 },
+        { 0x1b7548c1af8f0, 0x7848c53368116, 0x01b64e7383de9, 0x109fbb0587c8f, 0x41bb887b726d1 },
+    },
+},
+{
+    {
+        { 0x34c597c6691ae, 0x7a150b6990fc4, 0x52beb9d922274, 0x70eed7164861a, 0x0a871e070c6a9 },
+        { 0x07d44744346be, 0x282b6a564a81d, 0x4ed80f875236b, 0x6fbbe1d450c50, 0x4eb728c12fcdb },
+        { 0x1b5994bbc8989, 0x74b7ba84c0660, 0x75678f1cdaeb8, 0x23206b0d6f10c, 0x3ee7300f2685d },
+    },
+    {
+        { 0x27947841e7518, 0x32c7388dae87f, 0x414add3971be9, 0x01850832f0ef1, 0x7d47c6a2cfb89 },
+        { 0x255e49e7dd6b7, 0x38c2163d59eba, 0x3861f2a005845, 0x2e11e4ccbaec9, 0x1381576297912 },
+        { 0x2d0148ef0d6e0, 0x3522a8de787fb, 0x2ee055e74f9d2, 0x64038f6310813, 0x148cf58d34c9e },
+    },
+    {
+        { 0x72f7d9ae4756d, 0x7711e690ffc4a, 0x582a2355b0d16, 0x0dccfe885b6b4, 0x278febad4eaea },
+        { 0x492f67934f027, 0x7ded0815528d4, 0x58461511a6612, 0x5ea2e50de1544, 0x3ff2fa1ebd5db },
+        { 0x2681f8c933966, 0x3840521931635, 0x674f14a308652, 0x3bd9c88a94890, 0x4104dd02fe9c6 },
+    },
+    {
+        { 0x14e06db096ab8, 0x1219c89e6b024, 0x278abd486a2db, 0x240b292609520, 0x0165b5a48efca },
+        { 0x2bf5e1124422a, 0x673146756ae56, 0x14ad99a87e830, 0x1eaca65b080fd, 0x2c863b00afaf5 },
+        { 0x0a474a0846a76, 0x099a5ef981e32, 0x2a8ae3c4bbfe6, 0x45c34af14832c, 0x591b67d9bffec },
+    },
+    {
+        { 0x1b3719f18b55d, 0x754318c83d337, 0x27c17b7919797, 0x145b084089b61, 0x489b4f8670301 },
+        { 0x70d1c80b49bfa, 0x3d57e7d914625, 0x3c0722165e545, 0x5e5b93819e04f, 0x3de02ec7ca8f7 },
+        { 0x2102d3aeb92ef, 0x68c22d50c3a46, 0x42ea89385894e, 0x75f9ebf55f38c, 0x49f5fbba496cb },
+    },
+    {
+        { 0x5628c1e9c572e, 0x598b108e822ab, 0x55d8fae29361a, 0x0adc8d1a97b28, 0x06a1a6c288675 },
+        { 0x49a108a5bcfd4, 0x6178c8e7d6612, 0x1f03473710375, 0x73a49614a6098, 0x5604a86dcbfa6 },
+        { 0x0d1d47c1764b6, 0x01c08316a2e51, 0x2b3db45c95045, 0x1634f818d300c, 0x20989e89fe274 },
+    },
+    {
+        { 0x4278b85eaec2e, 0x0ef59657be2ce, 0x72fd169588770, 0x2e9b205260b30, 0x730b9950f7059 },
+        { 0x777fd3a2dcc7f, 0x594a9fb124932, 0x01f8e80ca15f0, 0x714d13cec3269, 0x0403ed1d0ca67 },
+        { 0x32d35874ec552, 0x1f3048df1b929, 0x300d73b179b23, 0x6e67be5a37d0b, 0x5bd7454308303 },
+    },
+    {
+        { 0x4932115e7792a, 0x457b9bbb930b8, 0x68f5d8b193226, 0x4164e8f1ed456, 0x5bb7db123067f },
+        { 0x2d19528b24cc2, 0x4ac66b8302ff3, 0x701c8d9fdad51, 0x6c1b35c5b3727, 0x133a78007380a },
+        { 0x1f467c6ca62be, 0x2c4232a5dc12c, 0x7551dc013b087, 0x0690c11b03bcd, 0x740dca6d58f0e },
+    },
+},
+{
+    {
+        { 0x28c570478433c, 0x1d8502873a463, 0x7641e7eded49c, 0x1ecedd54cf571, 0x2c03f5256c2b0 },
+        { 0x0ee0752cfce4e, 0x660dd8116fbe9, 0x55167130fffeb, 0x1c682b885955c, 0x161d25fa963ea },
+        { 0x718757b53a47d, 0x619e18b0f2f21, 0x5fbdfe4c1ec04, 0x5d798c81ebb92, 0x699468bdbd96b },
+    },
+    {
+        { 0x53de66aa91948, 0x045f81a599b1b, 0x3f7a8bd214193, 0x71d4da412331a, 0x293e1c4e6c4a2 },
+        { 0x72f46f4dafecf, 0x2948ffadef7a3, 0x11ecdfdf3bc04, 0x3c2e98ffeed25, 0x525219a473905 },
+        { 0x6134b925112e1, 0x6bb942bb406ed, 0x070c445c0dde2, 0x411d822c4d7a3, 0x5b605c447f032 },
+    },
+    {
+        { 0x1fec6f0e7f04c, 0x3cebc692c477d, 0x077986a19a95e, 0x6eaaaa1778b0f, 0x2f12fef4cc5ab },
+        { 0x5805920c47c89, 0x1924771f9972c, 0x38bbddf9fc040, 0x1f7000092b281, 0x24a76dcea8aeb },
+        { 0x522b2dfc0c740, 0x7e8193480e148, 0x33fd9a04341b9, 0x3c863678a20bc, 0x5e607b2518a43 },
+    },
+    {
+        { 0x4431ca596cf14, 0x015da7c801405, 0x03c9b6f8f10b5, 0x0346922934017, 0x201f33139e457 },
+        { 0x31d8f6cdf1818, 0x1f86c4b144b16, 0x39875b8d73e9d, 0x2fbf0d9ffa7b3, 0x5067acab6ccdd },
+        { 0x27f6b08039d51, 0x4802f8000dfaa, 0x09692a062c525, 0x1baea91075817, 0x397cba8862460 },
+    },
+    {
+        { 0x5c3fbc81379e7, 0x41bbc255e2f02, 0x6a3f756998650, 0x1297fd4e07c42, 0x771b4022c1e1c },
+        { 0x13093f05959b2, 0x1bd352f2ec618, 0x075789b88ea86, 0x61d1117ea48b9, 0x2339d320766e6 },
+        { 0x5d986513a2fa7, 0x63f3a99e11b0f, 0x28a0ecfd6b26d, 0x53b6835e18d8f, 0x331a189219971 },
+    },
+    {
+        { 0x12f3a9d7572af, 0x10d00e953c4ca, 0x603df116f2f8a, 0x33dc276e0e088, 0x1ac9619ff649a },
+        { 0x66f45fb4f80c6, 0x3cc38eeb9fea2, 0x107647270db1f, 0x710f1ea740dc8, 0x31167c6b83bdf },
+        { 0x33842524b1068, 0x77dd39d30fe45, 0x189432141a0d0, 0x088fe4eb8c225, 0x612436341f08b },
+    },
+    {
+        { 0x349e31a2d2638, 0x0137a7fa6b16c, 0x681ae92777edc, 0x222bfc5f8dc51, 0x1522aa3178d90 },
+        { 0x541db874e898d, 0x62d80fb841b33, 0x03e6ef027fa97, 0x7a03c9e9633e8, 0x46ebe2309e5ef },
+        { 0x02f5369614938, 0x356e5ada20587, 0x11bc89f6bf902, 0x036746419c8db, 0x45fe70f505243 },
+    },
+    {
+        { 0x24920c8951491, 0x107ec61944c5e, 0x72752e017c01f, 0x122b7dda2e97a, 0x16619f6db57a2 },
+        { 0x075a6960c0b8c, 0x6dde1c5e41b49, 0x42e3f516da341, 0x16a03fda8e79e, 0x428d1623a0e39 },
+        { 0x74a4401a308fd, 0x06ed4b9558109, 0x746f1f6a08867, 0x4636f5c6f2321, 0x1d81592d60bd3 },
+    },
+},
+{
+    {
+        { 0x5b69f7b85c5e8, 0x17a2d175650ec, 0x4cc3e6dbfc19e, 0x73e1d3873be0e, 0x3a5f6d51b0af8 },
+        { 0x68756a60dac5f, 0x55d757b8aec26, 0x3383df45f80bd, 0x6783f8c9f96a6, 0x20234a7789ecd },
+        { 0x20db67178b252, 0x73aa3da2c0eda, 0x79045c01c70d3, 0x1b37b15251059, 0x7cd682353cffe },
+    },
+    {
+        { 0x5cd6068acf4f3, 0x3079afc7a74cc, 0x58097650b64b4, 0x47fabac9c4e99, 0x3ef0253b2b2cd },
+        { 0x1a45bd887fab6, 0x65748076dc17c, 0x5b98000aa11a8, 0x4a1ecc9080974, 0x2838c8863bdc0 },
+        { 0x3b0cf4a465030, 0x022b8aef57a2d, 0x2ad0677e925ad, 0x4094167d7457a, 0x21dcb8a606a82 },
+    },
+    {
+        { 0x500fabe7731ba, 0x7cc53c3113351, 0x7cf65fe080d81, 0x3c5d966011ba1, 0x5d840dbf6c6f6 },
+        { 0x004468c9d9fc8, 0x5da8554796b8c, 0x3b8be70950025, 0x6d5892da6a609, 0x0bc3d08194a31 },
+        { 0x6380d309fe18b, 0x4d73c2cb8ee0d, 0x6b882adbac0b6, 0x36eabdddd4cbe, 0x3a4276232ac19 },
+    },
+    {
+        { 0x0c172db447ecb, 0x3f8c505b7a77f, 0x6a857f97f3f10, 0x4fcc0567fe03a, 0x0770c9e824e1a },
+        { 0x2432c8a7084fa, 0x47bf73ca8a968, 0x1639176262867, 0x5e8df4f8010ce, 0x1ff177cea16de },
+        { 0x1d99a45b5b5fd, 0x523674f2499ec, 0x0f8fa26182613, 0x58f7398048c98, 0x39f264fd41500 },
+    },
+    {
+        { 0x34aabfe097be1, 0x43bfc03253a33, 0x29bc7fe91b7f3, 0x0a761e4844a16, 0x65c621272c35f },
+        { 0x53417dbe7e29c, 0x54573827394f5, 0x565eea6f650dd, 0x42050748dc749, 0x1712d73468889 },
+        { 0x389f8ce3193dd, 0x2d424b8177ce5, 0x073fa0d3440cd, 0x139020cd49e97, 0x22f9800ab19ce },
+    },
+    {
+        { 0x29fdd9a6efdac, 0x7c694a9282840, 0x6f7cdeee44b3a, 0x55a3207b25cc3, 0x4171a4d38598c },
+        { 0x2368a3e9ef8cb, 0x454aa08e2ac0b, 0x490923f8fa700, 0x372aa9ea4582f, 0x13f416cd64762 },
+        { 0x758aa99c94c8c, 0x5f6001700ff44, 0x7694e488c01bd, 0x0d5fde948eed6, 0x508214fa574bd },
+    },
+    {
+        { 0x215bb53d003d6, 0x1179e792ca8c3, 0x1a0e96ac840a2, 0x22393e2bb3ab6, 0x3a7758a4c86cb },
+        { 0x269153ed6fe4b, 0x72a23aef89840, 0x052be5299699c, 0x3a5e5ef132316, 0x22f960ec6faba },
+        { 0x111f693ae5076, 0x3e3bfaa94ca90, 0x445799476b887, 0x24a0912464879, 0x5d9fd15f8de7f },
+    },
+    {
+        { 0x44d2aeed7521e, 0x50865d2c2a7e4, 0x2705b5238ea40, 0x46c70b25d3b97, 0x3bc187fa47eb9 },
+        { 0x408d36d63727f, 0x5faf8f6a66062, 0x2bb892da8de6b, 0x769d4f0c7e2e6, 0x332f35914f8fb },
+        { 0x70115ea86c20c, 0x16d88da24ada8, 0x1980622662adf, 0x501ebbc195a9d, 0x450d81ce906fb },
+    },
+},
+{
+    {
+        { 0x4d8961cae743f, 0x6bdc38c7dba0e, 0x7d3b4a7e1b463, 0x0844bdee2adf3, 0x4cbad279663ab },
+        { 0x3b6a1a6205275, 0x2e82791d06dcf, 0x23d72caa93c87, 0x5f0b7ab68aaf4, 0x2de25d4ba6345 },
+        { 0x19024a0d71fcd, 0x15f65115f101a, 0x4e99067149708, 0x119d8d1cba5af, 0x7d7fbcefe2007 },
+    },
+    {
+        { 0x45dc5f3c29094, 0x3455220b579af, 0x070c1631e068a, 0x26bc0630e9b21, 0x4f9cd196dcd8d },
+        { 0x71e6a266b2801, 0x09aae73e2df5d, 0x40dd8b219b1a3, 0x546fb4517de0d, 0x5975435e87b75 },
+        { 0x297d86a7b3768, 0x4835a2f4c6332, 0x070305f434160, 0x183dd014e56ae, 0x7ccdd084387a0 },
+    },
+    {
+        { 0x484186760cc93, 0x7435665533361, 0x02f686336b801, 0x5225446f64331, 0x3593ca848190c },
+        { 0x6422c6d260417, 0x212904817bb94, 0x5a319deb854f5, 0x7a9d4e060da7d, 0x428bd0ed61d0c },
+        { 0x3189a5e849aa7, 0x6acbb1f59b242, 0x7f6ef4753630c, 0x1f346292a2da9, 0x27398308da2d6 },
+    },
+    {
+        { 0x10e4c0a702453, 0x4daafa37bd734, 0x49f6bdc3e8961, 0x1feffdcecdae6, 0x572c2945492c3 },
+        { 0x38d28435ed413, 0x4064f19992858, 0x7680fbef543cd, 0x1aadd83d58d3c, 0x269597aebe8c3 },
+        { 0x7c745d6cd30be, 0x27c7755df78ef, 0x1776833937fa3, 0x5405116441855, 0x7f985498c05bc },
+    },
+    {
+        { 0x615520fbf6363, 0x0b9e9bf74da6a, 0x4fe8308201169, 0x173f76127de43, 0x30f2653cd69b1 },
+        { 0x1ce889f0be117, 0x36f6a94510709, 0x7f248720016b4, 0x1821ed1e1cf91, 0x76c2ec470a31f },
+        { 0x0c938aac10c85, 0x41b64ed797141, 0x1beb1c1185e6d, 0x1ed5490600f07, 0x2f1273f159647 },
+    },
+    {
+        { 0x08bd755a70bc0, 0x49e3a885ce609, 0x16585881b5ad6, 0x3c27568d34f5e, 0x38ac1997edc5f },
+        { 0x1fc7c8ae01e11, 0x2094d5573e8e7, 0x5ca3cbbf549d2, 0x4f920ecc54143, 0x5d9e572ad85b6 },
+        { 0x6b517a751b13b, 0x0cfd370b180cc, 0x5377925d1f41a, 0x34e56566008a2, 0x22dfcd9cbfe9e },
+    },
+    {
+        { 0x459b4103be0a1, 0x59a4b3f2d2add, 0x7d734c8bb8eeb, 0x2393cbe594a09, 0x0fe9877824cde },
+        { 0x3d2e0c30d0cd9, 0x3f597686671bb, 0x0aa587eb63999, 0x0e3c7b592c619, 0x6b2916c05448c },
+        { 0x334d10aba913b, 0x045cdb581cfdb, 0x5e3e0553a8f36, 0x50bb3041effb2, 0x4c303f307ff00 },
+    },
+    {
+        { 0x403580dd94500, 0x48df77d92653f, 0x38a9fe3b349ea, 0x0ea89850aafe1, 0x416b151ab706a },
+        { 0x23bd617b28c85, 0x6e72ee77d5a61, 0x1a972ff174dde, 0x3e2636373c60f, 0x0d61b8f78b2ab },
+        { 0x0d7efe9c136b0, 0x1ab1c89640ad5, 0x55f82aef41f97, 0x46957f317ed0d, 0x191a2af74277e },
+    },
+},
+{
+    {
+        { 0x62b434f460efb, 0x294c6c0fad3fc, 0x68368937b4c0f, 0x5c9f82910875b, 0x237e7dbe00545 },
+        { 0x6f74bc53c1431, 0x1c40e5dbbd9c2, 0x6c8fb9cae5c97, 0x4845c5ce1b7da, 0x7e2e0e450b5cc },
+        { 0x575ed6701b430, 0x4d3e17fa20026, 0x791fc888c4253, 0x2f1ba99078ac1, 0x71afa699b1115 },
+    },
+    {
+        { 0x23c1c473b50d6, 0x3e7671de21d48, 0x326fa5547a1e8, 0x50e4dc25fafd9, 0x00731fbc78f89 },
+        { 0x66f9b3953b61d, 0x555f4283cccb9, 0x7dd67fb1960e7, 0x14707a1affed4, 0x021142e9c2b1c },
+        { 0x0c71848f81880, 0x44bd9d8233c86, 0x6e8578efe5830, 0x4045b6d7041b5, 0x4c4d6f3347e15 },
+    },
+    {
+        { 0x4ddfc988f1970, 0x4f6173ea365e1, 0x645daf9ae4588, 0x7d43763db623b, 0x38bf9500a88f9 },
+        { 0x7eccfc17d1fc9, 0x4ca280782831e, 0x7b8337db1d7d6, 0x5116def3895fb, 0x193fddaaa7e47 },
+        { 0x2c93c37e8876f, 0x3431a28c583fa, 0x49049da8bd879, 0x4b4a8407ac11c, 0x6a6fb99ebf0d4 },
+    },
+    {
+        { 0x122b5b6e423c6, 0x21e50dff1ddd6, 0x73d76324e75c0, 0x588485495418e, 0x136fda9f42c5e },
+        { 0x6c1bb560855eb, 0x71f127e13ad48, 0x5c6b304905aec, 0x3756b8e889bc7, 0x75f76914a3189 },
+        { 0x4dfb1a305bdd1, 0x3b3ff05811f29, 0x6ed62283cd92e, 0x65d1543ec52e1, 0x022183510be8d },
+    },
+    {
+        { 0x2710143307a7f, 0x3d88fb48bf3ab, 0x249eb4ec18f7a, 0x136115dff295f, 0x1387c441fd404 },
+        { 0x766385ead2d14, 0x0194f8b06095e, 0x08478f6823b62, 0x6018689d37308, 0x6a071ce17b806 },
+        { 0x3c3d187978af8, 0x7afe1c88276ba, 0x51df281c8ad68, 0x64906bda4245d, 0x3171b26aaf1ed },
+    },
+    {
+        { 0x5b7d8b28a47d1, 0x2c2ee149e34c1, 0x776f5629afc53, 0x1f4ea50fc49a9, 0x6c514a6334424 },
+        { 0x7319097564ca8, 0x1844ebc233525, 0x21d4543fdeee1, 0x1ad27aaff1bd2, 0x221fd4873cf08 },
+        { 0x2204f3a156341, 0x537414065a464, 0x43c0c3bedcf83, 0x5557e706ea620, 0x48daa596fb924 },
+    },
+    {
+        { 0x61d5dc84c9793, 0x47de83040c29e, 0x189deb26507e7, 0x4d4e6fadc479a, 0x58c837fa0e8a7 },
+        { 0x28e665ca59cc7, 0x165c715940dd9, 0x0785f3aa11c95, 0x57b98d7e38469, 0x676dd6fccad84 },
+        { 0x1688596fc9058, 0x66f6ad403619f, 0x4d759a87772ef, 0x7856e6173bea4, 0x1c4f73f2c6a57 },
+    },
+    {
+        { 0x6706efc7c3484, 0x6987839ec366d, 0x0731f95cf7f26, 0x3ae758ebce4bc, 0x70459adb7daf6 },
+        { 0x24fbd305fa0bb, 0x40a98cc75a1cf, 0x78ce1220a7533, 0x6217a10e1c197, 0x795ac80d1bf64 },
+        { 0x1db4991b42bb3, 0x469605b994372, 0x631e3715c9a58, 0x7e9cfefcf728f, 0x5fe162848ce21 },
+    },
+},
+{
+    {
+        { 0x1852d5d7cb208, 0x60d0fbe5ce50f, 0x5a1e246e37b75, 0x51aee05ffd590, 0x2b44c043677da },
+        { 0x1214fe194961a, 0x0e1ae39a9e9cb, 0x543c8b526f9f7, 0x119498067e91d, 0x4789d446fc917 },
+        { 0x487ab074eb78e, 0x1d33b5e8ce343, 0x13e419feb1b46, 0x2721f565de6a4, 0x60c52eef2bb9a },
+    },
+    {
+        { 0x3c5c27cae6d11, 0x36a9491956e05, 0x124bac9131da6, 0x3b6f7de202b5d, 0x70d77248d9b66 },
+        { 0x589bc3bfd8bf1, 0x6f93e6aa3416b, 0x4c0a3d6c1ae48, 0x55587260b586a, 0x10bc9c312ccfc },
+        { 0x2e84b3ec2a05b, 0x69da2f03c1551, 0x23a174661a67b, 0x209bca289f238, 0x63755bd3a976f },
+    },
+    {
+        { 0x7101897f1acb7, 0x3d82cb77b07b8, 0x684083d7769f5, 0x52b28472dce07, 0x2763751737c52 },
+        { 0x7a03e2ad10853, 0x213dcc6ad36ab, 0x1a6e240d5bdd6, 0x7c24ffcf8fedf, 0x0d8cc1c48bc16 },
+        { 0x402d36eb419a9, 0x7cef68c14a052, 0x0f1255bc2d139, 0x373e7d431186a, 0x70c2dd8a7ad16 },
+    },
+    {
+        { 0x4967db8ed7e13, 0x15aeed02f523a, 0x6149591d094bc, 0x672f204c17006, 0x32b8613816a53 },
+        { 0x194509f6fec0e, 0x528d8ca31acac, 0x7826d73b8b9fa, 0x24acb99e0f9b3, 0x2e0fac6363948 },
+        { 0x7f7bee448cd64, 0x4e10f10da0f3c, 0x3936cb9ab20e9, 0x7a0fc4fea6cd0, 0x4179215c735a4 },
+    },
+    {
+        { 0x633b9286bcd34, 0x6cab3badb9c95, 0x74e387edfbdfa, 0x14313c58a0fd9, 0x31fa85662241c },
+        { 0x094e7d7dced2a, 0x068fa738e118e, 0x41b640a5fee2b, 0x6bb709df019d4, 0x700344a30cd99 },
+        { 0x26c422e3622f4, 0x0f3066a05b5f0, 0x4e2448f0480a6, 0x244cde0dbf095, 0x24bb2312a9952 },
+    },
+    {
+        { 0x00c2af5f85c6b, 0x0609f4cf2883f, 0x6e86eb5a1ca13, 0x68b44a2efccd1, 0x0d1d2af9ffeb5 },
+        { 0x0ed1732de67c3, 0x308c369291635, 0x33ef348f2d250, 0x004475ea1a1bb, 0x0fee3e871e188 },
+        { 0x28aa132621edf, 0x42b244caf353b, 0x66b064cc2e08a, 0x6bb20020cbdd3, 0x16acd79718531 },
+    },
+    {
+        { 0x1c6c57887b6ad, 0x5abf21fd7592b, 0x50bd41253867a, 0x3800b71273151, 0x164ed34b18161 },
+        { 0x772af2d9b1d3d, 0x6d486448b4e5b, 0x2ce58dd8d18a8, 0x1849f67503c8b, 0x123e0ef6b9302 },
+        { 0x6d94c192fe69a, 0x5475222a2690f, 0x693789d86b8b3, 0x1f5c3bdfb69dc, 0x78da0fc61073f },
+    },
+    {
+        { 0x780f1680c3a94, 0x2a35d3cfcd453, 0x005e5cdc7ddf8, 0x6ee888078ac24, 0x054aa4b316b38 },
+        { 0x15d28e52bc66a, 0x30e1e0351cb7e, 0x30a2f74b11f8c, 0x39d120cd7de03, 0x2d25deeb256b1 },
+        { 0x0468d19267cb8, 0x38cdca9b5fbf9, 0x1bbb05c2ca1e2, 0x3b015758e9533, 0x134610a6ab7da },
+    },
+},
+{
+    {
+        { 0x265e777d1f515, 0x0f1f54c1e39a5, 0x2f01b95522646, 0x4fdd8db9dde6d, 0x654878cba97cc },
+        { 0x38ec78df6b0fe, 0x13caebea36a22, 0x5ebc6e54e5f6a, 0x32804903d0eb8, 0x2102fdba2b20d },
+        { 0x6e405055ce6a1, 0x5024a35a532d3, 0x1f69054daf29d, 0x15d1d0d7a8bd5, 0x0ad725db29ecb },
+    },
+    {
+        { 0x7bc0c9b056f85, 0x51cfebffaffd8, 0x44abbe94df549, 0x7ecbbd7e33121, 0x4f675f5302399 },
+        { 0x267b1834e2457, 0x6ae19c378bb88, 0x7457b5ed9d512, 0x3280d783d05fb, 0x4aefcffb71a03 },
+        { 0x536360415171e, 0x2313309077865, 0x251444334afbc, 0x2b0c3853756e8, 0x0bccbb72a2a86 },
+    },
+    {
+        { 0x55e4c50fe1296, 0x05fdd13efc30d, 0x1c0c6c380e5ee, 0x3e11de3fb62a8, 0x6678fd69108f3 },
+        { 0x6962feab1a9c8, 0x6aca28fb9a30b, 0x56db7ca1b9f98, 0x39f58497018dd, 0x4024f0ab59d6b },
+        { 0x6fa31636863c2, 0x10ae5a67e42b0, 0x27abbf01fda31, 0x380a7b9e64fbc, 0x2d42e2108ead4 },
+    },
+    {
+        { 0x17b0d0f537593, 0x16263c0c9842e, 0x4ab827e4539a4, 0x6370ddb43d73a, 0x420bf3a79b423 },
+        { 0x5131594dfd29b, 0x3a627e98d52fe, 0x1154041855661, 0x19175d09f8384, 0x676b2608b8d2d },
+        { 0x0ba651c5b2b47, 0x5862363701027, 0x0c4d6c219c6db, 0x0f03dff8658de, 0x745d2ffa9c0cf },
+    },
+    {
+        { 0x6df5721d34e6a, 0x4f32f767a0c06, 0x1d5abeac76e20, 0x41ce9e104e1e4, 0x06e15be54c1dc },
+        { 0x25a1e2bc9c8bd, 0x104c8f3b037ea, 0x405576fa96c98, 0x2e86a88e3876f, 0x1ae23ceb960cf },
+        { 0x25d871932994a, 0x6b9d63b560b6e, 0x2df2814c8d472, 0x0fbbee20aa4ed, 0x58ded861278ec },
+    },
+    {
+        { 0x35ba8b6c2c9a8, 0x1dea58b3185bf, 0x4b455cd23bbbe, 0x5ec19c04883f8, 0x08ba696b531d5 },
+        { 0x73793f266c55c, 0x0b988a9c93b02, 0x09b0ea32325db, 0x37cae71c17c5e, 0x2ff39de85485f },
+        { 0x53eeec3efc57a, 0x2fa9fe9022efd, 0x699c72c138154, 0x72a751ebd1ff8, 0x120633b4947cf },
+    },
+    {
+        { 0x531474912100a, 0x5afcdf7c0d057, 0x7a9e71b788ded, 0x5ef708f3b0c88, 0x07433be3cb393 },
+        { 0x4987891610042, 0x79d9d7f5d0172, 0x3c293013b9ec4, 0x0c2b85f39caca, 0x35d30a99b4d59 },
+        { 0x144c05ce997f4, 0x4960b8a347fef, 0x1da11f15d74f7, 0x54fac19c0fead, 0x2d873ede7af6d },
+    },
+    {
+        { 0x202e14e5df981, 0x2ea02bc3eb54c, 0x38875b2883564, 0x1298c513ae9dd, 0x0543618a01600 },
+        { 0x2316443373409, 0x5de95503b22af, 0x699201beae2df, 0x3db5849ff737a, 0x2e773654707fa },
+        { 0x2bdf4974c23c1, 0x4b3b9c8d261bd, 0x26ae8b2a9bc28, 0x3068210165c51, 0x4b1443362d079 },
+    },
+},
+{
+    {
+        { 0x454e91c529ccb, 0x24c98c6bf72cf, 0x0486594c3d89a, 0x7ae13a3d7fa3c, 0x17038418eaf66 },
+        { 0x4b7c7b66e1f7a, 0x4bea185efd998, 0x4fabc711055f8, 0x1fb9f7836fe38, 0x582f446752da6 },
+        { 0x17bd320324ce4, 0x51489117898c6, 0x1684d92a0410b, 0x6e4d90f78c5a7, 0x0c2a1c4bcda28 },
+    },
+    {
+        { 0x4814869bd6945, 0x7b7c391a45db8, 0x57316ac35b641, 0x641e31de9096a, 0x5a6a9b30a314d },
+        { 0x5c7d06f1f0447, 0x7db70f80b3a49, 0x6cb4a3ec89a78, 0x43be8ad81397d, 0x7c558bd1c6f64 },
+        { 0x41524d396463d, 0x1586b449e1a1d, 0x2f17e904aed8a, 0x7e1d2861d3c8e, 0x0404a5ca0afba },
+    },
+    {
+        { 0x49e1b2a416fd1, 0x51c6a0b316c57, 0x575a59ed71bdc, 0x74c021a1fec1e, 0x39527516e7f8e },
+        { 0x740070aa743d6, 0x16b64cbdd1183, 0x23f4b7b32eb43, 0x319aba58235b3, 0x46395bfdcadd9 },
+        { 0x7db2d1a5d9a9c, 0x79a200b85422f, 0x355bfaa71dd16, 0x00b77ea5f78aa, 0x76579a29e822d },
+    },
+    {
+        { 0x4b51352b434f2, 0x1327bd01c2667, 0x434d73b60c8a1, 0x3e0daa89443ba, 0x02c514bb2a277 },
+        { 0x68e7e49c02a17, 0x45795346fe8b6, 0x089306c8f3546, 0x6d89f6b2f88f6, 0x43a384dc9e05b },
+        { 0x3d5da8bf1b645, 0x7ded6a96a6d09, 0x6c3494fee2f4d, 0x02c989c8b6bd4, 0x1160920961548 },
+    },
+    {
+        { 0x05616369b4dcd, 0x4ecab86ac6f47, 0x3c60085d700b2, 0x0213ee10dfcea, 0x2f637d7491e6e },
+        { 0x5166929dacfaa, 0x190826b31f689, 0x4f55567694a7d, 0x705f4f7b1e522, 0x351e125bc5698 },
+        { 0x49b461af67bbe, 0x75915712c3a96, 0x69a67ef580c0d, 0x54d38ef70cffc, 0x7f182d06e7ce2 },
+    },
+    {
+        { 0x54b728e217522, 0x69a90971b0128, 0x51a40f2a963a3, 0x10be9ac12a6bf, 0x44acc043241c5 },
+        { 0x48e64ab0168ec, 0x2a2bdb8a86f4f, 0x7343b6b2d6929, 0x1d804aa8ce9a3, 0x67d4ac8c343e9 },
+        { 0x56bbb4f7a5777, 0x29230627c238f, 0x5ad1a122cd7fb, 0x0dea56e50e364, 0x556d1c8312ad7 },
+    },
+    {
+        { 0x06756b11be821, 0x462147e7bb03e, 0x26519743ebfe0, 0x782fc59682ab5, 0x097abe38cc8c7 },
+        { 0x740e30c8d3982, 0x7c2b47f4682fd, 0x5cd91b8c7dc1c, 0x77fa790f9e583, 0x746c6c6d1d824 },
+        { 0x1c9877ea52da4, 0x2b37b83a86189, 0x733af49310da5, 0x25e81161c04fb, 0x577e14a34bee8 },
+    },
+    {
+        { 0x6cebebd4dd72b, 0x340c1e442329f, 0x32347ffd1a93f, 0x14a89252cbbe0, 0x705304b8fb009 },
+        { 0x268ac61a73b0a, 0x206f234bebe1c, 0x5b403a7cbebe8, 0x7a160f09f4135, 0x60fa7ee96fd78 },
+        { 0x51d354d296ec6, 0x7cbf5a63b16c7, 0x2f50bb3cf0c14, 0x1feb385cac65a, 0x21398e0ca1635 },
+    },
+},
+{
+    {
+        { 0x0aaf9b4b75601, 0x26b91b5ae44f3, 0x6de808d7ab1c8, 0x6a769675530b0, 0x1bbfb284e98f7 },
+        { 0x5058a382b33f3, 0x175a91816913e, 0x4f6cdb96b8ae8, 0x17347c9da81d2, 0x5aa3ed9d95a23 },
+        { 0x777e9c7d96561, 0x28e58f006ccac, 0x541bbbb2cac49, 0x3e63282994cec, 0x4a07e14e5e895 },
+    },
+    {
+        { 0x358cdc477a49b, 0x3cc88fe02e481, 0x721aab7f4e36b, 0x0408cc9469953, 0x50af7aed84afa },
+        { 0x412cb980df999, 0x5e78dd8ee29dc, 0x171dff68c575d, 0x2015dd2f6ef49, 0x3f0bac391d313 },
+        { 0x7de0115f65be5, 0x4242c21364dc9, 0x6b75b64a66098, 0x0033c0102c085, 0x1921a316baebd },
+    },
+    {
+        { 0x2ad9ad9f3c18b, 0x5ec1638339aeb, 0x5703b6559a83b, 0x3fa9f4d05d612, 0x7b049deca062c },
+        { 0x22f7edfb870fc, 0x569eed677b128, 0x30937dcb0a5af, 0x758039c78ea1b, 0x6458df41e273a },
+        { 0x3e37a35444483, 0x661fdb7d27b99, 0x317761dd621e4, 0x7323c30026189, 0x6093dccbc2950 },
+    },
+    {
+        { 0x6eebe6084034b, 0x6cf01f70a8d7b, 0x0b41a54c6670a, 0x6c84b99bb55db, 0x6e3180c98b647 },
+        { 0x39a8585e0706d, 0x3167ce72663fe, 0x63d14ecdb4297, 0x4be21dcf970b8, 0x57d1ea084827a },
+        { 0x2b6e7a128b071, 0x5b27511755dcf, 0x08584c2930565, 0x68c7bda6f4159, 0x363e999ddd97b },
+    },
+    {
+        { 0x048dce24baec6, 0x2b75795ec05e3, 0x3bfa4c5da6dc9, 0x1aac8659e371e, 0x231f979bc6f9b },
+        { 0x043c135ee1fc4, 0x2a11c9919f2d5, 0x6334cc25dbacd, 0x295da17b400da, 0x48ee9b78693a0 },
+        { 0x1de4bcc2af3c6, 0x61fc411a3eb86, 0x53ed19ac12ec0, 0x209dbc6b804e0, 0x079bfa9b08792 },
+    },
+    {
+        { 0x1ed80a2d54245, 0x70efec72a5e79, 0x42151d42a822d, 0x1b5ebb6d631e8, 0x1ef4fb1594706 },
+        { 0x03a51da300df4, 0x467b52b561c72, 0x4d5920210e590, 0x0ca769e789685, 0x038c77f684817 },
+        { 0x65ee65b167bec, 0x052da19b850a9, 0x0408665656429, 0x7ab39596f9a4c, 0x575ee92a4a0bf },
+    },
+    {
+        { 0x6bc450aa4d801, 0x4f4a6773b0ba8, 0x6241b0b0ebc48, 0x40d9c4f1d9315, 0x200a1e7e382f5 },
+        { 0x080908a182fcf, 0x0532913b7ba98, 0x3dccf78c385c3, 0x68002dd5eaba9, 0x43d4e7112cd3f },
+        { 0x5b967eaf93ac5, 0x360acca580a31, 0x1c65fd5c6f262, 0x71c7f15c2ecab, 0x050eca52651e4 },
+    },
+    {
+        { 0x4397660e668ea, 0x7c2a75692f2f5, 0x3b29e7e6c66ef, 0x72ba658bcda9a, 0x6151c09fa131a },
+        { 0x31ade453f0c9c, 0x3dfee07737868, 0x611ecf7a7d411, 0x2637e6cbd64f6, 0x4b0ee6c21c58f },
+        { 0x55c0dfdf05d96, 0x405569dcf475e, 0x05c5c277498bb, 0x18588d95dc389, 0x1fef24fa800f0 },
+    },
+},
+{
+    {
+        { 0x2aff530976b86, 0x0d85a48c0845a, 0x796eb963642e0, 0x60bee50c4b626, 0x28005fe6c8340 },
+        { 0x653fb1aa73196, 0x607faec8306fa, 0x4e85ec83e5254, 0x09f56900584fd, 0x544d49292fc86 },
+        { 0x7ba9f34528688, 0x284a20fb42d5d, 0x3652cd9706ffe, 0x6fd7baddde6b3, 0x72e472930f316 },
+    },
+    {
+        { 0x3f635d32a7627, 0x0cbecacde00fe, 0x3411141eaa936, 0x21c1e42f3cb94, 0x1fee7f000fe06 },
+        { 0x5208c9781084f, 0x16468a1dc24d2, 0x7bf780ac540a8, 0x1a67eced75301, 0x5a9d2e8c2733a },
+        { 0x305da03dbf7e5, 0x1228699b7aeca, 0x12a23b2936bc9, 0x2a1bda56ae6e9, 0x00f94051ee040 },
+    },
+    {
+        { 0x793bb07af9753, 0x1e7b6ecd4fafd, 0x02c7b1560fb43, 0x2296734cc5fb7, 0x47b7ffd25dd40 },
+        { 0x56b23c3d330b2, 0x37608e360d1a6, 0x10ae0f3c8722e, 0x086d9b618b637, 0x07d79c7e8beab },
+        { 0x3fb9cbc08dd12, 0x75c3dd85370ff, 0x47f06fe2819ac, 0x5db06ab9215ed, 0x1c3520a35ea64 },
+    },
+    {
+        { 0x06f40216bc059, 0x3a2579b0fd9b5, 0x71c26407eec8c, 0x72ada4ab54f0b, 0x38750c3b66d12 },
+        { 0x253a6bccba34a, 0x427070433701a, 0x20b8e58f9870e, 0x337c861db00cc, 0x1c3d05775d0ee },
+        { 0x6f1409422e51a, 0x7856bbece2d25, 0x13380a72f031c, 0x43e1080a7f3ba, 0x0621e2c7d3304 },
+    },
+    {
+        { 0x61796b0dbf0f3, 0x73c2f9c32d6f5, 0x6aa8ed1537ebe, 0x74e92c91838f4, 0x5d8e589ca1002 },
+        { 0x060cc8259838d, 0x038d3f35b95f3, 0x56078c243a923, 0x2de3293241bb2, 0x0007d6097bd3a },
+        { 0x71d950842a94b, 0x46b11e5c7d817, 0x5478bbecb4f0d, 0x7c3054b0a1c5d, 0x1583d7783c1cb },
+    },
+    {
+        { 0x34704cc9d28c7, 0x3dee598b1f200, 0x16e1c98746d9e, 0x4050b7095afdf, 0x4958064e83c55 },
+        { 0x6a2ef5da27ae1, 0x28aace02e9d9d, 0x02459e965f0e8, 0x7b864d3150933, 0x252a5f2e81ed8 },
+        { 0x094265066e80d, 0x0a60f918d61a5, 0x0444bf7f30fde, 0x1c40da9ed3c06, 0x079c170bd843b },
+    },
+    {
+        { 0x6cd50c0d5d056, 0x5b7606ae779ba, 0x70fbd226bdda1, 0x5661e53391ff9, 0x6768c0d7317b8 },
+        { 0x6ece464fa6fff, 0x3cc40bca460a0, 0x6e3a90afb8d0c, 0x5801abca11228, 0x6dec05e34ac9f },
+        { 0x625e5f155c1b3, 0x4f32f6f723296, 0x5ac980105efce, 0x17a61165eee36, 0x51445e14ddcd5 },
+    },
+    {
+        { 0x147ab2bbea455, 0x1f240f2253126, 0x0c3de9e314e89, 0x21ea5a4fca45f, 0x12e990086e4fd },
+        { 0x02b4b3b144951, 0x5688977966aea, 0x18e176e399ffd, 0x2e45c5eb4938b, 0x13186f31e3929 },
+        { 0x496b37fdfbb2e, 0x3c2439d5f3e21, 0x16e60fe7e6a4d, 0x4d7ef889b621d, 0x77b2e3f05d3e9 },
+    },
+},
+{
+    {
+        { 0x0639c12ddb0a4, 0x6180490cd7ab3, 0x3f3918297467c, 0x74568be1781ac, 0x07a195152e095 },
+        { 0x7a9c59c2ec4de, 0x7e9f09e79652d, 0x6a3e422f22d86, 0x2ae8e3b836c8b, 0x63b795fc7ad32 },
+        { 0x68f02389e5fc8, 0x059f1bc877506, 0x504990e410cec, 0x09bd7d0feaee2, 0x3e8fe83d032f0 },
+    },
+    {
+        { 0x04c8de8efd13c, 0x1c67c06e6210e, 0x183378f7f146a, 0x64352ceaed289, 0x22d60899a6258 },
+        { 0x315b90570a294, 0x60ce108a925f1, 0x6eff61253c909, 0x003ef0e2d70b0, 0x75ba3b797fac4 },
+        { 0x1dbc070cdd196, 0x16d8fb1534c47, 0x500498183fa2a, 0x72f59c423de75, 0x0904d07b87779 },
+    },
+    {
+        { 0x22d6648f940b9, 0x197a5a1873e86, 0x207e4c41a54bc, 0x5360b3b4bd6d0, 0x6240aacebaf72 },
+        { 0x61fd4ddba919c, 0x7d8e991b55699, 0x61b31473cc76c, 0x7039631e631d6, 0x43e2143fbc1dd },
+        { 0x4749c5ba295a0, 0x37946fa4b5f06, 0x724c5ab5a51f1, 0x65633789dd3f3, 0x56bdaf238db40 },
+    },
+    {
+        { 0x0d36cc19d3bb2, 0x6ec4470d72262, 0x6853d7018a9ae, 0x3aa3e4dc2c8eb, 0x03aa31507e1e5 },
+        { 0x2b9e3f53533eb, 0x2add727a806c5, 0x56955c8ce15a3, 0x18c4f070a290e, 0x1d24a86d83741 },
+        { 0x47648ffd4ce1f, 0x60a9591839e9d, 0x424d5f38117ab, 0x42cc46912c10e, 0x43b261dc9aeb4 },
+    },
+    {
+        { 0x13d8b6c951364, 0x4c0017e8f632a, 0x53e559e53f9c4, 0x4b20146886eea, 0x02b4d5e242940 },
+        { 0x31e1988bb79bb, 0x7b82f46b3bcab, 0x0f7a8ce827b41, 0x5e15816177130, 0x326055cf5b276 },
+        { 0x155cb28d18df2, 0x0c30d9ca11694, 0x2090e27ab3119, 0x208624e7a49b6, 0x27a6c809ae5d3 },
+    },
+    {
+        { 0x4270ac43d6954, 0x2ed4cd95659a5, 0x75c0db37528f9, 0x2ccbcfd2c9234, 0x221503603d8c2 },
+        { 0x6ebcd1f0db188, 0x74ceb4b7d1174, 0x7d56168df4f5c, 0x0bf79176fd18a, 0x2cb67174ff60a },
+        { 0x6cdf9390be1d0, 0x08e519c7e2b3d, 0x253c3d2a50881, 0x21b41448e333d, 0x7b1df4b73890f },
+    },
+    {
+        { 0x6221807f8f58c, 0x3fa92813a8be5, 0x6da98c38d5572, 0x01ed95554468f, 0x68698245d352e },
+        { 0x2f2e0b3b2a224, 0x0c56aa22c1c92, 0x5fdec39f1b278, 0x4c90af5c7f106, 0x61fcef2658fc5 },
+        { 0x15d852a18187a, 0x270dbb59afb76, 0x7db120bcf92ab, 0x0e7a25d714087, 0x46cf4c473daf0 },
+    },
+    {
+        { 0x46ea7f1498140, 0x70725690a8427, 0x0a73ae9f079fb, 0x2dd924461c62b, 0x1065aae50d8cc },
+        { 0x525ed9ec4e5f9, 0x022d20660684c, 0x7972b70397b68, 0x7a03958d3f965, 0x29387bcd14eb5 },
+        { 0x44525df200d57, 0x2d7f94ce94385, 0x60d00c170ecb7, 0x38b0503f3d8f0, 0x69a198e64f1ce },
+    },
+},
+{
+    {
+        { 0x14434dcc5caed, 0x2c7909f667c20, 0x61a839d1fb576, 0x4f23800cabb76, 0x25b2697bd267f },
+        { 0x2b2e0d91a78bc, 0x3990a12ccf20c, 0x141c2e11f2622, 0x0dfcefaa53320, 0x7369e6a92493a },
+        { 0x73ffb13986864, 0x3282bb8f713ac, 0x49ced78f297ef, 0x6697027661def, 0x1420683db54e4 },
+    },
+    {
+        { 0x6bb6fc1cc5ad0, 0x532c8d591669d, 0x1af794da86c33, 0x0e0e9d86d24d3, 0x31e83b4161d08 },
+        { 0x0bd1e249dd197, 0x00bcb1820568f, 0x2eab1718830d4, 0x396fd816997e6, 0x60b63bebf508a },
+        { 0x0c7129e062b4f, 0x1e526415b12fd, 0x461a0fd27923d, 0x18badf670a5b7, 0x55cf1eb62d550 },
+    },
+    {
+        { 0x6b5e37df58c52, 0x3bcf33986c60e, 0x44fb8835ceae7, 0x099dec18e71a4, 0x1a56fbaa62ba0 },
+        { 0x1101065c23d58, 0x5aa1290338b0f, 0x3157e9e2e7421, 0x0ea712017d489, 0x669a656457089 },
+        { 0x66b505c9dc9ec, 0x774ef86e35287, 0x4d1d944c0955e, 0x52e4c39d72b20, 0x13c4836799c58 },
+    },
+    {
+        { 0x4fb6a5d8bd080, 0x58ae34908589b, 0x3954d977baf13, 0x413ea597441dc, 0x50bdc87dc8e5b },
+        { 0x25d465ab3e1b9, 0x0f8fe27ec2847, 0x2d6e6dbf04f06, 0x3038cfc1b3276, 0x66f80c93a637b },
+        { 0x537836edfe111, 0x2be02357b2c0d, 0x6dcee58c8d4f8, 0x2d732581d6192, 0x1dd56444725fd },
+    },
+    {
+        { 0x7e60008bac89a, 0x23d5c387c1852, 0x79e5df1f533a8, 0x2e6f9f1c5f0cf, 0x3a3a450f63a30 },
+        { 0x47ff83362127d, 0x08e39af82b1f4, 0x488322ef27dab, 0x1973738a2a1a4, 0x0e645912219f7 },
+        { 0x72f31d8394627, 0x07bd294a200f1, 0x665be00e274c6, 0x43de8f1b6368b, 0x318c8d9393a9a },
+    },
+    {
+        { 0x69e29ab1dd398, 0x30685b3c76bac, 0x565cf37f24859, 0x57b2ac28efef9, 0x509a41c325950 },
+        { 0x45d032afffe19, 0x12fe49b6cde4e, 0x21663bc327cf1, 0x18a5e4c69f1dd, 0x224c7c679a1d5 },
+        { 0x06edca6f925e9, 0x68c8363e677b8, 0x60cfa25e4fbcf, 0x1c4c17609404e, 0x05bff02328a11 },
+    },
+    {
+        { 0x1a0dd0dc512e4, 0x10894bf5fcd10, 0x52949013f9c37, 0x1f50fba4735c7, 0x576277cdee01a },
+        { 0x2137023cae00b, 0x15a3599eb26c6, 0x0687221512b3c, 0x253cb3a0824e9, 0x780b8cc3fa2a4 },
+        { 0x38abc234f305f, 0x7a280bbc103de, 0x398a836695dfe, 0x3d0af41528a1a, 0x5ff418726271b },
+    },
+    {
+        { 0x347e813b69540, 0x76864c21c3cbb, 0x1e049dbcd74a8, 0x5b4d60f93749c, 0x29d4db8ca0a0c },
+        { 0x6080c1789db9d, 0x4be7cef1ea731, 0x2f40d769d8080, 0x35f7d4c44a603, 0x106a03dc25a96 },
+        { 0x50aaf333353d0, 0x4b59a613cbb35, 0x223dfc0e19a76, 0x77d1e2bb2c564, 0x4ab38a51052cb },
+    },
+},
+{
+    {
+        { 0x7d1ef5fddc09c, 0x7beeaebb9dad9, 0x058d30ba0acfb, 0x5cd92eab5ae90, 0x3041c6bb04ed2 },
+        { 0x42b256768d593, 0x2e88459427b4f, 0x02b3876630701, 0x34878d405eae5, 0x29cdd1adc088a },
+        { 0x2f2f9d956e148, 0x6b3e6ad65c1fe, 0x5b00972b79e5d, 0x53d8d234c5daf, 0x104bbd6814049 },
+    },
+    {
+        { 0x59a5fd67ff163, 0x3a998ead0352b, 0x083c95fa4af9a, 0x6fadbfc01266f, 0x204f2a20fb072 },
+        { 0x0fd3168f1ed67, 0x1bb0de7784a3e, 0x34bcb78b20477, 0x0a4a26e2e2182, 0x5be8cc57092a7 },
+        { 0x43b3d30ebb079, 0x357aca5c61902, 0x5b570c5d62455, 0x30fb29e1e18c7, 0x2570fb17c2791 },
+    },
+    {
+        { 0x6a9550bb8245a, 0x511f20a1a2325, 0x29324d7239bee, 0x3343cc37516c4, 0x241c5f91de018 },
+        { 0x2367f2cb61575, 0x6c39ac04d87df, 0x6d4958bd7e5bd, 0x566f4638a1532, 0x3dcb65ea53030 },
+        { 0x0172940de6caa, 0x6045b2e67451b, 0x56c07463efcb3, 0x0728b6bfe6e91, 0x08420edd5fcdf },
+    },
+    {
+        { 0x0c34e04f410ce, 0x344edc0d0a06b, 0x6e45486d84d6d, 0x44e2ecb3863f5, 0x04d654f321db8 },
+        { 0x720ab8362fa4a, 0x29c4347cdd9bf, 0x0e798ad5f8463, 0x4fef18bcb0bfe, 0x0d9a53efbc176 },
+        { 0x5c116ddbdb5d5, 0x6d1b4bba5abcf, 0x4d28a48a5537a, 0x56b8e5b040b99, 0x4a7a4f2618991 },
+    },
+    {
+        { 0x3b291af372a4b, 0x60e3028fe4498, 0x2267bca4f6a09, 0x719eec242b243, 0x4a96314223e0e },
+        { 0x718025fb15f95, 0x68d6b8371fe94, 0x3804448f7d97c, 0x42466fe784280, 0x11b50c4cddd31 },
+        { 0x0274408a4ffd6, 0x7d382aedb34dd, 0x40acfc9ce385d, 0x628bb99a45b1e, 0x4f4bce4dce6bc },
+    },
+    {
+        { 0x2616ec49d0b6f, 0x1f95d8462e61c, 0x1ad3e9b9159c6, 0x79ba475a04df9, 0x3042cee561595 },
+        { 0x7ce5ae2242584, 0x2d25eb153d4e3, 0x3a8f3d09ba9c9, 0x0f3690d04eb8e, 0x73fcdd14b71c0 },
+        { 0x67079449bac41, 0x5b79c4621484f, 0x61069f2156b8d, 0x0eb26573b10af, 0x389e740c9a9ce },
+    },
+    {
+        { 0x578f6570eac28, 0x644f2339c3937, 0x66e47b7956c2c, 0x34832fe1f55d0, 0x25c425e5d6263 },
+        { 0x4b3ae34dcb9ce, 0x47c691a15ac9f, 0x318e06e5d400c, 0x3c422d9f83eb1, 0x61545379465a6 },
+        { 0x606a6f1d7de6e, 0x4f1c0c46107e7, 0x229b1dcfbe5d8, 0x3acc60a7b1327, 0x6539a08915484 },
+    },
+    {
+        { 0x4dbd414bb4a19, 0x7930849f1dbb8, 0x329c5a466caf0, 0x6c824544feb9b, 0x0f65320ef019b },
+        { 0x21f74c3d2f773, 0x024b88d08bd3a, 0x6e678cf054151, 0x43631272e747c, 0x11c5e4aac5cd1 },
+        { 0x6d1b1cafde0c6, 0x462c76a303a90, 0x3ca4e693cff9b, 0x3952cd45786fd, 0x4cabc7bdec330 },
+    },
+},
+{
+    {
+        { 0x7788f3f78d289, 0x5942809b3f811, 0x5973277f8c29c, 0x010f93bc5fe67, 0x7ee498165acb2 },
+        { 0x69624089c0a2e, 0x0075fc8e70473, 0x13e84ab1d2313, 0x2c10bedf6953b, 0x639b93f0321c8 },
+        { 0x508e39111a1c3, 0x290120e912f7a, 0x1cbf464acae43, 0x15373e9576157, 0x0edf493c85b60 },
+    },
+    {
+        { 0x7c4d284764113, 0x7fefebf06acec, 0x39afb7a824100, 0x1b48e47e7fd65, 0x04c00c54d1dfa },
+        { 0x48158599b5a68, 0x1fd75bc41d5d9, 0x2d9fc1fa95d3c, 0x7da27f20eba11, 0x403b92e3019d4 },
+        { 0x22f818b465cf8, 0x342901dff09b8, 0x31f595dc683cd, 0x37a57745fd682, 0x355bb12ab2617 },
+    },
+    {
+        { 0x1dac75a8c7318, 0x3b679d5423460, 0x6b8fcb7b6400e, 0x6c73783be5f9d, 0x7518eaf8e052a },
+        { 0x664cc7493bbf4, 0x33d94761874e3, 0x0179e1796f613, 0x1890535e2867d, 0x0f9b8132182ec },
+        { 0x059c41b7f6c32, 0x79e8706531491, 0x6c747643cb582, 0x2e20c0ad494e4, 0x47c3871bbb175 },
+    },
+    {
+        { 0x65d50c85066b0, 0x6167453361f7c, 0x06ba3818bb312, 0x6aff29baa7522, 0x08fea02ce8d48 },
+        { 0x4539771ec4f48, 0x7b9318badca28, 0x70f19afe016c5, 0x4ee7bb1608d23, 0x00b89b8576469 },
+        { 0x5dd7668deead0, 0x4096d0ba47049, 0x6275997219114, 0x29bda8a67e6ae, 0x473829a74f75d },
+    },
+    {
+        { 0x1533aad3902c9, 0x1dde06b11e47b, 0x784bed1930b77, 0x1c80a92b9c867, 0x6c668b4d44e4d },
+        { 0x2da754679c418, 0x3164c31be105a, 0x11fac2b98ef5f, 0x35a1aaf779256, 0x2078684c4833c },
+        { 0x0cf217a78820c, 0x65024e7d2e769, 0x23bb5efdda82a, 0x19fd4b632d3c6, 0x7411a6054f8a4 },
+    },
+    {
+        { 0x2e53d18b175b4, 0x33e7254204af3, 0x3bcd7d5a1c4c5, 0x4c7c22af65d0f, 0x1ec9a872458c3 },
+        { 0x59d32b99dc86d, 0x6ac075e22a9ac, 0x30b9220113371, 0x27fd9a638966e, 0x7c136574fb813 },
+        { 0x6a4d400a2509b, 0x041791056971c, 0x655d5866e075c, 0x2302bf3e64df8, 0x3add88a5c7cd6 },
+    },
+    {
+        { 0x298d459393046, 0x30bfecb3d90b8, 0x3d9b8ea3df8d6, 0x3900e96511579, 0x61ba1131a406a },
+        { 0x15770b635dcf2, 0x59ecd83f79571, 0x2db461c0b7fbd, 0x73a42a981345f, 0x249929fccc879 },
+        { 0x0a0f116959029, 0x5974fd7b1347a, 0x1e0cc1c08edad, 0x673bdf8ad1f13, 0x5620310cbbd8e },
+    },
+    {
+        { 0x6b5f477e285d6, 0x4ed91ec326cc8, 0x6d6537503a3fd, 0x626d3763988d5, 0x7ec846f3658ce },
+        { 0x193434934d643, 0x0d4a2445eaa51, 0x7d0708ae76fe0, 0x39847b6c3c7e1, 0x37676a2a4d9d9 },
+        { 0x68f3f1da22ec7, 0x6ed8039a2736b, 0x2627ee04c3c75, 0x6ea90a647e7d1, 0x6daaf723399b9 },
+    },
+},
+{
+    {
+        { 0x304bfacad8ea2, 0x502917d108b07, 0x043176ca6dd0f, 0x5d5158f2c1d84, 0x2b5449e58eb3b },
+        { 0x27562eb3dbe47, 0x291d7b4170be7, 0x5d1ca67dfa8e1, 0x2a88061f298a2, 0x1304e9e71627d },
+        { 0x014d26adc9cfe, 0x7f1691ba16f13, 0x5e71828f06eac, 0x349ed07f0fffc, 0x4468de2d7c2dd },
+    },
+    {
+        { 0x2d8c6f86307ce, 0x6286ba1850973, 0x5e9dcb08444d4, 0x1a96a543362b2, 0x5da6427e63247 },
+        { 0x3355e9419469e, 0x1847bb8ea8a37, 0x1fe6588cf9b71, 0x6b1c9d2db6b22, 0x6cce7c6ffb44b },
+        { 0x4c688deac22ca, 0x6f775c3ff0352, 0x565603ee419bb, 0x6544456c61c46, 0x58f29abfe79f2 },
+    },
+    {
+        { 0x264bf710ecdf6, 0x708c58527896b, 0x42ceae6c53394, 0x4381b21e82b6a, 0x6af93724185b4 },
+        { 0x6cfab8de73e68, 0x3e6efced4bd21, 0x0056609500dbe, 0x71b7824ad85df, 0x577629c4a7f41 },
+        { 0x0024509c6a888, 0x2696ab12e6644, 0x0cca27f4b80d8, 0x0c7c1f11b119e, 0x701f25bb0caec },
+    },
+    {
+        { 0x0f6d97cbec113, 0x4ce97fb7c93a3, 0x139835a11281b, 0x728907ada9156, 0x720a5bc050955 },
+        { 0x0b0f8e4616ced, 0x1d3c4b50fb875, 0x2f29673dc0198, 0x5f4b0f1830ffa, 0x2e0c92bfbdc40 },
+        { 0x709439b805a35, 0x6ec48557f8187, 0x08a4d1ba13a2c, 0x076348a0bf9ae, 0x0e9b9cbb144ef },
+    },
+    {
+        { 0x69bd55db1beee, 0x6e14e47f731bd, 0x1a35e47270eac, 0x66f225478df8e, 0x366d44191cfd3 },
+        { 0x2d48ffb5720ad, 0x57b7f21a1df77, 0x5550effba0645, 0x5ec6a4098a931, 0x221104eb3f337 },
+        { 0x41743f2bc8c14, 0x796b0ad8773c7, 0x29fee5cbb689b, 0x122665c178734, 0x4167a4e6bc593 },
+    },
+    {
+        { 0x62665f8ce8fee, 0x29d101ac59857, 0x4d93bbba59ffc, 0x17b7897373f17, 0x34b33370cb7ed },
+        { 0x39d2876f62700, 0x001cecd1d6c87, 0x7f01a11747675, 0x2350da5a18190, 0x7938bb7e22552 },
+        { 0x591ee8681d6cc, 0x39db0b4ea79b8, 0x202220f380842, 0x2f276ba42e0ac, 0x1176fc6e2dfe6 },
+    },
+    {
+        { 0x0e28949770eb8, 0x5559e88147b72, 0x35e1e6e63ef30, 0x35b109aa7ff6f, 0x1f6a3e54f2690 },
+        { 0x76cd05b9c619b, 0x69654b0901695, 0x7a53710b77f27, 0x79a1ea7d28175, 0x08fc3a4c677d5 },
+        { 0x4c199d30734ea, 0x6c622cb9acc14, 0x5660a55030216, 0x068f1199f11fb, 0x4f2fad0116b90 },
+    },
+    {
+        { 0x4d91db73bb638, 0x55f82538112c5, 0x6d85a279815de, 0x740b7b0cd9cf9, 0x3451995f2944e },
+        { 0x6b24194ae4e54, 0x2230afded8897, 0x23412617d5071, 0x3d5d30f35969b, 0x445484a4972ef },
+        { 0x2fcd09fea7d7c, 0x296126b9ed22a, 0x4a171012a05b2, 0x1db92c74d5523, 0x10b89ca604289 },
+    },
+},
+{
+    {
+        { 0x141be5a45f06e, 0x5adb38becaea7, 0x3fd46db41f2bb, 0x6d488bbb5ce39, 0x17d2d1d9ef0d4 },
+        { 0x147499718289c, 0x0a48a67e4c7ab, 0x30fbc544bafe3, 0x0c701315fe58a, 0x20b878d577b75 },
+        { 0x2af18073f3e6a, 0x33aea420d24fe, 0x298008bf4ff94, 0x3539171db961e, 0x72214f63cc65c },
+    },
+    {
+        { 0x5b7b9f43b29c9, 0x149ea31eea3b3, 0x4be7713581609, 0x2d87960395e98, 0x1f24ac855a154 },
+        { 0x37f405307a693, 0x2e5e66cf2b69c, 0x5d84266ae9c53, 0x5e4eb7de853b9, 0x5fdf48c58171c },
+        { 0x608328e9505aa, 0x22182841dc49a, 0x3ec96891d2307, 0x2f363fff22e03, 0x00ba739e2ae39 },
+    },
+    {
+        { 0x426f5ea88bb26, 0x33092e77f75c8, 0x1a53940d819e7, 0x1132e4f818613, 0x72297de7d518d },
+        { 0x698de5c8790d6, 0x268b8545beb25, 0x6d2648b96fedf, 0x47988ad1db07c, 0x03283a3e67ad7 },
+        { 0x41dc7be0cb939, 0x1b16c66100904, 0x0a24c20cbc66d, 0x4a2e9efe48681, 0x05e1296846271 },
+    },
+    {
+        { 0x7bbc8242c4550, 0x59a06103b35b7, 0x7237e4af32033, 0x726421ab3537a, 0x78cf25d38258c },
+        { 0x2eeb32d9c495a, 0x79e25772f9750, 0x6d747833bbf23, 0x6cdd816d5d749, 0x39c00c9c13698 },
+        { 0x66b8e31489d68, 0x573857e10e2b5, 0x13be816aa1472, 0x41964d3ad4bf8, 0x006b52076b3ff },
+    },
+    {
+        { 0x37e16b9ce082d, 0x1882f57853eb9, 0x7d29eacd01fc5, 0x2e76a59b5e715, 0x7de2e9561a9f7 },
+        { 0x0cfe19d95781c, 0x312cc621c453c, 0x145ace6da077c, 0x0912bef9ce9b8, 0x4d57e3443bc76 },
+        { 0x0d4f4b6a55ecb, 0x7ebb0bb733bce, 0x7ba6a05200549, 0x4f6ede4e22069, 0x6b2a90af1a602 },
+    },
+    {
+        { 0x3f3245bb2d80a, 0x0e5f720f36efd, 0x3b9cccf60c06d, 0x084e323f37926, 0x465812c8276c2 },
+        { 0x3f4fc9ae61e97, 0x3bc07ebfa2d24, 0x3b744b55cd4a0, 0x72553b25721f3, 0x5fd8f4e9d12d3 },
+        { 0x3beb22a1062d9, 0x6a7063b82c9a8, 0x0a5a35dc197ed, 0x3c80c06a53def, 0x05b32c2b1cb16 },
+    },
+    {
+        { 0x4a42c7ad58195, 0x5c8667e799eff, 0x02e5e74c850a1, 0x3f0db614e869a, 0x31771a4856730 },
+        { 0x05eccd24da8fd, 0x580bbfdf07918, 0x7e73586873c6a, 0x74ceddf77f93e, 0x3b5556a37b471 },
+        { 0x0c524e14dd482, 0x283457496c656, 0x0ad6bcfb6cd45, 0x375d1e8b02414, 0x4fc079d27a733 },
+    },
+    {
+        { 0x48b440c86c50d, 0x139929cca3b86, 0x0f8f2e44cdf2f, 0x68432117ba6b2, 0x241170c2bae3c },
+        { 0x138b089bf2f7f, 0x4a05bfd34ea39, 0x203914c925ef5, 0x7497fffe04e3c, 0x124567cecaf98 },
+        { 0x1ab860ac473b4, 0x5c0227c86a7ff, 0x71b12bfc24477, 0x006a573a83075, 0x3f8612966c870 },
+    },
+},
+{
+    {
+        { 0x0fcfa36048d13, 0x66e7133bbb383, 0x64b42a8a45676, 0x4ea6e4f9a85cf, 0x26f57eee878a1 },
+        { 0x20cc9782a0dde, 0x65d4e3070aab3, 0x7bc8e31547736, 0x09ebfb1432d98, 0x504aa77679736 },
+        { 0x32cd55687efb1, 0x4448f5e2f6195, 0x568919d460345, 0x034c2e0ad1a27, 0x4041943d9dba3 },
+    },
+    {
+        { 0x17743a26caadd, 0x48c9156f9c964, 0x7ef278d1e9ad0, 0x00ce58ea7bd01, 0x12d931429800d },
+        { 0x0eeba43ebcc96, 0x384dd5395f878, 0x1df331a35d272, 0x207ecfd4af70e, 0x1420a1d976843 },
+        { 0x67799d337594f, 0x01647548f6018, 0x57fce5578f145, 0x009220c142a71, 0x1b4f92314359a },
+    },
+    {
+        { 0x73030a49866b1, 0x2442be90b2679, 0x77bd3d8947dcf, 0x1fb55c1552028, 0x5ff191d56f9a2 },
+        { 0x4109d89150951, 0x225bd2d2d47cb, 0x57cc080e73bea, 0x6d71075721fcb, 0x239b572a7f132 },
+        { 0x6d433ac2d9068, 0x72bf930a47033, 0x64facf4a20ead, 0x365f7a2b9402a, 0x020c526a758f3 },
+    },
+    {
+        { 0x1ef59f042cc89, 0x3b1c24976dd26, 0x31d665cb16272, 0x28656e470c557, 0x452cfe0a5602c },
+        { 0x034f89ed8dbbc, 0x73b8f948d8ef3, 0x786c1d323caab, 0x43bd4a9266e51, 0x02aacc4615313 },
+        { 0x0f7a0647877df, 0x4e1cc0f93f0d4, 0x7ec4726ef1190, 0x3bdd58bf512f8, 0x4cfb7d7b304b8 },
+    },
+    {
+        { 0x699c29789ef12, 0x63beae321bc50, 0x325c340adbb35, 0x562e1a1e42bf6, 0x5b1d4cbc434d3 },
+        { 0x43d6cb89b75fe, 0x3338d5b900e56, 0x38d327d531a53, 0x1b25c61d51b9f, 0x14b4622b39075 },
+        { 0x32615cc0a9f26, 0x57711b99cb6df, 0x5a69c14e93c38, 0x6e88980a4c599, 0x2f98f71258592 },
+    },
+    {
+        { 0x2ae444f54a701, 0x615397afbc5c2, 0x60d7783f3f8fb, 0x2aa675fc486ba, 0x1d8062e9e7614 },
+        { 0x4a74cb50f9e56, 0x531d1c2640192, 0x0c03d9d6c7fd2, 0x57ccd156610c1, 0x3a6ae249d806a },
+        { 0x2da85a9907c5a, 0x6b23721ec4caf, 0x4d2d3a4683aa2, 0x7f9c6870efdef, 0x298b8ce8aef25 },
+    },
+    {
+        { 0x272ea0a2165de, 0x68179ef3ed06f, 0x4e2b9c0feac1e, 0x3ee290b1b63bb, 0x6ba6271803a7d },
+        { 0x27953eff70cb2, 0x54f22ae0ec552, 0x29f3da92e2724, 0x242ca0c22bd18, 0x34b8a8404d5ce },
+        { 0x6ecb583693335, 0x3ec76bfdfb84d, 0x2c895cf56a04f, 0x6355149d54d52, 0x71d62bdd465e1 },
+    },
+    {
+        { 0x5b5dab1f75ef5, 0x1e2d60cbeb9a5, 0x527c2175dfe57, 0x59e8a2b8ff51f, 0x1c333621262b2 },
+        { 0x3cc28d378df80, 0x72141f4968ca6, 0x407696bdb6d0d, 0x5d271b22ffcfb, 0x74d5f317f3172 },
+        { 0x7e55467d9ca81, 0x6a5653186f50d, 0x6b188ece62df1, 0x4c66d36844971, 0x4aebcc4547e9d },
+    },
+},
+{
+    {
+        { 0x08d9e7354b610, 0x26b750b6dc168, 0x162881e01acc9, 0x7966df31d01a5, 0x173bd9ddc9a1d },
+        { 0x0071b276d01c9, 0x0b0d8918e025e, 0x75beea79ee2eb, 0x3c92984094db8, 0x5d88fbf95a3db },
+        { 0x00f1efe5872df, 0x5da872318256a, 0x59ceb81635960, 0x18cf37693c764, 0x06e1cd13b19ea },
+    },
+    {
+        { 0x3af629e5b0353, 0x204f1a088e8e5, 0x10efc9ceea82e, 0x589863c2fa34b, 0x7f3a6a1a8d837 },
+        { 0x0ad516f166f23, 0x263f56d57c81a, 0x13422384638ca, 0x1331ff1af0a50, 0x3080603526e16 },
+        { 0x644395d3d800b, 0x2b9203dbedefc, 0x4b18ce656a355, 0x03f3466bc182c, 0x30d0fded2e513 },
+    },
+    {
+        { 0x4971e68b84750, 0x52ccc9779f396, 0x3e904ae8255c8, 0x4ecae46f39339, 0x4615084351c58 },
+        { 0x14d1af21233b3, 0x1de1989b39c0b, 0x52669dc6f6f9e, 0x43434b28c3fc7, 0x0a9214202c099 },
+        { 0x019c0aeb9a02e, 0x1a2c06995d792, 0x664cbb1571c44, 0x6ff0736fa80b2, 0x3bca0d2895ca5 },
+    },
+    {
+        { 0x08eb69ecc01bf, 0x5b4c8912df38d, 0x5ea7f8bc2f20e, 0x120e516caafaf, 0x4ea8b4038df28 },
+        { 0x031bc3c5d62a4, 0x7d9fe0f4c081e, 0x43ed51467f22c, 0x1e6cc0c1ed109, 0x5631deddae8f1 },
+        { 0x5460af1cad202, 0x0b4919dd0655d, 0x7c4697d18c14c, 0x231c890bba2a4, 0x24ce0930542ca },
+    },
+    {
+        { 0x7a155fdf30b85, 0x1c6c6e5d487f9, 0x24be1134bdc5a, 0x1405970326f32, 0x549928a7324f4 },
+        { 0x090f5fd06c106, 0x6abb1021e43fd, 0x232bcfad711a0, 0x3a5c13c047f37, 0x41d4e3c28a06d },
+        { 0x632a763ee1a2e, 0x6fa4bffbd5e4d, 0x5fd35a6ba4792, 0x7b55e1de99de8, 0x491b66dec0dcf },
+    },
+    {
+        { 0x04a8ed0da64a1, 0x5ecfc45096ebe, 0x5edee93b488b2, 0x5b3c11a51bc8f, 0x4cf6b8b0b7018 },
+        { 0x5b13dc7ea32a7, 0x18fc2db73131e, 0x7e3651f8f57e3, 0x25656055fa965, 0x08f338d0c85ee },
+        { 0x3a821991a73bd, 0x03be6418f5870, 0x1ddc18eac9ef0, 0x54ce09e998dc2, 0x530d4a82eb078 },
+    },
+    {
+        { 0x173456c9abf9e, 0x7892015100dad, 0x33ee14095fecb, 0x6ad95d67a0964, 0x0db3e7e00cbfb },
+        { 0x43630e1f94825, 0x4d1956a6b4009, 0x213fe2df8b5e0, 0x05ce3a41191e6, 0x65ea753f10177 },
+        { 0x6fc3ee2096363, 0x7ec36b96d67ac, 0x510ec6a0758b1, 0x0ed87df022109, 0x02a4ec1921e1a },
+    },
+    {
+        { 0x06162f1cf795f, 0x324ddcafe5eb9, 0x018d5e0463218, 0x7e78b9092428e, 0x36d12b5dec067 },
+        { 0x6259a3b24b8a2, 0x188b5f4170b9c, 0x681c0dee15deb, 0x4dfe665f37445, 0x3d143c5112780 },
+        { 0x5279179154557, 0x39f8f0741424d, 0x45e6eb357923d, 0x42c9b5edb746f, 0x2ef517885ba82 },
+    },
+},
+{
+    {
+        { 0x6bffb305b2f51, 0x5b112b2d712dd, 0x35774974fe4e2, 0x04af87a96e3a3, 0x57968290bb3a0 },
+        { 0x7974e8c58aedc, 0x7757e083488c6, 0x601c62ae7bc8b, 0x45370c2ecab74, 0x2f1b78fab143a },
+        { 0x2b8430a20e101, 0x1a49e1d88fee3, 0x38bbb47ce4d96, 0x1f0e7ba84d437, 0x7dc43e35dc2aa },
+    },
+    {
+        { 0x02a5c273e9718, 0x32bc9dfb28b4f, 0x48df4f8d5db1a, 0x54c87976c028f, 0x044fb81d82d50 },
+        { 0x66665887dd9c3, 0x629760a6ab0b2, 0x481e6c7243e6c, 0x097e37046fc77, 0x7ef72016758cc },
+        { 0x718c5a907e3d9, 0x3b9c98c6b383b, 0x006ed255eccdc, 0x6976538229a59, 0x7f79823f9c30d },
+    },
+    {
+        { 0x41ff068f587ba, 0x1c00a191bcd53, 0x7b56f9c209e25, 0x3781e5fccaabe, 0x64a9b0431c06d },
+        { 0x4d239a3b513e8, 0x29723f51b1066, 0x642f4cf04d9c3, 0x4da095aa09b7a, 0x0a4e0373d784d },
+        { 0x3d6a15b7d2919, 0x41aa75046a5d6, 0x691751ec2d3da, 0x23638ab6721c4, 0x071a7d0ace183 },
+    },
+    {
+        { 0x4355220e14431, 0x0e1362a283981, 0x2757cd8359654, 0x2e9cd7ab10d90, 0x7c69bcf761775 },
+        { 0x72daac887ba0b, 0x0b7f4ac5dda60, 0x3bdda2c0498a4, 0x74e67aa180160, 0x2c3bcc7146ea7 },
+        { 0x0d7eb04e8295f, 0x4a5ea1e6fa0fe, 0x45e635c436c60, 0x28ef4a8d4d18b, 0x6f5a9a7322aca },
+    },
+    {
+        { 0x1d4eba3d944be, 0x0100f15f3dce5, 0x61a700e367825, 0x5922292ab3d23, 0x02ab9680ee8d3 },
+        { 0x1000c2f41c6c5, 0x0219fdf737174, 0x314727f127de7, 0x7e5277d23b81e, 0x494e21a2e147a },
+        { 0x48a85dde50d9a, 0x1c1f734493df4, 0x47bdb64866889, 0x59a7d048f8eec, 0x6b5d76cbea46b },
+    },
+    {
+        { 0x141171e782522, 0x6806d26da7c1f, 0x3f31d1bc79ab9, 0x09f20459f5168, 0x16fb869c03dd3 },
+        { 0x7556cec0cd994, 0x5eb9a03b7510a, 0x50ad1dd91cb71, 0x1aa5780b48a47, 0x0ae333f685277 },
+        { 0x6199733b60962, 0x69b157c266511, 0x64740f893f1ca, 0x03aa408fbf684, 0x3f81e38b8f70d },
+    },
+    {
+        { 0x37f355f17c824, 0x07ae85334815b, 0x7e3abddd2e48f, 0x61eeabe1f45e5, 0x0ad3e2d34cded },
+        { 0x10fcc7ed9affe, 0x4248cb0e96ff2, 0x4311c115172e2, 0x4c9d41cbf6925, 0x50510fc104f50 },
+        { 0x40fc5336e249d, 0x3386639fb2de1, 0x7bbf871d17b78, 0x75f796b7e8004, 0x127c158bf0fa1 },
+    },
+    {
+        { 0x28fc4ae51b974, 0x26e89bfd2dbd4, 0x4e122a07665cf, 0x7cab1203405c3, 0x4ed82479d167d },
+        { 0x17c422e9879a2, 0x28a5946c8fec3, 0x53ab32e912b77, 0x7b44da09fe0a5, 0x354ef87d07ef4 },
+        { 0x3b52260c5d975, 0x79d6836171fdc, 0x7d994f140d4bb, 0x1b6c404561854, 0x302d92d205392 },
+    },
+},
+{
+    {
+        { 0x46fb6e4e0f177, 0x53497ad5265b7, 0x1ebdba01386fc, 0x0302f0cb36a3c, 0x0edc5f5eb426d },
+        { 0x3c1a2bca4283d, 0x23430c7bb2f02, 0x1a3ea1bb58bc2, 0x7265763de5c61, 0x10e5d3b76f1ca },
+        { 0x3bfd653da8e67, 0x584953ec82a8a, 0x55e288fa7707b, 0x5395fc3931d81, 0x45b46c51361cb },
+    },
+    {
+        { 0x54ddd8a7fe3e4, 0x2cecc41c619d3, 0x43a6562ac4d91, 0x4efa5aca7bdd9, 0x5c1c0aef32122 },
+        { 0x02abf314f7fa1, 0x391d19e8a1528, 0x6a2fa13895fc7, 0x09d8eddeaa591, 0x2177bfa36dcb7 },
+        { 0x01bbcfa79db8f, 0x3d84beb3666e1, 0x20c921d812204, 0x2dd843d3b32ce, 0x4ae619387d8ab },
+    },
+    {
+        { 0x17e44985bfb83, 0x54e32c626cc22, 0x096412ff38118, 0x6b241d61a246a, 0x75685abe5ba43 },
+        { 0x3f6aa5344a32e, 0x69683680f11bb, 0x04c3581f623aa, 0x701af5875cba5, 0x1a00d91b17bf3 },
+        { 0x60933eb61f2b2, 0x5193fe92a4dd2, 0x3d995a550f43e, 0x3556fb93a883d, 0x135529b623b0e },
+    },
+    {
+        { 0x716bce22e83fe, 0x33d0130b83eb8, 0x0952abad0afac, 0x309f64ed31b8a, 0x5972ea051590a },
+        { 0x0dbd7add1d518, 0x119f823e2231e, 0x451d66e5e7de2, 0x500c39970f838, 0x79b5b81a65ca3 },
+        { 0x4ac20dc8f7811, 0x29589a9f501fa, 0x4d810d26a6b4a, 0x5ede00d96b259, 0x4f7e9c95905f3 },
+    },
+    {
+        { 0x0443d355299fe, 0x39b7d7d5aee39, 0x692519a2f34ec, 0x6e4404924cf78, 0x1942eec4a144a },
+        { 0x74bbc5781302e, 0x73135bb81ec4c, 0x7ef671b61483c, 0x7264614ccd729, 0x31993ad92e638 },
+        { 0x45319ae234992, 0x2219d47d24fb5, 0x4f04488b06cf6, 0x53aaa9e724a12, 0x2a0a65314ef9c },
+    },
+    {
+        { 0x61acd3c1c793a, 0x58b46b78779e6, 0x3369aacbe7af2, 0x509b0743074d4, 0x055dc39b6dea1 },
+        { 0x7937ff7f927c2, 0x0c2fa14c6a5b6, 0x556bddb6dd07c, 0x6f6acc179d108, 0x4cf6e218647c2 },
+        { 0x1227cc28d5bb6, 0x78ee9bff57623, 0x28cb2241f893a, 0x25b541e3c6772, 0x121a307710aa2 },
+    },
+    {
+        { 0x1713ec77483c9, 0x6f70572d5facb, 0x25ef34e22ff81, 0x54d944f141188, 0x527bb94a6ced3 },
+        { 0x35d5e9f034a97, 0x126069785bc9b, 0x5474ec7854ff0, 0x296a302a348ca, 0x333fc76c7a40e },
+        { 0x5992a995b482e, 0x78dc707002ac7, 0x5936394d01741, 0x4fba4281aef17, 0x6b89069b20a7a },
+    },
+    {
+        { 0x2fa8cb5c7db77, 0x718e6982aa810, 0x39e95f81a1a1b, 0x5e794f3646cfb, 0x0473d308a7639 },
+        { 0x2a0416270220d, 0x75f248b69d025, 0x1cbbc16656a27, 0x5b9ffd6e26728, 0x23bc2103aa73e },
+        { 0x6792603589e05, 0x248db9892595d, 0x006a53cad2d08, 0x20d0150f7ba73, 0x102f73bfde043 },
+    },
+},
+{
+    {
+        { 0x4dae0b5511c9a, 0x5257fffe0d456, 0x54108d1eb2180, 0x096cc0f9baefa, 0x3f6bd725da4ea },
+        { 0x0b9ab7f5745c6, 0x5caf0f8d21d63, 0x7debea408ea2b, 0x09edb93896d16, 0x36597d25ea5c0 },
+        { 0x58d7b106058ac, 0x3cdf8d20bee69, 0x00a4cb765015e, 0x36832337c7cc9, 0x7b7ecc19da60d },
+    },
+    {
+        { 0x64a51a77cfa9b, 0x29cf470ca0db5, 0x4b60b6e0898d9, 0x55d04ddffe6c7, 0x03bedc661bf5c },
+        { 0x2373c695c690d, 0x4c0c8520dcf18, 0x384af4b7494b9, 0x4ab4a8ea22225, 0x4235ad7601743 },
+        { 0x0cb0d078975f5, 0x292313e530c4b, 0x38dbb9124a509, 0x350d0655a11f1, 0x0e7ce2b0cdf06 },
+    },
+    {
+        { 0x6fedfd94b70f9, 0x2383f9745bfd4, 0x4beae27c4c301, 0x75aa4416a3f3f, 0x615256138aece },
+        { 0x4643ac48c85a3, 0x6878c2735b892, 0x3a53523f4d877, 0x3a504ed8bee9d, 0x666e0a5d8fb46 },
+        { 0x3f64e4870cb0d, 0x61548b16d6557, 0x7a261773596f3, 0x7724d5f275d3a, 0x7f0bc810d514d },
+    },
+    {
+        { 0x49dad737213a0, 0x745dee5d31075, 0x7b1a55e7fdbe2, 0x5ba988f176ea1, 0x1d3a907ddec5a },
+        { 0x06ba426f4136f, 0x3cafc0606b720, 0x518f0a2359cda, 0x5fae5e46feca7, 0x0d1f8dbcf8eed },
+        { 0x693313ed081dc, 0x5b0a366901742, 0x40c872ca4ca7e, 0x6f18094009e01, 0x00011b44a31bf },
+    },
+    {
+        { 0x61f696a0aa75c, 0x38b0a57ad42ca, 0x1e59ab706fdc9, 0x01308d46ebfcd, 0x63d988a2d2851 },
+        { 0x7a06c3fc66c0c, 0x1c9bac1ba47fb, 0x23935c575038e, 0x3f0bd71c59c13, 0x3ac48d916e835 },
+        { 0x20753afbd232e, 0x71fbb1ed06002, 0x39cae47a4af3a, 0x0337c0b34d9c2, 0x33fad52b2368a },
+    },
+    {
+        { 0x4c8d0c422cfe8, 0x760b4275971a5, 0x3da95bc1cad3d, 0x0f151ff5b7376, 0x3cc355ccb90a7 },
+        { 0x649c6c5e41e16, 0x60667eee6aa80, 0x4179d182be190, 0x653d9567e6979, 0x16c0f429a256d },
+        { 0x69443903e9131, 0x16f4ac6f9dd36, 0x2ea4912e29253, 0x2b4643e68d25d, 0x631eaf426bae7 },
+    },
+    {
+        { 0x175b9a3700de8, 0x77c5f00aa48fb, 0x3917785ca0317, 0x05aa9b2c79399, 0x431f2c7f665f8 },
+        { 0x10410da66fe9f, 0x24d82dcb4d67d, 0x3e6fe0e17752d, 0x4dade1ecbb08f, 0x5599648b1ea91 },
+        { 0x26344858f7b19, 0x5f43d4a295ac0, 0x242a75c52acd4, 0x5934480220d10, 0x7b04715f91253 },
+    },
+    {
+        { 0x6c280c4e6bac6, 0x3ada3b361766e, 0x42fe5125c3b4f, 0x111d84d4aac22, 0x48d0acfa57cde },
+        { 0x5bd28acf6ae43, 0x16fab8f56907d, 0x7acb11218d5f2, 0x41fe02023b4db, 0x59b37bf5c2f65 },
+        { 0x726e47dabe671, 0x2ec45e746f6c1, 0x6580e53c74686, 0x5eda104673f74, 0x16234191336d3 },
+    },
+},
+{
+    {
+        { 0x19cd61ff38640, 0x060c6c4b41ba9, 0x75cf70ca7366f, 0x118a8f16c011e, 0x4a25707a203b9 },
+        { 0x499def6267ff6, 0x76e858108773c, 0x693cac5ddcb29, 0x00311d00a9ff4, 0x2cdfdfecd5d05 },
+        { 0x7668a53f6ed6a, 0x303ba2e142556, 0x3880584c10909, 0x4fe20000a261d, 0x5721896d248e4 },
+    },
+    {
+        { 0x55091a1d0da4e, 0x4f6bfc7c1050b, 0x64e4ecd2ea9be, 0x07eb1f28bbe70, 0x03c935afc4b03 },
+        { 0x65517fd181bae, 0x3e5772c76816d, 0x019189640898a, 0x1ed2a84de7499, 0x578edd74f63c1 },
+        { 0x276c6492b0c3d, 0x09bfc40bf932e, 0x588e8f11f330b, 0x3d16e694dc26e, 0x3ec2ab590288c },
+    },
+    {
+        { 0x13a09ae32d1cb, 0x3e81eb85ab4e4, 0x07aaca43cae1f, 0x62f05d7526374, 0x0e1bf66c6adba },
+        { 0x0d27be4d87bb9, 0x56c27235db434, 0x72e6e0ea62d37, 0x5674cd06ee839, 0x2dd5c25a200fc },
+        { 0x3d5e9792c887e, 0x319724dabbc55, 0x2b97c78680800, 0x7afdfdd34e6dd, 0x730548b35ae88 },
+    },
+    {
+        { 0x3094ba1d6e334, 0x6e126a7e3300b, 0x089c0aefcfbc5, 0x2eea11f836583, 0x585a2277d8784 },
+        { 0x551a3cba8b8ee, 0x3b6422be2d886, 0x630e1419689bc, 0x4653b07a7a955, 0x3043443b411db },
+        { 0x25f8233d48962, 0x6bd8f04aff431, 0x4f907fd9a6312, 0x40fd3c737d29b, 0x7656278950ef9 },
+    },
+    {
+        { 0x073a3ea86cf9d, 0x6e0e2abfb9c2e, 0x60e2a38ea33ee, 0x30b2429f3fe18, 0x28bbf484b613f },
+        { 0x3cf59d51fc8c0, 0x7a0a0d6de4718, 0x55c3a3e6fb74b, 0x353135f884fd5, 0x3f4160a8c1b84 },
+        { 0x12f5c6f136c7c, 0x0fedba237de4c, 0x779bccebfab44, 0x3aea93f4d6909, 0x1e79cb358188f },
+    },
+    {
+        { 0x153d8f5e08181, 0x08533bbdb2efd, 0x1149796129431, 0x17a6e36168643, 0x478ab52d39d1f },
+        { 0x436c3eef7e3f1, 0x7ffd3c21f0026, 0x3e77bf20a2da9, 0x418bffc8472de, 0x65d7951b3a3b3 },
+        { 0x6a4d39252d159, 0x790e35900ecd4, 0x30725bf977786, 0x10a5c1635a053, 0x16d87a411a212 },
+    },
+    {
+        { 0x4d5e2d54e0583, 0x2e5d7b33f5f74, 0x3a5de3f887ebf, 0x6ef24bd6139b7, 0x1f990b577a5a6 },
+        { 0x57e5a42066215, 0x1a18b44983677, 0x3e652de1e6f8f, 0x6532be02ed8eb, 0x28f87c8165f38 },
+        { 0x44ead1be8f7d6, 0x5759d4f31f466, 0x0378149f47943, 0x69f3be32b4f29, 0x45882fe1534d6 },
+    },
+    {
+        { 0x49929943c6fe4, 0x4347072545b15, 0x3226bced7e7c5, 0x03a134ced89df, 0x7dcf843ce405f },
+        { 0x1345d757983d6, 0x222f54234cccd, 0x1784a3d8adbb4, 0x36ebeee8c2bcc, 0x688fe5b8f626f },
+        { 0x0d6484a4732c0, 0x7b94ac6532d92, 0x5771b8754850f, 0x48dd9df1461c8, 0x6739687e73271 },
+    },
+},
+{
+    {
+        { 0x5cc9dc80c1ac0, 0x683671486d4cd, 0x76f5f1a5e8173, 0x6d5d3f5f9df4a, 0x7da0b8f68d7e7 },
+        { 0x02014385675a6, 0x6155fb53d1def, 0x37ea32e89927c, 0x059a668f5a82e, 0x46115aba1d4dc },
+        { 0x71953c3b5da76, 0x6642233d37a81, 0x2c9658076b1bd, 0x5a581e63010ff, 0x5a5f887e83674 },
+    },
+    {
+        { 0x628d3a0a643b9, 0x01cd8640c93d2, 0x0b7b0cad70f2c, 0x3864da98144be, 0x43e37ae2d5d1c },
+        { 0x301cf70a13d11, 0x2a6a1ba1891ec, 0x2f291fb3f3ae0, 0x21a7b814bea52, 0x3669b656e44d1 },
+        { 0x63f06eda6e133, 0x233342758070f, 0x098e0459cc075, 0x4df5ead6c7c1b, 0x6a21e6cd4fd5e },
+    },
+    {
+        { 0x129126699b2e3, 0x0ee11a2603de8, 0x60ac2f5c74c21, 0x59b192a196808, 0x45371b07001e8 },
+        { 0x6170a3046e65f, 0x5401a46a49e38, 0x20add5561c4a8, 0x7abb4edde9e46, 0x586bf9f1a195f },
+        { 0x3088d5ef8790b, 0x38c2126fcb4db, 0x685bae149e3c3, 0x0bcd601a4e930, 0x0eafb03790e52 },
+    },
+    {
+        { 0x0805e0f75ae1d, 0x464cc59860a28, 0x248e5b7b00bef, 0x5d99675ef8f75, 0x44ae3344c5435 },
+        { 0x555c13748042f, 0x4d041754232c0, 0x521b430866907, 0x3308e40fb9c39, 0x309acc675a02c },
+        { 0x289b9bba543ee, 0x3ab592e28539e, 0x64d82abcdd83a, 0x3c78ec172e327, 0x62d5221b7f946 },
+    },
+    {
+        { 0x5d4263af77a3c, 0x23fdd2289aeb0, 0x7dc64f77eb9ec, 0x01bd28338402c, 0x14f29a5383922 },
+        { 0x4299c18d0936d, 0x5914183418a49, 0x52a18c721aed5, 0x2b151ba82976d, 0x5c0efde4bc754 },
+        { 0x17edc25b2d7f5, 0x37336a6081bee, 0x7b5318887e5c3, 0x49f6d491a5be1, 0x5e72365c7bee0 },
+    },
+    {
+        { 0x339062f08b33e, 0x4bbf3e657cfb2, 0x67af7f56e5967, 0x4dbd67f9ed68f, 0x70b20555cb734 },
+        { 0x3fc074571217f, 0x3a0d29b2b6aeb, 0x06478ccdde59d, 0x55e4d051bddfa, 0x77f1104c47b4e },
+        { 0x113c555112c4c, 0x7535103f9b7ca, 0x140ed1d9a2108, 0x02522333bc2af, 0x0e34398f4a064 },
+    },
+    {
+        { 0x30b093e4b1928, 0x1ce7e7ec80312, 0x4e575bdf78f84, 0x61f7a190bed39, 0x6f8aded6ca379 },
+        { 0x522d93ecebde8, 0x024f045e0f6cf, 0x16db63426cfa1, 0x1b93a1fd30fd8, 0x5e5405368a362 },
+        { 0x0123dfdb7b29a, 0x4344356523c68, 0x79a527921ee5f, 0x74bfccb3e817e, 0x780de72ec8d3d },
+    },
+    {
+        { 0x7eaf300f42772, 0x5455188354ce3, 0x4dcca4a3dcbac, 0x3d314d0bfebcb, 0x1defc6ad32b58 },
+        { 0x28545089ae7bc, 0x1e38fe9a0c15c, 0x12046e0e2377b, 0x6721c560aa885, 0x0eb28bf671928 },
+        { 0x3be1aef5195a7, 0x6f22f62bdb5eb, 0x39768b8523049, 0x43394c8fbfdbd, 0x467d201bf8dd2 },
+    },
+},
+{
+    {
+        { 0x6f4bd567ae7a9, 0x65ac89317b783, 0x07d3b20fd8932, 0x000f208326916, 0x2ef9c5a5ba384 },
+        { 0x6919a74ef4fad, 0x59ed4611452bf, 0x691ec04ea09ef, 0x3cbcb2700e984, 0x71c43c4f5ba3c },
+        { 0x56df6fa9e74cd, 0x79c95e4cf56df, 0x7be643bc609e2, 0x149c12ad9e878, 0x5a758ca390c5f },
+    },
+    {
+        { 0x0918b1d61dc94, 0x0d350260cd19c, 0x7a2ab4e37b4d9, 0x21fea735414d7, 0x0a738027f639d },
+        { 0x72710d9462495, 0x25aafaa007456, 0x2d21f28eaa31b, 0x17671ea005fd0, 0x2dbae244b3eb7 },
+        { 0x74a2f57ffe1cc, 0x1bc3073087301, 0x7ec57f4019c34, 0x34e082e1fa524, 0x2698ca635126a },
+    },
+    {
+        { 0x5702f5e3dd90e, 0x31c9a4a70c5c7, 0x136a5aa78fc24, 0x1992f3b9f7b01, 0x3c004b0c4afa3 },
+        { 0x5318832b0ba78, 0x6f24b9ff17cec, 0x0a47f30e060c7, 0x58384540dc8d0, 0x1fb43dcc49cae },
+        { 0x146ac06f4b82b, 0x4b500d89e7355, 0x3351e1c728a12, 0x10b9f69932fe3, 0x6b43fd01cd1fd },
+    },
+    {
+        { 0x742583e760ef3, 0x73dc1573216b8, 0x4ae48fdd7714a, 0x4f85f8a13e103, 0x73420b2d6ff0d },
+        { 0x75d4b4697c544, 0x11be1fff7f8f4, 0x119e16857f7e1, 0x38a14345cf5d5, 0x5a68d7105b52f },
+        { 0x4f6cb9e851e06, 0x278c4471895e5, 0x7efcdce3d64e4, 0x64f6d455c4b4c, 0x3db5632fea34b },
+    },
+    {
+        { 0x190b1829825d5, 0x0e7d3513225c9, 0x1c12be3b7abae, 0x58777781e9ca6, 0x59197ea495df2 },
+        { 0x6ee2bf75dd9d8, 0x6c72ceb34be8d, 0x679c9cc345ec7, 0x7898df96898a4, 0x04321adf49d75 },
+        { 0x16019e4e55aae, 0x74fc5f25d209c, 0x4566a939ded0d, 0x66063e716e0b7, 0x45eafdc1f4d70 },
+    },
+    {
+        { 0x64624cfccb1ed, 0x257ab8072b6c1, 0x0120725676f0a, 0x4a018d04e8eee, 0x3f73ceea5d56d },
+        { 0x401858045d72b, 0x459e5e0ca2d30, 0x488b719308bea, 0x56f4a0d1b32b5, 0x5a5eebc80362d },
+        { 0x7bfd10a4e8dc6, 0x7c899366736f4, 0x55ebbeaf95c01, 0x46db060903f8a, 0x2605889126621 },
+    },
+    {
+        { 0x18e3cc676e542, 0x26079d995a990, 0x04a7c217908b2, 0x1dc7603e6655a, 0x0dedfa10b2444 },
+        { 0x704a68360ff04, 0x3cecc3cde8b3e, 0x21cd5470f64ff, 0x6abc18d953989, 0x54ad0c2e4e615 },
+        { 0x367d5b82b522a, 0x0d3f4b83d7dc7, 0x3067f4cdbc58d, 0x20452da697937, 0x62ecb2baa77a9 },
+    },
+    {
+        { 0x72836afb62874, 0x0af3c2094b240, 0x0c285297f357a, 0x7cc2d5680d6e3, 0x61913d5075663 },
+        { 0x5795261152b3d, 0x7a1dbbafa3cbd, 0x5ad31c52588d5, 0x45f3a4164685c, 0x2e59f919a966d },
+        { 0x62d361a3231da, 0x65284004e01b8, 0x656533be91d60, 0x6ae016c00a89f, 0x3ddbc2a131c05 },
+    },
+},
+{
+    {
+        { 0x257a22796bb14, 0x6f360fb443e75, 0x680e47220eaea, 0x2fcf2a5f10c18, 0x5ee7fb38d8320 },
+        { 0x40ff9ce5ec54b, 0x57185e261b35b, 0x3e254540e70a9, 0x1b5814003e3f8, 0x78968314ac04b },
+        { 0x5fdcb41446a8e, 0x5286926ff2a71, 0x0f231e296b3f6, 0x684a357c84693, 0x61d0633c9bca0 },
+    },
+    {
+        { 0x328bcf8fc73df, 0x3b4de06ff95b4, 0x30aa427ba11a5, 0x5ee31bfda6d9c, 0x5b23ac2df8067 },
+        { 0x44935ffdb2566, 0x12f016d176c6e, 0x4fbb00f16f5ae, 0x3fab78d99402a, 0x6e965fd847aed },
+        { 0x2b953ee80527b, 0x55f5bcdb1b35a, 0x43a0b3fa23c66, 0x76e07388b820a, 0x79b9bbb9dd95d },
+    },
+    {
+        { 0x17dae8e9f7374, 0x719f76102da33, 0x5117c2a80ca8b, 0x41a66b65d0936, 0x1ba811460accb },
+        { 0x355406a3126c2, 0x50d1918727d76, 0x6e5ea0b498e0e, 0x0a3b6063214f2, 0x5065f158c9fd2 },
+        { 0x169fb0c429954, 0x59aedd9ecee10, 0x39916eb851802, 0x57917555cc538, 0x3981f39e58a4f },
+    },
+    {
+        { 0x5dfa56de66fde, 0x0058809075908, 0x6d3d8cb854a94, 0x5b2f4e970b1e3, 0x30f4452edcbc1 },
+        { 0x38a7559230a93, 0x52c1cde8ba31f, 0x2a4f2d4745a3d, 0x07e9d42d4a28a, 0x38dc083705acd },
+        { 0x52782c5759740, 0x53f3397d990ad, 0x3a939c7e84d15, 0x234c4227e39e0, 0x632d9a1a593f2 },
+    },
+    {
+        { 0x1fd11ed0c84a7, 0x021b3ed2757e1, 0x73e1de58fc1c6, 0x5d110c84616ab, 0x3a5a7df28af64 },
+        { 0x36b15b807cba6, 0x3f78a9e1afed7, 0x0a59c2c608f1f, 0x52bdd8ecb81b7, 0x0b24f48847ed4 },
+        { 0x2d4be511beac7, 0x6bda4d99e5b9b, 0x17e6996914e01, 0x7b1f0ce7fcf80, 0x34fcf74475481 },
+    },
+    {
+        { 0x31dab78cfaa98, 0x4e3216e5e54b7, 0x249823973b689, 0x2584984e48885, 0x0119a3042fb37 },
+        { 0x7e04c789767ca, 0x1671b28cfb832, 0x7e57ea2e1c537, 0x1fbaaef444141, 0x3d3bdc164dfa6 },
+        { 0x2d89ce8c2177d, 0x6cd12ba182cf4, 0x20a8ac19a7697, 0x539fab2cc72d9, 0x56c088f1ede20 },
+    },
+    {
+        { 0x35fac24f38f02, 0x7d75c6197ab03, 0x33e4bc2a42fa7, 0x1c7cd10b48145, 0x038b7ea483590 },
+        { 0x53d1110a86e17, 0x6416eb65f466d, 0x41ca6235fce20, 0x5c3fc8a99bb12, 0x09674c6b99108 },
+        { 0x6f82199316ff8, 0x05d54f1a9f3e9, 0x3bcc5d0bd274a, 0x5b284b8d2d5ad, 0x6e5e31025969e },
+    },
+    {
+        { 0x4fb0e63066222, 0x130f59747e660, 0x041868fecd41a, 0x3105e8c923bc6, 0x3058ad43d1838 },
+        { 0x462f587e593fb, 0x3d94ba7ce362d, 0x330f9b52667b7, 0x5d45a48e0f00a, 0x08f5114789a8d },
+        { 0x40ffde57663d0, 0x71445d4c20647, 0x2653e68170f7c, 0x64cdee3c55ed6, 0x26549fa4efe3d },
+    },
+},
+{
+    {
+        { 0x68549af3f666e, 0x09e2941d4bb68, 0x2e8311f5dff3c, 0x6429ef91ffbd2, 0x3a10dfe132ce3 },
+        { 0x55a461e6bf9d6, 0x78eeef4b02e83, 0x1d34f648c16cf, 0x07fea2aba5132, 0x1926e1dc6401e },
+        { 0x74e8aea17cea0, 0x0c743f83fbc0f, 0x7cb03c4bf5455, 0x68a8ba9917e98, 0x1fa1d01d861e5 },
+    },
+    {
+        { 0x4ac00d1df94ab, 0x3ba2101bd271b, 0x7578988b9c4af, 0x0f2bf89f49f7e, 0x73fced18ee9a0 },
+        { 0x055947d599832, 0x346fe2aa41990, 0x0164c8079195b, 0x799ccfb7bba27, 0x773563bc6a75c },
+        { 0x1e90863139cb3, 0x4f8b407d9a0d6, 0x58e24ca924f69, 0x7a246bbe76456, 0x1f426b701b864 },
+    },
+    {
+        { 0x635c891a12552, 0x26aebd38ede2f, 0x66dc8faddae05, 0x21c7d41a03786, 0x0b76bb1b3fa7e },
+        { 0x1264c41911c01, 0x702f44584bdf9, 0x43c511fc68ede, 0x0482c3aed35f9, 0x4e1af5271d31b },
+        { 0x0c1f97f92939b, 0x17a88956dc117, 0x6ee005ef99dc7, 0x4aa9172b231cc, 0x7b6dd61eb772a },
+    },
+    {
+        { 0x0abf9ab01d2c7, 0x3880287630ae6, 0x32eca045beddb, 0x57f43365f32d0, 0x53fa9b659bff6 },
+        { 0x5c1e850f33d92, 0x1ec119ab9f6f5, 0x7f16f6de663e9, 0x7a7d6cb16dec6, 0x703e9bceaf1d2 },
+        { 0x4c8e994885455, 0x4ccb5da9cad82, 0x3596bc610e975, 0x7a80c0ddb9f5e, 0x398d93e5c4c61 },
+    },
+    {
+        { 0x77c60d2e7e3f2, 0x4061051763870, 0x67bc4e0ecd2aa, 0x2bb941f1373b9, 0x699c9c9002c30 },
+        { 0x3d16733e248f3, 0x0e2b7e14be389, 0x42c0ddaf6784a, 0x589ea1fc67850, 0x53b09b5ddf191 },
+        { 0x6a7235946f1cc, 0x6b99cbb2fbe60, 0x6d3a5d6485c62, 0x4839466e923c0, 0x51caf30c6fcdd },
+    },
+    {
+        { 0x2f99a18ac54c7, 0x398a39661ee6f, 0x384331e40cde3, 0x4cd15c4de19a6, 0x12ae29c189f8e },
+        { 0x3a7427674e00a, 0x6142f4f7e74c1, 0x4cc93318c3a15, 0x6d51bac2b1ee7, 0x5504aa292383f },
+        { 0x6c0cb1f0d01cf, 0x187469ef5d533, 0x27138883747bf, 0x2f52ae53a90e8, 0x5fd14fe958eba },
+    },
+    {
+        { 0x2fe5ebf93cb8e, 0x226da8acbe788, 0x10883a2fb7ea1, 0x094707842cf44, 0x7dd73f960725d },
+        { 0x42ddf2845ab2c, 0x6214ffd3276bb, 0x00b8d181a5246, 0x268a6d579eb20, 0x093ff26e58647 },
+        { 0x524fe68059829, 0x65b75e47cb621, 0x15eb0a5d5cc19, 0x05209b3929d5a, 0x2f59bcbc86b47 },
+    },
+    {
+        { 0x1d560b691c301, 0x7f5bafce3ce08, 0x4cd561614806c, 0x4588b6170b188, 0x2aa55e3d01082 },
+        { 0x47d429917135f, 0x3eacfa07af070, 0x1deab46b46e44, 0x7a53f3ba46cdf, 0x5458b42e2e51a },
+        { 0x192e60c07444f, 0x5ae8843a21daa, 0x6d721910b1538, 0x3321a95a6417e, 0x13e9004a8a768 },
+    },
+},
+{
+    {
+        { 0x600c9193b877f, 0x21c1b8a0d7765, 0x379927fb38ea2, 0x70d7679dbe01b, 0x5f46040898de9 },
+        { 0x58845832fcedb, 0x135cd7f0c6e73, 0x53ffbdfe8e35b, 0x22f195e06e55b, 0x73937e8814bce },
+        { 0x37116297bf48d, 0x45a9e0d069720, 0x25af71aa744ec, 0x41af0cb8aaba3, 0x2cf8a4e891d5e },
+    },
+    {
+        { 0x5487e17d06ba2, 0x3872a032d6596, 0x65e28c09348e0, 0x27b6bb2ce40c2, 0x7a6f7f2891d6a },
+        { 0x3fd8707110f67, 0x26f8716a92db2, 0x1cdaa1b753027, 0x504be58b52661, 0x2049bd6e58252 },
+        { 0x1fd8d6a9aef49, 0x7cb67b7216fa1, 0x67aff53c3b982, 0x20ea610da9628, 0x6011aadfc5459 },
+    },
+    {
+        { 0x6d0c802cbf890, 0x141bfed554c7b, 0x6dbb667ef4263, 0x58f3126857edc, 0x69ce18b779340 },
+        { 0x7926dcf95f83c, 0x42e25120e2bec, 0x63de96df1fa15, 0x4f06b50f3f9cc, 0x6fc5cc1b0b62f },
+        { 0x75528b29879cb, 0x79a8fd2125a3d, 0x27c8d4b746ab8, 0x0f8893f02210c, 0x15596b3ae5710 },
+    },
+    {
+        { 0x731167e5124ca, 0x17b38e8bbe13f, 0x3d55b942f9056, 0x09c1495be913f, 0x3aa4e241afb6d },
+        { 0x739d23f9179a2, 0x632fadbb9e8c4, 0x7c8522bfe0c48, 0x6ed0983ef5aa9, 0x0d2237687b5f4 },
+        { 0x138bf2a3305f5, 0x1f45d24d86598, 0x5274bad2160fe, 0x1b6041d58d12a, 0x32fcaa6e4687a },
+    },
+    {
+        { 0x7a4732787ccdf, 0x11e427c7f0640, 0x03659385f8c64, 0x5f4ead9766bfb, 0x746f6336c2600 },
+        { 0x56e8dc57d9af5, 0x5b3be17be4f78, 0x3bf928cf82f4b, 0x52e55600a6f11, 0x4627e9cefebd6 },
+        { 0x2f345ab6c971c, 0x653286e63e7e9, 0x51061b78a23ad, 0x14999acb54501, 0x7b4917007ed66 },
+    },
+    {
+        { 0x41b28dd53a2dd, 0x37be85f87ea86, 0x74be3d2a85e41, 0x1be87fac96ca6, 0x1d03620fe08cd },
+        { 0x5fb5cab84b064, 0x2513e778285b0, 0x457383125e043, 0x6bda3b56e223d, 0x122ba376f844f },
+        { 0x232cda2b4e554, 0x0422ba30ff840, 0x751e7667b43f5, 0x6261755da5f3e, 0x02c70bf52b68e },
+    },
+    {
+        { 0x532bf458d72e1, 0x40f96e796b59c, 0x22ef79d6f9da3, 0x501ab67beca77, 0x6b0697e3feb43 },
+        { 0x7ec4b5d0b2fbb, 0x200e910595450, 0x742057105715e, 0x2f07022530f60, 0x26334f0a409ef },
+        { 0x0f04adf62a3c0, 0x5e0edb48bb6d9, 0x7c34aa4fbc003, 0x7d74e4e5cac24, 0x1cc37f43441b2 },
+    },
+    {
+        { 0x656f1c9ceaeb9, 0x7031cacad5aec, 0x1308cd0716c57, 0x41c1373941942, 0x3a346f772f196 },
+        { 0x7565a5cc7324f, 0x01ca0d5244a11, 0x116b067418713, 0x0a57d8c55edae, 0x6c6809c103803 },
+        { 0x55112e2da6ac8, 0x6363d0a3dba5a, 0x319c98ba6f40c, 0x2e84b03a36ec7, 0x05911b9f6ef7c },
+    },
+},
+{
+    {
+        { 0x1acf3512eeaef, 0x2639839692a69, 0x669a234830507, 0x68b920c0603d4, 0x555ef9d1c64b2 },
+        { 0x39983f5df0ebb, 0x1ea2589959826, 0x6ce638703cdd6, 0x6311678898505, 0x6b3cecf9aa270 },
+        { 0x770ba3b73bd08, 0x11475f7e186d4, 0x0251bc9892bbc, 0x24eab9bffcc5a, 0x675f4de133817 },
+    },
+    {
+        { 0x7f6d93bdab31d, 0x1f3aca5bfd425, 0x2fa521c1c9760, 0x62180ce27f9cd, 0x60f450b882cd3 },
+        { 0x452036b1782fc, 0x02d95b07681c5, 0x5901cf99205b2, 0x290686e5eecb4, 0x13d99df70164c },
+        { 0x35ec321e5c0ca, 0x13ae337f44029, 0x4008e813f2da7, 0x640272f8e0c3a, 0x1c06de9e55eda },
+    },
+    {
+        { 0x52b40ff6d69aa, 0x31b8809377ffa, 0x536625cd14c2c, 0x516af252e17d1, 0x78096f8e7d32b },
+        { 0x77ad6a33ec4e2, 0x717c5dc11d321, 0x4a114559823e4, 0x306ce50a1e2b1, 0x4cf38a1fec2db },
+        { 0x2aa650dfa5ce7, 0x54916a8f19415, 0x00dc96fe71278, 0x55f2784e63eb8, 0x373cad3a26091 },
+    },
+    {
+        { 0x6a8fb89ddbbad, 0x78c35d5d97e37, 0x66e3674ef2cb2, 0x34347ac53dd8f, 0x21547eda5112a },
+        { 0x4634d82c9f57c, 0x4249268a6d652, 0x6336d687f2ff7, 0x4fe4f4e26d9a0, 0x0040f3d945441 },
+        { 0x5e939fd5986d3, 0x12a2147019bdf, 0x4c466e7d09cb2, 0x6fa5b95d203dd, 0x63550a334a254 },
+    },
+    {
+        { 0x2584572547b49, 0x75c58811c1377, 0x4d3c637cc171b, 0x33d30747d34e3, 0x39a92bafaa7d7 },
+        { 0x7d6edb569cf37, 0x60194a5dc2ca0, 0x5af59745e10a6, 0x7a8f53e004875, 0x3eea62c7daf78 },
+        { 0x4c713e693274e, 0x6ed1b7a6eb3a4, 0x62ace697d8e15, 0x266b8292ab075, 0x68436a0665c9c },
+    },
+    {
+        { 0x6d317e820107c, 0x090815d2ca3ca, 0x03ff1eb1499a1, 0x23960f050e319, 0x5373669c91611 },
+        { 0x235e8202f3f27, 0x44c9f2eb61780, 0x630905b1d7003, 0x4fcc8d274ead1, 0x17b6e7f68ab78 },
+        { 0x014ab9a0e5257, 0x09939567f8ba5, 0x4b47b2a423c82, 0x688d7e57ac42d, 0x1cb4b5a678f87 },
+    },
+    {
+        { 0x4aa62a2a007e7, 0x61e0e38f62d6e, 0x02f888fcc4782, 0x7562b83f21c00, 0x2dc0fd2d82ef6 },
+        { 0x4c06b394afc6c, 0x4931b4bf636cc, 0x72b60d0322378, 0x25127c6818b25, 0x330bca78de743 },
+        { 0x6ff841119744e, 0x2c560e8e49305, 0x7254fefe5a57a, 0x67ae2c560a7df, 0x3c31be1b369f1 },
+    },
+    {
+        { 0x0bc93f9cb4272, 0x3f8f9db73182d, 0x2b235eabae1c4, 0x2ddbf8729551a, 0x41cec1097e7d5 },
+        { 0x4864d08948aee, 0x5d237438df61e, 0x2b285601f7067, 0x25dbcbae6d753, 0x330b61134262d },
+        { 0x619d7a26d808a, 0x3c3b3c2adbef2, 0x6877c9eec7f52, 0x3beb9ebe1b66d, 0x26b44cd91f287 },
+    },
+},
+{
+    {
+        { 0x7f29362730383, 0x7fd7951459c36, 0x7504c512d49e7, 0x087ed7e3bc55f, 0x7deb10149c726 },
+        { 0x048478f387475, 0x69397d9678a3e, 0x67c8156c976f3, 0x2eb4d5589226c, 0x2c709e6c1c10a },
+        { 0x2af6a8766ee7a, 0x08aaa79a1d96c, 0x42f92d59b2fb0, 0x1752c40009c07, 0x08e68e9ff62ce },
+    },
+    {
+        { 0x509d50ab8f2f9, 0x1b8ab247be5e5, 0x5d9b2e6b2e486, 0x4faa5479a1339, 0x4cb13bd738f71 },
+        { 0x5500a4bc130ad, 0x127a17a938695, 0x02a26fa34e36d, 0x584d12e1ecc28, 0x2f1f3f87eeba3 },
+        { 0x48c75e515b64a, 0x75b6952071ef0, 0x5d46d42965406, 0x7746106989f9f, 0x19a1e353c0ae2 },
+    },
+    {
+        { 0x172cdd596bdbd, 0x0731ddf881684, 0x10426d64f8115, 0x71a4fd8a9a3da, 0x736bd3990266a },
+        { 0x47560bafa05c3, 0x418dcabcc2fa3, 0x35991cecf8682, 0x24371a94b8c60, 0x41546b11c20c3 },
+        { 0x32d509334b3b4, 0x16c102cae70aa, 0x1720dd51bf445, 0x5ae662faf9821, 0x412295a2b87fa },
+    },
+    {
+        { 0x55261e293eac6, 0x06426759b65cc, 0x40265ae116a48, 0x6c02304bae5bc, 0x0760bb8d195ad },
+        { 0x19b88f57ed6e9, 0x4cdbf1904a339, 0x42b49cd4e4f2c, 0x71a2e771909d9, 0x14e153ebb52d2 },
+        { 0x61a17cde6818a, 0x53dad34108827, 0x32b32c55c55b6, 0x2f9165f9347a3, 0x6b34be9bc33ac },
+    },
+    {
+        { 0x469656571f2d3, 0x0aa61ce6f423f, 0x3f940d71b27a1, 0x185f19d73d16a, 0x01b9c7b62e6dd },
+        { 0x72f643a78c0b2, 0x3de45c04f9e7b, 0x706d68d30fa5c, 0x696f63e8e2f24, 0x2012c18f0922d },
+        { 0x355e55ac89d29, 0x3e8b414ec7101, 0x39db07c520c90, 0x6f41e9b77efe1, 0x08af5b784e4ba },
+    },
+    {
+        { 0x314d289cc2c4b, 0x23450e2f1bc4e, 0x0cd93392f92f4, 0x1370c6a946b7d, 0x6423c1d5afd98 },
+        { 0x499dc881f2533, 0x34ef26476c506, 0x4d107d2741497, 0x346c4bd6efdb3, 0x32b79d71163a1 },
+        { 0x5f8d9edfcb36a, 0x1e6e8dcbf3990, 0x7974f348af30a, 0x6e6724ef19c7c, 0x480a5efbc13e2 },
+    },
+    {
+        { 0x14ce442ce221f, 0x18980a72516cc, 0x072f80db86677, 0x703331fda526e, 0x24b31d47691c8 },
+        { 0x1e70b01622071, 0x1f163b5f8a16a, 0x56aaf341ad417, 0x7989635d830f7, 0x47aa27600cb7b },
+        { 0x41eedc015f8c3, 0x7cf8d27ef854a, 0x289e3584693f9, 0x04a7857b309a7, 0x545b585d14dda },
+    },
+    {
+        { 0x4e4d0e3b321e1, 0x7451fe3d2ac40, 0x666f678eea98d, 0x038858667fead, 0x4d22dc3e64c8d },
+        { 0x7275ea0d43a0f, 0x681137dd7ccf7, 0x1e79cbab79a38, 0x22a214489a66a, 0x0f62f9c332ba5 },
+        { 0x46589d63b5f39, 0x7eaf979ec3f96, 0x4ebe81572b9a8, 0x21b7f5d61694a, 0x1c0fa01a36371 },
+    },
+},
+{
+    {
+        { 0x02b0e8c936a50, 0x6b83b58b6cd21, 0x37ed8d3e72680, 0x0a037db9f2a62, 0x4005419b1d2bc },
+        { 0x604b622943dff, 0x1c899f6741a58, 0x60219e2f232fb, 0x35fae92a7f9cb, 0x0fa3614f3b1ca },
+        { 0x3febdb9be82f0, 0x5e74895921400, 0x553ea38822706, 0x5a17c24cfc88c, 0x1fba218aef40a },
+    },
+    {
+        { 0x657043e7b0194, 0x5c11b55efe9e7, 0x7737bc6a074fb, 0x0eae41ce355cc, 0x6c535d13ff776 },
+        { 0x49448fac8f53e, 0x34f74c6e8356a, 0x0ad780607dba2, 0x7213a7eb63eb6, 0x392e3acaa8c86 },
+        { 0x534e93e8a35af, 0x08b10fd02c997, 0x26ac2acb81e05, 0x09d8c98ce3b79, 0x25e17fe4d50ac },
+    },
+    {
+        { 0x77ff576f121a7, 0x4e5f9b0fc722b, 0x46f949b0d28c8, 0x4cde65d17ef26, 0x6bba828f89698 },
+        { 0x09bd71e04f676, 0x25ac841f2a145, 0x1a47eac823871, 0x1a8a8c36c581a, 0x255751442a9fb },
+        { 0x1bc6690fe3901, 0x314132f5abc5a, 0x611835132d528, 0x5f24b8eb48a57, 0x559d504f7f6b7 },
+    },
+    {
+        { 0x091e7f6d266fd, 0x36060ef037389, 0x18788ec1d1286, 0x287441c478eb0, 0x123ea6a3354bd },
+        { 0x38378b3eb54d5, 0x4d4aaa78f94ee, 0x4a002e875a74d, 0x10b851367b17c, 0x01ab12d5807e3 },
+        { 0x5189041e32d96, 0x05b062b090231, 0x0c91766e7b78f, 0x0aa0f55a138ec, 0x4a3961e2c918a },
+    },
+    {
+        { 0x7d644f3233f1e, 0x1c69f9e02c064, 0x36ae5e5266898, 0x08fc1dad38b79, 0x68aceead9bd41 },
+        { 0x43be0f8e6bba0, 0x68fdffc614e3b, 0x4e91dab5b3be0, 0x3b1d4c9212ff0, 0x2cd6bce3fb1db },
+        { 0x4c90ef3d7c210, 0x496f5a0818716, 0x79cf88cc239b8, 0x2cb9c306cf8db, 0x595760d5b508f },
+    },
+    {
+        { 0x2cbebfd022790, 0x0b8822aec1105, 0x4d1cfd226bccc, 0x515b2fa4971be, 0x2cb2c5df54515 },
+        { 0x1bfe104aa6397, 0x11494ff996c25, 0x64251623e5800, 0x0d49fc5e044be, 0x709fa43edcb29 },
+        { 0x25d8c63fd2aca, 0x4c5cd29dffd61, 0x32ec0eb48af05, 0x18f9391f9b77c, 0x70f029ecf0c81 },
+    },
+    {
+        { 0x2afaa5e10b0b9, 0x61de08355254d, 0x0eb587de3c28d, 0x4f0bb9f7dbbd5, 0x44eca5a2a74bd },
+        { 0x307b32eed3e33, 0x6748ab03ce8c2, 0x57c0d9ab810bc, 0x42c64a224e98c, 0x0b7d5d8a6c314 },
+        { 0x448327b95d543, 0x0146681e3a4ba, 0x38714adc34e0c, 0x4f26f0e298e30, 0x272224512c7de },
+    },
+    {
+        { 0x3bb8a42a975fc, 0x6f2d5b46b17ef, 0x7b6a9223170e5, 0x053713fe3b7e6, 0x19735fd7f6bc2 },
+        { 0x492af49c5342e, 0x2365cdf5a0357, 0x32138a7ffbb60, 0x2a1f7d14646fe, 0x11b5df18a44cc },
+        { 0x390d042c84266, 0x1efe32a8fdc75, 0x6925ee7ae1238, 0x4af9281d0e832, 0x0fef911191df8 },
+    },
+},
+};
+#else
 /* base[i][j] = (j+1)*256^i*B */
 static const ge_precomp base[32][8] = {
 {
@@ -2113,6 +3460,7 @@ static const ge_precomp base[32][8] = {
  },
 },
 } ;
+#endif
 
 
 static void ge_select(ge_precomp *t,int pos,signed char b)
@@ -2221,7 +3569,50 @@ static void slide(signed char *r,const unsigned char *a)
     }
 }
 
-
+#ifdef HAVE___UINT128_T
+static const ge_precomp Bi[8] = {   /* 8 entries, each 3 groups of 5 limbs (all < 2^51) */
+    {
+        { 0x493c6f58c3b85, 0x0df7181c325f7, 0x0f50b0b3e4cb7, 0x5329385a44c32, 0x07cf9d3a33d4b },
+        { 0x03905d740913e, 0x0ba2817d673a2, 0x23e2827f4e67c, 0x133d2e0c21a34, 0x44fd2f9298f81 },
+        { 0x11205877aaa68, 0x479955893d579, 0x50d66309b67a0, 0x2d42d0dbee5ee, 0x6f117b689f0c6 },
+    },
+    {
+        { 0x5b0a84cee9730, 0x61d10c97155e4, 0x4059cc8096a10, 0x47a608da8014f, 0x7a164e1b9a80f },
+        { 0x11fe8a4fcd265, 0x7bcb8374faacc, 0x52f5af4ef4d4f, 0x5314098f98d10, 0x2ab91587555bd },
+        { 0x6933f0dd0d889, 0x44386bb4c4295, 0x3cb6d3162508c, 0x26368b872a2c6, 0x5a2826af12b9b },
+    },
+    {
+        { 0x2bc4408a5bb33, 0x078ebdda05442, 0x2ffb112354123, 0x375ee8df5862d, 0x2945ccf146e20 },
+        { 0x182c3a447d6ba, 0x22964e536eff2, 0x192821f540053, 0x2f9f19e788e5c, 0x154a7e73eb1b5 },
+        { 0x3dbf1812a8285, 0x0fa17ba3f9797, 0x6f69cb49c3820, 0x34d5a0db3858d, 0x43aabe696b3bb },
+    },
+    {
+        { 0x25cd0944ea3bf, 0x75673b81a4d63, 0x150b925d1c0d4, 0x13f38d9294114, 0x461bea69283c9 },
+        { 0x72c9aaa3221b1, 0x267774474f74d, 0x064b0e9b28085, 0x3f04ef53b27c9, 0x1d6edd5d2e531 },
+        { 0x36dc801b8b3a2, 0x0e0a7d4935e30, 0x1deb7cecc0d7d, 0x053a94e20dd2c, 0x7a9fbb1c6a0f9 },
+    },
+    {
+        { 0x6678aa6a8632f, 0x5ea3788d8b365, 0x21bd6d6994279, 0x7ace75919e4e3, 0x34b9ed338add7 },
+        { 0x6217e039d8064, 0x6dea408337e6d, 0x57ac112628206, 0x647cb65e30473, 0x49c05a51fadc9 },
+        { 0x4e8bf9045af1b, 0x514e33a45e0d6, 0x7533c5b8bfe0f, 0x583557b7e14c9, 0x73c172021b008 },
+    },
+    {
+        { 0x700848a802ade, 0x1e04605c4e5f7, 0x5c0d01b9767fb, 0x7d7889f42388b, 0x4275aae2546d8 },
+        { 0x75b0249864348, 0x52ee11070262b, 0x237ae54fb5acd, 0x3bfd1d03aaab5, 0x18ab598029d5c },
+        { 0x32cc5fd6089e9, 0x426505c949b05, 0x46a18880c7ad2, 0x4a4221888ccda, 0x3dc65522b53df },
+    },
+    {
+        { 0x0c222a2007f6d, 0x356b79bdb77ee, 0x41ee81efe12ce, 0x120a9bd07097d, 0x234fd7eec346f },
+        { 0x7013b327fbf93, 0x1336eeded6a0d, 0x2b565a2bbf3af, 0x253ce89591955, 0x0267882d17602 },
+        { 0x0a119732ea378, 0x63bf1ba8e2a6c, 0x69f94cc90df9a, 0x431d1779bfc48, 0x497ba6fdaa097 },
+    },
+    {
+        { 0x6cc0313cfeaa0, 0x1a313848da499, 0x7cb534219230a, 0x39596dedefd60, 0x61e22917f12de },
+        { 0x3cd86468ccf0b, 0x48553221ac081, 0x6c9464b4e0a6e, 0x75fba84180403, 0x43b5cd4218d05 },
+        { 0x2762f9bd0b516, 0x1c6e7fbddcbb3, 0x75909c3ace2bd, 0x42101972d3ec9, 0x511d61210ae4d },
+    },
+};
+#else
 static const ge_precomp Bi[8] = {
  {
   { 25967493,-14356035,29566456,3660896,-12694345,4014787,27544626,-11754271,-6079156,2047605 },
@@ -2264,6 +3655,7 @@ static const ge_precomp Bi[8] = {
   { -3099351,10324967,-2241613,7453183,-5446979,-2735503,-13812022,-16236442,-32461234,-12290683 },
  },
 } ;
+#endif
 
 
 /*
@@ -2327,17 +3719,30 @@ int ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a,
   return 0;
 }
 
-
+#ifdef HAVE___UINT128_T
+static const fe d = {   /* five limbs, each below 2^51 */
+    0x34dca135978a3, 0x1a8283b156ebd, 0x5e7a26001c029, 0x739c663a03cbb,
+    0x52036cee2b6ff
+};  /* NOTE(review): presumably the same constant as the 10-limb table below -- confirm */
+#else
 static const fe d = {
 -10913610,13857413,-15372611,6949391,114729,
 -8787816,-6275908,-3247719,-18696448,-12055116
 } ;
+#endif
 
 
+#ifdef HAVE___UINT128_T
+static const fe sqrtm1 = {   /* five limbs, each below 2^51 */
+    0x61b274a0ea0b0, 0x0d5a5fc8f189d, 0x7ef5e9cbd0c60, 0x78595a6804c9e,
+    0x2b8324804fc1d
+};  /* NOTE(review): by its name a square root of -1 in the field -- not checked here */
+#else
 static const fe sqrtm1 = {
 -32595792,-7943725,9377950,3500415,12389472,
 -272473,-25146209,-2005654,326686,11406482
 } ;
+#endif
 
 
 int ge_frombytes_negate_vartime(ge_p3 *h,const unsigned char *s)
@@ -2516,10 +3921,17 @@ void ge_p3_dbl(ge_p1p1 *r,const ge_p3 *p)
 r = p
 */
 
+#ifdef HAVE___UINT128_T
+static const fe d2 = {   /* five limbs below 2^51, least significant first */
+    0x69b9426b2f159, 0x35050762add7a, 0x3cf44c0038052, 0x6738cc7407977,
+    0x2406d9dc56dff
+};  /* limb-wise this is 2*d reduced mod 2^255-19 (64-bit d table in this file) */
+#else
 static const fe d2 = {
 -21827239,-5839606,-30745221,13898782,229458,
 15978800,-12551817,-6495438,29715968,9444199
 } ;
+#endif
 
 
 extern void ge_p3_to_cached(ge_cached *r,const ge_p3 *p)
diff --git a/wolfcrypt/src/hash.c b/wolfcrypt/src/hash.c
index bb03cde14..58a9c2251 100644
--- a/wolfcrypt/src/hash.c
+++ b/wolfcrypt/src/hash.c
@@ -388,82 +388,70 @@ int wc_HashFinal(wc_HashAlg* hash, enum wc_HashType type, byte* out)
 #if !defined(WOLFSSL_TI_HASH)
 
 #if !defined(NO_MD5)
-void wc_Md5GetHash(Md5* md5, byte* hash)
-{
-    Md5 save = *md5 ;
-    wc_Md5Final(md5, hash) ;
-    *md5 = save ;
-}
+    int wc_Md5Hash(const byte* data, word32 len, byte* hash)
+    {   /* hashes len bytes of data into hash; returns 0 or the first error */
+        int ret;
+    #ifdef WOLFSSL_SMALL_STACK
+        Md5* md5;
+    #else
+        Md5  md5[1];
+    #endif
 
-WOLFSSL_API void wc_Md5RestorePos(Md5* m1, Md5* m2) {
-    *m1 = *m2 ;
-}
+    #ifdef WOLFSSL_SMALL_STACK
+        md5 = (Md5*)XMALLOC(sizeof(Md5), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        if (md5 == NULL)
+            return MEMORY_E;
+    #endif
 
-#endif
+        /* init, absorb, finalize; each step runs only if the previous gave 0 */
+        if ((ret = wc_InitMd5(md5)) == 0) {
+            ret = wc_Md5Update(md5, data, len);
+        }
+        if (ret == 0) {
+            ret = wc_Md5Final(md5, hash);
+        }
+
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(md5, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
+
+        return ret;
+    }
+#endif /* !NO_MD5 */
 
 #if !defined(NO_SHA)
-int wc_ShaGetHash(Sha* sha, byte* hash)
-{
-    int ret ;
-    Sha save = *sha ;
-    ret = wc_ShaFinal(sha, hash) ;
-    *sha = save ;
-    return ret ;
-}
+    int wc_ShaHash(const byte* data, word32 len, byte* hash)
+    {   /* hashes len bytes of data into hash; returns 0 or the first error */
+        int ret = 0;
+    #ifdef WOLFSSL_SMALL_STACK
+        Sha* sha;
+    #else
+        Sha sha[1];
+    #endif
 
-void wc_ShaRestorePos(Sha* s1, Sha* s2) {
-    *s1 = *s2 ;
-}
+    #ifdef WOLFSSL_SMALL_STACK
+        sha = (Sha*)XMALLOC(sizeof(Sha), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        if (sha == NULL)
+            return MEMORY_E;
+    #endif
 
-int wc_ShaHash(const byte* data, word32 len, byte* hash)
-{
-    int ret = 0;
-#ifdef WOLFSSL_SMALL_STACK
-    Sha* sha;
-#else
-    Sha sha[1];
-#endif
+        /* propagate update/final failures instead of returning a stale 0 */
+        if ((ret = wc_InitSha(sha)) != 0) {
+            WOLFSSL_MSG("wc_InitSha failed");
+        }
+        else if ((ret = wc_ShaUpdate(sha, data, len)) == 0) {
+            ret = wc_ShaFinal(sha, hash);
+        }
 
-#ifdef WOLFSSL_SMALL_STACK
-    sha = (Sha*)XMALLOC(sizeof(Sha), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (sha == NULL)
-        return MEMORY_E;
-#endif
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(sha, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
 
-    if ((ret = wc_InitSha(sha)) != 0) {
-        WOLFSSL_MSG("wc_InitSha failed");
+        return ret;
     }
-    else {
-        wc_ShaUpdate(sha, data, len);
-        wc_ShaFinal(sha, hash);
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(sha, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return ret;
-
-}
-
-#endif /* !defined(NO_SHA) */
+#endif /* !NO_SHA */
 
 #if defined(WOLFSSL_SHA224)
-int wc_Sha224GetHash(Sha224* sha224, byte* hash)
-{
-    int ret;
-    Sha224 save;
-
-    if (sha224 == NULL || hash == NULL)
-        return BAD_FUNC_ARG;
-
-    save= *sha224;
-    ret = wc_Sha224Final(sha224, hash);
-    *sha224 = save;
-
-    return ret;
-}
-
 int wc_Sha224Hash(const byte* data, word32 len, byte* hash)
 {
     int ret = 0;
@@ -495,154 +483,109 @@ int wc_Sha224Hash(const byte* data, word32 len, byte* hash)
 
     return ret;
 }
-
-#endif /* defined(WOLFSSL_SHA224) */
+#endif /* WOLFSSL_SHA224 */
 
 #if !defined(NO_SHA256)
-int wc_Sha256GetHash(Sha256* sha256, byte* hash)
-{
-    int ret ;
-    Sha256 save = *sha256 ;
-    ret = wc_Sha256Final(sha256, hash) ;
-    *sha256 = save ;
-    return ret ;
-}
+    int wc_Sha256Hash(const byte* data, word32 len, byte* hash)
+    {
+        int ret = 0;
+    #ifdef WOLFSSL_SMALL_STACK
+        Sha256* sha256;
+    #else
+        Sha256 sha256[1];
+    #endif
 
-void wc_Sha256RestorePos(Sha256* s1, Sha256* s2) {
-    *s1 = *s2 ;
-}
+    #ifdef WOLFSSL_SMALL_STACK
+        sha256 = (Sha256*)XMALLOC(sizeof(Sha256), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        if (sha256 == NULL)
+            return MEMORY_E;
+    #endif
 
-int wc_Sha256Hash(const byte* data, word32 len, byte* hash)
-{
-    int ret = 0;
-#ifdef WOLFSSL_SMALL_STACK
-    Sha256* sha256;
-#else
-    Sha256 sha256[1];
-#endif
+        if ((ret = wc_InitSha256(sha256)) != 0) {
+            WOLFSSL_MSG("InitSha256 failed");
+        }
+        else if ((ret = wc_Sha256Update(sha256, data, len)) != 0) {
+            WOLFSSL_MSG("Sha256Update failed");
+        }
+        else if ((ret = wc_Sha256Final(sha256, hash)) != 0) {
+            WOLFSSL_MSG("Sha256Final failed");
+        }
 
-#ifdef WOLFSSL_SMALL_STACK
-    sha256 = (Sha256*)XMALLOC(sizeof(Sha256), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (sha256 == NULL)
-        return MEMORY_E;
-#endif
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(sha256, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
 
-    if ((ret = wc_InitSha256(sha256)) != 0) {
-        WOLFSSL_MSG("InitSha256 failed");
+        return ret;
     }
-    else if ((ret = wc_Sha256Update(sha256, data, len)) != 0) {
-        WOLFSSL_MSG("Sha256Update failed");
-    }
-    else if ((ret = wc_Sha256Final(sha256, hash)) != 0) {
-        WOLFSSL_MSG("Sha256Final failed");
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(sha256, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return ret;
-}
-
-#endif /* !defined(NO_SHA256) */
+#endif /* !NO_SHA256 */
 
 #endif /* !defined(WOLFSSL_TI_HASH) */
 
+
 #if defined(WOLFSSL_SHA512)
-int wc_Sha512GetHash(Sha512* sha512, byte* hash)
-{
-    int ret;
-    Sha512 save;
+    int wc_Sha512Hash(const byte* data, word32 len, byte* hash)
+    {
+        int ret = 0;
+    #ifdef WOLFSSL_SMALL_STACK
+        Sha512* sha512;
+    #else
+        Sha512 sha512[1];
+    #endif
 
-    if (sha512 == NULL || hash == NULL)
-        return BAD_FUNC_ARG;
+    #ifdef WOLFSSL_SMALL_STACK
+        sha512 = (Sha512*)XMALLOC(sizeof(Sha512), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        if (sha512 == NULL)
+            return MEMORY_E;
+    #endif
 
-    save= *sha512;
-    ret = wc_Sha512Final(sha512, hash);
-    *sha512 = save;
+        if ((ret = wc_InitSha512(sha512)) != 0) {
+            WOLFSSL_MSG("InitSha512 failed");
+        }
+        else if ((ret = wc_Sha512Update(sha512, data, len)) != 0) {
+            WOLFSSL_MSG("Sha512Update failed");
+        }
+        else if ((ret = wc_Sha512Final(sha512, hash)) != 0) {
+            WOLFSSL_MSG("Sha512Final failed");
+        }
 
-    return ret;
-}
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(sha512, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
 
-int wc_Sha512Hash(const byte* data, word32 len, byte* hash)
-{
-    int ret = 0;
-#ifdef WOLFSSL_SMALL_STACK
-    Sha512* sha512;
-#else
-    Sha512 sha512[1];
-#endif
-
-#ifdef WOLFSSL_SMALL_STACK
-    sha512 = (Sha512*)XMALLOC(sizeof(Sha512), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (sha512 == NULL)
-        return MEMORY_E;
-#endif
-
-    if ((ret = wc_InitSha512(sha512)) != 0) {
-        WOLFSSL_MSG("InitSha512 failed");
-    }
-    else if ((ret = wc_Sha512Update(sha512, data, len)) != 0) {
-        WOLFSSL_MSG("Sha512Update failed");
-    }
-    else if ((ret = wc_Sha512Final(sha512, hash)) != 0) {
-        WOLFSSL_MSG("Sha512Final failed");
+        return ret;
     }
 
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(sha512, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+    #if defined(WOLFSSL_SHA384)
+        int wc_Sha384Hash(const byte* data, word32 len, byte* hash)
+        {
+            int ret = 0;
+        #ifdef WOLFSSL_SMALL_STACK
+            Sha384* sha384;
+        #else
+            Sha384 sha384[1];
+        #endif
 
-    return ret;
-}
+        #ifdef WOLFSSL_SMALL_STACK
+            sha384 = (Sha384*)XMALLOC(sizeof(Sha384), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+            if (sha384 == NULL)
+                return MEMORY_E;
+        #endif
 
-#if defined(WOLFSSL_SHA384)
-int wc_Sha384GetHash(Sha384* sha384, byte* hash)
-{
-    int ret;
-    Sha384 save;
+            if ((ret = wc_InitSha384(sha384)) != 0) {
+                WOLFSSL_MSG("InitSha384 failed");
+            }
+            else if ((ret = wc_Sha384Update(sha384, data, len)) != 0) {
+                WOLFSSL_MSG("Sha384Update failed");
+            }
+            else if ((ret = wc_Sha384Final(sha384, hash)) != 0) {
+                WOLFSSL_MSG("Sha384Final failed");
+            }
 
-    if (sha384 == NULL || hash == NULL)
-        return BAD_FUNC_ARG;
+        #ifdef WOLFSSL_SMALL_STACK
+            XFREE(sha384, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        #endif
 
-    save= *sha384;
-    ret = wc_Sha384Final(sha384, hash);
-    *sha384 = save;
-
-    return ret;
-}
-
-int wc_Sha384Hash(const byte* data, word32 len, byte* hash)
-{
-    int ret = 0;
-#ifdef WOLFSSL_SMALL_STACK
-    Sha384* sha384;
-#else
-    Sha384 sha384[1];
-#endif
-
-#ifdef WOLFSSL_SMALL_STACK
-    sha384 = (Sha384*)XMALLOC(sizeof(Sha384), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (sha384 == NULL)
-        return MEMORY_E;
-#endif
-
-    if ((ret = wc_InitSha384(sha384)) != 0) {
-        WOLFSSL_MSG("InitSha384 failed");
-    }
-    else if ((ret = wc_Sha384Update(sha384, data, len)) != 0) {
-        WOLFSSL_MSG("Sha384Update failed");
-    }
-    else if ((ret = wc_Sha384Final(sha384, hash)) != 0) {
-        WOLFSSL_MSG("Sha384Final failed");
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(sha384, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return ret;
-}
-
-#endif /* defined(WOLFSSL_SHA384) */
-#endif /* defined(WOLFSSL_SHA512) */
+            return ret;
+        }
+    #endif /* WOLFSSL_SHA384 */
+#endif /* WOLFSSL_SHA512 */
diff --git a/wolfcrypt/src/hmac.c b/wolfcrypt/src/hmac.c
old mode 100644
new mode 100755
index e64fd0b4f..f1020aa28
--- a/wolfcrypt/src/hmac.c
+++ b/wolfcrypt/src/hmac.c
@@ -38,83 +38,79 @@
 #endif
 
 
+/* fips wrapper calls, user can call direct */
 #ifdef HAVE_FIPS
-/* does init */
-int wc_HmacSetKey(Hmac* hmac, int type, const byte* key, word32 keySz)
-{
-    return HmacSetKey_fips(hmac, type, key, keySz);
-}
-
-
-int wc_HmacUpdate(Hmac* hmac, const byte* in, word32 sz)
-{
-    return HmacUpdate_fips(hmac, in, sz);
-}
-
-
-int wc_HmacFinal(Hmac* hmac, byte* out)
-{
-    return HmacFinal_fips(hmac, out);
-}
-
-
-#ifdef WOLFSSL_ASYNC_CRYPT
-    int  wc_HmacAsyncInit(Hmac* hmac, int i)
+    /* does init */
+    int wc_HmacSetKey(Hmac* hmac, int type, const byte* key, word32 keySz)
     {
-        return HmacAsyncInit(hmac, i);
+        return HmacSetKey_fips(hmac, type, key, keySz);
+    }
+    int wc_HmacUpdate(Hmac* hmac, const byte* in, word32 sz)
+    {
+        return HmacUpdate_fips(hmac, in, sz);
+    }
+    int wc_HmacFinal(Hmac* hmac, byte* out)
+    {
+        return HmacFinal_fips(hmac, out);
+    }
+    int wolfSSL_GetHmacMaxSize(void)
+    {
+        return CyaSSL_GetHmacMaxSize();
     }
 
-
-    void wc_HmacAsyncFree(Hmac* hmac)
+    int wc_HmacInit(Hmac* hmac, void* heap, int devId)
     {
-        HmacAsyncFree(hmac);
+        (void)hmac;
+        (void)heap;
+        (void)devId;
+        /* FIPS doesn't support:
+            return HmacInit(hmac, heap, devId); */
+        return 0;
+    }
+    void wc_HmacFree(Hmac* hmac)
+    {
+        (void)hmac;
+        /* FIPS doesn't support:
+            HmacFree(hmac); */
     }
-#endif
 
-int wolfSSL_GetHmacMaxSize(void)
-{
-    return CyaSSL_GetHmacMaxSize();
-}
-
-#ifdef HAVE_HKDF
-
-int wc_HKDF(int type, const byte* inKey, word32 inKeySz,
+    #ifdef HAVE_HKDF
+        int wc_HKDF(int type, const byte* inKey, word32 inKeySz,
                     const byte* salt, word32 saltSz,
                     const byte* info, word32 infoSz,
                     byte* out, word32 outSz)
-{
-    return HKDF(type, inKey, inKeySz, salt, saltSz, info, infoSz, out, outSz);
-}
+        {
+            return HKDF(type, inKey, inKeySz, salt, saltSz,
+                info, infoSz, out, outSz);
+        }
+    #endif /* HAVE_HKDF */
 
-
-#endif /* HAVE_HKDF */
 #else /* else build without fips */
-#ifdef WOLFSSL_PIC32MZ_HASH
 
-#define wc_InitMd5   wc_InitMd5_sw
-#define wc_Md5Update wc_Md5Update_sw
-#define wc_Md5Final  wc_Md5Final_sw
-
-#define wc_InitSha   wc_InitSha_sw
-#define wc_ShaUpdate wc_ShaUpdate_sw
-#define wc_ShaFinal  wc_ShaFinal_sw
-
-#define wc_InitSha256   wc_InitSha256_sw
-#define wc_Sha256Update wc_Sha256Update_sw
-#define wc_Sha256Final  wc_Sha256Final_sw
-
-#endif
-
-#ifdef HAVE_FIPS
-    /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
-    #define FIPS_NO_WRAPPERS
-#endif
 
 #include 
 
 
+#ifdef WOLFSSL_PIC32MZ_HASH
+    #define wc_InitMd5   wc_InitMd5_sw
+    #define wc_Md5Update wc_Md5Update_sw
+    #define wc_Md5Final  wc_Md5Final_sw
+
+    #define wc_InitSha   wc_InitSha_sw
+    #define wc_ShaUpdate wc_ShaUpdate_sw
+    #define wc_ShaFinal  wc_ShaFinal_sw
+
+    #define wc_InitSha256   wc_InitSha256_sw
+    #define wc_Sha256Update wc_Sha256Update_sw
+    #define wc_Sha256Final  wc_Sha256Final_sw
+#endif /* WOLFSSL_PIC32MZ_HASH */
+
+
+
 int wc_HmacSizeByType(int type)
 {
+    int ret;
+
     if (!(type == MD5 || type == SHA    || type == SHA256 || type == SHA384
                       || type == SHA512 || type == BLAKE2B_ID
                       || type == SHA224)) {
@@ -124,121 +120,149 @@ int wc_HmacSizeByType(int type)
     switch (type) {
     #ifndef NO_MD5
         case MD5:
-            return MD5_DIGEST_SIZE;
-    #endif
+            ret = MD5_DIGEST_SIZE;
+            break;
+    #endif /* !NO_MD5 */
 
     #ifndef NO_SHA
         case SHA:
-            return SHA_DIGEST_SIZE;
-    #endif
+            ret = SHA_DIGEST_SIZE;
+            break;
+    #endif /* !NO_SHA */
 
     #ifdef WOLFSSL_SHA224
         case SHA224:
-            return SHA224_DIGEST_SIZE;
-    #endif
+            ret = SHA224_DIGEST_SIZE;
+            break;
+    #endif /* WOLFSSL_SHA224 */
 
     #ifndef NO_SHA256
         case SHA256:
-            return SHA256_DIGEST_SIZE;
-    #endif
-
-    #ifdef WOLFSSL_SHA384
-        case SHA384:
-            return SHA384_DIGEST_SIZE;
-    #endif
+            ret = SHA256_DIGEST_SIZE;
+            break;
+    #endif /* !NO_SHA256 */
 
     #ifdef WOLFSSL_SHA512
+    #ifdef WOLFSSL_SHA384
+        case SHA384:
+            ret = SHA384_DIGEST_SIZE;
+            break;
+    #endif /* WOLFSSL_SHA384 */
         case SHA512:
-            return SHA512_DIGEST_SIZE;
-    #endif
+            ret = SHA512_DIGEST_SIZE;
+            break;
+    #endif /* WOLFSSL_SHA512 */
 
     #ifdef HAVE_BLAKE2
         case BLAKE2B_ID:
-            return BLAKE2B_OUTBYTES;
-    #endif
+            ret = BLAKE2B_OUTBYTES;
+            break;
+    #endif /* HAVE_BLAKE2 */
 
         default:
-            return BAD_FUNC_ARG;
+            ret = BAD_FUNC_ARG;
+            break;
     }
+
+    return ret;
 }
 
-static int InitHmac(Hmac* hmac, int type)
+static int _InitHmac(Hmac* hmac, int type, void* heap)
 {
     int ret = 0;
 
-    hmac->innerHashKeyed = 0;
-    hmac->macType = (byte)type;
-
-    if (!(type == MD5 || type == SHA    || type == SHA256 || type == SHA384
-                      || type == SHA512 || type == BLAKE2B_ID
-                      || type == SHA224))
-        return BAD_FUNC_ARG;
-
     switch (type) {
-        #ifndef NO_MD5
+    #ifndef NO_MD5
         case MD5:
             ret = wc_InitMd5(&hmac->hash.md5);
-        break;
-        #endif
+            break;
+    #endif /* !NO_MD5 */
 
-        #ifndef NO_SHA
+    #ifndef NO_SHA
         case SHA:
             ret = wc_InitSha(&hmac->hash.sha);
-        break;
-        #endif
+            break;
+    #endif /* !NO_SHA */
 
-        #ifdef WOLFSSL_SHA224
+    #ifdef WOLFSSL_SHA224
         case SHA224:
             ret = wc_InitSha224(&hmac->hash.sha224);
-        break;
-        #endif
+            break;
+    #endif /* WOLFSSL_SHA224 */
 
-        #ifndef NO_SHA256
+    #ifndef NO_SHA256
         case SHA256:
             ret = wc_InitSha256(&hmac->hash.sha256);
-        break;
-        #endif
+            break;
+    #endif /* !NO_SHA256 */
 
-        #ifdef WOLFSSL_SHA384
+    #ifdef WOLFSSL_SHA512
+    #ifdef WOLFSSL_SHA384
         case SHA384:
             ret = wc_InitSha384(&hmac->hash.sha384);
-        break;
-        #endif
-
-        #ifdef WOLFSSL_SHA512
+            break;
+    #endif /* WOLFSSL_SHA384 */
         case SHA512:
             ret = wc_InitSha512(&hmac->hash.sha512);
-        break;
-        #endif
+            break;
+    #endif /* WOLFSSL_SHA512 */
 
-        #ifdef HAVE_BLAKE2
+    #ifdef HAVE_BLAKE2
         case BLAKE2B_ID:
             ret = wc_InitBlake2b(&hmac->hash.blake2b, BLAKE2B_256);
-        break;
-        #endif
+            break;
+    #endif /* HAVE_BLAKE2 */
 
         default:
-            return BAD_FUNC_ARG;
+            ret = BAD_FUNC_ARG;
+            break;
     }
 
+    /* use caller's heap hint, or the test value under WOLFSSL_HEAP_TEST */
+#ifdef WOLFSSL_HEAP_TEST
+    hmac->heap = (void*)WOLFSSL_HEAP_TEST; /* pointer cast, not (void) */
+#else
+    hmac->heap = heap;
+#endif /* WOLFSSL_HEAP_TEST */
+
     return ret;
 }
 
 
 int wc_HmacSetKey(Hmac* hmac, int type, const byte* key, word32 length)
 {
-    byte*  ip = (byte*) hmac->ipad;
-    byte*  op = (byte*) hmac->opad;
+    byte*  ip;
+    byte*  op;
     word32 i, hmac_block_size = 0;
-    int    ret;
+    int    ret = 0;
+    void*  heap = NULL;
 
-#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
-    if (hmac->asyncDev.marker == WOLFSSL_ASYNC_MARKER_HMAC) {
-        return NitroxHmacSetKey(hmac, type, key, length);
+    if (hmac == NULL || key == NULL ||
+        !(type == MD5 || type == SHA    || type == SHA256 || type == SHA384
+                      || type == SHA512 || type == BLAKE2B_ID
+                      || type == SHA224)) {
+        return BAD_FUNC_ARG;
     }
-#endif
 
-    ret = InitHmac(hmac, type);
+    hmac->innerHashKeyed = 0;
+    hmac->macType = (byte)type;
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC)
+    if (hmac->asyncDev.marker == WOLFSSL_ASYNC_MARKER_HMAC) {
+    #if defined(HAVE_CAVIUM) || defined(HAVE_INTEL_QA)
+        if (length > HMAC_BLOCK_SIZE) {
+            return WC_KEY_SIZE_E;
+        }
+
+        XMEMCPY(hmac->keyRaw, key, length);
+        hmac->keyLen = (word16)length;
+
+        return 0; /* nothing to do here */
+    #endif /* HAVE_CAVIUM || HAVE_INTEL_QA */
+    }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+    ret = _InitHmac(hmac, type, heap);
     if (ret != 0)
         return ret;
 
@@ -247,46 +271,48 @@ int wc_HmacSetKey(Hmac* hmac, int type, const byte* key, word32 length)
         return HMAC_MIN_KEYLEN_E;
 #endif
 
+    ip = (byte*)hmac->ipad;
+    op = (byte*)hmac->opad;
+
     switch (hmac->macType) {
-        #ifndef NO_MD5
+    #ifndef NO_MD5
         case MD5:
-        {
             hmac_block_size = MD5_BLOCK_SIZE;
             if (length <= MD5_BLOCK_SIZE) {
                 XMEMCPY(ip, key, length);
             }
             else {
                 ret = wc_Md5Update(&hmac->hash.md5, key, length);
-                if (ret != 0) {
-                    return ret;
-                }
+                if (ret != 0)
+                    break;
                 ret = wc_Md5Final(&hmac->hash.md5, ip);
-                if (ret != 0) {
-                    return ret;
-                }
+                if (ret != 0)
+                    break;
                 length = MD5_DIGEST_SIZE;
             }
-        }
-        break;
-        #endif
+            break;
+    #endif /* !NO_MD5 */
 
-        #ifndef NO_SHA
+    #ifndef NO_SHA
         case SHA:
-        {
             hmac_block_size = SHA_BLOCK_SIZE;
             if (length <= SHA_BLOCK_SIZE) {
                 XMEMCPY(ip, key, length);
             }
             else {
-                wc_ShaUpdate(&hmac->hash.sha, key, length);
-                wc_ShaFinal(&hmac->hash.sha, ip);
+                ret = wc_ShaUpdate(&hmac->hash.sha, key, length);
+                if (ret != 0)
+                    break;
+                ret = wc_ShaFinal(&hmac->hash.sha, ip);
+                if (ret != 0)
+                    break;
+
                 length = SHA_DIGEST_SIZE;
             }
-        }
-        break;
-        #endif
+            break;
+    #endif /* !NO_SHA */
 
-        #ifdef WOLFSSL_SHA224
+    #ifdef WOLFSSL_SHA224
         case SHA224:
         {
             hmac_block_size = SHA224_BLOCK_SIZE;
@@ -296,21 +322,19 @@ int wc_HmacSetKey(Hmac* hmac, int type, const byte* key, word32 length)
             else {
                 ret = wc_Sha224Update(&hmac->hash.sha224, key, length);
                 if (ret != 0)
-                    return ret;
-
+                    break;
                 ret = wc_Sha224Final(&hmac->hash.sha224, ip);
                 if (ret != 0)
-                    return ret;
+                    break;
 
                 length = SHA224_DIGEST_SIZE;
             }
         }
         break;
-        #endif
+    #endif /* WOLFSSL_SHA224 */
 
-        #ifndef NO_SHA256
+    #ifndef NO_SHA256
         case SHA256:
-        {
     		hmac_block_size = SHA256_BLOCK_SIZE;
             if (length <= SHA256_BLOCK_SIZE) {
                 XMEMCPY(ip, key, length);
@@ -318,21 +342,19 @@ int wc_HmacSetKey(Hmac* hmac, int type, const byte* key, word32 length)
             else {
                 ret = wc_Sha256Update(&hmac->hash.sha256, key, length);
                 if (ret != 0)
-                    return ret;
-
+                    break;
                 ret = wc_Sha256Final(&hmac->hash.sha256, ip);
                 if (ret != 0)
-                    return ret;
+                    break;
 
                 length = SHA256_DIGEST_SIZE;
             }
-        }
-        break;
-        #endif
+            break;
+    #endif /* !NO_SHA256 */
 
-        #ifdef WOLFSSL_SHA384
+    #ifdef WOLFSSL_SHA512
+    #ifdef WOLFSSL_SHA384
         case SHA384:
-        {
             hmac_block_size = SHA384_BLOCK_SIZE;
             if (length <= SHA384_BLOCK_SIZE) {
                 XMEMCPY(ip, key, length);
@@ -340,21 +362,16 @@ int wc_HmacSetKey(Hmac* hmac, int type, const byte* key, word32 length)
             else {
                 ret = wc_Sha384Update(&hmac->hash.sha384, key, length);
                 if (ret != 0)
-                    return ret;
-
+                    break;
                 ret = wc_Sha384Final(&hmac->hash.sha384, ip);
                 if (ret != 0)
-                    return ret;
+                    break;
 
                 length = SHA384_DIGEST_SIZE;
             }
-        }
-        break;
-        #endif
-
-        #ifdef WOLFSSL_SHA512
+            break;
+    #endif /* WOLFSSL_SHA384 */
         case SHA512:
-        {
             hmac_block_size = SHA512_BLOCK_SIZE;
             if (length <= SHA512_BLOCK_SIZE) {
                 XMEMCPY(ip, key, length);
@@ -362,21 +379,18 @@ int wc_HmacSetKey(Hmac* hmac, int type, const byte* key, word32 length)
             else {
                 ret = wc_Sha512Update(&hmac->hash.sha512, key, length);
                 if (ret != 0)
-                    return ret;
-
+                    break;
                 ret = wc_Sha512Final(&hmac->hash.sha512, ip);
                 if (ret != 0)
-                    return ret;
+                    break;
 
                 length = SHA512_DIGEST_SIZE;
             }
-        }
-        break;
-        #endif
+            break;
+    #endif /* WOLFSSL_SHA512 */
 
-        #ifdef HAVE_BLAKE2
+    #ifdef HAVE_BLAKE2
         case BLAKE2B_ID:
-        {
             hmac_block_size = BLAKE2B_BLOCKBYTES;
             if (length <= BLAKE2B_BLOCKBYTES) {
                 XMEMCPY(ip, key, length);
@@ -384,29 +398,31 @@ int wc_HmacSetKey(Hmac* hmac, int type, const byte* key, word32 length)
             else {
                 ret = wc_Blake2bUpdate(&hmac->hash.blake2b, key, length);
                 if (ret != 0)
-                    return ret;
-
+                    break;
                 ret = wc_Blake2bFinal(&hmac->hash.blake2b, ip, BLAKE2B_256);
                 if (ret != 0)
-                    return ret;
+                    break;
 
                 length = BLAKE2B_256;
             }
-        }
-        break;
-        #endif
+            break;
+    #endif /* HAVE_BLAKE2 */
 
         default:
             return BAD_FUNC_ARG;
     }
-    if (length < hmac_block_size)
-        XMEMSET(ip + length, 0, hmac_block_size - length);
 
-    for(i = 0; i < hmac_block_size; i++) {
-        op[i] = ip[i] ^ OPAD;
-        ip[i] ^= IPAD;
+    if (ret == 0) {
+        if (length < hmac_block_size)
+            XMEMSET(ip + length, 0, hmac_block_size - length);
+
+        for(i = 0; i < hmac_block_size; i++) {
+            op[i] = ip[i] ^ OPAD;
+            ip[i] ^= IPAD;
+        }
     }
-    return 0;
+
+    return ret;
 }
 
 
@@ -415,76 +431,60 @@ static int HmacKeyInnerHash(Hmac* hmac)
     int ret = 0;
 
     switch (hmac->macType) {
-        #ifndef NO_MD5
+    #ifndef NO_MD5
         case MD5:
-            ret = wc_Md5Update(&hmac->hash.md5, (byte*) hmac->ipad,
-                                                    MD5_BLOCK_SIZE);
-            if (ret != 0) {
-                return ret;
-            }
-        break;
-        #endif
+            ret = wc_Md5Update(&hmac->hash.md5, (byte*)hmac->ipad,
+                                                                MD5_BLOCK_SIZE);
+            break;
+    #endif /* !NO_MD5 */
 
-        #ifndef NO_SHA
+    #ifndef NO_SHA
         case SHA:
-            ret = wc_ShaUpdate(&hmac->hash.sha, (byte*) hmac->ipad,
-                                                    SHA_BLOCK_SIZE);
-            if (ret != 0) {
-                return ret;
-            }
-        break;
-        #endif
+            ret = wc_ShaUpdate(&hmac->hash.sha, (byte*)hmac->ipad,
+                                                                SHA_BLOCK_SIZE);
+            break;
+    #endif /* !NO_SHA */
 
-        #ifdef WOLFSSL_SHA224
+    #ifdef WOLFSSL_SHA224
         case SHA224:
-            ret = wc_Sha224Update(&hmac->hash.sha224,
-                                         (byte*) hmac->ipad, SHA224_BLOCK_SIZE);
-            if (ret != 0)
-                return ret;
-        break;
-        #endif
+            ret = wc_Sha224Update(&hmac->hash.sha224, (byte*)hmac->ipad,
+                                                             SHA224_BLOCK_SIZE);
+            break;
+    #endif /* WOLFSSL_SHA224 */
 
-        #ifndef NO_SHA256
+    #ifndef NO_SHA256
         case SHA256:
-            ret = wc_Sha256Update(&hmac->hash.sha256,
-                                         (byte*) hmac->ipad, SHA256_BLOCK_SIZE);
-            if (ret != 0)
-                return ret;
-        break;
-        #endif
+            ret = wc_Sha256Update(&hmac->hash.sha256, (byte*)hmac->ipad,
+                                                             SHA256_BLOCK_SIZE);
+            break;
+    #endif /* !NO_SHA256 */
 
-        #ifdef WOLFSSL_SHA384
+    #ifdef WOLFSSL_SHA512
+    #ifdef WOLFSSL_SHA384
         case SHA384:
-            ret = wc_Sha384Update(&hmac->hash.sha384,
-                                         (byte*) hmac->ipad, SHA384_BLOCK_SIZE);
-            if (ret != 0)
-                return ret;
-        break;
-        #endif
-
-        #ifdef WOLFSSL_SHA512
+            ret = wc_Sha384Update(&hmac->hash.sha384, (byte*)hmac->ipad,
+                                                             SHA384_BLOCK_SIZE);
+            break;
+    #endif /* WOLFSSL_SHA384 */
         case SHA512:
-            ret = wc_Sha512Update(&hmac->hash.sha512,
-                                         (byte*) hmac->ipad, SHA512_BLOCK_SIZE);
-            if (ret != 0)
-                return ret;
-        break;
-        #endif
+            ret = wc_Sha512Update(&hmac->hash.sha512, (byte*)hmac->ipad,
+                                                             SHA512_BLOCK_SIZE);
+            break;
+    #endif /* WOLFSSL_SHA512 */
 
-        #ifdef HAVE_BLAKE2
+    #ifdef HAVE_BLAKE2
         case BLAKE2B_ID:
-            ret = wc_Blake2bUpdate(&hmac->hash.blake2b,
-                                         (byte*) hmac->ipad,BLAKE2B_BLOCKBYTES);
-            if (ret != 0)
-                return ret;
-        break;
-        #endif
+            ret = wc_Blake2bUpdate(&hmac->hash.blake2b, (byte*)hmac->ipad,
+                                                            BLAKE2B_BLOCKBYTES);
+            break;
+    #endif /* HAVE_BLAKE2 */
 
         default:
-        break;
+            break;
     }
 
-    hmac->innerHashKeyed = 1;
+    if (ret == 0)
+        hmac->innerHashKeyed = 1;
 
     return ret;
 }
@@ -492,13 +492,18 @@ static int HmacKeyInnerHash(Hmac* hmac)
 
 int wc_HmacUpdate(Hmac* hmac, const byte* msg, word32 length)
 {
-    int ret;
+    int ret = 0;
 
-#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC)
     if (hmac->asyncDev.marker == WOLFSSL_ASYNC_MARKER_HMAC) {
+    #if defined(HAVE_CAVIUM)
         return NitroxHmacUpdate(hmac, msg, length);
+    #elif defined(HAVE_INTEL_QA)
+        return IntelQaHmac(&hmac->asyncDev, hmac->macType,
+            hmac->keyRaw, hmac->keyLen, NULL, msg, length);
+    #endif
     }
-#endif
+#endif /* WOLFSSL_ASYNC_CRYPT */
 
     if (!hmac->innerHashKeyed) {
         ret = HmacKeyInnerHash(hmac);
@@ -507,69 +512,52 @@ int wc_HmacUpdate(Hmac* hmac, const byte* msg, word32 length)
     }
 
     switch (hmac->macType) {
-        #ifndef NO_MD5
+    #ifndef NO_MD5
         case MD5:
             ret = wc_Md5Update(&hmac->hash.md5, msg, length);
-            if (ret != 0) {
-                return ret;
-            }
-        break;
-        #endif
+            break;
+    #endif /* !NO_MD5 */
 
-        #ifndef NO_SHA
+    #ifndef NO_SHA
         case SHA:
             ret = wc_ShaUpdate(&hmac->hash.sha, msg, length);
-            if (ret != 0) {
-                return ret;
-            }
-        break;
-        #endif
+            break;
+    #endif /* !NO_SHA */
 
-        #ifdef WOLFSSL_SHA224
+    #ifdef WOLFSSL_SHA224
         case SHA224:
             ret = wc_Sha224Update(&hmac->hash.sha224, msg, length);
-            if (ret != 0)
-                return ret;
-        break;
-        #endif
+            break;
+    #endif /* WOLFSSL_SHA224 */
 
-        #ifndef NO_SHA256
+    #ifndef NO_SHA256
         case SHA256:
             ret = wc_Sha256Update(&hmac->hash.sha256, msg, length);
-            if (ret != 0)
-                return ret;
-        break;
-        #endif
+            break;
+    #endif /* !NO_SHA256 */
 
-        #ifdef WOLFSSL_SHA384
+    #ifdef WOLFSSL_SHA512
+    #ifdef WOLFSSL_SHA384
         case SHA384:
             ret = wc_Sha384Update(&hmac->hash.sha384, msg, length);
-            if (ret != 0)
-                return ret;
-        break;
-        #endif
-
-        #ifdef WOLFSSL_SHA512
+            break;
+    #endif /* WOLFSSL_SHA384 */
         case SHA512:
             ret = wc_Sha512Update(&hmac->hash.sha512, msg, length);
-            if (ret != 0)
-                return ret;
-        break;
-        #endif
+            break;
+    #endif /* WOLFSSL_SHA512 */
 
-        #ifdef HAVE_BLAKE2
+    #ifdef HAVE_BLAKE2
         case BLAKE2B_ID:
             ret = wc_Blake2bUpdate(&hmac->hash.blake2b, msg, length);
-            if (ret != 0)
-                return ret;
-        break;
-        #endif
+            break;
+    #endif /* HAVE_BLAKE2 */
 
         default:
-        break;
+            break;
     }
 
-    return 0;
+    return ret;
 }
 
 
@@ -577,11 +565,20 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
 {
     int ret;
 
-#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC)
     if (hmac->asyncDev.marker == WOLFSSL_ASYNC_MARKER_HMAC) {
-        return NitroxHmacFinal(hmac, hash);
+        int hashLen = wc_HmacSizeByType(hmac->macType);
+        if (hashLen <= 0)
+            return hashLen;
+
+    #if defined(HAVE_CAVIUM)
+        return NitroxHmacFinal(hmac, hmac->macType, hash, hashLen);
+    #elif defined(HAVE_INTEL_QA)
+        return IntelQaHmac(&hmac->asyncDev, hmac->macType,
+            hmac->keyRaw, hmac->keyLen, hash, NULL, hashLen);
+    #endif
     }
-#endif
+#endif /* WOLFSSL_ASYNC_CRYPT */
 
     if (!hmac->innerHashKeyed) {
         ret = HmacKeyInnerHash(hmac);
@@ -590,241 +587,184 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
     }
 
     switch (hmac->macType) {
-        #ifndef NO_MD5
+    #ifndef NO_MD5
         case MD5:
-        {
-            ret = wc_Md5Final(&hmac->hash.md5, (byte*) hmac->innerHash);
-            if (ret != 0) {
-                return ret;
-            }
-
-            ret = wc_Md5Update(&hmac->hash.md5, (byte*) hmac->opad,
-                                                        MD5_BLOCK_SIZE);
-            if (ret != 0) {
-                return ret;
-            }
-
-            ret = wc_Md5Update(&hmac->hash.md5,
-                                     (byte*) hmac->innerHash, MD5_DIGEST_SIZE);
-            if (ret != 0) {
-                return ret;
-            }
-
+            ret = wc_Md5Final(&hmac->hash.md5, (byte*)hmac->innerHash);
+            if (ret != 0)
+                break;
+            ret = wc_Md5Update(&hmac->hash.md5, (byte*)hmac->opad,
+                                                                MD5_BLOCK_SIZE);
+            if (ret != 0)
+                break;
+            ret = wc_Md5Update(&hmac->hash.md5, (byte*)hmac->innerHash,
+                                                               MD5_DIGEST_SIZE);
+            if (ret != 0)
+                break;
             ret = wc_Md5Final(&hmac->hash.md5, hash);
-            if (ret != 0) {
-                return ret;
-            }
-        }
-        break;
-        #endif
+            break;
+    #endif /* !NO_MD5 */
 
-        #ifndef NO_SHA
+    #ifndef NO_SHA
         case SHA:
-        {
-            ret = wc_ShaFinal(&hmac->hash.sha, (byte*) hmac->innerHash);
-            if (ret != 0) {
-                return ret;
-            }
-            ret = wc_ShaUpdate(&hmac->hash.sha, (byte*) hmac->opad,
-                                                            SHA_BLOCK_SIZE);
-            if (ret != 0) {
-                return ret;
-            }
-            ret = wc_ShaUpdate(&hmac->hash.sha,
-                                     (byte*) hmac->innerHash, SHA_DIGEST_SIZE);
-            if (ret != 0) {
-                return ret;
-            }
+            ret = wc_ShaFinal(&hmac->hash.sha, (byte*)hmac->innerHash);
+            if (ret != 0)
+                break;
+            ret = wc_ShaUpdate(&hmac->hash.sha, (byte*)hmac->opad,
+                                                                SHA_BLOCK_SIZE);
+            if (ret != 0)
+                break;
+            ret = wc_ShaUpdate(&hmac->hash.sha, (byte*)hmac->innerHash,
+                                                               SHA_DIGEST_SIZE);
+            if (ret != 0)
+                break;
             ret = wc_ShaFinal(&hmac->hash.sha, hash);
-            if (ret != 0) {
-                return ret;
-            }
-        }
-        break;
-        #endif
+            break;
+    #endif /* !NO_SHA */
 
-        #ifdef WOLFSSL_SHA224
+    #ifdef WOLFSSL_SHA224
         case SHA224:
         {
-            ret = wc_Sha224Final(&hmac->hash.sha224, (byte*) hmac->innerHash);
+            ret = wc_Sha224Final(&hmac->hash.sha224, (byte*)hmac->innerHash);
             if (ret != 0)
-                return ret;
-
-            ret = wc_Sha224Update(&hmac->hash.sha224,
-                                 (byte*) hmac->opad, SHA224_BLOCK_SIZE);
+                break;
+            ret = wc_Sha224Update(&hmac->hash.sha224, (byte*)hmac->opad,
+                                                             SHA224_BLOCK_SIZE);
             if (ret != 0)
-                return ret;
-
-            ret = wc_Sha224Update(&hmac->hash.sha224,
-                                 (byte*) hmac->innerHash, SHA224_DIGEST_SIZE);
+                break;
+            ret = wc_Sha224Update(&hmac->hash.sha224, (byte*)hmac->innerHash,
+                                                            SHA224_DIGEST_SIZE);
             if (ret != 0)
-                return ret;
-
+                break;
             ret = wc_Sha224Final(&hmac->hash.sha224, hash);
             if (ret != 0)
-                return ret;
+                break;
         }
         break;
-        #endif
+    #endif /* WOLFSSL_SHA224 */
 
-        #ifndef NO_SHA256
+    #ifndef NO_SHA256
         case SHA256:
-        {
-            ret = wc_Sha256Final(&hmac->hash.sha256, (byte*) hmac->innerHash);
+            ret = wc_Sha256Final(&hmac->hash.sha256, (byte*)hmac->innerHash);
             if (ret != 0)
-                return ret;
-
-            ret = wc_Sha256Update(&hmac->hash.sha256,
-                                (byte*) hmac->opad, SHA256_BLOCK_SIZE);
+                break;
+            ret = wc_Sha256Update(&hmac->hash.sha256, (byte*)hmac->opad,
+                                                             SHA256_BLOCK_SIZE);
             if (ret != 0)
-                return ret;
-
-            ret = wc_Sha256Update(&hmac->hash.sha256,
-                                (byte*) hmac->innerHash, SHA256_DIGEST_SIZE);
+                break;
+            ret = wc_Sha256Update(&hmac->hash.sha256, (byte*)hmac->innerHash,
+                                                            SHA256_DIGEST_SIZE);
             if (ret != 0)
-                return ret;
-
+                break;
             ret = wc_Sha256Final(&hmac->hash.sha256, hash);
-            if (ret != 0)
-                return ret;
-        }
-        break;
-        #endif
+            break;
+    #endif /* !NO_SHA256 */
 
-        #ifdef WOLFSSL_SHA384
+    #ifdef WOLFSSL_SHA512
+    #ifdef WOLFSSL_SHA384
         case SHA384:
-        {
-            ret = wc_Sha384Final(&hmac->hash.sha384, (byte*) hmac->innerHash);
+            ret = wc_Sha384Final(&hmac->hash.sha384, (byte*)hmac->innerHash);
             if (ret != 0)
-                return ret;
-
-            ret = wc_Sha384Update(&hmac->hash.sha384,
-                                 (byte*) hmac->opad, SHA384_BLOCK_SIZE);
+                break;
+            ret = wc_Sha384Update(&hmac->hash.sha384, (byte*)hmac->opad,
+                                                             SHA384_BLOCK_SIZE);
             if (ret != 0)
-                return ret;
-
-            ret = wc_Sha384Update(&hmac->hash.sha384,
-                                 (byte*) hmac->innerHash, SHA384_DIGEST_SIZE);
+                break;
+            ret = wc_Sha384Update(&hmac->hash.sha384, (byte*)hmac->innerHash,
+                                                            SHA384_DIGEST_SIZE);
             if (ret != 0)
-                return ret;
-
+                break;
             ret = wc_Sha384Final(&hmac->hash.sha384, hash);
-            if (ret != 0)
-                return ret;
-        }
-        break;
-        #endif
-
-        #ifdef WOLFSSL_SHA512
+            break;
+    #endif /* WOLFSSL_SHA384 */
         case SHA512:
-        {
-            ret = wc_Sha512Final(&hmac->hash.sha512, (byte*) hmac->innerHash);
+            ret = wc_Sha512Final(&hmac->hash.sha512, (byte*)hmac->innerHash);
             if (ret != 0)
-                return ret;
-
-            ret = wc_Sha512Update(&hmac->hash.sha512,
-                                 (byte*) hmac->opad, SHA512_BLOCK_SIZE);
+                break;
+            ret = wc_Sha512Update(&hmac->hash.sha512, (byte*)hmac->opad,
+                                                             SHA512_BLOCK_SIZE);
             if (ret != 0)
-                return ret;
-
-            ret = wc_Sha512Update(&hmac->hash.sha512,
-                                 (byte*) hmac->innerHash, SHA512_DIGEST_SIZE);
+                break;
+            ret = wc_Sha512Update(&hmac->hash.sha512, (byte*)hmac->innerHash,
+                                                            SHA512_DIGEST_SIZE);
             if (ret != 0)
-                return ret;
-
+                break;
             ret = wc_Sha512Final(&hmac->hash.sha512, hash);
-            if (ret != 0)
-                return ret;
-        }
-        break;
-        #endif
+            break;
+    #endif /* WOLFSSL_SHA512 */
 
-        #ifdef HAVE_BLAKE2
+    #ifdef HAVE_BLAKE2
         case BLAKE2B_ID:
-        {
-            ret = wc_Blake2bFinal(&hmac->hash.blake2b, (byte*) hmac->innerHash,
-                         BLAKE2B_256);
+            ret = wc_Blake2bFinal(&hmac->hash.blake2b, (byte*)hmac->innerHash,
+                                                                   BLAKE2B_256);
             if (ret != 0)
-                return ret;
-
-            ret = wc_Blake2bUpdate(&hmac->hash.blake2b,
-                                 (byte*) hmac->opad, BLAKE2B_BLOCKBYTES);
+                break;
+            ret = wc_Blake2bUpdate(&hmac->hash.blake2b, (byte*)hmac->opad,
+                                                            BLAKE2B_BLOCKBYTES);
             if (ret != 0)
-                return ret;
-
-            ret = wc_Blake2bUpdate(&hmac->hash.blake2b,
-                                 (byte*) hmac->innerHash, BLAKE2B_256);
+                break;
+            ret = wc_Blake2bUpdate(&hmac->hash.blake2b, (byte*)hmac->innerHash,
+                                                                   BLAKE2B_256);
             if (ret != 0)
-                return ret;
-
+                break;
             ret = wc_Blake2bFinal(&hmac->hash.blake2b, hash, BLAKE2B_256);
-            if (ret != 0)
-                return ret;
-        }
-        break;
-        #endif
+            break;
+    #endif /* HAVE_BLAKE2 */
 
         default:
-        break;
+            ret = BAD_FUNC_ARG;
+            break;
     }
 
-    hmac->innerHashKeyed = 0;
+    if (ret == 0) {
+        hmac->innerHashKeyed = 0;
+    }
 
-    return 0;
+    return ret;
 }
 
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-
-/* Initialize Hmac for use with Nitrox device */
-int wc_HmacAsyncInit(Hmac* hmac, int devId)
+/* Zero an Hmac, record its heap hint and, in async builds, init its device context */
+int wc_HmacInit(Hmac* hmac, void* heap, int devId)
 {
     int ret = 0;
 
     if (hmac == NULL)
-        return -1;
+        return BAD_FUNC_ARG; /* NULL context */
 
-    ret = wolfAsync_DevCtxInit(&hmac->asyncDev, WOLFSSL_ASYNC_MARKER_HMAC, devId);
-    if (ret != 0) {
-        return ret;
-    }
+    XMEMSET(hmac, 0, sizeof(Hmac)); /* every field starts out cleared */
+    hmac->heap = heap; /* handed to the async init below when enabled */
 
-#ifdef HAVE_CAVIUM
-    hmac->keyLen  = 0;
-    hmac->dataLen = 0;
-    hmac->type    = 0;
-    hmac->data    = NULL;        /* buffered input data */
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC)
+    hmac->keyLen = 0; /* already zero after the XMEMSET above */
+    #ifdef HAVE_CAVIUM
+        hmac->dataLen = 0;
+        hmac->data    = NULL;        /* buffered input data */
+    #endif /* HAVE_CAVIUM */
 
-    hmac->innerHashKeyed = 0;
-#endif /* HAVE_CAVIUM */
-
-    /* default to NULL heap hint or test value */
-#ifdef WOLFSSL_HEAP_TEST
-    hmac->heap = (void)WOLFSSL_HEAP_TEST;
+    ret = wolfAsync_DevCtxInit(&hmac->asyncDev, WOLFSSL_ASYNC_MARKER_HMAC,
+                                                         hmac->heap, devId);
 #else
-    hmac->heap = NULL;
-#endif /* WOLFSSL_HEAP_TEST */
+    (void)devId; /* only consumed by the async device init */
+#endif /* WOLFSSL_ASYNC_CRYPT */
 
-    return 0;
+    return ret; /* 0, or the result of wolfAsync_DevCtxInit */
 }
 
-
-/* Free Hmac from use with Nitrox device */
-void wc_HmacAsyncFree(Hmac* hmac)
+/* Release an Hmac's async device context; does nothing for NULL or non-async builds */
+void wc_HmacFree(Hmac* hmac)
 {
     if (hmac == NULL)
         return;
 
-    wolfAsync_DevCtxFree(&hmac->asyncDev);
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC)
+    wolfAsync_DevCtxFree(&hmac->asyncDev, WOLFSSL_ASYNC_MARKER_HMAC);
 
 #ifdef HAVE_CAVIUM
-    XFREE(hmac->data, hmac->heap, DYNAMIC_TYPE_ASYNC_TMP);
+    XFREE(hmac->data, hmac->heap, DYNAMIC_TYPE_HMAC); /* Cavium data buffer */
     hmac->data = NULL;
-#endif
-}
-
+#endif /* HAVE_CAVIUM */
 #endif /* WOLFSSL_ASYNC_CRYPT */
-
+}
 
 int wolfSSL_GetHmacMaxSize(void)
 {
@@ -832,92 +772,91 @@ int wolfSSL_GetHmacMaxSize(void)
 }
 
 #ifdef HAVE_HKDF
+    /* HMAC-KDF (RFC 5869) with hash type, optional salt and info, return 0 on success */
+    int wc_HKDF(int type, const byte* inKey, word32 inKeySz,
+                       const byte* salt,  word32 saltSz,
+                       const byte* info,  word32 infoSz,
+                       byte* out,         word32 outSz)
+    {
+        Hmac   myHmac;
+    #ifdef WOLFSSL_SMALL_STACK
+        byte* tmp;
+        byte* prk;
+    #else
+        byte   tmp[MAX_DIGEST_SIZE]; /* localSalt helper and T */
+        byte   prk[MAX_DIGEST_SIZE];
+    #endif
+        const  byte* localSalt;  /* either points to user input or tmp */
+        int    hashSz = wc_HmacSizeByType(type);
+        word32 outIdx = 0;
+        byte   n = 0x1; /* one-byte block counter fed into every expand HMAC */
+        int    ret;
 
-/* HMAC-KDF with hash type, optional salt and info, return 0 on success */
-int wc_HKDF(int type, const byte* inKey, word32 inKeySz,
-                   const byte* salt,  word32 saltSz,
-                   const byte* info,  word32 infoSz,
-                   byte* out,         word32 outSz)
-{
-    Hmac   myHmac;
-#ifdef WOLFSSL_SMALL_STACK
-    byte* tmp;
-    byte* prk;
-#else
-    byte   tmp[MAX_DIGEST_SIZE]; /* localSalt helper and T */
-    byte   prk[MAX_DIGEST_SIZE];
-#endif
-    const  byte* localSalt;  /* either points to user input or tmp */
-    int    hashSz = wc_HmacSizeByType(type);
-    word32 outIdx = 0;
-    byte   n = 0x1;
-    int    ret;
+        if (hashSz < 0 || outSz > (word32)hashSz * 255)
+            return BAD_FUNC_ARG; /* bad type, or n would wrap past 255 blocks */
 
-    if (hashSz < 0)
-        return BAD_FUNC_ARG;
+    #ifdef WOLFSSL_SMALL_STACK
+        tmp = (byte*)XMALLOC(MAX_DIGEST_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        if (tmp == NULL)
+            return MEMORY_E;
 
-#ifdef WOLFSSL_SMALL_STACK
-    tmp = (byte*)XMALLOC(MAX_DIGEST_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (tmp == NULL)
-        return MEMORY_E;
-
-    prk = (byte*)XMALLOC(MAX_DIGEST_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (prk == NULL) {
-        XFREE(tmp, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-        return MEMORY_E;
-    }
-#endif
-
-    localSalt = salt;
-    if (localSalt == NULL) {
-        XMEMSET(tmp, 0, hashSz);
-        localSalt = tmp;
-        saltSz    = hashSz;
-    }
-
-    ret = wc_HmacSetKey(&myHmac, type, localSalt, saltSz);
-    if (ret == 0)
-        ret = wc_HmacUpdate(&myHmac, inKey, inKeySz);
-    if (ret == 0)
-        ret = wc_HmacFinal(&myHmac,  prk);
-
-    if (ret == 0) {
-        while (outIdx < outSz) {
-            int    tmpSz = (n == 1) ? 0 : hashSz;
-            word32 left = outSz - outIdx;
-
-            ret = wc_HmacSetKey(&myHmac, type, prk, hashSz);
-            if (ret != 0)
-                break;
-            ret = wc_HmacUpdate(&myHmac, tmp, tmpSz);
-            if (ret != 0)
-                break;
-            ret = wc_HmacUpdate(&myHmac, info, infoSz);
-            if (ret != 0)
-                break;
-            ret = wc_HmacUpdate(&myHmac, &n, 1);
-            if (ret != 0)
-                break;
-            ret = wc_HmacFinal(&myHmac, tmp);
-            if (ret != 0)
-                break;
-
-            left = min(left, (word32)hashSz);
-            XMEMCPY(out+outIdx, tmp, left);
-
-            outIdx += hashSz;
-            n++;
+        prk = (byte*)XMALLOC(MAX_DIGEST_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        if (prk == NULL) {
+            XFREE(tmp, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+            return MEMORY_E;
         }
+    #endif
+
+        localSalt = salt;
+        if (localSalt == NULL) {
+            XMEMSET(tmp, 0, hashSz); /* NULL salt => hashSz zero bytes */
+            localSalt = tmp;
+            saltSz    = hashSz;
+        }
+        /* extract: prk = HMAC(localSalt, inKey) */
+        ret = wc_HmacSetKey(&myHmac, type, localSalt, saltSz);
+        if (ret == 0)
+            ret = wc_HmacUpdate(&myHmac, inKey, inKeySz);
+        if (ret == 0)
+            ret = wc_HmacFinal(&myHmac,  prk);
+        /* expand: T(n) = HMAC(prk, T(n-1) | info | n), copied block by block into out */
+        if (ret == 0) {
+            while (outIdx < outSz) {
+                int    tmpSz = (n == 1) ? 0 : hashSz; /* first block has no T(n-1) */
+                word32 left = outSz - outIdx;
+
+                ret = wc_HmacSetKey(&myHmac, type, prk, hashSz);
+                if (ret != 0)
+                    break;
+                ret = wc_HmacUpdate(&myHmac, tmp, tmpSz);
+                if (ret != 0)
+                    break;
+                ret = wc_HmacUpdate(&myHmac, info, infoSz);
+                if (ret != 0)
+                    break;
+                ret = wc_HmacUpdate(&myHmac, &n, 1);
+                if (ret != 0)
+                    break;
+                ret = wc_HmacFinal(&myHmac, tmp);
+                if (ret != 0)
+                    break;
+
+                left = min(left, (word32)hashSz); /* last block may be partial */
+                XMEMCPY(out+outIdx, tmp, left);
+
+                outIdx += hashSz;
+                n++;
+            }
+        }
+
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(tmp, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(prk, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
+
+        return ret;
     }
 
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(tmp, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    XFREE(prk, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return ret;
-}
-
 #endif /* HAVE_HKDF */
 
 #endif /* HAVE_FIPS */
diff --git a/wolfcrypt/src/include.am b/wolfcrypt/src/include.am
index 81aa797db..3b895934b 100644
--- a/wolfcrypt/src/include.am
+++ b/wolfcrypt/src/include.am
@@ -39,7 +39,8 @@ EXTRA_DIST += \
               wolfcrypt/src/fp_sqr_comba_7.i \
               wolfcrypt/src/fp_sqr_comba_8.i \
               wolfcrypt/src/fp_sqr_comba_9.i \
-              wolfcrypt/src/fp_sqr_comba_small_set.i
+              wolfcrypt/src/fp_sqr_comba_small_set.i \
+              wolfcrypt/src/fe_x25519_128.i
 
 EXTRA_DIST += wolfcrypt/src/port/ti/ti-aes.c \
               wolfcrypt/src/port/ti/ti-des3.c \
@@ -58,3 +59,10 @@ src_libwolfssl_la_SOURCES += wolfcrypt/src/port/cavium/cavium_nitrox.c
 
 EXTRA_DIST += wolfcrypt/src/port/cavium/README.md
 endif
+
+if BUILD_INTEL_QA
+src_libwolfssl_la_SOURCES += wolfcrypt/src/port/intel/quickassist.c
+src_libwolfssl_la_SOURCES += wolfcrypt/src/port/intel/quickassist_mem.c
+
+EXTRA_DIST += wolfcrypt/src/port/intel/README.md
+endif
diff --git a/wolfcrypt/src/integer.c b/wolfcrypt/src/integer.c
index 63d5c0293..624deea29 100644
--- a/wolfcrypt/src/integer.c
+++ b/wolfcrypt/src/integer.c
@@ -157,6 +157,9 @@ int mp_init (mp_int * a)
   a->used  = 0;
   a->alloc = 0;
   a->sign  = MP_ZPOS;
+#ifdef HAVE_WOLF_BIGINT
+  wc_bigint_init(&a->raw);
+#endif
 
   return MP_OKAY;
 }
@@ -178,15 +181,28 @@ void mp_clear (mp_int * a)
     }
 
     /* free ram */
-    XFREE(a->dp, NULL, DYNAMIC_TYPE_BIGINT);
+    mp_free(a);
 
     /* reset members to make debugging easier */
-    a->dp    = NULL;
     a->alloc = a->used = 0;
     a->sign  = MP_ZPOS;
   }
 }
 
+/* free a's digit buffer and, if built in, its raw bigint; NULL a is ignored */
+void mp_free (mp_int * a)
+{
+  if (a == NULL)
+    return;
+  if (a->dp != NULL) { /* only if a hasn't been freed previously */
+    XFREE(a->dp, NULL, DYNAMIC_TYPE_BIGINT);
+    a->dp    = NULL; /* so a second call skips the XFREE */
+  }
+#ifdef HAVE_WOLF_BIGINT
+  wc_bigint_free(&a->raw);
+#endif
+}
+
 void mp_forcezero(mp_int * a)
 {
     if (a == NULL)
@@ -198,10 +214,9 @@ void mp_forcezero(mp_int * a)
       ForceZero(a->dp, a->used * sizeof(mp_digit));
 
       /* free ram */
-      XFREE(a->dp, NULL, DYNAMIC_TYPE_BIGINT);
+      mp_free(a);
 
       /* reset members to make debugging easier */
-      a->dp    = NULL;
       a->alloc = a->used = 0;
       a->sign  = MP_ZPOS;
     }
@@ -330,7 +345,7 @@ int mp_copy (mp_int * a, mp_int * b)
   }
 
   /* grow dest */
-  if (b->alloc < a->used) {
+  if (b->alloc < a->used || b->alloc == 0) {
      if ((res = mp_grow (b, a->used)) != MP_OKAY) {
         return res;
      }
@@ -373,7 +388,7 @@ int mp_grow (mp_int * a, int size)
   mp_digit *tmp;
 
   /* if the alloc size is smaller alloc more ram */
-  if (a->alloc < size) {
+  if (a->alloc < size || size == 0) {
     /* ensure there are always at least MP_PREC digits extra on top */
     size += (MP_PREC * 2) - (size % MP_PREC);
 
@@ -469,6 +484,9 @@ void mp_zero (mp_int * a)
 
   a->sign = MP_ZPOS;
   a->used = 0;
+#ifdef HAVE_WOLF_BIGINT
+  wc_bigint_zero(&a->raw);
+#endif
 
   tmp = a->dp;
   for (n = 0; n < a->alloc; n++) {
@@ -2949,6 +2967,9 @@ int mp_init_size (mp_int * a, int size)
   a->used  = 0;
   a->alloc = size;
   a->sign  = MP_ZPOS;
+#ifdef HAVE_WOLF_BIGINT
+  wc_bigint_init(&a->raw);
+#endif
 
   /* zero the digits */
   for (x = 0; x < size; x++) {
diff --git a/wolfcrypt/src/logging.c b/wolfcrypt/src/logging.c
index 8aecf5f0b..7253001ea 100644
--- a/wolfcrypt/src/logging.c
+++ b/wolfcrypt/src/logging.c
@@ -38,7 +38,7 @@
     WOLFSSL_API int  wolfSSL_Debugging_ON(void);
     WOLFSSL_API void wolfSSL_Debugging_OFF(void);
 #ifdef __cplusplus
-    } 
+    }
 #endif
 
 #if defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE)
@@ -233,7 +233,7 @@ void WOLFSSL_ERROR(int error)
     #endif
 {
     #if defined(DEBUG_WOLFSSL) && !defined(WOLFSSL_NGINX)
-    if (loggingEnabled)
+    if (loggingEnabled && error != WC_PENDING_E)
     #endif
     {
         char buffer[80];
diff --git a/wolfcrypt/src/md5.c b/wolfcrypt/src/md5.c
old mode 100644
new mode 100755
index a427de00c..342aea466
--- a/wolfcrypt/src/md5.c
+++ b/wolfcrypt/src/md5.c
@@ -31,13 +31,8 @@
 
 #if defined(WOLFSSL_TI_HASH)
     /* #include  included by wc_port.c */
-#else
 
-#ifdef WOLFSSL_PIC32MZ_HASH
-#define wc_InitMd5   wc_InitMd5_sw
-#define wc_Md5Update wc_Md5Update_sw
-#define wc_Md5Final  wc_Md5Final_sw
-#endif
+#else
 
 #include 
 #include 
@@ -49,22 +44,21 @@
     #include 
 #endif
 
-#ifdef FREESCALE_MMCAU_SHA
-    #include "fsl_mmcau.h"
-    #define XTRANSFORM(S,B)  Transform((S), (B))
-#else
-    #define XTRANSFORM(S,B)  Transform((S))
-#endif
-
 
+/* Hardware Acceleration */
 #if defined(STM32F2_HASH) || defined(STM32F4_HASH)
     /*
      * STM32F2/F4 hardware MD5 support through the standard peripheral
      * library. (See note in README).
      */
 
-    void wc_InitMd5(Md5* md5)
+    #define HAVE_MD5_CUST_API
+
+    int wc_InitMd5_ex(Md5* md5, void* heap, int devId)
     {
+        (void)heap;
+        (void)devId;
+
         /* STM32 struct notes:
          * md5->buffer  = first 4 bytes used to hold partial block if needed
          * md5->buffLen = num bytes currently stored in md5->buffer
@@ -85,9 +79,11 @@
 
         /* reset HASH processor */
         HASH->CR |= HASH_CR_INIT;
+
+        return 0;
     }
 
-    void wc_Md5Update(Md5* md5, const byte* data, word32 len)
+    int wc_Md5Update(Md5* md5, const byte* data, word32 len)
     {
         word32 i = 0;
         word32 fill = 0;
@@ -110,7 +106,7 @@
                 /* append partial to existing stored block */
                 XMEMCPY((byte*)md5->buffer + md5->buffLen, data, len);
                 md5->buffLen += len;
-                return;
+                return 0;
             }
         }
 
@@ -131,9 +127,11 @@
 
         /* keep track of total data length thus far */
         md5->loLen += (len - md5->buffLen);
+
+        return 0;
     }
 
-    void wc_Md5Final(Md5* md5, byte* hash)
+    int wc_Md5Final(Md5* md5, byte* hash)
     {
         __IO uint16_t nbvalidbitsdata = 0;
 
@@ -165,14 +163,149 @@
 
         XMEMCPY(hash, md5->digest, MD5_DIGEST_SIZE);
 
-        wc_InitMd5(md5);  /* reset state */
+        return wc_InitMd5(md5);  /* reset state */
     }
 
-#else /* Begin wolfCrypt software implementation */
+#elif defined(FREESCALE_MMCAU_SHA)
+    #include "cau_api.h"
+    #define XTRANSFORM(S,B)  Transform((S), (B))
 
-int wc_InitMd5(Md5* md5)
+    static int Transform(Md5* md5, byte* data)
+    {
+        int lockRc = wolfSSL_CryptHwMutexLock();
+        if (lockRc != 0)
+            return lockRc; /* lock not acquired: skip the MMCAU call */
+        MMCAU_MD5_HashN(data, 1, (uint32_t*)md5->digest);
+        wolfSSL_CryptHwMutexUnLock();
+        return 0;
+    }
+
+#elif defined(WOLFSSL_PIC32MZ_HASH)
+    #define wc_InitMd5   wc_InitMd5_sw
+    #define wc_Md5Update wc_Md5Update_sw
+    #define wc_Md5Final  wc_Md5Final_sw
+
+    #define NEED_SOFT_MD5
+
+#else
+    #define NEED_SOFT_MD5
+
+#endif /* End Hardware Acceleration */
+
+
+#ifdef NEED_SOFT_MD5
+
+    #define XTRANSFORM(S,B)  Transform((S)) /* B is dropped on this path */
+    /* the four mixing functions handed to MD5STEP, one per group of 16 steps */
+    #define F1(x, y, z) (z ^ (x & (y ^ z)))
+    #define F2(x, y, z) F1(z, x, y)
+    #define F3(x, y, z) (x ^ y ^ z)
+    #define F4(x, y, z) (y ^ (x | ~z))
+
+    #define MD5STEP(f, w, x, y, z, data, s) \
+        w = rotlFixed(w + f(x, y, z) + data, s) + x
+    /* Run 64 MD5 steps over md5->buffer[0..15] and add the result into md5->digest */
+    static int Transform(Md5* md5)
+    {
+        /* Copy context->state[] to working vars  */
+        word32 a = md5->digest[0];
+        word32 b = md5->digest[1];
+        word32 c = md5->digest[2];
+        word32 d = md5->digest[3];
+        /* steps 1-16: F1 */
+        MD5STEP(F1, a, b, c, d, md5->buffer[0]  + 0xd76aa478,  7);
+        MD5STEP(F1, d, a, b, c, md5->buffer[1]  + 0xe8c7b756, 12);
+        MD5STEP(F1, c, d, a, b, md5->buffer[2]  + 0x242070db, 17);
+        MD5STEP(F1, b, c, d, a, md5->buffer[3]  + 0xc1bdceee, 22);
+        MD5STEP(F1, a, b, c, d, md5->buffer[4]  + 0xf57c0faf,  7);
+        MD5STEP(F1, d, a, b, c, md5->buffer[5]  + 0x4787c62a, 12);
+        MD5STEP(F1, c, d, a, b, md5->buffer[6]  + 0xa8304613, 17);
+        MD5STEP(F1, b, c, d, a, md5->buffer[7]  + 0xfd469501, 22);
+        MD5STEP(F1, a, b, c, d, md5->buffer[8]  + 0x698098d8,  7);
+        MD5STEP(F1, d, a, b, c, md5->buffer[9]  + 0x8b44f7af, 12);
+        MD5STEP(F1, c, d, a, b, md5->buffer[10] + 0xffff5bb1, 17);
+        MD5STEP(F1, b, c, d, a, md5->buffer[11] + 0x895cd7be, 22);
+        MD5STEP(F1, a, b, c, d, md5->buffer[12] + 0x6b901122,  7);
+        MD5STEP(F1, d, a, b, c, md5->buffer[13] + 0xfd987193, 12);
+        MD5STEP(F1, c, d, a, b, md5->buffer[14] + 0xa679438e, 17);
+        MD5STEP(F1, b, c, d, a, md5->buffer[15] + 0x49b40821, 22);
+        /* steps 17-32: F2 */
+        MD5STEP(F2, a, b, c, d, md5->buffer[1]  + 0xf61e2562,  5);
+        MD5STEP(F2, d, a, b, c, md5->buffer[6]  + 0xc040b340,  9);
+        MD5STEP(F2, c, d, a, b, md5->buffer[11] + 0x265e5a51, 14);
+        MD5STEP(F2, b, c, d, a, md5->buffer[0]  + 0xe9b6c7aa, 20);
+        MD5STEP(F2, a, b, c, d, md5->buffer[5]  + 0xd62f105d,  5);
+        MD5STEP(F2, d, a, b, c, md5->buffer[10] + 0x02441453,  9);
+        MD5STEP(F2, c, d, a, b, md5->buffer[15] + 0xd8a1e681, 14);
+        MD5STEP(F2, b, c, d, a, md5->buffer[4]  + 0xe7d3fbc8, 20);
+        MD5STEP(F2, a, b, c, d, md5->buffer[9]  + 0x21e1cde6,  5);
+        MD5STEP(F2, d, a, b, c, md5->buffer[14] + 0xc33707d6,  9);
+        MD5STEP(F2, c, d, a, b, md5->buffer[3]  + 0xf4d50d87, 14);
+        MD5STEP(F2, b, c, d, a, md5->buffer[8]  + 0x455a14ed, 20);
+        MD5STEP(F2, a, b, c, d, md5->buffer[13] + 0xa9e3e905,  5);
+        MD5STEP(F2, d, a, b, c, md5->buffer[2]  + 0xfcefa3f8,  9);
+        MD5STEP(F2, c, d, a, b, md5->buffer[7]  + 0x676f02d9, 14);
+        MD5STEP(F2, b, c, d, a, md5->buffer[12] + 0x8d2a4c8a, 20);
+        /* steps 33-48: F3 */
+        MD5STEP(F3, a, b, c, d, md5->buffer[5]  + 0xfffa3942,  4);
+        MD5STEP(F3, d, a, b, c, md5->buffer[8]  + 0x8771f681, 11);
+        MD5STEP(F3, c, d, a, b, md5->buffer[11] + 0x6d9d6122, 16);
+        MD5STEP(F3, b, c, d, a, md5->buffer[14] + 0xfde5380c, 23);
+        MD5STEP(F3, a, b, c, d, md5->buffer[1]  + 0xa4beea44,  4);
+        MD5STEP(F3, d, a, b, c, md5->buffer[4]  + 0x4bdecfa9, 11);
+        MD5STEP(F3, c, d, a, b, md5->buffer[7]  + 0xf6bb4b60, 16);
+        MD5STEP(F3, b, c, d, a, md5->buffer[10] + 0xbebfbc70, 23);
+        MD5STEP(F3, a, b, c, d, md5->buffer[13] + 0x289b7ec6,  4);
+        MD5STEP(F3, d, a, b, c, md5->buffer[0]  + 0xeaa127fa, 11);
+        MD5STEP(F3, c, d, a, b, md5->buffer[3]  + 0xd4ef3085, 16);
+        MD5STEP(F3, b, c, d, a, md5->buffer[6]  + 0x04881d05, 23);
+        MD5STEP(F3, a, b, c, d, md5->buffer[9]  + 0xd9d4d039,  4);
+        MD5STEP(F3, d, a, b, c, md5->buffer[12] + 0xe6db99e5, 11);
+        MD5STEP(F3, c, d, a, b, md5->buffer[15] + 0x1fa27cf8, 16);
+        MD5STEP(F3, b, c, d, a, md5->buffer[2]  + 0xc4ac5665, 23);
+        /* steps 49-64: F4 */
+        MD5STEP(F4, a, b, c, d, md5->buffer[0]  + 0xf4292244,  6);
+        MD5STEP(F4, d, a, b, c, md5->buffer[7]  + 0x432aff97, 10);
+        MD5STEP(F4, c, d, a, b, md5->buffer[14] + 0xab9423a7, 15);
+        MD5STEP(F4, b, c, d, a, md5->buffer[5]  + 0xfc93a039, 21);
+        MD5STEP(F4, a, b, c, d, md5->buffer[12] + 0x655b59c3,  6);
+        MD5STEP(F4, d, a, b, c, md5->buffer[3]  + 0x8f0ccc92, 10);
+        MD5STEP(F4, c, d, a, b, md5->buffer[10] + 0xffeff47d, 15);
+        MD5STEP(F4, b, c, d, a, md5->buffer[1]  + 0x85845dd1, 21);
+        MD5STEP(F4, a, b, c, d, md5->buffer[8]  + 0x6fa87e4f,  6);
+        MD5STEP(F4, d, a, b, c, md5->buffer[15] + 0xfe2ce6e0, 10);
+        MD5STEP(F4, c, d, a, b, md5->buffer[6]  + 0xa3014314, 15);
+        MD5STEP(F4, b, c, d, a, md5->buffer[13] + 0x4e0811a1, 21);
+        MD5STEP(F4, a, b, c, d, md5->buffer[4]  + 0xf7537e82,  6);
+        MD5STEP(F4, d, a, b, c, md5->buffer[11] + 0xbd3af235, 10);
+        MD5STEP(F4, c, d, a, b, md5->buffer[2]  + 0x2ad7d2bb, 15);
+        MD5STEP(F4, b, c, d, a, md5->buffer[9]  + 0xeb86d391, 21);
+
+        /* Add the working vars back into digest state[]  */
+        md5->digest[0] += a;
+        md5->digest[1] += b;
+        md5->digest[2] += c;
+        md5->digest[3] += d;
+
+        return 0; /* this software path has no failure case */
+    }
+#endif /* NEED_SOFT_MD5 */
+
+
+#ifndef HAVE_MD5_CUST_API
+static INLINE void AddMd5Length(Md5* md5, word32 len)
 {
-    if (md5 == NULL) {
+    const word32 before = md5->loLen;   /* low word prior to the add */
+    md5->loLen = before + len;
+    if (md5->loLen < before)            /* low word wrapped: carry low to high */
+        md5->hiLen++;
+}
+
+static int _InitMd5(Md5* md5)
+{
+    int ret = 0;
+    /* reject a NULL context before any field is written */
+    if (md5 == NULL) {
         return BAD_FUNC_ARG;
     }
 
@@ -185,135 +318,55 @@ int wc_InitMd5(Md5* md5)
     md5->loLen   = 0;
     md5->hiLen   = 0;
 
-    return 0;
-}
-
-#ifdef FREESCALE_MMCAU_SHA
-static int Transform(Md5* md5, byte* data)
-{
-    int ret = wolfSSL_CryptHwMutexLock();
-    if(ret == 0) {
-        MMCAU_MD5_HashN(data, 1, (uint32_t*)(md5->digest));
-        wolfSSL_CryptHwMutexUnLock();
-    }
     return ret;
 }
-#endif /* FREESCALE_MMCAU_SHA */
 
-#ifndef FREESCALE_MMCAU_SHA
-
-static void Transform(Md5* md5)
+int wc_InitMd5_ex(Md5* md5, void* heap, int devId)
 {
-#define F1(x, y, z) (z ^ (x & (y ^ z)))
-#define F2(x, y, z) F1(z, x, y)
-#define F3(x, y, z) (x ^ y ^ z)
-#define F4(x, y, z) (y ^ (x | ~z))
+    int ret = 0;
 
-#define MD5STEP(f, w, x, y, z, data, s) \
-    w = rotlFixed(w + f(x, y, z) + data, s) + x
+    if (md5 == NULL)
+        return BAD_FUNC_ARG;
 
-    /* Copy context->state[] to working vars  */
-    word32 a = md5->digest[0];
-    word32 b = md5->digest[1];
-    word32 c = md5->digest[2];
-    word32 d = md5->digest[3];
+    md5->heap = heap;
 
-    MD5STEP(F1, a, b, c, d, md5->buffer[0]  + 0xd76aa478,  7);
-    MD5STEP(F1, d, a, b, c, md5->buffer[1]  + 0xe8c7b756, 12);
-    MD5STEP(F1, c, d, a, b, md5->buffer[2]  + 0x242070db, 17);
-    MD5STEP(F1, b, c, d, a, md5->buffer[3]  + 0xc1bdceee, 22);
-    MD5STEP(F1, a, b, c, d, md5->buffer[4]  + 0xf57c0faf,  7);
-    MD5STEP(F1, d, a, b, c, md5->buffer[5]  + 0x4787c62a, 12);
-    MD5STEP(F1, c, d, a, b, md5->buffer[6]  + 0xa8304613, 17);
-    MD5STEP(F1, b, c, d, a, md5->buffer[7]  + 0xfd469501, 22);
-    MD5STEP(F1, a, b, c, d, md5->buffer[8]  + 0x698098d8,  7);
-    MD5STEP(F1, d, a, b, c, md5->buffer[9]  + 0x8b44f7af, 12);
-    MD5STEP(F1, c, d, a, b, md5->buffer[10] + 0xffff5bb1, 17);
-    MD5STEP(F1, b, c, d, a, md5->buffer[11] + 0x895cd7be, 22);
-    MD5STEP(F1, a, b, c, d, md5->buffer[12] + 0x6b901122,  7);
-    MD5STEP(F1, d, a, b, c, md5->buffer[13] + 0xfd987193, 12);
-    MD5STEP(F1, c, d, a, b, md5->buffer[14] + 0xa679438e, 17);
-    MD5STEP(F1, b, c, d, a, md5->buffer[15] + 0x49b40821, 22);
+    ret = _InitMd5(md5);
+    if (ret != 0)
+        return ret;
 
-    MD5STEP(F2, a, b, c, d, md5->buffer[1]  + 0xf61e2562,  5);
-    MD5STEP(F2, d, a, b, c, md5->buffer[6]  + 0xc040b340,  9);
-    MD5STEP(F2, c, d, a, b, md5->buffer[11] + 0x265e5a51, 14);
-    MD5STEP(F2, b, c, d, a, md5->buffer[0]  + 0xe9b6c7aa, 20);
-    MD5STEP(F2, a, b, c, d, md5->buffer[5]  + 0xd62f105d,  5);
-    MD5STEP(F2, d, a, b, c, md5->buffer[10] + 0x02441453,  9);
-    MD5STEP(F2, c, d, a, b, md5->buffer[15] + 0xd8a1e681, 14);
-    MD5STEP(F2, b, c, d, a, md5->buffer[4]  + 0xe7d3fbc8, 20);
-    MD5STEP(F2, a, b, c, d, md5->buffer[9]  + 0x21e1cde6,  5);
-    MD5STEP(F2, d, a, b, c, md5->buffer[14] + 0xc33707d6,  9);
-    MD5STEP(F2, c, d, a, b, md5->buffer[3]  + 0xf4d50d87, 14);
-    MD5STEP(F2, b, c, d, a, md5->buffer[8]  + 0x455a14ed, 20);
-    MD5STEP(F2, a, b, c, d, md5->buffer[13] + 0xa9e3e905,  5);
-    MD5STEP(F2, d, a, b, c, md5->buffer[2]  + 0xfcefa3f8,  9);
-    MD5STEP(F2, c, d, a, b, md5->buffer[7]  + 0x676f02d9, 14);
-    MD5STEP(F2, b, c, d, a, md5->buffer[12] + 0x8d2a4c8a, 20);
-
-    MD5STEP(F3, a, b, c, d, md5->buffer[5]  + 0xfffa3942,  4);
-    MD5STEP(F3, d, a, b, c, md5->buffer[8]  + 0x8771f681, 11);
-    MD5STEP(F3, c, d, a, b, md5->buffer[11] + 0x6d9d6122, 16);
-    MD5STEP(F3, b, c, d, a, md5->buffer[14] + 0xfde5380c, 23);
-    MD5STEP(F3, a, b, c, d, md5->buffer[1]  + 0xa4beea44,  4);
-    MD5STEP(F3, d, a, b, c, md5->buffer[4]  + 0x4bdecfa9, 11);
-    MD5STEP(F3, c, d, a, b, md5->buffer[7]  + 0xf6bb4b60, 16);
-    MD5STEP(F3, b, c, d, a, md5->buffer[10] + 0xbebfbc70, 23);
-    MD5STEP(F3, a, b, c, d, md5->buffer[13] + 0x289b7ec6,  4);
-    MD5STEP(F3, d, a, b, c, md5->buffer[0]  + 0xeaa127fa, 11);
-    MD5STEP(F3, c, d, a, b, md5->buffer[3]  + 0xd4ef3085, 16);
-    MD5STEP(F3, b, c, d, a, md5->buffer[6]  + 0x04881d05, 23);
-    MD5STEP(F3, a, b, c, d, md5->buffer[9]  + 0xd9d4d039,  4);
-    MD5STEP(F3, d, a, b, c, md5->buffer[12] + 0xe6db99e5, 11);
-    MD5STEP(F3, c, d, a, b, md5->buffer[15] + 0x1fa27cf8, 16);
-    MD5STEP(F3, b, c, d, a, md5->buffer[2]  + 0xc4ac5665, 23);
-
-    MD5STEP(F4, a, b, c, d, md5->buffer[0]  + 0xf4292244,  6);
-    MD5STEP(F4, d, a, b, c, md5->buffer[7]  + 0x432aff97, 10);
-    MD5STEP(F4, c, d, a, b, md5->buffer[14] + 0xab9423a7, 15);
-    MD5STEP(F4, b, c, d, a, md5->buffer[5]  + 0xfc93a039, 21);
-    MD5STEP(F4, a, b, c, d, md5->buffer[12] + 0x655b59c3,  6);
-    MD5STEP(F4, d, a, b, c, md5->buffer[3]  + 0x8f0ccc92, 10);
-    MD5STEP(F4, c, d, a, b, md5->buffer[10] + 0xffeff47d, 15);
-    MD5STEP(F4, b, c, d, a, md5->buffer[1]  + 0x85845dd1, 21);
-    MD5STEP(F4, a, b, c, d, md5->buffer[8]  + 0x6fa87e4f,  6);
-    MD5STEP(F4, d, a, b, c, md5->buffer[15] + 0xfe2ce6e0, 10);
-    MD5STEP(F4, c, d, a, b, md5->buffer[6]  + 0xa3014314, 15);
-    MD5STEP(F4, b, c, d, a, md5->buffer[13] + 0x4e0811a1, 21);
-    MD5STEP(F4, a, b, c, d, md5->buffer[4]  + 0xf7537e82,  6);
-    MD5STEP(F4, d, a, b, c, md5->buffer[11] + 0xbd3af235, 10);
-    MD5STEP(F4, c, d, a, b, md5->buffer[2]  + 0x2ad7d2bb, 15);
-    MD5STEP(F4, b, c, d, a, md5->buffer[9]  + 0xeb86d391, 21);
-
-    /* Add the working vars back into digest state[]  */
-    md5->digest[0] += a;
-    md5->digest[1] += b;
-    md5->digest[2] += c;
-    md5->digest[3] += d;
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5)
+    ret = wolfAsync_DevCtxInit(&md5->asyncDev, WOLFSSL_ASYNC_MARKER_MD5,
+                                                            md5->heap, devId);
+#else
+    (void)devId;
+#endif
+    return ret;
 }
 
-#endif /* End Software implementation */
-
-
-static INLINE void AddLength(Md5* md5, word32 len)
-{
-    word32 tmp = md5->loLen;
-    if ( (md5->loLen += len) < tmp)
-        md5->hiLen++;                       /* carry low to high */
-}
-
-
 int wc_Md5Update(Md5* md5, const byte* data, word32 len)
 {
+    int ret = 0;
     byte* local;
 
-    if (md5 == NULL || (data == NULL && len > 0)){
+    if (md5 == NULL || (data == NULL && len > 0)) {
         return BAD_FUNC_ARG;
     }
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5)
+    if (md5->asyncDev.marker == WOLFSSL_ASYNC_MARKER_MD5) {
+    #if defined(HAVE_INTEL_QA)
+        return IntelQaSymMd5(&md5->asyncDev, NULL, data, len);
+    #endif
+    }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
     /* do block size increments */
     local = (byte*)md5->buffer;
 
+    /* check that internal buffLen is valid */
+    if (md5->buffLen > MD5_BLOCK_SIZE)
+        return BUFFER_E;
+
     while (len) {
         word32 add = min(len, MD5_BLOCK_SIZE - md5->buffLen);
         XMEMCPY(&local[md5->buffLen], data, add);
@@ -323,18 +376,17 @@ int wc_Md5Update(Md5* md5, const byte* data, word32 len)
         len          -= add;
 
         if (md5->buffLen == MD5_BLOCK_SIZE) {
-            #if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
-                ByteReverseWords(md5->buffer, md5->buffer, MD5_BLOCK_SIZE);
-            #endif
+        #if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+            ByteReverseWords(md5->buffer, md5->buffer, MD5_BLOCK_SIZE);
+        #endif
             XTRANSFORM(md5, local);
-            AddLength(md5, MD5_BLOCK_SIZE);
+            AddMd5Length(md5, MD5_BLOCK_SIZE);
             md5->buffLen = 0;
         }
     }
-    return 0;
+    return ret;
 }
 
-
 int wc_Md5Final(Md5* md5, byte* hash)
 {
     byte* local;
@@ -343,10 +395,17 @@ int wc_Md5Final(Md5* md5, byte* hash)
         return BAD_FUNC_ARG;
     }
 
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5)
+    if (md5->asyncDev.marker == WOLFSSL_ASYNC_MARKER_MD5) {
+    #if defined(HAVE_INTEL_QA)
+        return IntelQaSymMd5(&md5->asyncDev, hash, NULL, MD5_DIGEST_SIZE);
+    #endif
+    }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
     local = (byte*)md5->buffer;
 
-    AddLength(md5, md5->buffLen);  /* before adding pads */
-
+    AddMd5Length(md5, md5->buffLen);  /* before adding pads */
     local[md5->buffLen++] = 0x80;  /* add 1 */
 
     /* pad with zeros */
@@ -354,9 +413,9 @@ int wc_Md5Final(Md5* md5, byte* hash)
         XMEMSET(&local[md5->buffLen], 0, MD5_BLOCK_SIZE - md5->buffLen);
         md5->buffLen += MD5_BLOCK_SIZE - md5->buffLen;
 
-        #if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
-            ByteReverseWords(md5->buffer, md5->buffer, MD5_BLOCK_SIZE);
-        #endif
+    #if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+        ByteReverseWords(md5->buffer, md5->buffer, MD5_BLOCK_SIZE);
+    #endif
         XTRANSFORM(md5, local);
         md5->buffLen = 0;
     }
@@ -368,70 +427,69 @@ int wc_Md5Final(Md5* md5, byte* hash)
     md5->loLen = md5->loLen << 3;
 
     /* store lengths */
-    #if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
-        ByteReverseWords(md5->buffer, md5->buffer, MD5_BLOCK_SIZE);
-    #endif
+#if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+    ByteReverseWords(md5->buffer, md5->buffer, MD5_BLOCK_SIZE);
+#endif
     /* ! length ordering dependent on digest endian type ! */
     XMEMCPY(&local[MD5_PAD_SIZE], &md5->loLen, sizeof(word32));
     XMEMCPY(&local[MD5_PAD_SIZE + sizeof(word32)], &md5->hiLen, sizeof(word32));
 
     XTRANSFORM(md5, local);
-    #ifdef BIG_ENDIAN_ORDER
-        ByteReverseWords(md5->digest, md5->digest, MD5_DIGEST_SIZE);
-    #endif
+#ifdef BIG_ENDIAN_ORDER
+    ByteReverseWords(md5->digest, md5->digest, MD5_DIGEST_SIZE);
+#endif
     XMEMCPY(hash, md5->digest, MD5_DIGEST_SIZE);
 
+    return _InitMd5(md5); /* reset state */
+}
+#endif /* !HAVE_MD5_CUST_API */
 
-    return wc_InitMd5(md5);  /* reset state */
+
+int wc_InitMd5(Md5* md5)
+{
+    return wc_InitMd5_ex(md5, NULL, INVALID_DEVID); /* heap = NULL */
 }
 
-#endif /* End wolfCrypt software implementation */
+void wc_Md5Free(Md5* md5)
+{
+    if (md5 != NULL) { /* a NULL context has nothing to release */
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5)
+        wolfAsync_DevCtxFree(&md5->asyncDev, WOLFSSL_ASYNC_MARKER_MD5);
+#endif
+    }
+}
 
-
-int wc_Md5Hash(const byte* data, word32 len, byte* hash)
+int wc_Md5GetHash(Md5* md5, byte* hash)
 {
     int ret;
-#ifdef WOLFSSL_SMALL_STACK
-    Md5* md5;
-#else
-    Md5 md5[1];
-#endif
+    Md5 tmpMd5;
 
-#ifdef WOLFSSL_SMALL_STACK
-    md5 = (Md5*)XMALLOC(sizeof(Md5), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (md5 == NULL)
-        return MEMORY_E;
-#endif
+    if (md5 == NULL || hash == NULL)
+        return BAD_FUNC_ARG;
 
-    ret = wc_InitMd5(md5);
-    if (ret != 0) {
-    #ifdef WOLFSSL_SMALL_STACK
-        XFREE(md5, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    #endif
-        return ret;
-    }
-    ret = wc_Md5Update(md5, data, len);
-    if (ret != 0) {
-    #ifdef WOLFSSL_SMALL_STACK
-        XFREE(md5, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    #endif
-        return ret;
-    }
-    ret = wc_Md5Final(md5, hash);
-    if (ret != 0) {
-    #ifdef WOLFSSL_SMALL_STACK
-        XFREE(md5, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    #endif
-        return ret;
+    ret = wc_Md5Copy(md5, &tmpMd5);
+    if (ret == 0) {
+        ret = wc_Md5Final(&tmpMd5, hash);
     }
 
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(md5, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    return ret;
+}
+
+int wc_Md5Copy(Md5* src, Md5* dst)
+{
+    int ret = 0;
+
+    if (src == NULL || dst == NULL)
+        return BAD_FUNC_ARG;
+
+    XMEMCPY(dst, src, sizeof(Md5));
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
 #endif
 
-    return 0;
+    return ret;
 }
 
 #endif /* WOLFSSL_TI_HASH */
-
 #endif /* NO_MD5 */
diff --git a/wolfcrypt/src/memory.c b/wolfcrypt/src/memory.c
index c5f0e47b9..927b0c4ad 100644
--- a/wolfcrypt/src/memory.c
+++ b/wolfcrypt/src/memory.c
@@ -74,6 +74,16 @@ int wolfSSL_SetAllocators(wolfSSL_Malloc_cb  mf,
     return res;
 }
 
+/* Hand back the current allocator callbacks; NULL arguments are skipped. */
+int wolfSSL_GetAllocators(wolfSSL_Malloc_cb*  mf, wolfSSL_Free_cb* ff,
+                          wolfSSL_Realloc_cb* rf)
+{
+    if (mf != NULL) *mf = malloc_function;
+    if (ff != NULL) *ff = free_function;
+    if (rf != NULL) *rf = realloc_function;
+    return 0; /* this getter has no failure path */
+}
+
 #ifndef WOLFSSL_STATIC_MEMORY
 #ifdef WOLFSSL_DEBUG_MEMORY
 void* wolfSSL_Malloc(size_t size, const char* func, unsigned int line)
diff --git a/wolfcrypt/src/misc.c b/wolfcrypt/src/misc.c
index 363db46a8..08ba55e86 100644
--- a/wolfcrypt/src/misc.c
+++ b/wolfcrypt/src/misc.c
@@ -31,7 +31,7 @@
 
 #include 
 
-/* inlining these functions is a huge speed increase and a small size decrease, 
+/* inlining these functions is a huge speed increase and a small size decrease,
    because the functions are smaller than function call setup/cleanup, e.g.,
    md5 benchmark is twice as fast with inline.  If you don't want it, then
    define NO_INLINE and compile this file into wolfssl, otherwise it's used as
@@ -79,7 +79,7 @@
     STATIC INLINE word32 rotlFixed(word32 x, word32 y)
     {
         return (x << y) | (x >> (sizeof(y) * 8 - y));
-    }   
+    }
 
 
     STATIC INLINE word32 rotrFixed(word32 x, word32 y)
@@ -128,7 +128,7 @@ STATIC INLINE void ByteReverseWords(word32* out, const word32* in,
 STATIC INLINE word64 rotlFixed64(word64 x, word64 y)
 {
     return (x << y) | (x >> (sizeof(y) * 8 - y));
-}  
+}
 
 
 STATIC INLINE word64 rotrFixed64(word64 x, word64 y)
@@ -139,8 +139,8 @@ STATIC INLINE word64 rotrFixed64(word64 x, word64 y)
 
 STATIC INLINE word64 ByteReverseWord64(word64 value)
 {
-#ifdef WOLFCRYPT_SLOW_WORD64
-	return (word64)(ByteReverseWord32((word32)value)) << 32 | 
+#if defined(WOLFCRYPT_SLOW_WORD64)
+	return (word64)(ByteReverseWord32((word32)value)) << 32 |
                     ByteReverseWord32((word32)(value>>32));
 #else
 	value = ((value & W64LIT(0xFF00FF00FF00FF00)) >> 8) |
diff --git a/wolfcrypt/src/pkcs12.c b/wolfcrypt/src/pkcs12.c
index 24877b60d..b96d2a636 100644
--- a/wolfcrypt/src/pkcs12.c
+++ b/wolfcrypt/src/pkcs12.c
@@ -527,15 +527,19 @@ static int wc_PKCS12_verify(WC_PKCS12* pkcs12, byte* data, word32 dataSz,
     }
 
     /* now that key has been created use it to get HMAC hash on data */
-    if ((ret = wc_HmacSetKey(&hmac, typeH, key, kLen)) != 0) {
+    if ((ret = wc_HmacInit(&hmac, NULL, INVALID_DEVID)) != 0) {
         return ret;
     }
-    if ((ret = wc_HmacUpdate(&hmac, data, dataSz)) != 0) {
+    ret = wc_HmacSetKey(&hmac, typeH, key, kLen);
+    if (ret == 0)
+        ret = wc_HmacUpdate(&hmac, data, dataSz);
+    if (ret == 0)
+        ret = wc_HmacFinal(&hmac, digest);
+    wc_HmacFree(&hmac);
+
+    if (ret != 0)
         return ret;
-    }
-    if ((ret = wc_HmacFinal(&hmac, digest)) != 0) {
-        return ret;
-    }
+
 #ifdef WOLFSSL_DEBUG_PKCS12
     {
         byte* p;
diff --git a/wolfcrypt/src/pkcs7.c b/wolfcrypt/src/pkcs7.c
index 4f7962f34..b65cfdb10 100644
--- a/wolfcrypt/src/pkcs7.c
+++ b/wolfcrypt/src/pkcs7.c
@@ -31,6 +31,12 @@
 #include 
 #include 
 #include 
+#ifndef NO_RSA
+    #include 
+#endif
+#ifdef HAVE_ECC
+    #include 
+#endif
 #ifdef NO_INLINE
     #include 
 #else
@@ -2259,7 +2265,7 @@ int wc_PKCS7_EncodeEnvelopedData(PKCS7* pkcs7, byte* output, word32 outputSz)
     }
 
     /* generate random content encryption key */
-    ret = wc_InitRng_ex(&rng, pkcs7->heap);
+    ret = wc_InitRng_ex(&rng, pkcs7->heap, INVALID_DEVID);
     if (ret != 0)
         return ret;
 
diff --git a/wolfcrypt/src/port/arm/armv8-aes.c b/wolfcrypt/src/port/arm/armv8-aes.c
index 250411924..518c8fcda 100644
--- a/wolfcrypt/src/port/arm/armv8-aes.c
+++ b/wolfcrypt/src/port/arm/armv8-aes.c
@@ -301,16 +301,22 @@ int wc_AesSetIV(Aes* aes, const byte* iv)
 
 
 /* set the heap hint for aes struct */
-int wc_InitAes_h(Aes* aes, void* h)
+int wc_AesInit(Aes* aes, void* heap, int devId)
 {
     if (aes == NULL)
         return BAD_FUNC_ARG;
 
-    aes->heap = h;
+    aes->heap = heap;
+    (void)devId; /* device id is not used by this implementation */
 
     return 0;
 }
 
+void wc_AesFree(Aes* aes)
+{
+    (void)aes; /* nothing is released here; the cast marks aes as unused */
+}
+
 
 #ifdef __aarch64__
 /* AES CCM/GCM use encrypt direct but not decrypt */
@@ -4552,26 +4558,7 @@ int  wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
 #endif /* HAVE_AES_DECRYPT */
 #endif /* HAVE_AESCCM */
 
-#ifdef WOLFSSL_ASYNC_CRYPT
 
-/* Initialize Aes for use with Nitrox device */
-int wc_AesAsyncInit(Aes* aes, int devId)
-{
-    WOLFSSL_STUB("wc_AesAsyncInit");
-    (void)aes;
-    (void)devId;
-    return 0;
-}
-
-
-/* Free Aes from use with Nitrox device */
-void wc_AesAsyncFree(Aes* aes)
-{
-    WOLFSSL_STUB("wc_AesAsyncFree");
-    (void)aes;
-}
-
-#endif /* WOLFSSL_ASYNC_CRYPT */
 
 #ifdef HAVE_AESGCM /* common GCM functions 32 and 64 bit */
 WOLFSSL_API int wc_GmacSetKey(Gmac* gmac, const byte* key, word32 len)
diff --git a/wolfcrypt/src/port/arm/armv8-sha256.c b/wolfcrypt/src/port/arm/armv8-sha256.c
index 80f3a901a..48d7230ef 100644
--- a/wolfcrypt/src/port/arm/armv8-sha256.c
+++ b/wolfcrypt/src/port/arm/armv8-sha256.c
@@ -55,7 +55,8 @@ static const ALIGN32 word32 K[64] = {
     0x90BEFFFAL, 0xA4506CEBL, 0xBEF9A3F7L, 0xC67178F2L
 };
 
-int wc_InitSha256(Sha256* sha256)
+
+int wc_InitSha256_ex(Sha256* sha256, void* heap, int devId)
 {
     int ret = 0;
 
@@ -76,9 +77,21 @@ int wc_InitSha256(Sha256* sha256)
     sha256->loLen   = 0;
     sha256->hiLen   = 0;
 
+    (void)heap;
+    (void)devId;
+
     return ret;
 }
 
+int wc_InitSha256(Sha256* sha256)
+{
+    return wc_InitSha256_ex(sha256, NULL, INVALID_DEVID); /* heap = NULL */
+}
+
+void wc_Sha256Free(Sha256* sha256)
+{
+    (void)sha256; /* nothing is released here; the cast marks it as unused */
+}
 
 static INLINE void AddLength(Sha256* sha256, word32 len)
 {
@@ -1287,7 +1300,35 @@ int wc_Sha256Final(Sha256* sha256, byte* hash)
 
     return wc_InitSha256(sha256);  /* reset state */
 }
+
 #endif /* __aarch64__ */
 
-#endif /* NO_SHA256 and WOLFSSL_ARMASM */
 
+/* Produce the current digest by finalizing a copy of the running context. */
+int wc_Sha256GetHash(Sha256* sha256, byte* hash)
+{
+    Sha256 scratch;
+    int    status;
+
+    if (sha256 == NULL || hash == NULL)
+        return BAD_FUNC_ARG;
+
+    status = wc_Sha256Copy(sha256, &scratch);
+    if (status != 0)
+        return status;
+    return wc_Sha256Final(&scratch, hash);
+}
+
+/* Duplicate the whole hashing context from src into dst (byte-wise copy). */
+int wc_Sha256Copy(Sha256* src, Sha256* dst)
+{
+    if (src == NULL || dst == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* plain structure copy; no per-field handling is done here */
+    XMEMCPY(dst, src, sizeof(Sha256));
+    return 0;
+}
+
+#endif /* NO_SHA256 and WOLFSSL_ARMASM */
diff --git a/wolfcrypt/src/port/cavium/README.md b/wolfcrypt/src/port/cavium/README.md
index 982a938b9..b98d866dd 100644
--- a/wolfcrypt/src/port/cavium/README.md
+++ b/wolfcrypt/src/port/cavium/README.md
@@ -1,32 +1,3 @@
-# Cavium Nitrox V Support
+# Cavium Nitrox III/V Support
 
-## Directory Structure:
-`/`
-    `/CNN55XX-SDK`
-    `/wolfssl`
-
-## Cavium Driver
-
-Tested again `CNN55XX-Driver-Linux-KVM-XEN-PF-SDK-0.2-04.tar`
-From inside `CNN55XX-SDK`:
-1. `make`
-    Note: To resolve warnings in `CNN55XX-SDK/include/vf_defs.h`:
-    a. Changed `vf_config_mode_str` to return `const char*` and modify `vf_mode_str` to be `const char*`.
-    b. In `vf_config_mode_to_num_vfs` above `default:` add `case PF:`.
-
-2. `sudo make load`
-
-## wolfSSL
-
-Currently the AES and DES3 benchmark tests causes the kernel to crash, so they are disabled for now, even though the wolfCrypt tests pass for those.
-
-From inside `wolfssl`:
-1. `./configure --with-cavium-v=../CNN55XX-SDK --enable-asynccrypt --enable-aesni --enable-intelasm --disable-aes --disable-aesgcm --disable-des3`
-2. `make`
-
-## Usage
-
-Note: Must run applications with sudo to access device.
-
-`sudo ./wolfcrypt/benchmark/benchmark`
-`sudo ./wolfcrypt/test/testwolfcrypt`
+Please contact wolfSSL at info@wolfssl.com to request an evaluation.
diff --git a/wolfcrypt/src/port/cavium/cavium_nitrox.c b/wolfcrypt/src/port/cavium/cavium_nitrox.c
deleted file mode 100644
index 1acc49644..000000000
--- a/wolfcrypt/src/port/cavium/cavium_nitrox.c
+++ /dev/null
@@ -1,778 +0,0 @@
-/* cavium-nitrox.c
- *
- * Copyright (C) 2006-2016 wolfSSL Inc.
- *
- * This file is part of wolfSSL. (formerly known as CyaSSL)
- *
- * wolfSSL is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * wolfSSL is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
- */
-
-#ifdef HAVE_CONFIG_H
-    #include 
-#endif
-
-#include 
-
-#ifdef HAVE_CAVIUM
-
-#include 
-#include 
-#include 
-#include 
-#ifndef NO_RSA
-    #include 
-#endif
-#ifndef NO_AES
-    #include 
-#endif
-
-#include 
-#include  /* For ntohs */
-
-static CspHandle mLastDevHandle = INVALID_DEVID;
-
-int NitroxTranslateResponseCode(int ret)
-{
-    switch (ret) {
-        case EAGAIN:
-        case ERR_REQ_PENDING:
-            ret = WC_PENDING_E;
-            break;
-        case ERR_REQ_TIMEOUT:
-            ret = WC_TIMEOUT_E;
-            break;
-        case 0:
-            /* leave as-is */
-            break;
-        default:
-            printf("NitroxTranslateResponseCode Unknown ret=%x\n", ret);
-            ret = ASYNC_INIT_E;
-    }
-    return ret;
-}
-
-
-CspHandle NitroxGetDeviceHandle(void)
-{
-    return mLastDevHandle;
-}
-    
-CspHandle NitroxOpenDevice(int dma_mode, int dev_id)
-{
-    mLastDevHandle = INVALID_DEVID;
-
-#ifdef HAVE_CAVIUM_V
-    (void)dma_mode;
-
-    if (CspInitialize(dev_id, &mLastDevHandle)) {
-        return -1;
-    }
-
-#else
-    Csp1CoreAssignment core_assign;
-    Uint32             device;
-
-    if (CspInitialize(CAVIUM_DIRECT, CAVIUM_DEV_ID)) {
-        return -1;
-    }
-    if (Csp1GetDevType(&device)) {
-        return -1;
-    }
-    if (device != NPX_DEVICE) {
-        if (ioctl(gpkpdev_hdlr[CAVIUM_DEV_ID], IOCTL_CSP1_GET_CORE_ASSIGNMENT,
-        (Uint32 *)&core_assign)!= 0) {
-            return -1;
-        }
-    }
-    CspShutdown(CAVIUM_DEV_ID);
-
-    mLastDevHandle = CspInitialize(dma_mode, dev_id);
-    if (mLastDevHandle == 0) {
-        mLastDevHandle = dev_id;
-    }
-
-#endif /* HAVE_CAVIUM_V */
-
-    return mLastDevHandle;
-}
-
-
-int NitroxAllocContext(CaviumNitroxDev* nitrox, CspHandle devId,
-    ContextType type)
-{
-    int ret;
-
-    if (nitrox == NULL) {
-        return -1;
-    }
-
-    /* If invalid handle provided, use last open one */
-    if (devId == INVALID_DEVID) {
-        devId = NitroxGetDeviceHandle();
-    }
-
-#ifdef HAVE_CAVIUM_V
-    ret = CspAllocContext(devId, type, &nitrox->contextHandle);
-#else
-    ret = CspAllocContext(type, &nitrox->contextHandle, devId);
-#endif
-    if (ret != 0) {
-        return -1;
-    }
-
-    nitrox->type = type;
-    nitrox->devId = devId;
-
-    return 0;
-}
-
-void NitroxFreeContext(CaviumNitroxDev* nitrox)
-{
-    if (nitrox == NULL) {
-        return;
-    }
-
-#ifdef HAVE_CAVIUM_V
-    CspFreeContext(nitrox->devId, nitrox->type, nitrox->contextHandle);
-#else
-    CspFreeContext(nitrox->type, nitrox->contextHandle, nitrox->devId);
-#endif
-}
-
-void NitroxCloseDevice(CspHandle devId)
-{
-    if (devId >= 0) {
-        CspShutdown(devId);
-    }
-}
-
-#if defined(WOLFSSL_ASYNC_CRYPT)
-
-int NitroxCheckRequest(CspHandle devId, CavReqId reqId)
-{
-    int ret = CspCheckForCompletion(devId, reqId);
-    return NitroxTranslateResponseCode(ret);
-}
-
-int NitroxCheckRequests(CspHandle devId, CspMultiRequestStatusBuffer* req_stat_buf)
-{
-    int ret = CspGetAllResults(req_stat_buf, devId);
-    return NitroxTranslateResponseCode(ret);   
-}
-
-
-#ifndef NO_RSA
-
-int NitroxRsaExptMod(const byte* in, word32 inLen,
-                     byte* exponent, word32 expLen,
-                     byte* modulus, word32 modLen,
-                     byte* out, word32* outLen, RsaKey* key)
-{
-    int ret;
-
-    if (key == NULL || in == NULL || inLen == 0 || exponent == NULL ||
-                                            modulus == NULL || out == NULL) {
-        return BAD_FUNC_ARG;
-    }
-
-    (void)outLen;
-
-#ifdef HAVE_CAVIUM_V
-    ret = CspMe(key->asyncDev.dev.devId, CAVIUM_REQ_MODE, CAVIUM_SSL_GRP,
-            CAVIUM_DPORT, modLen, expLen, inLen,
-            modulus, exponent, (Uint8*)in, out,
-            &key->asyncDev.dev.reqId);
-    #if 0
-    /* TODO: Try MeCRT */
-    ret = CspMeCRT();
-    #endif
-#else
-    /* Not implemented/supported */
-    ret = NOT_COMPILED_IN;
-#endif
-    ret = NitroxTranslateResponseCode(ret);
-    if (ret != 0) {
-        return ret;
-    }
-
-    return ret;
-}
-
-int NitroxRsaPublicEncrypt(const byte* in, word32 inLen, byte* out,
-                           word32 outLen, RsaKey* key)
-{
-    word32 ret;
-
-    if (key == NULL || in == NULL || out == NULL || outLen < (word32)key->n.used) {
-        return BAD_FUNC_ARG;
-    }
-
-#ifdef HAVE_CAVIUM_V
-    ret = CspPkcs1v15Enc(key->asyncDev.dev.devId, CAVIUM_REQ_MODE, CAVIUM_SSL_GRP, CAVIUM_DPORT,
-                         BT2, key->n.used, key->e.used,
-                         (word16)inLen, key->n.dpraw, key->e.dpraw, (byte*)in, out,
-                         &key->asyncDev.dev.reqId);
-#else
-    ret = CspPkcs1v15Enc(CAVIUM_REQ_MODE, BT2, key->n.used, key->e.used,
-                         (word16)inLen, key->n.dpraw, key->e.dpraw, (byte*)in, out,
-                         &key->asyncDev.dev.reqId, key->asyncDev.dev.devId);
-#endif
-    ret = NitroxTranslateResponseCode(ret);
-    if (ret != 0) {
-        return ret;
-    }
-
-    return key->n.used;
-}
-
-
-static INLINE void ato16(const byte* c, word16* u16)
-{
-    *u16 = (c[0] << 8) | (c[1]);
-}
-
-int NitroxRsaPrivateDecrypt(const byte* in, word32 inLen, byte* out,
-                            word32 outLen, RsaKey* key)
-{
-    word32 ret;
-    word16 outSz = (word16)outLen;
-
-    if (key == NULL || in == NULL || out == NULL ||
-                                                inLen != (word32)key->n.used) {
-        return BAD_FUNC_ARG;
-    }
-
-#ifdef HAVE_CAVIUM_V
-    ret = CspPkcs1v15CrtDec(key->asyncDev.dev.devId, CAVIUM_REQ_MODE, CAVIUM_SSL_GRP, CAVIUM_DPORT,
-                            BT2, key->n.used, key->q.dpraw,
-                            key->dQ.dpraw, key->p.dpraw, key->dP.dpraw, key->u.dpraw,
-                            (byte*)in, &outSz, out, &key->asyncDev.dev.reqId);
-#else
-    ret = CspPkcs1v15CrtDec(CAVIUM_REQ_MODE, BT2, key->n.used, key->q.dpraw,
-                            key->dQ.dpraw, key->p.dpraw, key->dP.dpraw, key->u.dpraw,
-                            (byte*)in, &outSz, out, &key->asyncDev.dev.reqId,
-                            key->asyncDev.dev.devId);
-#endif
-    ret = NitroxTranslateResponseCode(ret);
-    if (ret != 0) {
-        return ret;
-    }
-
-    ato16((const byte*)&outSz, &outSz); 
-
-    return outSz;
-}
-
-
-int NitroxRsaSSL_Sign(const byte* in, word32 inLen, byte* out,
-                      word32 outLen, RsaKey* key)
-{
-    word32 ret;
-
-    if (key == NULL || in == NULL || out == NULL || inLen == 0 || outLen <
-                                                         (word32)key->n.used) {
-        return BAD_FUNC_ARG;
-    }
-
-#ifdef HAVE_CAVIUM_V
-    ret = CspPkcs1v15CrtEnc(key->asyncDev.dev.devId, CAVIUM_REQ_MODE, CAVIUM_SSL_GRP, CAVIUM_DPORT,
-                            BT1, key->n.used, (word16)inLen,
-                            key->q.dpraw, key->dQ.dpraw, key->p.dpraw, key->dP.dpraw, key->u.dpraw,
-                            (byte*)in, out, &key->asyncDev.dev.reqId);
-#else
-    ret = CspPkcs1v15CrtEnc(CAVIUM_REQ_MODE, BT1, key->n.used, (word16)inLen,
-                            key->q.dpraw, key->dQ.dpraw, key->p.dpraw, key->dP.dpraw, key->u.dpraw,
-                            (byte*)in, out, &key->asyncDev.dev.reqId, key->asyncDev.dev.devId);
-#endif
-    ret = NitroxTranslateResponseCode(ret);
-    if (ret != 0) {
-        return ret;
-    }
-
-    return key->n.used;
-}
-
-
-int NitroxRsaSSL_Verify(const byte* in, word32 inLen, byte* out,
-                        word32 outLen, RsaKey* key)
-{
-    word32 ret;
-    word16 outSz = (word16)outLen;
-
-    if (key == NULL || in == NULL || out == NULL || inLen != (word32)key->n.used) {
-        return BAD_FUNC_ARG;
-    }
-
-#ifdef HAVE_CAVIUM_V
-    ret = CspPkcs1v15Dec(key->asyncDev.dev.devId, CAVIUM_REQ_MODE, CAVIUM_SSL_GRP, CAVIUM_DPORT,
-                         BT1, key->n.used, key->e.used,
-                         key->n.dpraw, key->e.dpraw, (byte*)in, &outSz, out,
-                         &key->asyncDev.dev.reqId);
-#else
-    ret = CspPkcs1v15Dec(CAVIUM_REQ_MODE, BT1, key->n.used, key->e.used,
-                         key->n.dpraw, key->e.dpraw, (byte*)in, &outSz, out,
-                         &key->asyncDev.dev.reqId, key->asyncDev.dev.devId);
-#endif
-    ret = NitroxTranslateResponseCode(ret);
-    if (ret != 0) {
-        return ret;
-    }
-
-    outSz = ntohs(outSz);
-
-    return outSz;
-}
-#endif /* !NO_RSA */
-
-
-#ifndef NO_AES
-int NitroxAesSetKey(Aes* aes, const byte* key, word32 length, const byte* iv)
-{
-    if (aes == NULL)
-        return BAD_FUNC_ARG;
-
-    XMEMCPY(aes->key, key, length);   /* key still holds key, iv still in reg */
-    if (length == 16)
-        aes->type = AES_128_BIT;
-    else if (length == 24)
-        aes->type = AES_192_BIT;
-    else if (length == 32)
-        aes->type = AES_256_BIT;
-
-    return wc_AesSetIV(aes, iv);
-}
-
-#ifdef HAVE_AES_CBC
-int NitroxAesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 length)
-{
-    int ret;
-    wolfssl_word offset = 0;
-
-    while (length > WOLFSSL_MAX_16BIT) {
-        word16 slen = (word16)WOLFSSL_MAX_16BIT;
-    #ifdef HAVE_CAVIUM_V
-        ret = CspEncryptAes(aes->asyncDev.dev.devId, CAVIUM_BLOCKING, DMA_DIRECT_DIRECT, 
-                          CAVIUM_SSL_GRP, CAVIUM_DPORT, aes->asyncDev.dev.contextHandle,
-                          FROM_DPTR, FROM_CTX, AES_CBC, aes->type, (byte*)aes->key,
-                          (byte*)aes->reg, 0, NULL, slen, (byte*)in + offset,
-                          out + offset, &aes->asyncDev.dev.reqId);
-    #else
-        ret = CspEncryptAes(CAVIUM_BLOCKING, aes->asyncDev.dev.contextHandle, CAVIUM_NO_UPDATE,
-                          aes->type, slen, (byte*)in + offset, out + offset,
-                          (byte*)aes->reg, (byte*)aes->key, &aes->asyncDev.dev.reqId,
-                          aes->asyncDev.dev.devId);
-    #endif
-        ret = NitroxTranslateResponseCode(ret);
-        if (ret != 0) {
-            return ret;
-        }
-        length -= WOLFSSL_MAX_16BIT;
-        offset += WOLFSSL_MAX_16BIT;
-        XMEMCPY(aes->reg, out + offset - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
-    }
-    if (length) {
-        word16 slen = (word16)length;
-    #ifdef HAVE_CAVIUM_V
-        ret = CspEncryptAes(aes->asyncDev.dev.devId, CAVIUM_BLOCKING, DMA_DIRECT_DIRECT, 
-                          CAVIUM_SSL_GRP, CAVIUM_DPORT, aes->asyncDev.dev.contextHandle,
-                          FROM_DPTR, FROM_CTX, AES_CBC, aes->type, (byte*)aes->key,
-                          (byte*)aes->reg,  0, NULL, slen, (byte*)in + offset,
-                          out + offset, &aes->asyncDev.dev.reqId);
-    #else
-        ret = CspEncryptAes(CAVIUM_BLOCKING, aes->asyncDev.dev.contextHandle, CAVIUM_NO_UPDATE,
-                          aes->type, slen, (byte*)in + offset, out + offset,
-                          (byte*)aes->reg, (byte*)aes->key, &aes->asyncDev.dev.reqId,
-                          aes->asyncDev.dev.devId);
-    #endif
-        ret = NitroxTranslateResponseCode(ret);
-        if (ret != 0) {
-            return ret;
-        }
-        XMEMCPY(aes->reg, out + offset+length - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
-    }
-    return 0;
-}
-
-#ifdef HAVE_AES_DECRYPT
-int NitroxAesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 length)
-{
-    wolfssl_word offset = 0;
-    int ret;
-
-    while (length > WOLFSSL_MAX_16BIT) {
-        word16 slen = (word16)WOLFSSL_MAX_16BIT;
-        XMEMCPY(aes->tmp, in + offset + slen - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
-    #ifdef HAVE_CAVIUM_V
-        ret = CspDecryptAes(aes->asyncDev.dev.devId, CAVIUM_BLOCKING, DMA_DIRECT_DIRECT, 
-                          CAVIUM_SSL_GRP, CAVIUM_DPORT, aes->asyncDev.dev.contextHandle,
-                          FROM_DPTR, FROM_CTX, AES_CBC, aes->type, (byte*)aes->key, (byte*)aes->reg,
-                          0, NULL, slen, (byte*)in + offset, out + offset, &aes->asyncDev.dev.reqId);
-    #else
-        ret = CspDecryptAes(CAVIUM_BLOCKING, aes->asyncDev.dev.contextHandle, CAVIUM_NO_UPDATE,
-                          aes->type, slen, (byte*)in + offset, out + offset,
-                          (byte*)aes->reg, (byte*)aes->key, &aes->asyncDev.dev.reqId,
-                          aes->asyncDev.dev.devId);
-    #endif
-        ret = NitroxTranslateResponseCode(ret);
-        if (ret != 0) {
-            return ret;
-        }
-        length -= WOLFSSL_MAX_16BIT;
-        offset += WOLFSSL_MAX_16BIT;
-        XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
-    }
-    if (length) {
-        word16 slen = (word16)length;
-        XMEMCPY(aes->tmp, in + offset + slen - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
-    #ifdef HAVE_CAVIUM_V
-        ret = CspDecryptAes(aes->asyncDev.dev.devId, CAVIUM_BLOCKING, DMA_DIRECT_DIRECT, 
-                          CAVIUM_SSL_GRP, CAVIUM_DPORT, aes->asyncDev.dev.contextHandle,
-                          FROM_DPTR, FROM_CTX, AES_CBC, aes->type, (byte*)aes->key, (byte*)aes->reg,
-                          0, NULL, slen, (byte*)in + offset, out + offset, &aes->asyncDev.dev.reqId);
-    #else
-        ret = CspDecryptAes(CAVIUM_BLOCKING, aes->asyncDev.dev.contextHandle, CAVIUM_NO_UPDATE,
-                          aes->type, slen, (byte*)in + offset, out + offset,
-                          (byte*)aes->reg, (byte*)aes->key, &aes->asyncDev.dev.reqId,
-                          aes->asyncDev.dev.devId);
-    #endif
-        ret = NitroxTranslateResponseCode(ret);
-        if (ret != 0) {
-            return ret;
-        }
-        XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
-    }
-    return 0;
-}
-#endif /* HAVE_AES_DECRYPT */
-#endif /* HAVE_AES_CBC */
-#endif /* !NO_AES */
-
-
-#if !defined(NO_ARC4) && !defined(HAVE_CAVIUM_V)
-void NitroxArc4SetKey(Arc4* arc4, const byte* key, word32 length)
-{
-    if (CspInitializeRc4(CAVIUM_BLOCKING, arc4->asyncDev.dev.contextHandle, length,
-                         (byte*)key, &arc4->asyncDev.dev.reqId, arc4->devId) != 0) {
-        WOLFSSL_MSG("Bad Cavium Arc4 Init");
-    }
-}
-
-void NitroxArc4Process(Arc4* arc4, byte* out, const byte* in, word32 length)
-{
-    int ret;
-    wolfssl_word offset = 0;
-
-    while (length > WOLFSSL_MAX_16BIT) {
-        word16 slen = (word16)WOLFSSL_MAX_16BIT;
-        ret = CspEncryptRc4(CAVIUM_BLOCKING, arc4->asyncDev.dev.contextHandle,
-            CAVIUM_UPDATE, slen, (byte*)in + offset, out + offset,
-            &arc4->asyncDev.dev.reqId, arc4->devId);
-        ret = NitroxTranslateResponseCode(ret);
-        if (ret != 0) {
-            return ret;
-        }
-        length -= WOLFSSL_MAX_16BIT;
-        offset += WOLFSSL_MAX_16BIT;
-    }
-    if (length) {
-        word16 slen = (word16)length;
-        ret = CspEncryptRc4(CAVIUM_BLOCKING, arc4->asyncDev.dev.contextHandle,
-            CAVIUM_UPDATE, slen, (byte*)in + offset, out + offset,
-            &arc4->asyncDev.dev.reqId, arc4->devId);
-        ret = NitroxTranslateResponseCode(ret);
-        if (ret != 0) {
-            return ret;
-        }
-    }
-}
-#endif /* !NO_ARC4 && !HAVE_CAVIUM_V */
-
-
-#ifndef NO_DES3
-int NitroxDes3SetKey(Des3* des3, const byte* key, const byte* iv)
-{
-    if (des3 == NULL)
-        return BAD_FUNC_ARG;
-
-    /* key[0] holds key, iv in reg */
-    XMEMCPY(des3->key[0], key, DES_BLOCK_SIZE*3);
-
-    return wc_Des3_SetIV(des3, iv);
-}
-
-int NitroxDes3CbcEncrypt(Des3* des3, byte* out, const byte* in, word32 length)
-{
-    wolfssl_word offset = 0;
-    int ret;
-
-    while (length > WOLFSSL_MAX_16BIT) {
-        word16 slen = (word16)WOLFSSL_MAX_16BIT;
-    #ifdef HAVE_CAVIUM_V
-        ret = CspEncrypt3Des(des3->asyncDev.dev.devId, CAVIUM_BLOCKING, DMA_DIRECT_DIRECT,
-                            CAVIUM_SSL_GRP, CAVIUM_DPORT, des3->asyncDev.dev.contextHandle,
-                            FROM_DPTR, FROM_CTX, DES3_CBC, (byte*)des3->key[0],
-                            (byte*)des3->reg, slen, (byte*)in + offset,
-                            out + offset, &des3->asyncDev.dev.reqId);
-    #else
-        ret = CspEncrypt3Des(CAVIUM_BLOCKING, des3->asyncDev.dev.contextHandle,
-                            CAVIUM_NO_UPDATE, slen, (byte*)in + offset,
-                            out + offset, (byte*)des3->reg, (byte*)des3->key[0],
-                            &des3->asyncDev.dev.reqId, des3->asyncDev.dev.devId);
-    #endif
-        ret = NitroxTranslateResponseCode(ret);
-        if (ret != 0) {
-            return ret;
-        }
-        length -= WOLFSSL_MAX_16BIT;
-        offset += WOLFSSL_MAX_16BIT;
-        XMEMCPY(des3->reg, out + offset - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
-    }
-    if (length) {
-        word16 slen = (word16)length;
-    #ifdef HAVE_CAVIUM_V
-        ret = CspEncrypt3Des(des3->asyncDev.dev.devId, CAVIUM_BLOCKING, DMA_DIRECT_DIRECT,
-                            CAVIUM_SSL_GRP, CAVIUM_DPORT, des3->asyncDev.dev.contextHandle,
-                            FROM_DPTR, FROM_CTX, DES3_CBC, (byte*)des3->key[0], (byte*)des3->reg,
-                            slen, (byte*)in + offset, out + offset,
-                            &des3->asyncDev.dev.reqId);
-    #else
-        ret = CspEncrypt3Des(CAVIUM_BLOCKING, des3->asyncDev.dev.contextHandle,
-                            CAVIUM_NO_UPDATE, slen, (byte*)in + offset,
-                            out + offset, (byte*)des3->reg, (byte*)des3->key[0],
-                            &des3->asyncDev.dev.reqId, des3->asyncDev.dev.devId);
-    #endif
-        ret = NitroxTranslateResponseCode(ret);
-        if (ret != 0) {
-            return ret;
-        }
-        XMEMCPY(des3->reg, out+offset+length - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
-    }
-    return 0;
-}
-
-int NitroxDes3CbcDecrypt(Des3* des3, byte* out, const byte* in, word32 length)
-{
-    wolfssl_word offset = 0;
-    int ret;
-
-    while (length > WOLFSSL_MAX_16BIT) {
-        word16 slen = (word16)WOLFSSL_MAX_16BIT;
-        XMEMCPY(des3->tmp, in + offset + slen - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
-    #ifdef HAVE_CAVIUM_V
-        ret = CspDecrypt3Des(des3->asyncDev.dev.devId, CAVIUM_BLOCKING, DMA_DIRECT_DIRECT,
-                            CAVIUM_SSL_GRP, CAVIUM_DPORT, des3->asyncDev.dev.contextHandle,
-                            FROM_DPTR, FROM_CTX, DES3_CBC, (byte*)des3->key[0], (byte*)des3->reg,
-                            slen, (byte*)in + offset, out + offset,
-                            &des3->asyncDev.dev.reqId);
-    #else
-        ret = CspDecrypt3Des(CAVIUM_BLOCKING, des3->asyncDev.dev.contextHandle,
-                           CAVIUM_NO_UPDATE, slen, (byte*)in + offset, out + offset,
-                           (byte*)des3->reg, (byte*)des3->key[0], &des3->asyncDev.dev.reqId,
-                           des3->asyncDev.dev.devId);
-    #endif
-        ret = NitroxTranslateResponseCode(ret);
-        if (ret != 0) {
-            return ret;
-        }
-        length -= WOLFSSL_MAX_16BIT;
-        offset += WOLFSSL_MAX_16BIT;
-        XMEMCPY(des3->reg, des3->tmp, DES_BLOCK_SIZE);
-    }
-    if (length) {
-        word16 slen = (word16)length;
-        XMEMCPY(des3->tmp, in + offset + slen - DES_BLOCK_SIZE,DES_BLOCK_SIZE);
-    #ifdef HAVE_CAVIUM_V
-        ret = CspDecrypt3Des(des3->asyncDev.dev.devId, CAVIUM_BLOCKING, DMA_DIRECT_DIRECT,
-                            CAVIUM_SSL_GRP, CAVIUM_DPORT, des3->asyncDev.dev.contextHandle,
-                            FROM_DPTR, FROM_CTX, DES3_CBC, (byte*)des3->key[0], (byte*)des3->reg,
-                            slen, (byte*)in + offset, out + offset,
-                            &des3->asyncDev.dev.reqId);
-    #else
-        ret = CspDecrypt3Des(CAVIUM_BLOCKING, des3->asyncDev.dev.contextHandle,
-                           CAVIUM_NO_UPDATE, slen, (byte*)in + offset, out + offset,
-                           (byte*)des3->reg, (byte*)des3->key[0], &des3->asyncDev.dev.reqId,
-                           des3->asyncDev.dev.devId);
-    #endif
-        ret = NitroxTranslateResponseCode(ret);
-        if (ret != 0) {
-            return ret;
-        }
-        XMEMCPY(des3->reg, des3->tmp, DES_BLOCK_SIZE);
-    }
-    return 0;
-}
-#endif /* !NO_DES3 */
-
-
-#ifndef NO_HMAC
-int NitroxHmacFinal(Hmac* hmac, byte* hash)
-{
-    int ret = -1;
-
-#ifdef HAVE_CAVIUM_V
-    word16 hashLen = wc_HmacSizeByType(hmac->macType);
-    ret = CspHmac(hmac->asyncDev.dev.devId, CAVIUM_BLOCKING, DMA_DIRECT_DIRECT,
-                  CAVIUM_SSL_GRP, CAVIUM_DPORT, hmac->type, hmac->keyLen,
-                  (byte*)hmac->ipad, hmac->dataLen, hmac->data, hashLen,
-                  hash, &hmac->asyncDev.dev.reqId);
-#else
-    ret = CspHmac(CAVIUM_BLOCKING, hmac->type, NULL, hmac->keyLen,
-                  (byte*)hmac->ipad, hmac->dataLen, hmac->data, hash,
-                  &hmac->asyncDev.dev.reqId, hmac->asyncDev.dev.devId);
-#endif
-    ret = NitroxTranslateResponseCode(ret);
-    if (ret != 0) {
-        return ret;
-    }
-
-    hmac->innerHashKeyed = 0;  /* tell update to start over if used again */
-
-    return 0;
-}
-
-int NitroxHmacUpdate(Hmac* hmac, const byte* msg, word32 length)
-{
-    word16 add = (word16)length;
-    word32 total;
-    byte*  tmp;
-
-    if (length > WOLFSSL_MAX_16BIT) {
-        WOLFSSL_MSG("Too big msg for cavium hmac");
-        return -1;
-    }
-
-    if (hmac->innerHashKeyed == 0) {  /* starting new */
-        hmac->dataLen        = 0;
-        hmac->innerHashKeyed = 1;
-    }
-
-    total = add + hmac->dataLen;
-    if (total > WOLFSSL_MAX_16BIT) {
-        WOLFSSL_MSG("Too big msg for cavium hmac");
-        return -1;
-    }
-
-    tmp = XMALLOC(hmac->dataLen + add, NULL, DYNAMIC_TYPE_ASYNC_TMP);
-    if (tmp == NULL) {
-        WOLFSSL_MSG("Out of memory for cavium update");
-        return -1;
-    }
-    if (hmac->dataLen)
-        XMEMCPY(tmp, hmac->data,  hmac->dataLen);
-    XMEMCPY(tmp + hmac->dataLen, msg, add);
-
-    hmac->dataLen += add;
-    XFREE(hmac->data, NULL, DYNAMIC_TYPE_ASYNC_TMP);
-    hmac->data = tmp;
-
-    return 0;
-}
-
-int NitroxHmacSetKey(Hmac* hmac, int type, const byte* key, word32 length)
-{
-    hmac->macType = (byte)type;
-    
-    /* Determine Cavium HashType */
-    switch(type) {
-    #ifndef NO_MD5
-        case MD5:
-            hmac->type = MD5_TYPE;
-            break;
-    #endif
-    #ifndef NO_SHA
-        case SHA:
-            hmac->type = SHA1_TYPE;
-            break;
-    #endif
-    #ifndef NO_SHA256
-        case SHA256:
-        #ifdef HAVE_CAVIUM_V
-            hmac->type = SHA2_SHA256;
-        #else
-            hmac->type = SHA256_TYPE;
-        #endif
-            break;
-    #endif
-    #ifdef HAVE_CAVIUM_V
-        #ifndef WOLFSSL_SHA512
-            case SHA512:
-                hmac->type = SHA2_SHA512;
-                break;
-        #endif
-        #ifndef WOLFSSL_SHA384
-            case SHA384:
-                hmac->type = SHA2_SHA384;
-                break;
-        #endif
-    #endif /* HAVE_CAVIUM_V */
-        default:
-            WOLFSSL_MSG("unsupported cavium hmac type");
-            break;
-    }
-
-    hmac->innerHashKeyed = 0;  /* should we key Startup flag */
-
-    hmac->keyLen = (word16)length;
-    /* store key in ipad */
-    XMEMCPY(hmac->ipad, key, length);
-
-    return 0;
-}
-#endif /* !NO_HMAC */
-
-
-#if !defined(HAVE_HASHDRBG) && !defined(NO_RC4)
-void NitroxRngGenerateBlock(WC_RNG* rng, byte* output, word32 sz)
-{
-    wolfssl_word offset = 0;
-    word32      requestId;
-
-    while (sz > WOLFSSL_MAX_16BIT) {
-        word16 slen = (word16)WOLFSSL_MAX_16BIT;
-    #ifdef HAVE_CAVIUM_V
-        ret = CspTrueRandom(rng->asyncDev.dev.devId, CAVIUM_BLOCKING, DMA_DIRECT_DIRECT, 
-                            CAVIUM_SSL_GRP, CAVIUM_DPORT, slen, output + offset, &requestId);
-    #else
-        ret = CspRandom(CAVIUM_BLOCKING, slen, output + offset, &requestId,
-                        rng->asyncDev.dev.devId);
-    #endif
-        ret = NitroxTranslateResponseCode(ret);
-        if (ret != 0) {
-            return ret;
-        }
-        sz     -= WOLFSSL_MAX_16BIT;
-        offset += WOLFSSL_MAX_16BIT;
-    }
-    if (sz) {
-        word16 slen = (word16)sz;
-    #ifdef HAVE_CAVIUM_V
-        ret = CspTrueRandom(rng->asyncDev.dev.devId, CAVIUM_BLOCKING, DMA_DIRECT_DIRECT, 
-                            CAVIUM_SSL_GRP, CAVIUM_DPORT, slen, output + offset, &requestId);
-    #else
-        ret = CspRandom(CAVIUM_BLOCKING, slen, output + offset, &requestId,
-                        rng->asyncDev.dev.devId);
-    #endif
-        ret = NitroxTranslateResponseCode(ret);
-        if (ret != 0) {
-            return ret;
-        }
-    }
-}
-#endif /* !defined(HAVE_HASHDRBG) && !defined(NO_RC4) */
-
-
-#endif /* WOLFSSL_ASYNC_CRYPT */
-
-#endif /* HAVE_CAVIUM */
diff --git a/wolfcrypt/src/port/intel/README.md b/wolfcrypt/src/port/intel/README.md
new file mode 100644
index 000000000..4b5d971ba
--- /dev/null
+++ b/wolfcrypt/src/port/intel/README.md
@@ -0,0 +1,3 @@
+# Intel QuickAssist Adapter Asynchronous Support
+
+Please contact wolfSSL at info@wolfssl.com to request an evaluation.
diff --git a/wolfcrypt/src/port/nxp/ksdk_port.c b/wolfcrypt/src/port/nxp/ksdk_port.c
index 259a1fb5c..4c5853d7b 100644
--- a/wolfcrypt/src/port/nxp/ksdk_port.c
+++ b/wolfcrypt/src/port/nxp/ksdk_port.c
@@ -681,6 +681,7 @@ int wc_ecc_mulmod_ex(mp_int *k, ecc_point *G, ecc_point *R, mp_int* a,
     int res;
 
     (void)a;
+    (void)heap;
 
     uint8_t Gxbin[LTC_MAX_ECC_BITS / 8];
     uint8_t Gybin[LTC_MAX_ECC_BITS / 8];
diff --git a/wolfcrypt/src/port/ti/ti-ccm.c b/wolfcrypt/src/port/ti/ti-ccm.c
index abf4d602d..f4a4c6595 100644
--- a/wolfcrypt/src/port/ti/ti-ccm.c
+++ b/wolfcrypt/src/port/ti/ti-ccm.c
@@ -32,52 +32,63 @@
 #include 
 #include 
 
+#ifndef TI_DUMMY_BUILD
 #include "driverlib/sysctl.h"
 #include "driverlib/rom_map.h"
 #include "driverlib/rom.h"
 
 #ifndef SINGLE_THREADED
 #include 
-    static wolfSSL_Mutex TI_CCM_Mutex ;
+    static wolfSSL_Mutex TI_CCM_Mutex;
 #endif
+#endif /* TI_DUMMY_BUILD */
 
 #define TIMEOUT  500000
-#define WAIT(stat) { volatile int i ; for(i=0; i
 
 #include 
-#include       
-#include       
-#include       
+#include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -59,67 +59,68 @@
 #define SHAMD5_ALGO_MD5 1
 #define SHAMD5_ALGO_SHA1 2
 #define SHAMD5_ALGO_SHA256 3
-bool wolfSSL_TI_CCMInit(void) { return true ; }
+#define SHAMD5_ALGO_SHA224 4
 #endif
 
 static int hashInit(wolfssl_TI_Hash *hash) {
-    if(!wolfSSL_TI_CCMInit())return 1 ;
-    hash->used = 0 ;
-    hash->msg  = 0 ;
-    hash->len  = 0 ;
-    return 0 ;
+    if (!wolfSSL_TI_CCMInit())return 1;
+    hash->used = 0;
+    hash->msg  = 0;
+    hash->len  = 0;
+    return 0;
 }
 
 static int hashUpdate(wolfssl_TI_Hash *hash, const byte* data, word32 len)
 {
-    void *p ;
+    void *p;
 
-    if((hash== NULL) || (data == NULL))return BAD_FUNC_ARG;
+    if ((hash== NULL) || (data == NULL))return BAD_FUNC_ARG;
 
-    if(hash->len < hash->used+len) {
-        if(hash->msg == NULL) {
+    if (hash->len < hash->used+len) {
+        if (hash->msg == NULL) {
             p = XMALLOC(hash->used+len, NULL, DYNAMIC_TYPE_TMP_BUFFER);
         } else {
             p = XREALLOC(hash->msg, hash->used+len, NULL, DYNAMIC_TYPE_TMP_BUFFER);
         }
-        if(p == 0)return 1 ;
-        hash->msg = p ;     
-        hash->len = hash->used+len ;
-    } 
-    XMEMCPY(hash->msg+hash->used, data, len) ;
-    hash->used += len ;
-    return 0 ;
+        if (p == 0)return 1;
+        hash->msg = p;
+        hash->len = hash->used+len;
+    }
+    XMEMCPY(hash->msg+hash->used, data, len);
+    hash->used += len;
+    return 0;
 }
 
 static int hashGetHash(wolfssl_TI_Hash *hash, byte* result, word32 algo, word32 hsize)
-{   
-    uint32_t h[16] ;
+{
+    uint32_t h[16];
 #ifndef TI_DUMMY_BUILD
-    wolfSSL_TI_lockCCM() ;
+    wolfSSL_TI_lockCCM();
     ROM_SHAMD5Reset(SHAMD5_BASE);
     ROM_SHAMD5ConfigSet(SHAMD5_BASE, algo);
-    ROM_SHAMD5DataProcess(SHAMD5_BASE, 
+    ROM_SHAMD5DataProcess(SHAMD5_BASE,
                    (uint32_t *)hash->msg, hash->used, h);
-    wolfSSL_TI_unlockCCM() ;
+    wolfSSL_TI_unlockCCM();
 #else
-    (void) hash ;
-    (void) algo ;
+    (void) hash;
+    (void) algo;
 #endif
-    XMEMCPY(result, h, hsize) ;
+    XMEMCPY(result, h, hsize);
 
-    return 0 ;
+    return 0;
 }
 
-static void hashRestorePos(wolfssl_TI_Hash *h1, wolfssl_TI_Hash *h2) {
-	h1->used = h2->used ;
+static int hashCopy(wolfssl_TI_Hash *src, wolfssl_TI_Hash *dst) {
+    XMEMCPY(dst, src, sizeof(wolfssl_TI_Hash));
+    return 0;
 }
 
 static int hashFinal(wolfssl_TI_Hash *hash, byte* result, word32 algo, word32 hsize)
-{   
-    hashGetHash(hash, result, algo, hsize) ;
+{
+    hashGetHash(hash, result, algo, hsize);
     XFREE(hash->msg, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    hashInit(hash) ;
-    return 0 ;
+    hashInit(hash);
+    return 0;
 }
 
 static int hashHash(const byte* data, word32 len, byte* hash, word32 algo, word32 hsize)
@@ -153,166 +154,183 @@ static int hashHash(const byte* data, word32 len, byte* hash, word32 algo, word3
 }
 
 static int hashFree(wolfssl_TI_Hash *hash)
-{   
+{
     XFREE(hash->msg, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    hashInit(hash) ;
-    return 0 ;
+    hashInit(hash);
+    return 0;
 }
 
 #if !defined(NO_MD5)
-WOLFSSL_API void wc_InitMd5(Md5* md5)
+WOLFSSL_API int wc_InitMd5_ex(Md5* md5, void* heap, int devId)
 {
     if (md5 == NULL)
-        return ;
-    hashInit((wolfssl_TI_Hash *)md5) ;
+        return 1;
+    (void)heap;
+    (void)devId;
+    return hashInit((wolfssl_TI_Hash *)md5);
 }
-
-WOLFSSL_API void wc_Md5Update(Md5* md5, const byte* data, word32 len)
+WOLFSSL_API int wc_InitMd5(Md5* md5)
 {
-    hashUpdate((wolfssl_TI_Hash *)md5, data, len) ;
+    return wc_InitMd5_ex(md5, NULL, INVALID_DEVID);
 }
 
-WOLFSSL_API void wc_Md5Final(Md5* md5, byte* hash)
+WOLFSSL_API int wc_Md5Update(Md5* md5, const byte* data, word32 len)
 {
-    hashFinal((wolfssl_TI_Hash *)md5, hash, SHAMD5_ALGO_MD5, MD5_DIGEST_SIZE) ;
+    return hashUpdate((wolfssl_TI_Hash *)md5, data, len);
 }
 
-WOLFSSL_API void wc_Md5GetHash(Md5* md5, byte* hash)
+WOLFSSL_API int wc_Md5Final(Md5* md5, byte* hash)
 {
-    hashGetHash((wolfssl_TI_Hash *)md5, hash, SHAMD5_ALGO_MD5, MD5_DIGEST_SIZE) ;
+    return hashFinal((wolfssl_TI_Hash *)md5, hash, SHAMD5_ALGO_MD5, MD5_DIGEST_SIZE);
 }
 
-WOLFSSL_API void wc_Md5RestorePos(Md5* m1, Md5* m2) {
-	hashRestorePos((wolfssl_TI_Hash *)m1, (wolfssl_TI_Hash *)m2) ;
+WOLFSSL_API int wc_Md5GetHash(Md5* md5, byte* hash)
+{
+    return hashGetHash((wolfssl_TI_Hash *)md5, hash, SHAMD5_ALGO_MD5, MD5_DIGEST_SIZE);
+}
+
+WOLFSSL_API int wc_Md5Copy(Md5* src, Md5* dst) {
+    return hashCopy((wolfssl_TI_Hash *)src, (wolfssl_TI_Hash *)dst);
 }
 
 WOLFSSL_API int wc_Md5Hash(const byte*data, word32 len, byte*hash)
-{ 
-    return hashHash(data, len, hash, SHAMD5_ALGO_MD5, MD5_DIGEST_SIZE) ;
+{
+    return hashHash(data, len, hash, SHAMD5_ALGO_MD5, MD5_DIGEST_SIZE);
 }
 
 WOLFSSL_API void wc_Md5Free(Md5* md5)
 {
-    hashFree((wolfssl_TI_Hash *)md5) ;
+    hashFree((wolfssl_TI_Hash *)md5);
 }
 
-#endif /* NO_MD5 */
+#endif /* !NO_MD5 */
 
 #if !defined(NO_SHA)
-WOLFSSL_API int wc_InitSha(Sha* sha)
+WOLFSSL_API int wc_InitSha_ex(Sha* sha, void* heap, int devId)
 {
     if (sha == NULL)
-        return 1 ;
-    return hashInit((wolfssl_TI_Hash *)sha) ;
+        return 1;
+    (void)heap;
+    (void)devId;
+    return hashInit((wolfssl_TI_Hash *)sha);
+}
+WOLFSSL_API int wc_InitSha(Sha* sha)
+{
+    return wc_InitSha_ex(sha, NULL, INVALID_DEVID);
 }
 
 WOLFSSL_API int wc_ShaUpdate(Sha* sha, const byte* data, word32 len)
 {
-    return hashUpdate((wolfssl_TI_Hash *)sha, data, len) ;
+    return hashUpdate((wolfssl_TI_Hash *)sha, data, len);
 }
 
 WOLFSSL_API int wc_ShaFinal(Sha* sha, byte* hash)
 {
-    return hashFinal((wolfssl_TI_Hash *)sha, hash, SHAMD5_ALGO_SHA1, SHA_DIGEST_SIZE) ;
+    return hashFinal((wolfssl_TI_Hash *)sha, hash, SHAMD5_ALGO_SHA1, SHA_DIGEST_SIZE);
 }
 
 WOLFSSL_API int wc_ShaGetHash(Sha* sha, byte* hash)
 {
-    return hashGetHash(sha, hash, SHAMD5_ALGO_SHA1, SHA_DIGEST_SIZE) ;
+    return hashGetHash(sha, hash, SHAMD5_ALGO_SHA1, SHA_DIGEST_SIZE);
 }
 
-WOLFSSL_API void wc_ShaRestorePos(Sha* s1, Sha* s2) {
-	hashRestorePos((wolfssl_TI_Hash *)s1, (wolfssl_TI_Hash *)s2) ;
+WOLFSSL_API int wc_ShaCopy(Sha* src, Sha* dst) {
+    return hashCopy((wolfssl_TI_Hash *)src, (wolfssl_TI_Hash *)dst);
 }
 
 WOLFSSL_API int wc_ShaHash(const byte*data, word32 len, byte*hash)
-{ 
-    return hashHash(data, len, hash, SHAMD5_ALGO_SHA1, SHA_DIGEST_SIZE) ;
+{
+    return hashHash(data, len, hash, SHAMD5_ALGO_SHA1, SHA_DIGEST_SIZE);
 }
 
 WOLFSSL_API void wc_ShaFree(Sha* sha)
 {
-    hashFree((wolfssl_TI_Hash *)sha) ;
+    hashFree((wolfssl_TI_Hash *)sha);
 }
 
-#endif /* NO_SHA */
+#endif /* !NO_SHA */
 
-#if defined(HAVE_SHA224)
-WOLFSSL_API int wc_InitSha224(Sha224* sha224)
+#if defined(WOLFSSL_SHA224)
+WOLFSSL_API int wc_InitSha224_ex(Sha224* sha224, void* heap, int devId)
 {
     if (sha224 == NULL)
-        return 1 ;
-    return hashInit((wolfssl_TI_Hash *)sha224) ;
+        return 1;
+    (void)heap;
+    (void)devId;
+    return hashInit((wolfssl_TI_Hash *)sha224);
+}
+WOLFSSL_API int wc_InitSha224(Sha224* sha224)
+{
+    return wc_InitSha224_ex(sha224, NULL, INVALID_DEVID);
 }
 
 WOLFSSL_API int wc_Sha224Update(Sha224* sha224, const byte* data, word32 len)
 {
-    return hashUpdate((wolfssl_TI_Hash *)sha224, data, len) ;
+    return hashUpdate((wolfssl_TI_Hash *)sha224, data, len);
 }
 
 WOLFSSL_API int wc_Sha224Final(Sha224* sha224, byte* hash)
 {
-    return hashFinal((wolfssl_TI_Hash *)sha224, hash, SHAMD5_ALGO_SHA224, SHA224_DIGEST_SIZE) ;
+    return hashFinal((wolfssl_TI_Hash *)sha224, hash, SHAMD5_ALGO_SHA224, SHA224_DIGEST_SIZE);
 }
 
 WOLFSSL_API int wc_Sha224GetHash(Sha224* sha224, byte* hash)
 {
-    return hashGetHash(sha224, hash, SHAMD5_ALGO_SHA224, SHA224_DIGEST_SIZE) ;
-}
-
-WOLFSSL_API void wc_Sha224RestorePos(Sha224* s1, Sha224* s2) {
-	hashRestorePos((wolfssl_TI_Hash *)s1, (wolfssl_TI_Hash *)s2) ;
+    return hashGetHash(sha224, hash, SHAMD5_ALGO_SHA224, SHA224_DIGEST_SIZE);
 }
 
 WOLFSSL_API int wc_Sha224Hash(const byte* data, word32 len, byte*hash)
-{ 
-    return hashHash(data, len, hash, SHAMD5_ALGO_SHA224, SHA224_DIGEST_SIZE) ;
+{
+    return hashHash(data, len, hash, SHAMD5_ALGO_SHA224, SHA224_DIGEST_SIZE);
 }
 
 WOLFSSL_API void wc_Sha224Free(Sha224* sha224)
 {
-    hashFree((wolfssl_TI_Hash *)sha224) ;
+    hashFree((wolfssl_TI_Hash *)sha224);
 }
 
-#endif /* HAVE_SHA224 */
+#endif /* WOLFSSL_SHA224 */
 
 #if !defined(NO_SHA256)
-WOLFSSL_API int wc_InitSha256(Sha256* sha256)
+WOLFSSL_API int wc_InitSha256_ex(Sha256* sha256, void* heap, int devId)
 {
     if (sha256 == NULL)
-        return 1 ;
-    return hashInit((wolfssl_TI_Hash *)sha256) ;
+        return 1;
+    (void)heap;
+    (void)devId;
+    return hashInit((wolfssl_TI_Hash *)sha256);
+}
+
+WOLFSSL_API int wc_InitSha256(Sha256* sha256)
+{
+    return wc_InitSha256_ex(sha256, NULL, INVALID_DEVID);
 }
 
 WOLFSSL_API int wc_Sha256Update(Sha256* sha256, const byte* data, word32 len)
 {
-    return hashUpdate((wolfssl_TI_Hash *)sha256, data, len) ;
+    return hashUpdate((wolfssl_TI_Hash *)sha256, data, len);
 }
 
 WOLFSSL_API int wc_Sha256Final(Sha256* sha256, byte* hash)
 {
-    return hashFinal((wolfssl_TI_Hash *)sha256, hash, SHAMD5_ALGO_SHA256, SHA256_DIGEST_SIZE) ;
+    return hashFinal((wolfssl_TI_Hash *)sha256, hash, SHAMD5_ALGO_SHA256, SHA256_DIGEST_SIZE);
 }
 
 WOLFSSL_API int wc_Sha256GetHash(Sha256* sha256, byte* hash)
 {
-    return hashGetHash(sha256, hash, SHAMD5_ALGO_SHA256, SHA256_DIGEST_SIZE) ;
-}
-
-WOLFSSL_API void wc_Sha256RestorePos(Sha256* s1, Sha256* s2) {
-	hashRestorePos((wolfssl_TI_Hash *)s1, (wolfssl_TI_Hash *)s2) ;
+    return hashGetHash(sha256, hash, SHAMD5_ALGO_SHA256, SHA256_DIGEST_SIZE);
 }
 
 WOLFSSL_API int wc_Sha256Hash(const byte* data, word32 len, byte*hash)
 {
-    return hashHash(data, len, hash, SHAMD5_ALGO_SHA256, SHA256_DIGEST_SIZE) ;
+    return hashHash(data, len, hash, SHAMD5_ALGO_SHA256, SHA256_DIGEST_SIZE);
 }
 
 WOLFSSL_API void wc_Sha256Free(Sha256* sha256)
 {
-    hashFree((wolfssl_TI_Hash *)sha256) ;
+    hashFree((wolfssl_TI_Hash *)sha256);
 }
 
-#endif
+#endif /* !NO_SHA256 */
 
 #endif
diff --git a/wolfcrypt/src/pwdbased.c b/wolfcrypt/src/pwdbased.c
index 68cb1cf45..0a34697f2 100644
--- a/wolfcrypt/src/pwdbased.c
+++ b/wolfcrypt/src/pwdbased.c
@@ -200,10 +200,11 @@ int wc_PBKDF2(byte* output, const byte* passwd, int pLen, const byte* salt,
         return MEMORY_E;
 #endif
 
-    ret = wc_HmacSetKey(&hmac, hashType, passwd, pLen);
-
+    ret = wc_HmacInit(&hmac, NULL, INVALID_DEVID);
     if (ret == 0) {
-        while (kLen) {
+        ret = wc_HmacSetKey(&hmac, hashType, passwd, pLen);
+
+        while (ret == 0 && kLen) {
             int currentLen;
 
             ret = wc_HmacUpdate(&hmac, salt, sLen);
@@ -248,6 +249,7 @@ int wc_PBKDF2(byte* output, const byte* passwd, int pLen, const byte* salt,
             kLen   -= currentLen;
             i++;
         }
+        wc_HmacFree(&hmac);
     }
 
 #ifdef WOLFSSL_SMALL_STACK
diff --git a/wolfcrypt/src/random.c b/wolfcrypt/src/random.c
index cf0201ed6..9b0871f52 100644
--- a/wolfcrypt/src/random.c
+++ b/wolfcrypt/src/random.c
@@ -33,13 +33,6 @@
 
 #include 
 
-#if defined(CUSTOM_RAND_GENERATE) && !defined(CUSTOM_RAND_TYPE)
-/* To maintain compatibility the default return value from CUSTOM_RAND_GENERATE is byte */
-#define CUSTOM_RAND_TYPE    byte
-#endif
-
-#define RNG_HEALTH_TEST_CHECK_SIZE (SHA256_DIGEST_SIZE * 4)
-
 
 #ifdef HAVE_FIPS
 int wc_GenerateSeed(OS_Seed* os, byte* seed, word32 sz)
@@ -47,31 +40,37 @@ int wc_GenerateSeed(OS_Seed* os, byte* seed, word32 sz)
     return GenerateSeed(os, seed, sz);
 }
 
-int  wc_InitRng(WC_RNG* rng)
+int wc_InitRng_ex(WC_RNG* rng, void* heap, int devId)
+{
+    (void)heap;
+    (void)devId;
+    return InitRng_fips(rng);
+}
+
+int wc_InitRng(WC_RNG* rng)
 {
     return InitRng_fips(rng);
 }
 
 
-int  wc_RNG_GenerateBlock(WC_RNG* rng, byte* b, word32 sz)
+int wc_RNG_GenerateBlock(WC_RNG* rng, byte* b, word32 sz)
 {
     return RNG_GenerateBlock_fips(rng, b, sz);
 }
 
 
-int  wc_RNG_GenerateByte(WC_RNG* rng, byte* b)
+int wc_RNG_GenerateByte(WC_RNG* rng, byte* b)
 {
     return RNG_GenerateByte(rng, b);
 }
 
-#if defined(HAVE_HASHDRBG) || defined(NO_RC4)
+#ifdef HAVE_HASHDRBG
 
     int wc_FreeRng(WC_RNG* rng)
     {
         return FreeRng_fips(rng);
     }
 
-
     int wc_RNG_HealthTest(int reseed,
                                         const byte* entropyA, word32 entropyASz,
                                         const byte* entropyB, word32 entropyBSz,
@@ -80,113 +79,77 @@ int  wc_RNG_GenerateByte(WC_RNG* rng, byte* b)
         return RNG_HealthTest_fips(reseed, entropyA, entropyASz,
                               entropyB, entropyBSz, output, outputSz);
     }
-#endif /* HAVE_HASHDRBG || NO_RC4 */
+#endif /* HAVE_HASHDRBG */
+
 #else /* else build without fips */
+
 #ifndef WC_NO_RNG /* if not FIPS and RNG is disabled then do not compile */
+
 #include 
+#include 
 
-/* Allow custom RNG system */
-#ifdef CUSTOM_RAND_GENERATE_BLOCK
-
-int wc_InitRng_ex(WC_RNG* rng, void* heap)
-{
-    (void)rng;
-    (void)heap;
-    return 0;
-}
-
-int wc_InitRng(WC_RNG* rng)
-{
-    return wc_InitRng_ex(rng, NULL);
-}
-
-int wc_RNG_GenerateBlock(WC_RNG* rng, byte* output, word32 sz)
-{
-    (void)rng;
-    XMEMSET(output, 0, sz);
-    return CUSTOM_RAND_GENERATE_BLOCK(output, sz);
-}
-
-
-int wc_RNG_GenerateByte(WC_RNG* rng, byte* b)
-{
-    return wc_RNG_GenerateBlock(rng, b, 1);
-}
-
-
-int wc_FreeRng(WC_RNG* rng)
-{
-    (void)rng;
-    return 0;
-}
-
+#ifdef NO_INLINE
+    #include 
 #else
-
-/* Use HASHDRGB with SHA256 */
-#if defined(HAVE_HASHDRBG) || defined(NO_RC4)
-
-    #include 
-
-    #ifdef NO_INLINE
-        #include 
-    #else
-        #define WOLFSSL_MISC_INCLUDED
-        #include 
-    #endif
-#endif /* HAVE_HASHDRBG || NO_RC4 */
+    #define WOLFSSL_MISC_INCLUDED
+    #include 
+#endif
 
 #if defined(WOLFSSL_SGX)
-#include 
+    #include 
 #elif defined(USE_WINDOWS_API)
     #ifndef _WIN32_WINNT
         #define _WIN32_WINNT 0x0400
     #endif
     #include 
     #include 
-#else
-    #ifdef HAVE_WNR
-        #include 
-        #include 
-        wolfSSL_Mutex wnr_mutex;    /* global netRandom mutex */
-        int wnr_timeout     = 0;    /* entropy timeout, mililseconds */
-        int wnr_mutex_init  = 0;    /* flag for mutex init */
-        wnr_context*  wnr_ctx;      /* global netRandom context */
-    #elif !defined(NO_DEV_RANDOM) && !defined(CUSTOM_RAND_GENERATE) && \
-          !defined(WOLFSSL_GENSEED_FORTEST) && !defined(WOLFSSL_MDK_ARM) && \
-          !defined(WOLFSSL_IAR_ARM) && !defined(WOLFSSL_ROWLEY_ARM) && \
-          !defined(WOLFSSL_EMBOS)
-            #include 
-        #ifndef EBSNET
-            #include 
-        #endif
-    #elif defined(FREESCALE_KSDK_2_0_TRNG)
-        #include "fsl_trng.h"
-    #elif defined(FREESCALE_KSDK_2_0_RNGA)
-        #include "fsl_rnga.h"
-    #else
-        /* include headers that may be needed to get good seed */
-    #endif
-#endif /* USE_WINDOWS_API */
+#elif defined(HAVE_WNR)
+    #include 
+    #include 
+    wolfSSL_Mutex wnr_mutex;    /* global netRandom mutex */
+    int wnr_timeout     = 0;    /* entropy timeout, milliseconds */
+    int wnr_mutex_init  = 0;    /* flag for mutex init */
+    wnr_context*  wnr_ctx;      /* global netRandom context */
+#elif defined(FREESCALE_KSDK_2_0_TRNG)
+    #include "fsl_trng.h"
+#elif defined(FREESCALE_KSDK_2_0_RNGA)
+    #include "fsl_rnga.h"
 
-#ifdef HAVE_INTEL_RDGEN
-    static int wc_InitRng_IntelRD(void) ;
-    #if defined(HAVE_HASHDRBG) || defined(NO_RC4)
-    static int wc_GenerateSeed_IntelRD(OS_Seed* os, byte* output, word32 sz) ;
-    #else
-    static int wc_GenerateRand_IntelRD(OS_Seed* os, byte* output, word32 sz) ;
+#elif defined(NO_DEV_RANDOM)
+#elif defined(CUSTOM_RAND_GENERATE)
+#elif defined(CUSTOM_RAND_GENERATE_BLOCK)
+#elif defined(WOLFSSL_GENSEED_FORTEST)
+#elif defined(WOLFSSL_MDK_ARM)
+#elif defined(WOLFSSL_IAR_ARM)
+#elif defined(WOLFSSL_ROWLEY_ARM)
+#elif defined(WOLFSSL_EMBOS)
+#else
+    /* include headers that may be needed to get good seed */
+    #include 
+    #ifndef EBSNET
+        #include 
     #endif
-    static word32 cpuid_check = 0 ;
-    static word32 cpuid_flags = 0 ;
-    #define CPUID_RDRAND 0x4
-    #define CPUID_RDSEED 0x8
-    #define IS_INTEL_RDRAND     (cpuid_flags&CPUID_RDRAND)
-    #define IS_INTEL_RDSEED     (cpuid_flags&CPUID_RDSEED)
 #endif
 
 
-#if defined(HAVE_HASHDRBG) || defined(NO_RC4)
+#if defined(HAVE_INTEL_RDRAND) || defined(HAVE_INTEL_RDSEED)
+    static void wc_InitRng_IntelRD(void);
+    #ifdef HAVE_INTEL_RDSEED
+    static int wc_GenerateSeed_IntelRD(OS_Seed* os, byte* output, word32 sz);
+    #endif
+    #ifdef HAVE_INTEL_RDRAND
+    static int wc_GenerateRand_IntelRD(OS_Seed* os, byte* output, word32 sz);
+    #endif
+    static word32 cpuid_check = 0;
+    static word32 cpuid_flags = 0;
+    #define CPUID_RDRAND 0x4
+    #define CPUID_RDSEED 0x8
+    #define IS_INTEL_RDRAND     (cpuid_flags & CPUID_RDRAND)
+    #define IS_INTEL_RDSEED     (cpuid_flags & CPUID_RDSEED)
+#endif
 
 /* Start NIST DRBG code */
+#ifdef HAVE_HASHDRBG
 
 #define OUTPUT_BLOCK_LEN  (SHA256_DIGEST_SIZE)
 #define MAX_REQUEST_LEN   (0x10000)
@@ -209,12 +172,13 @@ int wc_FreeRng(WC_RNG* rng)
 #define DRBG_FAILED       2
 #define DRBG_CONT_FAILED  3
 
+#define RNG_HEALTH_TEST_CHECK_SIZE (SHA256_DIGEST_SIZE * 4)
+
 /* Verify max gen block len */
 #if RNG_MAX_BLOCK_LEN > MAX_REQUEST_LEN
     #error RNG_MAX_BLOCK_LEN is larger than NIST DBRG max request length
 #endif
 
-
 enum {
     drbgInitC     = 0,
     drbgReseed    = 1,
@@ -229,6 +193,10 @@ typedef struct DRBG {
     word32 lastBlock;
     byte V[DRBG_SEED_LEN];
     byte C[DRBG_SEED_LEN];
+#ifdef WOLFSSL_ASYNC_CRYPT
+    void* heap;
+    int devId;
+#endif
     byte   matchCount;
 } DRBG;
 
@@ -241,61 +209,75 @@ static int Hash_df(DRBG* drbg, byte* out, word32 outSz, byte type,
                                                   const byte* inA, word32 inASz,
                                                   const byte* inB, word32 inBSz)
 {
+    int ret = 0; /* loop may not run (outSz == 0); any error ends the loop */
     byte ctr;
     int i;
     int len;
     word32 bits = (outSz * 8); /* reverse byte order */
     Sha256 sha;
-    byte digest[SHA256_DIGEST_SIZE];
+    DECLARE_VAR(digest, byte, SHA256_DIGEST_SIZE, drbg->heap);
 
     (void)drbg;
-    #ifdef LITTLE_ENDIAN_ORDER
-        bits = ByteReverseWord32(bits);
-    #endif
+#ifdef WOLFSSL_ASYNC_CRYPT
+    if (digest == NULL)
+        return DRBG_FAILURE;
+#endif
+
+#ifdef LITTLE_ENDIAN_ORDER
+    bits = ByteReverseWord32(bits);
+#endif
     len = (outSz / OUTPUT_BLOCK_LEN)
         + ((outSz % OUTPUT_BLOCK_LEN) ? 1 : 0);
 
-    for (i = 0, ctr = 1; i < len; i++, ctr++)
-    {
-        if (wc_InitSha256(&sha) != 0)
-            return DRBG_FAILURE;
+    for (i = 0, ctr = 1; ret == 0 && i < len; i++, ctr++) {
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        ret = wc_InitSha256_ex(&sha, drbg->heap, drbg->devId);
+    #else
+        ret = wc_InitSha256(&sha);
+    #endif
+        if (ret != 0)
+            break;
 
-        if (wc_Sha256Update(&sha, &ctr, sizeof(ctr)) != 0)
-            return DRBG_FAILURE;
+        if (ret == 0)
+            ret = wc_Sha256Update(&sha, &ctr, sizeof(ctr));
+        if (ret == 0)
+            ret = wc_Sha256Update(&sha, (byte*)&bits, sizeof(bits));
 
-        if (wc_Sha256Update(&sha, (byte*)&bits, sizeof(bits)) != 0)
-            return DRBG_FAILURE;
-
-        /* churning V is the only string that doesn't have the type added */
-        if (type != drbgInitV)
-            if (wc_Sha256Update(&sha, &type, sizeof(type)) != 0)
-                return DRBG_FAILURE;
-
-        if (wc_Sha256Update(&sha, inA, inASz) != 0)
-            return DRBG_FAILURE;
-
-        if (inB != NULL && inBSz > 0)
-            if (wc_Sha256Update(&sha, inB, inBSz) != 0)
-                return DRBG_FAILURE;
-
-        if (wc_Sha256Final(&sha, digest) != 0)
-            return DRBG_FAILURE;
-
-        if (outSz > OUTPUT_BLOCK_LEN) {
-            XMEMCPY(out, digest, OUTPUT_BLOCK_LEN);
-            outSz -= OUTPUT_BLOCK_LEN;
-            out += OUTPUT_BLOCK_LEN;
+        if (ret == 0) {
+            /* churning V is the only string that doesn't have the type added */
+            if (type != drbgInitV)
+                ret = wc_Sha256Update(&sha, &type, sizeof(type));
         }
-        else {
-            XMEMCPY(out, digest, outSz);
+        if (ret == 0)
+            ret = wc_Sha256Update(&sha, inA, inASz);
+        if (ret == 0) {
+            if (inB != NULL && inBSz > 0)
+                ret = wc_Sha256Update(&sha, inB, inBSz);
+        }
+        if (ret == 0)
+            ret = wc_Sha256Final(&sha, digest);
+
+        if (ret == 0) {
+            wc_Sha256Free(&sha);
+
+            if (outSz > OUTPUT_BLOCK_LEN) {
+                XMEMCPY(out, digest, OUTPUT_BLOCK_LEN);
+                outSz -= OUTPUT_BLOCK_LEN;
+                out += OUTPUT_BLOCK_LEN;
+            }
+            else {
+                XMEMCPY(out, digest, outSz);
+            }
         }
     }
-    ForceZero(digest, sizeof(digest));
 
-    return DRBG_SUCCESS;
+    ForceZero(digest, SHA256_DIGEST_SIZE);
+
+    FREE_VAR(digest, drbg->heap);
+
+    return (ret == 0) ? DRBG_SUCCESS : DRBG_FAILURE;
 }
 
-
 /* Returns: DRBG_SUCCESS or DRBG_FAILURE */
 static int Hash_DRBG_Reseed(DRBG* drbg, const byte* entropy, word32 entropySz)
 {
@@ -331,16 +313,16 @@ static INLINE void array_add_one(byte* data, word32 dataSz)
     }
 }
 
-
 /* Returns: DRBG_SUCCESS or DRBG_FAILURE */
 static int Hash_gen(DRBG* drbg, byte* out, word32 outSz, const byte* V)
 {
+    int ret = DRBG_FAILURE;
     byte data[DRBG_SEED_LEN];
     int i;
     int len;
     word32 checkBlock;
     Sha256 sha;
-    byte digest[SHA256_DIGEST_SIZE];
+    DECLARE_VAR(digest, byte, SHA256_DIGEST_SIZE, drbg->heap);
 
     /* Special case: outSz is 0 and out is NULL. wc_Generate a block to save for
      * the continuous test. */
@@ -351,48 +333,56 @@ static int Hash_gen(DRBG* drbg, byte* out, word32 outSz, const byte* V)
 
     XMEMCPY(data, V, sizeof(data));
     for (i = 0; i < len; i++) {
-        if (wc_InitSha256(&sha) != 0 ||
-            wc_Sha256Update(&sha, data, sizeof(data)) != 0 ||
-            wc_Sha256Final(&sha, digest) != 0) {
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        ret = wc_InitSha256_ex(&sha, drbg->heap, drbg->devId);
+    #else
+        ret = wc_InitSha256(&sha);
+    #endif
+        if (ret == 0)
+            ret = wc_Sha256Update(&sha, data, sizeof(data));
+        if (ret == 0)
+            ret = wc_Sha256Final(&sha, digest);
+        if (ret == 0)
+            wc_Sha256Free(&sha);
 
-            return DRBG_FAILURE;
-        }
-
-        XMEMCPY(&checkBlock, digest, sizeof(word32));
-        if (drbg->reseedCtr > 1 && checkBlock == drbg->lastBlock) {
-            if (drbg->matchCount == 1) {
-                return DRBG_CONT_FAILURE;
+        if (ret == 0) {
+            XMEMCPY(&checkBlock, digest, sizeof(word32));
+            if (drbg->reseedCtr > 1 && checkBlock == drbg->lastBlock) {
+                if (drbg->matchCount == 1) {
+                    return DRBG_CONT_FAILURE;
+                }
+                else {
+                    if (i == len) {
+                        len++;
+                    }
+                    drbg->matchCount = 1;
+                }
             }
             else {
-                if (i == len) {
-                    len++;
-                }
-                drbg->matchCount = 1;
+                drbg->matchCount = 0;
+                drbg->lastBlock = checkBlock;
             }
-        }
-        else {
-            drbg->matchCount = 0;
-            drbg->lastBlock = checkBlock;
-        }
 
-        if (out != NULL) {
-            if (outSz >= OUTPUT_BLOCK_LEN) {
-                XMEMCPY(out, digest, OUTPUT_BLOCK_LEN);
-                outSz -= OUTPUT_BLOCK_LEN;
-                out += OUTPUT_BLOCK_LEN;
-                array_add_one(data, DRBG_SEED_LEN);
-            }
-            else if (out != NULL && outSz != 0) {
-                XMEMCPY(out, digest, outSz);
-                outSz = 0;
+            if (out != NULL) {
+                if (outSz >= OUTPUT_BLOCK_LEN) {
+                    XMEMCPY(out, digest, OUTPUT_BLOCK_LEN);
+                    outSz -= OUTPUT_BLOCK_LEN;
+                    out += OUTPUT_BLOCK_LEN;
+                    array_add_one(data, DRBG_SEED_LEN);
+                }
+                else if (out != NULL && outSz != 0) {
+                    XMEMCPY(out, digest, outSz);
+                    outSz = 0;
+                }
             }
         }
     }
     ForceZero(data, sizeof(data));
 
-    return DRBG_SUCCESS;
-}
+    FREE_VAR(digest, drbg->heap);
 
+    return (ret == 0) ? DRBG_SUCCESS : DRBG_FAILURE;
+}
 
 static INLINE void array_add(byte* d, word32 dLen, const byte* s, word32 sLen)
 {
@@ -416,13 +406,12 @@ static INLINE void array_add(byte* d, word32 dLen, const byte* s, word32 sLen)
     }
 }
 
-
 /* Returns: DRBG_SUCCESS, DRBG_NEED_RESEED, or DRBG_FAILURE */
 static int Hash_DRBG_Generate(DRBG* drbg, byte* out, word32 outSz)
 {
     int ret = DRBG_NEED_RESEED;
     Sha256 sha;
-    byte digest[SHA256_DIGEST_SIZE];
+    DECLARE_VAR(digest, byte, SHA256_DIGEST_SIZE, drbg->heap);
 
     if (drbg->reseedCtr != RESEED_INTERVAL) {
         byte type = drbgGenerateH;
@@ -430,19 +419,26 @@ static int Hash_DRBG_Generate(DRBG* drbg, byte* out, word32 outSz)
 
         ret = Hash_gen(drbg, out, outSz, drbg->V);
         if (ret == DRBG_SUCCESS) {
-            if (wc_InitSha256(&sha) != 0 ||
-                wc_Sha256Update(&sha, &type, sizeof(type)) != 0 ||
-                wc_Sha256Update(&sha, drbg->V, sizeof(drbg->V)) != 0 ||
-                wc_Sha256Final(&sha, digest) != 0) {
+        #ifdef WOLFSSL_ASYNC_CRYPT
+            ret = wc_InitSha256_ex(&sha, drbg->heap, drbg->devId);
+        #else
+            ret = wc_InitSha256(&sha);
+        #endif
+            if (ret == 0)
+                ret = wc_Sha256Update(&sha, &type, sizeof(type));
+            if (ret == 0)
+                ret = wc_Sha256Update(&sha, drbg->V, sizeof(drbg->V));
+            if (ret == 0)
+                ret = wc_Sha256Final(&sha, digest);
+            if (ret == 0)
+                wc_Sha256Free(&sha);
 
-                ret = DRBG_FAILURE;
-            }
-            else {
-                array_add(drbg->V, sizeof(drbg->V), digest, sizeof(digest));
+            if (ret == 0) {
+                array_add(drbg->V, sizeof(drbg->V), digest, SHA256_DIGEST_SIZE);
                 array_add(drbg->V, sizeof(drbg->V), drbg->C, sizeof(drbg->C));
-                #ifdef LITTLE_ENDIAN_ORDER
-                    reseedCtr = ByteReverseWord32(reseedCtr);
-                #endif
+            #ifdef LITTLE_ENDIAN_ORDER
+                reseedCtr = ByteReverseWord32(reseedCtr);
+            #endif
                 array_add(drbg->V, sizeof(drbg->V),
                                           (byte*)&reseedCtr, sizeof(reseedCtr));
                 ret = DRBG_SUCCESS;
@@ -450,19 +446,28 @@ static int Hash_DRBG_Generate(DRBG* drbg, byte* out, word32 outSz)
             drbg->reseedCtr++;
         }
     }
-    ForceZero(digest, sizeof(digest));
+    ForceZero(digest, SHA256_DIGEST_SIZE);
 
-    return ret;
+    FREE_VAR(digest, drbg->heap);
+    if (ret == DRBG_NEED_RESEED || ret == DRBG_CONT_FAILURE) return ret;
+    return (ret == 0) ? DRBG_SUCCESS : DRBG_FAILURE;
 }
 
-
 /* Returns: DRBG_SUCCESS or DRBG_FAILURE */
 static int Hash_DRBG_Instantiate(DRBG* drbg, const byte* seed, word32 seedSz,
-                                             const byte* nonce, word32 nonceSz)
+                                             const byte* nonce, word32 nonceSz,
+                                             void* heap, int devId)
 {
     int ret = DRBG_FAILURE;
 
     XMEMSET(drbg, 0, sizeof(DRBG));
+#ifdef WOLFSSL_ASYNC_CRYPT
+    drbg->heap = heap;
+    drbg->devId = devId;
+#else
+    (void)heap;
+    (void)devId;
+#endif
 
     if (Hash_df(drbg, drbg->V, sizeof(drbg->V), drbgInitV, seed, seedSz,
                                               nonce, nonceSz) == DRBG_SUCCESS &&
@@ -478,7 +483,6 @@ static int Hash_DRBG_Instantiate(DRBG* drbg, const byte* seed, word32 seedSz,
     return ret;
 }
 
-
 /* Returns: DRBG_SUCCESS or DRBG_FAILURE */
 static int Hash_DRBG_Uninstantiate(DRBG* drbg)
 {
@@ -493,72 +497,104 @@ static int Hash_DRBG_Uninstantiate(DRBG* drbg)
 
     return (compareSum == 0) ? DRBG_SUCCESS : DRBG_FAILURE;
 }
-
+#endif /* HAVE_HASHDRBG */
 /* End NIST DRBG Code */
 
 
-/* Get seed and key cipher */
-int wc_InitRng_ex(WC_RNG* rng, void* heap)
+int wc_InitRng_ex(WC_RNG* rng, void* heap, int devId)
 {
-    int ret = BAD_FUNC_ARG;
+    int ret = RNG_FAILURE_E;
+
+    if (rng == NULL)
+        return BAD_FUNC_ARG;
 
-    if (rng != NULL) {
 #ifdef WOLFSSL_HEAP_TEST
-        rng->heap = (void*)WOLFSSL_HEAP_TEST;
-        (void)heap;
+    rng->heap = (void*)WOLFSSL_HEAP_TEST;
+    (void)heap;
 #else
-        rng->heap = heap;
+    rng->heap = heap;
+#endif
+#ifdef WOLFSSL_ASYNC_CRYPT
+    rng->devId = devId;
+#else
+    (void)devId;
 #endif
-        if (wc_RNG_HealthTestLocal(0) == 0) {
-            byte entropy[ENTROPY_NONCE_SZ];
 
-            rng->drbg =
-                    (struct DRBG*)XMALLOC(sizeof(DRBG), rng->heap,
-                                                              DYNAMIC_TYPE_RNG);
-            if (rng->drbg == NULL) {
-                ret = MEMORY_E;
-            }
-            /* This doesn't use a separate nonce. The entropy input will be
-             * the default size plus the size of the nonce making the seed
-             * size. */
-            else if (wc_GenerateSeed(&rng->seed,
-                                              entropy, ENTROPY_NONCE_SZ) == 0 &&
-                     Hash_DRBG_Instantiate(rng->drbg,
-                          entropy, ENTROPY_NONCE_SZ, NULL, 0) == DRBG_SUCCESS) {
+#ifdef HAVE_HASHDRBG
+    /* init the DRBG to known values */
+    rng->drbg = NULL;
+    rng->status = DRBG_NOT_INIT;
+#endif
 
-                ret = Hash_DRBG_Generate(rng->drbg, NULL, 0);
-            }
-            else
-                ret = DRBG_FAILURE;
+#if defined(HAVE_INTEL_RDSEED) || defined(HAVE_INTEL_RDRAND)
+    /* init the intel RD seed and/or rand */
+    wc_InitRng_IntelRD();
+#endif
 
-            ForceZero(entropy, ENTROPY_NONCE_SZ);
+    /* configure async RNG source if available */
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
+    ret = wolfAsync_DevCtxInit(&rng->asyncDev, WOLFSSL_ASYNC_MARKER_RNG, devId);
+    if (ret != 0)
+        return ret;
+#endif
+
+#ifdef HAVE_INTEL_RDRAND
+    /* if CPU supports RDRAND, use it directly and by-pass DRBG init */
+    if (IS_INTEL_RDRAND)
+        return 0;
+#endif
+#ifdef HAVE_HASHDRBG
+    if (wc_RNG_HealthTestLocal(0) == 0) {
+        DECLARE_VAR(entropy, byte, ENTROPY_NONCE_SZ, rng->heap);
+
+        rng->drbg =
+                (struct DRBG*)XMALLOC(sizeof(DRBG), rng->heap,
+                                                          DYNAMIC_TYPE_RNG);
+        if (rng->drbg == NULL) {
+            ret = MEMORY_E;
+        }
+        /* This doesn't use a separate nonce. The entropy input will be
+         * the default size plus the size of the nonce making the seed
+         * size. */
+        else if (wc_GenerateSeed(&rng->seed, entropy, ENTROPY_NONCE_SZ) == 0 &&
+                 Hash_DRBG_Instantiate(rng->drbg, entropy, ENTROPY_NONCE_SZ,
+                                   NULL, 0, rng->heap, devId) == DRBG_SUCCESS) {
+            ret = Hash_DRBG_Generate(rng->drbg, NULL, 0);
         }
         else
-            ret = DRBG_CONT_FAILURE;
+            ret = DRBG_FAILURE;
 
-        if (ret == DRBG_SUCCESS) {
-            rng->status = DRBG_OK;
-            ret = 0;
-        }
-        else if (ret == DRBG_CONT_FAILURE) {
-            rng->status = DRBG_CONT_FAILED;
-            ret = DRBG_CONT_FIPS_E;
-        }
-        else if (ret == DRBG_FAILURE) {
-            rng->status = DRBG_FAILED;
-            ret = RNG_FAILURE_E;
-        }
-        else {
-            rng->status = DRBG_FAILED;
-        }
+        ForceZero(entropy, ENTROPY_NONCE_SZ);
+        FREE_VAR(entropy, rng->heap);
     }
+    else
+        ret = DRBG_CONT_FAILURE;
+
+    if (ret == DRBG_SUCCESS) {
+        rng->status = DRBG_OK;
+        ret = 0;
+    }
+    else if (ret == DRBG_CONT_FAILURE) {
+        rng->status = DRBG_CONT_FAILED;
+        ret = DRBG_CONT_FIPS_E;
+    }
+    else if (ret == DRBG_FAILURE) {
+        rng->status = DRBG_FAILED;
+        ret = RNG_FAILURE_E;
+    }
+    else {
+        rng->status = DRBG_FAILED;
+    }
+#elif defined(CUSTOM_RAND_GENERATE_BLOCK)
+    ret = 0; /* block callback supplies all output; no DRBG to set up */
+#endif /* HAVE_HASHDRBG */
 
     return ret;
 }
 
 int wc_InitRng(WC_RNG* rng)
 {
-    return wc_InitRng_ex(rng, NULL);
+    return wc_InitRng_ex(rng, NULL, INVALID_DEVID);
 }
 
 
@@ -567,14 +603,33 @@ int wc_RNG_GenerateBlock(WC_RNG* rng, byte* output, word32 sz)
 {
     int ret;
 
-    if (rng == NULL || output == NULL || sz > RNG_MAX_BLOCK_LEN)
+    if (rng == NULL || output == NULL)
+        return BAD_FUNC_ARG;
+
+#ifdef HAVE_INTEL_RDRAND
+    if (IS_INTEL_RDRAND)
+        return wc_GenerateRand_IntelRD(NULL, output, sz);
+#endif
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
+    if (rng->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RNG) {
+        return NitroxRngGenerateBlock(rng, output, sz);
+    }
+#endif
+
+#ifdef CUSTOM_RAND_GENERATE_BLOCK
+    XMEMSET(output, 0, sz);
+    return CUSTOM_RAND_GENERATE_BLOCK(output, sz);
+#endif
+
+#ifdef HAVE_HASHDRBG
+    if (sz > RNG_MAX_BLOCK_LEN)
         return BAD_FUNC_ARG;
 
     if (rng->status != DRBG_OK)
         return RNG_FAILURE_E;
 
     ret = Hash_DRBG_Generate(rng->drbg, output, sz);
-
     if (ret == DRBG_NEED_RESEED) {
         if (wc_RNG_HealthTestLocal(1) == 0) {
             byte entropy[ENTROPY_SZ];
@@ -607,6 +662,12 @@ int wc_RNG_GenerateBlock(WC_RNG* rng, byte* output, word32 sz)
         ret = RNG_FAILURE_E;
         rng->status = DRBG_FAILED;
     }
+#else
+
+    /* if we get here then there is an RNG configuration error */
+    ret = RNG_FAILURE_E;
+
+#endif /* HAVE_HASHDRBG */
 
     return ret;
 }
@@ -620,26 +681,31 @@ int wc_RNG_GenerateByte(WC_RNG* rng, byte* b)
 
 int wc_FreeRng(WC_RNG* rng)
 {
-    int ret = BAD_FUNC_ARG;
+    int ret = 0;
 
-    if (rng != NULL) {
-        if (rng->drbg != NULL) {
-            if (Hash_DRBG_Uninstantiate(rng->drbg) == DRBG_SUCCESS)
-                ret = 0;
-            else
-                ret = RNG_FAILURE_E;
+    if (rng == NULL)
+        return BAD_FUNC_ARG;
 
-            XFREE(rng->drbg, rng->heap, DYNAMIC_TYPE_RNG);
-            rng->drbg = NULL;
-        }
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
+    wolfAsync_DevCtxFree(&rng->asyncDev);
+#endif
 
-        rng->status = DRBG_NOT_INIT;
+#ifdef HAVE_HASHDRBG
+    if (rng->drbg != NULL) {
+        if (Hash_DRBG_Uninstantiate(rng->drbg) != DRBG_SUCCESS)
+            ret = RNG_FAILURE_E;
+
+        XFREE(rng->drbg, rng->heap, DYNAMIC_TYPE_RNG);
+        rng->drbg = NULL;
     }
 
+    rng->status = DRBG_NOT_INIT;
+#endif /* HAVE_HASHDRBG */
+
     return ret;
 }
 
-
+#ifdef HAVE_HASHDRBG
 int wc_RNG_HealthTest(int reseed, const byte* entropyA, word32 entropyASz,
                                   const byte* entropyB, word32 entropyBSz,
                                   byte* output, word32 outputSz)
@@ -671,7 +737,8 @@ int wc_RNG_HealthTest(int reseed, const byte* entropyA, word32 entropyASz,
     drbg = &drbg_var;
 #endif
 
-    if (Hash_DRBG_Instantiate(drbg, entropyA, entropyASz, NULL, 0) != 0) {
+    if (Hash_DRBG_Instantiate(drbg, entropyA, entropyASz, NULL, 0, NULL,
+                                                    INVALID_DEVID) != 0) {
         goto exit_rng_ht;
     }
 
@@ -801,97 +868,7 @@ static int wc_RNG_HealthTestLocal(int reseed)
     return ret;
 }
 
-
-#else /* HAVE_HASHDRBG || NO_RC4 */
-
-/* Get seed and key cipher */
-int wc_InitRng(WC_RNG* rng)
-{
-    int  ret;
-#ifdef WOLFSSL_SMALL_STACK
-    byte* key;
-    byte* junk;
-#else
-    byte key[32];
-    byte junk[256];
-#endif
-
-#ifdef HAVE_INTEL_RDGEN
-    wc_InitRng_IntelRD();
-    if(IS_INTEL_RDRAND) return 0;
-#endif
-
-#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
-    ret = wolfAsync_DevCtxInit(&rng->asyncDev, WOLFSSL_ASYNC_MARKER_RNG, INVALID_DEVID);
-    if (ret != 0) return ret;
-#endif
-
-#ifdef WOLFSSL_SMALL_STACK
-    key = (byte*)XMALLOC(32, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (key == NULL)
-        return MEMORY_E;
-
-    junk = (byte*)XMALLOC(256, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (junk == NULL) {
-        XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-        key = NULL;
-        return MEMORY_E;
-    }
-#endif
-
-    ret = wc_GenerateSeed(&rng->seed, key, 32);
-
-    if (ret == 0) {
-        wc_Arc4SetKey(&rng->cipher, key, sizeof(key));
-
-        ret = wc_RNG_GenerateBlock(rng, junk, 256); /*rid initial state*/
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(key, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    XFREE(junk, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return ret;
-}
-
-/* place a generated block in output */
-int wc_RNG_GenerateBlock(WC_RNG* rng, byte* output, word32 sz)
-{
-#ifdef HAVE_INTEL_RDGEN
-    if(IS_INTEL_RDRAND)
-        return wc_GenerateRand_IntelRD(NULL, output, sz) ;
-#endif
-#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
-    if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RNG) {
-        return NitroxRngGenerateBlock(rng, output, sz);
-    }
-#endif
-    XMEMSET(output, 0, sz);
-    wc_Arc4Process(&rng->cipher, output, output, sz);
-
-    return 0;
-}
-
-
-int wc_RNG_GenerateByte(WC_RNG* rng, byte* b)
-{
-    return wc_RNG_GenerateBlock(rng, b, 1);
-}
-
-
-int wc_FreeRng(WC_RNG* rng)
-{
-    (void)rng;
-
-#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
-    wolfAsync_DevCtxFree(&rng->asyncDev);
-#endif
-
-    return 0;
-}
-
-#endif /* HAVE_HASHDRBG || NO_RC4 */
+#endif /* HAVE_HASHDRBG */
 
 
 #ifdef HAVE_WNR
@@ -992,7 +969,7 @@ int wc_FreeNetRandom(void)
 #endif /* HAVE_WNR */
 
 
-#if defined(HAVE_INTEL_RDGEN)
+#if defined(HAVE_INTEL_RDRAND) || defined(HAVE_INTEL_RDSEED)
 
 #ifndef _MSC_VER
     #define cpuid(reg, leaf, sub)\
@@ -1016,36 +993,40 @@ int wc_FreeNetRandom(void)
 #define EDX 3
 
 static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) {
-    int got_intel_cpu=0;
+    int got_intel_cpu = 0;
     unsigned int reg[5];
 
-    reg[4] = '\0' ;
+    reg[4] = '\0';
     cpuid(reg, 0, 0);
-    if(XMEMCMP((char *)&(reg[EBX]), "Genu", 4) == 0 &&
-                XMEMCMP((char *)&(reg[EDX]), "ineI", 4) == 0 &&
-                XMEMCMP((char *)&(reg[ECX]), "ntel", 4) == 0) {
+    if (XMEMCMP((char *)&(reg[EBX]), "Genu", 4) == 0 &&
+        XMEMCMP((char *)&(reg[EDX]), "ineI", 4) == 0 &&
+        XMEMCMP((char *)&(reg[ECX]), "ntel", 4) == 0)
+    {
         got_intel_cpu = 1;
     }
     if (got_intel_cpu) {
         cpuid(reg, leaf, sub);
-        return((reg[num]>>bit)&0x1) ;
+        return ((reg[num] >> bit) & 0x1);
     }
-    return 0 ;
+    return 0;
 }
 
-static int wc_InitRng_IntelRD()
-{
-    if(cpuid_check==0) {
-        if(cpuid_flag(1, 0, ECX, 30)){ cpuid_flags |= CPUID_RDRAND ;}
-        if(cpuid_flag(7, 0, EBX, 18)){ cpuid_flags |= CPUID_RDSEED ;}
-        cpuid_check = 1 ;
+static void wc_InitRng_IntelRD(void) {
+    if (cpuid_check==0) {
+        if (cpuid_flag(1, 0, ECX, 30)) { cpuid_flags |= CPUID_RDRAND; }
+        if (cpuid_flag(7, 0, EBX, 18)) { cpuid_flags |= CPUID_RDSEED; }
+        cpuid_check = 1;
     }
-    return 1 ;
 }
 
-#define INTELRD_RETRY 32
+#ifdef WOLFSSL_ASYNC_CRYPT
+    /* need more retries if multiple cores */
+    #define INTELRD_RETRY (32 * 8)
+#else
+    #define INTELRD_RETRY 32
+#endif
 
-#if defined(HAVE_HASHDRBG) || defined(NO_RC4)
+#ifdef HAVE_INTEL_RDSEED
 
 /* return 0 on success */
 static INLINE int IntelRDseed64(word64* seed)
@@ -1053,97 +1034,110 @@ static INLINE int IntelRDseed64(word64* seed)
     unsigned char ok;
 
     __asm__ volatile("rdseed %0; setc %1":"=r"(*seed), "=qm"(ok));
-    if(ok){
-        return 0 ;
-    } else
-        return 1;
+    return (ok) ? 0 : -1;
 }
 
 /* return 0 on success */
 static INLINE int IntelRDseed64_r(word64* rnd)
 {
     int i;
-    for(i=0; i 0; sz-=8, output+=8) {
-        if(IS_INTEL_RDSEED)ret = IntelRDseed64_r((word64*)output);
-        else return 1 ;
-        if(ret)
-             return 1 ;
+    (void)os;
+
+    if (!IS_INTEL_RDSEED)
+        return -1;
+
+    for (; (sz / sizeof(word64)) > 0; sz -= sizeof(word64),
+                                                    output += sizeof(word64)) {
+        ret = IntelRDseed64_r((word64*)output);
+        if (ret != 0)
+            return ret;
     }
-    if(sz == 0)return 0 ;
+    if (sz == 0)
+        return 0;
+
+    /* handle unaligned remainder */
+    ret = IntelRDseed64_r(&rndTmp);
+    if (ret != 0)
+        return ret;
+
+    XMEMCPY(output, &rndTmp, sz);
 
-    if(IS_INTEL_RDSEED)ret = IntelRDseed64_r(&rndTmp) ;
-    else return 1 ;
-    if(ret)
-         return 1 ;
-    XMEMCPY(output, &rndTmp, sz) ;
     return 0;
 }
 
-#else /* HAVE_HASHDRBG || NO_RC4 */
+#endif /* HAVE_INTEL_RDSEED */
+
+#ifdef HAVE_INTEL_RDRAND
 
 /* return 0 on success */
-static INLINE int IntelRDrand32(unsigned int *rnd)
+static INLINE int IntelRDrand64(word64 *rnd)
 {
-    int rdrand; unsigned char ok ;
-    __asm__ volatile("rdrand %0; setc %1":"=r"(rdrand), "=qm"(ok));
-    if(ok){
-        *rnd = rdrand;
-        return 0 ;
-    } else
-        return 1;
+    unsigned char ok;
+
+    __asm__ volatile("rdrand %0; setc %1":"=r"(*rnd), "=qm"(ok));
+
+    return (ok) ? 0 : -1;
 }
 
 /* return 0 on success */
-static INLINE int IntelRDrand32_r(unsigned int *rnd)
+static INLINE int IntelRDrand64_r(word64 *rnd)
 {
-    int i ;
-    for(i=0; i 0; sz-=4, output+=4) {
-        if(IS_INTEL_RDRAND)ret = IntelRDrand32_r((word32 *)output);
-        else return 1 ;
-        if(ret)
-             return 1 ;
+    (void)os;
+
+    if (!IS_INTEL_RDRAND)
+        return -1;
+
+    for (; (sz / sizeof(word64)) > 0; sz -= sizeof(word64),
+                                                    output += sizeof(word64)) {
+        ret = IntelRDrand64_r((word64 *)output);
+        if (ret != 0)
+            return ret;
     }
-    if(sz == 0)return 0 ;
+    if (sz == 0)
+        return 0;
+
+    /* handle unaligned remainder */
+    ret = IntelRDrand64_r(&rndTmp);
+    if (ret != 0)
+        return ret;
+
+    XMEMCPY(output, &rndTmp, sz);
 
-    if(IS_INTEL_RDRAND)ret = IntelRDrand32_r(&rndTmp);
-    else return 1 ;
-    if(ret)
-         return 1 ;
-    XMEMCPY(output, &rndTmp, sz) ;
     return 0;
 }
-#endif /* defined(HAVE_HASHDRBG) || defined(NO_RC4) */
 
-#endif /* HAVE_INTEL_RDGEN */
+#endif /* HAVE_INTEL_RDRAND */
+#endif /* HAVE_INTEL_RDRAND || HAVE_INTEL_RDSEED */
 
 
-/* wc_GenerateSeed Implementations */
+/* Begin wc_GenerateSeed Implementations */
 #if defined(CUSTOM_RAND_GENERATE_SEED)
 
     /* Implement your own random generation function
@@ -1170,7 +1164,6 @@ static int wc_GenerateRand_IntelRD(OS_Seed* os, byte* output, word32 sz)
         return CUSTOM_RAND_GENERATE_SEED_OS(os, output, sz);
     }
 
-
 #elif defined(CUSTOM_RAND_GENERATE)
 
    /* Implement your own random generation function
@@ -1267,49 +1260,50 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
 
 #elif defined(MICROCHIP_PIC32)
 
-#ifdef MICROCHIP_MPLAB_HARMONY
-    #define PIC32_SEED_COUNT _CP0_GET_COUNT
-#else
-    #if !defined(WOLFSSL_MICROCHIP_PIC32MZ)
-        #include <peripheral/timer.h>
+    #ifdef MICROCHIP_MPLAB_HARMONY
+        #define PIC32_SEED_COUNT _CP0_GET_COUNT
+    #else
+        #if !defined(WOLFSSL_MICROCHIP_PIC32MZ)
+            #include <peripheral/timer.h>
+        #endif
+        #define PIC32_SEED_COUNT ReadCoreTimer
     #endif
-    #define PIC32_SEED_COUNT ReadCoreTimer
-#endif
+
     #ifdef WOLFSSL_MIC32MZ_RNG
         #include "xc.h"
         int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
         {
-            int i ;
-            byte rnd[8] ;
-            word32 *rnd32 = (word32 *)rnd ;
-            word32 size = sz ;
-            byte* op = output ;
+            int i;
+            byte rnd[8];
+            word32 *rnd32 = (word32 *)rnd;
+            word32 size = sz;
+            byte* op = output;
 
             /* This part has to be replaced with better random seed */
             RNGNUMGEN1 = ReadCoreTimer();
             RNGPOLY1 = ReadCoreTimer();
             RNGPOLY2 = ReadCoreTimer();
             RNGNUMGEN2 = ReadCoreTimer();
-#ifdef DEBUG_WOLFSSL
-            printf("GenerateSeed::Seed=%08x, %08x\n", RNGNUMGEN1, RNGNUMGEN2) ;
-#endif
+        #ifdef DEBUG_WOLFSSL
+            printf("GenerateSeed::Seed=%08x, %08x\n", RNGNUMGEN1, RNGNUMGEN2);
+        #endif
             RNGCONbits.PLEN = 0x40;
             RNGCONbits.PRNGEN = 1;
             for(i=0; i<5; i++) { /* wait for RNGNUMGEN ready */
-                volatile int x ;
-                x = RNGNUMGEN1 ;
-                x = RNGNUMGEN2 ;
+                volatile int x;
+                x = RNGNUMGEN1;
+                x = RNGNUMGEN2;
             }
             do {
                 rnd32[0] = RNGNUMGEN1;
                 rnd32[1] = RNGNUMGEN2;
 
                 for(i=0; i<8; i++, op++) {
-                    *op = rnd[i] ;
-                    size -- ;
-                    if(size==0)break ;
+                    *op = rnd[i];
+                    size --;
+                    if(size==0)break;
                 }
-            } while(size) ;
+            } while(size);
             return 0;
         }
     #else  /* WOLFSSL_MIC32MZ_RNG */
@@ -1451,46 +1445,15 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
         }
 
     #else
-        #warning "write a real random seed!!!!, just for testing now"
-
-        int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
-        {
-            int i;
-            for (i = 0; i < sz; i++ )
-                output[i] = i;
-
-            return 0;
-        }
+        #define USE_TEST_GENSEED
     #endif /* FREESCALE_K70_RNGA */
 
-#elif defined(WOLFSSL_SAFERTOS) || defined(WOLFSSL_LEANPSK) \
-   || defined(WOLFSSL_IAR_ARM)  || defined(WOLFSSL_MDK_ARM) \
-   || defined(WOLFSSL_uITRON4)  || defined(WOLFSSL_uTKERNEL2)\
-   || defined(WOLFSSL_GENSEED_FORTEST)
-
-#ifndef _MSC_VER
-#warning "write a real random seed!!!!, just for testing now"
-#else
-#pragma message("Warning: write a real random seed!!!!, just for testing now")
-#endif
-
-int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
-{
-    word32 i;
-    for (i = 0; i < sz; i++ )
-        output[i] = i;
-
-    (void)os;
-
-    return 0;
-}
-
 #elif defined(STM32F2_RNG) || defined(STM32F4_RNG)
     /*
      * wc_Generate a RNG seed using the hardware random number generator
      * on the STM32F2/F4. */
 
-#ifdef WOLFSSL_STM32_CUBEMX
+    #ifdef WOLFSSL_STM32_CUBEMX
     int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
     {
         RNG_HandleTypeDef hrng;
@@ -1511,7 +1474,7 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
 
         return 0;
     }
-#else
+    #else
     int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
     {
         int i;
@@ -1533,22 +1496,7 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
 
         return 0;
     }
-#endif /* WOLFSSL_STM32_CUBEMX */
-
-#elif defined(WOLFSSL_LPC43xx) || defined(WOLFSSL_STM32F2xx) || defined(MBED) \
-      || defined(WOLFSSL_EMBOS)
-
-    #warning "write a real random seed!!!!, just for testing now"
-
-    int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
-    {
-        int i;
-
-        for (i = 0; i < sz; i++ )
-            output[i] = i;
-
-        return 0;
-    }
+    #endif /* WOLFSSL_STM32_CUBEMX */
 
 #elif defined(WOLFSSL_TIRTOS)
 
@@ -1693,93 +1641,127 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
         return ret;
     }
 
-#elif defined(NO_DEV_RANDOM)
-
-#error "you need to write an os specific wc_GenerateSeed() here"
-
-/*
-int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
-{
-    return 0;
-}
-*/
-
-
 #elif defined(IDIRECT_DEV_RANDOM)
 
-extern int getRandom( int sz, unsigned char *output );
+    extern int getRandom( int sz, unsigned char *output );
 
-int GenerateSeed(OS_Seed* os, byte* output, word32 sz)
-{
-    int num_bytes_returned = 0;
+    int GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+    {
+        int num_bytes_returned = 0;
 
-    num_bytes_returned = getRandom( (int) sz, (unsigned char *) output );
+        num_bytes_returned = getRandom( (int) sz, (unsigned char *) output );
 
-    return 0;
-}
+        return 0;
+    }
 
+#elif defined(CUSTOM_RAND_GENERATE_BLOCK)
+    /* No wc_GenerateSeed is defined in this branch. Example setup:
+     *   #define CUSTOM_RAND_GENERATE_BLOCK myRngFunc
+     *   extern int myRngFunc(byte* output, word32 sz); */
 
-#else /* !USE_WINDOWS_API && !HAVE_RPT_SYS && !MICRIUM && !NO_DEV_RANDOM */
+#elif defined(WOLFSSL_SAFERTOS) || defined(WOLFSSL_LEANPSK) || \
+      defined(WOLFSSL_IAR_ARM)  || defined(WOLFSSL_MDK_ARM) || \
+      defined(WOLFSSL_uITRON4)  || defined(WOLFSSL_uTKERNEL2) || \
+      defined(WOLFSSL_LPC43xx)  || defined(WOLFSSL_STM32F2xx) || \
+      defined(MBED)             || defined(WOLFSSL_EMBOS) || \
+      defined(WOLFSSL_GENSEED_FORTEST)
 
-/* may block */
-int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
-{
-    int ret = 0;
+    /* These platforms have no default random seed source. Implement your
+       own wc_GenerateSeed or provide one via CUSTOM_RAND_GENERATE_BLOCK;
+       until then the test-only (insecure) seed generator is selected. */
 
+    #define USE_TEST_GENSEED
 
-#if defined(HAVE_INTEL_RDGEN) && (defined(HAVE_HASHDRBG) || defined(NO_RC4))
-    wc_InitRng_IntelRD() ; /* set cpuid_flags if not yet */
-    if(IS_INTEL_RDSEED) {
-         ret = wc_GenerateSeed_IntelRD(NULL, output, sz);
-         if (ret == 0) {
-             /* success, we're done */
+#elif defined(NO_DEV_RANDOM)
+
+    #error "you need to write an os specific wc_GenerateSeed() here"
+
+    /*
+    int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+    {
+        return 0;
+    }
+    */
+
+#else
+
+    /* may block */
+    int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+    {
+        int ret = 0;
+
+    #ifdef HAVE_INTEL_RDSEED
+        if (IS_INTEL_RDSEED) {
+             ret = wc_GenerateSeed_IntelRD(NULL, output, sz);
+             if (ret == 0) {
+                 /* success, we're done */
+                 return ret;
+             }
+    #ifdef FORCE_FAILURE_RDSEED
+             /* don't fall back to /dev/urandom */
              return ret;
-         }
-#ifdef FORCE_FAILURE_RDSEED
-         /* don't fallback to /dev/urandom */
-         return ret;
-#else
-         /* fallback to /dev/urandom attempt */
-         ret = 0;
-#endif
-    }
-
-#endif
-
-    os->fd = open("/dev/urandom",O_RDONLY);
-    if (os->fd == -1) {
-        /* may still have /dev/random */
-        os->fd = open("/dev/random",O_RDONLY);
-        if (os->fd == -1)
-            return OPEN_RAN_E;
-    }
-
-    while (sz) {
-        int len = (int)read(os->fd, output, sz);
-        if (len == -1) {
-            ret = READ_RAN_E;
-            break;
+    #else
+             /* fall back to a /dev/urandom attempt */
+             ret = 0;
+    #endif
         }
 
-        sz     -= len;
-        output += len;
+    #endif /* HAVE_INTEL_RDSEED */
 
-        if (sz) {
-#ifdef BLOCKING
-            sleep(0);             /* context switch */
-#else
-            ret = RAN_BLOCK_E;
-            break;
-#endif
+        os->fd = open("/dev/urandom",O_RDONLY);
+        if (os->fd == -1) {
+            /* may still have /dev/random */
+            os->fd = open("/dev/random",O_RDONLY);
+            if (os->fd == -1)
+                return OPEN_RAN_E;
         }
+
+        while (sz) {
+            int len = (int)read(os->fd, output, sz);
+            if (len == -1) {
+                ret = READ_RAN_E;
+                break;
+            }
+
+            sz     -= len;
+            output += len;
+
+            if (sz) {
+    #ifdef BLOCKING
+                sleep(0);             /* context switch */
+    #else
+                ret = RAN_BLOCK_E;
+                break;
+    #endif
+            }
+        }
+        close(os->fd);
+
+        return ret;
     }
-    close(os->fd);
 
-    return ret;
-}
+#endif
+
+#ifdef USE_TEST_GENSEED
+    #ifndef _MSC_VER
+        #warning "write a real random seed!!!!, just for testing now"
+    #else
+        #pragma message("Warning: write a real random seed!!!!, just for testing now")
+    #endif
+
+    int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
+    {
+        word32 i;
+        for (i = 0; i < sz; i++ )
+            output[i] = i;
+
+        (void)os;
+
+        return 0;
+    }
+#endif
+
+/* End wc_GenerateSeed */
 
-#endif /* USE_WINDOWS_API */
-#endif /* CUSTOM_RAND_GENERATE_BLOCK */
 #endif /* WC_NO_RNG */
 #endif /* HAVE_FIPS */
-
diff --git a/wolfcrypt/src/rsa.c b/wolfcrypt/src/rsa.c
old mode 100644
new mode 100755
index 274bcc4be..158e3591e
--- a/wolfcrypt/src/rsa.c
+++ b/wolfcrypt/src/rsa.c
@@ -148,11 +148,6 @@ int wc_RsaFlattenPublicKey(RsaKey* key, byte* a, word32* aSz, byte* b,
 #define ERROR_OUT(x) { ret = (x); goto done;}
 
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    static int InitAsyncRsaKey(RsaKey* key);
-    static int FreeAsyncRsaKey(RsaKey* key);
-#endif /* WOLFSSL_ASYNC_CRYPT */
-
 enum {
     RSA_STATE_NONE = 0,
 
@@ -167,18 +162,18 @@ enum {
 
 static void wc_RsaCleanup(RsaKey* key)
 {
-    if (key && key->tmp) {
+    if (key && key->data) {
         /* make sure any allocated memory is free'd */
-        if (key->tmpIsAlloc) {
+        if (key->dataIsAlloc) {
             if (key->type == RSA_PRIVATE_DECRYPT ||
                 key->type == RSA_PRIVATE_ENCRYPT) {
-                ForceZero(key->tmp, key->tmpLen);
+                ForceZero(key->data, key->dataLen);
             }
-            XFREE(key->tmp, key->heap, DYNAMIC_TYPE_RSA);
-            key->tmpIsAlloc = 0;
+            XFREE(key->data, key->heap, DYNAMIC_TYPE_WOLF_BIGINT);
+            key->dataIsAlloc = 0;
         }
-        key->tmp = NULL;
-        key->tmpLen = 0;
+        key->data = NULL;
+        key->dataLen = 0;
     }
 }
 
@@ -190,39 +185,35 @@ int wc_InitRsaKey_ex(RsaKey* key, void* heap, int devId)
         return BAD_FUNC_ARG;
     }
 
-    (void)devId;
-
     key->type = RSA_TYPE_UNKNOWN;
     key->state = RSA_STATE_NONE;
     key->heap = heap;
-    key->tmp = NULL;
-    key->tmpLen = 0;
-    key->tmpIsAlloc = 0;
+    key->data = NULL;
+    key->dataLen = 0;
+    key->dataIsAlloc = 0;
 #ifdef WC_RSA_BLINDING
     key->rng = NULL;
 #endif
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    if (devId != INVALID_DEVID) {
-        /* handle as async */
-        ret = wolfAsync_DevCtxInit(&key->asyncDev, WOLFSSL_ASYNC_MARKER_RSA,
-                                                                        devId);
-        if (ret == 0) {
-            ret = InitAsyncRsaKey(key);
-        }
-    }
-    else
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA)
+    /* handle as async */
+    ret = wolfAsync_DevCtxInit(&key->asyncDev, WOLFSSL_ASYNC_MARKER_RSA,
+                                                            key->heap, devId);
+    #ifdef WOLFSSL_CERT_GEN
+        XMEMSET(&key->certSignCtx, 0, sizeof(CertSignCtx));
+    #endif
+#else
+    (void)devId;
 #endif
-    {
-        mp_init(&key->n);
-        mp_init(&key->e);
-        mp_init(&key->d);
-        mp_init(&key->p);
-        mp_init(&key->q);
-        mp_init(&key->dP);
-        mp_init(&key->dQ);
-        mp_init(&key->u);
-    }
+
+    mp_init(&key->n);
+    mp_init(&key->e);
+    mp_init(&key->d);
+    mp_init(&key->p);
+    mp_init(&key->q);
+    mp_init(&key->dP);
+    mp_init(&key->dQ);
+    mp_init(&key->u);
 
     return ret;
 }
@@ -242,36 +233,29 @@ int wc_FreeRsaKey(RsaKey* key)
 
     wc_RsaCleanup(key);
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA) {
-        ret = FreeAsyncRsaKey(key);
-        wolfAsync_DevCtxFree(&key->asyncDev);
-    }
-    else
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA)
+    wolfAsync_DevCtxFree(&key->asyncDev, WOLFSSL_ASYNC_MARKER_RSA);
 #endif
-    {
-        if (key->type == RSA_PRIVATE) {
-            mp_forcezero(&key->u);
-            mp_forcezero(&key->dQ);
-            mp_forcezero(&key->dP);
-            mp_forcezero(&key->q);
-            mp_forcezero(&key->p);
-            mp_forcezero(&key->d);
-        }
-    #ifndef USE_FAST_MATH
-        /* private part */
-        mp_clear(&key->u);
-        mp_clear(&key->dQ);
-        mp_clear(&key->dP);
-        mp_clear(&key->q);
-        mp_clear(&key->p);
-        mp_clear(&key->d);
 
-        /* public part */
-        mp_clear(&key->e);
-        mp_clear(&key->n);
-    #endif
+    if (key->type == RSA_PRIVATE) {
+        mp_forcezero(&key->u);
+        mp_forcezero(&key->dQ);
+        mp_forcezero(&key->dP);
+        mp_forcezero(&key->q);
+        mp_forcezero(&key->p);
+        mp_forcezero(&key->d);
     }
+    /* private part */
+    mp_clear(&key->u);
+    mp_clear(&key->dQ);
+    mp_clear(&key->dP);
+    mp_clear(&key->q);
+    mp_clear(&key->p);
+    mp_clear(&key->d);
+
+    /* public part */
+    mp_clear(&key->e);
+    mp_clear(&key->n);
 
     return ret;
 }
@@ -649,14 +633,14 @@ static int wc_RsaPad_ex(const byte* input, word32 inputLen, byte* pkcsBlock,
     switch (padType)
     {
         case WC_RSA_PKCSV15_PAD:
-            //WOLFSSL_MSG("wolfSSL Using RSA PKCSV15 padding");
+            /*WOLFSSL_MSG("wolfSSL Using RSA PKCSV15 padding");*/
             ret = RsaPad(input, inputLen, pkcsBlock, pkcsBlockLen,
                                                              padValue, rng);
             break;
 
     #ifndef WC_NO_RSA_OAEP
         case WC_RSA_OAEP_PAD:
-            //WOLFSSL_MSG("wolfSSL Using RSA OAEP padding");
+            WOLFSSL_MSG("wolfSSL Using RSA OAEP padding");
             ret = RsaPad_OAEP(input, inputLen, pkcsBlock, pkcsBlockLen,
                          padValue, rng, hType, mgf, optLabel, labelLen, heap);
             break;
@@ -795,7 +779,7 @@ static int RsaUnPad(const byte *pkcsBlock, unsigned int pkcsBlockLen,
         while (i maxOutputLen) || invalid;
 
     if (invalid) {
-        WOLFSSL_MSG("RsaUnPad error, bad formatting");
+        WOLFSSL_MSG("RsaUnPad error, invalid formatting");
         return RSA_PAD_E;
     }
 
@@ -819,16 +803,15 @@ static int wc_RsaUnPad_ex(byte* pkcsBlock, word32 pkcsBlockLen, byte** out,
 {
     int ret;
 
-    switch (padType)
-    {
+    switch (padType) {
         case WC_RSA_PKCSV15_PAD:
-            WOLFSSL_MSG("wolfSSL Using RSA PKCSV15 padding");
+            /*WOLFSSL_MSG("wolfSSL Using RSA PKCSV15 un-padding");*/
             ret = RsaUnPad(pkcsBlock, pkcsBlockLen, out, padValue);
             break;
 
     #ifndef WC_NO_RSA_OAEP
         case WC_RSA_OAEP_PAD:
-            WOLFSSL_MSG("wolfSSL Using RSA OAEP padding");
+            WOLFSSL_MSG("wolfSSL Using RSA OAEP un-padding");
             ret = RsaUnPad_OAEP((byte*)pkcsBlock, pkcsBlockLen, out,
                                         hType, mgf, optLabel, labelLen, heap);
             break;
@@ -884,7 +867,7 @@ static int wc_RsaFunctionSync(const byte* in, word32 inLen, byte* out,
         /* blind */
         ret = mp_rand(&rnd, get_digit_count(&key->n), rng);
         if (ret != MP_OKAY)
-            ERROR_OUT(ret);
+            goto done;
 
         /* rndi = 1/rnd mod n */
         if (mp_invmod(&rnd, &key->n, &rndi) != MP_OKAY)
@@ -1000,20 +983,19 @@ done:
         mp_clear(&rnd);
     }
 #endif
-    if (ret == MP_EXPTMOD_E) {
-        WOLFSSL_MSG("RSA_FUNCTION MP_EXPTMOD_E: memory/config problem");
-    }
     return ret;
 }
 
-#ifdef WOLFSSL_ASYNC_CRYPT
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA)
 static int wc_RsaFunctionAsync(const byte* in, word32 inLen, byte* out,
                           word32* outLen, int type, RsaKey* key, WC_RNG* rng)
 {
     int ret = 0;
 
+    (void)rng;
+
 #ifdef WOLFSSL_ASYNC_CRYPT_TEST
-    AsyncCryptTestDev* testDev = &key->asyncDev.dev;
+    WC_ASYNC_TEST* testDev = &key->asyncDev.test;
     if (testDev->type == ASYNC_TEST_NONE) {
         testDev->type = ASYNC_TEST_RSA_FUNC;
         testDev->rsaFunc.in = in;
@@ -1031,11 +1013,22 @@ static int wc_RsaFunctionAsync(const byte* in, word32 inLen, byte* out,
     case RSA_PRIVATE_DECRYPT:
     case RSA_PRIVATE_ENCRYPT:
     #ifdef HAVE_CAVIUM
-        ret = NitroxRsaExptMod(in, inLen, key->d.dpraw, key->d.used,
-                               key->n.dpraw, key->n.used, out, outLen, key);
+        ret = NitroxRsaExptMod(in, inLen,
+                               key->d.raw.buf, key->d.raw.len,
+                               key->n.raw.buf, key->n.raw.len,
+                               out, outLen, key);
     #elif defined(HAVE_INTEL_QA)
-        /* TODO: Add support for Intel Quick Assist */
-        ret = -1;
+        #ifdef RSA_LOW_MEM
+            ret = IntelQaRsaPrivate(&key->asyncDev, in, inLen,
+                                    &key->d.raw, &key->n.raw,
+                                    out, outLen);
+        #else
+            ret = IntelQaRsaCrtPrivate(&key->asyncDev, in, inLen,
+                                &key->p.raw, &key->q.raw,
+                                &key->dP.raw, &key->dQ.raw,
+                                &key->u.raw,
+                                out, outLen);
+        #endif
     #else /* WOLFSSL_ASYNC_CRYPT_TEST */
         ret = wc_RsaFunctionSync(in, inLen, out, outLen, type, key, rng);
     #endif
@@ -1044,11 +1037,14 @@ static int wc_RsaFunctionAsync(const byte* in, word32 inLen, byte* out,
     case RSA_PUBLIC_ENCRYPT:
     case RSA_PUBLIC_DECRYPT:
     #ifdef HAVE_CAVIUM
-        ret = NitroxRsaExptMod(in, inLen, key->e.dpraw, key->e.used,
-                               key->n.dpraw, key->n.used, out, outLen, key);
+        ret = NitroxRsaExptMod(in, inLen,
+                               key->e.raw.buf, key->e.raw.len,
+                               key->n.raw.buf, key->n.raw.len,
+                               out, outLen, key);
     #elif defined(HAVE_INTEL_QA)
-        /* TODO: Add support for Intel Quick Assist */
-        ret = -1;
+        ret = IntelQaRsaPublic(&key->asyncDev, in, inLen,
+                               &key->e.raw, &key->n.raw,
+                               out, outLen);
     #else /* WOLFSSL_ASYNC_CRYPT_TEST */
         ret = wc_RsaFunctionSync(in, inLen, out, outLen, type, key, rng);
     #endif
@@ -1060,7 +1056,7 @@ static int wc_RsaFunctionAsync(const byte* in, word32 inLen, byte* out,
 
     return ret;
 }
-#endif /* WOLFSSL_ASYNC_CRYPT */
+#endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_RSA */
 
 int wc_RsaFunction(const byte* in, word32 inLen, byte* out,
                           word32* outLen, int type, RsaKey* key, WC_RNG* rng)
@@ -1072,8 +1068,9 @@ int wc_RsaFunction(const byte* in, word32 inLen, byte* out,
         return BAD_FUNC_ARG;
     }
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA) {
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA)
+    if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA &&
+                                                        key->n.raw.len > 0) {
         ret = wc_RsaFunctionAsync(in, inLen, out, outLen, type, key, rng);
     }
     else
@@ -1082,10 +1079,17 @@ int wc_RsaFunction(const byte* in, word32 inLen, byte* out,
         ret = wc_RsaFunctionSync(in, inLen, out, outLen, type, key, rng);
     }
 
-    if (ret == MP_EXPTMOD_E) {
-        /* This can happen due to incorrectly set FP_MAX_BITS or missing XREALLOC */
-        WOLFSSL_MSG("RSA_FUNCTION MP_EXPTMOD_E: memory/config problem");
+    /* handle error */
+    if (ret < 0 && ret != WC_PENDING_E) {
+        if (ret == MP_EXPTMOD_E) {
+            /* This can happen due to incorrectly set FP_MAX_BITS or missing XREALLOC */
+            WOLFSSL_MSG("RSA_FUNCTION MP_EXPTMOD_E: memory/config problem");
+        }
+
+        key->state = RSA_STATE_NONE;
+        wc_RsaCleanup(key);
     }
+
     return ret;
 }
 
@@ -1112,10 +1116,10 @@ static int RsaPublicEncryptEx(const byte* in, word32 inLen, byte* out,
                             enum wc_HashType hash, int mgf,
                             byte* label, word32 labelSz, WC_RNG* rng)
 {
-    int ret = BAD_FUNC_ARG, sz;
+    int ret, sz;
 
     if (in == NULL || inLen == 0 || out == NULL || key == NULL) {
-        return ret;
+        return BAD_FUNC_ARG;
     }
 
     sz = wc_RsaEncryptSize(key);
@@ -1134,46 +1138,59 @@ static int RsaPublicEncryptEx(const byte* in, word32 inLen, byte* out,
     switch (key->state) {
     case RSA_STATE_NONE:
     case RSA_STATE_ENCRYPT_PAD:
+        key->state = RSA_STATE_ENCRYPT_PAD;
 
-    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
-        if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA) {
-            if (rsa_type == RSA_PUBLIC_ENCRYPT && pad_value == RSA_BLOCK_TYPE_2) {
-                key->state = RSA_STATE_ENCRYPT_RES;
-                key->tmpLen = key->n.used;
-                return NitroxRsaPublicEncrypt(in, inLen, out, outLen, key);
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA) && \
+            defined(HAVE_CAVIUM)
+        if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA && key->n.raw.buf) {
+            /* Async operations that include padding */
+            if (rsa_type == RSA_PUBLIC_ENCRYPT &&
+                                                pad_value == RSA_BLOCK_TYPE_2) {
+                key->state = RSA_STATE_ENCRYPT_EXPTMOD;
+                key->dataLen = key->n.raw.len;
+                ret = NitroxRsaPublicEncrypt(in, inLen, out, outLen, key);
+                break;
             }
-            else if (rsa_type == RSA_PRIVATE_ENCRYPT && pad_value == RSA_BLOCK_TYPE_1) {
-                key->state = RSA_STATE_ENCRYPT_RES;
-                key->tmpLen = key->n.used;
-                return NitroxRsaSSL_Sign(in, inLen, out, outLen, key);
+            else if (rsa_type == RSA_PRIVATE_ENCRYPT &&
+                                                pad_value == RSA_BLOCK_TYPE_1) {
+                key->state = RSA_STATE_ENCRYPT_EXPTMOD;
+                key->dataLen = key->n.raw.len;
+                ret = NitroxRsaSSL_Sign(in, inLen, out, outLen, key);
+                break;
             }
         }
     #endif
 
+        ret = wc_RsaPad_ex(in, inLen, out, sz, pad_value, rng, pad_type, hash,
+                                                mgf, label, labelSz, key->heap);
+        if (ret < 0) {
+            break;
+        }
+
         key->state = RSA_STATE_ENCRYPT_EXPTMOD;
-
-        ret = wc_RsaPad_ex(in, inLen, out, sz, pad_value, rng,
-                               pad_type, hash, mgf, label, labelSz, key->heap);
-        if (ret < 0) {
-            break;
-        }
         /* fall through */
+
     case RSA_STATE_ENCRYPT_EXPTMOD:
-        key->state = RSA_STATE_ENCRYPT_RES;
 
-        key->tmpLen = outLen;
-        ret = wc_RsaFunction(out, sz, out, &key->tmpLen, rsa_type, key, rng);
+        key->dataLen = outLen;
+        ret = wc_RsaFunction(out, sz, out, &key->dataLen, rsa_type, key, rng);
+
+        if (ret >= 0 || ret == WC_PENDING_E) {
+            key->state = RSA_STATE_ENCRYPT_RES;
+        }
         if (ret < 0) {
             break;
         }
+
         /* fall through */
+
     case RSA_STATE_ENCRYPT_RES:
-        key->state = RSA_STATE_NONE;
-        ret = key->tmpLen;
+        ret = key->dataLen;
         break;
 
     default:
         ret = BAD_STATE_E;
+        break;
     }
 
     /* if async pending then return and skip done cleanup below */
@@ -1182,6 +1199,7 @@ static int RsaPublicEncryptEx(const byte* in, word32 inLen, byte* out,
     }
 
     key->state = RSA_STATE_NONE;
+    wc_RsaCleanup(key);
 
     return ret;
 }
@@ -1207,68 +1225,80 @@ static int RsaPrivateDecryptEx(byte* in, word32 inLen, byte* out,
                             enum wc_HashType hash, int mgf,
                             byte* label, word32 labelSz, WC_RNG* rng)
 {
-    int ret = BAD_FUNC_ARG;
+    int ret = RSA_WRONG_TYPE_E;
 
     if (in == NULL || inLen == 0 || out == NULL || key == NULL) {
-        return ret;
+        return BAD_FUNC_ARG;
     }
 
     switch (key->state) {
     case RSA_STATE_NONE:
     case RSA_STATE_DECRYPT_EXPTMOD:
-    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
+        key->state = RSA_STATE_DECRYPT_EXPTMOD;
+
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA) && \
+            defined(HAVE_CAVIUM)
+        /* Async operations that include padding */
         if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA) {
-            key->tmpLen = 0;
-            if (rsa_type == RSA_PRIVATE_DECRYPT && pad_value == RSA_BLOCK_TYPE_2) {
-                key->state = RSA_STATE_DECRYPT_RES;
-                key->tmp = NULL;
+            key->dataLen = 0;
+            if (rsa_type == RSA_PRIVATE_DECRYPT &&
+                                                pad_value == RSA_BLOCK_TYPE_2) {
+                key->state = RSA_STATE_DECRYPT_UNPAD;
+                key->data = NULL;
                 ret = NitroxRsaPrivateDecrypt(in, inLen, out, outLen, key);
                 if (ret > 0) {
                     if (outPtr)
                         *outPtr = in;
                 }
-                return ret;
+                break;
             }
-            else if (rsa_type == RSA_PUBLIC_DECRYPT && pad_value == RSA_BLOCK_TYPE_1) {
-                key->state = RSA_STATE_DECRYPT_RES;
-                key->tmp = NULL;
-                return NitroxRsaSSL_Verify(in, inLen, out, outLen, key);
+            else if (rsa_type == RSA_PUBLIC_DECRYPT &&
+                                                pad_value == RSA_BLOCK_TYPE_1) {
+                key->state = RSA_STATE_DECRYPT_UNPAD;
+                key->data = NULL;
+                ret = NitroxRsaSSL_Verify(in, inLen, out, outLen, key);
+                break;
             }
         }
     #endif
 
-        key->state = RSA_STATE_DECRYPT_UNPAD;
-
         /* verify the tmp ptr is NULL, otherwise indicates bad state */
-        if (key->tmp != NULL) {
-            ERROR_OUT(BAD_STATE_E);
+        if (key->data != NULL) {
+            ret = BAD_STATE_E;
+            break;
         }
 
         /* if not doing this inline then allocate a buffer for it */
-        key->tmpLen = inLen;
+        key->dataLen = inLen;
         if (outPtr == NULL) {
-            key->tmp = (byte*)XMALLOC(inLen, key->heap, DYNAMIC_TYPE_RSA);
-            key->tmpIsAlloc = 1;
-            if (key->tmp == NULL) {
-                ERROR_OUT(MEMORY_E);
+            key->data = (byte*)XMALLOC(inLen, key->heap, DYNAMIC_TYPE_WOLF_BIGINT);
+            key->dataIsAlloc = 1;
+            if (key->data == NULL) {
+                ret = MEMORY_E;
+                break;
             }
-            XMEMCPY(key->tmp, in, inLen);
+            XMEMCPY(key->data, in, inLen);
         }
         else {
-            key->tmp = out;
+            key->data = out;
+        }
+        ret = wc_RsaFunction(key->data, inLen, key->data, &key->dataLen, rsa_type,
+                                                                      key, rng);
+
+        if (ret >= 0 || ret == WC_PENDING_E) {
+            key->state = RSA_STATE_DECRYPT_UNPAD;
         }
-        ret = wc_RsaFunction(key->tmp, inLen, key->tmp, &key->tmpLen,
-                                                        rsa_type, key, rng);
         if (ret < 0) {
             break;
         }
+
         /* fall through */
+
     case RSA_STATE_DECRYPT_UNPAD:
     {
         byte* pad = NULL;
-        key->state = RSA_STATE_DECRYPT_RES;
-        ret = wc_RsaUnPad_ex(key->tmp, key->tmpLen, &pad, pad_value, pad_type,
-                                        hash, mgf, label, labelSz, key->heap);
+        ret = wc_RsaUnPad_ex(key->data, key->dataLen, &pad, pad_value, pad_type,
+                                          hash, mgf, label, labelSz, key->heap);
         if (ret > 0 && ret <= (int)outLen && pad != NULL) {
             /* only copy output if not inline */
             if (outPtr == NULL) {
@@ -1284,18 +1314,23 @@ static int RsaPrivateDecryptEx(byte* in, word32 inLen, byte* out,
         if (ret < 0) {
             break;
         }
+
+        key->state = RSA_STATE_DECRYPT_RES;
         /* fall through */
     }
     case RSA_STATE_DECRYPT_RES:
-        key->state = RSA_STATE_NONE;
-    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA) && \
+            defined(HAVE_CAVIUM)
         if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA) {
-            ret = key->tmpLen;
+            /* return event ret */
+            ret = key->asyncDev.event.ret;
         }
     #endif
         break;
+
     default:
         ret = BAD_STATE_E;
+        break;
     }
 
     /* if async pending then return and skip done cleanup below */
@@ -1303,8 +1338,6 @@ static int RsaPrivateDecryptEx(byte* in, word32 inLen, byte* out,
         return ret;
     }
 
-done:
-
     key->state = RSA_STATE_NONE;
     wc_RsaCleanup(key);
 
@@ -1427,11 +1460,6 @@ int wc_RsaSSL_Sign(const byte* in, word32 inLen, byte* out, word32 outLen,
 
 int wc_RsaEncryptSize(RsaKey* key)
 {
-#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
-    if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA) {
-        return key->n.used;
-    }
-#endif
     return mp_unsigned_bin_size(&key->n);
 }
 
@@ -1481,6 +1509,26 @@ int wc_MakeRsaKey(RsaKey* key, int size, long e, WC_RNG* rng)
     if (e < 3 || (e & 1) == 0)
         return BAD_FUNC_ARG;
 
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA)
+    if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA) {
+    #ifdef HAVE_CAVIUM
+        /* TODO: Not implemented */
+    #elif defined(HAVE_INTEL_QA)
+        /* TODO: Not implemented */
+    #else
+        WC_ASYNC_TEST* testDev = &key->asyncDev.test;
+        if (testDev->type == ASYNC_TEST_NONE) {
+            testDev->type = ASYNC_TEST_RSA_MAKE;
+            testDev->rsaMake.rng = rng;
+            testDev->rsaMake.key = key;
+            testDev->rsaMake.size = size;
+            testDev->rsaMake.e = e;
+            return WC_PENDING_E;
+        }
+    #endif
+    }
+#endif
+
     if ((err = mp_init_multi(&p, &q, &tmp1, &tmp2, &tmp3, NULL)) != MP_OKAY)
         return err;
 
@@ -1589,116 +1637,6 @@ int wc_RsaSetRNG(RsaKey* key, WC_RNG* rng)
 #endif /* WC_RSA_BLINDING */
 
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-int wc_RsaAsyncHandle(RsaKey* key, WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event)
-{
-    int ret;
-
-    if (key == NULL || queue == NULL || event == NULL) {
-        return BAD_FUNC_ARG;
-    }
-
-    /* make sure this rsa context had "wc_RsaAsyncInit" called on it */
-    if (key->asyncDev.marker != WOLFSSL_ASYNC_MARKER_RSA) {
-        return ASYNC_INIT_E;
-    }
-
-    /* setup the event and push to queue */
-    ret = wolfAsync_EventInit(event, WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT, &key->asyncDev);
-    if (ret == 0) {
-        ret = wolfEventQueue_Push(queue, event);
-    }
-
-    /* check for error (helps with debugging) */
-    if (ret != 0) {
-        WOLFSSL_MSG("wc_RsaAsyncHandle failed");
-    }
-    return ret;
-}
-
-int wc_RsaAsyncWait(int ret, RsaKey* key)
-{
-    if (ret == WC_PENDING_E) {
-        WOLF_EVENT event;
-        XMEMSET(&event, 0, sizeof(event));
-        ret = wolfAsync_EventInit(&event, WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT, &key->asyncDev);
-        if (ret == 0) {
-            ret = wolfAsync_EventWait(&event);
-            if (ret == 0 && event.ret >= 0) {
-                ret = event.ret;
-            }
-        }
-    }
-    return ret;
-}
-
-/* Initialize async RSA key */
-static int InitAsyncRsaKey(RsaKey* key)
-{
-    XMEMSET(&key->n,  0, sizeof(key->n));
-    XMEMSET(&key->e,  0, sizeof(key->e));
-    XMEMSET(&key->d,  0, sizeof(key->d));
-    XMEMSET(&key->p,  0, sizeof(key->p));
-    XMEMSET(&key->q,  0, sizeof(key->q));
-    XMEMSET(&key->dP, 0, sizeof(key->dP));
-    XMEMSET(&key->dQ, 0, sizeof(key->dQ));
-    XMEMSET(&key->u,  0, sizeof(key->u));
-
-    return 0;
-}
-
-/* Free async RSA key */
-static int FreeAsyncRsaKey(RsaKey* key)
-{
-    if (key->type == RSA_PRIVATE) {
-        if (key->d.dpraw) {
-            ForceZero(key->d.dpraw, key->d.used);
-        #ifndef USE_FAST_MATH
-            XFREE(key->d.dpraw,  key->heap, DYNAMIC_TYPE_ASYNC_RSA);
-        #endif
-        }
-        if (key->p.dpraw) {
-            ForceZero(key->p.dpraw, key->p.used);
-        #ifndef USE_FAST_MATH
-            XFREE(key->p.dpraw,  key->heap, DYNAMIC_TYPE_ASYNC_RSA);
-        #endif
-        }
-        if (key->q.dpraw) {
-            ForceZero(key->q.dpraw, key->q.used);
-        #ifndef USE_FAST_MATH
-            XFREE(key->q.dpraw,  key->heap, DYNAMIC_TYPE_ASYNC_RSA);
-        #endif
-        }
-        if (key->dP.dpraw) {
-            ForceZero(key->dP.dpraw, key->dP.used);
-        #ifndef USE_FAST_MATH
-            XFREE(key->dP.dpraw, key->heap, DYNAMIC_TYPE_ASYNC_RSA);
-        #endif
-        }
-        if (key->dQ.dpraw) {
-            ForceZero(key->dQ.dpraw, key->dQ.used);
-        #ifndef USE_FAST_MATH
-            XFREE(key->dQ.dpraw, key->heap, DYNAMIC_TYPE_ASYNC_RSA);
-        #endif
-        }
-        if (key->u.dpraw) {
-            ForceZero(key->u.dpraw, key->u.used);
-        #ifndef USE_FAST_MATH
-            XFREE(key->u.dpraw,  key->heap, DYNAMIC_TYPE_ASYNC_RSA);
-        #endif
-        }
-    }
-
-#ifndef USE_FAST_MATH
-    XFREE(key->n.dpraw,  key->heap, DYNAMIC_TYPE_ASYNC_RSA);
-    XFREE(key->e.dpraw,  key->heap, DYNAMIC_TYPE_ASYNC_RSA);
-#endif
-
-    return InitAsyncRsaKey(key);  /* reset pointers */
-}
-
-#endif /* WOLFSSL_ASYNC_CRYPT */
-
 #undef ERROR_OUT
 
 #endif /* HAVE_FIPS */
diff --git a/wolfcrypt/src/sha.c b/wolfcrypt/src/sha.c
old mode 100644
new mode 100755
index 893e02fb4..f98cb7272
--- a/wolfcrypt/src/sha.c
+++ b/wolfcrypt/src/sha.c
@@ -26,7 +26,6 @@
 
 #include 
 
-
 #if !defined(NO_SHA)
 
 #include 
@@ -41,6 +40,12 @@
         }
 	    return InitSha_fips(sha);
 	}
+    int wc_InitSha_ex(Sha* sha, void* heap, int devId)
+    {
+        (void)heap;
+        (void)devId;
+        return InitSha_fips(sha);
+    }
 
 	int wc_ShaUpdate(Sha* sha, const byte* data, word32 len)
 	{
@@ -57,9 +62,20 @@
         }
 	    return ShaFinal_fips(sha,out);
     }
+    void wc_ShaFree(Sha* sha)
+    {
+        (void)sha;
+        /* Not supported in FIPS */
+    }
 
 #else /* else build without fips */
 
+
+#if defined(WOLFSSL_TI_HASH)
+    /* #include <wolfcrypt/src/port/ti/ti-hash.c> included by wc_port.c */
+
+#else
+
 #include 
 #ifdef NO_INLINE
     #include 
@@ -69,13 +85,8 @@
 #endif
 
 
-/****************************************/
-/* SHA Hardware Variations */
-/****************************************/
-#if defined(WOLFSSL_TI_HASH)
-    /* #include <wolfcrypt/src/port/ti/ti-hash.c> included by wc_port.c */
-
-#elif defined(WOLFSSL_PIC32MZ_HASH)
+/* Hardware Acceleration */
+#if defined(WOLFSSL_PIC32MZ_HASH)
     #define USE_SHA_SOFTWARE_IMPL
     #define wc_InitSha   wc_InitSha_sw
     #define wc_ShaUpdate wc_ShaUpdate_sw
@@ -88,7 +99,7 @@
      * library. (See note in README).
      */
 
-    int wc_InitSha(Sha* sha)
+    static int InitSha(Sha* sha)
     {
         /* STM32 struct notes:
          * sha->buffer  = first 4 bytes used to hold partial block if needed
@@ -201,7 +212,7 @@
 #elif defined(FREESCALE_LTC_SHA)
 
     #include "fsl_ltc.h"
-    int wc_InitSha(Sha* sha)
+    static int InitSha(Sha* sha)
     {
         LTC_HASH_Init(LTC_BASE, &sha->ctx, kLTC_Sha1, NULL, 0);
         return 0;
@@ -227,7 +238,7 @@
     #define USE_SHA_SOFTWARE_IMPL /* Only for API's, actual transform is here */
     #define XSHATRANSFORM   ShaTransform
 
-    int wc_InitSha(Sha* sha)
+    static int InitSha(Sha* sha)
     {
         int ret = 0;
         ret = wolfSSL_CryptHwMutexLock();
@@ -259,7 +270,7 @@
     /* Software implementation */
     #define USE_SHA_SOFTWARE_IMPL
 
-    int wc_InitSha(Sha* sha)
+    static int InitSha(Sha* sha)
     {
         int ret = 0;
 
@@ -279,7 +290,7 @@
         return ret;
     }
 
-#endif
+#endif /* End Hardware Acceleration */
 
 
 /* Software implementation */
@@ -396,6 +407,28 @@ static INLINE void AddLength(Sha* sha, word32 len)
         sha->hiLen++;                       /* carry low to high */
 }
 
+/* Initializes a SHA-1 context: stores the heap hint, runs InitSha() and,
+ * when async SHA support is compiled in, calls wolfAsync_DevCtxInit().
+ * Returns BAD_FUNC_ARG for a NULL sha, otherwise the result of the last
+ * of those calls that ran. */
+int wc_InitSha_ex(Sha* sha, void* heap, int devId)
+{
+    int status;
+
+    if (sha == NULL) {
+        return BAD_FUNC_ARG;
+    }
+    sha->heap = heap;
+    status = InitSha(sha);
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA)
+    if (status == 0)
+        status = wolfAsync_DevCtxInit(&sha->asyncDev,
+                            WOLFSSL_ASYNC_MARKER_SHA, sha->heap, devId);
+#else
+    (void)devId;
+#endif /* WOLFSSL_ASYNC_CRYPT */
+    return status;
+}
 
 int wc_ShaUpdate (Sha* sha, const byte* data, word32 len)
 {
@@ -408,6 +441,18 @@ int wc_ShaUpdate (Sha* sha, const byte* data, word32 len)
     /* do block size increments */
     local = (byte*)sha->buffer;
 
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA)
+    if (sha->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA) {
+    #if defined(HAVE_INTEL_QA)
+        return IntelQaSymSha(&sha->asyncDev, NULL, data, len);
+    #endif
+    }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+    /* check that internal buffLen is valid */
+    if (sha->buffLen > SHA_BLOCK_SIZE)
+        return BUFFER_E;
+
     while (len) {
         word32 add = min(len, SHA_BLOCK_SIZE - sha->buffLen);
         XMEMCPY(&local[sha->buffLen], data, add);
@@ -439,6 +484,14 @@ int wc_ShaFinal(Sha* sha, byte* hash)
 
     local = (byte*)sha->buffer;
 
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA)
+    if (sha->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA) {
+    #if defined(HAVE_INTEL_QA)
+        return IntelQaSymSha(&sha->asyncDev, hash, NULL, SHA_DIGEST_SIZE);
+    #endif
+    }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
     AddLength(sha, sha->buffLen);  /* before adding pads */
 
     local[sha->buffLen++] = 0x80;  /* add 1 */
@@ -482,10 +535,61 @@ int wc_ShaFinal(Sha* sha, byte* hash)
 #endif
     XMEMCPY(hash, sha->digest, SHA_DIGEST_SIZE);
 
-    return wc_InitSha(sha);  /* reset state */
+    return InitSha(sha); /* reset state */
 }
 
 #endif /* USE_SHA_SOFTWARE_IMPL */
 
+
+int wc_InitSha(Sha* sha)
+{   /* default init: no heap hint, INVALID_DEVID as the device id */
+    return wc_InitSha_ex(sha, NULL, INVALID_DEVID);
+}
+
+void wc_ShaFree(Sha* sha)
+{   /* frees the async device context in async SHA builds; else a no-op */
+    if (sha == NULL)
+        return;
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA)
+    wolfAsync_DevCtxFree(&sha->asyncDev, WOLFSSL_ASYNC_MARKER_SHA);
+#endif /* WOLFSSL_ASYNC_CRYPT */
+}
+
+#endif /* !WOLFSSL_TI_HASH */
 #endif /* HAVE_FIPS */
+
+#ifndef WOLFSSL_TI_HASH
+/* Finalizes a copy of sha into hash; sha itself is never passed to
+ * wc_ShaFinal. Returns BAD_FUNC_ARG on NULL input, else the callee result. */
+int wc_ShaGetHash(Sha* sha, byte* hash)
+{
+    Sha scratch;
+    int status;
+
+    if (sha == NULL || hash == NULL) return BAD_FUNC_ARG;
+
+    status = wc_ShaCopy(sha, &scratch);
+    if (status != 0) return status;
+
+    return wc_ShaFinal(&scratch, hash);
+}
+
+/* Duplicates src into dst with a flat memory copy, plus wolfAsync_DevCopy()
+ * in async builds. Returns BAD_FUNC_ARG if either pointer is NULL. */
+int wc_ShaCopy(Sha* src, Sha* dst)
+{
+    int status = 0;
+
+    if (src == NULL || dst == NULL) {
+        return BAD_FUNC_ARG;
+    }
+    XMEMCPY(dst, src, sizeof(Sha));
+#ifdef WOLFSSL_ASYNC_CRYPT
+    status = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
+#endif
+    return status;
+}
+#endif /* !WOLFSSL_TI_HASH */
+
 #endif /* !NO_SHA */
diff --git a/wolfcrypt/src/sha256.c b/wolfcrypt/src/sha256.c
old mode 100644
new mode 100755
index 41ced5193..4b5fbd877
--- a/wolfcrypt/src/sha256.c
+++ b/wolfcrypt/src/sha256.c
@@ -27,259 +27,55 @@
 #endif
 
 #include 
+
+#if !defined(NO_SHA256)
+
 #include 
 #include 
 
-#if !defined(NO_SHA256)
+/* fips wrapper calls, user can call direct */
 #ifdef HAVE_FIPS
 
-int wc_InitSha256(Sha256* sha)
-{
-    if (sha == NULL) {
-        return BAD_FUNC_ARG;
+    /* FIPS wrapper: validates sha, then defers to InitSha256_fips() */
+    int wc_InitSha256(Sha256* sha)
+    {
+        if (sha == NULL)
+            return BAD_FUNC_ARG;
+        return InitSha256_fips(sha);
     }
-    return InitSha256_fips(sha);
-}
-
-
-int wc_Sha256Update(Sha256* sha, const byte* data, word32 len)
-{
-    if (sha == NULL ||  (data == NULL && len > 0)) {
-        return BAD_FUNC_ARG;
+    /* FIPS wrapper: heap and devId are unused; a NULL sha is rejected */
+    int wc_InitSha256_ex(Sha256* sha, void* heap, int devId)
+    {
+        (void)heap; (void)devId;
+        return (sha == NULL) ? BAD_FUNC_ARG : InitSha256_fips(sha);
     }
-    return Sha256Update_fips(sha, data, len);
-}
-
-
-int wc_Sha256Final(Sha256* sha, byte* out)
-{
-    if (sha == NULL || out == NULL) {
-        return BAD_FUNC_ARG;
+    /* FIPS wrapper: rejects NULL sha, or NULL data with a non-zero len */
+    int wc_Sha256Update(Sha256* sha, const byte* data, word32 len)
+    {
+        if (sha == NULL || (data == NULL && len > 0))
+            return BAD_FUNC_ARG;
+        return Sha256Update_fips(sha, data, len);
+    }
+    /* FIPS wrapper: rejects a NULL sha or NULL out before finalizing */
+    int wc_Sha256Final(Sha256* sha, byte* out)
+    {
+        if (sha == NULL || out == NULL)
+            return BAD_FUNC_ARG;
+        return Sha256Final_fips(sha, out);
+    }
+    void wc_Sha256Free(Sha256* sha)
+    {
+        (void)sha;
+        /* No-op: context freeing is not supported in the FIPS build */
     }
-    return Sha256Final_fips(sha, out);
-}
 
 #else /* else build without fips */
 
-#if !defined(NO_SHA256) && defined(WOLFSSL_TI_HASH)
+
+#if defined(WOLFSSL_TI_HASH)
     /* #include  included by wc_port.c */
 #else
 
-#if !defined (ALIGN32)
-    #if defined (__GNUC__)
-        #define ALIGN32 __attribute__ ( (aligned (32)))
-    #elif defined(_MSC_VER)
-        /* disable align warning, we want alignment ! */
-        #pragma warning(disable: 4324)
-        #define ALIGN32 __declspec (align (32))
-    #else
-        #define ALIGN32
-    #endif
-#endif
-
-#ifdef WOLFSSL_PIC32MZ_HASH
-#define wc_InitSha256   wc_InitSha256_sw
-#define wc_Sha256Update wc_Sha256Update_sw
-#define wc_Sha256Final  wc_Sha256Final_sw
-#endif
-
-#ifdef HAVE_FIPS
-    /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
-    #define FIPS_NO_WRAPPERS
-#endif
-
-#if defined(USE_INTEL_SPEEDUP)
-#define HAVE_INTEL_AVX1
-#define HAVE_INTEL_AVX2
-#endif
-
-#if defined(HAVE_INTEL_AVX2)
-#define HAVE_INTEL_RORX
-#endif
-
-
-/*****
-Intel AVX1/AVX2 Macro Control Structure
-
-#define HAVE_INTEL_AVX1
-#define HAVE_INTEL_AVX2
-
-#define HAVE_INTEL_RORX
-
-
-int InitSha256(Sha256* sha256) {
-     Save/Recover XMM, YMM
-     ...
-}
-
-#if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
-  Transform() ; Function prototype
-#else
-  Transform() {   }
-  int Sha256Final() {
-     Save/Recover XMM, YMM
-     ...
-  }
-#endif
-
-#if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
-    #if defined(HAVE_INTEL_RORX
-         #define RND with rorx instuction
-    #else
-        #define RND
-    #endif
-#endif
-
-#if defined(HAVE_INTEL_AVX1)
-
-   #define XMM Instructions/inline asm
-
-   int Transform() {
-       Stitched Message Sched/Round
-    }
-
-#elif defined(HAVE_INTEL_AVX2)
-
-  #define YMM Instructions/inline asm
-
-  int Transform() {
-      More granural Stitched Message Sched/Round
-  }
-
-*/
-
-
-#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-
-/* Each platform needs to query info type 1 from cpuid to see if aesni is
- * supported. Also, let's setup a macro for proper linkage w/o ABI conflicts
- */
-
-#ifndef _MSC_VER
-    #define cpuid(reg, leaf, sub)\
-            __asm__ __volatile__ ("cpuid":\
-             "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\
-             "a" (leaf), "c"(sub));
-
-    #define XASM_LINK(f) asm(f)
-#else
-
-    #include 
-    #define cpuid(a,b) __cpuid((int*)a,b)
-
-    #define XASM_LINK(f)
-
-#endif /* _MSC_VER */
-
-#define EAX 0
-#define EBX 1
-#define ECX 2
-#define EDX 3
-
-#define CPUID_AVX1   0x1
-#define CPUID_AVX2   0x2
-#define CPUID_RDRAND 0x4
-#define CPUID_RDSEED 0x8
-#define CPUID_BMI2   0x10   /* MULX, RORX */
-
-#define IS_INTEL_AVX1       (cpuid_flags&CPUID_AVX1)
-#define IS_INTEL_AVX2       (cpuid_flags&CPUID_AVX2)
-#define IS_INTEL_BMI2       (cpuid_flags&CPUID_BMI2)
-#define IS_INTEL_RDRAND     (cpuid_flags&CPUID_RDRAND)
-#define IS_INTEL_RDSEED     (cpuid_flags&CPUID_RDSEED)
-
-static word32 cpuid_check = 0 ;
-static word32 cpuid_flags = 0 ;
-
-static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) {
-    int got_intel_cpu=0;
-    unsigned int reg[5];
-
-    reg[4] = '\0' ;
-    cpuid(reg, 0, 0);
-    if(XMEMCMP((char *)&(reg[EBX]), "Genu", 4) == 0 &&
-                XMEMCMP((char *)&(reg[EDX]), "ineI", 4) == 0 &&
-                XMEMCMP((char *)&(reg[ECX]), "ntel", 4) == 0) {
-        got_intel_cpu = 1;
-    }
-    if (got_intel_cpu) {
-        cpuid(reg, leaf, sub);
-        return((reg[num]>>bit)&0x1) ;
-    }
-    return 0 ;
-}
-
-static int set_cpuid_flags(void) {
-    if(cpuid_check==0) {
-        if(cpuid_flag(1, 0, ECX, 28)){ cpuid_flags |= CPUID_AVX1 ;}
-        if(cpuid_flag(7, 0, EBX, 5)){  cpuid_flags |= CPUID_AVX2 ; }
-        if(cpuid_flag(7, 0, EBX, 8)) { cpuid_flags |= CPUID_BMI2 ; }
-        if(cpuid_flag(1, 0, ECX, 30)){ cpuid_flags |= CPUID_RDRAND ;  }
-        if(cpuid_flag(7, 0, EBX, 18)){ cpuid_flags |= CPUID_RDSEED ;  }
-        cpuid_check = 1 ;
-        return 0 ;
-    }
-    return 1 ;
-}
-
-
-/* #if defined(HAVE_INTEL_AVX1/2) at the tail of sha256 */
-static int Transform(Sha256* sha256);
-
-#if defined(HAVE_INTEL_AVX1)
-static int Transform_AVX1(Sha256 *sha256) ;
-#endif
-#if defined(HAVE_INTEL_AVX2)
-static int Transform_AVX2(Sha256 *sha256) ;
-static int Transform_AVX1_RORX(Sha256 *sha256) ;
-#endif
-
-static int (*Transform_p)(Sha256* sha256) /* = _Transform */;
-
-#define XTRANSFORM(sha256, B)  (*Transform_p)(sha256)
-
-static void set_Transform(void) {
-     if(set_cpuid_flags())return ;
-
-#if defined(HAVE_INTEL_AVX2)
-     if(IS_INTEL_AVX2 && IS_INTEL_BMI2){
-         Transform_p = Transform_AVX1_RORX; return ;
-         Transform_p = Transform_AVX2      ;
-                  /* for avoiding warning,"not used" */
-     }
-#endif
-#if defined(HAVE_INTEL_AVX1)
-     Transform_p = ((IS_INTEL_AVX1) ? Transform_AVX1 : Transform) ; return ;
-#endif
-     Transform_p = Transform ; return ;
-}
-
-#else
-   #if defined(FREESCALE_MMCAU_SHA)
-      #define XTRANSFORM(sha256, B) Transform(sha256, B)
-   #else
-      #define XTRANSFORM(sha256, B) Transform(sha256)
-   #endif
-#endif
-
-/* Dummy for saving MM_REGs on behalf of Transform */
-#if defined(HAVE_INTEL_AVX2)&& !defined(HAVE_INTEL_AVX1)
-#define  SAVE_XMM_YMM   __asm__ volatile("or %%r8d, %%r8d":::\
-  "%ymm4","%ymm5","%ymm6","%ymm7","%ymm8","%ymm9","%ymm10","%ymm11","%ymm12","%ymm13","%ymm14","%ymm15")
-#elif defined(HAVE_INTEL_AVX1)
-#define  SAVE_XMM_YMM   __asm__ volatile("or %%r8d, %%r8d":::\
-    "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10",\
-    "xmm11","xmm12","xmm13","xmm14","xmm15")
-#else
-#define  SAVE_XMM_YMM
-#endif
-
-#ifdef WOLFSSL_PIC32MZ_HASH
-#define InitSha256   InitSha256_sw
-#define Sha256Update Sha256Update_sw
-#define Sha256Final  Sha256Final_sw
-#endif
-
 #include 
 
 #ifdef NO_INLINE
@@ -289,34 +85,24 @@ static void set_Transform(void) {
     #include 
 #endif
 
-#ifdef FREESCALE_MMCAU_SHA
-    #include "fsl_mmcau.h"
+
+#if defined(USE_INTEL_SPEEDUP)
+    #define HAVE_INTEL_AVX1
+    #define HAVE_INTEL_AVX2
+#endif /* USE_INTEL_SPEEDUP */
+
+#if defined(HAVE_INTEL_AVX2)
+    #define HAVE_INTEL_RORX
 #endif
 
 
-#ifdef FREESCALE_LTC_SHA
-int wc_InitSha256(Sha256* sha256)
-{
-    LTC_HASH_Init(LTC_BASE, &sha256->ctx, kLTC_Sha256, NULL, 0);
-    return 0;
-}
-#else
-int wc_InitSha256(Sha256* sha256)
+static int InitSha256(Sha256* sha256)
 {
     int ret = 0;
 
-    if (sha256 == NULL) {
+    if (sha256 == NULL)
         return BAD_FUNC_ARG;
-    }
 
-#ifdef FREESCALE_MMCAU_SHA
-    ret = wolfSSL_CryptHwMutexLock();
-    if(ret != 0) {
-        return ret;
-    }
-    MMCAU_SHA256_InitializeOutput((uint32_t*)sha256->digest);
-    wolfSSL_CryptHwMutexUnLock();
-#else
     sha256->digest[0] = 0x6A09E667L;
     sha256->digest[1] = 0xBB67AE85L;
     sha256->digest[2] = 0x3C6EF372L;
@@ -325,302 +111,582 @@ int wc_InitSha256(Sha256* sha256)
     sha256->digest[5] = 0x9B05688CL;
     sha256->digest[6] = 0x1F83D9ABL;
     sha256->digest[7] = 0x5BE0CD19L;
-#endif
 
     sha256->buffLen = 0;
     sha256->loLen   = 0;
     sha256->hiLen   = 0;
 
-#if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
-    set_Transform() ; /* choose best Transform function under this runtime environment */
-#endif
-
     return ret;
 }
-#endif /* FREESCALE_LTC_SHA */
 
-#if !defined(FREESCALE_LTC_SHA)
-#if !defined(FREESCALE_MMCAU_SHA)
-static const ALIGN32 word32 K[64] = {
-    0x428A2F98L, 0x71374491L, 0xB5C0FBCFL, 0xE9B5DBA5L, 0x3956C25BL,
-    0x59F111F1L, 0x923F82A4L, 0xAB1C5ED5L, 0xD807AA98L, 0x12835B01L,
-    0x243185BEL, 0x550C7DC3L, 0x72BE5D74L, 0x80DEB1FEL, 0x9BDC06A7L,
-    0xC19BF174L, 0xE49B69C1L, 0xEFBE4786L, 0x0FC19DC6L, 0x240CA1CCL,
-    0x2DE92C6FL, 0x4A7484AAL, 0x5CB0A9DCL, 0x76F988DAL, 0x983E5152L,
-    0xA831C66DL, 0xB00327C8L, 0xBF597FC7L, 0xC6E00BF3L, 0xD5A79147L,
-    0x06CA6351L, 0x14292967L, 0x27B70A85L, 0x2E1B2138L, 0x4D2C6DFCL,
-    0x53380D13L, 0x650A7354L, 0x766A0ABBL, 0x81C2C92EL, 0x92722C85L,
-    0xA2BFE8A1L, 0xA81A664BL, 0xC24B8B70L, 0xC76C51A3L, 0xD192E819L,
-    0xD6990624L, 0xF40E3585L, 0x106AA070L, 0x19A4C116L, 0x1E376C08L,
-    0x2748774CL, 0x34B0BCB5L, 0x391C0CB3L, 0x4ED8AA4AL, 0x5B9CCA4FL,
-    0x682E6FF3L, 0x748F82EEL, 0x78A5636FL, 0x84C87814L, 0x8CC70208L,
-    0x90BEFFFAL, 0xA4506CEBL, 0xBEF9A3F7L, 0xC67178F2L
-};
 
-#endif
+/* Hardware Acceleration */
+#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
 
-#if defined(FREESCALE_MMCAU_SHA)
+    /* in case intel instructions aren't available, plus we need the K[] global */
+    #define NEED_SOFT_SHA256
 
-static int Transform(Sha256* sha256, byte* buf)
-{
-    int ret = wolfSSL_CryptHwMutexLock();
-    if(ret == 0) {
-        MMCAU_SHA256_HashN(buf, 1, (uint32_t*)sha256->digest);
+    /*****
+    Intel AVX1/AVX2 Macro Control Structure
+
+    #define HAVE_INTEL_AVX1
+    #define HAVE_INTEL_AVX2
+
+    #define HAVE_INTEL_RORX
+
+
+    int InitSha256(Sha256* sha256) {
+         Save/Recover XMM, YMM
+         ...
+    }
+
+    #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
+      Transform(); Function prototype
+    #else
+      Transform() {   }
+      int Sha256Final() {
+         Save/Recover XMM, YMM
+         ...
+      }
+    #endif
+
+    #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
+        #if defined(HAVE_INTEL_RORX)
+             #define RND with rorx instruction
+        #else
+            #define RND
+        #endif
+    #endif
+
+    #if defined(HAVE_INTEL_AVX1)
+
+       #define XMM Instructions/inline asm
+
+       int Transform() {
+           Stitched Message Sched/Round
+        }
+
+    #elif defined(HAVE_INTEL_AVX2)
+
+      #define YMM Instructions/inline asm
+
+      int Transform() {
+          More granular Stitched Message Sched/Round
+      }
+
+    */
+
+    /* Each platform needs to query cpuid (leaves 1 and 7 below) to see which
+     * of AVX1/AVX2/BMI2 are supported. Also, let's setup a macro for proper
+     * linkage w/o ABI conflicts */
+
+    #ifndef _MSC_VER
+        #define cpuid(reg, leaf, sub)\
+                __asm__ __volatile__ ("cpuid":\
+                 "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\
+                 "a" (leaf), "c"(sub));
+
+        #define XASM_LINK(f) asm(f)
+    #else
+        #include 
+        #define cpuid(a,b,c) __cpuidex((int*)(a),(b),(c)) /* leaf b, sub-leaf c */
+
+        #define XASM_LINK(f)
+    #endif /* _MSC_VER */
+
+    #define EAX 0
+    #define EBX 1
+    #define ECX 2
+    #define EDX 3
+
+    #define CPUID_AVX1   0x1
+    #define CPUID_AVX2   0x2
+    #define CPUID_RDRAND 0x4
+    #define CPUID_RDSEED 0x8
+    #define CPUID_BMI2   0x10   /* MULX, RORX */
+
+    #define IS_INTEL_AVX1       (cpuid_flags & CPUID_AVX1)
+    #define IS_INTEL_AVX2       (cpuid_flags & CPUID_AVX2)
+    #define IS_INTEL_BMI2       (cpuid_flags & CPUID_BMI2)
+    #define IS_INTEL_RDRAND     (cpuid_flags & CPUID_RDRAND)
+    #define IS_INTEL_RDSEED     (cpuid_flags & CPUID_RDSEED)
+
+    static word32 cpuid_check = 0;
+    static word32 cpuid_flags = 0;
+
+    /* Returns bit `bit` of cpuid register `num` (EAX..EDX index) for the
+     * given leaf/sub-leaf, or 0 when the vendor id is not "GenuineIntel". */
+    static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) {
+        unsigned int reg[5];
+
+        reg[4] = '\0';
+        cpuid(reg, 0, 0); /* leaf 0: vendor id in EBX, EDX, ECX */
+        if (XMEMCMP((char *)&(reg[EBX]), "Genu", 4) != 0 ||
+            XMEMCMP((char *)&(reg[EDX]), "ineI", 4) != 0 ||
+            XMEMCMP((char *)&(reg[ECX]), "ntel", 4) != 0) {
+            return 0;
+        }
+
+        /* vendor matched: read the requested feature bit */
+        cpuid(reg, leaf, sub);
+        return ((reg[num] >> bit) & 0x1);
+    }
+
+    /* Probes CPU features once; returns 0 if probed now, 1 if already done */
+    static int set_cpuid_flags(void) {
+        if (cpuid_check != 0)
+            return 1;
+        if (cpuid_flag(1, 0, ECX, 28)) cpuid_flags |= CPUID_AVX1;
+        if (cpuid_flag(7, 0, EBX, 5))  cpuid_flags |= CPUID_AVX2;
+        if (cpuid_flag(7, 0, EBX, 8))  cpuid_flags |= CPUID_BMI2;
+        if (cpuid_flag(1, 0, ECX, 30)) cpuid_flags |= CPUID_RDRAND;
+        if (cpuid_flag(7, 0, EBX, 18)) cpuid_flags |= CPUID_RDSEED;
+        cpuid_check = 1;
+        return 0;
+    }
+
+    /* #if defined(HAVE_INTEL_AVX1/2) at the tail of sha256 */
+    static int Transform(Sha256* sha256);
+    #if defined(HAVE_INTEL_AVX1)
+        static int Transform_AVX1(Sha256 *sha256);
+    #endif
+    #if defined(HAVE_INTEL_AVX2)
+        static int Transform_AVX2(Sha256 *sha256);
+        static int Transform_AVX1_RORX(Sha256 *sha256);
+    #endif
+    static int (*Transform_p)(Sha256* sha256) /* = _Transform */;
+    #define XTRANSFORM(sha256, B)  (*Transform_p)(sha256)
+
+    static void set_Transform(void) {
+         if (set_cpuid_flags()) return; /* flags were already probed earlier */
+         /* first probe: point Transform_p at the implementation to use */
+    #if defined(HAVE_INTEL_AVX2)
+         if (IS_INTEL_AVX2 && IS_INTEL_BMI2) {
+             Transform_p = Transform_AVX1_RORX; return;
+             Transform_p = Transform_AVX2;
+                      /* unreachable on purpose: avoids a "not used" warning */
+         }
+    #endif
+    #if defined(HAVE_INTEL_AVX1)
+         Transform_p = ((IS_INTEL_AVX1) ? Transform_AVX1 : Transform); return;
+    #endif
+         Transform_p = Transform; return;
+    }
+
+    /* Dummy for saving MM_REGs on behalf of Transform */
+    #if defined(HAVE_INTEL_AVX2) && !defined(HAVE_INTEL_AVX1)
+        #define SAVE_XMM_YMM   __asm__ volatile("or %%r8d, %%r8d":::\
+          "%ymm4","%ymm5","%ymm6","%ymm7","%ymm8","%ymm9","%ymm10","%ymm11","%ymm12","%ymm13","%ymm14","%ymm15")
+    #elif defined(HAVE_INTEL_AVX1)
+        #define SAVE_XMM_YMM   __asm__ volatile("or %%r8d, %%r8d":::\
+            "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10",\
+            "xmm11","xmm12","xmm13","xmm14","xmm15")
+    #endif
+
+    /* Intel AVX build: resets the hash state, selects the Transform
+     * implementation for this CPU and, in async SHA-256 builds, calls
+     * wolfAsync_DevCtxInit(). Returns BAD_FUNC_ARG for a NULL context. */
+    int wc_InitSha256_ex(Sha256* sha256, void* heap, int devId)
+    {
+        int status;
+
+        if (sha256 == NULL) {
+            return BAD_FUNC_ARG;
+        }
+        sha256->heap = heap;
+        status = InitSha256(sha256);
+        if (status != 0) {
+            return status;
+        }
+        set_Transform(); /* choose best Transform for this runtime CPU */
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
+        status = wolfAsync_DevCtxInit(&sha256->asyncDev,
+                            WOLFSSL_ASYNC_MARKER_SHA256, sha256->heap, devId);
+    #else
+        (void)devId;
+    #endif /* WOLFSSL_ASYNC_CRYPT */
+        return status;
+    }
+
+#elif defined(FREESCALE_LTC_SHA)
+    /* LTC build: heap/devId unused; a NULL context is rejected before use */
+    int wc_InitSha256_ex(Sha256* sha256, void* heap, int devId)
+    {
+        (void)heap; (void)devId;
+        if (sha256 == NULL)
+            return BAD_FUNC_ARG;
+        LTC_HASH_Init(LTC_BASE, &sha256->ctx, kLTC_Sha256, NULL, 0);
+        return 0;
+    }
+
+#elif defined(FREESCALE_MMCAU_SHA)
+    #include "fsl_mmcau.h"
+    #define XTRANSFORM(sha256, B) Transform(sha256, B)
+
+    /* MMCAU build: heap/devId unused; a NULL context is rejected up front */
+    int wc_InitSha256_ex(Sha256* sha256, void* heap, int devId)
+    {
+        int ret;
+
+        (void)heap; (void)devId;
+        if (sha256 == NULL)
+            return BAD_FUNC_ARG;
+
+        ret = wolfSSL_CryptHwMutexLock();
+        if (ret != 0) return ret;
+        MMCAU_SHA256_InitializeOutput((uint32_t*)sha256->digest);
         wolfSSL_CryptHwMutexUnLock();
+
+        sha256->buffLen = 0;
+        sha256->loLen   = 0;
+        sha256->hiLen   = 0;
+
+        return ret;
     }
-    return ret;
-}
 
-#endif /* FREESCALE_MMCAU_SHA */
+    /* Hashes one block from buf into the digest while holding the HW mutex */
+    static int Transform(Sha256* sha256, byte* buf)
+    {
+        int ret = wolfSSL_CryptHwMutexLock();
+        if (ret != 0) return ret;
+        MMCAU_SHA256_HashN(buf, 1, (uint32_t*)sha256->digest);
+        wolfSSL_CryptHwMutexUnLock();
+        return ret;
+    }
 
-#define Ch(x,y,z)       ((z) ^ ((x) & ((y) ^ (z))))
-#define Maj(x,y,z)      ((((x) | (y)) & (z)) | ((x) & (y)))
-#define R(x, n)         (((x)&0xFFFFFFFFU)>>(n))
+#elif defined(WOLFSSL_PIC32MZ_HASH)
+    #define NEED_SOFT_SHA256
 
-#define S(x, n)         rotrFixed(x, n)
-#define Sigma0(x)       (S(x, 2) ^ S(x, 13) ^ S(x, 22))
-#define Sigma1(x)       (S(x, 6) ^ S(x, 11) ^ S(x, 25))
-#define Gamma0(x)       (S(x, 7) ^ S(x, 18) ^ R(x, 3))
-#define Gamma1(x)       (S(x, 17) ^ S(x, 19) ^ R(x, 10))
+    #define wc_InitSha256   wc_InitSha256_sw
+    #define wc_Sha256Update wc_Sha256Update_sw
+    #define wc_Sha256Final  wc_Sha256Final_sw
 
-#define RND(a,b,c,d,e,f,g,h,i) \
-     t0 = (h) + Sigma1((e)) + Ch((e), (f), (g)) + K[(i)] + W[(i)]; \
-     t1 = Sigma0((a)) + Maj((a), (b), (c)); \
-     (d) += t0; \
-     (h)  = t0 + t1;
+    int wc_InitSha256_ex(Sha256* sha256, void* heap, int devId)
+    {   /* PIC32MZ build: stores the heap hint, then resets the state */
+        (void)devId; /* unused in this variant */
+        if (sha256 == NULL) return BAD_FUNC_ARG;
 
-#if !defined(FREESCALE_MMCAU_SHA)
-static int Transform(Sha256* sha256)
-{
-    word32 S[8], t0, t1;
-    int i;
+        sha256->heap = heap;
 
-#ifdef WOLFSSL_SMALL_STACK
-    word32* W;
+        return InitSha256(sha256);
+    }
 
-    W = (word32*) XMALLOC(sizeof(word32) * 64, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (W == NULL)
-        return MEMORY_E;
 #else
-    word32 W[64];
+    #define NEED_SOFT_SHA256
+
+    #define XTRANSFORM(sha256, B) Transform(sha256)
+
+    /* Software build: resets the hash state and, in async SHA-256 builds,
+     * calls wolfAsync_DevCtxInit(). Returns BAD_FUNC_ARG for a NULL context. */
+    int wc_InitSha256_ex(Sha256* sha256, void* heap, int devId)
+    {
+        int status;
+
+        if (sha256 == NULL) {
+            return BAD_FUNC_ARG;
+        }
+        sha256->heap = heap;
+
+        status = InitSha256(sha256);
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
+        if (status == 0)
+            status = wolfAsync_DevCtxInit(&sha256->asyncDev,
+                            WOLFSSL_ASYNC_MARKER_SHA256, sha256->heap, devId);
+    #else
+        (void)devId;
+    #endif /* WOLFSSL_ASYNC_CRYPT */
+        return status;
+    }
+#endif /* End Hardware Acceleration */
+
+#ifndef SAVE_XMM_YMM
+    #define SAVE_XMM_YMM
 #endif
 
-    /* Copy context->state[] to working vars */
-    for (i = 0; i < 8; i++)
-        S[i] = sha256->digest[i];
+#ifdef NEED_SOFT_SHA256
 
-    for (i = 0; i < 16; i++)
-        W[i] = sha256->buffer[i];
+    static const ALIGN32 word32 K[64] = { /* round constants read by RND() */
+        0x428A2F98L, 0x71374491L, 0xB5C0FBCFL, 0xE9B5DBA5L, 0x3956C25BL,
+        0x59F111F1L, 0x923F82A4L, 0xAB1C5ED5L, 0xD807AA98L, 0x12835B01L,
+        0x243185BEL, 0x550C7DC3L, 0x72BE5D74L, 0x80DEB1FEL, 0x9BDC06A7L,
+        0xC19BF174L, 0xE49B69C1L, 0xEFBE4786L, 0x0FC19DC6L, 0x240CA1CCL,
+        0x2DE92C6FL, 0x4A7484AAL, 0x5CB0A9DCL, 0x76F988DAL, 0x983E5152L,
+        0xA831C66DL, 0xB00327C8L, 0xBF597FC7L, 0xC6E00BF3L, 0xD5A79147L,
+        0x06CA6351L, 0x14292967L, 0x27B70A85L, 0x2E1B2138L, 0x4D2C6DFCL,
+        0x53380D13L, 0x650A7354L, 0x766A0ABBL, 0x81C2C92EL, 0x92722C85L,
+        0xA2BFE8A1L, 0xA81A664BL, 0xC24B8B70L, 0xC76C51A3L, 0xD192E819L,
+        0xD6990624L, 0xF40E3585L, 0x106AA070L, 0x19A4C116L, 0x1E376C08L,
+        0x2748774CL, 0x34B0BCB5L, 0x391C0CB3L, 0x4ED8AA4AL, 0x5B9CCA4FL,
+        0x682E6FF3L, 0x748F82EEL, 0x78A5636FL, 0x84C87814L, 0x8CC70208L,
+        0x90BEFFFAL, 0xA4506CEBL, 0xBEF9A3F7L, 0xC67178F2L
+    };
 
-    for (i = 16; i < 64; i++)
-        W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16];
+    #define Ch(x,y,z)       ((z) ^ ((x) & ((y) ^ (z))))
+    #define Maj(x,y,z)      ((((x) | (y)) & (z)) | ((x) & (y)))
+    #define R(x, n)         (((x) & 0xFFFFFFFFU) >> (n))
 
-    for (i = 0; i < 64; i += 8) {
-        RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0);
-        RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1);
-        RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2);
-        RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],i+3);
-        RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],i+4);
-        RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],i+5);
-        RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6);
-        RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7);
+    #define S(x, n)         rotrFixed(x, n)
+    #define Sigma0(x)       (S(x, 2) ^ S(x, 13) ^ S(x, 22))
+    #define Sigma1(x)       (S(x, 6) ^ S(x, 11) ^ S(x, 25))
+    #define Gamma0(x)       (S(x, 7) ^ S(x, 18) ^ R(x, 3))
+    #define Gamma1(x)       (S(x, 17) ^ S(x, 19) ^ R(x, 10))
+
+    #define RND(a,b,c,d,e,f,g,h,i) \
+         t0 = (h) + Sigma1((e)) + Ch((e), (f), (g)) + K[(i)] + W[(i)]; \
+         t1 = Sigma0((a)) + Maj((a), (b), (c)); \
+         (d) += t0; \
+         (h)  = t0 + t1;
+
+    static int Transform(Sha256* sha256)
+    {   /* software compression of sha256->buffer into sha256->digest */
+        word32 S[8], t0, t1;
+        int i;
+
+    #ifdef WOLFSSL_SMALL_STACK
+        word32* W;
+
+        W = (word32*)XMALLOC(sizeof(word32) * SHA256_BLOCK_SIZE, NULL,
+            DYNAMIC_TYPE_TMP_BUFFER);
+        if (W == NULL)
+            return MEMORY_E;
+    #else
+        word32 W[SHA256_BLOCK_SIZE];
+    #endif
+
+        /* Copy context->state[] to working vars */
+        for (i = 0; i < 8; i++)
+            S[i] = sha256->digest[i];
+
+        for (i = 0; i < 16; i++)
+            W[i] = sha256->buffer[i];
+        /* NOTE(review): assumes SHA256_BLOCK_SIZE == 64, the length of K[] */
+        for (i = 16; i < SHA256_BLOCK_SIZE; i++)
+            W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16];
+
+        for (i = 0; i < SHA256_BLOCK_SIZE; i += 8) {
+            RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0);
+            RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1);
+            RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2);
+            RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],i+3);
+            RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],i+4);
+            RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],i+5);
+            RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6);
+            RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7);
+        }
+
+        /* Add the working vars back into digest state[] */
+        for (i = 0; i < 8; i++) {
+            sha256->digest[i] += S[i];
+        }
+
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(W, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
+
+        return 0;
     }
-
-    /* Add the working vars back into digest state[] */
-    for (i = 0; i < 8; i++) {
-        sha256->digest[i] += S[i];
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(W, NULL, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
+/* End wc_ software implementation */
 
-    return 0;
-}
 
-#endif /* #if !defined(FREESCALE_MMCAU_SHA) */
+#ifdef XTRANSFORM
 
-static INLINE void AddLength(Sha256* sha256, word32 len)
-{
-    word32 tmp = sha256->loLen;
-    if ( (sha256->loLen += len) < tmp)
-        sha256->hiLen++;                       /* carry low to high */
-}
-#endif /* FREESCALE_LTC_SHA */
+    static INLINE void AddLength(Sha256* sha256, word32 len)
+    {   /* adds len to the byte counter split across hiLen:loLen */
+        const word32 before = sha256->loLen;
+        sha256->loLen += len;
+        if (sha256->loLen < before) sha256->hiLen++; /* carry low to high */
+    }
 
-#ifdef FREESCALE_LTC_SHA
-int wc_Sha256Update(Sha256* sha256, const byte* data, word32 len)
-{
-    LTC_HASH_Update(&sha256->ctx, data, len);
-    return 0;
-}
-#else
-static INLINE int Sha256Update(Sha256* sha256, const byte* data, word32 len)
-{
-    byte* local;
+    static INLINE int Sha256Update(Sha256* sha256, const byte* data, word32 len)
+    {
+        int ret = 0;
+        byte* local;
 
-    /* do block size increments */
-    local = (byte*)sha256->buffer;
+        if (sha256 == NULL || (data == NULL && len > 0)) {
+            return BAD_FUNC_ARG;
+        }
 
-    SAVE_XMM_YMM ; /* for Intel AVX */
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
+        if (sha256->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA256) {
+        #if defined(HAVE_INTEL_QA)
+            return IntelQaSymSha256(&sha256->asyncDev, NULL, data, len);
+        #endif
+        }
+    #endif /* WOLFSSL_ASYNC_CRYPT */
 
-    while (len) {
-        word32 add = min(len, SHA256_BLOCK_SIZE - sha256->buffLen);
-        XMEMCPY(&local[sha256->buffLen], data, add);
+        /* do block size increments */
+        local = (byte*)sha256->buffer;
 
-        sha256->buffLen += add;
-        data            += add;
-        len             -= add;
+        /* check that internal buffLen is valid */
+        if (sha256->buffLen > SHA256_BLOCK_SIZE)
+            return BUFFER_E;
 
-        if (sha256->buffLen == SHA256_BLOCK_SIZE) {
-            int ret;
+        SAVE_XMM_YMM; /* for Intel AVX */
 
-            #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
-                #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-                if(!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
-                #endif
-                ByteReverseWords(sha256->buffer, sha256->buffer,
-                                 SHA256_BLOCK_SIZE);
+        while (len) {
+            word32 add = min(len, SHA256_BLOCK_SIZE - sha256->buffLen);
+            XMEMCPY(&local[sha256->buffLen], data, add);
+
+            sha256->buffLen += add;
+            data            += add;
+            len             -= add;
+
+            if (sha256->buffLen == SHA256_BLOCK_SIZE) {
+        #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+            #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+                if (!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
             #endif
+                {
+                    ByteReverseWords(sha256->buffer, sha256->buffer,
+                                                             SHA256_BLOCK_SIZE);
+                }
+        #endif
+                ret = XTRANSFORM(sha256, local);
+                if (ret != 0) {
+                    break;
+                }
+
+                AddLength(sha256, SHA256_BLOCK_SIZE);
+                sha256->buffLen = 0;
+            }
+        }
+
+        return ret;
+    }
+
+    int wc_Sha256Update(Sha256* sha256, const byte* data, word32 len)
+    {   /* argument validation happens inside Sha256Update() */
+        return Sha256Update(sha256, data, len);
+    }
+
+    static INLINE int Sha256Final(Sha256* sha256)
+    {   /* pads the buffered tail, appends the bit length, transforms */
+        int ret;
+        byte* local = (byte*)sha256->buffer;
+
+        SAVE_XMM_YMM; /* for Intel AVX */
+
+        AddLength(sha256, sha256->buffLen);  /* count tail before padding */
+        local[sha256->buffLen++] = 0x80;     /* append the single 1 bit */
+
+        /* length field no longer fits: zero-fill this block and flush it */
+        if (sha256->buffLen > SHA256_PAD_SIZE) {
+            XMEMSET(&local[sha256->buffLen], 0,
+                SHA256_BLOCK_SIZE - sha256->buffLen);
+            sha256->buffLen += SHA256_BLOCK_SIZE - sha256->buffLen;
+
+    #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+        #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+            if (!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
+        #endif
+            {
+                ByteReverseWords(sha256->buffer, sha256->buffer,
+                    SHA256_BLOCK_SIZE);
+            }
+    #endif
+
             ret = XTRANSFORM(sha256, local);
             if (ret != 0)
                 return ret;
 
-            AddLength(sha256, SHA256_BLOCK_SIZE);
             sha256->buffLen = 0;
         }
-    }
+        XMEMSET(&local[sha256->buffLen], 0, SHA256_PAD_SIZE - sha256->buffLen);
 
-    return 0;
-}
+        /* bytes -> bits: shift left by 3, carrying loLen's top 3 bits to hiLen */
+        sha256->hiLen = (sha256->loLen >> (8 * sizeof(sha256->loLen) - 3)) +
+                                                         (sha256->hiLen << 3);
+        sha256->loLen = sha256->loLen << 3;
 
-int wc_Sha256Update(Sha256* sha256, const byte* data, word32 len)
-{
-    if (sha256 == NULL || (data == NULL && len > 0)) {
-        return BAD_FUNC_ARG;
-    }
-
-    return Sha256Update(sha256, data, len);
-}
-
-#endif /* FREESCALE_LTC_SHA */
-
-#ifdef FREESCALE_LTC_SHA
-int wc_Sha256Final(Sha256* sha256, byte* hash)
-{
-    uint32_t hashlen = SHA256_DIGEST_SIZE;
-    LTC_HASH_Finish(&sha256->ctx, hash, &hashlen);
-    return wc_InitSha256(sha256);  /* reset state */
-}
-#else
-static INLINE int Sha256Final(Sha256* sha256)
-{
-    byte* local = (byte*)sha256->buffer;
-    int ret;
-
-    SAVE_XMM_YMM ; /* for Intel AVX */
-
-    AddLength(sha256, sha256->buffLen);  /* before adding pads */
-
-    local[sha256->buffLen++] = 0x80;     /* add 1 */
-
-    /* pad with zeros */
-    if (sha256->buffLen > SHA256_PAD_SIZE) {
-        XMEMSET(&local[sha256->buffLen], 0, SHA256_BLOCK_SIZE - sha256->buffLen);
-        sha256->buffLen += SHA256_BLOCK_SIZE - sha256->buffLen;
-
-        #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
-            #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-            if(!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
-            #endif
-            ByteReverseWords(sha256->buffer, sha256->buffer, SHA256_BLOCK_SIZE);
-        #endif
-
-        ret = XTRANSFORM(sha256, local);
-        if (ret != 0)
-            return ret;
-
-        sha256->buffLen = 0;
-    }
-    XMEMSET(&local[sha256->buffLen], 0, SHA256_PAD_SIZE - sha256->buffLen);
-
-    /* put lengths in bits */
-    sha256->hiLen = (sha256->loLen >> (8*sizeof(sha256->loLen) - 3)) +
-                 (sha256->hiLen << 3);
-    sha256->loLen = sha256->loLen << 3;
-
-    /* store lengths */
+        /* store lengths */
     #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
         #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-        if(!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
+            if (!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
         #endif
-            ByteReverseWords(sha256->buffer, sha256->buffer, SHA256_BLOCK_SIZE);
+            {
+                ByteReverseWords(sha256->buffer, sha256->buffer,
+                    SHA256_BLOCK_SIZE);
+            }
     #endif
-    /* ! length ordering dependent on digest endian type ! */
-    XMEMCPY(&local[SHA256_PAD_SIZE], &sha256->hiLen, sizeof(word32));
-    XMEMCPY(&local[SHA256_PAD_SIZE + sizeof(word32)], &sha256->loLen,
-            sizeof(word32));
+        /* ! length ordering dependent on digest endian type ! */
+        XMEMCPY(&local[SHA256_PAD_SIZE], &sha256->hiLen, sizeof(word32));
+        XMEMCPY(&local[SHA256_PAD_SIZE + sizeof(word32)], &sha256->loLen,
+                sizeof(word32));
 
-    #if defined(FREESCALE_MMCAU_SHA) || defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+    #if defined(FREESCALE_MMCAU_SHA) || defined(HAVE_INTEL_AVX1) || \
+            defined(HAVE_INTEL_AVX2)
         /* Kinetis requires only these bytes reversed */
         #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-        if(IS_INTEL_AVX1 || IS_INTEL_AVX2)
+            if (IS_INTEL_AVX1 || IS_INTEL_AVX2)
         #endif
-        ByteReverseWords(&sha256->buffer[SHA256_PAD_SIZE/sizeof(word32)],
-                         &sha256->buffer[SHA256_PAD_SIZE/sizeof(word32)],
-                         2 * sizeof(word32));
+            {
+                ByteReverseWords(
+                    &sha256->buffer[SHA256_PAD_SIZE / sizeof(word32)],
+                    &sha256->buffer[SHA256_PAD_SIZE / sizeof(word32)],
+                    2 * sizeof(word32));
+            }
     #endif
 
-    return XTRANSFORM(sha256, local);
-}
-
-int wc_Sha256Final(Sha256* sha256, byte* hash)
-{
-    int ret;
-
-    if (sha256 == NULL || hash == NULL) {
-        return BAD_FUNC_ARG;
+        return XTRANSFORM(sha256, local);
     }
 
-    ret = Sha256Final(sha256);
-    if (ret != 0)
-        return ret;
+    int wc_Sha256Final(Sha256* sha256, byte* hash)
+    {   /* software path: copy the digest to hash, then re-init the context */
+        int ret;
+
+        if (sha256 == NULL || hash == NULL) {
+            return BAD_FUNC_ARG;
+        }
+
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
+        if (sha256->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA256) {
+        #if defined(HAVE_INTEL_QA)
+            return IntelQaSymSha256(&sha256->asyncDev, hash, NULL,
+                                            SHA256_DIGEST_SIZE);
+        #endif
+        }
+    #endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_SHA256 */
+
+        ret = Sha256Final(sha256);
+        if (ret != 0)
+            return ret;
 
     #if defined(LITTLE_ENDIAN_ORDER)
         ByteReverseWords(sha256->digest, sha256->digest, SHA256_DIGEST_SIZE);
     #endif
-    XMEMCPY(hash, sha256->digest, SHA256_DIGEST_SIZE);
+        XMEMCPY(hash, sha256->digest, SHA256_DIGEST_SIZE);
 
-    return wc_InitSha256(sha256);  /* reset state */
-}
-#endif /* FREESCALE_LTC_SHA */
+        return InitSha256(sha256);  /* reset state */
+    }
 
+#endif /* XTRANSFORM */
 
 
 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
 
 #define _DigestToReg(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
-    { word32 d ;\
-    d = sha256->digest[0]; __asm__ volatile("movl %0, %"#S_0::"r"(d):SSE_REGs) ;\
-    d = sha256->digest[1]; __asm__ volatile("movl %0, %"#S_1::"r"(d):SSE_REGs) ;\
-    d = sha256->digest[2]; __asm__ volatile("movl %0, %"#S_2::"r"(d):SSE_REGs) ;\
-    d = sha256->digest[3]; __asm__ volatile("movl %0, %"#S_3::"r"(d):SSE_REGs) ;\
-    d = sha256->digest[4]; __asm__ volatile("movl %0, %"#S_4::"r"(d):SSE_REGs) ;\
-    d = sha256->digest[5]; __asm__ volatile("movl %0, %"#S_5::"r"(d):SSE_REGs) ;\
-    d = sha256->digest[6]; __asm__ volatile("movl %0, %"#S_6::"r"(d):SSE_REGs) ;\
-    d = sha256->digest[7]; __asm__ volatile("movl %0, %"#S_7::"r"(d):SSE_REGs) ;\
+{ word32 d;\
+    d = sha256->digest[0]; __asm__ volatile("movl %0, %"#S_0::"r"(d):SSE_REGs);\
+    d = sha256->digest[1]; __asm__ volatile("movl %0, %"#S_1::"r"(d):SSE_REGs);\
+    d = sha256->digest[2]; __asm__ volatile("movl %0, %"#S_2::"r"(d):SSE_REGs);\
+    d = sha256->digest[3]; __asm__ volatile("movl %0, %"#S_3::"r"(d):SSE_REGs);\
+    d = sha256->digest[4]; __asm__ volatile("movl %0, %"#S_4::"r"(d):SSE_REGs);\
+    d = sha256->digest[5]; __asm__ volatile("movl %0, %"#S_5::"r"(d):SSE_REGs);\
+    d = sha256->digest[6]; __asm__ volatile("movl %0, %"#S_6::"r"(d):SSE_REGs);\
+    d = sha256->digest[7]; __asm__ volatile("movl %0, %"#S_7::"r"(d):SSE_REGs);\
 }
 
 #define _RegToDigest(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
-    { word32 d ; \
-    __asm__ volatile("movl %"#S_0", %0":"=r"(d)::SSE_REGs) ; sha256->digest[0] += d;\
-    __asm__ volatile("movl %"#S_1", %0":"=r"(d)::SSE_REGs) ; sha256->digest[1] += d;\
-    __asm__ volatile("movl %"#S_2", %0":"=r"(d)::SSE_REGs) ; sha256->digest[2] += d;\
-    __asm__ volatile("movl %"#S_3", %0":"=r"(d)::SSE_REGs) ; sha256->digest[3] += d;\
-    __asm__ volatile("movl %"#S_4", %0":"=r"(d)::SSE_REGs) ; sha256->digest[4] += d;\
-    __asm__ volatile("movl %"#S_5", %0":"=r"(d)::SSE_REGs) ; sha256->digest[5] += d;\
-    __asm__ volatile("movl %"#S_6", %0":"=r"(d)::SSE_REGs) ; sha256->digest[6] += d;\
-    __asm__ volatile("movl %"#S_7", %0":"=r"(d)::SSE_REGs) ; sha256->digest[7] += d;\
+{ word32 d; /* receives one register, which is then added to its digest word */ \
+    __asm__ volatile("movl %"#S_0", %0":"=r"(d)::SSE_REGs); sha256->digest[0] += d;\
+    __asm__ volatile("movl %"#S_1", %0":"=r"(d)::SSE_REGs); sha256->digest[1] += d;\
+    __asm__ volatile("movl %"#S_2", %0":"=r"(d)::SSE_REGs); sha256->digest[2] += d;\
+    __asm__ volatile("movl %"#S_3", %0":"=r"(d)::SSE_REGs); sha256->digest[3] += d;\
+    __asm__ volatile("movl %"#S_4", %0":"=r"(d)::SSE_REGs); sha256->digest[4] += d;\
+    __asm__ volatile("movl %"#S_5", %0":"=r"(d)::SSE_REGs); sha256->digest[5] += d;\
+    __asm__ volatile("movl %"#S_6", %0":"=r"(d)::SSE_REGs); sha256->digest[6] += d;\
+    __asm__ volatile("movl %"#S_7", %0":"=r"(d)::SSE_REGs); sha256->digest[7] += d;\
 }
 
 
@@ -631,8 +697,6 @@ int wc_Sha256Final(Sha256* sha256, byte* hash)
     _RegToDigest(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )
 
 
-
-
 #define S_0 %r15d
 #define S_1 %r10d
 #define S_2 %r11d
@@ -688,8 +752,7 @@ __asm__ volatile("addl  "#h", "#d"\n\t");  /* d += h + w_k + Sigma1(e) + Ch(e,f,
 __asm__ volatile("addl  %"#h", %%r8d\n\t":::"%r8",SSE_REGs); \
 __asm__ volatile("addl  %%edx, %%r8d\n\t":::"%edx","%r8",SSE_REGs); \
 __asm__ volatile("movl  %r8d, "#h"\n\t");
-
-#endif
+#endif /* HAVE_INTEL_RORX */
 
 #define RND_STEP_1(a,b,c,d,e,f,g,h,i)\
 __asm__ volatile("movl  %"#e", %%edx\n\t":::"%edx",SSE_REGs);\
@@ -820,7 +883,7 @@ __asm__ volatile("movl  %%r8d, %"#h"\n\t":::"%r8", SSE_REGs); \
 #define FOR(cnt, init, max, inc, loop)  \
     __asm__ volatile("movl $"#init", %0\n\t"#loop":"::"m"(cnt):)
 #define END(cnt, init, max, inc, loop)  \
-    __asm__ volatile("addl $"#inc", %0\n\tcmpl $"#max", %0\n\tjle "#loop"\n\t":"=m"(cnt)::) ;
+    __asm__ volatile("addl $"#inc", %0\n\tcmpl $"#max", %0\n\tjle "#loop"\n\t":"=m"(cnt)::); /* cnt += inc; jump back to loop while cnt <= max */
 
 #endif  /* defined(HAVE_INTEL_AVX1) ||  defined(HAVE_INTEL_AVX2) */
 
@@ -839,171 +902,171 @@ __asm__ volatile("movl  %%r8d, %"#h"\n\t":::"%r8", SSE_REGs); \
 #define MessageSched(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER, SHUF_00BA, SHUF_DC00,\
      a,b,c,d,e,f,g,h,_i)\
             RND_STEP_1(a,b,c,d,e,f,g,h,_i);\
-    VPALIGNR (XTMP0, X3, X2, 4) ;\
+    VPALIGNR (XTMP0, X3, X2, 4);\
             RND_STEP_2(a,b,c,d,e,f,g,h,_i);\
-    VPADDD   (XTMP0, XTMP0, X0) ;\
+    VPADDD   (XTMP0, XTMP0, X0);\
             RND_STEP_3(a,b,c,d,e,f,g,h,_i);\
-    VPALIGNR (XTMP1, X1, X0, 4) ;   /* XTMP1 = W[-15] */\
+    VPALIGNR (XTMP1, X1, X0, 4);   /* XTMP1 = W[-15] */\
             RND_STEP_4(a,b,c,d,e,f,g,h,_i);\
-    VPSRLD   (XTMP2, XTMP1, 7) ;\
+    VPSRLD   (XTMP2, XTMP1, 7);\
             RND_STEP_5(a,b,c,d,e,f,g,h,_i);\
-    VPSLLD   (XTMP3, XTMP1, 25) ; /* VPSLLD   (XTMP3, XTMP1, (32-7)) */\
+    VPSLLD   (XTMP3, XTMP1, 25); /* VPSLLD   (XTMP3, XTMP1, (32-7)) */\
             RND_STEP_6(a,b,c,d,e,f,g,h,_i);\
-    VPOR     (XTMP3, XTMP3, XTMP2)  ;  /* XTMP1 = W[-15] MY_ROR 7 */\
+    VPOR     (XTMP3, XTMP3, XTMP2);  /* XTMP3 = W[-15] MY_ROR 7 */\
             RND_STEP_7(a,b,c,d,e,f,g,h,_i);\
-    VPSRLD   (XTMP2, XTMP1,18) ;\
+    VPSRLD   (XTMP2, XTMP1,18);\
             RND_STEP_8(a,b,c,d,e,f,g,h,_i);\
 \
             RND_STEP_1(h,a,b,c,d,e,f,g,_i+1);\
-    VPSRLD   (XTMP4, XTMP1, 3)      ;  /* XTMP4 = W[-15] >> 3 */\
+    VPSRLD   (XTMP4, XTMP1, 3);  /* XTMP4 = W[-15] >> 3 */\
             RND_STEP_2(h,a,b,c,d,e,f,g,_i+1);\
-    VPSLLD   (XTMP1, XTMP1, 14) ; /* VPSLLD   (XTMP1, XTMP1, (32-18)) */\
+    VPSLLD   (XTMP1, XTMP1, 14); /* VPSLLD   (XTMP1, XTMP1, (32-18)) */\
             RND_STEP_3(h,a,b,c,d,e,f,g,_i+1);\
-    VPXOR    (XTMP3, XTMP3, XTMP1)  ;\
+    VPXOR    (XTMP3, XTMP3, XTMP1);\
             RND_STEP_4(h,a,b,c,d,e,f,g,_i+1);\
-    VPXOR    (XTMP3, XTMP3, XTMP2)  ;  /* XTMP1 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18 */\
+    VPXOR    (XTMP3, XTMP3, XTMP2);  /* XTMP3 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18 */\
             RND_STEP_5(h,a,b,c,d,e,f,g,_i+1);\
-    VPXOR    (XTMP1, XTMP3, XTMP4)  ;  /* XTMP1 = s0 */\
+    VPXOR    (XTMP1, XTMP3, XTMP4);  /* XTMP1 = s0 */\
             RND_STEP_6(h,a,b,c,d,e,f,g,_i+1);\
-    VPSHUFD(XTMP2, X3, 0b11111010)  ;  /* XTMP2 = W[-2] {BBAA}*/\
+    VPSHUFD(XTMP2, X3, 0b11111010);  /* XTMP2 = W[-2] {BBAA}*/\
             RND_STEP_7(h,a,b,c,d,e,f,g,_i+1);\
-    VPADDD   (XTMP0, XTMP0, XTMP1)  ;  /* XTMP0 = W[-16] + W[-7] + s0 */\
+    VPADDD   (XTMP0, XTMP0, XTMP1);  /* XTMP0 = W[-16] + W[-7] + s0 */\
             RND_STEP_8(h,a,b,c,d,e,f,g,_i+1);\
 \
             RND_STEP_1(g,h,a,b,c,d,e,f,_i+2);\
-    VPSRLD   (XTMP4, XTMP2, 10) ;      /* XTMP4 = W[-2] >> 10 {BBAA} */\
+    VPSRLD   (XTMP4, XTMP2, 10);      /* XTMP4 = W[-2] >> 10 {BBAA} */\
             RND_STEP_2(g,h,a,b,c,d,e,f,_i+2);\
-    VPSRLQ   (XTMP3, XTMP2, 19) ;      /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */\
+    VPSRLQ   (XTMP3, XTMP2, 19);      /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */\
             RND_STEP_3(g,h,a,b,c,d,e,f,_i+2);\
-    VPSRLQ   (XTMP2, XTMP2, 17) ;      /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */\
+    VPSRLQ   (XTMP2, XTMP2, 17);      /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */\
             RND_STEP_4(g,h,a,b,c,d,e,f,_i+2);\
-    VPXOR    (XTMP2, XTMP2, XTMP3) ;\
+    VPXOR    (XTMP2, XTMP2, XTMP3);\
             RND_STEP_5(g,h,a,b,c,d,e,f,_i+2);\
-    VPXOR    (XTMP4, XTMP4, XTMP2) ;   /* XTMP4 = s1 {xBxA} */\
+    VPXOR    (XTMP4, XTMP4, XTMP2);   /* XTMP4 = s1 {xBxA} */\
             RND_STEP_6(g,h,a,b,c,d,e,f,_i+2);\
-    VPSHUFB  (XTMP4, XTMP4, SHUF_00BA)  ;  /* XTMP4 = s1 {00BA} */\
+    VPSHUFB  (XTMP4, XTMP4, SHUF_00BA);  /* XTMP4 = s1 {00BA} */\
             RND_STEP_7(g,h,a,b,c,d,e,f,_i+2);\
-    VPADDD   (XTMP0, XTMP0, XTMP4)  ;  /* XTMP0 = {..., ..., W[1], W[0]} */\
+    VPADDD   (XTMP0, XTMP0, XTMP4);  /* XTMP0 = {..., ..., W[1], W[0]} */\
             RND_STEP_8(g,h,a,b,c,d,e,f,_i+2);\
 \
             RND_STEP_1(f,g,h,a,b,c,d,e,_i+3);\
-    VPSHUFD  (XTMP2, XTMP0, 0b01010000) ; /* XTMP2 = W[-2] {DDCC} */\
+    VPSHUFD  (XTMP2, XTMP0, 0b01010000); /* XTMP2 = W[-2] {DDCC} */\
             RND_STEP_2(f,g,h,a,b,c,d,e,_i+3);\
     VPSRLD   (XTMP5, XTMP2, 10);       /* XTMP5 = W[-2] >> 10 {DDCC} */\
             RND_STEP_3(f,g,h,a,b,c,d,e,_i+3);\
     VPSRLQ   (XTMP3, XTMP2, 19);       /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */\
             RND_STEP_4(f,g,h,a,b,c,d,e,_i+3);\
-    VPSRLQ   (XTMP2, XTMP2, 17) ;      /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */\
+    VPSRLQ   (XTMP2, XTMP2, 17);      /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */\
             RND_STEP_5(f,g,h,a,b,c,d,e,_i+3);\
-    VPXOR    (XTMP2, XTMP2, XTMP3) ;\
+    VPXOR    (XTMP2, XTMP2, XTMP3);\
             RND_STEP_6(f,g,h,a,b,c,d,e,_i+3);\
-    VPXOR    (XTMP5, XTMP5, XTMP2) ;   /* XTMP5 = s1 {xDxC} */\
+    VPXOR    (XTMP5, XTMP5, XTMP2);   /* XTMP5 = s1 {xDxC} */\
             RND_STEP_7(f,g,h,a,b,c,d,e,_i+3);\
-    VPSHUFB  (XTMP5, XTMP5, SHUF_DC00) ; /* XTMP5 = s1 {DC00} */\
+    VPSHUFB  (XTMP5, XTMP5, SHUF_DC00); /* XTMP5 = s1 {DC00} */\
             RND_STEP_8(f,g,h,a,b,c,d,e,_i+3);\
-    VPADDD   (X0, XTMP5, XTMP0) ;      /* X0 = {W[3], W[2], W[1], W[0]} */\
+    VPADDD   (X0, XTMP5, XTMP0);      /* X0 = {W[3], W[2], W[1], W[0]} */\
 
 #if defined(HAVE_INTEL_RORX)
 
 #define MessageSched_RORX(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, \
                           XFER, SHUF_00BA, SHUF_DC00,a,b,c,d,e,f,g,h,_i)\
             RND_STEP_RORX_1(a,b,c,d,e,f,g,h,_i);\
-    VPALIGNR (XTMP0, X3, X2, 4) ;\
+    VPALIGNR (XTMP0, X3, X2, 4);\
             RND_STEP_RORX_2(a,b,c,d,e,f,g,h,_i);\
-    VPADDD   (XTMP0, XTMP0, X0) ;\
+    VPADDD   (XTMP0, XTMP0, X0);\
             RND_STEP_RORX_3(a,b,c,d,e,f,g,h,_i);\
-    VPALIGNR (XTMP1, X1, X0, 4) ;   /* XTMP1 = W[-15] */\
+    VPALIGNR (XTMP1, X1, X0, 4);   /* XTMP1 = W[-15] */\
             RND_STEP_RORX_4(a,b,c,d,e,f,g,h,_i);\
-    VPSRLD   (XTMP2, XTMP1, 7) ;\
+    VPSRLD   (XTMP2, XTMP1, 7);\
             RND_STEP_RORX_5(a,b,c,d,e,f,g,h,_i);\
-    VPSLLD   (XTMP3, XTMP1, 25) ; /* VPSLLD   (XTMP3, XTMP1, (32-7)) */\
+    VPSLLD   (XTMP3, XTMP1, 25); /* VPSLLD   (XTMP3, XTMP1, (32-7)) */\
             RND_STEP_RORX_6(a,b,c,d,e,f,g,h,_i);\
-    VPOR     (XTMP3, XTMP3, XTMP2)  ;  /* XTMP1 = W[-15] MY_ROR 7 */\
+    VPOR     (XTMP3, XTMP3, XTMP2);  /* XTMP3 = W[-15] MY_ROR 7 */\
             RND_STEP_RORX_7(a,b,c,d,e,f,g,h,_i);\
-    VPSRLD   (XTMP2, XTMP1,18) ;\
+    VPSRLD   (XTMP2, XTMP1,18);\
             RND_STEP_RORX_8(a,b,c,d,e,f,g,h,_i);\
 \
             RND_STEP_RORX_1(h,a,b,c,d,e,f,g,_i+1);\
-    VPSRLD   (XTMP4, XTMP1, 3)      ;  /* XTMP4 = W[-15] >> 3 */\
+    VPSRLD   (XTMP4, XTMP1, 3);  /* XTMP4 = W[-15] >> 3 */\
             RND_STEP_RORX_2(h,a,b,c,d,e,f,g,_i+1);\
-    VPSLLD   (XTMP1, XTMP1, 14) ; /* VPSLLD   (XTMP1, XTMP1, (32-18)) */\
+    VPSLLD   (XTMP1, XTMP1, 14); /* VPSLLD   (XTMP1, XTMP1, (32-18)) */\
             RND_STEP_RORX_3(h,a,b,c,d,e,f,g,_i+1);\
-    VPXOR    (XTMP3, XTMP3, XTMP1)  ;\
+    VPXOR    (XTMP3, XTMP3, XTMP1);\
             RND_STEP_RORX_4(h,a,b,c,d,e,f,g,_i+1);\
-    VPXOR    (XTMP3, XTMP3, XTMP2)  ;  /* XTMP1 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18 */\
+    VPXOR    (XTMP3, XTMP3, XTMP2);  /* XTMP3 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18 */\
             RND_STEP_RORX_5(h,a,b,c,d,e,f,g,_i+1);\
-    VPXOR    (XTMP1, XTMP3, XTMP4)  ;  /* XTMP1 = s0 */\
+    VPXOR    (XTMP1, XTMP3, XTMP4);  /* XTMP1 = s0 */\
             RND_STEP_RORX_6(h,a,b,c,d,e,f,g,_i+1);\
-    VPSHUFD(XTMP2, X3, 0b11111010)  ;  /* XTMP2 = W[-2] {BBAA}*/\
+    VPSHUFD(XTMP2, X3, 0b11111010);  /* XTMP2 = W[-2] {BBAA}*/\
             RND_STEP_RORX_7(h,a,b,c,d,e,f,g,_i+1);\
-    VPADDD   (XTMP0, XTMP0, XTMP1)  ;  /* XTMP0 = W[-16] + W[-7] + s0 */\
+    VPADDD   (XTMP0, XTMP0, XTMP1);  /* XTMP0 = W[-16] + W[-7] + s0 */\
             RND_STEP_RORX_8(h,a,b,c,d,e,f,g,_i+1);\
 \
             RND_STEP_RORX_1(g,h,a,b,c,d,e,f,_i+2);\
-    VPSRLD   (XTMP4, XTMP2, 10) ;      /* XTMP4 = W[-2] >> 10 {BBAA} */\
+    VPSRLD   (XTMP4, XTMP2, 10);      /* XTMP4 = W[-2] >> 10 {BBAA} */\
             RND_STEP_RORX_2(g,h,a,b,c,d,e,f,_i+2);\
-    VPSRLQ   (XTMP3, XTMP2, 19) ;      /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */\
+    VPSRLQ   (XTMP3, XTMP2, 19);      /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */\
             RND_STEP_RORX_3(g,h,a,b,c,d,e,f,_i+2);\
-    VPSRLQ   (XTMP2, XTMP2, 17) ;      /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */\
+    VPSRLQ   (XTMP2, XTMP2, 17);      /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */\
             RND_STEP_RORX_4(g,h,a,b,c,d,e,f,_i+2);\
-    VPXOR    (XTMP2, XTMP2, XTMP3) ;\
+    VPXOR    (XTMP2, XTMP2, XTMP3);\
             RND_STEP_RORX_5(g,h,a,b,c,d,e,f,_i+2);\
-    VPXOR    (XTMP4, XTMP4, XTMP2) ;   /* XTMP4 = s1 {xBxA} */\
+    VPXOR    (XTMP4, XTMP4, XTMP2);   /* XTMP4 = s1 {xBxA} */\
             RND_STEP_RORX_6(g,h,a,b,c,d,e,f,_i+2);\
-    VPSHUFB  (XTMP4, XTMP4, SHUF_00BA)  ;  /* XTMP4 = s1 {00BA} */\
+    VPSHUFB  (XTMP4, XTMP4, SHUF_00BA);  /* XTMP4 = s1 {00BA} */\
             RND_STEP_RORX_7(g,h,a,b,c,d,e,f,_i+2);\
-    VPADDD   (XTMP0, XTMP0, XTMP4)  ;  /* XTMP0 = {..., ..., W[1], W[0]} */\
+    VPADDD   (XTMP0, XTMP0, XTMP4);  /* XTMP0 = {..., ..., W[1], W[0]} */\
             RND_STEP_RORX_8(g,h,a,b,c,d,e,f,_i+2);\
 \
             RND_STEP_RORX_1(f,g,h,a,b,c,d,e,_i+3);\
-    VPSHUFD  (XTMP2, XTMP0, 0b01010000) ; /* XTMP2 = W[-2] {DDCC} */\
+    VPSHUFD  (XTMP2, XTMP0, 0b01010000); /* XTMP2 = W[-2] {DDCC} */\
             RND_STEP_RORX_2(f,g,h,a,b,c,d,e,_i+3);\
     VPSRLD   (XTMP5, XTMP2, 10);       /* XTMP5 = W[-2] >> 10 {DDCC} */\
             RND_STEP_RORX_3(f,g,h,a,b,c,d,e,_i+3);\
     VPSRLQ   (XTMP3, XTMP2, 19);       /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */\
             RND_STEP_RORX_4(f,g,h,a,b,c,d,e,_i+3);\
-    VPSRLQ   (XTMP2, XTMP2, 17) ;      /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */\
+    VPSRLQ   (XTMP2, XTMP2, 17);      /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */\
             RND_STEP_RORX_5(f,g,h,a,b,c,d,e,_i+3);\
-    VPXOR    (XTMP2, XTMP2, XTMP3) ;\
+    VPXOR    (XTMP2, XTMP2, XTMP3);\
             RND_STEP_RORX_6(f,g,h,a,b,c,d,e,_i+3);\
-    VPXOR    (XTMP5, XTMP5, XTMP2) ;   /* XTMP5 = s1 {xDxC} */\
+    VPXOR    (XTMP5, XTMP5, XTMP2);   /* XTMP5 = s1 {xDxC} */\
             RND_STEP_RORX_7(f,g,h,a,b,c,d,e,_i+3);\
-    VPSHUFB  (XTMP5, XTMP5, SHUF_DC00) ; /* XTMP5 = s1 {DC00} */\
+    VPSHUFB  (XTMP5, XTMP5, SHUF_DC00); /* XTMP5 = s1 {DC00} */\
             RND_STEP_RORX_8(f,g,h,a,b,c,d,e,_i+3);\
-    VPADDD   (X0, XTMP5, XTMP0) ;      /* X0 = {W[3], W[2], W[1], W[0]} */\
+    VPADDD   (X0, XTMP5, XTMP0);      /* X0 = {W[3], W[2], W[1], W[0]} */\
 
-#endif
+#endif /* HAVE_INTEL_RORX */
 
 
 #define W_K_from_buff\
          __asm__ volatile("vmovdqu %0, %%xmm4\n\t"\
                           "vpshufb %%xmm13, %%xmm4, %%xmm4\n\t"\
-                          :: "m"(sha256->buffer[0]):"%xmm4") ;\
+                          :: "m"(sha256->buffer[0]):"%xmm4");\
          __asm__ volatile("vmovdqu %0, %%xmm5\n\t"\
                           "vpshufb %%xmm13, %%xmm5, %%xmm5\n\t"\
-                          ::"m"(sha256->buffer[4]):"%xmm5") ;\
+                          ::"m"(sha256->buffer[4]):"%xmm5");\
          __asm__ volatile("vmovdqu %0, %%xmm6\n\t"\
                           "vpshufb %%xmm13, %%xmm6, %%xmm6\n\t"\
-                          ::"m"(sha256->buffer[8]):"%xmm6") ;\
+                          ::"m"(sha256->buffer[8]):"%xmm6");\
          __asm__ volatile("vmovdqu %0, %%xmm7\n\t"\
                           "vpshufb %%xmm13, %%xmm7, %%xmm7\n\t"\
-                          ::"m"(sha256->buffer[12]):"%xmm7") ;\
+                          ::"m"(sha256->buffer[12]):"%xmm7");\
 
 #define _SET_W_K_XFER(reg, i)\
-    __asm__ volatile("vpaddd %0, %"#reg", %%xmm9"::"m"(K[i]):XMM_REGs) ;\
-    __asm__ volatile("vmovdqa %%xmm9, %0":"=m"(W_K[i])::XMM_REGs) ;
+    __asm__ volatile("vpaddd %0, %"#reg", %%xmm9"::"m"(K[i]):XMM_REGs); /* xmm9 = reg + 16 bytes of K starting at K[i] */\
+    __asm__ volatile("vmovdqa %%xmm9, %0":"=m"(W_K[i])::XMM_REGs); /* store xmm9 at W_K[i] */
 
 #define SET_W_K_XFER(reg, i) _SET_W_K_XFER(reg, i)
 
-static const ALIGN32 word64 mSHUF_00BA[] = { 0x0b0a090803020100, 0xFFFFFFFFFFFFFFFF } ; /* shuffle xBxA -> 00BA */
-static const ALIGN32 word64 mSHUF_DC00[] = { 0xFFFFFFFFFFFFFFFF, 0x0b0a090803020100 } ; /* shuffle xDxC -> DC00 */
-static const ALIGN32 word64 mBYTE_FLIP_MASK[] =  { 0x0405060700010203, 0x0c0d0e0f08090a0b } ;
+static const ALIGN32 word64 mSHUF_00BA[] = { 0x0b0a090803020100, 0xFFFFFFFFFFFFFFFF }; /* shuffle xBxA -> 00BA */
+static const ALIGN32 word64 mSHUF_DC00[] = { 0xFFFFFFFFFFFFFFFF, 0x0b0a090803020100 }; /* shuffle xDxC -> DC00 */
+static const ALIGN32 word64 mBYTE_FLIP_MASK[] =  { 0x0405060700010203, 0x0c0d0e0f08090a0b }; /* shuffle: byte-swap each 32-bit word */
 
 
 #define _Init_Masks(mask1, mask2, mask3)\
-__asm__ volatile("vmovdqu %0, %"#mask1 ::"m"(mBYTE_FLIP_MASK[0])) ;\
-__asm__ volatile("vmovdqu %0, %"#mask2 ::"m"(mSHUF_00BA[0])) ;\
-__asm__ volatile("vmovdqu %0, %"#mask3 ::"m"(mSHUF_DC00[0])) ;
+__asm__ volatile("vmovdqu %0, %"#mask1 ::"m"(mBYTE_FLIP_MASK[0])); /* mask1 = byte-flip mask */\
+__asm__ volatile("vmovdqu %0, %"#mask2 ::"m"(mSHUF_00BA[0])); /* mask2 = xBxA -> 00BA shuffle */\
+__asm__ volatile("vmovdqu %0, %"#mask3 ::"m"(mSHUF_DC00[0])); /* mask3 = xDxC -> DC00 shuffle */
 
 #define Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00)\
     _Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00)
@@ -1031,77 +1094,77 @@ __asm__ volatile("vmovdqu %0, %"#mask3 ::"m"(mSHUF_DC00[0])) ;
 
 static int Transform_AVX1(Sha256* sha256)
 {
-    ALIGN32 word32 W_K[64] ;  /* temp for W+K */
+    ALIGN32 word32 W_K[64];  /* W + K sums, written by SET_W_K_XFER */
 
-    Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00) ;
-    W_K_from_buff ; /* X0, X1, X2, X3 = W[0..15] ; */
+    Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00);
+    W_K_from_buff; /* X0, X1, X2, X3 = W[0..15]; */
 
-    DigestToReg(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7) ;
+    DigestToReg(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7);
+    /* indices 0-44: each MessageSched call covers _i.._i+3 and refills one X */
+    SET_W_K_XFER(X0, 0);
 
-    SET_W_K_XFER(X0, 0) ;
     MessageSched(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
-            SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,0) ;
-    SET_W_K_XFER(X1, 4) ;
+            SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,0);
+    SET_W_K_XFER(X1, 4);
     MessageSched(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
-            SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,4) ;
-    SET_W_K_XFER(X2, 8) ;
+            SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,4);
+    SET_W_K_XFER(X2, 8);
     MessageSched(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
-            SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8) ;
-    SET_W_K_XFER(X3, 12) ;
+            SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8);
+    SET_W_K_XFER(X3, 12);
     MessageSched(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
-            SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,12) ;
-    SET_W_K_XFER(X0, 16) ;
+            SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,12);
+    SET_W_K_XFER(X0, 16);
     MessageSched(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
-            SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16) ;
-    SET_W_K_XFER(X1, 20) ;
+            SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16);
+    SET_W_K_XFER(X1, 20);
     MessageSched(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
-            SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,20) ;
-    SET_W_K_XFER(X2, 24) ;
+            SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,20);
+    SET_W_K_XFER(X2, 24);
     MessageSched(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
-            SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24) ;
-    SET_W_K_XFER(X3, 28) ;
+            SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24);
+    SET_W_K_XFER(X3, 28);
     MessageSched(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
-            SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,28) ;
-    SET_W_K_XFER(X0, 32) ;
+            SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,28);
+    SET_W_K_XFER(X0, 32);
     MessageSched(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
-            SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32) ;
-    SET_W_K_XFER(X1, 36) ;
+            SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32);
+    SET_W_K_XFER(X1, 36);
     MessageSched(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
-            SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,36) ;
-    SET_W_K_XFER(X2, 40) ;
+            SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,36);
+    SET_W_K_XFER(X2, 40);
     MessageSched(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
-            SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40) ;
-    SET_W_K_XFER(X3, 44) ;
+            SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40);
+    SET_W_K_XFER(X3, 44);
     MessageSched(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
-            SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,44) ;
+            SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,44);
 
-    SET_W_K_XFER(X0, 48) ;
-    SET_W_K_XFER(X1, 52) ;
-    SET_W_K_XFER(X2, 56) ;
-    SET_W_K_XFER(X3, 60) ;
+    SET_W_K_XFER(X0, 48);
+    SET_W_K_XFER(X1, 52);
+    SET_W_K_XFER(X2, 56);
+    SET_W_K_XFER(X3, 60);
 
-    RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48) ;
-    RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49) ;
-    RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50) ;
-    RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51) ;
+    RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48);
+    RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49);
+    RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50);
+    RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51);
 
-    RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52) ;
-    RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53) ;
-    RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54) ;
-    RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55) ;
+    RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52);
+    RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53);
+    RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54);
+    RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55);
 
-    RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,56) ;
-    RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,57) ;
-    RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,58) ;
-    RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,59) ;
+    RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,56);
+    RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,57);
+    RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,58);
+    RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,59);
 
-    RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,60) ;
-    RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,61) ;
-    RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,62) ;
-    RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,63) ;
-
-    RegToDigest(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7) ;
+    RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,60);
+    RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,61);
+    RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,62);
+    RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,63);
 
+    RegToDigest(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7);
 
     return 0;
 }
@@ -1109,122 +1172,120 @@ static int Transform_AVX1(Sha256* sha256)
 #if defined(HAVE_INTEL_RORX)
 static int Transform_AVX1_RORX(Sha256* sha256)
 {
-    ALIGN32 word32 W_K[64] ;  /* temp for W+K */
+    ALIGN32 word32 W_K[64];  /* temp for W+K */
 
-    Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00) ;
-    W_K_from_buff ; /* X0, X1, X2, X3 = W[0..15] ; */
+    Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00);
+    W_K_from_buff; /* X0, X1, X2, X3 = W[0..15]; */
 
-    DigestToReg(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7) ;
-    SET_W_K_XFER(X0, 0) ;
+    DigestToReg(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7);
+    SET_W_K_XFER(X0, 0);
     MessageSched_RORX(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
-            XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,0) ;
-    SET_W_K_XFER(X1, 4) ;
+            XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,0);
+    SET_W_K_XFER(X1, 4);
     MessageSched_RORX(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
-            XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,4) ;
-    SET_W_K_XFER(X2, 8) ;
+            XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,4);
+    SET_W_K_XFER(X2, 8);
     MessageSched_RORX(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
-            XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8) ;
-    SET_W_K_XFER(X3, 12) ;
+            XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8);
+    SET_W_K_XFER(X3, 12);
     MessageSched_RORX(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
-            XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,12) ;
-    SET_W_K_XFER(X0, 16) ;
+            XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,12);
+    SET_W_K_XFER(X0, 16);
     MessageSched_RORX(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
-            XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16) ;
-    SET_W_K_XFER(X1, 20) ;
+            XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16);
+    SET_W_K_XFER(X1, 20);
     MessageSched_RORX(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
-            XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,20) ;
-    SET_W_K_XFER(X2, 24) ;
+            XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,20);
+    SET_W_K_XFER(X2, 24);
     MessageSched_RORX(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
-            XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24) ;
-    SET_W_K_XFER(X3, 28) ;
+            XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24);
+    SET_W_K_XFER(X3, 28);
     MessageSched_RORX(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
-            XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,28) ;
-    SET_W_K_XFER(X0, 32) ;
+            XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,28);
+    SET_W_K_XFER(X0, 32);
     MessageSched_RORX(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
-            XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32) ;
-    SET_W_K_XFER(X1, 36) ;
+            XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32);
+    SET_W_K_XFER(X1, 36);
     MessageSched_RORX(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
-            XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,36) ;
-    SET_W_K_XFER(X2, 40) ;
+            XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,36);
+    SET_W_K_XFER(X2, 40);
     MessageSched_RORX(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
-            XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40) ;
-    SET_W_K_XFER(X3, 44) ;
+            XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40);
+    SET_W_K_XFER(X3, 44);
     MessageSched_RORX(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
-            XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,44) ;
+            XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,44);
 
-    SET_W_K_XFER(X0, 48) ;
-    SET_W_K_XFER(X1, 52) ;
-    SET_W_K_XFER(X2, 56) ;
-    SET_W_K_XFER(X3, 60) ;
+    SET_W_K_XFER(X0, 48);
+    SET_W_K_XFER(X1, 52);
+    SET_W_K_XFER(X2, 56);
+    SET_W_K_XFER(X3, 60);
 
-    RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48) ;
-    RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49) ;
-    RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50) ;
-    RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51) ;
+    RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48);
+    RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49);
+    RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50);
+    RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51);
 
-    RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52) ;
-    RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53) ;
-    RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54) ;
-    RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55) ;
+    RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52);
+    RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53);
+    RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54);
+    RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55);
 
-    RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,56) ;
-    RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,57) ;
-    RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,58) ;
-    RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,59) ;
+    RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,56);
+    RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,57);
+    RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,58);
+    RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,59);
 
-    RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,60) ;
-    RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,61) ;
-    RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,62) ;
-    RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,63) ;
-
-    RegToDigest(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7) ;
+    RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,60);
+    RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,61);
+    RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,62);
+    RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,63);
 
+    RegToDigest(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7);
 
     return 0;
 }
 #endif  /* HAVE_INTEL_RORX */
-
 #endif  /* HAVE_INTEL_AVX1 */
 
 
 #if defined(HAVE_INTEL_AVX2)
 
-#define _MOVE_to_REG(ymm, mem)       __asm__ volatile("vmovdqu %0, %%"#ymm" ":: "m"(mem):YMM_REGs) ;
-#define _MOVE_to_MEM(mem, ymm)       __asm__ volatile("vmovdqu %%"#ymm", %0" : "=m"(mem)::YMM_REGs) ;
+#define _MOVE_to_REG(ymm, mem)       __asm__ volatile("vmovdqu %0, %%"#ymm" ":: "m"(mem):YMM_REGs);
+#define _MOVE_to_MEM(mem, ymm)       __asm__ volatile("vmovdqu %%"#ymm", %0" : "=m"(mem)::YMM_REGs);
 #define _BYTE_SWAP(ymm, map)              __asm__ volatile("vpshufb %0, %%"#ymm", %%"#ymm"\n\t"\
-                                                       :: "m"(map):YMM_REGs) ;
+                                                       :: "m"(map):YMM_REGs);
 #define _MOVE_128(ymm0, ymm1, ymm2, map)   __asm__ volatile("vperm2i128  $"#map", %%"\
-                                  #ymm2", %%"#ymm1", %%"#ymm0" ":::YMM_REGs) ;
+                                  #ymm2", %%"#ymm1", %%"#ymm0" ":::YMM_REGs);
 #define _MOVE_BYTE(ymm0, ymm1, map)  __asm__ volatile("vpshufb %0, %%"#ymm1", %%"\
-                                  #ymm0"\n\t":: "m"(map):YMM_REGs) ;
+                                  #ymm0"\n\t":: "m"(map):YMM_REGs);
 #define _S_TEMP(dest, src, bits, temp)    __asm__ volatile("vpsrld  $"#bits", %%"\
          #src", %%"#dest"\n\tvpslld  $32-"#bits", %%"#src", %%"#temp"\n\tvpor %%"\
-         #temp",%%"#dest", %%"#dest" ":::YMM_REGs) ;
+         #temp",%%"#dest", %%"#dest" ":::YMM_REGs);
 #define _AVX2_R(dest, src, bits)          __asm__ volatile("vpsrld  $"#bits", %%"\
-                                  #src", %%"#dest" ":::YMM_REGs) ;
+                                  #src", %%"#dest" ":::YMM_REGs);
 #define _XOR(dest, src1, src2)       __asm__ volatile("vpxor   %%"#src1", %%"\
-         #src2", %%"#dest" ":::YMM_REGs) ;
+         #src2", %%"#dest" ":::YMM_REGs);
 #define _OR(dest, src1, src2)       __asm__ volatile("vpor    %%"#src1", %%"\
-         #src2", %%"#dest" ":::YMM_REGs) ;
+         #src2", %%"#dest" ":::YMM_REGs);
 #define _ADD(dest, src1, src2)       __asm__ volatile("vpaddd   %%"#src1", %%"\
-         #src2", %%"#dest" ":::YMM_REGs) ;
+         #src2", %%"#dest" ":::YMM_REGs);
 #define _ADD_MEM(dest, src1, mem)    __asm__ volatile("vpaddd   %0, %%"#src1", %%"\
-         #dest" "::"m"(mem):YMM_REGs) ;
+         #dest" "::"m"(mem):YMM_REGs);
 #define _BLEND(map, dest, src1, src2)    __asm__ volatile("vpblendd    $"#map", %%"\
-         #src1",   %%"#src2", %%"#dest" ":::YMM_REGs) ;
+         #src1",   %%"#src2", %%"#dest" ":::YMM_REGs);
 
-#define    _EXTRACT_XMM_0(xmm, mem)  __asm__ volatile("vpextrd $0, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
-#define    _EXTRACT_XMM_1(xmm, mem)  __asm__ volatile("vpextrd $1, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
-#define    _EXTRACT_XMM_2(xmm, mem)  __asm__ volatile("vpextrd $2, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
-#define    _EXTRACT_XMM_3(xmm, mem)  __asm__ volatile("vpextrd $3, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
+#define    _EXTRACT_XMM_0(xmm, mem)  __asm__ volatile("vpextrd $0, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs);
+#define    _EXTRACT_XMM_1(xmm, mem)  __asm__ volatile("vpextrd $1, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs);
+#define    _EXTRACT_XMM_2(xmm, mem)  __asm__ volatile("vpextrd $2, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs);
+#define    _EXTRACT_XMM_3(xmm, mem)  __asm__ volatile("vpextrd $3, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs);
 #define    _EXTRACT_XMM_4(ymm, xmm, mem)\
-      __asm__ volatile("vperm2i128 $0x1, %%"#ymm", %%"#ymm", %%"#ymm" ":::YMM_REGs) ;\
-      __asm__ volatile("vpextrd $0, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
-#define    _EXTRACT_XMM_5(xmm, mem)  __asm__ volatile("vpextrd $1, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
-#define    _EXTRACT_XMM_6(xmm, mem)  __asm__ volatile("vpextrd $2, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
-#define    _EXTRACT_XMM_7(xmm, mem)  __asm__ volatile("vpextrd $3, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
+      __asm__ volatile("vperm2i128 $0x1, %%"#ymm", %%"#ymm", %%"#ymm" ":::YMM_REGs);\
+      __asm__ volatile("vpextrd $0, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs);
+#define    _EXTRACT_XMM_5(xmm, mem)  __asm__ volatile("vpextrd $1, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs);
+#define    _EXTRACT_XMM_6(xmm, mem)  __asm__ volatile("vpextrd $2, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs);
+#define    _EXTRACT_XMM_7(xmm, mem)  __asm__ volatile("vpextrd $3, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs);
 
-#define    _SWAP_YMM_HL(ymm)   __asm__ volatile("vperm2i128 $0x1, %%"#ymm", %%"#ymm", %%"#ymm" ":::YMM_REGs) ;
+#define    _SWAP_YMM_HL(ymm)   __asm__ volatile("vperm2i128 $0x1, %%"#ymm", %%"#ymm", %%"#ymm" ":::YMM_REGs);
 #define     SWAP_YMM_HL(ymm)   _SWAP_YMM_HL(ymm)
 
 #define MOVE_to_REG(ymm, mem)      _MOVE_to_REG(ymm, mem)
@@ -1243,26 +1304,26 @@ static int Transform_AVX1_RORX(Sha256* sha256)
 #define AVX2_R(dest, src, bits)      _AVX2_R(dest, src, bits)
 
 #define GAMMA0(dest, src)      AVX2_S(dest, src, 7);  AVX2_S(G_TEMP, src, 18); \
-    XOR(dest, G_TEMP, dest) ; AVX2_R(G_TEMP, src, 3);  XOR(dest, G_TEMP, dest) ;
+    XOR(dest, G_TEMP, dest); AVX2_R(G_TEMP, src, 3);  XOR(dest, G_TEMP, dest);
 #define GAMMA0_1(dest, src)    AVX2_S(dest, src, 7);  AVX2_S(G_TEMP, src, 18);
-#define GAMMA0_2(dest, src)    XOR(dest, G_TEMP, dest) ; AVX2_R(G_TEMP, src, 3);  \
-    XOR(dest, G_TEMP, dest) ;
+#define GAMMA0_2(dest, src)    XOR(dest, G_TEMP, dest); AVX2_R(G_TEMP, src, 3);  \
+    XOR(dest, G_TEMP, dest);
 
 #define GAMMA1(dest, src)      AVX2_S(dest, src, 17); AVX2_S(G_TEMP, src, 19); \
-    XOR(dest, G_TEMP, dest) ; AVX2_R(G_TEMP, src, 10); XOR(dest, G_TEMP, dest) ;
+    XOR(dest, G_TEMP, dest); AVX2_R(G_TEMP, src, 10); XOR(dest, G_TEMP, dest);
 #define GAMMA1_1(dest, src)    AVX2_S(dest, src, 17); AVX2_S(G_TEMP, src, 19);
-#define GAMMA1_2(dest, src)    XOR(dest, G_TEMP, dest) ; AVX2_R(G_TEMP, src, 10); \
-    XOR(dest, G_TEMP, dest) ;
+#define GAMMA1_2(dest, src)    XOR(dest, G_TEMP, dest); AVX2_R(G_TEMP, src, 10); \
+    XOR(dest, G_TEMP, dest);
 
-#define    FEEDBACK1_to_W_I_2    MOVE_BYTE(YMM_TEMP0, W_I, mMAP1toW_I_2[0]) ; \
-    BLEND(0x0c, W_I_2, YMM_TEMP0, W_I_2) ;
-#define    FEEDBACK2_to_W_I_2    MOVE_128(YMM_TEMP0, W_I, W_I, 0x08) ;  \
-    MOVE_BYTE(YMM_TEMP0, YMM_TEMP0, mMAP2toW_I_2[0]) ; BLEND(0x30, W_I_2, YMM_TEMP0, W_I_2) ;
-#define    FEEDBACK3_to_W_I_2    MOVE_BYTE(YMM_TEMP0, W_I, mMAP3toW_I_2[0]) ; \
-    BLEND(0xc0, W_I_2, YMM_TEMP0, W_I_2) ;
+#define    FEEDBACK1_to_W_I_2    MOVE_BYTE(YMM_TEMP0, W_I, mMAP1toW_I_2[0]); \
+    BLEND(0x0c, W_I_2, YMM_TEMP0, W_I_2);
+#define    FEEDBACK2_to_W_I_2    MOVE_128(YMM_TEMP0, W_I, W_I, 0x08);  \
+    MOVE_BYTE(YMM_TEMP0, YMM_TEMP0, mMAP2toW_I_2[0]); BLEND(0x30, W_I_2, YMM_TEMP0, W_I_2);
+#define    FEEDBACK3_to_W_I_2    MOVE_BYTE(YMM_TEMP0, W_I, mMAP3toW_I_2[0]); \
+    BLEND(0xc0, W_I_2, YMM_TEMP0, W_I_2);
 
-#define    FEEDBACK_to_W_I_7     MOVE_128(YMM_TEMP0, W_I, W_I, 0x08) ;\
-    MOVE_BYTE(YMM_TEMP0, YMM_TEMP0, mMAPtoW_I_7[0]) ; BLEND(0x80, W_I_7, YMM_TEMP0, W_I_7) ;
+#define    FEEDBACK_to_W_I_7     MOVE_128(YMM_TEMP0, W_I, W_I, 0x08);\
+    MOVE_BYTE(YMM_TEMP0, YMM_TEMP0, mMAPtoW_I_7[0]); BLEND(0x80, W_I_7, YMM_TEMP0, W_I_7);
 
 #undef voitle
 
@@ -1284,69 +1345,69 @@ static int Transform_AVX1_RORX(Sha256* sha256)
 
 
 #define MOVE_15_to_16(w_i_16, w_i_15, w_i_7)\
-    __asm__ volatile("vperm2i128  $0x01, %%"#w_i_15", %%"#w_i_15", %%"#w_i_15" ":::YMM_REGs) ;\
-    __asm__ volatile("vpblendd    $0x08, %%"#w_i_15", %%"#w_i_7", %%"#w_i_16" ":::YMM_REGs) ;\
-    __asm__ volatile("vperm2i128 $0x01,  %%"#w_i_7",  %%"#w_i_7", %%"#w_i_15" ":::YMM_REGs) ;\
-    __asm__ volatile("vpblendd    $0x80, %%"#w_i_15", %%"#w_i_16", %%"#w_i_16" ":::YMM_REGs) ;\
-    __asm__ volatile("vpshufd    $0x93,  %%"#w_i_16", %%"#w_i_16" ":::YMM_REGs) ;\
+    __asm__ volatile("vperm2i128  $0x01, %%"#w_i_15", %%"#w_i_15", %%"#w_i_15" ":::YMM_REGs);\
+    __asm__ volatile("vpblendd    $0x08, %%"#w_i_15", %%"#w_i_7", %%"#w_i_16" ":::YMM_REGs);\
+    __asm__ volatile("vperm2i128 $0x01,  %%"#w_i_7",  %%"#w_i_7", %%"#w_i_15" ":::YMM_REGs);\
+    __asm__ volatile("vpblendd    $0x80, %%"#w_i_15", %%"#w_i_16", %%"#w_i_16" ":::YMM_REGs);\
+    __asm__ volatile("vpshufd    $0x93,  %%"#w_i_16", %%"#w_i_16" ":::YMM_REGs);\
 
 #define MOVE_7_to_15(w_i_15, w_i_7)\
-    __asm__ volatile("vmovdqu                 %%"#w_i_7",  %%"#w_i_15" ":::YMM_REGs) ;\
+    __asm__ volatile("vmovdqu                 %%"#w_i_7",  %%"#w_i_15" ":::YMM_REGs);\
 
 #define MOVE_I_to_7(w_i_7, w_i)\
-    __asm__ volatile("vperm2i128 $0x01,       %%"#w_i",   %%"#w_i",   %%"#w_i_7" ":::YMM_REGs) ;\
-    __asm__ volatile("vpblendd    $0x01,       %%"#w_i_7",   %%"#w_i", %%"#w_i_7" ":::YMM_REGs) ;\
-    __asm__ volatile("vpshufd    $0x39, %%"#w_i_7", %%"#w_i_7" ":::YMM_REGs) ;\
+    __asm__ volatile("vperm2i128 $0x01,       %%"#w_i",   %%"#w_i",   %%"#w_i_7" ":::YMM_REGs);\
+    __asm__ volatile("vpblendd    $0x01,       %%"#w_i_7",   %%"#w_i", %%"#w_i_7" ":::YMM_REGs);\
+    __asm__ volatile("vpshufd    $0x39, %%"#w_i_7", %%"#w_i_7" ":::YMM_REGs);\
 
 #define MOVE_I_to_2(w_i_2, w_i)\
-    __asm__ volatile("vperm2i128 $0x01,       %%"#w_i", %%"#w_i", %%"#w_i_2" ":::YMM_REGs) ;\
-    __asm__ volatile("vpshufd    $0x0e, %%"#w_i_2", %%"#w_i_2" ":::YMM_REGs) ;\
+    __asm__ volatile("vperm2i128 $0x01,       %%"#w_i", %%"#w_i", %%"#w_i_2" ":::YMM_REGs);\
+    __asm__ volatile("vpshufd    $0x0e, %%"#w_i_2", %%"#w_i_2" ":::YMM_REGs);\
 
 #define ROTATE_W(w_i_16, w_i_15, w_i_7, w_i_2, w_i)\
-    MOVE_15_to_16(w_i_16, w_i_15, w_i_7) ; \
-    MOVE_7_to_15(w_i_15, w_i_7) ; \
-    MOVE_I_to_7(w_i_7, w_i) ; \
-    MOVE_I_to_2(w_i_2, w_i) ;\
+    MOVE_15_to_16(w_i_16, w_i_15, w_i_7); \
+    MOVE_7_to_15(w_i_15, w_i_7); \
+    MOVE_I_to_7(w_i_7, w_i); \
+    MOVE_I_to_2(w_i_2, w_i);\
 
 #define _RegToDigest(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
-    { word32 d ;\
-    __asm__ volatile("movl %"#S_0", %0":"=r"(d)::SSE_REGs) ;\
+    { word32 d;\
+    __asm__ volatile("movl %"#S_0", %0":"=r"(d)::SSE_REGs);\
     sha256->digest[0] += d;\
-    __asm__ volatile("movl %"#S_1", %0":"=r"(d)::SSE_REGs) ;\
+    __asm__ volatile("movl %"#S_1", %0":"=r"(d)::SSE_REGs);\
     sha256->digest[1] += d;\
-    __asm__ volatile("movl %"#S_2", %0":"=r"(d)::SSE_REGs) ;\
+    __asm__ volatile("movl %"#S_2", %0":"=r"(d)::SSE_REGs);\
     sha256->digest[2] += d;\
-    __asm__ volatile("movl %"#S_3", %0":"=r"(d)::SSE_REGs) ;\
+    __asm__ volatile("movl %"#S_3", %0":"=r"(d)::SSE_REGs);\
     sha256->digest[3] += d;\
-    __asm__ volatile("movl %"#S_4", %0":"=r"(d)::SSE_REGs) ;\
+    __asm__ volatile("movl %"#S_4", %0":"=r"(d)::SSE_REGs);\
     sha256->digest[4] += d;\
-    __asm__ volatile("movl %"#S_5", %0":"=r"(d)::SSE_REGs) ;\
+    __asm__ volatile("movl %"#S_5", %0":"=r"(d)::SSE_REGs);\
     sha256->digest[5] += d;\
-    __asm__ volatile("movl %"#S_6", %0":"=r"(d)::SSE_REGs) ;\
+    __asm__ volatile("movl %"#S_6", %0":"=r"(d)::SSE_REGs);\
     sha256->digest[6] += d;\
-    __asm__ volatile("movl %"#S_7", %0":"=r"(d)::SSE_REGs) ;\
+    __asm__ volatile("movl %"#S_7", %0":"=r"(d)::SSE_REGs);\
     sha256->digest[7] += d;\
 }
 
 #define _DumpS(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
-  { word32 d[8] ;\
-    __asm__ volatile("movl %"#S_0", %0":"=r"(d[0])::SSE_REGs) ;\
-    __asm__ volatile("movl %"#S_1", %0":"=r"(d[1])::SSE_REGs) ;\
-    __asm__ volatile("movl %"#S_2", %0":"=r"(d[2])::SSE_REGs) ;\
-    __asm__ volatile("movl %"#S_3", %0":"=r"(d[3])::SSE_REGs) ;\
-    __asm__ volatile("movl %"#S_4", %0":"=r"(d[4])::SSE_REGs) ;\
-    __asm__ volatile("movl %"#S_5", %0":"=r"(d[5])::SSE_REGs) ;\
-    __asm__ volatile("movl %"#S_6", %0":"=r"(d[6])::SSE_REGs) ;\
-    __asm__ volatile("movl %"#S_7", %0":"=r"(d[7])::SSE_REGs) ;\
+  { word32 d[8];\
+    __asm__ volatile("movl %"#S_0", %0":"=r"(d[0])::SSE_REGs);\
+    __asm__ volatile("movl %"#S_1", %0":"=r"(d[1])::SSE_REGs);\
+    __asm__ volatile("movl %"#S_2", %0":"=r"(d[2])::SSE_REGs);\
+    __asm__ volatile("movl %"#S_3", %0":"=r"(d[3])::SSE_REGs);\
+    __asm__ volatile("movl %"#S_4", %0":"=r"(d[4])::SSE_REGs);\
+    __asm__ volatile("movl %"#S_5", %0":"=r"(d[5])::SSE_REGs);\
+    __asm__ volatile("movl %"#S_6", %0":"=r"(d[6])::SSE_REGs);\
+    __asm__ volatile("movl %"#S_7", %0":"=r"(d[7])::SSE_REGs);\
         printf("S[0..7]=%08x,%08x,%08x,%08x,%08x,%08x,%08x,%08x\n", d[0],d[1],d[2],d[3],d[4],d[5],d[6],d[7]);\
-    __asm__ volatile("movl %0, %"#S_0::"r"(d[0]):SSE_REGs) ;\
-    __asm__ volatile("movl %0, %"#S_1::"r"(d[1]):SSE_REGs) ;\
-    __asm__ volatile("movl %0, %"#S_2::"r"(d[2]):SSE_REGs) ;\
-    __asm__ volatile("movl %0, %"#S_3::"r"(d[3]):SSE_REGs) ;\
-    __asm__ volatile("movl %0, %"#S_4::"r"(d[4]):SSE_REGs) ;\
-    __asm__ volatile("movl %0, %"#S_5::"r"(d[5]):SSE_REGs) ;\
-    __asm__ volatile("movl %0, %"#S_6::"r"(d[6]):SSE_REGs) ;\
-    __asm__ volatile("movl %0, %"#S_7::"r"(d[7]):SSE_REGs) ;\
+    __asm__ volatile("movl %0, %"#S_0::"r"(d[0]):SSE_REGs);\
+    __asm__ volatile("movl %0, %"#S_1::"r"(d[1]):SSE_REGs);\
+    __asm__ volatile("movl %0, %"#S_2::"r"(d[2]):SSE_REGs);\
+    __asm__ volatile("movl %0, %"#S_3::"r"(d[3]):SSE_REGs);\
+    __asm__ volatile("movl %0, %"#S_4::"r"(d[4]):SSE_REGs);\
+    __asm__ volatile("movl %0, %"#S_5::"r"(d[5]):SSE_REGs);\
+    __asm__ volatile("movl %0, %"#S_6::"r"(d[6]):SSE_REGs);\
+    __asm__ volatile("movl %0, %"#S_7::"r"(d[7]):SSE_REGs);\
 }
 
 
@@ -1362,380 +1423,379 @@ static int Transform_AVX1_RORX(Sha256* sha256)
 
     /* Byte swap Masks to ensure that rest of the words are filled with zero's. */
     static const unsigned long mBYTE_FLIP_MASK_16[] =
-        { 0x0405060700010203, 0x0c0d0e0f08090a0b, 0x0405060700010203, 0x0c0d0e0f08090a0b } ;
+        { 0x0405060700010203, 0x0c0d0e0f08090a0b, 0x0405060700010203, 0x0c0d0e0f08090a0b };
     static const unsigned long mBYTE_FLIP_MASK_15[] =
-        { 0x0405060700010203, 0x0c0d0e0f08090a0b, 0x0405060700010203, 0x0c0d0e0f08090a0b } ;
+        { 0x0405060700010203, 0x0c0d0e0f08090a0b, 0x0405060700010203, 0x0c0d0e0f08090a0b };
     static const unsigned long mBYTE_FLIP_MASK_7 [] =
-        { 0x0405060700010203, 0x0c0d0e0f08090a0b, 0x0405060700010203, 0x8080808008090a0b } ;
+        { 0x0405060700010203, 0x0c0d0e0f08090a0b, 0x0405060700010203, 0x8080808008090a0b };
     static const unsigned long mBYTE_FLIP_MASK_2 [] =
-        { 0x0405060700010203, 0x8080808080808080, 0x8080808080808080, 0x8080808080808080 } ;
+        { 0x0405060700010203, 0x8080808080808080, 0x8080808080808080, 0x8080808080808080 };
 
     static const unsigned long mMAPtoW_I_7[] =
-        { 0x8080808080808080, 0x8080808080808080, 0x8080808080808080, 0x0302010080808080 } ;
+        { 0x8080808080808080, 0x8080808080808080, 0x8080808080808080, 0x0302010080808080 };
     static const unsigned long mMAP1toW_I_2[] =
-        { 0x8080808080808080, 0x0706050403020100, 0x8080808080808080, 0x8080808080808080 } ;
+        { 0x8080808080808080, 0x0706050403020100, 0x8080808080808080, 0x8080808080808080 };
     static const unsigned long mMAP2toW_I_2[] =
-        { 0x8080808080808080, 0x8080808080808080, 0x0f0e0d0c0b0a0908, 0x8080808080808080 } ;
+        { 0x8080808080808080, 0x8080808080808080, 0x0f0e0d0c0b0a0908, 0x8080808080808080 };
     static const unsigned long mMAP3toW_I_2[] =
-        { 0x8080808080808080, 0x8080808080808080, 0x8080808080808080, 0x0706050403020100 } ;
+        { 0x8080808080808080, 0x8080808080808080, 0x8080808080808080, 0x0706050403020100 };
 
 static int Transform_AVX2(Sha256* sha256)
 {
+#ifdef WOLFSSL_SMALL_STACK
+    word32* W_K;
+    W_K = (word32*) XMALLOC(sizeof(word32) * 64, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    if (W_K == NULL)
+        return MEMORY_E;
+#else
+    word32 W_K[64];
+#endif
 
-    #ifdef WOLFSSL_SMALL_STACK
-        word32* W_K;
-        W_K = (word32*) XMALLOC(sizeof(word32) * 64, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-        if (W_K == NULL)
-            return MEMORY_E;
-    #else
-        word32 W_K[64]  ;
-    #endif
+    MOVE_to_REG(W_I_16, sha256->buffer[0]);     BYTE_SWAP(W_I_16, mBYTE_FLIP_MASK_16[0]);
+    MOVE_to_REG(W_I_15, sha256->buffer[1]);     BYTE_SWAP(W_I_15, mBYTE_FLIP_MASK_15[0]);
+    MOVE_to_REG(W_I,    sha256->buffer[8]);    BYTE_SWAP(W_I,    mBYTE_FLIP_MASK_16[0]);
+    MOVE_to_REG(W_I_7,  sha256->buffer[16-7]); BYTE_SWAP(W_I_7,  mBYTE_FLIP_MASK_7[0]);
+    MOVE_to_REG(W_I_2,  sha256->buffer[16-2]); BYTE_SWAP(W_I_2,  mBYTE_FLIP_MASK_2[0]);
 
-    MOVE_to_REG(W_I_16, sha256->buffer[0]);     BYTE_SWAP(W_I_16, mBYTE_FLIP_MASK_16[0]) ;
-    MOVE_to_REG(W_I_15, sha256->buffer[1]);     BYTE_SWAP(W_I_15, mBYTE_FLIP_MASK_15[0]) ;
-    MOVE_to_REG(W_I,    sha256->buffer[8]) ;    BYTE_SWAP(W_I,    mBYTE_FLIP_MASK_16[0]) ;
-    MOVE_to_REG(W_I_7,  sha256->buffer[16-7]) ; BYTE_SWAP(W_I_7,  mBYTE_FLIP_MASK_7[0])  ;
-    MOVE_to_REG(W_I_2,  sha256->buffer[16-2]) ; BYTE_SWAP(W_I_2,  mBYTE_FLIP_MASK_2[0])  ;
+    DigestToReg(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7);
 
-    DigestToReg(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7) ;
+    ADD_MEM(W_K_TEMP, W_I_16, K[0]);
+    MOVE_to_MEM(W_K[0], W_K_TEMP);
 
-    ADD_MEM(W_K_TEMP, W_I_16, K[0]) ;
-    MOVE_to_MEM(W_K[0], W_K_TEMP) ;
+    RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,0);
+    RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,1);
+    RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,2);
+    RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,3);
+    RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,4);
+    RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,5);
+    RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,6);
+    RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,7);
 
-    RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,0) ;
-    RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,1) ;
-    RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,2) ;
-    RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,3) ;
-    RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,4) ;
-    RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,5) ;
-    RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,6) ;
-    RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,7) ;
+    ADD_MEM(YMM_TEMP0, W_I, K[8]);
+    MOVE_to_MEM(W_K[8], YMM_TEMP0);
 
-    ADD_MEM(YMM_TEMP0, W_I, K[8]) ;
-    MOVE_to_MEM(W_K[8], YMM_TEMP0) ;
+    /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16] */
+            RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8);
+    GAMMA0_1(W_I_TEMP, W_I_15);
+            RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8);
+    GAMMA0_2(W_I_TEMP, W_I_15);
+            RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8);
+    ADD(W_I_TEMP, W_I_16, W_I_TEMP);/* for saving W_I before adding incomplete W_I_7 */
+            RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,9);
+    ADD(W_I, W_I_7, W_I_TEMP);
+            RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,9);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,9);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,10);
+    ADD(W_I, W_I, YMM_TEMP0);/* now W[16..17] are completed */
+            RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,10);
+    FEEDBACK1_to_W_I_2;
+            RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,10);
+    FEEDBACK_to_W_I_7;
+            RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,11);
+    ADD(W_I_TEMP, W_I_7, W_I_TEMP);
+            RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,11);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,11);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,12);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0);/* now W[16..19] are completed */
+            RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,12);
+    FEEDBACK2_to_W_I_2;
+            RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,12);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,13);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,13);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..21] are completed */
+            RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,13);
+    FEEDBACK3_to_W_I_2;
+            RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,14);
+    GAMMA1(YMM_TEMP0, W_I_2);
+            RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,14);
+            RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,14);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..23] are completed */
+            RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,15);
 
-        /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15] + W[i-16]) */
-                RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8) ;
-        GAMMA0_1(W_I_TEMP, W_I_15) ;
-                RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8) ;
-        GAMMA0_2(W_I_TEMP, W_I_15) ;
-                RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8) ;
-        ADD(W_I_TEMP, W_I_16, W_I_TEMP) ;/* for saving W_I before adding incomplete W_I_7 */
-                RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,9) ;
-        ADD(W_I, W_I_7, W_I_TEMP);
-                RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,9) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,9) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,10) ;
-        ADD(W_I, W_I, YMM_TEMP0) ;/* now W[16..17] are completed */
-                RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,10) ;
-        FEEDBACK1_to_W_I_2 ;
-                RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,10) ;
-        FEEDBACK_to_W_I_7 ;
-                RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,11) ;
-        ADD(W_I_TEMP, W_I_7, W_I_TEMP);
-                RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,11) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,11) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,12) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ;/* now W[16..19] are completed */
-                RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,12) ;
-        FEEDBACK2_to_W_I_2 ;
-                RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,12) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,13) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,13) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..21] are completed */
-                RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,13) ;
-        FEEDBACK3_to_W_I_2 ;
-                RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,14) ;
-        GAMMA1(YMM_TEMP0, W_I_2) ;
-                RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,14) ;
-                RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,14) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..23] are completed */
-                RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,15) ;
+    MOVE_to_REG(YMM_TEMP0, K[16]);
+            RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,15);
+    ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I);
+            RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,15);
+    ADD(YMM_TEMP0, YMM_TEMP0, W_I);
+    MOVE_to_MEM(W_K[16], YMM_TEMP0);
 
-        MOVE_to_REG(YMM_TEMP0, K[16]) ;
-                RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,15) ;
-        ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I) ;
-                RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,15) ;
-        ADD(YMM_TEMP0, YMM_TEMP0, W_I) ;
-        MOVE_to_MEM(W_K[16], YMM_TEMP0) ;
+    /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16] */
+            RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16);
+    GAMMA0_1(W_I_TEMP, W_I_15);
+            RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16);
+    GAMMA0_2(W_I_TEMP, W_I_15);
+            RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16);
+    ADD(W_I_TEMP, W_I_16, W_I_TEMP);/* for saving W_I before adding incomplete W_I_7 */
+            RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,17);
+    ADD(W_I, W_I_7, W_I_TEMP);
+            RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,17);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,17);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,18);
+    ADD(W_I, W_I, YMM_TEMP0);/* now W[24..25] are completed */
+            RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,18);
+    FEEDBACK1_to_W_I_2;
+            RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,18);
+    FEEDBACK_to_W_I_7;
+            RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,19);
+    ADD(W_I_TEMP, W_I_7, W_I_TEMP);
+            RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,19);
+    GAMMA1(YMM_TEMP0, W_I_2);
+            RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,19);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,20);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0);/* now W[24..27] are completed */
+            RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,20);
+    FEEDBACK2_to_W_I_2;
+            RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,20);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,21);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,21);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[24..29] are completed */
+            RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,21);
+    FEEDBACK3_to_W_I_2;
+            RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,22);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,22);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,22);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[24..31] are completed */
+            RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,23);
 
-        /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15] + W[i-16]) */
-                RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16) ;
-        GAMMA0_1(W_I_TEMP, W_I_15) ;
-                RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16) ;
-        GAMMA0_2(W_I_TEMP, W_I_15) ;
-                RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16) ;
-        ADD(W_I_TEMP, W_I_16, W_I_TEMP) ;/* for saving W_I before adding incomplete W_I_7 */
-                RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,17) ;
-        ADD(W_I, W_I_7, W_I_TEMP);
-                RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,17) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,17) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,18) ;
-        ADD(W_I, W_I, YMM_TEMP0) ;/* now W[16..17] are completed */
-                RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,18) ;
-        FEEDBACK1_to_W_I_2 ;
-                RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,18) ;
-        FEEDBACK_to_W_I_7 ;
-                RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,19) ;
-        ADD(W_I_TEMP, W_I_7, W_I_TEMP);
-                RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,19) ;
-        GAMMA1(YMM_TEMP0, W_I_2) ;
-                RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,19) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,20) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ;/* now W[16..19] are completed */
-                RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,20) ;
-        FEEDBACK2_to_W_I_2 ;
-                RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,20) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,21) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,21) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..21] are completed */
-                RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,21) ;
-        FEEDBACK3_to_W_I_2 ;
-                RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,22) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,22) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,22) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..23] are completed */
-                RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,23) ;
+    MOVE_to_REG(YMM_TEMP0, K[24]);
+            RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,23);
+    ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I);
+            RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,23);
+    ADD(YMM_TEMP0, YMM_TEMP0, W_I);
+    MOVE_to_MEM(W_K[24], YMM_TEMP0);
 
-        MOVE_to_REG(YMM_TEMP0, K[24]) ;
-                RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,23) ;
-        ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I) ;
-                RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,23) ;
-        ADD(YMM_TEMP0, YMM_TEMP0, W_I) ;
-        MOVE_to_MEM(W_K[24], YMM_TEMP0) ;
+            /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16] */
+            RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24);
+    GAMMA0_1(W_I_TEMP, W_I_15);
+            RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24);
+    GAMMA0_2(W_I_TEMP, W_I_15);
+            RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24);
+    ADD(W_I_TEMP, W_I_16, W_I_TEMP);/* for saving W_I before adding incomplete W_I_7 */
+            RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,25);
+    ADD(W_I, W_I_7, W_I_TEMP);
+            RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,25);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,25);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,26);
+    ADD(W_I, W_I, YMM_TEMP0);/* now W[16..17] are completed */
+            RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,26);
+    FEEDBACK1_to_W_I_2;
+            RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,26);
+    FEEDBACK_to_W_I_7;
+            RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,27);
+    ADD(W_I_TEMP, W_I_7, W_I_TEMP);
+            RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,27);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,27);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,28);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0);/* now W[16..19] are completed */
+            RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,28);
+    FEEDBACK2_to_W_I_2;
+            RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,28);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,29);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,29);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..21] are completed */
+            RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,29);
+    FEEDBACK3_to_W_I_2;
+            RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,30);
+    GAMMA1(YMM_TEMP0, W_I_2);
+            RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,30);
+            RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,30);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..23] are completed */
+            RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,31);
 
-                /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15] + W[i-16]) */
-                RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24) ;
-        GAMMA0_1(W_I_TEMP, W_I_15) ;
-                RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24) ;
-        GAMMA0_2(W_I_TEMP, W_I_15) ;
-                RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24) ;
-        ADD(W_I_TEMP, W_I_16, W_I_TEMP) ;/* for saving W_I before adding incomplete W_I_7 */
-                RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,25) ;
-        ADD(W_I, W_I_7, W_I_TEMP);
-                RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,25) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,25) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,26) ;
-        ADD(W_I, W_I, YMM_TEMP0) ;/* now W[16..17] are completed */
-                RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,26) ;
-        FEEDBACK1_to_W_I_2 ;
-                RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,26) ;
-        FEEDBACK_to_W_I_7 ;
-                RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,27) ;
-        ADD(W_I_TEMP, W_I_7, W_I_TEMP);
-                RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,27) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,27) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,28) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ;/* now W[16..19] are completed */
-                RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,28) ;
-        FEEDBACK2_to_W_I_2 ;
-                RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,28) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,29) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,29) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..21] are completed */
-                RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,29) ;
-        FEEDBACK3_to_W_I_2 ;
-                RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,30) ;
-        GAMMA1(YMM_TEMP0, W_I_2) ;
-                RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,30) ;
-                RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,30) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..23] are completed */
-                RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,31) ;
-
-        MOVE_to_REG(YMM_TEMP0, K[32]) ;
-                RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,31) ;
-        ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I) ;
-                RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,31) ;
-        ADD(YMM_TEMP0, YMM_TEMP0, W_I) ;
-        MOVE_to_MEM(W_K[32], YMM_TEMP0) ;
+    MOVE_to_REG(YMM_TEMP0, K[32]);
+            RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,31);
+    ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I);
+            RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,31);
+    ADD(YMM_TEMP0, YMM_TEMP0, W_I);
+    MOVE_to_MEM(W_K[32], YMM_TEMP0);
 
 
-                /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15] + W[i-16]) */
-                RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32) ;
-        GAMMA0_1(W_I_TEMP, W_I_15) ;
-                RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32) ;
-        GAMMA0_2(W_I_TEMP, W_I_15) ;
-                RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32) ;
-        ADD(W_I_TEMP, W_I_16, W_I_TEMP) ;/* for saving W_I before adding incomplete W_I_7 */
-                RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,33) ;
-        ADD(W_I, W_I_7, W_I_TEMP);
-                RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,33) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,33) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,34) ;
-        ADD(W_I, W_I, YMM_TEMP0) ;/* now W[16..17] are completed */
-                RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,34) ;
-        FEEDBACK1_to_W_I_2 ;
-                RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,34) ;
-        FEEDBACK_to_W_I_7 ;
-                RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,35) ;
-        ADD(W_I_TEMP, W_I_7, W_I_TEMP);
-                RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,35) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,35) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,36) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ;/* now W[16..19] are completed */
-                RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,36) ;
-        FEEDBACK2_to_W_I_2 ;
-                RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,36) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,37) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,37) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..21] are completed */
-                RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,37) ;
-        FEEDBACK3_to_W_I_2 ;
-                RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,38) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,38) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,38) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..23] are completed */
-                RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,39) ;
+            /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16] */
+            RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32);
+    GAMMA0_1(W_I_TEMP, W_I_15);
+            RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32);
+    GAMMA0_2(W_I_TEMP, W_I_15);
+            RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32);
+    ADD(W_I_TEMP, W_I_16, W_I_TEMP);/* for saving W_I before adding incomplete W_I_7 */
+            RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,33);
+    ADD(W_I, W_I_7, W_I_TEMP);
+            RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,33);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,33);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,34);
+    ADD(W_I, W_I, YMM_TEMP0);/* now W[16..17] are completed */
+            RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,34);
+    FEEDBACK1_to_W_I_2;
+            RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,34);
+    FEEDBACK_to_W_I_7;
+            RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,35);
+    ADD(W_I_TEMP, W_I_7, W_I_TEMP);
+            RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,35);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,35);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,36);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0);/* now W[16..19] are completed */
+            RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,36);
+    FEEDBACK2_to_W_I_2;
+            RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,36);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,37);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,37);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..21] are completed */
+            RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,37);
+    FEEDBACK3_to_W_I_2;
+            RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,38);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,38);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,38);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..23] are completed */
+            RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,39);
 
-        MOVE_to_REG(YMM_TEMP0, K[40]) ;
-                RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,39) ;
-        ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I) ;
-                RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,39) ;
-        ADD(YMM_TEMP0, YMM_TEMP0, W_I) ;
-        MOVE_to_MEM(W_K[40], YMM_TEMP0) ;
+    MOVE_to_REG(YMM_TEMP0, K[40]);
+            RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,39);
+    ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I);
+            RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,39);
+    ADD(YMM_TEMP0, YMM_TEMP0, W_I);
+    MOVE_to_MEM(W_K[40], YMM_TEMP0);
 
-                /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15] + W[i-16]) */
-                RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40) ;
-        GAMMA0_1(W_I_TEMP, W_I_15) ;
-                RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40) ;
-        GAMMA0_2(W_I_TEMP, W_I_15) ;
-                RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40) ;
-        ADD(W_I_TEMP, W_I_16, W_I_TEMP) ;/* for saving W_I before adding incomplete W_I_7 */
-                RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,41) ;
-        ADD(W_I, W_I_7, W_I_TEMP);
-                RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,41) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,41) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,42) ;
-        ADD(W_I, W_I, YMM_TEMP0) ;/* now W[16..17] are completed */
-                RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,42) ;
-        FEEDBACK1_to_W_I_2 ;
-                RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,42) ;
-        FEEDBACK_to_W_I_7 ;
-                RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,43) ;
-        ADD(W_I_TEMP, W_I_7, W_I_TEMP);
-                RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,43) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,43) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,44) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ;/* now W[16..19] are completed */
-                RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,44) ;
-        FEEDBACK2_to_W_I_2 ;
-                RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,44) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,45) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,45) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..21] are completed */
-                RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,45) ;
-        FEEDBACK3_to_W_I_2 ;
-                RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,46) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,46) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,46) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..23] are completed */
-                RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,47) ;
+            /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16] */
+            RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40);
+    GAMMA0_1(W_I_TEMP, W_I_15);
+            RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40);
+    GAMMA0_2(W_I_TEMP, W_I_15);
+            RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40);
+    ADD(W_I_TEMP, W_I_16, W_I_TEMP);/* for saving W_I before adding incomplete W_I_7 */
+            RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,41);
+    ADD(W_I, W_I_7, W_I_TEMP);
+            RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,41);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,41);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,42);
+    ADD(W_I, W_I, YMM_TEMP0);/* now W[16..17] are completed */
+            RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,42);
+    FEEDBACK1_to_W_I_2;
+            RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,42);
+    FEEDBACK_to_W_I_7;
+            RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,43);
+    ADD(W_I_TEMP, W_I_7, W_I_TEMP);
+            RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,43);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,43);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,44);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0);/* now W[16..19] are completed */
+            RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,44);
+    FEEDBACK2_to_W_I_2;
+            RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,44);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,45);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,45);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..21] are completed */
+            RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,45);
+    FEEDBACK3_to_W_I_2;
+            RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,46);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,46);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,46);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..23] are completed */
+            RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,47);
 
-        MOVE_to_REG(YMM_TEMP0, K[48]) ;
-                RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,47) ;
-        ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I) ;
-                RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,47) ;
-        ADD(YMM_TEMP0, YMM_TEMP0, W_I) ;
-        MOVE_to_MEM(W_K[48], YMM_TEMP0) ;
+    MOVE_to_REG(YMM_TEMP0, K[48]);
+            RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,47);
+    ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I);
+            RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,47);
+    ADD(YMM_TEMP0, YMM_TEMP0, W_I);
+    MOVE_to_MEM(W_K[48], YMM_TEMP0);
 
-                /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15] + W[i-16]) */
-                RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48) ;
-        GAMMA0_1(W_I_TEMP, W_I_15) ;
-                RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48) ;
-        GAMMA0_2(W_I_TEMP, W_I_15) ;
-                RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48) ;
-        ADD(W_I_TEMP, W_I_16, W_I_TEMP) ;/* for saving W_I before adding incomplete W_I_7 */
-                RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49) ;
-        ADD(W_I, W_I_7, W_I_TEMP);
-                RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50) ;
-        ADD(W_I, W_I, YMM_TEMP0) ;/* now W[16..17] are completed */
-                RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50) ;
-        FEEDBACK1_to_W_I_2 ;
-                RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50) ;
-        FEEDBACK_to_W_I_7 ;
-                RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51) ;
-        ADD(W_I_TEMP, W_I_7, W_I_TEMP);
-                RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ;/* now W[16..19] are completed */
-                RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52) ;
-        FEEDBACK2_to_W_I_2 ;
-                RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..21] are completed */
-                RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53) ;
-        FEEDBACK3_to_W_I_2 ;
-                RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..23] are completed */
-                RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55) ;
+            /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16] */
+            RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48);
+    GAMMA0_1(W_I_TEMP, W_I_15);
+            RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48);
+    GAMMA0_2(W_I_TEMP, W_I_15);
+            RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48);
+    ADD(W_I_TEMP, W_I_16, W_I_TEMP);/* for saving W_I before adding incomplete W_I_7 */
+            RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49);
+    ADD(W_I, W_I_7, W_I_TEMP);
+            RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50);
+    ADD(W_I, W_I, YMM_TEMP0);/* now W[16..17] are completed */
+            RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50);
+    FEEDBACK1_to_W_I_2;
+            RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50);
+    FEEDBACK_to_W_I_7;
+            RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51);
+    ADD(W_I_TEMP, W_I_7, W_I_TEMP);
+            RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0);/* now W[16..19] are completed */
+            RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52);
+    FEEDBACK2_to_W_I_2;
+            RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..21] are completed */
+            RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53);
+    FEEDBACK3_to_W_I_2;
+            RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..23] are completed */
+            RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55);
 
-        MOVE_to_REG(YMM_TEMP0, K[56]) ;
-                RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55) ;
-        ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I) ;
-                RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55) ;
-        ADD(YMM_TEMP0, YMM_TEMP0, W_I) ;
-        MOVE_to_MEM(W_K[56], YMM_TEMP0) ;
+    MOVE_to_REG(YMM_TEMP0, K[56]);
+            RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55);
+    ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I);
+            RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55);
+    ADD(YMM_TEMP0, YMM_TEMP0, W_I);
+    MOVE_to_MEM(W_K[56], YMM_TEMP0);
 
-        RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,56) ;
-        RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,57) ;
-        RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,58) ;
-        RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,59) ;
+    RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,56);
+    RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,57);
+    RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,58);
+    RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,59);
 
-        RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,60) ;
-        RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,61) ;
-        RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,62) ;
-        RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,63) ;
+    RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,60);
+    RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,61);
+    RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,62);
+    RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,63);
 
-    RegToDigest(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7) ;
+    RegToDigest(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7);
 
 #ifdef WOLFSSL_SMALL_STACK
     XFREE(W_K, NULL, DYNAMIC_TYPE_TMP_BUFFER);
@@ -1746,53 +1806,197 @@ static int Transform_AVX2(Sha256* sha256)
 
 #endif   /* HAVE_INTEL_AVX2 */
 
+
 #ifdef WOLFSSL_SHA224
-int wc_InitSha224(Sha224* sha224)
-{
-    sha224->digest[0] = 0xc1059ed8;
-    sha224->digest[1] = 0x367cd507;
-    sha224->digest[2] = 0x3070dd17;
-    sha224->digest[3] = 0xf70e5939;
-    sha224->digest[4] = 0xffc00b31;
-    sha224->digest[5] = 0x68581511;
-    sha224->digest[6] = 0x64f98fa7;
-    sha224->digest[7] = 0xbefa4fa4;
+    static int InitSha224(Sha224* sha224) /* (re)initialize hashing state */
+    {
+        int ret = 0; /* never reassigned below, so this always returns 0 */
 
-    sha224->buffLen = 0;
-    sha224->loLen   = 0;
-    sha224->hiLen   = 0;
+        sha224->digest[0] = 0xc1059ed8; /* SHA-224 initial hash values */
+        sha224->digest[1] = 0x367cd507;
+        sha224->digest[2] = 0x3070dd17;
+        sha224->digest[3] = 0xf70e5939;
+        sha224->digest[4] = 0xffc00b31;
+        sha224->digest[5] = 0x68581511;
+        sha224->digest[6] = 0x64f98fa7;
+        sha224->digest[7] = 0xbefa4fa4;
 
-#if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
-    set_Transform() ;
-#endif
+        sha224->buffLen = 0; /* clear buffered-byte count and lo/hi lengths */
+        sha224->loLen   = 0;
+        sha224->hiLen   = 0;
 
-    return 0;
-}
+    #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
+        /* choose the best Transform function for this runtime environment */
+        set_Transform();
+    #endif
 
-int wc_Sha224Update(Sha224* sha224, const byte* data, word32 len)
-{
-    return Sha256Update((Sha256 *)sha224, data, len);
-}
-
-
-int wc_Sha224Final(Sha224* sha224, byte* hash)
-{
-    int ret = Sha256Final((Sha256 *)sha224);
-    if (ret != 0)
         return ret;
+    }
+
+    int wc_InitSha224_ex(Sha224* sha224, void* heap, int devId)
+    {
+        int rc;
+
+        /* a context is mandatory; heap and devId are optional hints */
+        if (sha224 == NULL) {
+            return BAD_FUNC_ARG;
+        }
+
+        sha224->heap = heap;
+        rc = InitSha224(sha224);
+
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224)
+        if (rc == 0) {
+            rc = wolfAsync_DevCtxInit(&sha224->asyncDev,
+                            WOLFSSL_ASYNC_MARKER_SHA224, sha224->heap, devId);
+        }
+    #else
+        (void)devId;
+    #endif /* WOLFSSL_ASYNC_CRYPT */
+        return rc;
+    }
+
+    int wc_InitSha224(Sha224* sha224) /* NULL heap hint, INVALID_DEVID */
+    {
+        return wc_InitSha224_ex(sha224, NULL, INVALID_DEVID);
+    }
+
+    /* Hash len bytes from data; NULL data is accepted only when len is 0. */
+    int wc_Sha224Update(Sha224* sha224, const byte* data, word32 len)
+    {
+        if (sha224 == NULL || (data == NULL && len > 0))
+            return BAD_FUNC_ARG;
+
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224)
+        if (sha224->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA224) {
+        #if defined(HAVE_INTEL_QA)
+            return IntelQaSymSha224(&sha224->asyncDev, NULL, data, len);
+        #endif
+        }
+    #endif /* WOLFSSL_ASYNC_CRYPT */
+
+        return Sha256Update((Sha256 *)sha224, data, len);
+    }
+
+    /* Write the final digest to hash; the software path then resets the ctx. */
+    int wc_Sha224Final(Sha224* sha224, byte* hash)
+    {
+        int ret;
+        if (sha224 == NULL || hash == NULL)
+            return BAD_FUNC_ARG; /* reject NULL before any dereference */
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224)
+        if (sha224->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA224) {
+        #if defined(HAVE_INTEL_QA)
+            return IntelQaSymSha224(&sha224->asyncDev, hash, NULL,
+                                            SHA224_DIGEST_SIZE);
+        #endif
+        }
+    #endif /* WOLFSSL_ASYNC_CRYPT */
+        ret = Sha256Final((Sha256*)sha224);
+        if (ret != 0)
+            return ret;
 
     #if defined(LITTLE_ENDIAN_ORDER)
         ByteReverseWords(sha224->digest, sha224->digest, SHA224_DIGEST_SIZE);
     #endif
-    XMEMCPY(hash, sha224->digest, SHA224_DIGEST_SIZE);
+        XMEMCPY(hash, sha224->digest, SHA224_DIGEST_SIZE);
+        return InitSha224(sha224);  /* reset state */
+    }
+
+    void wc_Sha224Free(Sha224* sha224)
+    {
+        if (sha224 != NULL) {   /* NULL context: nothing to do */
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224)
+            wolfAsync_DevCtxFree(&sha224->asyncDev,
+                                        WOLFSSL_ASYNC_MARKER_SHA224);
+    #endif /* WOLFSSL_ASYNC_CRYPT */
+        }
+    }
 
-    return wc_InitSha224(sha224);  /* reset state */
-}
 #endif /* WOLFSSL_SHA224 */
 
-#endif   /* HAVE_FIPS */
 
-#endif   /* WOLFSSL_TI_HAHS */
+int wc_InitSha256(Sha256* sha256) /* NULL heap hint, INVALID_DEVID */
+{
+    return wc_InitSha256_ex(sha256, NULL, INVALID_DEVID);
+}
+
+/* Pass the context's async device state to wolfAsync_DevCtxFree, if any. */
+void wc_Sha256Free(Sha256* sha256)
+{
+    if (sha256 != NULL) {
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
+        wolfAsync_DevCtxFree(&sha256->asyncDev, WOLFSSL_ASYNC_MARKER_SHA256);
+#endif /* WOLFSSL_ASYNC_CRYPT */
+    }
+}
+
+#endif /* !WOLFSSL_TI_HASH */
+#endif /* HAVE_FIPS */
+
+
+#ifndef WOLFSSL_TI_HASH
+#ifdef WOLFSSL_SHA224
+    /* Get the current digest by finalizing a copy of the context. */
+    int wc_Sha224GetHash(Sha224* sha224, byte* hash)
+    {
+        Sha224 snapshot;
+        int    rc;
+
+        if (sha224 == NULL || hash == NULL)
+            return BAD_FUNC_ARG;
+
+        rc = wc_Sha224Copy(sha224, &snapshot);
+        if (rc != 0)
+            return rc;
+        return wc_Sha224Final(&snapshot, hash);
+    }
+    /* Duplicate src into dst, then copy async device state when enabled. */
+    int wc_Sha224Copy(Sha224* src, Sha224* dst)
+    {
+        if (src == NULL || dst == NULL) {
+            return BAD_FUNC_ARG;
+        }
+
+        XMEMCPY(dst, src, sizeof(*dst));
+
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        return wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
+    #else
+        return 0;
+    #endif
+    }
+#endif /* WOLFSSL_SHA224 */
+
+/* Get the current digest by finalizing a copy of the context. */
+int wc_Sha256GetHash(Sha256* sha256, byte* hash)
+{
+    Sha256 snapshot;
+    int    rc;
+
+    if (sha256 == NULL || hash == NULL)
+        return BAD_FUNC_ARG;
+
+    rc = wc_Sha256Copy(sha256, &snapshot);
+    if (rc != 0)
+        return rc;
+    return wc_Sha256Final(&snapshot, hash);
+}
+/* Duplicate src into dst, then copy async device state when enabled. */
+int wc_Sha256Copy(Sha256* src, Sha256* dst)
+{
+    if (src == NULL || dst == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    XMEMCPY(dst, src, sizeof(*dst));
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    return wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
+#else
+    return 0;
+#endif
+}
+#endif /* !WOLFSSL_TI_HASH */
 
 #endif /* NO_SHA256 */
-
diff --git a/wolfcrypt/src/sha512.c b/wolfcrypt/src/sha512.c
old mode 100644
new mode 100755
index b7c405a6b..9d9233605
--- a/wolfcrypt/src/sha512.c
+++ b/wolfcrypt/src/sha512.c
@@ -25,70 +25,79 @@
 #endif
 
 #include 
-#include 
 
 #ifdef WOLFSSL_SHA512
 #include 
 
+#include 
+#include 
+
+/* FIPS wrapper calls; the user can also call these directly */
 #ifdef HAVE_FIPS
-int wc_InitSha512(Sha512* sha)
-{
-    if (sha == NULL) {
-        return BAD_FUNC_ARG;
+    /* FIPS build: validate the context, then defer to InitSha512_fips(). */
+    int wc_InitSha512(Sha512* sha)
+    {
+        if (sha == NULL) {
+            return BAD_FUNC_ARG;
+        }
+        return InitSha512_fips(sha);
     }
-    return InitSha512_fips(sha);
-}
-
-
-int wc_Sha512Update(Sha512* sha, const byte* data, word32 len)
-{
-    if (sha == NULL || (data == NULL && len > 0)) {
-        return BAD_FUNC_ARG;
+    int wc_InitSha512_ex(Sha512* sha, void* heap, int devId)
+    {
+        (void)heap;  /* heap hint and device id are not used by this wrapper */
+        (void)devId;
+        return (sha == NULL) ? BAD_FUNC_ARG : InitSha512_fips(sha);
     }
-    return Sha512Update_fips(sha, data, len);
-}
+    int wc_Sha512Update(Sha512* sha, const byte* data, word32 len)
+    {
+        if (sha == NULL || (data == NULL && len > 0)) {
+            return BAD_FUNC_ARG; /* NULL data is accepted only with len 0 */
+        }
 
-
-int wc_Sha512Final(Sha512* sha, byte* out)
-{
-    if (sha == NULL || out == NULL) {
-        return BAD_FUNC_ARG;
+        return Sha512Update_fips(sha, data, len);
     }
-    return Sha512Final_fips(sha, out);
-}
+    int wc_Sha512Final(Sha512* sha, byte* out)
+    {
+        if (sha == NULL || out == NULL) {
+            return BAD_FUNC_ARG; /* both context and output are required */
+        }
 
-
-#if defined(WOLFSSL_SHA384) || defined(HAVE_AESGCM)
-
-int wc_InitSha384(Sha384* sha)
-{
-    if (sha == NULL) {
-        return BAD_FUNC_ARG;
+        return Sha512Final_fips(sha, out);
     }
-    return InitSha384_fips(sha);
-}
-
-
-int wc_Sha384Update(Sha384* sha, const byte* data, word32 len)
-{
-    if (sha == NULL || (data == NULL && len > 0)) {
-        return BAD_FUNC_ARG;
+    void wc_Sha512Free(Sha512* sha)
+    {
+        (void)sha;
+        /* Intentional no-op: freeing is not supported in FIPS builds */
     }
-    return Sha384Update_fips(sha, data, len);
-}
 
+    #if defined(WOLFSSL_SHA384) || defined(HAVE_AESGCM)
+        int wc_InitSha384(Sha384* sha)
+        {
+            return (sha == NULL) ? BAD_FUNC_ARG : InitSha384_fips(sha);
+        }
+        int wc_InitSha384_ex(Sha384* sha, void* heap, int devId)
+        {
+            (void)heap; (void)devId; /* not used by this wrapper */
+            return (sha == NULL) ? BAD_FUNC_ARG : InitSha384_fips(sha);
+        }
+        int wc_Sha384Update(Sha384* sha, const byte* data, word32 len)
+        {
+            return (sha == NULL || (data == NULL && len > 0)) ?
+                BAD_FUNC_ARG : Sha384Update_fips(sha, data, len);
+        }
+        int wc_Sha384Final(Sha384* sha, byte* out)
+        {
+            return (sha == NULL || out == NULL) ?
+                BAD_FUNC_ARG : Sha384Final_fips(sha, out);
+        }
+        void wc_Sha384Free(Sha384* sha)
+        {
+            (void)sha; /* intentional no-op: not supported in FIPS */
+        }
+    #endif /* WOLFSSL_SHA384 || HAVE_AESGCM */
 
-int wc_Sha384Final(Sha384* sha, byte* out)
-{
-    if (sha == NULL || out == NULL) {
-        return BAD_FUNC_ARG;
-    }
-    return Sha384Final_fips(sha, out);
-}
-
-
-#endif /* WOLFSSL_SHA384 */
 #else /* else build without using fips */
+
 #include 
 
 #ifdef NO_INLINE
@@ -100,256 +109,54 @@ int wc_Sha384Final(Sha384* sha, byte* out)
 
 
 #if defined(USE_INTEL_SPEEDUP)
-  #define HAVE_INTEL_AVX1
-  #define HAVE_INTEL_AVX2
-#endif
-
-#if defined(HAVE_INTEL_AVX1)
-/* #define DEBUG_XMM  */
-#endif
-
-#if defined(HAVE_INTEL_AVX2)
-#define HAVE_INTEL_RORX
-/* #define DEBUG_YMM  */
-#endif
-
-/*****
-Intel AVX1/AVX2 Macro Control Structure
-
-#if defined(HAVE_INteL_SPEEDUP)
     #define HAVE_INTEL_AVX1
     #define HAVE_INTEL_AVX2
 #endif
 
-int InitSha512(Sha512* sha512) {
-     Save/Recover XMM, YMM
-     ...
-
-     Check Intel AVX cpuid flags
-}
-
-#if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
-  Transform_AVX1() ; # Function prototype
-  Transform_AVX2() ; #
-#endif
-
-  _Transform() {     # Native Transform Function body
-
-  }
-
-  int Sha512Update() {
-     Save/Recover XMM, YMM
-     ...
-  }
-
-  int Sha512Final() {
-     Save/Recover XMM, YMM
-     ...
-  }
-
-
 #if defined(HAVE_INTEL_AVX1)
-
-   XMM Instructions/INLINE asm Definitions
-
+    /* #define DEBUG_XMM  */
 #endif
 
 #if defined(HAVE_INTEL_AVX2)
-
-   YMM Instructions/INLINE asm Definitions
-
+    #define HAVE_INTEL_RORX
+    /* #define DEBUG_YMM  */
 #endif
 
-#if defnied(HAVE_INTEL_AVX1)
-
-  int Transform_AVX1() {
-      Stitched Message Sched/Round
-  }
-
-#endif
-
-#if defnied(HAVE_INTEL_AVX2)
-
-  int Transform_AVX2() {
-      Stitched Message Sched/Round
-  }
-#endif
-
-
-*/
-
-#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-
-
-/* Each platform needs to query info type 1 from cpuid to see if aesni is
- * supported. Also, let's setup a macro for proper linkage w/o ABI conflicts
- */
-
-#ifndef _MSC_VER
-    #define cpuid(reg, leaf, sub)\
-            __asm__ __volatile__ ("cpuid":\
-             "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\
-             "a" (leaf), "c"(sub));
-
-    #define XASM_LINK(f) asm(f)
-#else
-
-    #include 
-    #define cpuid(a,b) __cpuid((int*)a,b)
-
-    #define XASM_LINK(f)
-
-#endif /* _MSC_VER */
-
-#define EAX 0
-#define EBX 1
-#define ECX 2
-#define EDX 3
-
-#define CPUID_AVX1   0x1
-#define CPUID_AVX2   0x2
-#define CPUID_RDRAND 0x4
-#define CPUID_RDSEED 0x8
-#define CPUID_BMI2   0x10   /* MULX, RORX */
-
-#define IS_INTEL_AVX1       (cpuid_flags&CPUID_AVX1)
-#define IS_INTEL_AVX2       (cpuid_flags&CPUID_AVX2)
-#define IS_INTEL_BMI2       (cpuid_flags&CPUID_BMI2)
-#define IS_INTEL_RDRAND     (cpuid_flags&CPUID_RDRAND)
-#define IS_INTEL_RDSEED     (cpuid_flags&CPUID_RDSEED)
-
-static word32 cpuid_check = 0 ;
-static word32 cpuid_flags = 0 ;
-
-static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) {
-    int got_intel_cpu=0;
-    unsigned int reg[5];
-
-    reg[4] = '\0' ;
-    cpuid(reg, 0, 0);
-    if(XMEMCMP((char *)&(reg[EBX]), "Genu", 4) == 0 &&
-                XMEMCMP((char *)&(reg[EDX]), "ineI", 4) == 0 &&
-                XMEMCMP((char *)&(reg[ECX]), "ntel", 4) == 0) {
-        got_intel_cpu = 1;
-    }
-    if (got_intel_cpu) {
-        cpuid(reg, leaf, sub);
-        return((reg[num]>>bit)&0x1) ;
-    }
-    return 0 ;
-}
-
-
-static int set_cpuid_flags() {
-    if(cpuid_check ==0) {
-        if(cpuid_flag(1, 0, ECX, 28)){ cpuid_flags |= CPUID_AVX1 ;}
-        if(cpuid_flag(7, 0, EBX, 5)){  cpuid_flags |= CPUID_AVX2 ; }
-        if(cpuid_flag(7, 0, EBX, 8)) { cpuid_flags |= CPUID_BMI2 ; }
-        if(cpuid_flag(1, 0, ECX, 30)){ cpuid_flags |= CPUID_RDRAND ;  }
-        if(cpuid_flag(7, 0, EBX, 18)){ cpuid_flags |= CPUID_RDSEED ;  }
-		cpuid_check = 1 ;
-		return 0 ;
-    }
-    return 1 ;
-}
-
-
-/* #if defined(HAVE_INTEL_AVX1/2) at the tail of sha512 */
-
-#if defined(HAVE_INTEL_AVX1)
-static int Transform_AVX1(Sha512 *sha512) ;
-#endif
-
-#if defined(HAVE_INTEL_AVX2)
-static int Transform_AVX2(Sha512 *sha512) ;
-
-#if defined(HAVE_INTEL_AVX1) && defined(HAVE_INTEL_AVX2) && defined(HAVE_INTEL_RORX)
-static int Transform_AVX1_RORX(Sha512 *sha512) ;
-#endif
-
-#endif
-
-static int _Transform(Sha512 *sha512) ;
-
-static int (*Transform_p)(Sha512* sha512) = _Transform ;
-
-#define Transform(sha512) (*Transform_p)(sha512)
-
-static void set_Transform(void) {
-     if(set_cpuid_flags()) return ;
-
-#if defined(HAVE_INTEL_AVX2)
-     if(IS_INTEL_AVX2 && IS_INTEL_BMI2){
-         Transform_p = Transform_AVX1_RORX; return ;
-         Transform_p = Transform_AVX2      ;
-                  /* for avoiding warning,"not used" */
-     }
-#endif
-#if defined(HAVE_INTEL_AVX1)
-     Transform_p = ((IS_INTEL_AVX1) ? Transform_AVX1 : _Transform) ; return ;
-#endif
-     Transform_p = _Transform ; return ;
-}
-
-#else
-   #define Transform(sha512) _Transform(sha512)
-#endif
-
-/* Dummy for saving MM_REGs on behalf of Transform */
-/* #if defined(HAVE_INTEL_AVX2)
- #define  SAVE_XMM_YMM   __asm__ volatile("orq %%r8, %%r8":::\
-   "%ymm0","%ymm1","%ymm2","%ymm3","%ymm4","%ymm5","%ymm6","%ymm7","%ymm8","%ymm9","%ymm10","%ymm11",\
-   "%ymm12","%ymm13","%ymm14","%ymm15")
-*/
-#if defined(HAVE_INTEL_AVX1)
-   #define  SAVE_XMM_YMM   __asm__ volatile("orq %%r8, %%r8":::\
-    "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15")
-#else
-#define  SAVE_XMM_YMM
-#endif
-
-#if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
-
-#include 
-
-#endif /* defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2) */
-
 
 #if defined(HAVE_INTEL_RORX)
-#define ROTR(func, bits, x) \
-word64 func(word64 x) {  word64 ret ;\
-    __asm__ ("rorx $"#bits", %1, %0\n\t":"=r"(ret):"r"(x):) ;\
-    return ret ;\
-}
-
-static INLINE ROTR(rotrFixed64_28, 28, x)
-static INLINE ROTR(rotrFixed64_34, 34, x)
-static INLINE ROTR(rotrFixed64_39, 39, x)
-static INLINE ROTR(rotrFixed64_14, 14, x)
-static INLINE ROTR(rotrFixed64_18, 18, x)
-static INLINE ROTR(rotrFixed64_41, 41, x)
-
-#define S0_RORX(x) (rotrFixed64_28(x)^rotrFixed64_34(x)^rotrFixed64_39(x))
-#define S1_RORX(x) (rotrFixed64_14(x)^rotrFixed64_18(x)^rotrFixed64_41(x))
-#endif
-
-#if defined(HAVE_BYTEREVERSE64) && !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2)
-#define ByteReverseWords64(out, in, size) ByteReverseWords64_1(out, size)
-#define ByteReverseWords64_1(buf, size)\
- { unsigned int i ;\
-   for(i=0; i< size/sizeof(word64); i++){\
-       __asm__ volatile("bswapq %0":"+r"(buf[i])::) ;\
-   }\
-}
-#endif
-
-
-int wc_InitSha512(Sha512* sha512)
-{
-    if (sha512 == NULL) {
-        return BAD_FUNC_ARG;
+    #define ROTR(func, bits, x) \
+    word64 func(word64 x) {  word64 ret ;\
+        __asm__ ("rorx $"#bits", %1, %0\n\t":"=r"(ret):"r"(x):) ;\
+        return ret ;\
     }
 
+    static INLINE ROTR(rotrFixed64_28, 28, x) /* no ';': ROTR emits a full body */
+    static INLINE ROTR(rotrFixed64_34, 34, x) /* used by S0_RORX */
+    static INLINE ROTR(rotrFixed64_39, 39, x) /* used by S0_RORX */
+    static INLINE ROTR(rotrFixed64_14, 14, x) /* used by S1_RORX */
+    static INLINE ROTR(rotrFixed64_18, 18, x) /* used by S1_RORX */
+    static INLINE ROTR(rotrFixed64_41, 41, x) /* used by S1_RORX */
+
+    #define S0_RORX(x) (rotrFixed64_28(x)^rotrFixed64_34(x)^rotrFixed64_39(x))
+    #define S1_RORX(x) (rotrFixed64_14(x)^rotrFixed64_18(x)^rotrFixed64_41(x))
+#endif /* HAVE_INTEL_RORX */
+
+#if defined(HAVE_BYTEREVERSE64) && \
+        !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2)
+    #define ByteReverseWords64(out, in, size) ByteReverseWords64_1(out, size)
+    #define ByteReverseWords64_1(buf, size) \
+        { unsigned int i ;\
+            for(i=0; i< size/sizeof(word64); i++){\
+                __asm__ volatile("bswapq %0":"+r"(buf[i])::) ;\
+            }\
+        }
+#endif
+
+static int InitSha512(Sha512* sha512)
+{
+    if (sha512 == NULL)
+        return BAD_FUNC_ARG;
+
     sha512->digest[0] = W64LIT(0x6a09e667f3bcc908);
     sha512->digest[1] = W64LIT(0xbb67ae8584caa73b);
     sha512->digest[2] = W64LIT(0x3c6ef372fe94f82b);
@@ -363,14 +170,232 @@ int wc_InitSha512(Sha512* sha512)
     sha512->loLen   = 0;
     sha512->hiLen   = 0;
 
-#if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
-    set_Transform() ; /* choose best Transform function under this runtime environment */
-#endif
-
-    return 0 ;
+    return 0;
 }
 
 
+/* Hardware Acceleration */
+#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+
+    /*****
+    Intel AVX1/AVX2 Macro Control Structure
+
+    #if defined(USE_INTEL_SPEEDUP)
+        #define HAVE_INTEL_AVX1
+        #define HAVE_INTEL_AVX2
+    #endif
+
+    int InitSha512(Sha512* sha512) {
+         Save/Recover XMM, YMM
+         ...
+
+         Check Intel AVX cpuid flags
+    }
+
+    #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
+      Transform_AVX1(); # Function prototype
+      Transform_AVX2(); #
+    #endif
+
+      _Transform() {     # Native Transform Function body
+
+      }
+
+      int Sha512Update() {
+         Save/Recover XMM, YMM
+         ...
+      }
+
+      int Sha512Final() {
+         Save/Recover XMM, YMM
+         ...
+      }
+
+
+    #if defined(HAVE_INTEL_AVX1)
+
+       XMM Instructions/INLINE asm Definitions
+
+    #endif
+
+    #if defined(HAVE_INTEL_AVX2)
+
+       YMM Instructions/INLINE asm Definitions
+
+    #endif
+
+    #if defined(HAVE_INTEL_AVX1)
+
+      int Transform_AVX1() {
+          Stitched Message Sched/Round
+      }
+
+    #endif
+
+    #if defined(HAVE_INTEL_AVX2)
+
+      int Transform_AVX2() {
+          Stitched Message Sched/Round
+      }
+    #endif
+
+    */
+
+
+    /* Each platform needs to query cpuid (leaves 1 and 7) to see whether AVX1,
+     * AVX2, BMI2, RDRAND and RDSEED are supported. Also, let's setup a macro
+     * for proper linkage w/o ABI conflicts */
+
+    #ifndef _MSC_VER
+        #define cpuid(reg, leaf, sub)\
+            __asm__ __volatile__ ("cpuid":\
+                "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\
+                "a" (leaf), "c"(sub));
+
+        #define XASM_LINK(f) asm(f)
+    #else
+
+        #include 
+        #define cpuid(a,b,c) __cpuidex((int*)(a), (b), (c)) /* b=leaf, c=sub-leaf */
+
+        #define XASM_LINK(f)
+    #endif /* _MSC_VER */
+
+    #define EAX 0
+    #define EBX 1
+    #define ECX 2
+    #define EDX 3
+
+    #define CPUID_AVX1   0x1
+    #define CPUID_AVX2   0x2
+    #define CPUID_RDRAND 0x4
+    #define CPUID_RDSEED 0x8
+    #define CPUID_BMI2   0x10   /* MULX, RORX */
+
+    #define IS_INTEL_AVX1       (cpuid_flags & CPUID_AVX1)
+    #define IS_INTEL_AVX2       (cpuid_flags & CPUID_AVX2)
+    #define IS_INTEL_BMI2       (cpuid_flags & CPUID_BMI2)
+    #define IS_INTEL_RDRAND     (cpuid_flags & CPUID_RDRAND)
+    #define IS_INTEL_RDSEED     (cpuid_flags & CPUID_RDSEED)
+
+    static word32 cpuid_check = 0;
+    static word32 cpuid_flags = 0;
+    /* Return bit `bit` of cpuid register `num` for (leaf, sub); 0 if not Intel */
+    static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) {
+        int got_intel_cpu = 0;
+        unsigned int reg[5]; /* reg[EAX..EDX] receive the cpuid output */
+        /* leaf 0: compare EBX, EDX, ECX against "Genu" "ineI" "ntel" */
+        reg[4] = '\0';
+        cpuid(reg, 0, 0);
+        if (XMEMCMP((char *)&(reg[EBX]), "Genu", 4) == 0 &&
+            XMEMCMP((char *)&(reg[EDX]), "ineI", 4) == 0 &&
+            XMEMCMP((char *)&(reg[ECX]), "ntel", 4) == 0) {
+            got_intel_cpu = 1;
+        }
+        if (got_intel_cpu) {
+            cpuid(reg, leaf, sub); /* requested leaf is queried on Intel only */
+            return ((reg[num] >> bit) & 0x1);
+        }
+        return 0; /* vendor string did not match: report the feature as absent */
+    }
+
+    /* Probe cpuid once; returns 0 if flags were just set, 1 if already done */
+    static int set_cpuid_flags() {
+        if(cpuid_check ==0) {
+            if(cpuid_flag(1, 0, ECX, 28)){ cpuid_flags |= CPUID_AVX1 ;}
+            if(cpuid_flag(7, 0, EBX, 5)){  cpuid_flags |= CPUID_AVX2 ; }
+            if(cpuid_flag(7, 0, EBX, 8)) { cpuid_flags |= CPUID_BMI2 ; }
+            if(cpuid_flag(1, 0, ECX, 30)){ cpuid_flags |= CPUID_RDRAND ;  }
+            if(cpuid_flag(7, 0, EBX, 18)){ cpuid_flags |= CPUID_RDSEED ;  }
+    		cpuid_check = 1 ; /* remember that the probe has run */
+    		return 0 ;
+        }
+        return 1 ;
+    }
+
+
+    #if defined(HAVE_INTEL_AVX1)
+        static int Transform_AVX1(Sha512 *sha512);
+    #endif
+    #if defined(HAVE_INTEL_AVX2)
+        static int Transform_AVX2(Sha512 *sha512);
+        #if defined(HAVE_INTEL_AVX1) && defined(HAVE_INTEL_AVX2) && defined(HAVE_INTEL_RORX)
+            static int Transform_AVX1_RORX(Sha512 *sha512);
+        #endif
+    #endif
+    static int _Transform(Sha512 *sha512);
+    static int (*Transform_p)(Sha512* sha512) = _Transform;
+    #define Transform(sha512) (*Transform_p)(sha512)
+
+    /* Dummy for saving MM_REGs on behalf of Transform */
+    /* #if defined(HAVE_INTEL_AVX2)
+     #define SAVE_XMM_YMM   __asm__ volatile("orq %%r8, %%r8":::\
+       "%ymm0","%ymm1","%ymm2","%ymm3","%ymm4","%ymm5","%ymm6","%ymm7","%ymm8","%ymm9","%ymm10","%ymm11",\
+       "%ymm12","%ymm13","%ymm14","%ymm15")
+    */
+    #if defined(HAVE_INTEL_AVX1)
+        #define SAVE_XMM_YMM   __asm__ volatile("orq %%r8, %%r8":::\
+            "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15")
+    #endif
+
+    /* AVX build: reset the hash state, then choose Transform_p on first call. */
+    int wc_InitSha512_ex(Sha512* sha512, void* heap, int devId)
+    {
+        int ret = InitSha512(sha512); /* BAD_FUNC_ARG when sha512 is NULL */
+
+        (void)heap;  /* NOTE(review): heap is not stored in sha512 in this build */
+        (void)devId;
+        /* set_cpuid_flags() returns 1 once probed: keep the earlier choice */
+        if (set_cpuid_flags())
+            return ret;
+        /* first call only: select the implementation from the cpuid flags */
+    #if defined(HAVE_INTEL_AVX2)
+        if (IS_INTEL_AVX2 && IS_INTEL_BMI2) {
+            Transform_p = Transform_AVX1_RORX; return ret;
+            Transform_p = Transform_AVX2;
+                /* unreachable; references Transform_AVX2 to avoid "not used" */
+        }
+    #endif
+    #if defined(HAVE_INTEL_AVX1)
+        Transform_p = ((IS_INTEL_AVX1) ? Transform_AVX1 : _Transform); return ret;
+    #endif
+        Transform_p = _Transform; /* reached only when HAVE_INTEL_AVX1 is unset */
+
+        return ret;
+    }
+
+#else
+    #define Transform(sha512) _Transform(sha512)
+    /* Non-AVX build: store heap, reset state, optionally init the async dev. */
+    int wc_InitSha512_ex(Sha512* sha512, void* heap, int devId)
+    {
+        int ret = 0;
+
+        if (sha512 == NULL)
+            return BAD_FUNC_ARG;
+
+        sha512->heap = heap; /* passed on to the async context init below */
+
+        ret = InitSha512(sha512);
+        if (ret != 0)
+            return ret;
+
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512)
+        ret = wolfAsync_DevCtxInit(&sha512->asyncDev,
+                            WOLFSSL_ASYNC_MARKER_SHA512, sha512->heap, devId);
+    #else
+        (void)devId; /* only used by the async path */
+    #endif /* WOLFSSL_ASYNC_CRYPT */
+
+        return ret; /* 0 or the wolfAsync_DevCtxInit result */
+    }
+
+#endif /* Hardware Acceleration */
+
+#ifndef SAVE_XMM_YMM
+    #define SAVE_XMM_YMM
+#endif
+
 static const word64 K512[80] = {
 	W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd),
 	W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc),
@@ -511,11 +536,16 @@ static INLINE void AddLength(Sha512* sha512, word32 len)
 
 static INLINE int Sha512Update(Sha512* sha512, const byte* data, word32 len)
 {
-    byte* local;
+    int ret = 0;
 
     /* do block size increments */
-    local = (byte*)sha512->buffer;
-    SAVE_XMM_YMM ; /* for Intel AVX */
+    byte* local = (byte*)sha512->buffer;
+
+    /* check that internal buffLen is valid */
+    if (sha512->buffLen > SHA512_BLOCK_SIZE)
+        return BUFFER_E;
+
+    SAVE_XMM_YMM; /* for Intel AVX */
 
     while (len) {
         word32 add = min(len, SHA512_BLOCK_SIZE - sha512->buffLen);
@@ -526,23 +556,23 @@ static INLINE int Sha512Update(Sha512* sha512, const byte* data, word32 len)
         len          -= add;
 
         if (sha512->buffLen == SHA512_BLOCK_SIZE) {
-            int ret;
-            #if defined(LITTLE_ENDIAN_ORDER)
-                #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-                if(!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
-                #endif
-                    ByteReverseWords64(sha512->buffer, sha512->buffer,
-                                   SHA512_BLOCK_SIZE);
-            #endif
+    #if defined(LITTLE_ENDIAN_ORDER)
+        #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+            if(!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
+        #endif
+                ByteReverseWords64(sha512->buffer, sha512->buffer,
+                               SHA512_BLOCK_SIZE);
+    #endif
             ret = Transform(sha512);
             if (ret != 0)
-                return ret;
+                break;
 
             AddLength(sha512, SHA512_BLOCK_SIZE);
             sha512->buffLen = 0;
         }
     }
-    return 0;
+
+    return ret;
 }
 
 int wc_Sha512Update(Sha512* sha512, const byte* data, word32 len)
@@ -550,6 +580,15 @@ int wc_Sha512Update(Sha512* sha512, const byte* data, word32 len)
     if (sha512 == NULL ||(data == NULL && len > 0)) {
         return BAD_FUNC_ARG;
     }
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512)
+    if (sha512->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA512) {
+    #if defined(HAVE_INTEL_QA)
+        return IntelQaSymSha512(&sha512->asyncDev, NULL, data, len);
+    #endif
+    }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
     return Sha512Update(sha512, data, len);
 }
 
@@ -566,14 +605,15 @@ static INLINE int Sha512Final(Sha512* sha512)
 
     /* pad with zeros */
     if (sha512->buffLen > SHA512_PAD_SIZE) {
-        XMEMSET(&local[sha512->buffLen], 0, SHA512_BLOCK_SIZE -sha512->buffLen);
+        XMEMSET(&local[sha512->buffLen], 0, SHA512_BLOCK_SIZE - sha512->buffLen);
         sha512->buffLen += SHA512_BLOCK_SIZE - sha512->buffLen;
-        #if defined(LITTLE_ENDIAN_ORDER)
-            #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-            if(!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
-            #endif
+#if defined(LITTLE_ENDIAN_ORDER)
+    #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+        if (!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
+    #endif
             ByteReverseWords64(sha512->buffer,sha512->buffer,SHA512_BLOCK_SIZE);
-        #endif
+
+#endif /* LITTLE_ENDIAN_ORDER */
         ret = Transform(sha512);
         if (ret != 0)
             return ret;
@@ -583,27 +623,27 @@ static INLINE int Sha512Final(Sha512* sha512)
     XMEMSET(&local[sha512->buffLen], 0, SHA512_PAD_SIZE - sha512->buffLen);
 
     /* put lengths in bits */
-    sha512->hiLen = (sha512->loLen >> (8*sizeof(sha512->loLen) - 3)) +
-                 (sha512->hiLen << 3);
+    sha512->hiLen = (sha512->loLen >> (8 * sizeof(sha512->loLen) - 3)) +
+                                                         (sha512->hiLen << 3);
     sha512->loLen = sha512->loLen << 3;
 
     /* store lengths */
-    #if defined(LITTLE_ENDIAN_ORDER)
-        #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-        if(!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
-        #endif
+#if defined(LITTLE_ENDIAN_ORDER)
+#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+    if (!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
+#endif
         ByteReverseWords64(sha512->buffer, sha512->buffer, SHA512_PAD_SIZE);
-    #endif
+#endif
     /* ! length ordering dependent on digest endian type ! */
 
     sha512->buffer[SHA512_BLOCK_SIZE / sizeof(word64) - 2] = sha512->hiLen;
     sha512->buffer[SHA512_BLOCK_SIZE / sizeof(word64) - 1] = sha512->loLen;
-    #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-    if(IS_INTEL_AVX1 || IS_INTEL_AVX2)
+#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+    if (IS_INTEL_AVX1 || IS_INTEL_AVX2)
         ByteReverseWords64(&(sha512->buffer[SHA512_BLOCK_SIZE / sizeof(word64) - 2]),
                            &(sha512->buffer[SHA512_BLOCK_SIZE / sizeof(word64) - 2]),
                            SHA512_BLOCK_SIZE - SHA512_PAD_SIZE);
-    #endif
+#endif
     ret = Transform(sha512);
     if (ret != 0)
         return ret;
@@ -623,84 +663,107 @@ int wc_Sha512Final(Sha512* sha512, byte* hash)
         return BAD_FUNC_ARG;
     }
 
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512)
+    if (sha512->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA512) {
+    #if defined(HAVE_INTEL_QA)
+        return IntelQaSymSha512(&sha512->asyncDev, hash, NULL,
+                                            SHA512_DIGEST_SIZE);
+    #endif
+    }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
     ret = Sha512Final(sha512);
     if (ret != 0)
         return ret;
 
     XMEMCPY(hash, sha512->digest, SHA512_DIGEST_SIZE);
 
-    return wc_InitSha512(sha512);  /* reset state */
+    return InitSha512(sha512);  /* reset state */
+}
+
+/* Convenience wrapper: wc_InitSha512_ex with a NULL heap and INVALID_DEVID. */
+int wc_InitSha512(Sha512* sha512)
+{
+    return wc_InitSha512_ex(sha512, NULL, INVALID_DEVID);
+}
+/* Free the async device context when compiled in; NULL sha512 is ignored. */
+void wc_Sha512Free(Sha512* sha512)
+{
+    if (sha512 == NULL)
+        return;
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512)
+    wolfAsync_DevCtxFree(&sha512->asyncDev, WOLFSSL_ASYNC_MARKER_SHA512);
+#endif /* WOLFSSL_ASYNC_CRYPT */
 }
 
 
 #if defined(HAVE_INTEL_AVX1)
 
-#define Rx_1(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j] + W_X[i] ;
+#define Rx_1(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j] + W_X[i];
 #define Rx_2(i) d(i)+=h(i);
 #define Rx_3(i) h(i)+=S0(a(i))+Maj(a(i),b(i),c(i));
 
 #if defined(HAVE_INTEL_RORX)
-#define Rx_RORX_1(i) h(i)+=S1_RORX(e(i))+Ch(e(i),f(i),g(i))+K[i+j] + W_X[i] ;
-#define Rx_RORX_2(i) d(i)+=h(i);
-#define Rx_RORX_3(i) h(i)+=S0_RORX(a(i))+Maj(a(i),b(i),c(i));
-#endif
 
-#endif
+    #define Rx_RORX_1(i) h(i)+=S1_RORX(e(i))+Ch(e(i),f(i),g(i))+K[i+j] + W_X[i];
+    #define Rx_RORX_2(i) d(i)+=h(i);
+    #define Rx_RORX_3(i) h(i)+=S0_RORX(a(i))+Maj(a(i),b(i),c(i));
+#endif /* HAVE_INTEL_RORX */
+
+#endif /* HAVE_INTEL_AVX1 */
 
 #if defined(HAVE_INTEL_AVX2)
-#define Ry_1(i, w) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j] + w ;
+#define Ry_1(i, w) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j] + w;
 #define Ry_2(i, w) d(i)+=h(i);
 #define Ry_3(i, w) h(i)+=S0(a(i))+Maj(a(i),b(i),c(i));
-#endif
+#endif /* HAVE_INTEL_AVX2 */
 
-#if defined(HAVE_INTEL_AVX1) /* INLINE Assember for Intel AVX1 instructions */
+/* INLINE Assembler for Intel AVX1 instructions */
+#if defined(HAVE_INTEL_AVX1)
 #if defined(DEBUG_XMM)
+    #define SAVE_REG(i)     __asm__ volatile("vmovdqu %%xmm"#i", %0 \n\t":"=m"(reg[i][0])::XMM_REGs);
+    #define RECV_REG(i)     __asm__ volatile("vmovdqu %0, %%xmm"#i" \n\t"::"m"(reg[i][0]):XMM_REGs);
 
-#define SAVE_REG(i)     __asm__ volatile("vmovdqu %%xmm"#i", %0 \n\t":"=m"(reg[i][0])::XMM_REGs);
-#define RECV_REG(i)     __asm__ volatile("vmovdqu %0, %%xmm"#i" \n\t"::"m"(reg[i][0]):XMM_REGs);
-
-#define _DUMP_REG(REG, name)\
-    { word64 buf[16] ;word64 reg[16][2];int k ;\
-      SAVE_REG(0); SAVE_REG(1); SAVE_REG(2);  SAVE_REG(3);  SAVE_REG(4);  \
-      SAVE_REG(5);   SAVE_REG(6); SAVE_REG(7);SAVE_REG(8); SAVE_REG(9); SAVE_REG(10);\
-       SAVE_REG(11); SAVE_REG(12); SAVE_REG(13); SAVE_REG(14); SAVE_REG(15); \
-      __asm__ volatile("vmovdqu %%"#REG", %0 \n\t":"=m"(buf[0])::XMM_REGs);\
-      printf(" "#name":\t") ; for(k=0; k<2; k++) printf("%016lx.", (word64)(buf[k])); printf("\n") ; \
-      RECV_REG(0); RECV_REG(1); RECV_REG(2);  RECV_REG(3);  RECV_REG(4);\
-      RECV_REG(5);   RECV_REG(6); RECV_REG(7); RECV_REG(8); RECV_REG(9);\
-      RECV_REG(10); RECV_REG(11); RECV_REG(12); RECV_REG(13); RECV_REG(14); RECV_REG(15);\
-    }
-
-#define DUMP_REG(REG) _DUMP_REG(REG, #REG)
-#define PRINTF(fmt, ...)
+    #define _DUMP_REG(REG, name)\
+        { word64 buf[16];word64 reg[16][2];int k;\
+          SAVE_REG(0); SAVE_REG(1); SAVE_REG(2);  SAVE_REG(3);  SAVE_REG(4);  \
+          SAVE_REG(5);   SAVE_REG(6); SAVE_REG(7);SAVE_REG(8); SAVE_REG(9); SAVE_REG(10);\
+           SAVE_REG(11); SAVE_REG(12); SAVE_REG(13); SAVE_REG(14); SAVE_REG(15); \
+          __asm__ volatile("vmovdqu %%"#REG", %0 \n\t":"=m"(buf[0])::XMM_REGs);\
+          printf(" "#name":\t"); for(k=0; k<2; k++) printf("%016lx.", (word64)(buf[k])); printf("\n"); \
+          RECV_REG(0); RECV_REG(1); RECV_REG(2);  RECV_REG(3);  RECV_REG(4);\
+          RECV_REG(5);   RECV_REG(6); RECV_REG(7); RECV_REG(8); RECV_REG(9);\
+          RECV_REG(10); RECV_REG(11); RECV_REG(12); RECV_REG(13); RECV_REG(14); RECV_REG(15);\
+        }
 
+    #define DUMP_REG(REG) _DUMP_REG(REG, #REG)
+    #define PRINTF(fmt, ...)
 #else
-
-#define DUMP_REG(REG)
-#define PRINTF(fmt, ...)
-
-#endif
+    #define DUMP_REG(REG)
+    #define PRINTF(fmt, ...)
+#endif /* DEBUG_XMM */
 
 #define _MOVE_to_REG(xymm, mem)       __asm__ volatile("vmovdqu %0, %%"#xymm" "\
-        :: "m"(mem):XMM_REGs) ;
+        :: "m"(mem):XMM_REGs);
 #define _MOVE_to_MEM(mem,i, xymm)     __asm__ volatile("vmovdqu %%"#xymm", %0" :\
-         "=m"(mem[i]),"=m"(mem[i+1]),"=m"(mem[i+2]),"=m"(mem[i+3])::XMM_REGs) ;
+         "=m"(mem[i]),"=m"(mem[i+1]),"=m"(mem[i+2]),"=m"(mem[i+3])::XMM_REGs);
 #define _MOVE(dest, src)              __asm__ volatile("vmovdqu %%"#src",  %%"\
-        #dest" ":::XMM_REGs) ;
+        #dest" ":::XMM_REGs);
 
 #define _S_TEMP(dest, src, bits, temp)  __asm__ volatile("vpsrlq  $"#bits", %%"\
         #src", %%"#dest"\n\tvpsllq  $64-"#bits", %%"#src", %%"#temp"\n\tvpor %%"\
-        #temp",%%"#dest", %%"#dest" ":::XMM_REGs) ;
+        #temp",%%"#dest", %%"#dest" ":::XMM_REGs);
 #define _AVX1_R(dest, src, bits)      __asm__ volatile("vpsrlq  $"#bits", %%"\
-        #src", %%"#dest" ":::XMM_REGs) ;
+        #src", %%"#dest" ":::XMM_REGs);
 #define _XOR(dest, src1, src2)        __asm__ volatile("vpxor   %%"#src1", %%"\
-        #src2", %%"#dest" ":::XMM_REGs) ;
+        #src2", %%"#dest" ":::XMM_REGs);
 #define _OR(dest, src1, src2)         __asm__ volatile("vpor    %%"#src1", %%"\
-        #src2", %%"#dest" ":::XMM_REGs) ;
+        #src2", %%"#dest" ":::XMM_REGs);
 #define _ADD(dest, src1, src2)        __asm__ volatile("vpaddq   %%"#src1", %%"\
-        #src2", %%"#dest" ":::XMM_REGs) ;
+        #src2", %%"#dest" ":::XMM_REGs);
 #define _ADD_MEM(dest, src1, mem)     __asm__ volatile("vpaddq   %0, %%"#src1", %%"\
-        #dest" "::"m"(mem):XMM_REGs) ;
+        #dest" "::"m"(mem):XMM_REGs);
 
 #define MOVE_to_REG(xymm, mem)      _MOVE_to_REG(xymm, mem)
 #define MOVE_to_MEM(mem, i, xymm)   _MOVE_to_MEM(mem, i, xymm)
@@ -715,19 +778,19 @@ int wc_Sha512Final(Sha512* sha512, byte* hash)
 #define AVX1_R(dest, src, bits)      _AVX1_R(dest, src, bits)
 
 #define Init_Mask(mask) \
-     __asm__ volatile("vmovdqu %0, %%xmm1\n\t"::"m"(mask):"%xmm1") ;
+     __asm__ volatile("vmovdqu %0, %%xmm1\n\t"::"m"(mask):"%xmm1");
 
 #define _W_from_buff1(w, buff, xmm) \
     /* X0..3(xmm4..7), W[0..15] = sha512->buffer[0.15];  */\
      __asm__ volatile("vmovdqu %1, %%"#xmm"\n\t"\
                       "vpshufb %%xmm1, %%"#xmm", %%"#xmm"\n\t"\
                       "vmovdqu %%"#xmm", %0"\
-                      :"=m"(w): "m"(buff):"%xmm0") ;
+                      :"=m"(w): "m"(buff):"%xmm0");
 
 #define W_from_buff1(w, buff, xmm) _W_from_buff1(w, buff, xmm)
 
 #define W_from_buff(w, buff)\
-     Init_Mask(mBYTE_FLIP_MASK[0]) ;\
+     Init_Mask(mBYTE_FLIP_MASK[0]);\
      W_from_buff1(w[0], buff[0], W_0);\
      W_from_buff1(w[2], buff[2], W_2);\
      W_from_buff1(w[4], buff[4], W_4);\
@@ -737,7 +800,7 @@ int wc_Sha512Final(Sha512* sha512, byte* hash)
      W_from_buff1(w[12],buff[12],W_12);\
      W_from_buff1(w[14],buff[14],W_14);
 
-static word64 mBYTE_FLIP_MASK[] =  { 0x0001020304050607, 0x08090a0b0c0d0e0f } ;
+static word64 mBYTE_FLIP_MASK[] =  { 0x0001020304050607, 0x08090a0b0c0d0e0f };
 
 #define W_I_15  xmm14
 #define W_I_7   xmm11
@@ -759,187 +822,183 @@ static word64 mBYTE_FLIP_MASK[] =  { 0x0001020304050607, 0x08090a0b0c0d0e0f } ;
 #define XMM_REGs
 
 #define s0_1(dest, src)      AVX1_S(dest, src, 1);
-#define s0_2(dest, src)      AVX1_S(G_TEMP, src, 8); XOR(dest, G_TEMP, dest) ;
-#define s0_3(dest, src)      AVX1_R(G_TEMP, src, 7);  XOR(dest, G_TEMP, dest) ;
+#define s0_2(dest, src)      AVX1_S(G_TEMP, src, 8); XOR(dest, G_TEMP, dest);
+#define s0_3(dest, src)      AVX1_R(G_TEMP, src, 7);  XOR(dest, G_TEMP, dest);
 
 #define s1_1(dest, src)      AVX1_S(dest, src, 19);
-#define s1_2(dest, src)      AVX1_S(G_TEMP, src, 61); XOR(dest, G_TEMP, dest) ;
-#define s1_3(dest, src)      AVX1_R(G_TEMP, src, 6); XOR(dest, G_TEMP, dest) ;
+#define s1_2(dest, src)      AVX1_S(G_TEMP, src, 61); XOR(dest, G_TEMP, dest);
+#define s1_3(dest, src)      AVX1_R(G_TEMP, src, 6); XOR(dest, G_TEMP, dest);
 
-#define s0_(dest, src)       s0_1(dest, src) ; s0_2(dest, src) ; s0_3(dest, src)
-#define s1_(dest, src)       s1_1(dest, src) ; s1_2(dest, src) ; s1_3(dest, src)
+#define s0_(dest, src)       s0_1(dest, src); s0_2(dest, src); s0_3(dest, src)
+#define s1_(dest, src)       s1_1(dest, src); s1_2(dest, src); s1_3(dest, src)
 
 #define Block_xx_1(i) \
-    MOVE_to_REG(W_I_15, W_X[(i-15)&15]) ;\
-    MOVE_to_REG(W_I_7,  W_X[(i- 7)&15]) ;\
+    MOVE_to_REG(W_I_15, W_X[(i-15)&15]);\
+    MOVE_to_REG(W_I_7,  W_X[(i- 7)&15]);\
 
 #define Block_xx_2(i) \
-    MOVE_to_REG(W_I_2,  W_X[(i- 2)&15]) ;\
-    MOVE_to_REG(W_I,    W_X[(i)]) ;\
+    MOVE_to_REG(W_I_2,  W_X[(i- 2)&15]);\
+    MOVE_to_REG(W_I,    W_X[(i)]);\
 
 #define Block_xx_3(i) \
-    s0_ (XMM_TEMP0, W_I_15) ;\
+    s0_ (XMM_TEMP0, W_I_15);\
 
 #define Block_xx_4(i) \
-    ADD(W_I, W_I, XMM_TEMP0) ;\
-    ADD(W_I, W_I, W_I_7) ;\
+    ADD(W_I, W_I, XMM_TEMP0);\
+    ADD(W_I, W_I, W_I_7);\
 
 #define Block_xx_5(i) \
-    s1_ (XMM_TEMP0, W_I_2) ;\
+    s1_ (XMM_TEMP0, W_I_2);\
 
 #define Block_xx_6(i) \
-    ADD(W_I, W_I, XMM_TEMP0) ;\
-    MOVE_to_MEM(W_X,i, W_I) ;\
-    if(i==0)\
-        MOVE_to_MEM(W_X,16, W_I) ;\
+    ADD(W_I, W_I, XMM_TEMP0);\
+    MOVE_to_MEM(W_X,i, W_I);\
+    if (i==0)\
+        MOVE_to_MEM(W_X,16, W_I);\
 
 #define Block_xx_7(i) \
-    MOVE_to_REG(W_I_15, W_X[(i-15)&15]) ;\
-    MOVE_to_REG(W_I_7,  W_X[(i- 7)&15]) ;\
+    MOVE_to_REG(W_I_15, W_X[(i-15)&15]);\
+    MOVE_to_REG(W_I_7,  W_X[(i- 7)&15]);\
 
 #define Block_xx_8(i) \
-    MOVE_to_REG(W_I_2,  W_X[(i- 2)&15]) ;\
-    MOVE_to_REG(W_I,    W_X[(i)]) ;\
+    MOVE_to_REG(W_I_2,  W_X[(i- 2)&15]);\
+    MOVE_to_REG(W_I,    W_X[(i)]);\
 
 #define Block_xx_9(i) \
-    s0_ (XMM_TEMP0, W_I_15) ;\
+    s0_ (XMM_TEMP0, W_I_15);\
 
 #define Block_xx_10(i) \
-    ADD(W_I, W_I, XMM_TEMP0) ;\
-    ADD(W_I, W_I, W_I_7) ;\
+    ADD(W_I, W_I, XMM_TEMP0);\
+    ADD(W_I, W_I, W_I_7);\
 
 #define Block_xx_11(i) \
-    s1_ (XMM_TEMP0, W_I_2) ;\
+    s1_ (XMM_TEMP0, W_I_2);\
 
 #define Block_xx_12(i) \
-    ADD(W_I, W_I, XMM_TEMP0) ;\
-    MOVE_to_MEM(W_X,i, W_I) ;\
-    if((i)==0)\
-        MOVE_to_MEM(W_X,16, W_I) ;\
+    ADD(W_I, W_I, XMM_TEMP0);\
+    MOVE_to_MEM(W_X,i, W_I);\
+    if ((i)==0)\
+        MOVE_to_MEM(W_X,16, W_I);\
 
-static INLINE void Block_0_1(word64 *W_X) { Block_xx_1(0) ; }
-static INLINE void Block_0_2(word64 *W_X) { Block_xx_2(0) ; }
-static INLINE void Block_0_3(void) { Block_xx_3(0) ; }
-static INLINE void Block_0_4(void) { Block_xx_4(0) ; }
-static INLINE void Block_0_5(void) { Block_xx_5(0) ; }
-static INLINE void Block_0_6(word64 *W_X) { Block_xx_6(0) ; }
-static INLINE void Block_0_7(word64 *W_X) { Block_xx_7(2) ; }
-static INLINE void Block_0_8(word64 *W_X) { Block_xx_8(2) ; }
-static INLINE void Block_0_9(void) { Block_xx_9(2) ; }
-static INLINE void Block_0_10(void){ Block_xx_10(2) ; }
-static INLINE void Block_0_11(void){ Block_xx_11(2) ; }
-static INLINE void Block_0_12(word64 *W_X){ Block_xx_12(2) ; }
+static INLINE void Block_0_1(word64 *W_X) { Block_xx_1(0); }
+static INLINE void Block_0_2(word64 *W_X) { Block_xx_2(0); }
+static INLINE void Block_0_3(void) { Block_xx_3(0); }
+static INLINE void Block_0_4(void) { Block_xx_4(0); }
+static INLINE void Block_0_5(void) { Block_xx_5(0); }
+static INLINE void Block_0_6(word64 *W_X) { Block_xx_6(0); }
+static INLINE void Block_0_7(word64 *W_X) { Block_xx_7(2); }
+static INLINE void Block_0_8(word64 *W_X) { Block_xx_8(2); }
+static INLINE void Block_0_9(void) { Block_xx_9(2); }
+static INLINE void Block_0_10(void){ Block_xx_10(2); }
+static INLINE void Block_0_11(void){ Block_xx_11(2); }
+static INLINE void Block_0_12(word64 *W_X){ Block_xx_12(2); }
 
-static INLINE void Block_4_1(word64 *W_X) { Block_xx_1(4) ; }
-static INLINE void Block_4_2(word64 *W_X) { Block_xx_2(4) ; }
-static INLINE void Block_4_3(void) { Block_xx_3(4) ; }
-static INLINE void Block_4_4(void) { Block_xx_4(4) ; }
-static INLINE void Block_4_5(void) { Block_xx_5(4) ; }
-static INLINE void Block_4_6(word64 *W_X) { Block_xx_6(4) ; }
-static INLINE void Block_4_7(word64 *W_X) { Block_xx_7(6) ; }
-static INLINE void Block_4_8(word64 *W_X) { Block_xx_8(6) ; }
-static INLINE void Block_4_9(void) { Block_xx_9(6) ; }
-static INLINE void Block_4_10(void){ Block_xx_10(6) ; }
-static INLINE void Block_4_11(void){ Block_xx_11(6) ; }
-static INLINE void Block_4_12(word64 *W_X){ Block_xx_12(6) ; }
+static INLINE void Block_4_1(word64 *W_X) { Block_xx_1(4); }
+static INLINE void Block_4_2(word64 *W_X) { Block_xx_2(4); }
+static INLINE void Block_4_3(void) { Block_xx_3(4); }
+static INLINE void Block_4_4(void) { Block_xx_4(4); }
+static INLINE void Block_4_5(void) { Block_xx_5(4); }
+static INLINE void Block_4_6(word64 *W_X) { Block_xx_6(4); }
+static INLINE void Block_4_7(word64 *W_X) { Block_xx_7(6); }
+static INLINE void Block_4_8(word64 *W_X) { Block_xx_8(6); }
+static INLINE void Block_4_9(void) { Block_xx_9(6); }
+static INLINE void Block_4_10(void){ Block_xx_10(6); }
+static INLINE void Block_4_11(void){ Block_xx_11(6); }
+static INLINE void Block_4_12(word64 *W_X){ Block_xx_12(6); }
 
-static INLINE void Block_8_1(word64 *W_X) { Block_xx_1(8) ; }
-static INLINE void Block_8_2(word64 *W_X) { Block_xx_2(8) ; }
-static INLINE void Block_8_3(void) { Block_xx_3(8) ; }
-static INLINE void Block_8_4(void) { Block_xx_4(8) ; }
-static INLINE void Block_8_5(void) { Block_xx_5(8) ; }
-static INLINE void Block_8_6(word64 *W_X) { Block_xx_6(8) ; }
-static INLINE void Block_8_7(word64 *W_X) { Block_xx_7(10) ; }
-static INLINE void Block_8_8(word64 *W_X) { Block_xx_8(10) ; }
-static INLINE void Block_8_9(void) { Block_xx_9(10) ; }
-static INLINE void Block_8_10(void){ Block_xx_10(10) ; }
-static INLINE void Block_8_11(void){ Block_xx_11(10) ; }
-static INLINE void Block_8_12(word64 *W_X){ Block_xx_12(10) ; }
+static INLINE void Block_8_1(word64 *W_X) { Block_xx_1(8); }
+static INLINE void Block_8_2(word64 *W_X) { Block_xx_2(8); }
+static INLINE void Block_8_3(void) { Block_xx_3(8); }
+static INLINE void Block_8_4(void) { Block_xx_4(8); }
+static INLINE void Block_8_5(void) { Block_xx_5(8); }
+static INLINE void Block_8_6(word64 *W_X) { Block_xx_6(8); }
+static INLINE void Block_8_7(word64 *W_X) { Block_xx_7(10); }
+static INLINE void Block_8_8(word64 *W_X) { Block_xx_8(10); }
+static INLINE void Block_8_9(void) { Block_xx_9(10); }
+static INLINE void Block_8_10(void){ Block_xx_10(10); }
+static INLINE void Block_8_11(void){ Block_xx_11(10); }
+static INLINE void Block_8_12(word64 *W_X){ Block_xx_12(10); }
 
-static INLINE void Block_12_1(word64 *W_X) { Block_xx_1(12) ; }
-static INLINE void Block_12_2(word64 *W_X) { Block_xx_2(12) ; }
-static INLINE void Block_12_3(void) { Block_xx_3(12) ; }
-static INLINE void Block_12_4(void) { Block_xx_4(12) ; }
-static INLINE void Block_12_5(void) { Block_xx_5(12) ; }
-static INLINE void Block_12_6(word64 *W_X) { Block_xx_6(12) ; }
-static INLINE void Block_12_7(word64 *W_X) { Block_xx_7(14) ; }
-static INLINE void Block_12_8(word64 *W_X) { Block_xx_8(14) ; }
-static INLINE void Block_12_9(void) { Block_xx_9(14) ; }
-static INLINE void Block_12_10(void){ Block_xx_10(14) ; }
-static INLINE void Block_12_11(void){ Block_xx_11(14) ; }
-static INLINE void Block_12_12(word64 *W_X){ Block_xx_12(14) ; }
+static INLINE void Block_12_1(word64 *W_X) { Block_xx_1(12); }
+static INLINE void Block_12_2(word64 *W_X) { Block_xx_2(12); }
+static INLINE void Block_12_3(void) { Block_xx_3(12); }
+static INLINE void Block_12_4(void) { Block_xx_4(12); }
+static INLINE void Block_12_5(void) { Block_xx_5(12); }
+static INLINE void Block_12_6(word64 *W_X) { Block_xx_6(12); }
+static INLINE void Block_12_7(word64 *W_X) { Block_xx_7(14); }
+static INLINE void Block_12_8(word64 *W_X) { Block_xx_8(14); }
+static INLINE void Block_12_9(void) { Block_xx_9(14); }
+static INLINE void Block_12_10(void){ Block_xx_10(14); }
+static INLINE void Block_12_11(void){ Block_xx_11(14); }
+static INLINE void Block_12_12(word64 *W_X){ Block_xx_12(14); }
 
-#endif
+#endif /* HAVE_INTEL_AVX1 */
 
 #if defined(HAVE_INTEL_AVX2)
 static const unsigned long mBYTE_FLIP_MASK_Y[] =
-   { 0x0001020304050607, 0x08090a0b0c0d0e0f, 0x0001020304050607, 0x08090a0b0c0d0e0f } ;
+   { 0x0001020304050607, 0x08090a0b0c0d0e0f, 0x0001020304050607, 0x08090a0b0c0d0e0f };
 
 #define W_from_buff_Y(buff)\
     { /* X0..3(ymm9..12), W_X[0..15] = sha512->buffer[0.15];  */\
-     __asm__ volatile("vmovdqu %0, %%ymm8\n\t"::"m"(mBYTE_FLIP_MASK_Y[0]):YMM_REGs) ;\
+     __asm__ volatile("vmovdqu %0, %%ymm8\n\t"::"m"(mBYTE_FLIP_MASK_Y[0]):YMM_REGs);\
      __asm__ volatile("vmovdqu %0, %%ymm12\n\t"\
                       "vmovdqu %1, %%ymm4\n\t"\
                       "vpshufb %%ymm8, %%ymm12, %%ymm12\n\t"\
                       "vpshufb %%ymm8, %%ymm4, %%ymm4\n\t"\
-                      :: "m"(buff[0]),  "m"(buff[4]):YMM_REGs) ;\
+                      :: "m"(buff[0]),  "m"(buff[4]):YMM_REGs);\
      __asm__ volatile("vmovdqu %0, %%ymm5\n\t"\
                       "vmovdqu %1, %%ymm6\n\t"\
                       "vpshufb %%ymm8, %%ymm5, %%ymm5\n\t"\
                       "vpshufb %%ymm8, %%ymm6, %%ymm6\n\t"\
-                      :: "m"(buff[8]),  "m"(buff[12]):YMM_REGs) ;\
+                      :: "m"(buff[8]),  "m"(buff[12]):YMM_REGs);\
     }
 
 #if defined(DEBUG_YMM)
+    #define SAVE_REG_Y(i) __asm__ volatile("vmovdqu %%ymm"#i", %0 \n\t":"=m"(reg[i-4][0])::YMM_REGs);
+    #define RECV_REG_Y(i) __asm__ volatile("vmovdqu %0, %%ymm"#i" \n\t"::"m"(reg[i-4][0]):YMM_REGs);
 
-#define SAVE_REG_Y(i) __asm__ volatile("vmovdqu %%ymm"#i", %0 \n\t":"=m"(reg[i-4][0])::YMM_REGs);
-#define RECV_REG_Y(i) __asm__ volatile("vmovdqu %0, %%ymm"#i" \n\t"::"m"(reg[i-4][0]):YMM_REGs);
-
-#define _DUMP_REG_Y(REG, name)\
-    { word64 buf[16] ;word64 reg[16][2];int k ;\
-      SAVE_REG_Y(4);  SAVE_REG_Y(5);   SAVE_REG_Y(6); SAVE_REG_Y(7); \
-      SAVE_REG_Y(8); SAVE_REG_Y(9); SAVE_REG_Y(10); SAVE_REG_Y(11); SAVE_REG_Y(12);\
-      SAVE_REG_Y(13); SAVE_REG_Y(14); SAVE_REG_Y(15); \
-      __asm__ volatile("vmovdqu %%"#REG", %0 \n\t":"=m"(buf[0])::YMM_REGs);\
-      printf(" "#name":\t") ; for(k=0; k<4; k++) printf("%016lx.", (word64)buf[k]) ; printf("\n") ; \
-      RECV_REG_Y(4);  RECV_REG_Y(5);   RECV_REG_Y(6); RECV_REG_Y(7); \
-      RECV_REG_Y(8); RECV_REG_Y(9); RECV_REG_Y(10); RECV_REG_Y(11); RECV_REG_Y(12); \
-      RECV_REG_Y(13); RECV_REG_Y(14); RECV_REG_Y(15);\
-    }
-
-#define DUMP_REG_Y(REG) _DUMP_REG_Y(REG, #REG)
-#define DUMP_REG2_Y(REG) _DUMP_REG_Y(REG, #REG)
-#define PRINTF_Y(fmt, ...)
+    #define _DUMP_REG_Y(REG, name)\
+        { word64 buf[16];word64 reg[16][2];int k;\
+          SAVE_REG_Y(4);  SAVE_REG_Y(5);   SAVE_REG_Y(6); SAVE_REG_Y(7); \
+          SAVE_REG_Y(8); SAVE_REG_Y(9); SAVE_REG_Y(10); SAVE_REG_Y(11); SAVE_REG_Y(12);\
+          SAVE_REG_Y(13); SAVE_REG_Y(14); SAVE_REG_Y(15); \
+          __asm__ volatile("vmovdqu %%"#REG", %0 \n\t":"=m"(buf[0])::YMM_REGs);\
+          printf(" "#name":\t"); for(k=0; k<4; k++) printf("%016lx.", (word64)buf[k]); printf("\n"); \
+          RECV_REG_Y(4);  RECV_REG_Y(5);   RECV_REG_Y(6); RECV_REG_Y(7); \
+          RECV_REG_Y(8); RECV_REG_Y(9); RECV_REG_Y(10); RECV_REG_Y(11); RECV_REG_Y(12); \
+          RECV_REG_Y(13); RECV_REG_Y(14); RECV_REG_Y(15);\
+        }
 
+    #define DUMP_REG_Y(REG) _DUMP_REG_Y(REG, #REG)
+    #define DUMP_REG2_Y(REG) _DUMP_REG_Y(REG, #REG)
+    #define PRINTF_Y(fmt, ...)
 #else
-
-#define DUMP_REG_Y(REG)
-#define DUMP_REG2_Y(REG)
-#define PRINTF_Y(fmt, ...)
-
-#endif
+    #define DUMP_REG_Y(REG)
+    #define DUMP_REG2_Y(REG)
+    #define PRINTF_Y(fmt, ...)
+#endif /* DEBUG_YMM */
 
 #define _MOVE_to_REGy(ymm, mem)         __asm__ volatile("vmovdqu %0, %%"#ymm" "\
-                                        :: "m"(mem):YMM_REGs) ;
+                                        :: "m"(mem):YMM_REGs);
 #define _MOVE_to_MEMy(mem,i, ymm)       __asm__ volatile("vmovdqu %%"#ymm", %0" \
-        : "=m"(mem[i]),"=m"(mem[i+1]),"=m"(mem[i+2]),"=m"(mem[i+3])::YMM_REGs) ;
+        : "=m"(mem[i]),"=m"(mem[i+1]),"=m"(mem[i+2]),"=m"(mem[i+3])::YMM_REGs);
 #define _MOVE_128y(ymm0, ymm1, ymm2, map)  __asm__ volatile("vperm2i128  $"\
-        #map", %%"#ymm2", %%"#ymm1", %%"#ymm0" ":::YMM_REGs) ;
+        #map", %%"#ymm2", %%"#ymm1", %%"#ymm0" ":::YMM_REGs);
 #define _S_TEMPy(dest, src, bits, temp) \
          __asm__ volatile("vpsrlq  $"#bits", %%"#src", %%"#dest"\n\tvpsllq  $64-"#bits\
-        ", %%"#src", %%"#temp"\n\tvpor %%"#temp",%%"#dest", %%"#dest" ":::YMM_REGs) ;
+        ", %%"#src", %%"#temp"\n\tvpor %%"#temp",%%"#dest", %%"#dest" ":::YMM_REGs);
 #define _AVX2_R(dest, src, bits)        __asm__ volatile("vpsrlq  $"#bits", %%"\
-         #src", %%"#dest" ":::YMM_REGs) ;
+         #src", %%"#dest" ":::YMM_REGs);
 #define _XORy(dest, src1, src2)         __asm__ volatile("vpxor   %%"#src1", %%"\
-         #src2", %%"#dest" ":::YMM_REGs) ;
+         #src2", %%"#dest" ":::YMM_REGs);
 #define _ADDy(dest, src1, src2)         __asm__ volatile("vpaddq   %%"#src1", %%"\
-         #src2", %%"#dest" ":::YMM_REGs) ;
+         #src2", %%"#dest" ":::YMM_REGs);
 #define _BLENDy(map, dest, src1, src2)  __asm__ volatile("vpblendd    $"#map", %%"\
-         #src1",   %%"#src2", %%"#dest" ":::YMM_REGs) ;
+         #src1",   %%"#src2", %%"#dest" ":::YMM_REGs);
 #define _BLENDQy(map, dest, src1, src2) __asm__ volatile("vblendpd   $"#map", %%"\
-         #src1",   %%"#src2", %%"#dest" ":::YMM_REGs) ;
+         #src1",   %%"#src2", %%"#dest" ":::YMM_REGs);
 #define _PERMQy(map, dest, src)         __asm__ volatile("vpermq  $"#map", %%"\
-         #src", %%"#dest" ":::YMM_REGs) ;
+         #src", %%"#dest" ":::YMM_REGs);
 
 #define MOVE_to_REGy(ymm, mem)      _MOVE_to_REGy(ymm, mem)
 #define MOVE_to_MEMy(mem, i, ymm)   _MOVE_to_MEMy(mem, i, ymm)
@@ -957,15 +1016,15 @@ static const unsigned long mBYTE_FLIP_MASK_Y[] =
 #define AVX2_R(dest, src, bits)      _AVX2_R(dest, src, bits)
 
 
-#define    FEEDBACK1_to_W_I_2(w_i_2, w_i)    MOVE_128y(YMM_TEMP0, w_i, w_i, 0x08) ;\
-                                       BLENDy(0xf0, w_i_2, YMM_TEMP0, w_i_2) ;
+#define    FEEDBACK1_to_W_I_2(w_i_2, w_i)    MOVE_128y(YMM_TEMP0, w_i, w_i, 0x08);\
+                                       BLENDy(0xf0, w_i_2, YMM_TEMP0, w_i_2);
 
-#define    MOVE_W_to_W_I_15(w_i_15, w_0, w_4)  BLENDQy(0x1, w_i_15, w_4, w_0) ;\
-                                       PERMQy(0x39, w_i_15, w_i_15) ;
-#define    MOVE_W_to_W_I_7(w_i_7,  w_8, w_12)  BLENDQy(0x1, w_i_7, w_12, w_8) ;\
-                                       PERMQy(0x39, w_i_7, w_i_7) ;
-#define    MOVE_W_to_W_I_2(w_i_2,  w_12)       BLENDQy(0xc, w_i_2, w_12, w_i_2) ;\
-                                       PERMQy(0x0e, w_i_2, w_i_2) ;
+#define    MOVE_W_to_W_I_15(w_i_15, w_0, w_4)  BLENDQy(0x1, w_i_15, w_4, w_0);\
+                                       PERMQy(0x39, w_i_15, w_i_15);
+#define    MOVE_W_to_W_I_7(w_i_7,  w_8, w_12)  BLENDQy(0x1, w_i_7, w_12, w_8);\
+                                       PERMQy(0x39, w_i_7, w_i_7);
+#define    MOVE_W_to_W_I_2(w_i_2,  w_12)       BLENDQy(0xc, w_i_2, w_12, w_i_2);\
+                                       PERMQy(0x0e, w_i_2, w_i_2);
 
 
 #define W_I_16y  ymm8
@@ -990,40 +1049,40 @@ static const unsigned long mBYTE_FLIP_MASK_Y[] =
                  /* "%ymm7","%ymm8","%ymm9","%ymm10","%ymm11","%ymm12","%ymm13","%ymm14","%ymm15"*/
 
 #define MOVE_15_to_16(w_i_16, w_i_15, w_i_7)\
-    __asm__ volatile("vperm2i128  $0x01, %%"#w_i_15", %%"#w_i_15", %%"#w_i_15" ":::YMM_REGs) ;\
-    __asm__ volatile("vpblendd    $0x08, %%"#w_i_15", %%"#w_i_7", %%"#w_i_16" ":::YMM_REGs) ;\
-    __asm__ volatile("vperm2i128 $0x01,  %%"#w_i_7",  %%"#w_i_7", %%"#w_i_15" ":::YMM_REGs) ;\
-    __asm__ volatile("vpblendd    $0x80, %%"#w_i_15", %%"#w_i_16", %%"#w_i_16" ":::YMM_REGs) ;\
-    __asm__ volatile("vpshufd    $0x93,  %%"#w_i_16", %%"#w_i_16" ":::YMM_REGs) ;\
+    __asm__ volatile("vperm2i128  $0x01, %%"#w_i_15", %%"#w_i_15", %%"#w_i_15" ":::YMM_REGs);\
+    __asm__ volatile("vpblendd    $0x08, %%"#w_i_15", %%"#w_i_7", %%"#w_i_16" ":::YMM_REGs);\
+    __asm__ volatile("vperm2i128 $0x01,  %%"#w_i_7",  %%"#w_i_7", %%"#w_i_15" ":::YMM_REGs);\
+    __asm__ volatile("vpblendd    $0x80, %%"#w_i_15", %%"#w_i_16", %%"#w_i_16" ":::YMM_REGs);\
+    __asm__ volatile("vpshufd    $0x93,  %%"#w_i_16", %%"#w_i_16" ":::YMM_REGs);\
 
 #define MOVE_7_to_15(w_i_15, w_i_7)\
-    __asm__ volatile("vmovdqu                 %%"#w_i_7",  %%"#w_i_15" ":::YMM_REGs) ;\
+    __asm__ volatile("vmovdqu                 %%"#w_i_7",  %%"#w_i_15" ":::YMM_REGs);\
 
 #define MOVE_I_to_7(w_i_7, w_i)\
-    __asm__ volatile("vperm2i128 $0x01,       %%"#w_i",   %%"#w_i",   %%"#w_i_7" ":::YMM_REGs) ;\
-    __asm__ volatile("vpblendd    $0x01,       %%"#w_i_7",   %%"#w_i", %%"#w_i_7" ":::YMM_REGs) ;\
-    __asm__ volatile("vpshufd    $0x39, %%"#w_i_7", %%"#w_i_7" ":::YMM_REGs) ;\
+    __asm__ volatile("vperm2i128 $0x01,       %%"#w_i",   %%"#w_i",   %%"#w_i_7" ":::YMM_REGs);\
+    __asm__ volatile("vpblendd    $0x01,       %%"#w_i_7",   %%"#w_i", %%"#w_i_7" ":::YMM_REGs);\
+    __asm__ volatile("vpshufd    $0x39, %%"#w_i_7", %%"#w_i_7" ":::YMM_REGs);\
 
 #define MOVE_I_to_2(w_i_2, w_i)\
-    __asm__ volatile("vperm2i128 $0x01,       %%"#w_i", %%"#w_i", %%"#w_i_2" ":::YMM_REGs) ;\
-    __asm__ volatile("vpshufd    $0x0e, %%"#w_i_2", %%"#w_i_2" ":::YMM_REGs) ;\
+    __asm__ volatile("vperm2i128 $0x01,       %%"#w_i", %%"#w_i", %%"#w_i_2" ":::YMM_REGs);\
+    __asm__ volatile("vpshufd    $0x0e, %%"#w_i_2", %%"#w_i_2" ":::YMM_REGs);\
 
-#endif
+#endif /* HAVE_INTEL_AVX2 */
 
 
 /***  Transform Body ***/
 #if defined(HAVE_INTEL_AVX1)
-
 static int Transform_AVX1(Sha512* sha512)
 {
     const word64* K = K512;
-    word64 W_X[16+4];
+    word64 W_X[16+4] = {0};
     word32 j;
     word64 T[8];
+
     /* Copy digest to working vars */
     XMEMCPY(T, sha512->digest, sizeof(T));
 
-    W_from_buff(W_X, sha512->buffer) ;
+    W_from_buff(W_X, sha512->buffer);
     for (j = 0; j < 80; j += 16) {
         Rx_1( 0); Block_0_1(W_X); Rx_2( 0); Block_0_2(W_X); Rx_3( 0); Block_0_3();
         Rx_1( 1); Block_0_4(); Rx_2( 1); Block_0_5(); Rx_3( 1); Block_0_6(W_X);
@@ -1047,7 +1106,6 @@ static int Transform_AVX1(Sha512* sha512)
     }
 
     /* Add the working vars back into digest */
-
     sha512->digest[0] += a(0);
     sha512->digest[1] += b(0);
     sha512->digest[2] += c(0);
@@ -1058,28 +1116,27 @@ static int Transform_AVX1(Sha512* sha512)
     sha512->digest[7] += h(0);
 
     /* Wipe variables */
-    #if !defined(HAVE_INTEL_AVX1)&&!defined(HAVE_INTEL_AVX2)
+#if !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2)
     XMEMSET(W_X, 0, sizeof(word64) * 16);
-    #endif
+#endif
     XMEMSET(T, 0, sizeof(T));
 
     return 0;
 }
-
-#endif
+#endif /* HAVE_INTEL_AVX1 */
 
 #if defined(HAVE_INTEL_AVX2) && defined(HAVE_INTEL_AVX1) && defined(HAVE_INTEL_RORX)
-
 static int Transform_AVX1_RORX(Sha512* sha512)
 {
     const word64* K = K512;
-    word64 W_X[16+4];
+    word64 W_X[16+4] = {0};
     word32 j;
     word64 T[8];
+
     /* Copy digest to working vars */
     XMEMCPY(T, sha512->digest, sizeof(T));
 
-    W_from_buff(W_X, sha512->buffer) ;
+    W_from_buff(W_X, sha512->buffer);
     for (j = 0; j < 80; j += 16) {
         Rx_RORX_1( 0); Block_0_1(W_X); Rx_RORX_2( 0); Block_0_2(W_X);
                                     Rx_RORX_3( 0); Block_0_3();
@@ -1117,8 +1174,8 @@ static int Transform_AVX1_RORX(Sha512* sha512)
         Rx_RORX_1(15); Block_12_10();Rx_RORX_2(15); Block_12_11();
                                      Rx_RORX_3(15); Block_12_12(W_X);
     }
-    /* Add the working vars back into digest */
 
+    /* Add the working vars back into digest */
     sha512->digest[0] += a(0);
     sha512->digest[1] += b(0);
     sha512->digest[2] += c(0);
@@ -1129,136 +1186,137 @@ static int Transform_AVX1_RORX(Sha512* sha512)
     sha512->digest[7] += h(0);
 
     /* Wipe variables */
-    #if !defined(HAVE_INTEL_AVX1)&&!defined(HAVE_INTEL_AVX2)
+#if !defined(HAVE_INTEL_AVX1)&&!defined(HAVE_INTEL_AVX2)
     XMEMSET(W_X, 0, sizeof(word64) * 16);
-    #endif
+#endif
     XMEMSET(T, 0, sizeof(T));
 
     return 0;
 }
-#endif
+#endif /* HAVE_INTEL_AVX2 && HAVE_INTEL_AVX1 && HAVE_INTEL_RORX */
 
 #if defined(HAVE_INTEL_AVX2)
 
 #define s0_1y(dest, src)      AVX2_S(dest, src, 1);
-#define s0_2y(dest, src)      AVX2_S(G_TEMPy, src, 8); XORy(dest, G_TEMPy, dest) ;
-#define s0_3y(dest, src)      AVX2_R(G_TEMPy, src, 7);  XORy(dest, G_TEMPy, dest) ;
+#define s0_2y(dest, src)      AVX2_S(G_TEMPy, src, 8); XORy(dest, G_TEMPy, dest);
+#define s0_3y(dest, src)      AVX2_R(G_TEMPy, src, 7);  XORy(dest, G_TEMPy, dest);
 
 #define s1_1y(dest, src)      AVX2_S(dest, src, 19);
-#define s1_2y(dest, src)      AVX2_S(G_TEMPy, src, 61); XORy(dest, G_TEMPy, dest) ;
-#define s1_3y(dest, src)      AVX2_R(G_TEMPy, src, 6); XORy(dest, G_TEMPy, dest) ;
+#define s1_2y(dest, src)      AVX2_S(G_TEMPy, src, 61); XORy(dest, G_TEMPy, dest);
+#define s1_3y(dest, src)      AVX2_R(G_TEMPy, src, 6); XORy(dest, G_TEMPy, dest);
 
-#define s0_y(dest, src)       s0_1y(dest, src) ; s0_2y(dest, src) ; s0_3y(dest, src)
-#define s1_y(dest, src)       s1_1y(dest, src) ; s1_2y(dest, src) ; s1_3y(dest, src)
+#define s0_y(dest, src)       s0_1y(dest, src); s0_2y(dest, src); s0_3y(dest, src)
+#define s1_y(dest, src)       s1_1y(dest, src); s1_2y(dest, src); s1_3y(dest, src)
 
 
 #define Block_Y_xx_1(i, w_0, w_4, w_8, w_12)\
-    MOVE_W_to_W_I_15(W_I_15y, w_0, w_4) ;\
-    MOVE_W_to_W_I_7 (W_I_7y,  w_8, w_12) ;\
-    MOVE_W_to_W_I_2 (W_I_2y,  w_12) ;\
+    MOVE_W_to_W_I_15(W_I_15y, w_0, w_4);\
+    MOVE_W_to_W_I_7 (W_I_7y,  w_8, w_12);\
+    MOVE_W_to_W_I_2 (W_I_2y,  w_12);\
 
 #define Block_Y_xx_2(i, w_0, w_4, w_8, w_12)\
-    s0_1y (YMM_TEMP0, W_I_15y) ;\
+    s0_1y (YMM_TEMP0, W_I_15y);\
 
 #define Block_Y_xx_3(i, w_0, w_4, w_8, w_12)\
-    s0_2y (YMM_TEMP0, W_I_15y) ;\
+    s0_2y (YMM_TEMP0, W_I_15y);\
 
 #define Block_Y_xx_4(i, w_0, w_4, w_8, w_12)\
-    s0_3y (YMM_TEMP0, W_I_15y) ;\
+    s0_3y (YMM_TEMP0, W_I_15y);\
 
 #define Block_Y_xx_5(i, w_0, w_4, w_8, w_12)\
-    ADDy(W_I_TEMPy, w_0, YMM_TEMP0) ;\
+    ADDy(W_I_TEMPy, w_0, YMM_TEMP0);\
 
 #define Block_Y_xx_6(i, w_0, w_4, w_8, w_12)\
-    ADDy(W_I_TEMPy, W_I_TEMPy, W_I_7y) ;\
-    s1_1y (YMM_TEMP0, W_I_2y) ;\
+    ADDy(W_I_TEMPy, W_I_TEMPy, W_I_7y);\
+    s1_1y (YMM_TEMP0, W_I_2y);\
 
 #define Block_Y_xx_7(i, w_0, w_4, w_8, w_12)\
-    s1_2y (YMM_TEMP0, W_I_2y) ;\
+    s1_2y (YMM_TEMP0, W_I_2y);\
 
 #define Block_Y_xx_8(i, w_0, w_4, w_8, w_12)\
-    s1_3y (YMM_TEMP0, W_I_2y) ;\
-    ADDy(w_0, W_I_TEMPy, YMM_TEMP0) ;\
+    s1_3y (YMM_TEMP0, W_I_2y);\
+    ADDy(w_0, W_I_TEMPy, YMM_TEMP0);\
 
 #define Block_Y_xx_9(i, w_0, w_4, w_8, w_12)\
-    FEEDBACK1_to_W_I_2(W_I_2y, w_0) ;\
+    FEEDBACK1_to_W_I_2(W_I_2y, w_0);\
 
 #define Block_Y_xx_10(i, w_0, w_4, w_8, w_12) \
-    s1_1y (YMM_TEMP0, W_I_2y) ;\
+    s1_1y (YMM_TEMP0, W_I_2y);\
 
 #define Block_Y_xx_11(i, w_0, w_4, w_8, w_12) \
-    s1_2y (YMM_TEMP0, W_I_2y) ;\
+    s1_2y (YMM_TEMP0, W_I_2y);\
 
 #define Block_Y_xx_12(i, w_0, w_4, w_8, w_12)\
-    s1_3y (YMM_TEMP0, W_I_2y) ;\
-    ADDy(w_0, W_I_TEMPy, YMM_TEMP0) ;\
-    MOVE_to_MEMy(w,0, w_4) ;\
+    s1_3y (YMM_TEMP0, W_I_2y);\
+    ADDy(w_0, W_I_TEMPy, YMM_TEMP0);\
+    MOVE_to_MEMy(w,0, w_4);\
 
 
-static INLINE void Block_Y_0_1(void) { Block_Y_xx_1(0, W_0y, W_4y, W_8y, W_12y) ; }
-static INLINE void Block_Y_0_2(void) { Block_Y_xx_2(0, W_0y, W_4y, W_8y, W_12y) ; }
-static INLINE void Block_Y_0_3(void) { Block_Y_xx_3(0, W_0y, W_4y, W_8y, W_12y) ; }
-static INLINE void Block_Y_0_4(void) { Block_Y_xx_4(0, W_0y, W_4y, W_8y, W_12y) ; }
-static INLINE void Block_Y_0_5(void) { Block_Y_xx_5(0, W_0y, W_4y, W_8y, W_12y) ; }
-static INLINE void Block_Y_0_6(void) { Block_Y_xx_6(0, W_0y, W_4y, W_8y, W_12y) ; }
-static INLINE void Block_Y_0_7(void) { Block_Y_xx_7(0, W_0y, W_4y, W_8y, W_12y) ; }
-static INLINE void Block_Y_0_8(void) { Block_Y_xx_8(0, W_0y, W_4y, W_8y, W_12y) ; }
-static INLINE void Block_Y_0_9(void) { Block_Y_xx_9(0, W_0y, W_4y, W_8y, W_12y) ; }
-static INLINE void Block_Y_0_10(void){ Block_Y_xx_10(0, W_0y, W_4y, W_8y, W_12y) ; }
-static INLINE void Block_Y_0_11(void){ Block_Y_xx_11(0, W_0y, W_4y, W_8y, W_12y) ; }
-static INLINE void Block_Y_0_12(word64 *w){ Block_Y_xx_12(0, W_0y, W_4y, W_8y, W_12y) ; }
+static INLINE void Block_Y_0_1(void) { Block_Y_xx_1(0, W_0y, W_4y, W_8y, W_12y); }
+static INLINE void Block_Y_0_2(void) { Block_Y_xx_2(0, W_0y, W_4y, W_8y, W_12y); }
+static INLINE void Block_Y_0_3(void) { Block_Y_xx_3(0, W_0y, W_4y, W_8y, W_12y); }
+static INLINE void Block_Y_0_4(void) { Block_Y_xx_4(0, W_0y, W_4y, W_8y, W_12y); }
+static INLINE void Block_Y_0_5(void) { Block_Y_xx_5(0, W_0y, W_4y, W_8y, W_12y); }
+static INLINE void Block_Y_0_6(void) { Block_Y_xx_6(0, W_0y, W_4y, W_8y, W_12y); }
+static INLINE void Block_Y_0_7(void) { Block_Y_xx_7(0, W_0y, W_4y, W_8y, W_12y); }
+static INLINE void Block_Y_0_8(void) { Block_Y_xx_8(0, W_0y, W_4y, W_8y, W_12y); }
+static INLINE void Block_Y_0_9(void) { Block_Y_xx_9(0, W_0y, W_4y, W_8y, W_12y); }
+static INLINE void Block_Y_0_10(void){ Block_Y_xx_10(0, W_0y, W_4y, W_8y, W_12y); }
+static INLINE void Block_Y_0_11(void){ Block_Y_xx_11(0, W_0y, W_4y, W_8y, W_12y); }
+static INLINE void Block_Y_0_12(word64 *w){ Block_Y_xx_12(0, W_0y, W_4y, W_8y, W_12y); }
 
-static INLINE void Block_Y_4_1(void) { Block_Y_xx_1(4, W_4y, W_8y, W_12y, W_0y) ; }
-static INLINE void Block_Y_4_2(void) { Block_Y_xx_2(4, W_4y, W_8y, W_12y, W_0y) ; }
-static INLINE void Block_Y_4_3(void) { Block_Y_xx_3(4, W_4y, W_8y, W_12y, W_0y) ; }
-static INLINE void Block_Y_4_4(void) { Block_Y_xx_4(4, W_4y, W_8y, W_12y, W_0y) ; }
-static INLINE void Block_Y_4_5(void) { Block_Y_xx_5(4, W_4y, W_8y, W_12y, W_0y) ; }
-static INLINE void Block_Y_4_6(void) { Block_Y_xx_6(4, W_4y, W_8y, W_12y, W_0y) ; }
-static INLINE void Block_Y_4_7(void) { Block_Y_xx_7(4, W_4y, W_8y, W_12y, W_0y) ; }
-static INLINE void Block_Y_4_8(void) { Block_Y_xx_8(4, W_4y, W_8y, W_12y, W_0y) ; }
-static INLINE void Block_Y_4_9(void) { Block_Y_xx_9(4, W_4y, W_8y, W_12y, W_0y) ; }
-static INLINE void Block_Y_4_10(void) { Block_Y_xx_10(4, W_4y, W_8y, W_12y, W_0y) ; }
-static INLINE void Block_Y_4_11(void) { Block_Y_xx_11(4, W_4y, W_8y, W_12y, W_0y) ; }
-static INLINE void Block_Y_4_12(word64 *w) { Block_Y_xx_12(4, W_4y, W_8y, W_12y, W_0y) ; }
+static INLINE void Block_Y_4_1(void) { Block_Y_xx_1(4, W_4y, W_8y, W_12y, W_0y); }
+static INLINE void Block_Y_4_2(void) { Block_Y_xx_2(4, W_4y, W_8y, W_12y, W_0y); }
+static INLINE void Block_Y_4_3(void) { Block_Y_xx_3(4, W_4y, W_8y, W_12y, W_0y); }
+static INLINE void Block_Y_4_4(void) { Block_Y_xx_4(4, W_4y, W_8y, W_12y, W_0y); }
+static INLINE void Block_Y_4_5(void) { Block_Y_xx_5(4, W_4y, W_8y, W_12y, W_0y); }
+static INLINE void Block_Y_4_6(void) { Block_Y_xx_6(4, W_4y, W_8y, W_12y, W_0y); }
+static INLINE void Block_Y_4_7(void) { Block_Y_xx_7(4, W_4y, W_8y, W_12y, W_0y); }
+static INLINE void Block_Y_4_8(void) { Block_Y_xx_8(4, W_4y, W_8y, W_12y, W_0y); }
+static INLINE void Block_Y_4_9(void) { Block_Y_xx_9(4, W_4y, W_8y, W_12y, W_0y); }
+static INLINE void Block_Y_4_10(void) { Block_Y_xx_10(4, W_4y, W_8y, W_12y, W_0y); }
+static INLINE void Block_Y_4_11(void) { Block_Y_xx_11(4, W_4y, W_8y, W_12y, W_0y); }
+static INLINE void Block_Y_4_12(word64 *w) { Block_Y_xx_12(4, W_4y, W_8y, W_12y, W_0y); }
 
-static INLINE void Block_Y_8_1(void) { Block_Y_xx_1(8, W_8y, W_12y, W_0y, W_4y) ; }
-static INLINE void Block_Y_8_2(void) { Block_Y_xx_2(8, W_8y, W_12y, W_0y, W_4y) ; }
-static INLINE void Block_Y_8_3(void) { Block_Y_xx_3(8, W_8y, W_12y, W_0y, W_4y) ; }
-static INLINE void Block_Y_8_4(void) { Block_Y_xx_4(8, W_8y, W_12y, W_0y, W_4y) ; }
-static INLINE void Block_Y_8_5(void) { Block_Y_xx_5(8, W_8y, W_12y, W_0y, W_4y) ; }
-static INLINE void Block_Y_8_6(void) { Block_Y_xx_6(8, W_8y, W_12y, W_0y, W_4y) ; }
-static INLINE void Block_Y_8_7(void) { Block_Y_xx_7(8, W_8y, W_12y, W_0y, W_4y) ; }
-static INLINE void Block_Y_8_8(void) { Block_Y_xx_8(8, W_8y, W_12y, W_0y, W_4y) ; }
-static INLINE void Block_Y_8_9(void) { Block_Y_xx_9(8, W_8y, W_12y, W_0y, W_4y) ; }
-static INLINE void Block_Y_8_10(void) { Block_Y_xx_10(8, W_8y, W_12y, W_0y, W_4y) ; }
-static INLINE void Block_Y_8_11(void) { Block_Y_xx_11(8, W_8y, W_12y, W_0y, W_4y) ; }
-static INLINE void Block_Y_8_12(word64 *w) { Block_Y_xx_12(8, W_8y, W_12y, W_0y, W_4y) ; }
+static INLINE void Block_Y_8_1(void) { Block_Y_xx_1(8, W_8y, W_12y, W_0y, W_4y); }
+static INLINE void Block_Y_8_2(void) { Block_Y_xx_2(8, W_8y, W_12y, W_0y, W_4y); }
+static INLINE void Block_Y_8_3(void) { Block_Y_xx_3(8, W_8y, W_12y, W_0y, W_4y); }
+static INLINE void Block_Y_8_4(void) { Block_Y_xx_4(8, W_8y, W_12y, W_0y, W_4y); }
+static INLINE void Block_Y_8_5(void) { Block_Y_xx_5(8, W_8y, W_12y, W_0y, W_4y); }
+static INLINE void Block_Y_8_6(void) { Block_Y_xx_6(8, W_8y, W_12y, W_0y, W_4y); }
+static INLINE void Block_Y_8_7(void) { Block_Y_xx_7(8, W_8y, W_12y, W_0y, W_4y); }
+static INLINE void Block_Y_8_8(void) { Block_Y_xx_8(8, W_8y, W_12y, W_0y, W_4y); }
+static INLINE void Block_Y_8_9(void) { Block_Y_xx_9(8, W_8y, W_12y, W_0y, W_4y); }
+static INLINE void Block_Y_8_10(void) { Block_Y_xx_10(8, W_8y, W_12y, W_0y, W_4y); }
+static INLINE void Block_Y_8_11(void) { Block_Y_xx_11(8, W_8y, W_12y, W_0y, W_4y); }
+static INLINE void Block_Y_8_12(word64 *w) { Block_Y_xx_12(8, W_8y, W_12y, W_0y, W_4y); }
 
-static INLINE void Block_Y_12_1(void) { Block_Y_xx_1(12, W_12y, W_0y, W_4y, W_8y) ; }
-static INLINE void Block_Y_12_2(void) { Block_Y_xx_2(12, W_12y, W_0y, W_4y, W_8y) ; }
-static INLINE void Block_Y_12_3(void) { Block_Y_xx_3(12, W_12y, W_0y, W_4y, W_8y) ; }
-static INLINE void Block_Y_12_4(void) { Block_Y_xx_4(12, W_12y, W_0y, W_4y, W_8y) ; }
-static INLINE void Block_Y_12_5(void) { Block_Y_xx_5(12, W_12y, W_0y, W_4y, W_8y) ; }
-static INLINE void Block_Y_12_6(void) { Block_Y_xx_6(12, W_12y, W_0y, W_4y, W_8y) ; }
-static INLINE void Block_Y_12_7(void) { Block_Y_xx_7(12, W_12y, W_0y, W_4y, W_8y) ; }
-static INLINE void Block_Y_12_8(void) { Block_Y_xx_8(12, W_12y, W_0y, W_4y, W_8y) ; }
-static INLINE void Block_Y_12_9(void) { Block_Y_xx_9(12, W_12y, W_0y, W_4y, W_8y) ; }
-static INLINE void Block_Y_12_10(void) { Block_Y_xx_10(12, W_12y, W_0y, W_4y, W_8y) ; }
-static INLINE void Block_Y_12_11(void) { Block_Y_xx_11(12, W_12y, W_0y, W_4y, W_8y) ; }
-static INLINE void Block_Y_12_12(word64 *w) { Block_Y_xx_12(12, W_12y, W_0y, W_4y, W_8y) ; }
+static INLINE void Block_Y_12_1(void) { Block_Y_xx_1(12, W_12y, W_0y, W_4y, W_8y); }
+static INLINE void Block_Y_12_2(void) { Block_Y_xx_2(12, W_12y, W_0y, W_4y, W_8y); }
+static INLINE void Block_Y_12_3(void) { Block_Y_xx_3(12, W_12y, W_0y, W_4y, W_8y); }
+static INLINE void Block_Y_12_4(void) { Block_Y_xx_4(12, W_12y, W_0y, W_4y, W_8y); }
+static INLINE void Block_Y_12_5(void) { Block_Y_xx_5(12, W_12y, W_0y, W_4y, W_8y); }
+static INLINE void Block_Y_12_6(void) { Block_Y_xx_6(12, W_12y, W_0y, W_4y, W_8y); }
+static INLINE void Block_Y_12_7(void) { Block_Y_xx_7(12, W_12y, W_0y, W_4y, W_8y); }
+static INLINE void Block_Y_12_8(void) { Block_Y_xx_8(12, W_12y, W_0y, W_4y, W_8y); }
+static INLINE void Block_Y_12_9(void) { Block_Y_xx_9(12, W_12y, W_0y, W_4y, W_8y); }
+static INLINE void Block_Y_12_10(void) { Block_Y_xx_10(12, W_12y, W_0y, W_4y, W_8y); }
+static INLINE void Block_Y_12_11(void) { Block_Y_xx_11(12, W_12y, W_0y, W_4y, W_8y); }
+static INLINE void Block_Y_12_12(word64 *w) { Block_Y_xx_12(12, W_12y, W_0y, W_4y, W_8y); }
 
 
 static int Transform_AVX2(Sha512* sha512)
 {
     const word64* K = K512;
-    word64 w[4] ;
-    word32 j /*, k*/;
+    word64 w[4];
+    word32 j;
     word64 T[8];
+
     /* Copy digest to working vars */
     XMEMCPY(T, sha512->digest, sizeof(T));
 
-    W_from_buff_Y(sha512->buffer) ;
-    MOVE_to_MEMy(w,0, W_0y) ;
+    W_from_buff_Y(sha512->buffer);
+    MOVE_to_MEMy(w,0, W_0y);
     for (j = 0; j < 80; j += 16) {
         Ry_1( 0, w[0]); Block_Y_0_1(); Ry_2( 0, w[0]); Block_Y_0_2();
                                        Ry_3( 0, w[0]); Block_Y_0_3();
@@ -1298,7 +1356,6 @@ static int Transform_AVX2(Sha512* sha512)
     }
 
     /* Add the working vars back into digest */
-
     sha512->digest[0] += a(0);
     sha512->digest[1] += b(0);
     sha512->digest[2] += c(0);
@@ -1309,19 +1366,22 @@ static int Transform_AVX2(Sha512* sha512)
     sha512->digest[7] += h(0);
 
     /* Wipe variables */
-    #if !defined(HAVE_INTEL_AVX1)&&!defined(HAVE_INTEL_AVX2)
+#if !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2)
     XMEMSET(W, 0, sizeof(word64) * 16);
-    #endif
+#endif
     XMEMSET(T, 0, sizeof(T));
 
     return 0;
 }
-
-#endif
+#endif /* HAVE_INTEL_AVX2 */
 
 
+
+/* -------------------------------------------------------------------------- */
+/* SHA384 */
+/* -------------------------------------------------------------------------- */
 #ifdef WOLFSSL_SHA384
-int wc_InitSha384(Sha384* sha384)
+static int InitSha384(Sha384* sha384)
 {
     if (sha384 == NULL) {
         return BAD_FUNC_ARG;
@@ -1340,10 +1400,6 @@ int wc_InitSha384(Sha384* sha384)
     sha384->loLen   = 0;
     sha384->hiLen   = 0;
 
-#if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
-    set_Transform() ;
-#endif
-
     return 0;
 }
 
@@ -1353,29 +1409,148 @@ int wc_Sha384Update(Sha384* sha384, const byte* data, word32 len)
         return BAD_FUNC_ARG;
     }
 
-    return Sha512Update((Sha512 *)sha384, data, len);
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384)
+    if (sha384->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA384) {
+    #if defined(HAVE_INTEL_QA)
+        return IntelQaSymSha384(&sha384->asyncDev, NULL, data, len);
+    #endif
+    }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+    return Sha512Update((Sha512*)sha384, data, len);
 }
 
 
 int wc_Sha384Final(Sha384* sha384, byte* hash)
 {
     int ret;
 
     if (sha384 == NULL || hash == NULL) {
         return BAD_FUNC_ARG;
     }
 
-    ret = Sha512Final((Sha512 *)sha384);
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384)
+    if (sha384->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA384) {
+    #if defined(HAVE_INTEL_QA)
+        return IntelQaSymSha384(&sha384->asyncDev, hash, NULL,
+                                            SHA384_DIGEST_SIZE);
+    #endif
+    }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+    ret = Sha512Final((Sha512*)sha384); /* software path reuses SHA-512 final */
     if (ret != 0)
         return ret;
 
     XMEMCPY(hash, sha384->digest, SHA384_DIGEST_SIZE);
 
-    return wc_InitSha384(sha384);  /* reset state */
+    return InitSha384(sha384);  /* reset state */
 }
+
+
+/* Initialize sha384 for hashing; heap is stored in the context and, when
+ * async SHA-384 is compiled in, devId is passed to the async device init. */
+int wc_InitSha384_ex(Sha384* sha384, void* heap, int devId)
+{
+    int status;
+
+    if (sha384 == NULL)
+        return BAD_FUNC_ARG;
+
+    sha384->heap = heap;
+    status = InitSha384(sha384);
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384)
+    if (status == 0) {
+        status = wolfAsync_DevCtxInit(&sha384->asyncDev,
+                        WOLFSSL_ASYNC_MARKER_SHA384, sha384->heap, devId);
+    }
+#else
+    (void)devId; /* only consumed by the async build */
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+    return status;
+}
+
+int wc_InitSha384(Sha384* sha384) /* default init: NULL heap, INVALID_DEVID */
+{
+    return wc_InitSha384_ex(sha384, NULL, INVALID_DEVID);
+}
+
+/* Frees the context's async device state (async builds); NULL is ignored. */
+void wc_Sha384Free(Sha384* sha384)
+{
+    if (sha384 != NULL) {
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384)
+        wolfAsync_DevCtxFree(&sha384->asyncDev, WOLFSSL_ASYNC_MARKER_SHA384);
+#endif /* WOLFSSL_ASYNC_CRYPT */
+    }
+}
+
 #endif /* WOLFSSL_SHA384 */
 
 #endif /* HAVE_FIPS */
 
-#endif /* WOLFSSL_SHA512 */
 
+/* Digest of the data so far, produced by finalizing a temporary copy. */
+int wc_Sha512GetHash(Sha512* sha512, byte* hash)
+{
+    Sha512 snapshot;
+    int    rc;
+
+    if (hash == NULL || sha512 == NULL)
+        return BAD_FUNC_ARG;
+
+    rc = wc_Sha512Copy(sha512, &snapshot);
+    if (rc != 0)
+        return rc;
+    return wc_Sha512Final(&snapshot, hash);
+}
+
+/* Byte-copies the whole context from src to dst; async builds additionally
+ * run wolfAsync_DevCopy on the asyncDev members and return its result. */
+int wc_Sha512Copy(Sha512* src, Sha512* dst)
+{
+    if (dst == NULL || src == NULL)
+        return BAD_FUNC_ARG;
+
+    XMEMCPY(dst, src, sizeof(*dst));
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    return wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
+#else
+    return 0;
+#endif
+}
+
+#ifdef WOLFSSL_SHA384
+/* Digest of the data so far, produced by finalizing a temporary copy. */
+int wc_Sha384GetHash(Sha384* sha384, byte* hash)
+{
+    Sha384 snapshot;
+    int    rc;
+
+    if (hash == NULL || sha384 == NULL)
+        return BAD_FUNC_ARG;
+
+    rc = wc_Sha384Copy(sha384, &snapshot);
+    if (rc != 0)
+        return rc;
+    return wc_Sha384Final(&snapshot, hash);
+}
+/* Byte-copies the whole context from src to dst; async builds additionally
+ * run wolfAsync_DevCopy on the asyncDev members and return its result. */
+int wc_Sha384Copy(Sha384* src, Sha384* dst)
+{
+    if (dst == NULL || src == NULL)
+        return BAD_FUNC_ARG;
+
+    XMEMCPY(dst, src, sizeof(*dst));
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    return wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
+#else
+    return 0;
+#endif
+}
+#endif /* WOLFSSL_SHA384 */
+
+#endif /* WOLFSSL_SHA512 */
diff --git a/wolfcrypt/src/signature.c b/wolfcrypt/src/signature.c
index 388aafde0..247d5d931 100644
--- a/wolfcrypt/src/signature.c
+++ b/wolfcrypt/src/signature.c
@@ -175,7 +175,15 @@ int wc_SignatureVerify(
                 int is_valid_sig = 0;
 
                 /* Perform verification of signature using provided ECC key */
-                ret = wc_ecc_verify_hash(sig, sig_len, hash_data, hash_len, &is_valid_sig, (ecc_key*)key);
+                do {
+                #ifdef WOLFSSL_ASYNC_CRYPT
+                    ret = wc_AsyncWait(ret, &((ecc_key*)key)->asyncDev,
+                        WC_ASYNC_FLAG_CALL_AGAIN);
+                #endif
+                if (ret >= 0)
+                    ret = wc_ecc_verify_hash(sig, sig_len, hash_data, hash_len,
+                        &is_valid_sig, (ecc_key*)key);
+                } while (ret == WC_PENDING_E);
                 if (ret != 0 || is_valid_sig != 1) {
                     ret = SIG_VERIFY_E;
                 }
@@ -212,8 +220,15 @@ int wc_SignatureVerify(
                 plain_data = (byte*)XMALLOC(plain_len, NULL, DYNAMIC_TYPE_TMP_BUFFER);
                 if (plain_data) {
                     /* Perform verification of signature using provided RSA key */
-                    ret = wc_RsaSSL_Verify(sig, sig_len, plain_data, plain_len,
-                        (RsaKey*)key);
+                    do {
+                    #ifdef WOLFSSL_ASYNC_CRYPT
+                        ret = wc_AsyncWait(ret, &((RsaKey*)key)->asyncDev,
+                            WC_ASYNC_FLAG_CALL_AGAIN);
+                    #endif
+                    if (ret >= 0)
+                        ret = wc_RsaSSL_Verify(sig, sig_len, plain_data,
+                            plain_len, (RsaKey*)key);
+                    } while (ret == WC_PENDING_E);
                     if (ret >= 0) {
                         if ((word32)ret == hash_len &&
                                 XMEMCMP(plain_data, hash_data, hash_len) == 0) {
@@ -296,7 +311,15 @@ int wc_SignatureGenerate(
             case WC_SIGNATURE_TYPE_ECC:
 #if defined(HAVE_ECC) && defined(HAVE_ECC_SIGN)
                 /* Create signature using provided ECC key */
-                ret = wc_ecc_sign_hash(hash_data, hash_len, sig, sig_len, rng, (ecc_key*)key);
+                do {
+                #ifdef WOLFSSL_ASYNC_CRYPT
+                    ret = wc_AsyncWait(ret, &((ecc_key*)key)->asyncDev,
+                        WC_ASYNC_FLAG_CALL_AGAIN);
+                #endif
+                if (ret >= 0)
+                    ret = wc_ecc_sign_hash(hash_data, hash_len, sig, sig_len,
+                        rng, (ecc_key*)key);
+                } while (ret == WC_PENDING_E);
 #else
                 ret = SIG_TYPE_E;
 #endif
@@ -319,7 +342,15 @@ int wc_SignatureGenerate(
             case WC_SIGNATURE_TYPE_RSA:
 #ifndef NO_RSA
                 /* Create signature using provided RSA key */
-                ret = wc_RsaSSL_Sign(hash_data, hash_len, sig, *sig_len, (RsaKey*)key, rng);
+                do {
+                #ifdef WOLFSSL_ASYNC_CRYPT
+                    ret = wc_AsyncWait(ret, &((RsaKey*)key)->asyncDev,
+                        WC_ASYNC_FLAG_CALL_AGAIN);
+                #endif
+                    if (ret >= 0)
+                        ret = wc_RsaSSL_Sign(hash_data, hash_len, sig, *sig_len,
+                            (RsaKey*)key, rng);
+                } while (ret == WC_PENDING_E);
                 if (ret >= 0) {
                     *sig_len = ret;
                     ret = 0; /* Success */
diff --git a/wolfcrypt/src/tfm.c b/wolfcrypt/src/tfm.c
index cc74abd06..54f0dce54 100644
--- a/wolfcrypt/src/tfm.c
+++ b/wolfcrypt/src/tfm.c
@@ -988,7 +988,7 @@ int fp_mulmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
 
   fp_init(&t);
   fp_mul(a, b, &t);
-#if defined(ALT_ECC_SIZE) || defined(WOLFSSL_ASYNC_CRYPT)
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
   if (d->size < FP_SIZE) {
     err = fp_mod(&t, c, &t);
     fp_copy(&t, d);
@@ -1009,7 +1009,7 @@ int fp_submod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
 
   fp_init(&t);
   fp_sub(a, b, &t);
-#if defined(ALT_ECC_SIZE) || defined(WOLFSSL_ASYNC_CRYPT)
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
   if (d->size < FP_SIZE) {
     err = fp_mod(&t, c, &t);
     fp_copy(&t, d);
@@ -1030,7 +1030,7 @@ int fp_addmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
 
   fp_init(&t);
   fp_add(a, b, &t);
-#if defined(ALT_ECC_SIZE) || defined(WOLFSSL_ASYNC_CRYPT)
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
   if (d->size < FP_SIZE) {
     err = fp_mod(&t, c, &t);
     fp_copy(&t, d);
@@ -2194,7 +2194,7 @@ void fp_sub_d(fp_int *a, fp_digit b, fp_int *c)
    fp_int tmp;
    fp_init(&tmp);
    fp_set(&tmp, b);
-#if defined(ALT_ECC_SIZE) || defined(WOLFSSL_ASYNC_CRYPT)
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
    if (c->size < FP_SIZE) {
      fp_sub(a, &tmp, &tmp);
      fp_copy(&tmp, c);
@@ -2218,8 +2218,11 @@ int mp_init (mp_int * a)
 
 void fp_init(fp_int *a)
 {
-#if defined(ALT_ECC_SIZE) || defined(WOLFSSL_ASYNC_CRYPT)
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
     a->size = FP_SIZE;
+#endif
+#ifdef HAVE_WOLF_BIGINT
+    wc_bigint_init(&a->raw); /* raw buffer starts out unallocated (NULL, 0) */
 #endif
     fp_zero(a);
 }
@@ -2229,7 +2232,7 @@ void fp_zero(fp_int *a)
     int size = FP_SIZE;
     a->used = 0;
     a->sign = FP_ZPOS;
-#if defined(ALT_ECC_SIZE) || defined(WOLFSSL_ASYNC_CRYPT)
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
     size = a->size;
 #endif
     XMEMSET(a->dp, 0, size * sizeof(fp_digit));
@@ -2240,17 +2243,52 @@ void fp_clear(fp_int *a)
     int size = FP_SIZE;
     a->used = 0;
     a->sign = FP_ZPOS;
-#if defined(ALT_ECC_SIZE) || defined(WOLFSSL_ASYNC_CRYPT)
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
+    size = a->size;
+#endif
+    XMEMSET(a->dp, 0, size * sizeof(fp_digit));
+    fp_free(a);
+}
+
+void fp_forcezero (mp_int * a) /* reset to zero, wiping digits via ForceZero */
+{
+    int size = FP_SIZE; /* digits to wipe; replaced by a->size when present */
+    a->used = 0;
+    a->sign = FP_ZPOS;
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
     size = a->size;
 #endif
     ForceZero(a->dp, size * sizeof(fp_digit));
+#ifdef HAVE_WOLF_BIGINT
+    wc_bigint_zero(&a->raw); /* also wipe the raw byte buffer, if any */
+#endif
+    fp_free(a); /* releases a->raw in HAVE_WOLF_BIGINT builds, else no-op */
+}
+
+void mp_forcezero (mp_int * a) /* mp_ name forwarding to fp_forcezero */
+{
+    fp_forcezero(a);
+}
+
+void fp_free(fp_int* a) /* frees only the optional raw buffer, not a itself */
+{
+#ifdef HAVE_WOLF_BIGINT
+    wc_bigint_free(&a->raw);
+#else
+    (void)a; /* nothing to release without HAVE_WOLF_BIGINT */
+#endif
 }
 
 
 /* clear one (frees)  */
 void mp_clear (mp_int * a)
 {
-    fp_zero(a);
+    fp_clear(a); /* zeroes the digits and then calls fp_free */
+}
+
+void mp_free(mp_int* a) /* mp_ name forwarding to fp_free */
+{
+    fp_free(a);
 }
 
 /* handle up to 6 inits */
@@ -2425,7 +2463,7 @@ void fp_copy(fp_int *a, fp_int *b)
 {
     /* if source and destination are different */
     if (a != b) {
-#if defined(ALT_ECC_SIZE) || defined(WOLFSSL_ASYNC_CRYPT)
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
         /* verify a will fit in b */
         if (b->size >= a->used) {
             int x, oldused;
@@ -2524,7 +2562,7 @@ int fp_sqrmod(fp_int *a, fp_int *b, fp_int *c)
   fp_init(&t);
   fp_sqr(a, &t);
 
-#if defined(ALT_ECC_SIZE) || defined(WOLFSSL_ASYNC_CRYPT)
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
   if (c->size < FP_SIZE) {
     err = fp_mod(&t, b, &t);
     fp_copy(&t, c);
@@ -3271,7 +3309,7 @@ void mp_dump(const char* desc, mp_int* a, byte verbose)
   char buffer[FP_SIZE * sizeof(fp_digit) * 2];
   int size = FP_SIZE;
 
-#if defined(ALT_ECC_SIZE) || defined(WOLFSSL_ASYNC_CRYPT)
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
   size = a->size;
 #endif
 
diff --git a/wolfcrypt/src/wc_port.c b/wolfcrypt/src/wc_port.c
index 4d75a2253..05feaac96 100755
--- a/wolfcrypt/src/wc_port.c
+++ b/wolfcrypt/src/wc_port.c
@@ -29,6 +29,12 @@
 #include 
 #include 
 #include 
+#ifdef HAVE_ECC
+    #include 
+#endif
+#ifdef WOLFSSL_ASYNC_CRYPT
+    #include 
+#endif
 
 /* IPP header files for library initialization */
 #ifdef HAVE_FAST_RSA
@@ -48,6 +54,10 @@
     #include 
 #endif
 
+#if defined(USE_WOLFSSL_MEMORY) && defined(WOLFSSL_TRACK_MEMORY)
+    #include 
+#endif
+
 #ifdef _MSC_VER
     /* 4996 warning to use MS extensions e.g., strcpy_s instead of strncpy */
     #pragma warning(disable: 4996)
@@ -64,6 +74,14 @@ int wolfCrypt_Init(void)
     int ret = 0;
 
     if (initRefCount == 0) {
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        wolfAsync_HardwareStart();
+    #endif
+
+    #if defined(WOLFSSL_TRACK_MEMORY) && !defined(WOLFSSL_STATIC_MEMORY)
+        InitMemoryTracker();
+    #endif
+
     #if WOLFSSL_CRYPT_HW_MUTEX
         /* If crypto hardware mutex protection is enabled, then initialize it */
         wolfSSL_CryptHwMutexInit();
@@ -96,7 +114,7 @@ int wolfCrypt_Init(void)
     #endif
 
     #if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
-            wolfSSL_EVP_init();
+        wolfSSL_EVP_init();
     #endif
 
     #if defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE)
@@ -106,6 +124,15 @@ int wolfCrypt_Init(void)
         }
     #endif
 
+    #ifdef HAVE_ECC
+        #ifdef ECC_CACHE_CURVE
+            if ((ret = wc_ecc_curve_cache_init()) != 0) {
+                WOLFSSL_MSG("Error creating curve cache");
+                return ret;
+            }
+        #endif
+    #endif
+
         initRefCount = 1;
     }
 
@@ -120,9 +147,28 @@ int wolfCrypt_Cleanup(void)
 
     WOLFSSL_ENTER("wolfCrypt_Cleanup");
 
-    #if defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE)
-        ret = wc_LoggingCleanup();
+#ifdef HAVE_ECC
+    #ifdef FP_ECC
+        wc_ecc_fp_free();
     #endif
+    #ifdef ECC_CACHE_CURVE
+        wc_ecc_curve_cache_free();
+    #endif
+#endif
+
+#if defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE)
+    ret = wc_LoggingCleanup();
+#endif
+
+#if defined(WOLFSSL_TRACK_MEMORY) && !defined(WOLFSSL_STATIC_MEMORY)
+    ShowMemoryTracker();
+#endif
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    wolfAsync_HardwareStop();
+#endif
+
+    initRefCount = 0; /* allow re-init */
 
     return ret;
 }
diff --git a/wolfcrypt/src/wolfevent.c b/wolfcrypt/src/wolfevent.c
index 6a8379bfe..6e3eae2b3 100644
--- a/wolfcrypt/src/wolfevent.c
+++ b/wolfcrypt/src/wolfevent.c
@@ -103,16 +103,7 @@ int wolfEventQueue_Push(WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event)
     event->next = NULL;
     event->pending = 1;
 
-    if (queue->tail == NULL)  {
-        queue->head = event;
-    }
-    else {
-        queue->tail->next = event;
-        event->prev = queue->tail;
-    }
-    queue->tail = event;      /* add to the end either way */
-    queue->count++;
-    ret = 0;
+    ret = wolfEventQueue_Add(queue, event);
 
 #ifndef SINGLE_THREADED
     wc_UnLockMutex(&queue->lock);
@@ -147,6 +138,26 @@ int wolfEventQueue_Pop(WOLF_EVENT_QUEUE* queue, WOLF_EVENT** event)
     return ret;
 }
 
+/* Appends event at the tail; queue must already be locked by the caller. */
+int wolfEventQueue_Add(WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event)
+{
+    if (event == NULL || queue == NULL)
+        return BAD_FUNC_ARG;
+
+    if (queue->tail != NULL) {
+        /* non-empty: link the new node behind the current tail */
+        event->prev = queue->tail;
+        queue->tail->next = event;
+    }
+    else {
+        queue->head = event; /* empty queue: event is also the head */
+    }
+    queue->tail = event;
+    queue->count++;
+
+    return 0;
+}
+
 /* assumes queue is locked by caller */
 int wolfEventQueue_Remove(WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event)
 {
diff --git a/wolfcrypt/src/wolfmath.c b/wolfcrypt/src/wolfmath.c
index 462e89fe0..2f368989d 100644
--- a/wolfcrypt/src/wolfmath.c
+++ b/wolfcrypt/src/wolfmath.c
@@ -40,6 +40,18 @@
 
 #if defined(USE_FAST_MATH) || !defined(NO_BIG_INT)
 
+#ifdef WOLFSSL_ASYNC_CRYPT
+    #include 
+#endif
+
+#ifdef NO_INLINE
+    #include 
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include 
+#endif
+
+
 int get_digit_count(mp_int* a)
 {
     if (a == NULL)
@@ -61,6 +73,7 @@ int get_rand_digit(WC_RNG* rng, mp_digit* d)
     return wc_RNG_GenerateBlock(rng, (byte*)d, sizeof(mp_digit));
 }
 
+#ifdef WC_RSA_BLINDING
 int mp_rand(mp_int* a, int digits, WC_RNG* rng)
 {
     int ret;
@@ -103,5 +116,134 @@ int mp_rand(mp_int* a, int digits, WC_RNG* rng)
 
     return ret;
 }
+#endif /* WC_RSA_BLINDING */
 
-#endif
+
+#ifdef HAVE_WOLF_BIGINT
+void wc_bigint_init(WC_BIGINT* a) /* reset to empty; NULL a is ignored */
+{
+    if (a == NULL)
+        return;
+    a->heap = NULL;
+    a->len  = 0;
+    a->buf  = NULL;
+}
+
+/* Ensure a->buf holds at least sz zeroed bytes and set a->len to sz.
+ * A buffer smaller than sz is released and reallocated from a->heap.
+ * Returns MP_OKAY, BAD_FUNC_ARG (NULL a) or MP_MEM (a->len reset to 0). */
+int wc_bigint_alloc(WC_BIGINT* a, word32 sz)
+{
+    if (a == NULL)
+        return BAD_FUNC_ARG;
+
+    if (sz > 0) {
+        if (a->buf && sz > a->len) {
+            wc_bigint_free(a);
+        }
+        if (a->buf == NULL) {
+            a->buf = (byte*)XMALLOC(sz, a->heap, DYNAMIC_TYPE_WOLF_BIGINT);
+        }
+        if (a->buf == NULL) {
+            a->len = 0; /* never advertise a length without backing storage */
+            return MP_MEM;
+        }
+        XMEMSET(a->buf, 0, sz);
+    }
+    a->len = sz;
+
+    return MP_OKAY;
+}
+
+/* Stores a copy of the inlen-byte buffer in; the bytes are kept verbatim and
+ * the caller is expected to supply them in big endian order. */
+int wc_bigint_from_unsigned_bin(WC_BIGINT* a, const byte* in, word32 inlen)
+{
+    int rc;
+
+    if (inlen == 0 || in == NULL || a == NULL)
+        return BAD_FUNC_ARG;
+
+    rc = wc_bigint_alloc(a, inlen);
+    if (rc != 0)
+        return rc;    /* allocation failed; nothing was copied */
+    XMEMCPY(a->buf, in, inlen);
+    return 0;
+}
+
+/* Copies up to *outlen bytes of a's buffer into out and stores the number
+ * of bytes reported in *outlen. Longer values are truncated (with a log
+ * message) rather than rejected. Returns MP_OKAY or BAD_FUNC_ARG. */
+int wc_bigint_to_unsigned_bin(WC_BIGINT* a, byte* out, word32* outlen)
+{
+    word32 count;
+
+    if (a == NULL || out == NULL || outlen == NULL || *outlen == 0)
+        return BAD_FUNC_ARG;
+
+    count = a->len;
+    if (count > *outlen) {
+        /* caller's buffer is smaller than the value: keep what fits */
+        WOLFSSL_MSG("wc_bigint_export: Truncating output");
+        count = *outlen;
+    }
+    if (a->buf != NULL)
+        XMEMCPY(out, a->buf, count);
+    *outlen = count;
+
+    return MP_OKAY;
+}
+
+void wc_bigint_zero(WC_BIGINT* a) /* wipe a->len bytes; buffer stays allocated */
+{
+    if (a == NULL || a->buf == NULL)
+        return;
+    ForceZero(a->buf, a->len);
+}
+
+void wc_bigint_free(WC_BIGINT* a) /* release buffer, mark empty; heap kept */
+{
+    if (a == NULL)
+        return;
+    if (a->buf != NULL) {
+        XFREE(a->buf, a->heap, DYNAMIC_TYPE_WOLF_BIGINT);
+    }
+    a->buf = NULL;
+    a->len = 0;
+}
+
+/* Exports src as unsigned bytes into dst, sizing dst's buffer through
+ * wc_bigint_alloc first. Returns the error of whichever step fails. */
+int wc_mp_to_bigint(mp_int* src, WC_BIGINT* dst)
+{
+    int rc;
+
+    if (dst == NULL || src == NULL)
+        return BAD_FUNC_ARG;
+
+    rc = wc_bigint_alloc(dst, mp_unsigned_bin_size(src));
+    if (rc != MP_OKAY)
+        return rc;
+
+    return mp_to_unsigned_bin(src, dst->buf);
+}
+
+/* Loads src's bytes into dst with mp_read_unsigned_bin, then frees src's
+ * buffer whether or not the read succeeded. NULL arguments or a src
+ * without a buffer give BAD_FUNC_ARG. */
+int wc_bigint_to_mp(WC_BIGINT* src, mp_int* dst)
+{
+    int rc;
+
+    if (src == NULL || dst == NULL || src->buf == NULL)
+        return BAD_FUNC_ARG;
+
+    rc = mp_read_unsigned_bin(dst, src->buf, src->len);
+    wc_bigint_free(src); /* src's buffer is released after the read */
+
+    return rc;
+}
+
+#endif /* HAVE_WOLF_BIGINT */
+
+#endif /* USE_FAST_MATH || !NO_BIG_INT */
diff --git a/wolfcrypt/test/test.c b/wolfcrypt/test/test.c
index 654742bfa..ce24a6511 100644
--- a/wolfcrypt/test/test.c
+++ b/wolfcrypt/test/test.c
@@ -110,6 +110,14 @@
     #include 
 #endif
 
+/* only for stack size check */
+#ifdef HAVE_STACK_SIZE
+    #include 
+    #define err_sys err_sys_remap /* remap err_sys */
+    #include 
+    #undef err_sys
+#endif
+
 #ifdef _MSC_VER
     /* 4996 warning to use MS extensions e.g., strcpy_s instead of strncpy */
     #pragma warning(disable: 4996)
@@ -169,9 +177,6 @@
 
 #include "wolfcrypt/test/test.h"
 
-#ifdef USE_WOLFSSL_MEMORY
-    #include "wolfssl/wolfcrypt/mem_track.h"
-#endif
 
 /* for async devices */
 static int devId = INVALID_DEVID;
@@ -285,7 +290,8 @@ int mutex_test(void);
 int memcb_test(void);
 #endif
 
-#if defined(DEBUG_WOLFSSL) && !defined(HAVE_VALGRIND) && !defined(OPENSSL_EXTRA)
+#if defined(DEBUG_WOLFSSL) && !defined(HAVE_VALGRIND) && \
+        !defined(OPENSSL_EXTRA) && !defined(HAVE_STACK_SIZE)
     int  wolfSSL_Debugging_ON(void);
 #endif
 
@@ -295,22 +301,25 @@ int memcb_test(void);
 
 #define ERROR_OUT(err, eLabel) { ret = (err); goto eLabel; }
 
-
+#ifdef HAVE_STACK_SIZE
+static THREAD_RETURN err_sys(const char* msg, int es)
+#else
 static int err_sys(const char* msg, int es)
-
+#endif
 {
     printf("%s error = %d\n", msg, es);
 
     EXIT_TEST(-1);
 }
 
-/* func_args from test.h, so don't have to pull in other junk */
+#ifndef HAVE_STACK_SIZE
+/* func_args from test.h, so don't have to pull in other stuff */
 typedef struct func_args {
     int    argc;
     char** argv;
     int    return_code;
 } func_args;
-
+#endif /* !HAVE_STACK_SIZE */
 
 #ifdef HAVE_FIPS
 
@@ -328,31 +337,34 @@ static void myFipsCb(int ok, int err, const char* hash)
 
 #endif /* HAVE_FIPS */
 
-int wolfcrypt_test(void* args)
-{
-    int ret = 0;
 #ifdef WOLFSSL_STATIC_MEMORY
     #ifdef BENCH_EMBEDDED
-        byte memory[10000];
+        static byte gTestMemory[10000];
+    #elif defined(USE_FAST_MATH) && !defined(ALT_ECC_SIZE)
+        static byte gTestMemory[130000];
     #else
-        byte memory[100000];
+        static byte gTestMemory[80000];
     #endif
 #endif
 
+#ifdef HAVE_STACK_SIZE
+THREAD_RETURN WOLFSSL_THREAD wolfcrypt_test(void* args)
+#else
+int wolfcrypt_test(void* args)
+#endif
+{
+    int ret;
+
     ((func_args*)args)->return_code = -1; /* error state */
 
 #ifdef WOLFSSL_STATIC_MEMORY
-    if (wc_LoadStaticMemory(&HEAP_HINT, memory, sizeof(memory),
+    if (wc_LoadStaticMemory(&HEAP_HINT, gTestMemory, sizeof(gTestMemory),
                                                 WOLFMEM_GENERAL, 1) != 0) {
         printf("unable to load static memory");
         exit(EXIT_FAILURE);
     }
 #endif
 
-#if defined(USE_WOLFSSL_MEMORY) && defined(WOLFSSL_TRACK_MEMORY)
-    InitMemoryTracker();
-#endif
-
 #if defined(DEBUG_WOLFSSL) && !defined(HAVE_VALGRIND)
     wolfSSL_Debugging_ON();
 #endif
@@ -378,9 +390,8 @@ int wolfcrypt_test(void* args)
 
 #ifdef WOLFSSL_ASYNC_CRYPT
     ret = wolfAsync_DevOpen(&devId);
-    if (ret != 0) {
-        err_sys("Async device open failed", -1236);
-        return -1236;
+    if (ret < 0) {
+        printf("Async device open failed\nRunning without async\n");
     }
 #else
     (void)devId;
@@ -736,12 +747,6 @@ int wolfcrypt_test(void* args)
         else
             printf( "ECC buffer test passed!\n");
     #endif
-    #if defined(FP_ECC)
-        wc_ecc_fp_free();
-    #endif
-    #ifdef ECC_CACHE_CURVE
-        wc_ecc_curve_cache_free();
-    #endif
 #endif
 
 #ifdef HAVE_CURVE25519
@@ -815,13 +820,13 @@ int wolfcrypt_test(void* args)
         printf( "memcb    test passed!\n");
 #endif
 
-#if defined(USE_WOLFSSL_MEMORY) && defined(WOLFSSL_TRACK_MEMORY)
-    ShowMemoryTracker();
+#ifdef WOLFSSL_ASYNC_CRYPT
+    wolfAsync_DevClose(&devId);
 #endif
 
     ((func_args*)args)->return_code = ret;
 
-    return ret;
+    EXIT_TEST(ret);
 }
 
 
@@ -845,10 +850,14 @@ int wolfcrypt_test(void* args)
 
         wolfCrypt_Init();
 
+    #ifdef HAVE_STACK_SIZE
+        StackSizeCheck(&args, wolfcrypt_test);
+    #else
         wolfcrypt_test(&args);
+    #endif
 
         if (wolfCrypt_Cleanup() != 0) {
-            return err_sys("Error with wolfCrypt_Cleanup!\n", -1239);
+            err_sys("Error with wolfCrypt_Cleanup!\n", -1239);
         }
 
 #ifdef HAVE_WNR
@@ -856,7 +865,7 @@ int wolfcrypt_test(void* args)
             err_sys("Failed to free netRandom context", -1238);
 #endif /* HAVE_WNR */
 
-        EXIT_TEST(args.return_code);
+        return args.return_code;
     }
 
 #endif /* NO_MAIN_DRIVER */
@@ -891,7 +900,7 @@ int error_test()
      * APIs. Check that the values that are not errors map to the unknown
      * string.
      */
-    for (i = OPEN_RAN_E; i >= BAD_PATH_ERROR; i--) {
+    for (i = MAX_CODE_E-1; i >= WC_LAST_E; i--) {
         errStr = wc_GetErrorString(i);
         wc_ErrorString(i, out);
 
@@ -1025,8 +1034,7 @@ int base64_test()
 int asn_test()
 {
 #ifndef NO_ASN_TIME
-    {
-    time_t now;
+    long now;
 
     /* Parameter Validation tests. */
     if (wc_GetTime(NULL, sizeof(now)) != BAD_FUNC_ARG)
@@ -1039,7 +1047,6 @@ int asn_test()
         return -102;
     if (now == 0)
         return -103;
-    }
 #endif
 
     return 0;
@@ -1125,9 +1132,10 @@ int md2_test()
 #ifndef NO_MD5
 int md5_test(void)
 {
+    int ret;
     Md5  md5;
-    Md5  partialMd5;
     byte hash[MD5_DIGEST_SIZE];
+    byte hashcopy[MD5_DIGEST_SIZE];
 
     testVector a, b, c, d, e;
     testVector test_md5[5];
@@ -1171,35 +1179,31 @@ int md5_test(void)
     test_md5[3] = d;
     test_md5[4] = e;
 
-    wc_InitMd5(&md5);
+    ret = wc_InitMd5_ex(&md5, HEAP_HINT, devId);
+    if (ret != 0)
+        return -4009;
 
     for (i = 0; i < times; ++i) {
-        wc_Md5Update(&md5, (byte*)test_md5[i].input, (word32)test_md5[i].inLen);
-        wc_Md5Final(&md5, hash);
+        ret = wc_Md5Update(&md5, (byte*)test_md5[i].input, (word32)test_md5[i].inLen);
+        if (ret != 0)
+            return -4010;
+
+        ret = wc_Md5GetHash(&md5, hashcopy);
+        if (ret != 0)
+            return -4011;
+
+        ret = wc_Md5Final(&md5, hash);
+        if (ret != 0)
+            return -4012;
 
         if (XMEMCMP(hash, test_md5[i].output, MD5_DIGEST_SIZE) != 0)
-            return -5 - i;
+            return -10 - i;
+
+        if (XMEMCMP(hash, hashcopy, MD5_DIGEST_SIZE) != 0)
+            return -20 - i;
     }
 
-    /* Position restoration and getting the hash doesn't invalidate state. */
-    wc_InitMd5(&md5);
-    wc_InitMd5(&partialMd5);
-    wc_Md5Update(&partialMd5, (byte*)a.input, 1);
-    wc_Md5RestorePos(&md5, &partialMd5);
-    wc_Md5GetHash(&partialMd5, hash);
-    wc_Md5Update(&partialMd5, (byte*)a.input + 1, (word32)a.inLen - 1);
-    wc_Md5Update(&md5, (byte*)a.input + 1, (word32)a.inLen - 1);
-    wc_Md5Final(&partialMd5, hash);
-    if (XMEMCMP(hash, a.output, a.outLen) != 0)
-        return -10;
-    XMEMSET(hash, 0, a.outLen);
-    wc_Md5Final(&md5, hash);
-    if (XMEMCMP(hash, a.output, a.outLen) != 0)
-        return -11;
-    if (wc_Md5Hash((byte*)a.input, (word32)a.inLen, hash) != 0)
-        return -12;
-    if (XMEMCMP(hash, a.output, a.outLen) != 0)
-        return -13;
+    wc_Md5Free(&md5);
 
     return 0;
 }
@@ -1289,8 +1293,8 @@ int md4_test(void)
 int sha_test(void)
 {
     Sha  sha;
-    Sha  partialSha;
     byte hash[SHA_DIGEST_SIZE];
+    byte hashcopy[SHA_DIGEST_SIZE];
 
     testVector a, b, c, d;
     testVector test_sha[4];
@@ -1329,54 +1333,31 @@ int sha_test(void)
     test_sha[2] = c;
     test_sha[3] = d;
 
-    ret = wc_InitSha(&sha);
+    ret = wc_InitSha_ex(&sha, HEAP_HINT, devId);
     if (ret != 0)
         return -4001;
 
     for (i = 0; i < times; ++i) {
-        wc_ShaUpdate(&sha, (byte*)test_sha[i].input, (word32)test_sha[i].inLen);
-        wc_ShaFinal(&sha, hash);
+        ret = wc_ShaUpdate(&sha, (byte*)test_sha[i].input, (word32)test_sha[i].inLen);
+        if (ret != 0)
+            return -4002;
+
+        ret = wc_ShaGetHash(&sha, hashcopy);
+        if (ret != 0)
+            return -4003;
+
+        ret = wc_ShaFinal(&sha, hash);
+        if (ret != 0)
+            return -4004;
 
         if (XMEMCMP(hash, test_sha[i].output, SHA_DIGEST_SIZE) != 0)
             return -10 - i;
+
+        if (XMEMCMP(hash, hashcopy, SHA_DIGEST_SIZE) != 0)
+            return -20 - i;
     }
 
-    /* Position restoration and getting the hash doesn't invalidate state. */
-    ret = wc_InitSha(&sha);
-    if (ret != 0)
-        return -20;
-    ret = wc_InitSha(&partialSha);
-    if (ret != 0)
-        return -21;
-    ret = wc_ShaUpdate(&partialSha, (byte*)a.input, 1);
-    if (ret != 0)
-        return -22;
-    wc_ShaRestorePos(&sha, &partialSha);
-    ret = wc_ShaGetHash(&partialSha, hash);
-    if (ret != 0)
-        return -23;
-    ret = wc_ShaUpdate(&partialSha, (byte*)a.input + 1, (word32)a.inLen - 1);
-    if (ret != 0)
-        return -24;
-    ret = wc_ShaUpdate(&sha, (byte*)a.input + 1, (word32)a.inLen - 1);
-    if (ret != 0)
-        return -25;
-    ret = wc_ShaFinal(&partialSha, hash);
-    if (ret != 0)
-        return -26;
-    if (XMEMCMP(hash, a.output, a.outLen) != 0)
-        return -27;
-    XMEMSET(hash, 0, a.outLen);
-    ret = wc_ShaFinal(&sha, hash);
-    if (ret != 0)
-        return -28;
-    if (XMEMCMP(hash, a.output, a.outLen) != 0)
-        return -29;
-    ret = wc_ShaHash((byte*)a.input, (word32)a.inLen, hash);
-    if (ret != 0)
-        return -30;
-    if (XMEMCMP(hash, a.output, a.outLen) != 0)
-        return -31;
+    wc_ShaFree(&sha);
 
     return 0;
 }
@@ -1518,6 +1499,7 @@ int sha224_test(void)
 {
     Sha224 sha;
     byte   hash[SHA224_DIGEST_SIZE];
+    byte   hashcopy[SHA224_DIGEST_SIZE];
 
     testVector a, b;
     testVector test_sha[2];
@@ -1539,7 +1521,7 @@ int sha224_test(void)
     test_sha[0] = a;
     test_sha[1] = b;
 
-    ret = wc_InitSha224(&sha);
+    ret = wc_InitSha224_ex(&sha, HEAP_HINT, devId);
     if (ret != 0)
         return -4005;
 
@@ -1547,13 +1529,20 @@ int sha224_test(void)
         ret = wc_Sha224Update(&sha, (byte*)test_sha[i].input,(word32)test_sha[i].inLen);
         if (ret != 0)
             return -4006;
-        ret = wc_Sha224Final(&sha, hash);
+        ret = wc_Sha224GetHash(&sha, hashcopy);
         if (ret != 0)
             return -4007;
+        ret = wc_Sha224Final(&sha, hash);
+        if (ret != 0)
+            return -4008;
 
         if (XMEMCMP(hash, test_sha[i].output, SHA224_DIGEST_SIZE) != 0)
             return -10 - i;
+
+        if (XMEMCMP(hash, hashcopy, SHA224_DIGEST_SIZE) != 0)
+            return -20 - i;
     }
+    wc_Sha224Free(&sha);
 
     /* Getting the hash doesn't invalidate state. */
     ret = wc_InitSha224(&sha);
@@ -1588,8 +1577,8 @@ int sha224_test(void)
 int sha256_test(void)
 {
     Sha256 sha;
-    Sha256 partialSha;
     byte   hash[SHA256_DIGEST_SIZE];
+    byte   hashcopy[SHA256_DIGEST_SIZE];
 
     testVector a, b;
     testVector test_sha[2];
@@ -1613,7 +1602,7 @@ int sha256_test(void)
     test_sha[0] = a;
     test_sha[1] = b;
 
-    ret = wc_InitSha256(&sha);
+    ret = wc_InitSha256_ex(&sha, HEAP_HINT, devId);
     if (ret != 0)
         return -4005;
 
@@ -1621,51 +1610,20 @@ int sha256_test(void)
         ret = wc_Sha256Update(&sha, (byte*)test_sha[i].input,(word32)test_sha[i].inLen);
         if (ret != 0)
             return -4006;
-        ret = wc_Sha256Final(&sha, hash);
+        ret = wc_Sha256GetHash(&sha, hashcopy);
         if (ret != 0)
             return -4007;
+        ret = wc_Sha256Final(&sha, hash);
+        if (ret != 0)
+            return -4008;
 
         if (XMEMCMP(hash, test_sha[i].output, SHA256_DIGEST_SIZE) != 0)
             return -10 - i;
+        if (XMEMCMP(hash, hashcopy, SHA256_DIGEST_SIZE) != 0)
+            return -20 - i;
     }
 
-    /* Position restoration and getting the hash doesn't invalidate state. */
-    ret = wc_InitSha256(&sha);
-    if (ret != 0)
-        return -20;
-    ret = wc_InitSha256(&partialSha);
-    if (ret != 0)
-        return -21;
-    ret = wc_Sha256Update(&partialSha, (byte*)a.input, 1);
-    if (ret != 0)
-        return -22;
-    wc_Sha256RestorePos(&sha, &partialSha);
-    ret = wc_Sha256GetHash(&partialSha, hash);
-    if (ret != 0)
-        return -23;
-    ret = wc_Sha256Update(&partialSha, (byte*)a.input + 1, (word32)a.inLen - 1);
-    if (ret != 0)
-        return -24;
-    ret = wc_Sha256Update(&sha, (byte*)a.input + 1, (word32)a.inLen - 1);
-    if (ret != 0)
-        return -25;
-    ret = wc_Sha256Final(&partialSha, hash);
-    if (ret != 0)
-        return -26;
-    if (XMEMCMP(hash, a.output, a.outLen) != 0)
-        return -27;
-    XMEMSET(hash, 0, a.outLen);
-    ret = wc_Sha256Final(&sha, hash);
-    if (ret != 0)
-        return -28;
-    if (XMEMCMP(hash, a.output, a.outLen) != 0)
-        return -29;
-    XMEMSET(hash, 0, a.outLen);
-    ret = wc_Sha256Hash((byte*)a.input, (word32)a.inLen, hash);
-    if (ret != 0)
-        return -30;
-    if (XMEMCMP(hash, a.output, a.outLen) != 0)
-        return -31;
+    wc_Sha256Free(&sha);
 
     return 0;
 }
@@ -1677,6 +1635,7 @@ int sha512_test(void)
 {
     Sha512 sha;
     byte   hash[SHA512_DIGEST_SIZE];
+    byte   hashcopy[SHA512_DIGEST_SIZE];
     int    ret;
 
     testVector a, b;
@@ -1705,7 +1664,7 @@ int sha512_test(void)
     test_sha[0] = a;
     test_sha[1] = b;
 
-    ret = wc_InitSha512(&sha);
+    ret = wc_InitSha512_ex(&sha, HEAP_HINT, devId);
     if (ret != 0)
         return -4009;
 
@@ -1713,38 +1672,20 @@ int sha512_test(void)
         ret = wc_Sha512Update(&sha, (byte*)test_sha[i].input,(word32)test_sha[i].inLen);
         if (ret != 0)
             return -4010;
-
-        ret = wc_Sha512Final(&sha, hash);
+        ret = wc_Sha512GetHash(&sha, hashcopy);
         if (ret != 0)
             return -4011;
+        ret = wc_Sha512Final(&sha, hash);
+        if (ret != 0)
+            return -4012;
 
         if (XMEMCMP(hash, test_sha[i].output, SHA512_DIGEST_SIZE) != 0)
             return -10 - i;
+        if (XMEMCMP(hash, hashcopy, SHA512_DIGEST_SIZE) != 0)
+            return -20 - i;
     }
 
-    /* Getting the hash doesn't invalidate state. */
-    ret = wc_InitSha512(&sha);
-    if (ret != 0)
-        return -20;
-    ret = wc_Sha512Update(&sha, (byte*)a.input, 1);
-    if (ret != 0)
-        return -21;
-    ret = wc_Sha512GetHash(&sha, hash);
-    if (ret != 0)
-        return -22;
-    ret = wc_Sha512Update(&sha, (byte*)a.input + 1, (word32)a.inLen - 1);
-    if (ret != 0)
-        return -23;
-    ret = wc_Sha512Final(&sha, hash);
-    if (ret != 0)
-        return -24;
-    if (XMEMCMP(hash, a.output, a.outLen) != 0)
-        return -15;
-    ret = wc_Sha512Hash((byte*)a.input, (word32)a.inLen, hash);
-    if (ret != 0)
-        return -26;
-    if (XMEMCMP(hash, a.output, a.outLen) != 0)
-        return -27;
+    wc_Sha512Free(&sha);
 
     return 0;
 }
@@ -1756,6 +1697,7 @@ int sha384_test(void)
 {
     Sha384 sha;
     byte   hash[SHA384_DIGEST_SIZE];
+    byte   hashcopy[SHA384_DIGEST_SIZE];
     int    ret;
 
     testVector a, b;
@@ -1782,7 +1724,7 @@ int sha384_test(void)
     test_sha[0] = a;
     test_sha[1] = b;
 
-    ret = wc_InitSha384(&sha);
+    ret = wc_InitSha384_ex(&sha, HEAP_HINT, devId);
     if (ret != 0)
         return -4012;
 
@@ -1790,39 +1732,20 @@ int sha384_test(void)
         ret = wc_Sha384Update(&sha, (byte*)test_sha[i].input,(word32)test_sha[i].inLen);
         if (ret != 0)
             return -4013;
-
-        ret = wc_Sha384Final(&sha, hash);
+        ret = wc_Sha384GetHash(&sha, hashcopy);
         if (ret != 0)
             return -4014;
+        ret = wc_Sha384Final(&sha, hash);
+        if (ret != 0)
+            return -4015;
 
         if (XMEMCMP(hash, test_sha[i].output, SHA384_DIGEST_SIZE) != 0)
             return -10 - i;
+        if (XMEMCMP(hash, hashcopy, SHA384_DIGEST_SIZE) != 0)
+            return -20 - i;
     }
 
-    /* Getting the hash doesn't invalidate state. */
-    ret = wc_InitSha384(&sha);
-    if (ret != 0)
-        return -20;
-    ret = wc_Sha384Update(&sha, (byte*)a.input, 1);
-    if (ret != 0)
-        return -21;
-    ret = wc_Sha384GetHash(&sha, hash);
-    if (ret != 0)
-        return -22;
-    ret = wc_Sha384Update(&sha, (byte*)a.input + 1, (word32)(a.inLen - 1));
-    if (ret != 0)
-        return -23;
-    ret = wc_Sha384Final(&sha, hash);
-    if (ret != 0)
-        return -24;
-    if (XMEMCMP(hash, a.output, a.outLen) != 0)
-        return -25;
-    XMEMSET(hash, 0, a.outLen);
-    ret = wc_Sha384Hash((byte*)a.input, (word32)a.inLen, hash);
-    if (ret != 0)
-        return -26;
-    if (XMEMCMP(hash, a.output, a.outLen) != 0)
-        return -27;
+    wc_Sha384Free(&sha);
 
     return 0;
 }
@@ -2041,11 +1964,9 @@ int hmac_md5_test(void)
         }
     #endif
 
-    #ifdef WOLFSSL_ASYNC_CRYPT
-        if (wc_HmacAsyncInit(&hmac, devId) != 0) {
+        if (wc_HmacInit(&hmac, HEAP_HINT, devId) != 0) {
             return -20009;
         }
-    #endif
 
         ret = wc_HmacSetKey(&hmac, MD5, (byte*)keys[i], (word32)XSTRLEN(keys[i]));
         if (ret != 0)
@@ -2061,9 +1982,8 @@ int hmac_md5_test(void)
         if (XMEMCMP(hash, test_hmac[i].output, MD5_DIGEST_SIZE) != 0)
             return -20 - i;
 
-    #ifdef WOLFSSL_ASYNC_CRYPT
-        wc_HmacAsyncFree(&hmac);
-    #endif
+
+        wc_HmacFree(&hmac);
     }
 
 #ifndef HAVE_FIPS
@@ -2126,10 +2046,10 @@ int hmac_sha_test(void)
         if (i == 1)
             continue; /* cavium can't handle short keys, fips not allowed */
 #endif
-#ifdef WOLFSSL_ASYNC_CRYPT
-        if (wc_HmacAsyncInit(&hmac, devId) != 0)
+
+        if (wc_HmacInit(&hmac, HEAP_HINT, devId) != 0)
             return -20010;
-#endif
+
         ret = wc_HmacSetKey(&hmac, SHA, (byte*)keys[i], (word32)XSTRLEN(keys[i]));
         if (ret != 0)
             return -4018;
@@ -2143,9 +2063,8 @@ int hmac_sha_test(void)
 
         if (XMEMCMP(hash, test_hmac[i].output, SHA_DIGEST_SIZE) != 0)
             return -20 - i;
-#ifdef WOLFSSL_ASYNC_CRYPT
-        wc_HmacAsyncFree(&hmac);
-#endif
+
+        wc_HmacFree(&hmac);
     }
 
 #ifndef HAVE_FIPS
@@ -2209,10 +2128,10 @@ int hmac_sha224_test(void)
         if (i == 1)
             continue; /* cavium can't handle short keys, fips not allowed */
 #endif
-#ifdef WOLFSSL_ASYNC_CRYPT
-        if (wc_HmacAsyncInit(&hmac, devId) != 0)
+
+        if (wc_HmacInit(&hmac, HEAP_HINT, devId) != 0)
             return -20011;
-#endif
+
         ret = wc_HmacSetKey(&hmac, SHA224, (byte*)keys[i],(word32)XSTRLEN(keys[i]));
         if (ret != 0)
             return -4021;
@@ -2226,9 +2145,8 @@ int hmac_sha224_test(void)
 
         if (XMEMCMP(hash, test_hmac[i].output, SHA224_DIGEST_SIZE) != 0)
             return -20 - i;
-#ifdef WOLFSSL_ASYNC_CRYPT
-        wc_HmacAsyncFree(&hmac);
-#endif
+
+        wc_HmacFree(&hmac);
     }
 
 #ifndef HAVE_FIPS
@@ -2295,10 +2213,10 @@ int hmac_sha256_test(void)
         if (i == 1)
             continue; /* cavium can't handle short keys, fips not allowed */
 #endif
-#ifdef WOLFSSL_ASYNC_CRYPT
-        if (wc_HmacAsyncInit(&hmac, devId) != 0)
+
+        if (wc_HmacInit(&hmac, HEAP_HINT, devId) != 0)
             return -20011;
-#endif
+
         ret = wc_HmacSetKey(&hmac, SHA256, (byte*)keys[i],(word32)XSTRLEN(keys[i]));
         if (ret != 0)
             return -4021;
@@ -2312,9 +2230,8 @@ int hmac_sha256_test(void)
 
         if (XMEMCMP(hash, test_hmac[i].output, SHA256_DIGEST_SIZE) != 0)
             return -20 - i;
-#ifdef WOLFSSL_ASYNC_CRYPT
-        wc_HmacAsyncFree(&hmac);
-#endif
+
+        wc_HmacFree(&hmac);
     }
 
 #ifndef HAVE_FIPS
@@ -2385,14 +2302,13 @@ int hmac_blake2b_test(void)
         if (i == 1)
             continue; /* cavium can't handle short keys, fips not allowed */
 #endif
-#ifdef WOLFSSL_ASYNC_CRYPT
-    #ifdef HAVE_CAVIUM_V
-        /* Blake2 not supported on Cavium V, but SHA3 is */
-        return 0;
+
+    #if !defined(HAVE_CAVIUM_V)
+        /* Blake2 not supported on Cavium V; init context for all other builds */
+        if (wc_HmacInit(&hmac, HEAP_HINT, devId) != 0)
+            return -20012;
     #endif
-        if (wc_HmacAsyncInit(&hmac, devId) != 0)
-            return -20011;
-#endif
+
         ret = wc_HmacSetKey(&hmac, BLAKE2B_ID, (byte*)keys[i],
                          (word32)XSTRLEN(keys[i]));
         if (ret != 0)
@@ -2407,9 +2323,8 @@ int hmac_blake2b_test(void)
 
         if (XMEMCMP(hash, test_hmac[i].output, BLAKE2B_256) != 0)
             return -20 - i;
-#ifdef WOLFSSL_ASYNC_CRYPT
-        wc_HmacAsyncFree(&hmac);
-#endif
+
+        wc_HmacFree(&hmac);
     }
 
 #ifndef HAVE_FIPS
@@ -2479,6 +2394,10 @@ int hmac_sha384_test(void)
         if (i == 1)
             continue; /* fips not allowed */
 #endif
+
+        if (wc_HmacInit(&hmac, HEAP_HINT, devId) != 0)
+            return -20013;
+
         ret = wc_HmacSetKey(&hmac, SHA384, (byte*)keys[i],(word32)XSTRLEN(keys[i]));
         if (ret != 0)
             return -4027;
@@ -2492,6 +2411,8 @@ int hmac_sha384_test(void)
 
         if (XMEMCMP(hash, test_hmac[i].output, SHA384_DIGEST_SIZE) != 0)
             return -20 - i;
+
+        wc_HmacFree(&hmac);
     }
 
 #ifndef HAVE_FIPS
@@ -2564,6 +2485,10 @@ int hmac_sha512_test(void)
         if (i == 1)
             continue; /* fips not allowed */
 #endif
+
+        if (wc_HmacInit(&hmac, HEAP_HINT, devId) != 0)
+            return -20014;
+
         ret = wc_HmacSetKey(&hmac, SHA512, (byte*)keys[i],(word32)XSTRLEN(keys[i]));
         if (ret != 0)
             return -4030;
@@ -2577,6 +2502,8 @@ int hmac_sha512_test(void)
 
         if (XMEMCMP(hash, test_hmac[i].output, SHA512_DIGEST_SIZE) != 0)
             return -20 - i;
+
+        wc_HmacFree(&hmac);
     }
 
 #ifndef HAVE_FIPS
@@ -2640,12 +2567,10 @@ int arc4_test(void)
         if (i == 3)
             keylen = 4;
 
-    #ifdef WOLFSSL_ASYNC_CRYPT
-        if (wc_Arc4AsyncInit(&enc, devId) != 0)
+        if (wc_Arc4Init(&enc, HEAP_HINT, devId) != 0)
             return -20001;
-        if (wc_Arc4AsyncInit(&dec, devId) != 0)
+        if (wc_Arc4Init(&dec, HEAP_HINT, devId) != 0)
             return -20002;
-    #endif
 
         wc_Arc4SetKey(&enc, (byte*)keys[i], keylen);
         wc_Arc4SetKey(&dec, (byte*)keys[i], keylen);
@@ -2660,10 +2585,8 @@ int arc4_test(void)
         if (XMEMCMP(cipher, test_arc4[i].output, test_arc4[i].outLen))
             return -20 - 5 - i;
 
-    #ifdef WOLFSSL_ASYNC_CRYPT
-        wc_Arc4AsyncFree(&enc);
-        wc_Arc4AsyncFree(&dec);
-    #endif
+        wc_Arc4Free(&enc);
+        wc_Arc4Free(&dec);
     }
 
     return 0;
@@ -3535,12 +3458,11 @@ int des3_test(void)
     int ret;
 
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    if (wc_Des3AsyncInit(&enc, devId) != 0)
+    if (wc_Des3Init(&enc, HEAP_HINT, devId) != 0)
         return -20005;
-    if (wc_Des3AsyncInit(&dec, devId) != 0)
+    if (wc_Des3Init(&dec, HEAP_HINT, devId) != 0)
         return -20006;
-#endif
+
     ret = wc_Des3_SetKey(&enc, key3, iv3, DES_ENCRYPTION);
     if (ret != 0)
         return -31;
@@ -3548,9 +3470,15 @@ int des3_test(void)
     if (ret != 0)
         return -32;
     ret = wc_Des3_CbcEncrypt(&enc, cipher, vector, sizeof(vector));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
     if (ret != 0)
         return -33;
     ret = wc_Des3_CbcDecrypt(&dec, plain, cipher, sizeof(cipher));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &dec.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
     if (ret != 0)
         return -34;
 
@@ -3560,10 +3488,9 @@ int des3_test(void)
     if (XMEMCMP(cipher, verify3, sizeof(cipher)))
         return -36;
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    wc_Des3AsyncFree(&enc);
-    wc_Des3AsyncFree(&dec);
-#endif
+    wc_Des3Free(&enc);
+    wc_Des3Free(&dec);
+
     return 0;
 }
 #endif /* NO_DES */
@@ -3724,12 +3651,13 @@ int aes_test(void)
     byte key[] = "0123456789abcdef   ";  /* align */
     byte iv[]  = "1234567890abcdef   ";  /* align */
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    if (wc_AesAsyncInit(&enc, devId) != 0)
+    if (wc_AesInit(&enc, HEAP_HINT, devId) != 0)
         return -20003;
-    if (wc_AesAsyncInit(&dec, devId) != 0)
+#ifdef HAVE_AES_DECRYPT
+    if (wc_AesInit(&dec, HEAP_HINT, devId) != 0)
         return -20004;
 #endif
+
     ret = wc_AesSetKey(&enc, key, AES_BLOCK_SIZE, iv, AES_ENCRYPTION);
     if (ret != 0)
         return -1001;
@@ -3739,11 +3667,17 @@ int aes_test(void)
         return -1002;
 #endif
 
-    ret = wc_AesCbcEncrypt(&enc, cipher, msg,   AES_BLOCK_SIZE);
+    ret = wc_AesCbcEncrypt(&enc, cipher, msg, AES_BLOCK_SIZE);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
     if (ret != 0)
         return -1005;
 #ifdef HAVE_AES_DECRYPT
     ret = wc_AesCbcDecrypt(&dec, plain, cipher, AES_BLOCK_SIZE);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &dec.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
     if (ret != 0)
         return -1006;
 
@@ -3828,10 +3762,16 @@ int aes_test(void)
                     return -1031;
 
                 ret = wc_AesCbcEncrypt(&enc, bigCipher, bigMsg, msgSz);
+            #if defined(WOLFSSL_ASYNC_CRYPT)
+                ret = wc_AsyncWait(ret, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+            #endif
                 if (ret != 0)
                     return -1032;
 
                 ret = wc_AesCbcDecrypt(&dec, bigPlain, bigCipher, msgSz);
+            #if defined(WOLFSSL_ASYNC_CRYPT)
+                ret = wc_AsyncWait(ret, &dec.asyncDev, WC_ASYNC_FLAG_NONE);
+            #endif
                 if (ret != 0)
                     return -1033;
 
@@ -3842,11 +3782,6 @@ int aes_test(void)
     }
 #endif /* WOLFSSL_AESNI HAVE_AES_DECRYPT */
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    wc_AesAsyncFree(&enc);
-    wc_AesAsyncFree(&dec);
-#endif /* WOLFSSL_ASYNC_CRYPT */
-
 #endif /* HAVE_AES_CBC */
 
 #ifdef WOLFSSL_AES_COUNTER
@@ -4062,6 +3997,11 @@ int aes_test(void)
         return ret;
 #endif
 
+    wc_AesFree(&enc);
+#ifdef HAVE_AES_DECRYPT
+    wc_AesFree(&dec);
+#endif
+
     return ret;
 }
 
@@ -4127,7 +4067,7 @@ int aesgcm_test(void)
         0xcd, 0xdf, 0x88, 0x53, 0xbb, 0x2d, 0x55, 0x1b
     };
 
-#ifndef HAVE_FIPS
+#if !defined(HAVE_FIPS) && !defined(HAVE_INTEL_QA)
     /* Test Case 12, uses same plaintext and AAD data. */
     const byte k2[] =
     {
@@ -4165,7 +4105,7 @@ int aesgcm_test(void)
         0xdc, 0xf5, 0x66, 0xff, 0x29, 0x1c, 0x25, 0xbb,
         0xb8, 0x56, 0x8f, 0xc3, 0xd3, 0x76, 0xa6, 0xd9
     };
-#endif /* HAVE_FIPS */
+#endif /* !HAVE_FIPS && !HAVE_INTEL_QA */
 
     byte resultT[sizeof(t1)];
     byte resultP[sizeof(p)];
@@ -4176,10 +4116,22 @@ int aesgcm_test(void)
     XMEMSET(resultC, 0, sizeof(resultC));
     XMEMSET(resultP, 0, sizeof(resultP));
 
-    wc_AesGcmSetKey(&enc, k1, sizeof(k1));
+    if (wc_AesInit(&enc, HEAP_HINT, devId) != 0) {
+        return -20003;
+    }
+
+    result = wc_AesGcmSetKey(&enc, k1, sizeof(k1));
+    if (result != 0)
+        return -66;
+
     /* AES-GCM encrypt and decrypt both use AES encrypt internally */
-    wc_AesGcmEncrypt(&enc, resultC, p, sizeof(p), iv1, sizeof(iv1),
+    result = wc_AesGcmEncrypt(&enc, resultC, p, sizeof(p), iv1, sizeof(iv1),
                                         resultT, sizeof(resultT), a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (result != 0)
+        return -67;
     if (XMEMCMP(c1, resultC, sizeof(resultC)))
         return -68;
     if (XMEMCMP(t1, resultT, sizeof(resultT)))
@@ -4187,20 +4139,29 @@ int aesgcm_test(void)
 
     result = wc_AesGcmDecrypt(&enc, resultP, resultC, sizeof(resultC),
                       iv1, sizeof(iv1), resultT, sizeof(resultT), a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
     if (result != 0)
         return -70;
     if (XMEMCMP(p, resultP, sizeof(resultP)))
         return -71;
 
-#ifndef HAVE_FIPS
+    /* QAT only supports 12-byte IV */
+#if !defined(HAVE_FIPS) && !defined(HAVE_INTEL_QA)
     XMEMSET(resultT, 0, sizeof(resultT));
     XMEMSET(resultC, 0, sizeof(resultC));
     XMEMSET(resultP, 0, sizeof(resultP));
 
     wc_AesGcmSetKey(&enc, k2, sizeof(k2));
     /* AES-GCM encrypt and decrypt both use AES encrypt internally */
-    wc_AesGcmEncrypt(&enc, resultC, p, sizeof(p), iv2, sizeof(iv2),
+    result = wc_AesGcmEncrypt(&enc, resultC, p, sizeof(p), iv2, sizeof(iv2),
                                         resultT, sizeof(resultT), a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (result != 0)
+        return -229;
     if (XMEMCMP(c2, resultC, sizeof(resultC)))
         return -230;
     if (XMEMCMP(t2, resultT, sizeof(resultT)))
@@ -4208,11 +4169,16 @@ int aesgcm_test(void)
 
     result = wc_AesGcmDecrypt(&enc, resultP, resultC, sizeof(resultC),
                       iv2, sizeof(iv2), resultT, sizeof(resultT), a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
     if (result != 0)
         return -232;
     if (XMEMCMP(p, resultP, sizeof(resultP)))
         return -233;
-#endif /* HAVE_FIPS */
+#endif /* !HAVE_FIPS && !HAVE_INTEL_QA */
+
+    wc_AesFree(&enc);
 
     return 0;
 }
@@ -4265,6 +4231,7 @@ int gmac_test(void)
 
     byte tag[16];
 
+    XMEMSET(&gmac, 0, sizeof(Gmac)); /* clear context */
     XMEMSET(tag, 0, sizeof(tag));
     wc_GmacSetKey(&gmac, k1, sizeof(k1));
     wc_GmacUpdate(&gmac, iv1, sizeof(iv1), a1, sizeof(a1), tag, sizeof(t1));
@@ -4331,6 +4298,7 @@ int aesccm_test(void)
 
     int result;
 
+    XMEMSET(&enc, 0, sizeof(Aes)); /* clear context */
     XMEMSET(t2, 0, sizeof(t2));
     XMEMSET(c2, 0, sizeof(c2));
     XMEMSET(p2, 0, sizeof(p2));
@@ -4937,7 +4905,11 @@ int idea_test(void)
         rnd[1000], enc[1000], dec[1000];
 
         /* random values */
+    #ifndef HAVE_FIPS
+        ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+    #else
         ret = wc_InitRng(&rng);
+    #endif
         if (ret != 0)
             return -39;
 
@@ -5011,7 +4983,11 @@ static int random_rng_test(void)
     byte block[32];
     int ret, i;
 
+#ifndef HAVE_FIPS
+    ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
     ret = wc_InitRng(&rng);
+#endif
     if (ret != 0) return -39;
 
     XMEMSET(block, 0, sizeof(block));
@@ -5047,7 +5023,7 @@ exit:
     return ret;
 }
 
-#if (defined(HAVE_HASHDRBG) || defined(NO_RC4)) && !defined(CUSTOM_RAND_GENERATE_BLOCK)
+#if defined(HAVE_HASHDRBG) && !defined(CUSTOM_RAND_GENERATE_BLOCK)
 
 int random_test(void)
 {
@@ -5126,17 +5102,15 @@ int random_test(void)
     return 0;
 }
 
-#else /* (HAVE_HASHDRBG || NO_RC4) && !CUSTOM_RAND_GENERATE_BLOCK */
+#else
 
 int random_test(void)
 {
     /* Basic RNG generate block test */
-    random_rng_test();
-
-    return 0;
+    return random_rng_test();
 }
 
-#endif /* (HAVE_HASHDRBG || NO_RC4) && !CUSTOM_RAND_GENERATE_BLOCK */
+#endif /* HAVE_HASHDRBG && !CUSTOM_RAND_GENERATE_BLOCK */
 #endif /* WC_NO_RNG */
 
 
@@ -5300,8 +5274,8 @@ byte GetEntropy(ENTROPY_CMD cmd, byte* out)
             static const char* clientKeyPub  = CERT_ROOT "client-keyPub.der";
         #endif
         #ifdef WOLFSSL_CERT_GEN
-            static const char* caKeyFile  = CERT_ROOT "ca-key.der";
-            static const char* caCertFile = CERT_ROOT "ca-cert.pem";
+            static const char* rsaCaKeyFile  = CERT_ROOT "ca-key.der";
+            static const char* rsaCaCertFile = CERT_ROOT "ca-cert.pem";
         #endif
     #endif /* !NO_RSA */
     #ifndef NO_DH
@@ -5747,8 +5721,11 @@ static int rsa_sig_test(RsaKey* key, word32 keyLen, int modLen, WC_RNG* rng)
      *     -101 = USER_CRYPTO_ERROR
      */
     if (ret == 0)
-#elif defined(HAVE_FIPS) || !defined(WC_RSA_BLINDING)
-    /* FIPS140 implementation doesn't do blinding. */
+#elif defined(WOLFSSL_ASYNC_CRYPT)
+    /* async may not require RNG */
+    if (ret != 0 && ret != MISSING_RNG_E)
+#elif defined(HAVE_FIPS) || !defined(WC_RSA_BLINDING)
+    /* FIPS140 and builds without WC_RSA_BLINDING do not do blinding */
+    if (ret != 0)
 #else
     if (ret != MISSING_RNG_E)
@@ -5842,8 +5819,6 @@ static int rsa_decode_test(void)
     const byte good[] = { 0x30, 0x06, 0x02, 0x01, 0x23, 0x02, 0x1, 0x03 };
     const byte goodAlgId[] = { 0x30, 0x0f, 0x30, 0x0d, 0x06, 0x00,
             0x03, 0x09, 0x00, 0x30, 0x06, 0x02, 0x01, 0x23, 0x02, 0x1, 0x03 };
-    const byte goodBitStrNoZero[] = { 0x30, 0x0e, 0x30, 0x0c, 0x06, 0x00,
-            0x03, 0x08, 0x30, 0x06, 0x02, 0x01, 0x23, 0x02, 0x1, 0x03 };
     const byte goodAlgIdNull[] = { 0x30, 0x11, 0x30, 0x0f, 0x06, 0x00,
             0x05, 0x00, 0x03, 0x09, 0x00, 0x30, 0x06, 0x02, 0x01, 0x23,
             0x02, 0x1, 0x03 };
@@ -5862,6 +5837,8 @@ static int rsa_decode_test(void)
     const byte badIntN[] = { 0x30, 0x06, 0x02, 0x05, 0x23, 0x02, 0x1, 0x03 };
     const byte badNotIntE[] = { 0x30, 0x06, 0x02, 0x01, 0x23, 0x04, 0x1, 0x03 };
     const byte badLength[] = { 0x30, 0x04, 0x02, 0x01, 0x23, 0x02, 0x1, 0x03 };
+    const byte badBitStrNoZero[] = { 0x30, 0x0e, 0x30, 0x0c, 0x06, 0x00,
+            0x03, 0x08, 0x30, 0x06, 0x02, 0x01, 0x23, 0x02, 0x1, 0x03 };
 
     ret = wc_InitRsaKey(&keyPub, NULL);
     if (ret != 0)
@@ -6060,17 +6037,14 @@ static int rsa_decode_test(void)
     if (ret != 0)
         return -520;
 
-    inSz = sizeof(goodBitStrNoZero);
+    inSz = sizeof(badBitStrNoZero);
     inOutIdx = 0;
-    ret = wc_RsaPublicKeyDecode(goodBitStrNoZero, &inOutIdx, &keyPub, inSz);
-    if (ret != 0) {
+    ret = wc_RsaPublicKeyDecode(badBitStrNoZero, &inOutIdx, &keyPub, inSz);
+    if (ret != ASN_EXPECT_0_E) {
         ret = -556;
         goto done;
     }
-    if (inOutIdx != inSz) {
-        ret = -557;
-        goto done;
-    }
+    ret = 0;
 
 done:
     wc_FreeRsaKey(&keyPub);
@@ -6078,6 +6052,7 @@ done:
 }
 #endif
 
+#define RSA_TEST_BYTES 256
 int rsa_test(void)
 {
     byte*   tmp;
@@ -6089,10 +6064,13 @@ int rsa_test(void)
     WC_RNG rng;
     word32 idx = 0;
     int    ret;
-    byte   in[] = "Everyone gets Friday off.";
-    word32 inLen = (word32)XSTRLEN((char*)in);
-    byte   out[256];
-    byte   plain[256];
+    const char* inStr = "Everyone gets Friday off.";
+    word32      inLen = (word32)XSTRLEN((char*)inStr);
+    const word32 outSz   = RSA_TEST_BYTES;
+    const word32 plainSz = RSA_TEST_BYTES;
+    DECLARE_VAR_INIT(in, byte, inLen, inStr, HEAP_HINT);
+    DECLARE_VAR(out, byte, RSA_TEST_BYTES, HEAP_HINT);
+    DECLARE_VAR(plain, byte, RSA_TEST_BYTES, HEAP_HINT);
     byte*  res;
 #if !defined(USE_CERT_BUFFERS_1024) && !defined(USE_CERT_BUFFERS_2048) \
                                     && !defined(NO_FILESYSTEM)
@@ -6109,8 +6087,13 @@ int rsa_test(void)
 #endif
 
     tmp = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
-    if (tmp == NULL)
-        return -38;
+    if (tmp == NULL
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        || in == NULL || out == NULL || plain == NULL
+    #endif /* WOLFSSL_ASYNC_CRYPT */
+    ) {
+        return -40; /* a required test buffer is missing */
+    }
 
 #ifdef USE_CERT_BUFFERS_1024
     XMEMCPY(tmp, client_key_der_1024, sizeof_client_key_der_1024);
@@ -6136,7 +6119,7 @@ int rsa_test(void)
 
     ret = wc_InitRsaKey_ex(&key, HEAP_HINT, devId);
     if (ret != 0) {
-        XFREE(tmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(tmp, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
         return -39;
     }
     ret = wc_RsaPrivateKeyDecode(tmp, &idx, &key, (word32)bytes);
@@ -6144,7 +6127,12 @@ int rsa_test(void)
         XFREE(tmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
         return -41;
     }
+
+#ifndef HAVE_FIPS
+    ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
     ret = wc_InitRng(&rng);
+#endif
     if (ret != 0) {
         XFREE(tmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
         return -42;
@@ -6156,10 +6144,10 @@ int rsa_test(void)
 
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
-            ret = wc_RsaPublicEncrypt(in, inLen, out, sizeof(out), &key, &rng);
+            ret = wc_RsaPublicEncrypt(in, inLen, out, outSz, &key, &rng);
         }
     } while (ret == WC_PENDING_E);
     if (ret < 0) {
@@ -6184,10 +6172,10 @@ int rsa_test(void)
     idx = ret; /* save off encrypted length */
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
-            ret = wc_RsaPrivateDecrypt(out, idx, plain, sizeof(plain), &key);
+            ret = wc_RsaPrivateDecrypt(out, idx, plain, plainSz, &key);
         }
     } while (ret == WC_PENDING_E);
     if (ret < 0) {
@@ -6203,7 +6191,7 @@ int rsa_test(void)
     }
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
             ret = wc_RsaPrivateDecryptInline(out, idx, &res, &key);
@@ -6218,10 +6206,10 @@ int rsa_test(void)
 
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
-            ret = wc_RsaSSL_Sign(in, inLen, out, sizeof(out), &key, &rng);
+            ret = wc_RsaSSL_Sign(in, inLen, out, outSz, &key, &rng);
         }
     } while (ret == WC_PENDING_E);
     if (ret < 0) {
@@ -6231,13 +6219,13 @@ int rsa_test(void)
     }
 
     idx = ret;
-    XMEMSET(plain, 0, sizeof(plain));
+    XMEMSET(plain, 0, plainSz);
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
-            ret = wc_RsaSSL_Verify(out, idx, plain, sizeof(plain), &key);
+            ret = wc_RsaSSL_Verify(out, idx, plain, plainSz, &key);
         }
     } while (ret == WC_PENDING_E);
     if (ret < 0) {
@@ -6257,14 +6245,14 @@ int rsa_test(void)
     #if !defined(HAVE_FAST_RSA) && !defined(HAVE_USER_RSA) && \
         !defined(HAVE_FIPS)
     #ifndef NO_SHA
-    XMEMSET(plain, 0, sizeof(plain));
+    XMEMSET(plain, 0, plainSz);
 
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
-            ret = wc_RsaPublicEncrypt_ex(in, inLen, out, sizeof(out), &key, &rng,
+            ret = wc_RsaPublicEncrypt_ex(in, inLen, out, outSz, &key, &rng,
                        WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA, WC_MGF1SHA1, NULL, 0);
         }
     } while (ret == WC_PENDING_E);
@@ -6277,10 +6265,10 @@ int rsa_test(void)
     idx = ret;
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
-            ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, sizeof(plain), &key,
+            ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, plainSz, &key,
                        WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA, WC_MGF1SHA1, NULL, 0);
         }
     } while (ret == WC_PENDING_E);
@@ -6298,13 +6286,13 @@ int rsa_test(void)
     #endif /* NO_SHA */
 
     #ifndef NO_SHA256
-    XMEMSET(plain, 0, sizeof(plain));
+    XMEMSET(plain, 0, plainSz);
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
-            ret = wc_RsaPublicEncrypt_ex(in, inLen, out, sizeof(out), &key, &rng,
+            ret = wc_RsaPublicEncrypt_ex(in, inLen, out, outSz, &key, &rng,
                   WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, NULL, 0);
         }
     } while (ret == WC_PENDING_E);
@@ -6317,10 +6305,10 @@ int rsa_test(void)
     idx = ret;
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
-            ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, sizeof(plain), &key,
+            ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, plainSz, &key,
                   WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, NULL, 0);
         }
     } while (ret == WC_PENDING_E);
@@ -6338,7 +6326,7 @@ int rsa_test(void)
 
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
             ret = wc_RsaPrivateDecryptInline_ex(out, idx, &res, &key,
@@ -6353,13 +6341,13 @@ int rsa_test(void)
         return -475;
 
     /* check fails if not using the same optional label */
-    XMEMSET(plain, 0, sizeof(plain));
+    XMEMSET(plain, 0, plainSz);
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
-            ret = wc_RsaPublicEncrypt_ex(in, inLen, out, sizeof(out), &key, &rng,
+            ret = wc_RsaPublicEncrypt_ex(in, inLen, out, outSz, &key, &rng,
                   WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, NULL, 0);
         }
     } while (ret == WC_PENDING_E);
@@ -6372,11 +6360,11 @@ int rsa_test(void)
     idx = ret;
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
-            ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, sizeof(plain), &key,
-               WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, in, sizeof(in));
+            ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, plainSz, &key,
+               WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, in, inLen);
         }
     } while (ret == WC_PENDING_E);
     if (ret > 0) { /* in this case decrypt should fail */
@@ -6387,14 +6375,14 @@ int rsa_test(void)
     ret = 0;
 
     /* check using optional label with encrypt/decrypt */
-    XMEMSET(plain, 0, sizeof(plain));
+    XMEMSET(plain, 0, plainSz);
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
-            ret = wc_RsaPublicEncrypt_ex(in, inLen, out, sizeof(out), &key, &rng,
-               WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, in, sizeof(in));
+            ret = wc_RsaPublicEncrypt_ex(in, inLen, out, outSz, &key, &rng,
+               WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, in, inLen);
         }
     } while (ret == WC_PENDING_E);
     if (ret < 0) {
@@ -6406,11 +6394,11 @@ int rsa_test(void)
     idx = ret;
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
-            ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, sizeof(plain), &key,
-               WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, in, sizeof(in));
+            ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, plainSz, &key,
+               WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, in, inLen);
         }
     } while (ret == WC_PENDING_E);
     if (ret < 0) {
@@ -6427,14 +6415,14 @@ int rsa_test(void)
 
     #ifndef NO_SHA
         /* check fail using mismatch hash algorithms */
-        XMEMSET(plain, 0, sizeof(plain));
+        XMEMSET(plain, 0, plainSz);
         do {
     #if defined(WOLFSSL_ASYNC_CRYPT)
-            ret = wc_RsaAsyncWait(ret, &key);
+            ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
     #endif
             if (ret >= 0) {
-                ret = wc_RsaPublicEncrypt_ex(in, inLen, out, sizeof(out), &key, &rng,
-                    WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA, WC_MGF1SHA1, in, sizeof(in));
+                ret = wc_RsaPublicEncrypt_ex(in, inLen, out, outSz, &key, &rng,
+                    WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA, WC_MGF1SHA1, in, inLen);
             }
         } while (ret == WC_PENDING_E);
         if (ret < 0) {
@@ -6446,11 +6434,11 @@ int rsa_test(void)
         idx = ret;
         do {
     #if defined(WOLFSSL_ASYNC_CRYPT)
-            ret = wc_RsaAsyncWait(ret, &key);
+            ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
     #endif
             if (ret >= 0) {
-                ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, sizeof(plain), &key,
-                   WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, in, sizeof(in));
+                ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, plainSz, &key,
+                   WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, in, inLen);
             }
         } while (ret == WC_PENDING_E);
         if (ret > 0) { /* should fail */
@@ -6468,13 +6456,13 @@ int rsa_test(void)
        and test, since OAEP padding requires this.
        BAD_FUNC_ARG is returned when this case is not met */
     if (wc_RsaEncryptSize(&key) > ((int)SHA512_DIGEST_SIZE * 2) + 2) {
-        XMEMSET(plain, 0, sizeof(plain));
+        XMEMSET(plain, 0, plainSz);
         do {
     #if defined(WOLFSSL_ASYNC_CRYPT)
-            ret = wc_RsaAsyncWait(ret, &key);
+            ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
     #endif
             if (ret >= 0) {
-                ret = wc_RsaPublicEncrypt_ex(in, inLen, out, sizeof(out), &key, &rng,
+                ret = wc_RsaPublicEncrypt_ex(in, inLen, out, outSz, &key, &rng,
                   WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA512, WC_MGF1SHA512, NULL, 0);
             }
         } while (ret == WC_PENDING_E);
@@ -6487,10 +6475,10 @@ int rsa_test(void)
         idx = ret;
         do {
     #if defined(WOLFSSL_ASYNC_CRYPT)
-            ret = wc_RsaAsyncWait(ret, &key);
+            ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
     #endif
             if (ret >= 0) {
-                ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, sizeof(plain), &key,
+                ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, plainSz, &key,
                   WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA512, WC_MGF1SHA512, NULL, 0);
             }
         } while (ret == WC_PENDING_E);
@@ -6509,13 +6497,13 @@ int rsa_test(void)
     #endif /* WOLFSSL_SHA512 */
 
     /* check using pkcsv15 padding with _ex API */
-    XMEMSET(plain, 0, sizeof(plain));
+    XMEMSET(plain, 0, plainSz);
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
-            ret = wc_RsaPublicEncrypt_ex(in, inLen, out, sizeof(out), &key, &rng,
+            ret = wc_RsaPublicEncrypt_ex(in, inLen, out, outSz, &key, &rng,
                   WC_RSA_PKCSV15_PAD, WC_HASH_TYPE_NONE, 0, NULL, 0);
         }
     } while (ret == WC_PENDING_E);
@@ -6528,10 +6516,10 @@ int rsa_test(void)
     idx = ret;
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
-            ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, sizeof(plain), &key,
+            ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, plainSz, &key,
                   WC_RSA_PKCSV15_PAD, WC_HASH_TYPE_NONE, 0, NULL, 0);
         }
     } while (ret == WC_PENDING_E);
@@ -6849,14 +6837,23 @@ int rsa_test(void)
         }
     #endif /* WOLFSSL_CERT_EXT */
 
-        certSz = wc_MakeSelfCert(&myCert, derCert, FOURK_BUF, &key, &rng);
-        if (certSz < 0) {
+        ret = 0;
+        do {
+    #if defined(WOLFSSL_ASYNC_CRYPT)
+            ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+    #endif
+            if (ret >= 0) {
+                ret = wc_MakeSelfCert(&myCert, derCert, FOURK_BUF, &key, &rng);
+            }
+        } while (ret == WC_PENDING_E);
+        if (ret < 0) {
             XFREE(derCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             XFREE(tmp, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             wc_FreeRng(&rng);
             return -401;
         }
+        certSz = ret;
 
     #ifdef WOLFSSL_TEST_CERT
         InitDecodedCert(&decode, derCert, certSz, HEAP_HINT);
@@ -6966,7 +6963,7 @@ int rsa_test(void)
         XMEMCPY(tmp, ca_key_der_2048, sizeof_ca_key_der_2048);
         bytes3 = sizeof_ca_key_der_2048;
     #else
-        file3 = fopen(caKeyFile, "rb");
+        file3 = fopen(rsaCaKeyFile, "rb");
         if (!file3) {
             XFREE(derCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
@@ -7034,7 +7031,7 @@ int rsa_test(void)
         ret = wc_SetAuthKeyIdFromCert(&myCert, ca_cert_der_1024,
                                             sizeof_ca_cert_der_1024);
     #else
-        ret = wc_SetAuthKeyId(&myCert, caCertFile);
+        ret = wc_SetAuthKeyId(&myCert, rsaCaCertFile);
     #endif
         if (ret != 0) {
             XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
@@ -7061,7 +7058,7 @@ int rsa_test(void)
         ret = wc_SetIssuerBuffer(&myCert, ca_cert_der_1024,
                                             sizeof_ca_cert_der_1024);
     #else
-        ret = wc_SetIssuer(&myCert, caCertFile);
+        ret = wc_SetIssuer(&myCert, rsaCaCertFile);
     #endif
         if (ret < 0) {
             XFREE(derCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
@@ -7082,9 +7079,17 @@ int rsa_test(void)
             return -407;
         }
 
-        certSz = wc_SignCert(myCert.bodySz, myCert.sigType, derCert, FOURK_BUF,
+        ret = 0;
+        do {
+        #if defined(WOLFSSL_ASYNC_CRYPT)
+            ret = wc_AsyncWait(ret, &caKey.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+        #endif
+            if (ret >= 0) {
+                ret = wc_SignCert(myCert.bodySz, myCert.sigType, derCert, FOURK_BUF,
                           &caKey, NULL, &rng);
-        if (certSz < 0) {
+            }
+        } while (ret == WC_PENDING_E);
+        if (ret < 0) {
             XFREE(derCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             XFREE(tmp, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
@@ -7092,6 +7097,7 @@ int rsa_test(void)
             wc_FreeRng(&rng);
             return -408;
         }
+        certSz = ret;
 
     #ifdef WOLFSSL_TEST_CERT
         InitDecodedCert(&decode, derCert, certSz, HEAP_HINT);
@@ -7224,7 +7230,7 @@ int rsa_test(void)
         fclose(file3);
     #endif /* USE_CERT_BUFFERS_256 */
 
-        wc_ecc_init(&caKey);
+        wc_ecc_init_ex(&caKey, HEAP_HINT, devId);
         ret = wc_EccPrivateKeyDecode(tmp, &idx3, &caKey, (word32)bytes3);
         if (ret != 0) {
             XFREE(derCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
@@ -7270,7 +7276,7 @@ int rsa_test(void)
         fclose(file3);
     #endif
 
-        wc_ecc_init(&caKeyPub);
+        wc_ecc_init_ex(&caKeyPub, HEAP_HINT, devId);
         if (ret != 0) {
             XFREE(derCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
@@ -7346,9 +7352,17 @@ int rsa_test(void)
             return -5407;
         }
 
-        certSz = wc_SignCert(myCert.bodySz, myCert.sigType, derCert, FOURK_BUF,
-                          NULL, &caKey, &rng);
-        if (certSz < 0) {
+        ret = 0;
+        do {
+        #if defined(WOLFSSL_ASYNC_CRYPT)
+            ret = wc_AsyncWait(ret, &caKey.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+        #endif
+            if (ret >= 0) {
+                ret = wc_SignCert(myCert.bodySz, myCert.sigType, derCert,
+                                  FOURK_BUF, NULL, &caKey, &rng);
+            }
+        } while (ret == WC_PENDING_E);
+        if (ret < 0) {
             XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             XFREE(derCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             wc_ecc_free(&caKey);
@@ -7356,6 +7370,7 @@ int rsa_test(void)
             wc_FreeRng(&rng);
             return -5408;
         }
+        certSz = ret;
 
     #ifdef WOLFSSL_TEST_CERT
         InitDecodedCert(&decode, derCert, certSz, 0);
@@ -7520,7 +7535,7 @@ int rsa_test(void)
         XMEMCPY(tmp, ca_key_der_2048, sizeof_ca_key_der_2048);
         bytes = sizeof_ca_key_der_2048;
     #else
-        caFile = fopen(caKeyFile, "rb");
+        caFile = fopen(rsaCaKeyFile, "rb");
         if (!caFile) {
             XFREE(derCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
@@ -7580,7 +7595,7 @@ int rsa_test(void)
         ret = wc_SetAuthKeyIdFromCert(&myCert, ca_cert_der_1024,
                                             sizeof_ca_cert_der_1024);
     #else
-        ret = wc_SetAuthKeyId(&myCert, caCertFile);
+        ret = wc_SetAuthKeyId(&myCert, rsaCaCertFile);
     #endif
         if (ret != 0) {
             XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
@@ -7608,7 +7623,7 @@ int rsa_test(void)
         ret = wc_SetIssuerBuffer(&myCert, ca_cert_der_1024,
                                             sizeof_ca_cert_der_1024);
     #else
-        ret = wc_SetIssuer(&myCert, caCertFile);
+        ret = wc_SetIssuer(&myCert, rsaCaCertFile);
     #endif
         if (ret < 0) {
             XFREE(derCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
@@ -7630,17 +7645,25 @@ int rsa_test(void)
             return -456;
         }
 
-        certSz = wc_SignCert(myCert.bodySz, myCert.sigType, derCert, FOURK_BUF,
+        ret = 0;
+        do {
+        #if defined(WOLFSSL_ASYNC_CRYPT)
+            ret = wc_AsyncWait(ret, &caKey.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+        #endif
+            if (ret >= 0) {
+                ret = wc_SignCert(myCert.bodySz, myCert.sigType, derCert, FOURK_BUF,
                           &caKey, NULL, &rng);
+            }
+        } while (ret == WC_PENDING_E);
         wc_FreeRsaKey(&caKey);
-        if (certSz < 0) {
+        if (ret < 0) {
             XFREE(derCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             XFREE(tmp, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             wc_FreeRng(&rng);
             return -457;
         }
-
+        certSz = ret;
 
     #ifdef WOLFSSL_TEST_CERT
         InitDecodedCert(&decode, derCert, certSz, HEAP_HINT);
@@ -7795,15 +7818,24 @@ int rsa_test(void)
             return -465;
         }
 
-        derSz = wc_SignCert(req.bodySz, req.sigType, der, FOURK_BUF,
+        ret = 0;
+        do {
+        #if defined(WOLFSSL_ASYNC_CRYPT)
+            ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+        #endif
+            if (ret >= 0) {
+                ret = wc_SignCert(req.bodySz, req.sigType, der, FOURK_BUF,
                           &key, NULL, &rng);
-        if (derSz < 0) {
+            }
+        } while (ret == WC_PENDING_E);
+        if (ret < 0) {
             XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             XFREE(der, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             XFREE(tmp, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             wc_FreeRng(&rng);
             return -466;
         }
+        derSz = ret;
 
         pemSz = wc_DerToPem(der, derSz, pem, FOURK_BUF, CERTREQ_TYPE);
         if (pemSz < 0) {
@@ -7862,13 +7894,14 @@ int rsa_test(void)
     wc_FreeRsaKey(&key);
 #ifdef WOLFSSL_CERT_EXT
     wc_FreeRsaKey(&keypub);
-#endif
-#ifdef HAVE_CAVIUM
-    wc_RsaFreeCavium(&key);
 #endif
     XFREE(tmp, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
     wc_FreeRng(&rng);
 
+    FREE_VAR(in, HEAP_HINT);
+    FREE_VAR(out, HEAP_HINT);
+    FREE_VAR(plain, HEAP_HINT);
+
     return 0;
 }
 
@@ -7965,58 +7998,93 @@ int dh_test(void)
     (void)tmp;
     (void)bytes;
 
-    ret = wc_InitDhKey(&key);
-    if (ret != 0)
-        return -57;
-    ret = wc_InitDhKey(&key2);
-    if (ret != 0)
-        return -57;
+    ret = wc_InitDhKey_ex(&key, HEAP_HINT, devId);
+    if (ret != 0) {
+        ret = -57; goto done;
+    }
+    ret = wc_InitDhKey_ex(&key2, HEAP_HINT, devId);
+    if (ret != 0) {
+        ret = -57; goto done;
+    }
 
 #ifdef NO_ASN
     ret = wc_DhSetKey(&key, dh_p, sizeof(dh_p), dh_g, sizeof(dh_g));
-    if (ret != 0)
-        return -51;
+    if (ret != 0) {
+        ret = -51; goto done;
+    }
 
     ret = wc_DhSetKey(&key2, dh_p, sizeof(dh_p), dh_g, sizeof(dh_g));
-    if (ret != 0)
-        return -51;
+    if (ret != 0) {
+        ret = -51; goto done;
+    }
 #else
     ret = wc_DhKeyDecode(tmp, &idx, &key, bytes);
-    if (ret != 0)
-        return -51;
+    if (ret != 0) {
+        ret = -51; goto done;
+    }
 
     idx = 0;
     ret = wc_DhKeyDecode(tmp, &idx, &key2, bytes);
-    if (ret != 0)
-        return -52;
+    if (ret != 0) {
+        ret = -52; goto done;
+    }
 #endif
 
+#ifndef HAVE_FIPS
+    ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
     ret = wc_InitRng(&rng);
-    if (ret != 0)
-        return -53;
+#endif
+    if (ret != 0) {
+        ret = -53; goto done;
+    }
 
-    ret =  wc_DhGenerateKeyPair(&key, &rng, priv, &privSz, pub, &pubSz);
-    ret += wc_DhGenerateKeyPair(&key2, &rng, priv2, &privSz2, pub2, &pubSz2);
-    if (ret != 0)
-        return -54;
+    ret = wc_DhGenerateKeyPair(&key, &rng, priv, &privSz, pub, &pubSz);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0) {
+        ret = -54; goto done;
+    }
 
-    ret =  wc_DhAgree(&key, agree, &agreeSz, priv, privSz, pub2, pubSz2);
-    ret += wc_DhAgree(&key2, agree2, &agreeSz2, priv2, privSz2, pub, pubSz);
-    if (ret != 0)
-        return -55;
+    ret = wc_DhGenerateKeyPair(&key2, &rng, priv2, &privSz2, pub2, &pubSz2);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &key2.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0) {
+        ret = -54; goto done;
+    }
 
-    if (XMEMCMP(agree, agree2, agreeSz))
-        return -56;
+    ret = wc_DhAgree(&key, agree, &agreeSz, priv, privSz, pub2, pubSz2);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0) {
+        ret = -55; goto done;
+    }
+
+    ret = wc_DhAgree(&key2, agree2, &agreeSz2, priv2, privSz2, pub, pubSz);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &key2.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0) {
+        ret = -55; goto done;
+    }
+
+    if (agreeSz != agreeSz2 || XMEMCMP(agree, agree2, agreeSz)) {
+        ret = -56; goto done; } /* braced: jump to cleanup only on mismatch */
 
     ret = dh_generate_test(&rng);
     if (ret != 0)
-        return -57;
+        ret = -57;
+
+done:
 
     wc_FreeDhKey(&key);
     wc_FreeDhKey(&key2);
     wc_FreeRng(&rng);
 
-    return 0;
+    return ret;
 }
 
 #endif /* NO_DH */
@@ -8051,11 +8119,12 @@ int dsa_test(void)
     fclose(file);
 #endif /* USE_CERT_BUFFERS */
 
-    ret = wc_InitSha(&sha);
+    ret = wc_InitSha_ex(&sha, HEAP_HINT, devId);
     if (ret != 0)
         return -4002;
     wc_ShaUpdate(&sha, tmp, bytes);
     wc_ShaFinal(&sha, hash);
+    wc_ShaFree(&sha);
 
     ret = wc_InitDsaKey(&key);
     if (ret != 0) return -66;
@@ -8063,7 +8132,11 @@ int dsa_test(void)
     ret = wc_DsaPrivateKeyDecode(tmp, &idx, &key, bytes);
     if (ret != 0) return -61;
 
+#ifndef HAVE_FIPS
+    ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
     ret = wc_InitRng(&rng);
+#endif
     if (ret != 0) return -62;
 
     ret = wc_DsaSign(hash, signature, &key, &rng);
@@ -8207,7 +8280,7 @@ static int generate_random_salt(byte *buf, word32 size)
     if(NULL == buf || !size)
         return -1;
 
-    if (buf && size && wc_InitRng(&rng) == 0) {
+    if (buf && size && wc_InitRng_ex(&rng, HEAP_HINT, devId) == 0) {
         ret = wc_RNG_GenerateBlock(&rng, (byte *)buf, size);
 
         wc_FreeRng(&rng);
@@ -9459,6 +9532,14 @@ int x963kdf_test(void)
 
 #ifdef HAVE_ECC
 
+#ifdef BENCH_EMBEDDED
+    #define ECC_SHARED_SIZE 128
+#else
+    #define ECC_SHARED_SIZE 1024
+#endif
+#define ECC_DIGEST_SIZE     MAX_ECC_BYTES
+#define ECC_SIG_SIZE        ECC_MAX_SIG_SIZE
+
 #ifndef NO_ECC_VECTOR_TEST
     #if (defined(HAVE_ECC192) || defined(HAVE_ECC224) ||\
          !defined(NO_ECC256) || defined(HAVE_ECC384) ||\
@@ -9485,12 +9566,12 @@ static int ecc_test_vector_item(const eccVector* vector)
     int ret = 0, verify;
     word32  x;
     ecc_key userA;
-    byte    sig[1024];
+    DECLARE_VAR(sig, byte, ECC_SIG_SIZE, HEAP_HINT);
 
-    wc_ecc_init(&userA);
+    wc_ecc_init_ex(&userA, HEAP_HINT, devId);
 
-    XMEMSET(sig, 0, sizeof(sig));
-    x = sizeof(sig);
+    XMEMSET(sig, 0, ECC_SIG_SIZE);
+    x = ECC_SIG_SIZE;
 
     ret = wc_ecc_import_raw(&userA, vector->Qx, vector->Qy,
                                              vector->d, vector->curveName);
@@ -9501,8 +9582,16 @@ static int ecc_test_vector_item(const eccVector* vector)
     if (ret != 0)
         goto done;
 
-    ret = wc_ecc_verify_hash(sig, x, (byte*)vector->msg, vector->msgLen,
-                                                            &verify, &userA);
+    do {
+    #if defined(WOLFSSL_ASYNC_CRYPT)
+        ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+    #endif
+        if (ret >= 0) {
+            ret = wc_ecc_verify_hash(sig, x, (byte*)vector->msg, vector->msgLen,
+                                                               &verify, &userA);
+        }
+    } while (ret == WC_PENDING_E);
+
     if (ret != 0)
         goto done;
 
@@ -9512,6 +9601,8 @@ static int ecc_test_vector_item(const eccVector* vector)
 done:
     wc_ecc_free(&userA);
 
+    FREE_VAR(sig, HEAP_HINT);
+
     return ret;
 }
 
@@ -9777,9 +9868,14 @@ static int ecc_test_key_gen(WC_RNG* rng, int keySize)
 
     ecc_key userA;
 
-    wc_ecc_init(&userA);
+    ret = wc_ecc_init_ex(&userA, HEAP_HINT, devId);
+    if (ret != 0)
+        goto done;
 
     ret = wc_ecc_make_key(rng, keySize, &userA);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+#endif
     if (ret != 0)
         goto done;
 
@@ -9850,23 +9946,19 @@ done:
     return ret;
 }
 #endif /* WOLFSSL_KEY_GEN */
+
 static int ecc_test_curve_size(WC_RNG* rng, int keySize, int testVerifyCount,
     int curve_id)
 {
-#ifdef BENCH_EMBEDDED
-    byte    sharedA[128]; /* Needs to be at least keySize */
-    byte    sharedB[128]; /* Needs to be at least keySize */
-#else
-    byte    sharedA[1024];
-    byte    sharedB[1024];
-#endif
+    DECLARE_VAR(sharedA, byte, ECC_SHARED_SIZE, HEAP_HINT);
+    DECLARE_VAR(sharedB, byte, ECC_SHARED_SIZE, HEAP_HINT);
 #ifdef HAVE_ECC_KEY_EXPORT
     byte    exportBuf[1024];
 #endif
     word32  x, y;
 #ifdef HAVE_ECC_SIGN
-    byte    sig[1024];
-    byte    digest[20];
+    DECLARE_VAR(sig, byte, ECC_SIG_SIZE, HEAP_HINT);
+    DECLARE_VAR(digest, byte, ECC_DIGEST_SIZE, HEAP_HINT);
     int     i;
 #ifdef HAVE_ECC_VERIFY
     int     verify;
@@ -9877,11 +9969,24 @@ static int ecc_test_curve_size(WC_RNG* rng, int keySize, int testVerifyCount,
 
     (void)testVerifyCount;
 
-    wc_ecc_init(&userA);
-    wc_ecc_init(&userB);
-    wc_ecc_init(&pubKey);
+    XMEMSET(&userA, 0, sizeof(ecc_key));
+    XMEMSET(&userB, 0, sizeof(ecc_key));
+    XMEMSET(&pubKey, 0, sizeof(ecc_key));
+
+    ret = wc_ecc_init_ex(&userA, HEAP_HINT, devId);
+    if (ret != 0)
+        goto done;
+    ret = wc_ecc_init_ex(&userB, HEAP_HINT, devId);
+    if (ret != 0)
+        goto done;
+    ret = wc_ecc_init_ex(&pubKey, HEAP_HINT, devId);
+    if (ret != 0)
+        goto done;
 
     ret = wc_ecc_make_key_ex(rng, keySize, &userA, curve_id);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+#endif
     if (ret != 0)
         goto done;
 
@@ -9890,18 +9995,33 @@ static int ecc_test_curve_size(WC_RNG* rng, int keySize, int testVerifyCount,
         goto done;
 
     ret = wc_ecc_make_key_ex(rng, keySize, &userB, curve_id);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &userB.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+#endif
     if (ret != 0)
         goto done;
 
 #ifdef HAVE_ECC_DHE
-    x = sizeof(sharedA);
-    ret = wc_ecc_shared_secret(&userA, &userB, sharedA, &x);
+    x = ECC_SHARED_SIZE;
+    do {
+    #if defined(WOLFSSL_ASYNC_CRYPT)
+        ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+    #endif
+        if (ret >= 0)
+            ret = wc_ecc_shared_secret(&userA, &userB, sharedA, &x);
+    } while (ret == WC_PENDING_E);
     if (ret != 0) {
         goto done;
     }
 
-    y = sizeof(sharedB);
-    ret = wc_ecc_shared_secret(&userB, &userA, sharedB, &y);
+    y = ECC_SHARED_SIZE;
+    do {
+    #if defined(WOLFSSL_ASYNC_CRYPT)
+        ret = wc_AsyncWait(ret, &userB.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+    #endif
+        if (ret >= 0)
+            ret = wc_ecc_shared_secret(&userB, &userA, sharedB, &y);
+    } while (ret == WC_PENDING_E);
     if (ret != 0)
         goto done;
 
@@ -9951,8 +10071,14 @@ static int ecc_test_curve_size(WC_RNG* rng, int keySize, int testVerifyCount,
         goto done;
 
 #ifdef HAVE_ECC_DHE
-    y = sizeof(sharedB);
-    ret = wc_ecc_shared_secret(&userB, &pubKey, sharedB, &y);
+    y = ECC_SHARED_SIZE;
+    do {
+    #if defined(WOLFSSL_ASYNC_CRYPT)
+        ret = wc_AsyncWait(ret, &userB.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+    #endif
+        if (ret >= 0)
+            ret = wc_ecc_shared_secret(&userB, &pubKey, sharedB, &y);
+    } while (ret == WC_PENDING_E);
     if (ret != 0)
         goto done;
 
@@ -9967,15 +10093,23 @@ static int ecc_test_curve_size(WC_RNG* rng, int keySize, int testVerifyCount,
         if (ret != 0)
             goto done;
         wc_ecc_free(&pubKey);
-        wc_ecc_init(&pubKey);
+        ret = wc_ecc_init_ex(&pubKey, HEAP_HINT, devId);
+        if (ret != 0)
+            goto done;
 
         ret = wc_ecc_import_x963_ex(exportBuf, x, &pubKey, curve_id);
         if (ret != 0)
             goto done;
 
     #ifdef HAVE_ECC_DHE
-        y = sizeof(sharedB);
-        ret = wc_ecc_shared_secret(&userB, &pubKey, sharedB, &y);
+        y = ECC_SHARED_SIZE;
+        do {
+        #if defined(WOLFSSL_ASYNC_CRYPT)
+            ret = wc_AsyncWait(ret, &userB.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+        #endif
+            if (ret >= 0)
+                ret = wc_ecc_shared_secret(&userB, &pubKey, sharedB, &y);
+        } while (ret == WC_PENDING_E);
         if (ret != 0)
             goto done;
 
@@ -9988,44 +10122,73 @@ static int ecc_test_curve_size(WC_RNG* rng, int keySize, int testVerifyCount,
 #endif /* HAVE_ECC_KEY_EXPORT */
 
 #ifdef HAVE_ECC_SIGN
-#ifdef ECC_SHAMIR /* ECC w/out Shamir has issue with all 0 digest */
+    /* ECC w/out Shamir has issue with all 0 digest */
+    /* WC_BIGINT doesn't handle a 0 length well on hardware */
+#if defined(ECC_SHAMIR) && !defined(WOLFSSL_ASYNC_CRYPT)
     /* test DSA sign hash with zeros */
-    for (i = 0; i < (int)sizeof(digest); i++) {
+    for (i = 0; i < (int)ECC_DIGEST_SIZE; i++) {
         digest[i] = 0;
     }
 
-    x = sizeof(sig);
-    ret = wc_ecc_sign_hash(digest, sizeof(digest), sig, &x, rng, &userA);
+    x = ECC_SIG_SIZE;
+    do {
+    #if defined(WOLFSSL_ASYNC_CRYPT)
+        ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+    #endif
+        if (ret >= 0)
+            ret = wc_ecc_sign_hash(digest, ECC_DIGEST_SIZE, sig, &x, rng,
+                                                                        &userA);
+    } while (ret == WC_PENDING_E);
     if (ret != 0)
         goto done;
 
 #ifdef HAVE_ECC_VERIFY
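     for (i=0; i<testVerifyCount; i++) {
         verify = 0;
-        ret = wc_ecc_verify_hash(sig, x, digest, sizeof(digest), &verify, &userA);
+        do {
+        #if defined(WOLFSSL_ASYNC_CRYPT)
+            ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+        #endif
+            if (ret >= 0)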
+                ret = wc_ecc_verify_hash(sig, x, digest, ECC_DIGEST_SIZE,
+                                                               &verify, &userA);
+        } while (ret == WC_PENDING_E);
         if (ret != 0)
             goto done;
         if (verify != 1)
             ERROR_OUT(-1016, done);
     }
 #endif /* HAVE_ECC_VERIFY */
-#endif /* ECC_SHAMIR */
+#endif /* ECC_SHAMIR && !WOLFSSL_ASYNC_CRYPT */
 
     /* test DSA sign hash with sequence (0,1,2,3,4,...) */
-    for (i = 0; i < (int)sizeof(digest); i++) {
+    for (i = 0; i < (int)ECC_DIGEST_SIZE; i++) {
         digest[i] = (byte)i;
     }
 
-    x = sizeof(sig);
-    ret = wc_ecc_sign_hash(digest, sizeof(digest), sig, &x, rng, &userA);
-
+    x = ECC_SIG_SIZE;
+    do {
+    #if defined(WOLFSSL_ASYNC_CRYPT)
+        ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+    #endif
+        if (ret >= 0)
+            ret = wc_ecc_sign_hash(digest, ECC_DIGEST_SIZE, sig, &x, rng,
+                                                                        &userA);
+    } while (ret == WC_PENDING_E);
     if (ret != 0)
         ERROR_OUT(-1014, done);
 
 #ifdef HAVE_ECC_VERIFY
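     for (i=0; i<testVerifyCount; i++) {
         verify = 0;
-        ret = wc_ecc_verify_hash(sig, x, digest, sizeof(digest), &verify, &userA);
+        do {
+        #if defined(WOLFSSL_ASYNC_CRYPT)
+            ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+        #endif
+            if (ret >= 0)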
+                ret = wc_ecc_verify_hash(sig, x, digest, ECC_DIGEST_SIZE,
+                                                               &verify, &userA);
+        } while (ret == WC_PENDING_E);
         if (ret != 0)
             goto done;
         if (verify != 1)
@@ -10046,6 +10209,13 @@ done:
     wc_ecc_free(&userB);
     wc_ecc_free(&userA);
 
+    FREE_VAR(sharedA, HEAP_HINT);
+    FREE_VAR(sharedB, HEAP_HINT);
+#ifdef HAVE_ECC_SIGN
+    FREE_VAR(sig, HEAP_HINT);
+    FREE_VAR(digest, HEAP_HINT);
+#endif
+
     return ret;
 }
 
@@ -10057,25 +10227,37 @@ static int ecc_test_curve(WC_RNG* rng, int keySize)
 
     ret = ecc_test_curve_size(rng, keySize, ECC_TEST_VERIFY_COUNT, ECC_CURVE_DEF);
     if (ret < 0) {
-        printf("ecc_test_curve_size %d failed!: %d\n", keySize, ret);
-        return ret;
-    }
-
-    #ifdef HAVE_ECC_VECTOR_TEST
-        ret = ecc_test_vector(keySize);
-        if (ret < 0) {
-            printf("ecc_test_vector %d failed!: %d\n", keySize, ret);
+        if (ret == ECC_CURVE_OID_E) {
+            /* ignore error for curves not found */
+            /* some curve sizes are only available with:
+                HAVE_ECC_SECPR2, HAVE_ECC_SECPR3, HAVE_ECC_BRAINPOOL and HAVE_ECC_KOBLITZ */
+        }
+        else {
+            printf("ecc_test_curve_size %d failed!: %d\n", keySize, ret);
             return ret;
         }
-    #endif
+    }
 
-    #ifdef WOLFSSL_KEY_GEN
-        ret = ecc_test_key_gen(rng, keySize);
-        if (ret < 0) {
+#ifdef HAVE_ECC_VECTOR_TEST
+    ret = ecc_test_vector(keySize);
+    if (ret < 0) {
+        printf("ecc_test_vector %d failed!: %d\n", keySize, ret);
+        return ret;
+    }
+#endif
+
+#ifdef WOLFSSL_KEY_GEN
+    ret = ecc_test_key_gen(rng, keySize);
+    if (ret < 0) {
+        if (ret == ECC_CURVE_OID_E) {
+            /* ignore error for curves not found */
+        }
+        else {
             printf("ecc_test_key_gen %d failed!: %d\n", keySize, ret);
             return ret;
         }
-    #endif
+    }
+#endif
 
     return 0;
 }
@@ -10651,7 +10833,11 @@ int ecc_test(void)
         return ret;
 #endif
 
+#ifndef HAVE_FIPS
+    ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
     ret = wc_InitRng(&rng);
+#endif
     if (ret != 0)
         return -1001;
 
@@ -10770,7 +10956,7 @@ done:
 int ecc_encrypt_test(void)
 {
     WC_RNG  rng;
-    int     ret;
+    int     ret = 0;
     ecc_key userA, userB;
     byte    msg[48];
     byte    plain[48];
@@ -10778,122 +10964,153 @@ int ecc_encrypt_test(void)
     word32  outSz   = sizeof(out);
     word32  plainSz = sizeof(plain);
     int     i;
+    ecEncCtx* cliCtx = NULL;
+    ecEncCtx* srvCtx = NULL;
+    byte cliSalt[EXCHANGE_SALT_SZ];
+    byte srvSalt[EXCHANGE_SALT_SZ];
+    const byte* tmpSalt;
+    byte    msg2[48];
+    byte    plain2[48];
+    byte    out2[80];
+    word32  outSz2   = sizeof(out2);
+    word32  plainSz2 = sizeof(plain2);
 
+#ifndef HAVE_FIPS
+    ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
     ret = wc_InitRng(&rng);
+#endif
     if (ret != 0)
-        return -3001;
+        return -1001;
 
-    wc_ecc_init(&userA);
-    wc_ecc_init(&userB);
+    XMEMSET(&userA, 0, sizeof(userA));
+    XMEMSET(&userB, 0, sizeof(userB));
+
+    ret = wc_ecc_init_ex(&userA, HEAP_HINT, devId);
+    if (ret != 0)
+        goto done;
+    ret = wc_ecc_init_ex(&userB, HEAP_HINT, devId);
+    if (ret != 0)
+        goto done;
 
     ret  = wc_ecc_make_key(&rng, 32, &userA);
-    ret += wc_ecc_make_key(&rng, 32, &userB);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0){
+        ret = -3001; goto done;
+    }
 
-    if (ret != 0)
-        return -3002;
+    ret = wc_ecc_make_key(&rng, 32, &userB);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &userB.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0){
+        ret = -3002; goto done;
+    }
 
-    for (i = 0; i < 48; i++)
+    /* set message to incrementing 0,1,2,etc... */
+    for (i = 0; i < (int)sizeof(msg); i++)
         msg[i] = i;
 
     /* encrypt msg to B */
     ret = wc_ecc_encrypt(&userA, &userB, msg, sizeof(msg), out, &outSz, NULL);
-    if (ret != 0)
-        return -3003;
+    if (ret != 0) {
+        ret = -3003; goto done;
+    }
 
     /* decrypt msg from A */
     ret = wc_ecc_decrypt(&userB, &userA, out, outSz, plain, &plainSz, NULL);
-    if (ret != 0)
-        return -3004;
-
-    if (XMEMCMP(plain, msg, sizeof(msg)) != 0)
-        return -3005;
-
-
-    {  /* let's verify message exchange works, A is client, B is server */
-        ecEncCtx* cliCtx = wc_ecc_ctx_new(REQ_RESP_CLIENT, &rng);
-        ecEncCtx* srvCtx = wc_ecc_ctx_new(REQ_RESP_SERVER, &rng);
-
-        byte cliSalt[EXCHANGE_SALT_SZ];
-        byte srvSalt[EXCHANGE_SALT_SZ];
-        const byte* tmpSalt;
-
-        if (cliCtx == NULL || srvCtx == NULL)
-            return -3006;
-
-        /* get salt to send to peer */
-        tmpSalt = wc_ecc_ctx_get_own_salt(cliCtx);
-        if (tmpSalt == NULL)
-            return -3007;
-        XMEMCPY(cliSalt, tmpSalt, EXCHANGE_SALT_SZ);
-
-        tmpSalt = wc_ecc_ctx_get_own_salt(srvCtx);
-        if (tmpSalt == NULL)
-            return -3007;
-        XMEMCPY(srvSalt, tmpSalt, EXCHANGE_SALT_SZ);
-
-        /* in actual use, we'd get the peer's salt over the transport */
-        ret  = wc_ecc_ctx_set_peer_salt(cliCtx, srvSalt);
-        ret += wc_ecc_ctx_set_peer_salt(srvCtx, cliSalt);
-
-        ret += wc_ecc_ctx_set_info(cliCtx, (byte*)"wolfSSL MSGE", 11);
-        ret += wc_ecc_ctx_set_info(srvCtx, (byte*)"wolfSSL MSGE", 11);
-
-        if (ret != 0)
-            return -3008;
-
-        /* get encrypted msg (request) to send to B */
-        outSz  = sizeof(out);
-        ret = wc_ecc_encrypt(&userA, &userB, msg, sizeof(msg), out, &outSz,cliCtx);
-        if (ret != 0)
-            return -3009;
-
-        /* B decrypts msg (request) from A */
-        plainSz = sizeof(plain);
-        ret = wc_ecc_decrypt(&userB, &userA, out, outSz, plain, &plainSz, srvCtx);
-        if (ret != 0)
-            return -3010;
-
-        if (XMEMCMP(plain, msg, sizeof(msg)) != 0)
-            return -3011;
-
-        {
-            /* msg2 (response) from B to A */
-            byte    msg2[48];
-            byte    plain2[48];
-            byte    out2[80];
-            word32  outSz2   = sizeof(out2);
-            word32  plainSz2 = sizeof(plain2);
-
-            for (i = 0; i < 48; i++)
-                msg2[i] = i+48;
-
-            /* get encrypted msg (response) to send to B */
-            ret = wc_ecc_encrypt(&userB, &userA, msg2, sizeof(msg2), out2,
-                              &outSz2, srvCtx);
-            if (ret != 0)
-                return -3012;
-
-            /* A decrypts msg (response) from B */
-            ret = wc_ecc_decrypt(&userA, &userB, out2, outSz2, plain2, &plainSz2,
-                             cliCtx);
-            if (ret != 0)
-                return -3013;
-
-            if (XMEMCMP(plain2, msg2, sizeof(msg2)) != 0)
-                return -3014;
-        }
-
-        /* cleanup */
-        wc_ecc_ctx_free(srvCtx);
-        wc_ecc_ctx_free(cliCtx);
+    if (ret != 0) {
+        ret = -3004; goto done;
     }
 
+    if (XMEMCMP(plain, msg, sizeof(msg)) != 0) {
+        ret = -3005; goto done;
+    }
+
+    /* let's verify message exchange works, A is client, B is server */
+    cliCtx = wc_ecc_ctx_new(REQ_RESP_CLIENT, &rng);
+    srvCtx = wc_ecc_ctx_new(REQ_RESP_SERVER, &rng);
+    if (cliCtx == NULL || srvCtx == NULL) {
+        ret = -3006; goto done;
+    }
+
+    /* get salt to send to peer */
+    tmpSalt = wc_ecc_ctx_get_own_salt(cliCtx);
+    if (tmpSalt == NULL) {
+        ret = -3007; goto done;
+    }
+    XMEMCPY(cliSalt, tmpSalt, EXCHANGE_SALT_SZ);
+
+    tmpSalt = wc_ecc_ctx_get_own_salt(srvCtx);
+    if (tmpSalt == NULL) {
+        ret = -3007; goto done;
+    }
+    XMEMCPY(srvSalt, tmpSalt, EXCHANGE_SALT_SZ);
+
+    /* in actual use, we'd get the peer's salt over the transport */
+    ret = wc_ecc_ctx_set_peer_salt(cliCtx, srvSalt);
+    if (ret != 0)
+        goto done;
+    ret = wc_ecc_ctx_set_peer_salt(srvCtx, cliSalt);
+    if (ret != 0)
+        goto done;
+
+    ret = wc_ecc_ctx_set_info(cliCtx, (byte*)"wolfSSL MSGE", 11);
+    if (ret != 0)
+        goto done;
+    ret = wc_ecc_ctx_set_info(srvCtx, (byte*)"wolfSSL MSGE", 11);
+    if (ret != 0)
+        goto done;
+
+    /* get encrypted msg (request) to send to B */
+    outSz = sizeof(out);
+    ret = wc_ecc_encrypt(&userA, &userB, msg, sizeof(msg), out, &outSz,cliCtx);
+    if (ret != 0)
+        goto done;
+
+    /* B decrypts msg (request) from A */
+    plainSz = sizeof(plain);
+    ret = wc_ecc_decrypt(&userB, &userA, out, outSz, plain, &plainSz, srvCtx);
+    if (ret != 0)
+        goto done;
+
+    if (XMEMCMP(plain, msg, sizeof(msg)) != 0) {
+        ret = -3011; goto done;
+    }
+
+    /* msg2 (response) from B to A */
+    for (i = 0; i < (int)sizeof(msg2); i++)
+        msg2[i] = i + sizeof(msg2);
+
+    /* get encrypted msg (response) to send to A */
+    ret = wc_ecc_encrypt(&userB, &userA, msg2, sizeof(msg2), out2,
+                      &outSz2, srvCtx);
+    if (ret != 0)
+        goto done;
+
+    /* A decrypts msg (response) from B */
+    ret = wc_ecc_decrypt(&userA, &userB, out2, outSz2, plain2, &plainSz2,
+                     cliCtx);
+    if (ret != 0)
+        goto done;
+
+    if (XMEMCMP(plain2, msg2, sizeof(msg2)) != 0) {
+        ret = -3014; goto done;
+    }
+
+done:
+
     /* cleanup */
+    wc_ecc_ctx_free(srvCtx);
+    wc_ecc_ctx_free(cliCtx);
+
     wc_ecc_free(&userB);
     wc_ecc_free(&userA);
     wc_FreeRng(&rng);
 
-    return 0;
+    return ret;
 }
 
 #endif /* HAVE_ECC_ENCRYPT */
@@ -10930,7 +11147,11 @@ int ecc_test_buffers() {
     if (ret != 0)
         return -41;
 
+#ifndef HAVE_FIPS
+    ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
     ret = wc_InitRng(&rng);
+#endif
     if (ret != 0)
         return -42;
 
@@ -10979,6 +11200,10 @@ int ecc_test_buffers() {
         return -52;
 #endif
 
+    wc_ecc_free(&cliKey);
+    wc_ecc_free(&servKey);
+    wc_FreeRng(&rng);
+
     return 0;
 }
 #endif /* USE_CERT_BUFFERS_256 */
@@ -10990,6 +11215,7 @@ int ecc_test_buffers() {
 int curve25519_test(void)
 {
     WC_RNG  rng;
+    int ret;
 #ifdef HAVE_CURVE25519_SHARED_SECRET
     byte    sharedA[32];
     byte    sharedB[32];
@@ -11047,7 +11273,12 @@ int curve25519_test(void)
     };
 #endif /* HAVE_CURVE25519_SHARED_SECRET */
 
-    if (wc_InitRng(&rng) != 0)
+#ifndef HAVE_FIPS
+    ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
+    ret = wc_InitRng(&rng);
+#endif
+    if (ret != 0)
         return -1001;
 
     wc_curve25519_init(&userA);
@@ -11175,7 +11406,7 @@ int ed25519_test(void)
     byte   exportSKey[ED25519_KEY_SIZE];
     word32 exportPSz;
     word32 exportSSz;
-    int    i;
+    int    i, ret;
     word32 outlen;
 #ifdef HAVE_ED25519_VERIFY
     int    verify;
@@ -11502,7 +11733,14 @@ int ed25519_test(void)
 #endif /* HAVE_ED25519_SIGN && HAVE_ED25519_KEY_EXPORT && HAVE_ED25519_KEY_IMPORT */
 
     /* create ed25519 keys */
-    wc_InitRng(&rng);
+#ifndef HAVE_FIPS
+    ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
+    ret = wc_InitRng(&rng);
+#endif
+    if (ret != 0)
+        return -1020;
+
     wc_ed25519_init(&key);
     wc_ed25519_init(&key2);
     wc_ed25519_make_key(&rng, ED25519_KEY_SIZE, &key);
@@ -12508,7 +12746,11 @@ int pkcs7signed_test(void)
     fclose(file);
 #endif /* USE_CERT_BUFFER_ */
 
+#ifndef HAVE_FIPS
+    ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
     ret = wc_InitRng(&rng);
+#endif
     if (ret != 0) {
         XFREE(certDer, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
         XFREE(keyDer, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
@@ -12545,7 +12787,7 @@ int pkcs7signed_test(void)
         transId[0] = 0x13;
         transId[1] = SHA_DIGEST_SIZE * 2;
 
-        ret = wc_InitSha(&sha);
+        ret = wc_InitSha_ex(&sha, HEAP_HINT, devId);
         if (ret != 0) {
             XFREE(certDer, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             XFREE(keyDer, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
@@ -12554,6 +12796,7 @@ int pkcs7signed_test(void)
         }
         wc_ShaUpdate(&sha, msg.publicKey, msg.publicKeySz);
         wc_ShaFinal(&sha, digest);
+        wc_ShaFree(&sha);
 
         for (i = 0, j = 2; i < SHA_DIGEST_SIZE; i++, j += 2) {
             snprintf((char*)&transId[j], 3, "%02x", digest[i]);
@@ -12682,7 +12925,11 @@ int mp_test()
 
     mp_init_copy(&p, &a);
 
+#ifndef HAVE_FIPS
+    ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
+#else
     ret = wc_InitRng(&rng);
+#endif
     if (ret != 0)
         goto done;
 
@@ -12917,28 +13164,43 @@ static void *my_Realloc_cb(void *ptr, size_t size)
 
 int memcb_test()
 {
+    int ret = 0;
     byte* b = NULL;
+    wolfSSL_Malloc_cb  mc;
+    wolfSSL_Free_cb    fc;
+    wolfSSL_Realloc_cb rc;
 
+    /* Save existing memory callbacks */
+    if (wolfSSL_GetAllocators(&mc, &fc, &rc) != 0)
+        return -12103;
+
+    /* test realloc */
     b = (byte*)XREALLOC(b, 1024, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    if (b == NULL) {
+        ERROR_OUT(-12104, exit_memcb);
+    }
     XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
     b = NULL;
 
     /* Parameter Validation testing. */
     if (wolfSSL_SetAllocators(NULL, (wolfSSL_Free_cb)&my_Free_cb,
-            (wolfSSL_Realloc_cb)&my_Realloc_cb) != BAD_FUNC_ARG)
-        return -12100;
+            (wolfSSL_Realloc_cb)&my_Realloc_cb) != BAD_FUNC_ARG) {
+        ERROR_OUT(-12100, exit_memcb);
+    }
     if (wolfSSL_SetAllocators((wolfSSL_Malloc_cb)&my_Malloc_cb, NULL,
-            (wolfSSL_Realloc_cb)&my_Realloc_cb) != BAD_FUNC_ARG)
-        return -12101;
+            (wolfSSL_Realloc_cb)&my_Realloc_cb) != BAD_FUNC_ARG) {
+        ERROR_OUT(-12101, exit_memcb);
+    }
     if (wolfSSL_SetAllocators((wolfSSL_Malloc_cb)&my_Malloc_cb,
-            (wolfSSL_Free_cb)&my_Free_cb, NULL) != BAD_FUNC_ARG)
-        return -12102;
+            (wolfSSL_Free_cb)&my_Free_cb, NULL) != BAD_FUNC_ARG) {
+        ERROR_OUT(-12102, exit_memcb);
+    }
 
     /* Use API. */
     if (wolfSSL_SetAllocators((wolfSSL_Malloc_cb)&my_Malloc_cb,
-            (wolfSSL_Free_cb)&my_Free_cb, (wolfSSL_Realloc_cb)my_Realloc_cb)
-            != 0)
-        return -12100;
+        (wolfSSL_Free_cb)&my_Free_cb, (wolfSSL_Realloc_cb)my_Realloc_cb) != 0) {
+        ERROR_OUT(-12100, exit_memcb);
+    }
 
     b = (byte*)XMALLOC(1024, NULL, DYNAMIC_TYPE_TMP_BUFFER);
     b = (byte*)XREALLOC(b, 1024, NULL, DYNAMIC_TYPE_TMP_BUFFER);
@@ -12949,8 +13211,14 @@ int memcb_test()
 #else
     if (malloc_cnt != 0 || free_cnt != 0 || realloc_cnt != 0)
 #endif
-        return -12110;
-    return 0;
+        ret = -12110;
+
+exit_memcb:
+
+    /* restore memory callbacks */
+    wolfSSL_SetAllocators(mc, fc, rc);
+
+    return ret;
 }
 #endif
 
diff --git a/wolfcrypt/test/test.h b/wolfcrypt/test/test.h
index 47d8b74c2..d65b27f4f 100644
--- a/wolfcrypt/test/test.h
+++ b/wolfcrypt/test/test.h
@@ -28,7 +28,11 @@
     extern "C" {
 #endif
 
+#ifdef HAVE_STACK_SIZE
+THREAD_RETURN WOLFSSL_THREAD wolfcrypt_test(void* args);
+#else
 int wolfcrypt_test(void* args);
+#endif
 
 #ifdef __cplusplus
     }  /* extern "C" */
diff --git a/wolfssl/error-ssl.h b/wolfssl/error-ssl.h
index 236e4dfb7..941c379c5 100644
--- a/wolfssl/error-ssl.h
+++ b/wolfssl/error-ssl.h
@@ -153,6 +153,8 @@ enum wolfSSL_ErrorCodes {
     DTLS_POOL_SZ_E               = -415,   /* exceeded DTLS pool size */
     DECODE_E                     = -416,   /* decode handshake message error */
     HTTP_TIMEOUT                 = -417,   /* HTTP timeout for OCSP or CRL req */
+    WRITE_DUP_READ_E             = -418,   /* Write dup write side can't read */
+    WRITE_DUP_WRITE_E            = -419,   /* Write dup read side can't write */
     /* add strings to wolfSSL_ERR_reason_error_string in internal.c !!!!! */
 
     /* begin negotiation parameter errors */
diff --git a/wolfssl/internal.h b/wolfssl/internal.h
index a54ee754d..66d8e7eaa 100755
--- a/wolfssl/internal.h
+++ b/wolfssl/internal.h
@@ -81,18 +81,24 @@
 #ifdef WOLFSSL_SHA512
     #include 
 #endif
-
 #ifdef HAVE_AESGCM
     #include 
 #endif
-
 #ifdef WOLFSSL_RIPEMD
     #include 
 #endif
-
 #ifdef HAVE_IDEA
     #include 
 #endif
+#ifndef NO_RSA
+    #include 
+#endif
+#ifdef HAVE_ECC
+    #include 
+#endif
+#ifndef NO_DH
+    #include 
+#endif
 
 #include 
 
@@ -1401,7 +1407,7 @@ WOLFSSL_LOCAL
 void InitSuites(Suites*, ProtocolVersion, word16, word16, word16, word16,
                 word16, word16, word16, int);
 WOLFSSL_LOCAL
-int  SetCipherList(Suites*, const char* list);
+int  SetCipherList(WOLFSSL_CTX*, Suites*, const char* list);
 
 #ifndef PSK_TYPES_DEFINED
     typedef unsigned int (*wc_psk_client_callback)(WOLFSSL*, const char*, char*,
@@ -1778,7 +1784,7 @@ typedef struct {
 } CertificateStatusRequest;
 
 WOLFSSL_LOCAL int   TLSX_UseCertificateStatusRequest(TLSX** extensions,
-                                    byte status_type, byte options, void* heap);
+                                    byte status_type, byte options, void* heap, int devId);
 WOLFSSL_LOCAL int   TLSX_CSR_InitRequest(TLSX* extensions, DecodedCert* cert,
                                                                     void* heap);
 WOLFSSL_LOCAL void* TLSX_CSR_GetRequest(TLSX* extensions);
@@ -1800,7 +1806,7 @@ typedef struct CSRIv2 {
 } CertificateStatusRequestItemV2;
 
 WOLFSSL_LOCAL int   TLSX_UseCertificateStatusRequestV2(TLSX** extensions,
-                                    byte status_type, byte options, void* heap);
+                                    byte status_type, byte options, void* heap, int devId);
 WOLFSSL_LOCAL int   TLSX_CSR2_InitRequests(TLSX* extensions, DecodedCert* cert,
                                                        byte isPeer, void* heap);
 WOLFSSL_LOCAL void* TLSX_CSR2_GetRequest(TLSX* extensions, byte status_type,
@@ -2208,6 +2214,10 @@ typedef struct Ciphers {
 #endif
 #if defined(BUILD_AES) || defined(BUILD_AESGCM)
     Aes*    aes;
+    #if defined(BUILD_AESGCM) || defined(HAVE_AESCCM)
+        byte* additional;
+        byte* nonce;
+    #endif
 #endif
 #ifdef HAVE_CAMELLIA
     Camellia* cam;
@@ -2224,6 +2234,7 @@ typedef struct Ciphers {
 #ifdef HAVE_IDEA
     Idea* idea;
 #endif
+    byte    state;
     byte    setup;       /* have we set it up flag for detection */
 } Ciphers;
 
@@ -2246,10 +2257,12 @@ WOLFSSL_LOCAL void FreeCiphers(WOLFSSL* ssl);
 
 /* hashes type */
 typedef struct Hashes {
-    #ifndef NO_OLD_TLS
+    #if !defined(NO_MD5) && !defined(NO_OLD_TLS)
         byte md5[MD5_DIGEST_SIZE];
     #endif
-    byte sha[SHA_DIGEST_SIZE];
+    #if !defined(NO_SHA)
+        byte sha[SHA_DIGEST_SIZE];
+    #endif
     #ifndef NO_SHA256
         byte sha256[SHA256_DIGEST_SIZE];
     #endif
@@ -2354,16 +2367,6 @@ enum AcceptState {
     ACCEPT_THIRD_REPLY_DONE
 };
 
-/* sub-states for send/do key share (key exchange) */
-enum KeyShareState {
-    KEYSHARE_BEGIN = 0,
-    KEYSHARE_BUILD,
-    KEYSHARE_DO,
-    KEYSHARE_VERIFY,
-    KEYSHARE_FINALIZE,
-    KEYSHARE_END
-};
-
 /* buffers for struct WOLFSSL */
 typedef struct Buffers {
     bufferStatic    inputBuffer;
@@ -2385,6 +2388,7 @@ typedef struct Buffers {
     buffer          serverDH_G;            /* WOLFSSL_CTX owns, unless we own */
     buffer          serverDH_Pub;
     buffer          serverDH_Priv;
+    DhKey*          serverDH_Key;
 #endif
 #ifndef NO_CERTS
     DerBuffer*      certificate;           /* WOLFSSL_CTX owns, unless we own */
@@ -2496,8 +2500,8 @@ typedef struct Options {
     byte            minDowngrade;       /* minimum downgrade version */
     byte            connectState;       /* nonblocking resume */
     byte            acceptState;        /* nonblocking resume */
-    byte            keyShareState;      /* sub-state for key share (key exchange).
-                                           See enum KeyShareState. */
+    byte            asyncState;         /* sub-state for enum asyncState */
+    byte            buildMsgState;      /* sub-state for enum buildMsgState */
 #ifndef NO_DH
     word16          minDhKeySz;         /* minimum DH key size */
     word16          dhKeySz;            /* actual DH key size */
@@ -2513,6 +2517,7 @@ typedef struct Options {
 
 typedef struct Arrays {
     byte*           pendingMsg;         /* defrag buffer */
+    byte*           preMasterSecret;
     word32          preMasterSz;        /* differs for DH, actual size */
     word32          pendingMsgSz;       /* defrag buffer size */
     word32          pendingMsgOffset;   /* current offset into defrag buffer */
@@ -2526,7 +2531,6 @@ typedef struct Arrays {
     byte            serverRandom[RAN_LEN];
     byte            sessionID[ID_LEN];
     byte            sessionIDSz;
-    byte            preMasterSecret[ENCRYPT_LEN];
     byte            masterSecret[SECRET_LEN];
 #ifdef WOLFSSL_DTLS
     byte            cookie[MAX_COOKIE_LEN];
@@ -2730,14 +2734,12 @@ typedef struct MsgsReceived {
 typedef struct HS_Hashes {
     Hashes          verifyHashes;
     Hashes          certHashes;         /* for cert verify */
-#ifndef NO_OLD_TLS
 #ifndef NO_SHA
     Sha             hashSha;            /* sha hash of handshake msgs */
 #endif
-#ifndef NO_MD5
+#if !defined(NO_MD5) && !defined(NO_OLD_TLS)
     Md5             hashMd5;            /* md5 hash of handshake msgs */
 #endif
-#endif /* NO_OLD_TLS */
 #ifndef NO_SHA256
     Sha256          hashSha256;         /* sha256 hash of handshake msgs */
 #endif
@@ -2750,6 +2752,33 @@ typedef struct HS_Hashes {
 } HS_Hashes;
 
 
+#ifdef WOLFSSL_ASYNC_CRYPT
+    #define MAX_ASYNC_ARGS 16
+    typedef void (*FreeArgsCb)(struct WOLFSSL* ssl, void* pArgs);
+
+    struct WOLFSSL_ASYNC {
+        WC_ASYNC_DEV* dev;
+        FreeArgsCb    freeArgs; /* function pointer to cleanup args */
+        word32        args[MAX_ASYNC_ARGS]; /* holder for current args */
+    };
+#endif
+
+#ifdef HAVE_WRITE_DUP
+
+    #define WRITE_DUP_SIDE 1
+    #define READ_DUP_SIDE 2
+
+    typedef struct WriteDup {
+        wolfSSL_Mutex   dupMutex;       /* reference count mutex */
+        int             dupCount;       /* reference count */
+        int             dupErr;         /* under dupMutex, pass to other side */
+    } WriteDup;
+
+    WOLFSSL_LOCAL void FreeWriteDup(WOLFSSL* ssl);
+    WOLFSSL_LOCAL int  NotifyWriteSide(WOLFSSL* ssl, int err);
+#endif /* HAVE_WRITE_DUP */
+
+
 /* wolfSSL ssl type */
 struct WOLFSSL {
     WOLFSSL_CTX*    ctx;
@@ -2762,6 +2791,11 @@ struct WOLFSSL {
     void*           verifyCbCtx;        /* cert verify callback user ctx*/
     VerifyCallback  verifyCallback;     /* cert verification callback */
     void*           heap;               /* for user overrides */
+#ifdef HAVE_WRITE_DUP
+    WriteDup*       dupWrite;           /* valid pointer indicates ON */
+             /* side that decrements dupCount to zero frees overall structure */
+    byte            dupSide;            /* write side or read side */
+#endif
 #ifdef WOLFSSL_STATIC_MEMORY
     WOLFSSL_HEAP_HINT heap_hint;
 #endif
@@ -2770,12 +2804,10 @@ struct WOLFSSL {
     void*           hsDoneCtx;         /*  user handshake cb context  */
 #endif
 #ifdef WOLFSSL_ASYNC_CRYPT
-    AsyncCryptSSLState  async;
-    AsyncCryptDev       asyncDev;
+    struct WOLFSSL_ASYNC async;
 #endif
-    void*           sigKey;             /* RsaKey or ecc_key allocated from heap */
-    word32          sigType;            /* Type of sigKey */
-    word32          sigLen;             /* Actual signature length */
+    void*           hsKey;              /* Handshake key (RsaKey or ecc_key) allocated from heap */
+    word32          hsType;             /* Type of Handshake key (hsKey) */
     WOLFSSL_CIPHER  cipher;
     hmacfp          hmac;
     Ciphers         encrypt;
@@ -2835,6 +2867,7 @@ struct WOLFSSL {
     ecc_key*        peerEccKey;              /* peer's  ECDHE key */
     ecc_key*        peerEccDsaKey;           /* peer's  ECDSA key */
     ecc_key*        eccTempKey;              /* private ECDHE key */
+    int             eccVerifyRes;
     word32          pkCurveOID;              /* curve Ecc_Sum     */
     word32          ecdhCurveOID;            /* curve Ecc_Sum     */
     word16          eccTempKeySz;            /* in octets 20 - 66 */
@@ -2956,16 +2989,13 @@ struct WOLFSSL {
 #ifdef WOLFSSL_JNI
         void* jObjectRef;     /* reference to WolfSSLSession in JNI wrapper */
 #endif /* WOLFSSL_JNI */
-#ifdef HAVE_WOLF_EVENT
-    WOLF_EVENT event;
-#endif /* HAVE_WOLF_EVENT */
 };
 
 
 WOLFSSL_LOCAL
-int  SetSSL_CTX(WOLFSSL*, WOLFSSL_CTX*);
+int  SetSSL_CTX(WOLFSSL*, WOLFSSL_CTX*, int);
 WOLFSSL_LOCAL
-int  InitSSL(WOLFSSL*, WOLFSSL_CTX*);
+int  InitSSL(WOLFSSL*, WOLFSSL_CTX*, int);
 WOLFSSL_LOCAL
 void FreeSSL(WOLFSSL*, void* heap);
 WOLFSSL_API void SSL_ResourceFree(WOLFSSL*);   /* Micrium uses */
@@ -3220,7 +3250,7 @@ WOLFSSL_LOCAL word32  LowResTimer(void);
     WOLFSSL_LOCAL int  CopyDecodedToX509(WOLFSSL_X509*, DecodedCert*);
 #endif
 
-/* used by ssl.c and wolfssl_int.c */
+/* used by ssl.c and internal.c */
 WOLFSSL_LOCAL void c32to24(word32 in, word24 out);
 
 WOLFSSL_LOCAL const char* const* GetCipherNames(void);
@@ -3240,19 +3270,14 @@ WOLFSSL_LOCAL int SetKeysSide(WOLFSSL*, enum encrypt_side);
 
 
 #ifndef NO_DH
-    WOLFSSL_LOCAL int DhGenKeyPair(WOLFSSL* ssl,
-        byte* p, word32 pSz,
-        byte* g, word32 gSz,
+    WOLFSSL_LOCAL int DhGenKeyPair(WOLFSSL* ssl, DhKey* dhKey,
         byte* priv, word32* privSz,
         byte* pub, word32* pubSz);
-    WOLFSSL_LOCAL int DhAgree(WOLFSSL* ssl,
-        byte* p, word32 pSz,
-        byte* g, word32 gSz,
-        byte* priv, word32* privSz,
-        byte* pub, word32* pubSz,
+    WOLFSSL_LOCAL int DhAgree(WOLFSSL* ssl, DhKey* dhKey,
+        const byte* priv, word32 privSz,
         const byte* otherPub, word32 otherPubSz,
         byte* agree, word32* agreeSz);
-#endif
+#endif /* !NO_DH */
 
 #ifdef HAVE_ECC
     WOLFSSL_LOCAL int EccMakeKey(WOLFSSL* ssl, ecc_key* key, ecc_key* peer);
@@ -3260,7 +3285,17 @@ WOLFSSL_LOCAL int SetKeysSide(WOLFSSL*, enum encrypt_side);
 
 WOLFSSL_LOCAL int BuildMessage(WOLFSSL* ssl, byte* output, int outSz,
                         const byte* input, int inSz, int type, int hashOutput,
-                        int sizeOnly);
+                        int sizeOnly, int asyncOkay);
+
+WOLFSSL_LOCAL int AllocKey(WOLFSSL* ssl, int type, void** pKey);
+WOLFSSL_LOCAL void FreeKey(WOLFSSL* ssl, int type, void** pKey);
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    WOLFSSL_LOCAL int wolfSSL_AsyncPop(WOLFSSL* ssl, byte* state);
+    WOLFSSL_LOCAL int wolfSSL_AsyncPush(WOLFSSL* ssl, WC_ASYNC_DEV* asyncDev,
+                                        word32 flags);
+#endif
+
 
 #ifdef __cplusplus
     }  /* extern "C" */
diff --git a/wolfssl/openssl/md5.h b/wolfssl/openssl/md5.h
index 2e8620825..c4f05d30c 100644
--- a/wolfssl/openssl/md5.h
+++ b/wolfssl/openssl/md5.h
@@ -18,7 +18,7 @@
 
 
 typedef struct WOLFSSL_MD5_CTX {
-    int holder[24];   /* big enough to hold wolfcrypt md5, but check on init */
+    int holder[28 + (WC_ASYNC_DEV_SIZE / sizeof(int))];   /* big enough to hold wolfcrypt md5, but check on init */
 } WOLFSSL_MD5_CTX;
 
 WOLFSSL_API void wolfSSL_MD5_Init(WOLFSSL_MD5_CTX*);
diff --git a/wolfssl/openssl/sha.h b/wolfssl/openssl/sha.h
index d9e168129..7495d4a37 100644
--- a/wolfssl/openssl/sha.h
+++ b/wolfssl/openssl/sha.h
@@ -17,7 +17,8 @@
 
 
 typedef struct WOLFSSL_SHA_CTX {
-    int holder[24];   /* big enough to hold wolfcrypt sha, but check on init */
+    /* big enough to hold wolfcrypt Sha, but check on init */
+    int holder[28 + (WC_ASYNC_DEV_SIZE / sizeof(int))];
 } WOLFSSL_SHA_CTX;
 
 WOLFSSL_API void wolfSSL_SHA_Init(WOLFSSL_SHA_CTX*);
@@ -51,7 +52,8 @@ typedef WOLFSSL_SHA_CTX SHA_CTX;
  * struct are 16 byte aligned. Any derefrence to those elements after casting to
  * Sha224, is expected to also be 16 byte aligned addresses.  */
 typedef struct WOLFSSL_SHA224_CTX {
-    ALIGN16 long long holder[28];   /* big enough, but check on init */
+    /* big enough to hold wolfcrypt Sha224, but check on init */
+    ALIGN16 int holder[34 + (WC_ASYNC_DEV_SIZE / sizeof(int))];
 } WOLFSSL_SHA224_CTX;
 
 WOLFSSL_API void wolfSSL_SHA224_Init(WOLFSSL_SHA224_CTX*);
@@ -77,7 +79,8 @@ typedef WOLFSSL_SHA224_CTX SHA224_CTX;
  * struct are 16 byte aligned. Any derefrence to those elements after casting to
  * Sha256, is expected to also be 16 byte aligned addresses.  */
 typedef struct WOLFSSL_SHA256_CTX {
-    ALIGN16 int holder[28];   /* big enough to hold wolfcrypt sha, but check on init */
+    /* big enough to hold wolfcrypt Sha256, but check on init */
+    ALIGN16 int holder[34 + (WC_ASYNC_DEV_SIZE / sizeof(int))];
 } WOLFSSL_SHA256_CTX;
 
 WOLFSSL_API void wolfSSL_SHA256_Init(WOLFSSL_SHA256_CTX*);
@@ -100,7 +103,8 @@ typedef WOLFSSL_SHA256_CTX SHA256_CTX;
 #ifdef WOLFSSL_SHA384
 
 typedef struct WOLFSSL_SHA384_CTX {
-    long long holder[32];   /* big enough, but check on init */
+    /* big enough to hold wolfCrypt Sha384, but check on init */
+    long long holder[32 + (WC_ASYNC_DEV_SIZE / sizeof(long long))];
 } WOLFSSL_SHA384_CTX;
 
 WOLFSSL_API void wolfSSL_SHA384_Init(WOLFSSL_SHA384_CTX*);
@@ -124,7 +128,8 @@ typedef WOLFSSL_SHA384_CTX SHA384_CTX;
 #ifdef WOLFSSL_SHA512
 
 typedef struct WOLFSSL_SHA512_CTX {
-    long long holder[36];   /* big enough, but check on init */
+    /* big enough to hold wolfCrypt Sha512, but check on init */
+    long long holder[36 + (WC_ASYNC_DEV_SIZE / sizeof(long long))];
 } WOLFSSL_SHA512_CTX;
 
 WOLFSSL_API void wolfSSL_SHA512_Init(WOLFSSL_SHA512_CTX*);
diff --git a/wolfssl/ssl.h b/wolfssl/ssl.h
index 6e8bd8068..dcdfa6254 100644
--- a/wolfssl/ssl.h
+++ b/wolfssl/ssl.h
@@ -369,6 +369,7 @@ WOLFSSL_API int wolfSSL_use_RSAPrivateKey_file(WOLFSSL*, const char*, int);
 
 WOLFSSL_API WOLFSSL_CTX* wolfSSL_CTX_new(WOLFSSL_METHOD*);
 WOLFSSL_API WOLFSSL* wolfSSL_new(WOLFSSL_CTX*);
+WOLFSSL_API WOLFSSL* wolfSSL_write_dup(WOLFSSL*);
 WOLFSSL_API int  wolfSSL_set_fd (WOLFSSL*, int);
 WOLFSSL_API int  wolfSSL_set_write_fd (WOLFSSL*, int);
 WOLFSSL_API int  wolfSSL_set_read_fd (WOLFSSL*, int);
diff --git a/wolfssl/test.h b/wolfssl/test.h
index d47300a3d..08e347788 100644
--- a/wolfssl/test.h
+++ b/wolfssl/test.h
@@ -10,7 +10,6 @@
 #include 
 #include 
 #include 
-#include 
 
 #ifdef ATOMIC_USER
     #include 
@@ -19,6 +18,9 @@
 #endif
 #ifdef HAVE_PK_CALLBACKS
     #include 
+    #ifndef NO_RSA
+        #include 
+    #endif
     #ifdef HAVE_ECC
         #include 
     #endif /* HAVE_ECC */
@@ -245,37 +247,37 @@
 
 /* all certs relative to wolfSSL home directory now */
 #if defined(WOLFSSL_NO_CURRDIR) || defined(WOLFSSL_MDK_SHELL)
-#define caCert     "certs/ca-cert.pem"
-#define eccCert    "certs/server-ecc.pem"
-#define eccKey     "certs/ecc-key.pem"
-#define svrCert    "certs/server-cert.pem"
-#define svrKey     "certs/server-key.pem"
-#define cliCert    "certs/client-cert.pem"
-#define cliKey     "certs/client-key.pem"
-#define ntruCert   "certs/ntru-cert.pem"
-#define ntruKey    "certs/ntru-key.raw"
-#define dhParam    "certs/dh2048.pem"
-#define cliEccKey  "certs/ecc-client-key.pem"
-#define cliEccCert "certs/client-ecc-cert.pem"
-#define crlPemDir  "certs/crl"
+#define caCertFile     "certs/ca-cert.pem"
+#define eccCertFile    "certs/server-ecc.pem"
+#define eccKeyFile     "certs/ecc-key.pem"
+#define svrCertFile    "certs/server-cert.pem"
+#define svrKeyFile     "certs/server-key.pem"
+#define cliCertFile    "certs/client-cert.pem"
+#define cliKeyFile     "certs/client-key.pem"
+#define ntruCertFile   "certs/ntru-cert.pem"
+#define ntruKeyFile    "certs/ntru-key.raw"
+#define dhParamFile    "certs/dh2048.pem"
+#define cliEccKeyFile  "certs/ecc-client-key.pem"
+#define cliEccCertFile "certs/client-ecc-cert.pem"
+#define crlPemDir      "certs/crl"
 #ifdef HAVE_WNR
     /* Whitewood netRandom default config file */
     #define wnrConfig  "wnr-example.conf"
 #endif
 #else
-#define caCert     "./certs/ca-cert.pem"
-#define eccCert    "./certs/server-ecc.pem"
-#define eccKey     "./certs/ecc-key.pem"
-#define svrCert    "./certs/server-cert.pem"
-#define svrKey     "./certs/server-key.pem"
-#define cliCert    "./certs/client-cert.pem"
-#define cliKey     "./certs/client-key.pem"
-#define ntruCert   "./certs/ntru-cert.pem"
-#define ntruKey    "./certs/ntru-key.raw"
-#define dhParam    "./certs/dh2048.pem"
-#define cliEccKey  "./certs/ecc-client-key.pem"
-#define cliEccCert "./certs/client-ecc-cert.pem"
-#define crlPemDir  "./certs/crl"
+#define caCertFile     "./certs/ca-cert.pem"
+#define eccCertFile    "./certs/server-ecc.pem"
+#define eccKeyFile     "./certs/ecc-key.pem"
+#define svrCertFile    "./certs/server-cert.pem"
+#define svrKeyFile     "./certs/server-key.pem"
+#define cliCertFile    "./certs/client-cert.pem"
+#define cliKeyFile     "./certs/client-key.pem"
+#define ntruCertFile   "./certs/ntru-cert.pem"
+#define ntruKeyFile    "./certs/ntru-key.raw"
+#define dhParamFile    "./certs/dh2048.pem"
+#define cliEccKeyFile  "./certs/ecc-client-key.pem"
+#define cliEccCertFile "./certs/client-ecc-cert.pem"
+#define crlPemDir      "./certs/crl"
 #ifdef HAVE_WNR
     /* Whitewood netRandom default config file */
     #define wnrConfig  "./wnr-example.conf"
@@ -1099,7 +1101,53 @@ static INLINE unsigned int my_psk_server_cb(WOLFSSL* ssl, const char* identity,
 #endif /* USE_WINDOWS_API */
 
 
-#if defined(NO_FILESYSTEM) && !defined(NO_CERTS) && defined(FORCE_BUFFER_TEST)
+#if !defined(NO_CERTS)
+    #if !defined(NO_FILESYSTEM) || \
+        (defined(NO_FILESYSTEM) && defined(FORCE_BUFFER_TEST))
+
+    /* reads file size, allocates buffer, reads into buffer, returns buffer */
+    static INLINE int load_file(const char* fname, byte** buf, size_t* bufLen)
+    {
+        int ret;
+        FILE* file;
+
+        if (fname == NULL || buf == NULL || bufLen == NULL)
+            return BAD_FUNC_ARG;
+
+        /* set defaults */
+        *buf = NULL;
+        *bufLen = 0;
+
+        /* open file (read-only binary) */
+        file = fopen(fname, "rb");
+        if (!file) {
+            printf("Error loading %s\n", fname);
+            return BAD_PATH_ERROR;
+        }
+
+        fseek(file, 0, SEEK_END);
+        *bufLen = ftell(file);
+        rewind(file);
+        if (*bufLen > 0) {
+            *buf = (byte*)malloc(*bufLen);
+            if (*buf == NULL) {
+                ret = MEMORY_E;
+                printf("Error allocating %lu bytes\n", (unsigned long)*bufLen);
+            }
+            else {
+                size_t readLen = fread(*buf, *bufLen, 1, file);
+
+                /* check response code */
+                ret = (readLen > 0) ? 0 : -1;
+            }
+        }
+        else {
+            ret = BUFFER_E;
+        }
+        fclose(file);
+
+        return ret;
+    }
 
     enum {
         WOLFSSL_CA   = 1,
@@ -1111,49 +1159,44 @@ static INLINE unsigned int my_psk_server_cb(WOLFSSL* ssl, const char* identity,
     static INLINE void load_buffer(WOLFSSL_CTX* ctx, const char* fname, int type)
     {
         int format = SSL_FILETYPE_PEM;
+        byte* buff = NULL;
+        size_t sz = 0;
 
-        /* test buffer load */
-        long  sz = 0;
-        byte  buff[10000];
-        FILE* file = fopen(fname, "rb");
-
-        if (!file)
+        if (load_file(fname, &buff, &sz) != 0) {
             err_sys("can't open file for buffer load "
                     "Please run from wolfSSL home directory if not");
-        fseek(file, 0, SEEK_END);
-        sz = ftell(file);
-        rewind(file);
-        fread(buff, sizeof(buff), 1, file);
+        }
 
         /* determine format */
         if (strstr(fname, ".der"))
             format = SSL_FILETYPE_ASN1;
 
         if (type == WOLFSSL_CA) {
-            if (wolfSSL_CTX_load_verify_buffer(ctx, buff, sz, format)
+            if (wolfSSL_CTX_load_verify_buffer(ctx, buff, (long)sz, format)
                                               != SSL_SUCCESS)
                 err_sys("can't load buffer ca file");
         }
         else if (type == WOLFSSL_CERT) {
-            if (wolfSSL_CTX_use_certificate_buffer(ctx, buff, sz,
+            if (wolfSSL_CTX_use_certificate_buffer(ctx, buff, (long)sz,
                         format) != SSL_SUCCESS)
                 err_sys("can't load buffer cert file");
         }
         else if (type == WOLFSSL_KEY) {
-            if (wolfSSL_CTX_use_PrivateKey_buffer(ctx, buff, sz,
+            if (wolfSSL_CTX_use_PrivateKey_buffer(ctx, buff, (long)sz,
                         format) != SSL_SUCCESS)
                 err_sys("can't load buffer key file");
         }
         else if (type == WOLFSSL_CERT_CHAIN) {
-            if (wolfSSL_CTX_use_certificate_chain_buffer_format(ctx, buff, sz,
-                        format) != SSL_SUCCESS)
+            if (wolfSSL_CTX_use_certificate_chain_buffer_format(ctx, buff,
+                    (long)sz, format) != SSL_SUCCESS)
                 err_sys("can't load cert chain buffer");
         }
 
-        fclose(file);
+        if (buff)
+            free(buff);
     }
-
-#endif /* NO_FILESYSTEM */
+    #endif /* !NO_FILESYSTEM || (NO_FILESYSTEM && FORCE_BUFFER_TEST) */
+#endif /* !NO_CERTS */
 
 #ifdef VERIFY_CALLBACK
 
@@ -1310,7 +1353,7 @@ static INLINE void CaCb(unsigned char* der, int sz, int type)
             int depth, res;
             FILE* file;
             for(depth = 0; depth <= MAX_WOLF_ROOT_DEPTH; depth++) {
-                file = fopen(ntruKey, "rb");
+                file = fopen(ntruKeyFile, "rb");
                 if (file != NULL) {
                     fclose(file);
                     return depth;
@@ -1339,9 +1382,10 @@ static INLINE void CaCb(unsigned char* der, int sz, int type)
 typedef THREAD_RETURN WOLFSSL_THREAD (*thread_func)(void* args);
 
 
-static INLINE void StackSizeCheck(func_args* args, thread_func tf)
+static INLINE int StackSizeCheck(func_args* args, thread_func tf)
 {
     int            ret, i, used;
+    void*          status;
     unsigned char* myStack = NULL;
     int            stackSize = 1024*128;
     pthread_attr_t myAttr;
@@ -1372,7 +1416,7 @@ static INLINE void StackSizeCheck(func_args* args, thread_func tf)
         exit(EXIT_FAILURE);
     }
 
-    ret = pthread_join(threadId, NULL);
+    ret = pthread_join(threadId, &status);
     if (ret != 0)
         err_sys("pthread_join failed");
 
@@ -1382,8 +1426,12 @@ static INLINE void StackSizeCheck(func_args* args, thread_func tf)
         }
     }
 
+    free(myStack);
+
     used = stackSize - i;
     printf("stack used = %d\n", used);
+
+    return (int)((size_t)status);
 }
 
 
diff --git a/wolfssl/version.h b/wolfssl/version.h
index 5b055a0dc..23ab61358 100644
--- a/wolfssl/version.h
+++ b/wolfssl/version.h
@@ -28,8 +28,8 @@
 extern "C" {
 #endif
 
-#define LIBWOLFSSL_VERSION_STRING "3.10.3"
-#define LIBWOLFSSL_VERSION_HEX 0x03010003
+#define LIBWOLFSSL_VERSION_STRING "3.10.4"
+#define LIBWOLFSSL_VERSION_HEX 0x03010004
 
 #ifdef __cplusplus
 }
diff --git a/wolfssl/wolfcrypt/aes.h b/wolfssl/wolfcrypt/aes.h
old mode 100644
new mode 100755
index 2b3c4e576..785edc181
--- a/wolfssl/wolfcrypt/aes.h
+++ b/wolfssl/wolfcrypt/aes.h
@@ -73,6 +73,7 @@ typedef struct Aes {
     /* AESNI needs key first, rounds 2nd, not sure why yet */
     ALIGN16 word32 key[60];
     word32  rounds;
+    int     keylen;
 
     ALIGN16 word32 reg[AES_BLOCK_SIZE / sizeof(word32)];      /* for CBC mode */
     ALIGN16 word32 tmp[AES_BLOCK_SIZE / sizeof(word32)];      /* same         */
@@ -88,10 +89,9 @@ typedef struct Aes {
     byte use_aesni;
 #endif /* WOLFSSL_AESNI */
 #ifdef WOLFSSL_ASYNC_CRYPT
-    AsyncCryptDev asyncDev;
-    #ifdef HAVE_CAVIUM
-        AesType type;                       /* aes key type */
-    #endif
+    const byte* asyncKey;
+    const byte* asyncIv;
+    WC_ASYNC_DEV asyncDev;
 #endif /* WOLFSSL_ASYNC_CRYPT */
 #ifdef WOLFSSL_AES_COUNTER
     word32  left;            /* unused bytes left from last call */
@@ -99,10 +99,6 @@ typedef struct Aes {
 #ifdef WOLFSSL_PIC32MZ_CRYPT
     word32 key_ce[AES_BLOCK_SIZE*2/sizeof(word32)] ;
     word32 iv_ce [AES_BLOCK_SIZE  /sizeof(word32)] ;
-    int    keylen ;
-#endif
-#ifdef WOLFSSL_TI_CRYPT
-    int    keylen ;
 #endif
     void*  heap; /* memory hint to use */
 } Aes;
@@ -115,7 +111,20 @@ typedef struct Gmac {
 #endif /* HAVE_AESGCM */
 #endif /* HAVE_FIPS */
 
-WOLFSSL_LOCAL int  wc_InitAes_h(Aes* aes, void* h);
+
+/* Authenticated cipher function prototypes */
+typedef int (*wc_AesAuthEncryptFunc)(Aes* aes, byte* out,
+                                   const byte* in, word32 sz,
+                                   const byte* iv, word32 ivSz,
+                                   byte* authTag, word32 authTagSz,
+                                   const byte* authIn, word32 authInSz);
+typedef int (*wc_AesAuthDecryptFunc)(Aes* aes, byte* out,
+                                   const byte* in, word32 sz,
+                                   const byte* iv, word32 ivSz,
+                                   const byte* authTag, word32 authTagSz,
+                                   const byte* authIn, word32 authInSz);
+
+/* AES-CBC */
 WOLFSSL_API int  wc_AesSetKey(Aes* aes, const byte* key, word32 len,
                               const byte* iv, int dir);
 WOLFSSL_API int  wc_AesSetIV(Aes* aes, const byte* iv);
@@ -187,10 +196,8 @@ WOLFSSL_API int wc_AesEcbDecrypt(Aes* aes, byte* out,
 
 WOLFSSL_API int wc_AesGetKeySize(Aes* aes, word32* keySize);
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-     WOLFSSL_API int  wc_AesAsyncInit(Aes*, int);
-     WOLFSSL_API void wc_AesAsyncFree(Aes*);
-#endif
+WOLFSSL_API int  wc_AesInit(Aes*, void*, int);
+WOLFSSL_API void wc_AesFree(Aes*);
 
 #ifdef __cplusplus
     } /* extern "C" */
diff --git a/wolfssl/wolfcrypt/arc4.h b/wolfssl/wolfcrypt/arc4.h
index 752f1d062..aab0fb984 100644
--- a/wolfssl/wolfcrypt/arc4.h
+++ b/wolfssl/wolfcrypt/arc4.h
@@ -45,17 +45,16 @@ typedef struct Arc4 {
     byte y;
     byte state[ARC4_STATE_SIZE];
 #ifdef WOLFSSL_ASYNC_CRYPT
-    AsyncCryptDev asyncDev;
+    WC_ASYNC_DEV asyncDev;
 #endif
+    void* heap;
 } Arc4;
 
-WOLFSSL_API void wc_Arc4Process(Arc4*, byte*, const byte*, word32);
-WOLFSSL_API void wc_Arc4SetKey(Arc4*, const byte*, word32);
+WOLFSSL_API int wc_Arc4Process(Arc4*, byte*, const byte*, word32);
+WOLFSSL_API int wc_Arc4SetKey(Arc4*, const byte*, word32);
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    WOLFSSL_API int  wc_Arc4AsyncInit(Arc4*, int);
-    WOLFSSL_API void wc_Arc4AsyncFree(Arc4*);
-#endif
+WOLFSSL_API int  wc_Arc4Init(Arc4*, void*, int);
+WOLFSSL_API void wc_Arc4Free(Arc4*);
 
 #ifdef __cplusplus
     } /* extern "C" */
diff --git a/wolfssl/wolfcrypt/asn.h b/wolfssl/wolfcrypt/asn.h
index b7377b9fd..f4b0c5e0b 100644
--- a/wolfssl/wolfcrypt/asn.h
+++ b/wolfssl/wolfcrypt/asn.h
@@ -28,9 +28,6 @@
 #ifndef NO_ASN
 
 #include 
-#ifndef NO_RSA
-    #include 
-#endif
 
 /* fips declare of RsaPrivateKeyDecode @wc_fips */
 #if defined(HAVE_FIPS) && !defined(NO_RSA)
@@ -51,9 +48,7 @@
 #endif
 #include 
 #include    /* public interface */
-#ifdef HAVE_ECC
-    #include 
-#endif
+
 
 #ifdef __cplusplus
     extern "C" {
@@ -418,6 +413,60 @@ struct DecodedName {
     int     serialLen;
 };
 
+enum SignatureState { /* presumably values for SignatureCtx.state - confirm */
+    SIG_STATE_BEGIN,
+    SIG_STATE_HASH,
+    SIG_STATE_KEY,
+    SIG_STATE_DO,
+    SIG_STATE_CHECK,
+};
+
+struct SignatureCtx { /* signature context; DecodedCert embeds one as sigCtx */
+    void* heap;       /* heap hint - presumably set by InitSignatureCtx() */
+    byte* digest;     /* presumably digestSz bytes - confirm */
+#ifndef NO_RSA
+    byte* out;
+    byte* plain;
+#endif /* !NO_RSA */
+#ifdef HAVE_ECC
+    int verify;
+#endif /* HAVE_ECC */
+    union {           /* one key pointer, viewed by algorithm or untyped */
+    #ifndef NO_RSA
+        struct RsaKey* rsa;
+    #endif
+    #ifdef HAVE_ECC
+        struct ecc_key* ecc;
+    #endif
+        void* ptr;
+    } key;
+    int devId;        /* presumably the devId given to InitSignatureCtx() */
+    int state;        /* presumably enum SignatureState - confirm */
+    int typeH;
+    int digestSz;
+    word32 keyOID;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    WC_ASYNC_DEV* asyncDev; /* held by pointer, not embedded */
+#endif /* WOLFSSL_ASYNC_CRYPT */
+};
+
+enum CertSignState { /* values held in CertSignCtx.state */
+    CERTSIGN_STATE_BEGIN,
+    CERTSIGN_STATE_DIGEST,
+    CERTSIGN_STATE_ENCODE,
+    CERTSIGN_STATE_DO,
+};
+
+struct CertSignCtx { /* cert sign context; ecc_key has one (async + cert gen) */
+    byte* sig;
+    byte* digest;
+    #ifndef NO_RSA
+        byte* encSig;
+        int encSigSz; /* presumably the length of encSig - confirm */
+    #endif /* !NO_RSA */
+    int state; /* enum CertSignState */
+};
+
 
 typedef struct DecodedCert DecodedCert;
 typedef struct DecodedName DecodedName;
@@ -425,6 +474,8 @@ typedef struct Signer      Signer;
 #ifdef WOLFSSL_TRUST_PEER_CERT
 typedef struct TrustedPeerCert TrustedPeerCert;
 #endif /* WOLFSSL_TRUST_PEER_CERT */
+typedef struct SignatureCtx SignatureCtx;
+typedef struct CertSignCtx  CertSignCtx;
 
 
 struct DecodedCert {
@@ -566,6 +617,9 @@ struct DecodedCert {
     char    extCertPolicies[MAX_CERTPOL_NB][MAX_CERTPOL_SZ];
     int     extCertPoliciesNb;
 #endif /* WOLFSSL_CERT_EXT */
+
+    Signer* ca;
+    SignatureCtx sigCtx;
 };
 
 
@@ -747,6 +801,10 @@ WOLFSSL_LOCAL int wc_CheckPrivateKey(byte* key, word32 keySz, DecodedCert* der);
                                        mp_int* r, mp_int* s);
 #endif
 
+WOLFSSL_LOCAL void InitSignatureCtx(SignatureCtx* sigCtx, void* heap, int devId);
+WOLFSSL_LOCAL void FreeSignatureCtx(SignatureCtx* sigCtx);
+
+
 #ifdef WOLFSSL_CERT_GEN
 
 enum cert_enums {
diff --git a/wolfssl/wolfcrypt/asn_public.h b/wolfssl/wolfcrypt/asn_public.h
index 1805deb26..f70a4bca7 100644
--- a/wolfssl/wolfcrypt/asn_public.h
+++ b/wolfssl/wolfcrypt/asn_public.h
@@ -25,17 +25,16 @@
 #define WOLF_CRYPT_ASN_PUBLIC_H
 
 #include 
-#ifdef HAVE_ECC
-    #include 
-#endif
-#if defined(WOLFSSL_CERT_GEN) && !defined(NO_RSA)
-    #include 
-#endif
 
 #ifdef __cplusplus
     extern "C" {
 #endif
 
+/* Opaque key and RNG types. Only pointers to them are used for arguments */
+typedef struct ecc_key ecc_key;
+typedef struct RsaKey RsaKey;
+typedef struct WC_RNG WC_RNG;
+
 /* Certificate file Type */
 enum CertType {
     CERT_TYPE       = 0,
@@ -95,14 +94,8 @@ enum Ctc_Misc {
 #endif /* WOLFSSL_CERT_EXT */
 };
 
-#ifdef WOLFSSL_CERT_GEN
 
-#ifndef HAVE_ECC
-    typedef struct ecc_key ecc_key;
-#endif
-#ifdef NO_RSA
-    typedef struct RsaKey RsaKey;
-#endif
+#ifdef WOLFSSL_CERT_GEN
 
 typedef struct CertName {
     char country[CTC_NAME_SIZE];
diff --git a/wolfssl/wolfcrypt/des3.h b/wolfssl/wolfcrypt/des3.h
index 409aa81f7..5eb94a1c8 100644
--- a/wolfssl/wolfcrypt/des3.h
+++ b/wolfssl/wolfcrypt/des3.h
@@ -80,11 +80,15 @@ typedef struct Des3 {
     word32 reg[DES_BLOCK_SIZE / sizeof(word32)];      /* for CBC mode */
     word32 tmp[DES_BLOCK_SIZE / sizeof(word32)];      /* same         */
 #ifdef WOLFSSL_ASYNC_CRYPT
-    AsyncCryptDev asyncDev;
+    const byte* key_raw;
+    const byte* iv_raw;
+    WC_ASYNC_DEV asyncDev;
 #endif
+    void* heap;
 } Des3;
 #endif /* HAVE_FIPS */
 
+
 WOLFSSL_API int  wc_Des_SetKey(Des* des, const byte* key,
                                const byte* iv, int dir);
 WOLFSSL_API void wc_Des_SetIV(Des* des, const byte* iv);
@@ -109,10 +113,10 @@ WOLFSSL_API int  wc_Des3_CbcEncrypt(Des3* des, byte* out,
 WOLFSSL_API int  wc_Des3_CbcDecrypt(Des3* des, byte* out,
                                     const byte* in,word32 sz);
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    WOLFSSL_API int  wc_Des3AsyncInit(Des3*, int);
-    WOLFSSL_API void wc_Des3AsyncFree(Des3*);
-#endif
+/* These are only required when using either static memory
+  (WOLFSSL_STATIC_MEMORY) or asynchronous crypto (WOLFSSL_ASYNC_CRYPT) */
+WOLFSSL_API int  wc_Des3Init(Des3*, void*, int);
+WOLFSSL_API void wc_Des3Free(Des3*);
 
 #ifdef __cplusplus
     } /* extern "C" */
diff --git a/wolfssl/wolfcrypt/dh.h b/wolfssl/wolfcrypt/dh.h
index 2410ab777..be40c5c11 100644
--- a/wolfssl/wolfcrypt/dh.h
+++ b/wolfssl/wolfcrypt/dh.h
@@ -34,14 +34,22 @@
     extern "C" {
 #endif
 
+#ifdef WOLFSSL_ASYNC_CRYPT
+    #include 
+#endif
 
 /* Diffie-Hellman Key */
 typedef struct DhKey {
     mp_int p, g;                            /* group parameters  */
+    void* heap;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    WC_ASYNC_DEV asyncDev;
+#endif
 } DhKey;
 
 
 WOLFSSL_API int wc_InitDhKey(DhKey* key);
+WOLFSSL_API int wc_InitDhKey_ex(DhKey* key, void* heap, int devId);
 WOLFSSL_API void wc_FreeDhKey(DhKey* key);
 
 WOLFSSL_API int wc_DhGenerateKeyPair(DhKey* key, WC_RNG* rng, byte* priv,
@@ -58,7 +66,6 @@ WOLFSSL_API int wc_DhParamsLoad(const byte* input, word32 inSz, byte* p,
                             word32* pInOutSz, byte* g, word32* gInOutSz);
 WOLFSSL_API int wc_DhCheckPubKey(DhKey* key, const byte* pub, word32 pubSz);
 
-
 #ifdef __cplusplus
     } /* extern "C" */
 #endif
diff --git a/wolfssl/wolfcrypt/ecc.h b/wolfssl/wolfcrypt/ecc.h
index c06091859..beb3ec54f 100644
--- a/wolfssl/wolfcrypt/ecc.h
+++ b/wolfssl/wolfcrypt/ecc.h
@@ -36,6 +36,9 @@
 
 #ifdef WOLFSSL_ASYNC_CRYPT
     #include 
+    #ifdef WOLFSSL_CERT_GEN
+        #include 
+    #endif
 #endif
 
 #ifdef WOLFSSL_ATECC508A
@@ -105,7 +108,7 @@ enum {
     ECC_MAXSIZE_GEN = 74,   /* MAX Buffer size required when generating ECC keys*/
     ECC_MAX_PAD_SZ  = 4,    /* ECC maximum padding size */
     ECC_MAX_OID_LEN = 16,
-    ECC_MAX_SIG_SIZE= ((MAX_ECC_BYTES * 2) + SIG_HEADER_SZ)
+    ECC_MAX_SIG_SIZE= ((MAX_ECC_BYTES * 2) + ECC_MAX_PAD_SZ + SIG_HEADER_SZ)
 };
 
 /* Curve Types */
@@ -234,6 +237,7 @@ typedef struct alt_fp_int {
 } alt_fp_int;
 #endif /* ALT_ECC_SIZE */
 
+
 /* A point on an ECC curve, stored in Jacbobian format such that (x,y,z) =>
    (x/z^2, y/z^3, 1) when interpreted as affine */
 typedef struct {
@@ -276,10 +280,13 @@ typedef struct ecc_key {
     mp_int    k;        /* private key */
 #endif
 #ifdef WOLFSSL_ASYNC_CRYPT
-    mp_int*   r;        /* sign/verify temps */
-    mp_int*   s;
-    AsyncCryptDev asyncDev;
-#endif
+    mp_int* r;          /* sign/verify temps */
+    mp_int* s;
+    WC_ASYNC_DEV asyncDev;
+    #ifdef WOLFSSL_CERT_GEN
+        CertSignCtx certSignCtx; /* context info for cert sign (MakeSignature) */
+    #endif
+#endif /* WOLFSSL_ASYNC_CRYPT */
 } ecc_key;
 
 
@@ -376,6 +383,11 @@ WOLFSSL_API
 int wc_ecc_get_curve_size_from_name(const char* curveName);
 WOLFSSL_API
 int wc_ecc_get_curve_id_from_name(const char* curveName);
+WOLFSSL_API
+int wc_ecc_get_curve_id_from_params(int fieldSize,
+        const byte* prime, word32 primeSz, const byte* Af, word32 AfSz,
+        const byte* Bf, word32 BfSz, const byte* order, word32 orderSz,
+        const byte* Gx, word32 GxSz, const byte* Gy, word32 GySz, int cofactor);
 
 #ifndef WOLFSSL_ATECC508A
 
@@ -542,14 +554,10 @@ WOLFSSL_API int wc_X963_KDF(enum wc_HashType type, const byte* secret,
 #endif
 
 #ifdef ECC_CACHE_CURVE
+WOLFSSL_API int wc_ecc_curve_cache_init(void);
 WOLFSSL_API void wc_ecc_curve_cache_free(void);
 #endif
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    WOLFSSL_API int wc_ecc_async_handle(ecc_key* key,
-        WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event);
-    WOLFSSL_API int wc_ecc_async_wait(int ret, ecc_key* key);
-#endif
 
 #ifdef __cplusplus
     }    /* extern "C" */
diff --git a/wolfssl/wolfcrypt/error-crypt.h b/wolfssl/wolfcrypt/error-crypt.h
index 075336911..df29accc4 100644
--- a/wolfssl/wolfcrypt/error-crypt.h
+++ b/wolfssl/wolfcrypt/error-crypt.h
@@ -186,6 +186,9 @@ enum {
     DH_CHECK_PUB_E      = -243,  /* DH Check Pub Key error */
     BAD_PATH_ERROR      = -244,  /* Bad path for opendir */
 
+    ASYNC_OP_E          = -245,  /* Async operation error */
+
+    WC_LAST_E           = -245,  /* Update this to indicate last error */
     MIN_CODE_E          = -300   /* errors -101 - -299 */
 
     /* add new companion error id strings for any new error codes
diff --git a/wolfssl/wolfcrypt/fe_operations.h b/wolfssl/wolfcrypt/fe_operations.h
index 0696b6789..5bbe1b299 100644
--- a/wolfssl/wolfcrypt/fe_operations.h
+++ b/wolfssl/wolfcrypt/fe_operations.h
@@ -43,6 +43,8 @@ Bounds on each t[i] vary depending on context.
 #ifdef CURVED25519_SMALL
     #define F25519_SIZE	32
     typedef byte     fe[32];
+#elif defined(HAVE___UINT128_T)
+    typedef int64_t  fe[5];
 #else
     typedef int32_t  fe[10];
 #endif
diff --git a/wolfssl/wolfcrypt/hash.h b/wolfssl/wolfcrypt/hash.h
index fa1883bc6..beaeb65ea 100644
--- a/wolfssl/wolfcrypt/hash.h
+++ b/wolfssl/wolfcrypt/hash.h
@@ -114,59 +114,32 @@ WOLFSSL_API int wc_HashFinal(wc_HashAlg* hash, enum wc_HashType type,
 
 #ifndef NO_MD5
 #include 
-WOLFSSL_API void wc_Md5GetHash(Md5*, byte*);
-WOLFSSL_API void wc_Md5RestorePos(Md5*, Md5*);
-#if defined(WOLFSSL_TI_HASH)
-    WOLFSSL_API void wc_Md5Free(Md5*);
-#else
-    #define wc_Md5Free(d)
-#endif
+WOLFSSL_API int wc_Md5Hash(const byte* data, word32 len, byte* hash);
 #endif
 
 #ifndef NO_SHA
 #include 
-WOLFSSL_API int wc_ShaGetHash(Sha*, byte*);
-WOLFSSL_API void wc_ShaRestorePos(Sha*, Sha*);
 WOLFSSL_API int wc_ShaHash(const byte*, word32, byte*);
-#if defined(WOLFSSL_TI_HASH)
-     WOLFSSL_API void wc_ShaFree(Sha*);
-#else
-    #define wc_ShaFree(d)
-#endif
 #endif
 
 #ifndef NO_SHA256
 #include 
-WOLFSSL_API int wc_Sha256GetHash(Sha256*, byte*);
-WOLFSSL_API void wc_Sha256RestorePos(Sha256*, Sha256*);
 WOLFSSL_API int wc_Sha256Hash(const byte*, word32, byte*);
-#if defined(WOLFSSL_TI_HASH)
-    WOLFSSL_API void wc_Sha256Free(Sha256*);
-#else
-    #define wc_Sha256Free(d)
-#endif
 
     #if defined(WOLFSSL_SHA224)
-        WOLFSSL_API int wc_Sha224GetHash(Sha224*, byte*);
         WOLFSSL_API int wc_Sha224Hash(const byte*, word32, byte*);
-        #define wc_Sha224Free(d)
     #endif /* defined(WOLFSSL_SHA224) */
 #endif
 
 #ifdef WOLFSSL_SHA512
 #include 
-WOLFSSL_API int wc_Sha512GetHash(Sha512*, byte*);
 WOLFSSL_API int wc_Sha512Hash(const byte*, word32, byte*);
-#define wc_Sha512Free(d)
 
     #if defined(WOLFSSL_SHA384)
-        WOLFSSL_API int wc_Sha384GetHash(Sha384*, byte*);
         WOLFSSL_API int wc_Sha384Hash(const byte*, word32, byte*);
-        #define wc_Sha384Free(d)
     #endif /* defined(WOLFSSL_SHA384) */
 #endif /* WOLFSSL_SHA512 */
 
-
 #ifdef __cplusplus
     } /* extern "C" */
 #endif
diff --git a/wolfssl/wolfcrypt/hmac.h b/wolfssl/wolfcrypt/hmac.h
index 1d4930664..bf7154a64 100644
--- a/wolfssl/wolfcrypt/hmac.h
+++ b/wolfssl/wolfcrypt/hmac.h
@@ -58,7 +58,7 @@
     extern "C" {
 #endif
 #ifndef HAVE_FIPS
-        
+
 #ifdef WOLFSSL_ASYNC_CRYPT
     #include 
 #endif
@@ -95,7 +95,7 @@ enum {
 /* Select the largest available hash for the buffer size. */
 #if defined(WOLFSSL_SHA512)
     MAX_DIGEST_SIZE = SHA512_DIGEST_SIZE,
-    HMAC_BLOCK_SIZE = SHA512_BLOCK_SIZE
+    HMAC_BLOCK_SIZE = SHA512_BLOCK_SIZE,
 #elif defined(HAVE_BLAKE2)
     MAX_DIGEST_SIZE = BLAKE2B_OUTBYTES,
     HMAC_BLOCK_SIZE = BLAKE2B_BLOCKBYTES,
@@ -110,10 +110,10 @@ enum {
     HMAC_BLOCK_SIZE = SHA224_BLOCK_SIZE
 #elif !defined(NO_SHA)
     MAX_DIGEST_SIZE = SHA_DIGEST_SIZE,
-    HMAC_BLOCK_SIZE = SHA_BLOCK_SIZE
+    HMAC_BLOCK_SIZE = SHA_BLOCK_SIZE,
 #elif !defined(NO_MD5)
     MAX_DIGEST_SIZE = MD5_DIGEST_SIZE,
-    HMAC_BLOCK_SIZE = MD5_BLOCK_SIZE
+    HMAC_BLOCK_SIZE = MD5_BLOCK_SIZE,
 #else
     #error "You have to have some kind of hash if you want to use HMAC."
 #endif
@@ -122,27 +122,27 @@ enum {
 
 /* hash union */
 typedef union {
-    #ifndef NO_MD5
-        Md5 md5;
-    #endif
-    #ifndef NO_SHA
-        Sha sha;
-    #endif
-    #ifdef WOLFSSL_SHA224
-        Sha224 sha224;
-    #endif
-    #ifndef NO_SHA256
-        Sha256 sha256;
-    #endif
-    #ifdef WOLFSSL_SHA384
-        Sha384 sha384;
-    #endif
-    #ifdef WOLFSSL_SHA512
-        Sha512 sha512;
-    #endif
-    #ifdef HAVE_BLAKE2
-        Blake2b blake2b;
-    #endif
+#ifndef NO_MD5
+    Md5 md5;
+#endif
+#ifndef NO_SHA
+    Sha sha;
+#endif
+#ifdef WOLFSSL_SHA224
+    Sha224 sha224;
+#endif
+#ifndef NO_SHA256
+    Sha256 sha256;
+#endif
+#ifdef WOLFSSL_SHA512
+#ifdef WOLFSSL_SHA384
+    Sha384 sha384;
+#endif
+    Sha512 sha512;
+#endif
+#ifdef HAVE_BLAKE2
+    Blake2b blake2b;
+#endif
 } Hash;
 
 /* Hmac digest */
@@ -154,13 +154,14 @@ typedef struct Hmac {
     void*   heap;                 /* heap hint */
     byte    macType;              /* md5 sha or sha256 */
     byte    innerHashKeyed;       /* keyed flag */
+
 #ifdef WOLFSSL_ASYNC_CRYPT
-    AsyncCryptDev asyncDev;
+    WC_ASYNC_DEV asyncDev;
+    byte         keyRaw[HMAC_BLOCK_SIZE];
+    word16       keyLen;          /* hmac key length */
     #ifdef HAVE_CAVIUM
-        word16   keyLen;          /* hmac key length */
-        word16   dataLen;
-        HashType type;            /* hmac key type */
         byte*    data;            /* buffered input data for one call */
+        word16   dataLen;
     #endif /* HAVE_CAVIUM */
 #endif /* WOLFSSL_ASYNC_CRYPT */
 } Hmac;
@@ -172,23 +173,17 @@ WOLFSSL_API int wc_HmacSetKey(Hmac*, int type, const byte* key, word32 keySz);
 WOLFSSL_API int wc_HmacUpdate(Hmac*, const byte*, word32);
 WOLFSSL_API int wc_HmacFinal(Hmac*, byte*);
 WOLFSSL_API int wc_HmacSizeByType(int type);
-#ifdef WOLFSSL_ASYNC_CRYPT
-    WOLFSSL_API int  wc_HmacAsyncInit(Hmac*, int);
-    WOLFSSL_API void wc_HmacAsyncFree(Hmac*);
-#endif
-
 
+WOLFSSL_API int wc_HmacInit(Hmac* hmac, void* heap, int devId);
+WOLFSSL_API void wc_HmacFree(Hmac*);
 
 WOLFSSL_API int wolfSSL_GetHmacMaxSize(void);
 
-
 #ifdef HAVE_HKDF
-
-WOLFSSL_API int wc_HKDF(int type, const byte* inKey, word32 inKeySz,
-                    const byte* salt, word32 saltSz,
-                    const byte* info, word32 infoSz,
-                    byte* out, word32 outSz);
-
+    WOLFSSL_API int wc_HKDF(int type, const byte* inKey, word32 inKeySz,
+                        const byte* salt, word32 saltSz,
+                        const byte* info, word32 infoSz,
+                        byte* out, word32 outSz);
 #endif /* HAVE_HKDF */
 
 #ifdef __cplusplus
diff --git a/wolfssl/wolfcrypt/include.am b/wolfssl/wolfcrypt/include.am
index ca33c8b1e..92307a2b8 100644
--- a/wolfssl/wolfcrypt/include.am
+++ b/wolfssl/wolfcrypt/include.am
@@ -69,6 +69,16 @@ noinst_HEADERS+= \
                          wolfssl/wolfcrypt/port/nxp/ksdk_port.h \
                          wolfssl/wolfcrypt/port/atmel/atmel.h
 
-if BUILD_CAVIUM
-noinst_HEADERS+=         wolfssl/wolfcrypt/port/cavium/cavium_nitrox.h
+if BUILD_ASYNCCRYPT
+nobase_include_HEADERS+= wolfssl/wolfcrypt/async.h
 endif
+
+if BUILD_CAVIUM
+nobase_include_HEADERS+= wolfssl/wolfcrypt/port/cavium/cavium_nitrox.h
+endif
+
+if BUILD_INTEL_QA
+nobase_include_HEADERS+= wolfssl/wolfcrypt/port/intel/quickassist.h
+nobase_include_HEADERS+= wolfssl/wolfcrypt/port/intel/quickassist_mem.h
+endif
+
diff --git a/wolfssl/wolfcrypt/integer.h b/wolfssl/wolfcrypt/integer.h
index 543a832bc..9c7bc01b0 100644
--- a/wolfssl/wolfcrypt/integer.h
+++ b/wolfssl/wolfcrypt/integer.h
@@ -45,6 +45,10 @@
 
 #include 
 
+/* wolf big int and common functions */
+#include 
+
+
 #ifdef WOLFSSL_PUBLIC_MP
     #define MP_API   WOLFSSL_API
 #else
@@ -184,14 +188,20 @@ typedef int           mp_err;
    BITS_PER_DIGIT*2) */
 #define MP_WARRAY  (1 << (sizeof(mp_word) * CHAR_BIT - 2 * DIGIT_BIT + 1))
 
-/* the infamous mp_int structure */
+#ifdef HAVE_WOLF_BIGINT
+    struct WC_BIGINT;
+#endif
+
+/* the mp_int structure */
 typedef struct mp_int {
     int used, alloc, sign;
     mp_digit *dp;
-#ifdef WOLFSSL_ASYNC_CRYPT
-    byte* dpraw; /* Used for hardware crypto */
+
+#ifdef HAVE_WOLF_BIGINT
+    struct WC_BIGINT raw; /* unsigned binary (big endian) */
 #endif
 } mp_int;
+#define MP_INT_DEFINED
 
 /* callback for mp_prime_random, should fill dst with random bytes and return
    how many read [up to len] */
@@ -242,6 +252,7 @@ extern const char *mp_s_rmap;
 /* 6 functions needed by Rsa */
 MP_API int  mp_init (mp_int * a);
 MP_API void mp_clear (mp_int * a);
+MP_API void mp_free (mp_int * a);
 MP_API void mp_forcezero(mp_int * a);
 MP_API int  mp_unsigned_bin_size(mp_int * a);
 MP_API int  mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c);
diff --git a/wolfssl/wolfcrypt/md5.h b/wolfssl/wolfcrypt/md5.h
index b3d1c43af..27c690e4f 100644
--- a/wolfssl/wolfcrypt/md5.h
+++ b/wolfssl/wolfcrypt/md5.h
@@ -50,10 +50,15 @@ enum {
 };
 
 #if defined(WOLFSSL_PIC32MZ_HASH)
-#include "port/pic32/pic32mz-crypt.h"
+    #include "port/pic32/pic32mz-crypt.h"
+#endif
+#ifdef WOLFSSL_ASYNC_CRYPT
+    #include 
 #endif
 
-#ifndef WOLFSSL_TI_HASH
+#ifdef WOLFSSL_TI_HASH
+    #include "wolfssl/wolfcrypt/port/ti/ti-hash.h"
+#else
 
 /* MD5 digest */
 typedef struct Md5 {
@@ -61,24 +66,29 @@ typedef struct Md5 {
     word32  loLen;     /* length in bytes   */
     word32  hiLen;     /* length in bytes   */
     word32  buffer[MD5_BLOCK_SIZE  / sizeof(word32)];
-    #if !defined(WOLFSSL_PIC32MZ_HASH)
+#if !defined(WOLFSSL_PIC32MZ_HASH)
     word32  digest[MD5_DIGEST_SIZE / sizeof(word32)];
-    #else
+#else
     word32  digest[PIC32_HASH_SIZE / sizeof(word32)];
-    pic32mz_desc desc ; /* Crypt Engine descriptor */
-    #endif
+    pic32mz_desc desc; /* Crypt Engine descriptor */
+#endif
+    void*   heap;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    WC_ASYNC_DEV asyncDev;
+#endif /* WOLFSSL_ASYNC_CRYPT */
 } Md5;
 
-#else /* WOLFSSL_TI_HASH */
-    #include "wolfssl/wolfcrypt/port/ti/ti-hash.h"
-#endif
-
+#endif /* WOLFSSL_TI_HASH */
 
 WOLFSSL_API int wc_InitMd5(Md5*);
+WOLFSSL_API int wc_InitMd5_ex(Md5*, void*, int);
 WOLFSSL_API int wc_Md5Update(Md5*, const byte*, word32);
 WOLFSSL_API int wc_Md5Final(Md5*, byte*);
+WOLFSSL_API void wc_Md5Free(Md5*);
+
+WOLFSSL_API int  wc_Md5GetHash(Md5*, byte*);
+WOLFSSL_API int  wc_Md5Copy(Md5*, Md5*);
 
-WOLFSSL_API int  wc_Md5Hash(const byte*, word32, byte*);
 
 #ifdef __cplusplus
     } /* extern "C" */
diff --git a/wolfssl/wolfcrypt/mem_track.h b/wolfssl/wolfcrypt/mem_track.h
index f24325eaf..ee916d8bc 100644
--- a/wolfssl/wolfcrypt/mem_track.h
+++ b/wolfssl/wolfcrypt/mem_track.h
@@ -34,7 +34,7 @@
  *
  * On startup call:
  * InitMemoryTracker();
- * 
+ *
  * When ready to dump the memory report call:
  * ShowMemoryTracker();
  *
@@ -196,6 +196,7 @@
         return ret;
     }
 
+#ifdef WOLFSSL_TRACK_MEMORY
     STATIC INLINE int InitMemoryTracker(void)
     {
         int ret = wolfSSL_SetAllocators(TrackMalloc, TrackFree, TrackRealloc);
@@ -230,6 +231,7 @@
                                        (unsigned long)ourMemStats.currentBytes);
     #endif
     }
+#endif
 
 #endif /* USE_WOLFSSL_MEMORY */
 
diff --git a/wolfssl/wolfcrypt/memory.h b/wolfssl/wolfcrypt/memory.h
index 96dce8bdd..9ecd9cd92 100644
--- a/wolfssl/wolfcrypt/memory.h
+++ b/wolfssl/wolfcrypt/memory.h
@@ -70,10 +70,14 @@
     #endif /* WOLFSSL_DEBUG_MEMORY */
 #endif /* WOLFSSL_STATIC_MEMORY */
 
-/* Public set function */
-WOLFSSL_API int wolfSSL_SetAllocators(wolfSSL_Malloc_cb  malloc_function,
-                                    wolfSSL_Free_cb    free_function,
-                                    wolfSSL_Realloc_cb realloc_function);
+/* Public get/set functions */
+WOLFSSL_API int wolfSSL_SetAllocators(wolfSSL_Malloc_cb,
+                                      wolfSSL_Free_cb,
+                                      wolfSSL_Realloc_cb);
+
+WOLFSSL_API int wolfSSL_GetAllocators(wolfSSL_Malloc_cb*,
+                                      wolfSSL_Free_cb*,
+                                      wolfSSL_Realloc_cb*);
 
 #ifdef WOLFSSL_STATIC_MEMORY
     #define WOLFSSL_STATIC_TIMEOUT 1
@@ -95,7 +99,7 @@ WOLFSSL_API int wolfSSL_SetAllocators(wolfSSL_Malloc_cb  malloc_function,
         #endif
     #endif
     #ifndef WOLFMEM_DIST
-        #define WOLFMEM_DIST    8,4,4,12,4,5,2,1,1
+        #define WOLFMEM_DIST    8,4,4,12,4,5,8,1,1
     #endif
 
     /* flags for loading static memory (one hot bit) */
diff --git a/wolfssl/wolfcrypt/port/cavium/cavium_nitrox.h b/wolfssl/wolfcrypt/port/cavium/cavium_nitrox.h
deleted file mode 100644
index aed338f40..000000000
--- a/wolfssl/wolfcrypt/port/cavium/cavium_nitrox.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/* cavium-nitrox.h
- *
- * Copyright (C) 2006-2016 wolfSSL Inc.
- *
- * This file is part of wolfSSL. (formerly known as CyaSSL)
- *
- * wolfSSL is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * wolfSSL is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
- */
-
-#ifndef _CAVIUM_NITROX_H_
-#define _CAVIUM_NITROX_H_
-
-#ifdef HAVE_CAVIUM
-
-#include 
-
-#ifndef HAVE_CAVIUM_V
-    #include "cavium_sysdep.h"
-#endif
-#include "cavium_common.h"
-#ifndef HAVE_CAVIUM_V
-    #include "cavium_ioctl.h"
-#else
-    #include "cavium_sym_crypto.h"
-    #include "cavium_asym_crypto.h"
-#endif
-#include 
-
-#define CAVIUM_SSL_GRP      0
-#define CAVIUM_DPORT        256
-
-/* Compatibility with older Cavium SDK's */
-#ifndef HAVE_CAVIUM_V
-    typedef int CspHandle;
-    typedef word32 CavReqId;
-
-    #define AES_128 AES_128_BIT
-    #define AES_192 AES_192_BIT
-    #define AES_256 AES_256_BIT
-#else
-    #define CAVIUM_DEV_ID       0
-    #define CAVIUM_BLOCKING     BLOCKING
-    #define CAVIUM_NON_BLOCKING NON_BLOCKING
-    #define CAVIUM_DIRECT       DMA_DIRECT_DIRECT
-    typedef Uint64 CavReqId;
-#endif
-
-#ifdef WOLFSSL_ASYNC_CRYPT
-    #define CAVIUM_REQ_MODE CAVIUM_NON_BLOCKING
-#else
-    #define CAVIUM_REQ_MODE CAVIUM_BLOCKING
-#endif
-
-
-#ifdef WOLFSSL_ASYNC_CRYPT
-    #define CAVIUM_MAX_PENDING  90
-    #define CAVIUM_MAX_POLL     MAX_TO_POLL
-#endif
-
-
-typedef struct CaviumNitroxDev {
-    CspHandle   devId;                      /* nitrox device id */
-    ContextType type;                       /* Typically CONTEXT_SSL, but also ECC types */
-    Uint64      contextHandle;              /* nitrox context memory handle */
-    CavReqId    reqId;                      /* Current requestId */
-} CaviumNitroxDev;
-
-struct WOLF_EVENT;
-
-
-/* Wrapper API's */
-WOLFSSL_LOCAL int NitroxTranslateResponseCode(int ret);
-WOLFSSL_LOCAL CspHandle NitroxGetDeviceHandle(void);
-WOLFSSL_LOCAL CspHandle NitroxOpenDevice(int dma_mode, int dev_id);
-WOLFSSL_LOCAL int NitroxAllocContext(CaviumNitroxDev* nitrox, CspHandle devId,
-    ContextType type);
-WOLFSSL_LOCAL void NitroxFreeContext(CaviumNitroxDev* nitrox);
-WOLFSSL_LOCAL void NitroxCloseDevice(CspHandle devId);
-
-#if defined(WOLFSSL_ASYNC_CRYPT)
-WOLFSSL_LOCAL int NitroxCheckRequest(CspHandle devId, CavReqId reqId);
-WOLFSSL_LOCAL int NitroxCheckRequests(CspHandle devId,
-    CspMultiRequestStatusBuffer* req_stat_buf);
-#endif /* WOLFSSL_ASYNC_CRYPT */
-
-
-/* Crypto wrappers */
-#ifndef NO_RSA
-    struct RsaKey;
-    WOLFSSL_LOCAL int NitroxRsaExptMod(
-                            const byte* in, word32 inLen,
-                            byte* exponent, word32 expLen,
-                            byte* modulus, word32 modLen,
-                            byte* out, word32* outLen, struct RsaKey* key);
-    WOLFSSL_LOCAL int NitroxRsaPublicEncrypt(const byte* in, word32 inLen,
-                                byte* out, word32 outLen, struct RsaKey* key);
-    WOLFSSL_LOCAL int NitroxRsaPrivateDecrypt(const byte* in, word32 inLen,
-                                byte* out, word32 outLen, struct RsaKey* key);
-    WOLFSSL_LOCAL int NitroxRsaSSL_Sign(const byte* in, word32 inLen,
-                                byte* out, word32 outLen, struct RsaKey* key);
-    WOLFSSL_LOCAL int NitroxRsaSSL_Verify(const byte* in, word32 inLen,
-                                byte* out, word32 outLen, struct RsaKey* key);
-#endif /* !NO_RSA */
-
-#ifndef NO_AES
-    struct Aes;
-    WOLFSSL_LOCAL int NitroxAesSetKey(struct Aes* aes, const byte* key,
-                                                word32 length, const byte* iv);
-    #ifdef HAVE_AES_CBC
-        WOLFSSL_LOCAL int NitroxAesCbcEncrypt(struct Aes* aes, byte* out,
-                                                const byte* in, word32 length);
-    #ifdef HAVE_AES_DECRYPT
-        WOLFSSL_LOCAL int NitroxAesCbcDecrypt(struct Aes* aes, byte* out,
-                                                const byte* in, word32 length);
-    #endif /* HAVE_AES_DECRYPT */
-    #endif /* HAVE_AES_CBC */
-#endif /* !NO_AES */
-
-#ifndef NO_RC4
-    struct Arc4;
-    WOLFSSL_LOCAL void NitroxArc4SetKey(struct Arc4* arc4, const byte* key,
-                                                                word32 length);
-    WOLFSSL_LOCAL void NitroxArc4Process(struct Arc4* arc4, byte* out,
-                                                const byte* in, word32 length);
-#endif /* !NO_RC4 */
-
-#ifndef NO_DES3
-    struct Des3;
-    WOLFSSL_LOCAL int NitroxDes3SetKey(struct Des3* des3, const byte* key,
-                                                               const byte* iv);
-    WOLFSSL_LOCAL int NitroxDes3CbcEncrypt(struct Des3* des3, byte* out,
-                                                const byte* in, word32 length);
-    WOLFSSL_LOCAL int NitroxDes3CbcDecrypt(struct Des3* des3, byte* out,
-                                                const byte* in, word32 length);
-#endif /* !NO_DES3 */
-
-#ifndef NO_HMAC
-    struct Hmac;
-    WOLFSSL_LOCAL int NitroxHmacFinal(struct Hmac* hmac, byte* hash);
-    WOLFSSL_LOCAL int NitroxHmacUpdate(struct Hmac* hmac, const byte* msg,
-                                                                word32 length);
-    WOLFSSL_LOCAL int NitroxHmacSetKey(struct Hmac* hmac, int type,
-                                               const byte* key, word32 length);
-#endif /* NO_HMAC */
-
-#if !defined(HAVE_HASHDRBG) && !defined(NO_RC4)
-    WOLFSSL_API void NitroxRngGenerateBlock(WC_RNG* rng, byte* output, word32 sz);
-#endif
-
-
-#endif /* HAVE_CAVIUM */
-
-#endif /* _CAVIUM_NITROX_H_ */
diff --git a/wolfssl/wolfcrypt/port/ti/ti-hash.h b/wolfssl/wolfcrypt/port/ti/ti-hash.h
index c63a2ce20..93311a4e2 100644
--- a/wolfssl/wolfcrypt/port/ti/ti-hash.h
+++ b/wolfssl/wolfcrypt/port/ti/ti-hash.h
@@ -19,47 +19,43 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
  */
 
- 
+
 #ifndef WOLF_CRYPT_TI_HASH_H
 #define WOLF_CRYPT_TI_HASH_H
 
 #include 
 
 #ifndef WOLFSSL_TI_INITBUFF
-#define WOLFSSL_TI_INITBUFF 64
+    #define WOLFSSL_TI_INITBUFF    64
 #endif
 
-#define WOLFSSL_MAX_HASH_SIZE  64
+#ifndef WOLFSSL_MAX_HASH_SIZE
+    #define WOLFSSL_MAX_HASH_SIZE  64
+#endif
 
 typedef struct {
-    byte   *msg ;
-    word32 used ;
-    word32 len ;
-    byte hash[WOLFSSL_MAX_HASH_SIZE] ;
-} wolfssl_TI_Hash ;
+    byte   *msg;
+    word32 used;
+    word32 len;
+    byte hash[WOLFSSL_MAX_HASH_SIZE];
+} wolfssl_TI_Hash;
 
 
 #ifndef TI_HASH_TEST
-#if !defined(NO_MD5)
-typedef wolfssl_TI_Hash Md5 ;
 
+#if !defined(NO_MD5)
+    typedef wolfssl_TI_Hash Md5;
 #endif
 #if !defined(NO_SHA)
-typedef wolfssl_TI_Hash Sha ;
+    typedef wolfssl_TI_Hash Sha;
 #endif
 #if !defined(NO_SHA256)
-typedef wolfssl_TI_Hash Sha256 ;
+    typedef wolfssl_TI_Hash Sha256;
+#endif
+#if defined(WOLFSSL_SHA224)
+    typedef wolfssl_TI_Hash Sha224;
 #endif
 
-#if defined(HAVE_SHA224)
-typedef wolfssl_TI_Hash Sha224 ;
-#define SHA224_DIGEST_SIZE  28
+#endif /* !TI_HASH_TEST */
 
-WOLFSSL_API int wc_InitSha224(Sha224* sha224) ;
-WOLFSSL_API int wc_Sha224Update(Sha224* sha224, const byte* data, word32 len) ;
-WOLFSSL_API int wc_Sha224Final(Sha224* sha224, byte* hash) ;
-WOLFSSL_API int wc_Sha224Hash(const byte* data, word32 len, byte*hash) ;
-
-#endif
-#endif
 #endif /* WOLF_CRYPT_TI_HASH_H  */
diff --git a/wolfssl/wolfcrypt/random.h b/wolfssl/wolfcrypt/random.h
index 1669a6e26..6a6f104e5 100644
--- a/wolfssl/wolfcrypt/random.h
+++ b/wolfssl/wolfcrypt/random.h
@@ -35,44 +35,71 @@
     extern "C" {
 #endif
 
-/* Maximum generate block length */
-#define RNG_MAX_BLOCK_LEN (0x10000)
+ /* Maximum generate block length */
+#ifndef RNG_MAX_BLOCK_LEN
+    #define RNG_MAX_BLOCK_LEN (0x10000)
+#endif
+
+/* Size of the DRBG seed */
+#ifndef DRBG_SEED_LEN
+    #define DRBG_SEED_LEN (440/8)
+#endif
+
+
+#if defined(CUSTOM_RAND_GENERATE) && !defined(CUSTOM_RAND_TYPE)
+    /* To maintain compatibility the default is byte */
+    #define CUSTOM_RAND_TYPE    byte
+#endif
+
+/* make sure Hash DRBG is enabled, unless both WC_NO_HASHDRBG and
+    CUSTOM_RAND_GENERATE_BLOCK are defined (the test below uses ||) */
+#if !defined(WC_NO_HASHDRBG) || !defined(CUSTOM_RAND_GENERATE_BLOCK)
+    #undef  HAVE_HASHDRBG
+    #define HAVE_HASHDRBG
+#endif
+
 
 #ifndef HAVE_FIPS /* avoid redefining structs and macros */
 
-#if defined(WOLFSSL_FORCE_RC4_DRBG) && defined(NO_RC4)
-    #error Cannot have WOLFSSL_FORCE_RC4_DRBG and NO_RC4 defined.
-#endif /* WOLFSSL_FORCE_RC4_DRBG && NO_RC4 */
-
-
 /* RNG supports the following sources (in order):
  * 1. CUSTOM_RAND_GENERATE_BLOCK: Defines name of function as RNG source and
- *     bypasses the P-RNG.
- * 2. HAVE_HASHDRBG && !NO_SHA256 (SHA256 enabled): Uses SHA256 based P-RNG
+ *     bypasses the options below.
+ * 2. HAVE_INTEL_RDRAND: Uses the Intel RDRAND if supported by CPU.
+ * 3. HAVE_HASHDRBG (requires SHA256 enabled): Uses SHA256 based P-RNG
  *     seeded via wc_GenerateSeed. This is the default source.
- * 3. !NO_RC4 (RC4 enabled): Uses RC4
  */
 
+ /* Seed source can be overridden by defining one of these:
+      CUSTOM_RAND_GENERATE_SEED
+      CUSTOM_RAND_GENERATE_SEED_OS
+      CUSTOM_RAND_GENERATE */
+
+
 #if defined(CUSTOM_RAND_GENERATE_BLOCK)
     /* To use define the following:
      * #define CUSTOM_RAND_GENERATE_BLOCK myRngFunc
      * extern int myRngFunc(byte* output, word32 sz);
      */
-#elif (defined(HAVE_HASHDRBG) || defined(NO_RC4))
+#elif defined(HAVE_HASHDRBG)
     #ifdef NO_SHA256
         #error "Hash DRBG requires SHA-256."
     #endif /* NO_SHA256 */
-
     #include 
+#elif defined(HAVE_WNR)
+     /* allow Whitewood as a direct RNG source via wc_GenerateSeed */
 #else
-    #include 
+    #error No RNG source defined!
 #endif
 
-
 #ifdef HAVE_WNR
     #include 
 #endif
 
+#ifdef WOLFSSL_ASYNC_CRYPT
+    #include 
+#endif
+
+
 #if defined(USE_WINDOWS_API)
     #if defined(_WIN64)
         typedef unsigned __int64 ProviderHandle;
@@ -98,46 +125,26 @@ typedef struct OS_Seed {
     #define WC_RNG_TYPE_DEFINED
 #endif
 
-#if (defined(HAVE_HASHDRBG) || defined(NO_RC4)) && !defined(CUSTOM_RAND_GENERATE_BLOCK)
+#ifdef HAVE_HASHDRBG
+    /* Private DRBG state */
+    struct DRBG;
+#endif
 
-#define DRBG_SEED_LEN (440/8)
-
-
-struct DRBG; /* Private DRBG state */
-
-
-/* Hash-based Deterministic Random Bit Generator */
+/* RNG context */
 struct WC_RNG {
-    struct DRBG* drbg;
     OS_Seed seed;
     void* heap;
+#ifdef HAVE_HASHDRBG
+    /* Hash-based Deterministic Random Bit Generator */
+    struct DRBG* drbg;
     byte status;
-};
-
-
-
-#else /* (HAVE_HASHDRBG || NO_RC4) && !CUSTOM_RAND_GENERATE_BLOCK */
-
-#ifdef WOLFSSL_ASYNC_CRYPT
-    #include 
-#endif
-
-/* secure Random Number Generator */
-
-
-struct WC_RNG {
-    OS_Seed seed;
-#ifndef NO_RC4
-    Arc4    cipher;
 #endif
 #ifdef WOLFSSL_ASYNC_CRYPT
-    AsyncCryptDev asyncDev;
+    WC_ASYNC_DEV asyncDev;
+    int devId;
 #endif
 };
 
-
-
-#endif /* (HAVE_HASHDRBG || NO_RC4) && !CUSTOM_RAND_GENERATE_BLOCK */
 #endif /* HAVE_FIPS */
 
 /* NO_OLD_RNGNAME removes RNG struct name to prevent possible type conflicts,
@@ -146,6 +153,7 @@ struct WC_RNG {
     #define RNG WC_RNG
 #endif
 
+
 WOLFSSL_LOCAL
 int wc_GenerateSeed(OS_Seed* os, byte* seed, word32 sz);
 
@@ -158,18 +166,18 @@ int wc_GenerateSeed(OS_Seed* os, byte* seed, word32 sz);
 
 
 WOLFSSL_API int  wc_InitRng(WC_RNG*);
-WOLFSSL_API int  wc_InitRng_ex(WC_RNG* rng, void* heap);
+WOLFSSL_API int  wc_InitRng_ex(WC_RNG* rng, void* heap, int devId);
 WOLFSSL_API int  wc_RNG_GenerateBlock(WC_RNG*, byte*, word32 sz);
 WOLFSSL_API int  wc_RNG_GenerateByte(WC_RNG*, byte*);
 WOLFSSL_API int  wc_FreeRng(WC_RNG*);
 
 
-#if defined(HAVE_HASHDRBG) || defined(NO_RC4)
+#ifdef HAVE_HASHDRBG
     WOLFSSL_API int wc_RNG_HealthTest(int reseed,
                                         const byte* entropyA, word32 entropyASz,
                                         const byte* entropyB, word32 entropyBSz,
                                         byte* output, word32 outputSz);
-#endif /* HAVE_HASHDRBG || NO_RC4 */
+#endif /* HAVE_HASHDRBG */
 
 #ifdef __cplusplus
     } /* extern "C" */
diff --git a/wolfssl/wolfcrypt/rsa.h b/wolfssl/wolfcrypt/rsa.h
index d7f5ccaf9..66c46d109 100644
--- a/wolfssl/wolfcrypt/rsa.h
+++ b/wolfssl/wolfcrypt/rsa.h
@@ -55,6 +55,9 @@
 
 #ifdef WOLFSSL_ASYNC_CRYPT
     #include 
+    #ifdef WOLFSSL_CERT_GEN
+        #include 
+    #endif
 #endif
 
 enum {
@@ -80,18 +83,21 @@ enum {
 /* RSA */
 typedef struct RsaKey {
     mp_int n, e, d, p, q, dP, dQ, u;
-    int   type;                               /* public or private */
     void* heap;                               /* for user memory overrides */
+    byte* data;                               /* temp buffer for async RSA */
+    int   type;                               /* public or private */
     int   state;
-    byte*  tmp;                               /* temp buffer for async RSA */
-    word32 tmpLen;
-    byte   tmpIsAlloc;
+    word32 dataLen;
 #ifdef WC_RSA_BLINDING
     WC_RNG* rng;                              /* for PrivateDecrypt blinding */
 #endif
 #ifdef WOLFSSL_ASYNC_CRYPT
-    AsyncCryptDev asyncDev;
+    WC_ASYNC_DEV asyncDev;
+    #ifdef WOLFSSL_CERT_GEN
+        CertSignCtx certSignCtx; /* context info for cert sign (MakeSignature) */
+    #endif
 #endif /* WOLFSSL_ASYNC_CRYPT */
+    byte   dataIsAlloc;
 } RsaKey;
 #endif /*HAVE_FIPS */
 
@@ -163,11 +169,6 @@ WOLFSSL_API int  wc_RsaFlattenPublicKey(RsaKey*, byte*, word32*, byte*,
     WOLFSSL_API int wc_MakeRsaKey(RsaKey* key, int size, long e, WC_RNG* rng);
 #endif
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    WOLFSSL_API int  wc_RsaAsyncHandle(RsaKey* key, WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event);
-    WOLFSSL_API int  wc_RsaAsyncWait(int ret, RsaKey* key);
-#endif
-
 #endif /* HAVE_USER_RSA */
 
 #ifdef __cplusplus
diff --git a/wolfssl/wolfcrypt/settings.h b/wolfssl/wolfcrypt/settings.h
index 9e16be339..4fc29414a 100644
--- a/wolfssl/wolfcrypt/settings.h
+++ b/wolfssl/wolfcrypt/settings.h
@@ -1233,6 +1233,12 @@ extern void uITRON4_free(void *p) ;
     #endif
 #endif
 
+/* write dup cannot be used with secure renegotiation because write dup
+ * makes the write side write-only and the read side read-only */
+#if defined(HAVE_WRITE_DUP) && defined(HAVE_SECURE_RENEGOTIATION)
+    #error "WRITE DUP and SECURE RENEGOTIATION cannot both be on"
+#endif
+
 #ifdef WOLFSSL_SGX
     #define WOLFCRYPT_ONLY   /* limitation until IO resolved */
     #define SINGLE_THREADED
@@ -1405,12 +1411,6 @@ extern void uITRON4_free(void *p) ;
     #define WOLFSSL_MIN_AUTH_TAG_SZ 12
 #endif
 
-/* If not forcing ARC4 as the DRBG or using custom RNG block gen, enable Hash_DRBG */
-#undef HAVE_HASHDRBG
-#if !defined(WOLFSSL_FORCE_RC4_DRBG) && !defined(CUSTOM_RAND_GENERATE_BLOCK)
-    #define HAVE_HASHDRBG
-#endif
-
 
 /* sniffer requires:
  * static RSA cipher suites
@@ -1455,11 +1455,25 @@ extern void uITRON4_free(void *p) ;
     #undef HAVE_WOLF_EVENT
     #define HAVE_WOLF_EVENT
 
+    #ifdef WOLFSSL_ASYNC_CRYPT_TEST
+        #define WC_ASYNC_DEV_SIZE (320+24) /* parenthesized: safe inside larger expressions */
+    #else
+        #define WC_ASYNC_DEV_SIZE 320
+    #endif
+
     #if !defined(HAVE_CAVIUM) && !defined(HAVE_INTEL_QA) && \
         !defined(WOLFSSL_ASYNC_CRYPT_TEST)
         #error No async hardware defined with WOLFSSL_ASYNC_CRYPT!
     #endif
+
+    /* Enable ECC_CACHE_CURVE for ASYNC */
+    #if !defined(ECC_CACHE_CURVE)
+        #define ECC_CACHE_CURVE
+    #endif
 #endif /* WOLFSSL_ASYNC_CRYPT */
+#ifndef WC_ASYNC_DEV_SIZE
+    #define WC_ASYNC_DEV_SIZE 0
+#endif
 
 /* leantls checks */
 #ifdef WOLFSSL_LEANTLS
diff --git a/wolfssl/wolfcrypt/sha.h b/wolfssl/wolfcrypt/sha.h
index 6dbd91b87..db72e9118 100644
--- a/wolfssl/wolfcrypt/sha.h
+++ b/wolfssl/wolfcrypt/sha.h
@@ -34,13 +34,21 @@
 
 #ifdef FREESCALE_LTC_SHA
     #include "fsl_ltc.h"
-#endif 
+#endif
 
 #ifdef __cplusplus
     extern "C" {
 #endif
 
 #ifndef HAVE_FIPS /* avoid redefining structs */
+
+#ifdef WOLFSSL_PIC32MZ_HASH
+    #include "port/pic32/pic32mz-crypt.h"
+#endif
+#ifdef WOLFSSL_ASYNC_CRYPT
+    #include <wolfssl/wolfcrypt/async.h>
+#endif
+
 /* in bytes */
 enum {
 #if defined(STM32F2_HASH) || defined(STM32F4_HASH)
@@ -52,12 +60,8 @@ enum {
     SHA_PAD_SIZE     = 56
 };
 
-#ifdef WOLFSSL_PIC32MZ_HASH
-#include "port/pic32/pic32mz-crypt.h"
-#endif
 
 #ifndef WOLFSSL_TI_HASH
-      
 /* Sha digest */
 typedef struct Sha {
     #ifdef FREESCALE_LTC_SHA
@@ -67,24 +71,36 @@ typedef struct Sha {
         word32  loLen;     /* length in bytes   */
         word32  hiLen;     /* length in bytes   */
         word32  buffer[SHA_BLOCK_SIZE  / sizeof(word32)];
-        #ifndef WOLFSSL_PIC32MZ_HASH
-            word32  digest[SHA_DIGEST_SIZE / sizeof(word32)];
-        #else
-            word32  digest[PIC32_HASH_SIZE / sizeof(word32)];
-            pic32mz_desc desc; /* Crypt Engine descriptor */
-        #endif
-    #endif /* FREESCALE_LTC_SHA */
+    #ifndef WOLFSSL_PIC32MZ_HASH
+        word32  digest[SHA_DIGEST_SIZE / sizeof(word32)];
+    #else
+        word32  digest[PIC32_HASH_SIZE / sizeof(word32)];
+    #endif
+        void*   heap;
+    #ifdef WOLFSSL_PIC32MZ_HASH
+        pic32mz_desc desc; /* Crypt Engine descriptor */
+    #endif
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        WC_ASYNC_DEV asyncDev;
+    #endif /* WOLFSSL_ASYNC_CRYPT */
+#endif /* FREESCALE_LTC_SHA */
 } Sha;
 
-#else /* WOLFSSL_TI_HASH */
+#else
     #include "wolfssl/wolfcrypt/port/ti/ti-hash.h"
-#endif
+#endif /* WOLFSSL_TI_HASH */
+
 
 #endif /* HAVE_FIPS */
 
 WOLFSSL_API int wc_InitSha(Sha*);
+WOLFSSL_API int wc_InitSha_ex(Sha* sha, void* heap, int devId);
 WOLFSSL_API int wc_ShaUpdate(Sha*, const byte*, word32);
 WOLFSSL_API int wc_ShaFinal(Sha*, byte*);
+WOLFSSL_API void wc_ShaFree(Sha*);
+
+WOLFSSL_API int wc_ShaGetHash(Sha*, byte*);
+WOLFSSL_API int wc_ShaCopy(Sha*, Sha*);
 
 #ifdef __cplusplus
     } /* extern "C" */
diff --git a/wolfssl/wolfcrypt/sha256.h b/wolfssl/wolfcrypt/sha256.h
index 997b0c1e1..4d8ef1f64 100644
--- a/wolfssl/wolfcrypt/sha256.h
+++ b/wolfssl/wolfcrypt/sha256.h
@@ -44,9 +44,13 @@
 #endif
 
 #ifndef HAVE_FIPS /* avoid redefinition of structs */
+
 #ifdef WOLFSSL_PIC32MZ_HASH
     #include "port/pic32/pic32mz-crypt.h"
 #endif
+#ifdef WOLFSSL_ASYNC_CRYPT
+    #include <wolfssl/wolfcrypt/async.h>
+#endif
 
 /* in bytes */
 enum {
@@ -69,9 +73,13 @@ typedef struct Sha256 {
     word32  buffLen;   /* in bytes          */
     word32  loLen;     /* length in bytes   */
     word32  hiLen;     /* length in bytes   */
-    #ifdef WOLFSSL_PIC32MZ_HASH
-        pic32mz_desc desc ; /* Crypt Engine descriptor */
-    #endif
+    void*   heap;
+#ifdef WOLFSSL_PIC32MZ_HASH
+    pic32mz_desc desc; /* Crypt Engine descriptor */
+#endif
+#ifdef WOLFSSL_ASYNC_CRYPT
+    WC_ASYNC_DEV asyncDev;
+#endif /* WOLFSSL_ASYNC_CRYPT */
 #endif /* FREESCALE_LTC_SHA */
 } Sha256;
 
@@ -82,8 +90,13 @@ typedef struct Sha256 {
 #endif /* HAVE_FIPS */
 
 WOLFSSL_API int wc_InitSha256(Sha256*);
+WOLFSSL_API int wc_InitSha256_ex(Sha256*, void*, int);
 WOLFSSL_API int wc_Sha256Update(Sha256*, const byte*, word32);
 WOLFSSL_API int wc_Sha256Final(Sha256*, byte*);
+WOLFSSL_API void wc_Sha256Free(Sha256*);
+
+WOLFSSL_API int wc_Sha256GetHash(Sha256*, byte*);
+WOLFSSL_API int wc_Sha256Copy(Sha256* src, Sha256* dst);
 
 #ifdef WOLFSSL_SHA224
 
@@ -100,8 +113,13 @@ typedef Sha256 Sha224;
 #endif /* HAVE_FIPS */
 
 WOLFSSL_API int wc_InitSha224(Sha224*);
+WOLFSSL_API int wc_InitSha224_ex(Sha224*, void*, int);
 WOLFSSL_API int wc_Sha224Update(Sha224*, const byte*, word32);
 WOLFSSL_API int wc_Sha224Final(Sha224*, byte*);
+WOLFSSL_API void wc_Sha224Free(Sha224*);
+
+WOLFSSL_API int wc_Sha224GetHash(Sha224*, byte*);
+WOLFSSL_API int wc_Sha224Copy(Sha224* src, Sha224* dst);
 
 #endif /* WOLFSSL_SHA224 */
 
diff --git a/wolfssl/wolfcrypt/sha512.h b/wolfssl/wolfcrypt/sha512.h
index 2f53772e9..7fea27e6e 100644
--- a/wolfssl/wolfcrypt/sha512.h
+++ b/wolfssl/wolfcrypt/sha512.h
@@ -42,6 +42,10 @@
 
 #ifndef HAVE_FIPS /* avoid redefinition of structs */
 
+#ifdef WOLFSSL_ASYNC_CRYPT
+    #include <wolfssl/wolfcrypt/async.h>
+#endif
+
 /* in bytes */
 enum {
     SHA512              =   4,   /* hash type unique */
@@ -58,13 +62,22 @@ typedef struct Sha512 {
     word64  hiLen;     /* length in bytes   */
     word64  digest[SHA512_DIGEST_SIZE / sizeof(word64)];
     word64  buffer[SHA512_BLOCK_SIZE  / sizeof(word64)];
+    void*   heap;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    WC_ASYNC_DEV asyncDev;
+#endif /* WOLFSSL_ASYNC_CRYPT */
 } Sha512;
 
 #endif /* HAVE_FIPS */
 
 WOLFSSL_API int wc_InitSha512(Sha512*);
+WOLFSSL_API int wc_InitSha512_ex(Sha512*, void*, int);
 WOLFSSL_API int wc_Sha512Update(Sha512*, const byte*, word32);
 WOLFSSL_API int wc_Sha512Final(Sha512*, byte*);
+WOLFSSL_API void wc_Sha512Free(Sha512*);
+
+WOLFSSL_API int wc_Sha512GetHash(Sha512*, byte*);
+WOLFSSL_API int wc_Sha512Copy(Sha512* src, Sha512* dst);
 
 #if defined(WOLFSSL_SHA384)
 
@@ -81,8 +94,13 @@ typedef Sha512 Sha384;
 #endif /* HAVE_FIPS */
 
 WOLFSSL_API int wc_InitSha384(Sha384*);
+WOLFSSL_API int wc_InitSha384_ex(Sha384*, void*, int);
 WOLFSSL_API int wc_Sha384Update(Sha384*, const byte*, word32);
 WOLFSSL_API int wc_Sha384Final(Sha384*, byte*);
+WOLFSSL_API void wc_Sha384Free(Sha384*);
+
+WOLFSSL_API int wc_Sha384GetHash(Sha384*, byte*);
+WOLFSSL_API int wc_Sha384Copy(Sha384* src, Sha384* dst);
 
 #endif /* WOLFSSL_SHA384 */
 
diff --git a/wolfssl/wolfcrypt/tfm.h b/wolfssl/wolfcrypt/tfm.h
index 2b9faca38..bc989a159 100644
--- a/wolfssl/wolfcrypt/tfm.h
+++ b/wolfssl/wolfcrypt/tfm.h
@@ -43,6 +43,9 @@
 
 #include 
 
+/* wolf big int and common functions */
+#include <wolfssl/wolfcrypt/wolfmath.h>
+
 #ifdef __cplusplus
     extern "C" {
 #endif
@@ -288,16 +291,21 @@
 #define FP_YES        1   /* yes response */
 #define FP_NO         0   /* no response */
 
+#ifdef HAVE_WOLF_BIGINT
+    struct WC_BIGINT;
+#endif
+
 /* a FP type */
 typedef struct fp_int {
     int      used;
     int      sign;
-#if defined(ALT_ECC_SIZE) || defined(WOLFSSL_ASYNC_CRYPT)
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
     int      size;
 #endif
     fp_digit dp[FP_SIZE];
-#ifdef WOLFSSL_ASYNC_CRYPT
-    byte *dpraw; /* Used for hardware crypto */
+
+#ifdef HAVE_WOLF_BIGINT
+    struct WC_BIGINT raw; /* unsigned binary (big endian) */
 #endif
 } fp_int;
 
@@ -380,6 +388,8 @@ typedef struct fp_int {
 void fp_init(fp_int *a);
 MP_API void fp_zero(fp_int *a);
 MP_API void fp_clear(fp_int *a); /* uses ForceZero to clear sensitive memory */
+MP_API void fp_forcezero (fp_int * a);
+MP_API void fp_free(fp_int* a);
 
 /* zero/even/odd ? */
 #define fp_iszero(a) (((a)->used == 0) ? FP_YES : FP_NO)
@@ -605,6 +615,7 @@ void fp_sqr_comba64(fp_int *a, fp_int *b);
 typedef fp_digit mp_digit;
 typedef fp_word  mp_word;
 typedef fp_int mp_int;
+#define MP_INT_DEFINED
 
 /* Constants */
 #define MP_LT   FP_LT   /* less than    */
@@ -627,8 +638,9 @@ typedef fp_int mp_int;
 #define mp_isneg(a)   fp_isneg(a)
 MP_API int  mp_init (mp_int * a);
 MP_API void mp_clear (mp_int * a);
-#define mp_forcezero(a) fp_clear(a)
-MP_API int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e,
+MP_API void mp_free (mp_int * a);
+MP_API void mp_forcezero (mp_int * a);
+MP_API int  mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e,
                          mp_int* f);
 
 MP_API int  mp_add (mp_int * a, mp_int * b, mp_int * c);
diff --git a/wolfssl/wolfcrypt/types.h b/wolfssl/wolfcrypt/types.h
old mode 100644
new mode 100755
index e86578b87..ece3517b2
--- a/wolfssl/wolfcrypt/types.h
+++ b/wolfssl/wolfcrypt/types.h
@@ -140,7 +140,7 @@
     #elif defined(__MWERKS__) && TARGET_CPU_PPC
         #define PPC_INTRINSICS
         #define FAST_ROTATE
-    #elif defined(__GNUC__) && defined(__i386__)
+    #elif defined(__GNUC__)  && (defined(__i386__) || defined(__x86_64__))
         /* GCC does peephole optimizations which should result in using rotate
            instructions  */
         #define FAST_ROTATE
@@ -178,7 +178,19 @@
 		WOLFSSL_API void* XMALLOC(size_t n, void* heap, int type);
 		WOLFSSL_API void* XREALLOC(void *p, size_t n, void* heap, int type);
 		WOLFSSL_API void XFREE(void *p, void* heap, int type);
-	#elif defined(XMALLOC_USER)
+	#elif defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_INTEL_QA)
+        #include <wolfssl/wolfcrypt/port/intel/quickassist_mem.h>
+        #undef USE_WOLFSSL_MEMORY
+        #ifdef WOLFSSL_DEBUG_MEMORY
+            #define XMALLOC(s, h, t)     IntelQaMalloc((s), (h), (t), __func__, __LINE__)
+            #define XFREE(p, h, t)       IntelQaFree((p), (h), (t), __func__, __LINE__)
+            #define XREALLOC(p, n, h, t) IntelQaRealloc((p), (n), (h), (t), __func__, __LINE__)
+        #else
+            #define XMALLOC(s, h, t)     IntelQaMalloc((s), (h), (t))
+            #define XFREE(p, h, t)       IntelQaFree((p), (h), (t))
+            #define XREALLOC(p, n, h, t) IntelQaRealloc((p), (n), (h), (t))
+        #endif /* WOLFSSL_DEBUG_MEMORY */
+    #elif defined(XMALLOC_USER)
 	    /* prototypes for user heap override functions */
 	    #include <stddef.h>  /* for size_t */
 	    extern void *XMALLOC(size_t n, void* heap, int type);
@@ -222,6 +234,41 @@
         #endif /* WOLFSSL_STATIC_MEMORY */
 	#endif
 
+    /* declare/free variable handling for async */
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        #define DECLARE_VAR(VAR_NAME, VAR_TYPE, VAR_SIZE, HEAP) \
+            VAR_TYPE* VAR_NAME = (VAR_TYPE*)XMALLOC(sizeof(VAR_TYPE) * VAR_SIZE, HEAP, DYNAMIC_TYPE_WOLF_BIGINT);
+        #define DECLARE_VAR_INIT(VAR_NAME, VAR_TYPE, VAR_SIZE, INIT_VALUE, HEAP) \
+            VAR_TYPE* VAR_NAME = ({ \
+                VAR_TYPE* ptr = XMALLOC(sizeof(VAR_TYPE) * VAR_SIZE, HEAP, DYNAMIC_TYPE_WOLF_BIGINT); \
+                if (ptr && INIT_VALUE) { \
+                    XMEMCPY(ptr, INIT_VALUE, sizeof(VAR_TYPE) * VAR_SIZE); \
+                } \
+                ptr; \
+            })
+        #define DECLARE_ARRAY(VAR_NAME, VAR_TYPE, VAR_ITEMS, VAR_SIZE, HEAP) \
+            VAR_TYPE* VAR_NAME[VAR_ITEMS]; \
+            int idx##VAR_NAME; \
+            for (idx##VAR_NAME=0; idx##VAR_NAME
@@ -284,68 +331,74 @@
 
 	/* memory allocation types for user hints */
 	enum {
-	    DYNAMIC_TYPE_CA           = 1,
-	    DYNAMIC_TYPE_CERT         = 2,
-	    DYNAMIC_TYPE_KEY          = 3,
-	    DYNAMIC_TYPE_FILE         = 4,
-	    DYNAMIC_TYPE_SUBJECT_CN   = 5,
-	    DYNAMIC_TYPE_PUBLIC_KEY   = 6,
-	    DYNAMIC_TYPE_SIGNER       = 7,
-	    DYNAMIC_TYPE_NONE         = 8,
-	    DYNAMIC_TYPE_BIGINT       = 9,
-	    DYNAMIC_TYPE_RSA          = 10,
-	    DYNAMIC_TYPE_METHOD       = 11,
-	    DYNAMIC_TYPE_OUT_BUFFER   = 12,
-	    DYNAMIC_TYPE_IN_BUFFER    = 13,
-	    DYNAMIC_TYPE_INFO         = 14,
-	    DYNAMIC_TYPE_DH           = 15,
-	    DYNAMIC_TYPE_DOMAIN       = 16,
-	    DYNAMIC_TYPE_SSL          = 17,
-	    DYNAMIC_TYPE_CTX          = 18,
-	    DYNAMIC_TYPE_WRITEV       = 19,
-	    DYNAMIC_TYPE_OPENSSL      = 20,
-	    DYNAMIC_TYPE_DSA          = 21,
-	    DYNAMIC_TYPE_CRL          = 22,
-	    DYNAMIC_TYPE_REVOKED      = 23,
-	    DYNAMIC_TYPE_CRL_ENTRY    = 24,
-	    DYNAMIC_TYPE_CERT_MANAGER = 25,
-	    DYNAMIC_TYPE_CRL_MONITOR  = 26,
-	    DYNAMIC_TYPE_OCSP_STATUS  = 27,
-	    DYNAMIC_TYPE_OCSP_ENTRY   = 28,
-	    DYNAMIC_TYPE_ALTNAME      = 29,
-	    DYNAMIC_TYPE_SUITES       = 30,
-	    DYNAMIC_TYPE_CIPHER       = 31,
-	    DYNAMIC_TYPE_RNG          = 32,
-	    DYNAMIC_TYPE_ARRAYS       = 33,
-	    DYNAMIC_TYPE_DTLS_POOL    = 34,
-	    DYNAMIC_TYPE_SOCKADDR     = 35,
-	    DYNAMIC_TYPE_LIBZ         = 36,
-	    DYNAMIC_TYPE_ECC          = 37,
-	    DYNAMIC_TYPE_TMP_BUFFER   = 38,
-	    DYNAMIC_TYPE_DTLS_MSG     = 39,
-	    DYNAMIC_TYPE_ASYNC_TMP    = 40,
-	    DYNAMIC_TYPE_ASYNC_RSA    = 41,
-	    DYNAMIC_TYPE_X509         = 42,
-	    DYNAMIC_TYPE_TLSX         = 43,
-	    DYNAMIC_TYPE_OCSP         = 44,
-	    DYNAMIC_TYPE_SIGNATURE    = 45,
-	    DYNAMIC_TYPE_HASHES       = 46,
-        DYNAMIC_TYPE_SRP          = 47,
-        DYNAMIC_TYPE_COOKIE_PWD   = 48,
-        DYNAMIC_TYPE_USER_CRYPTO  = 49,
-        DYNAMIC_TYPE_OCSP_REQUEST = 50,
-        DYNAMIC_TYPE_X509_EXT     = 51,
-        DYNAMIC_TYPE_X509_STORE   = 52,
-        DYNAMIC_TYPE_X509_CTX     = 53,
-        DYNAMIC_TYPE_URL          = 54,
-        DYNAMIC_TYPE_DTLS_FRAG    = 55,
-        DYNAMIC_TYPE_DTLS_BUFFER  = 56,
-        DYNAMIC_TYPE_SESSION_TICK = 57,
-        DYNAMIC_TYPE_PKCS         = 58,
-        DYNAMIC_TYPE_MUTEX        = 59,
-        DYNAMIC_TYPE_PKCS7        = 60,
+        DYNAMIC_TYPE_CA           = 1,
+        DYNAMIC_TYPE_CERT         = 2,
+        DYNAMIC_TYPE_KEY          = 3,
+        DYNAMIC_TYPE_FILE         = 4,
+        DYNAMIC_TYPE_SUBJECT_CN   = 5,
+        DYNAMIC_TYPE_PUBLIC_KEY   = 6,
+        DYNAMIC_TYPE_SIGNER       = 7,
+        DYNAMIC_TYPE_NONE         = 8,
+        DYNAMIC_TYPE_BIGINT       = 9,
+        DYNAMIC_TYPE_RSA          = 10,
+        DYNAMIC_TYPE_METHOD       = 11,
+        DYNAMIC_TYPE_OUT_BUFFER   = 12,
+        DYNAMIC_TYPE_IN_BUFFER    = 13,
+        DYNAMIC_TYPE_INFO         = 14,
+        DYNAMIC_TYPE_DH           = 15,
+        DYNAMIC_TYPE_DOMAIN       = 16,
+        DYNAMIC_TYPE_SSL          = 17,
+        DYNAMIC_TYPE_CTX          = 18,
+        DYNAMIC_TYPE_WRITEV       = 19,
+        DYNAMIC_TYPE_OPENSSL      = 20,
+        DYNAMIC_TYPE_DSA          = 21,
+        DYNAMIC_TYPE_CRL          = 22,
+        DYNAMIC_TYPE_REVOKED      = 23,
+        DYNAMIC_TYPE_CRL_ENTRY    = 24,
+        DYNAMIC_TYPE_CERT_MANAGER = 25,
+        DYNAMIC_TYPE_CRL_MONITOR  = 26,
+        DYNAMIC_TYPE_OCSP_STATUS  = 27,
+        DYNAMIC_TYPE_OCSP_ENTRY   = 28,
+        DYNAMIC_TYPE_ALTNAME      = 29,
+        DYNAMIC_TYPE_SUITES       = 30,
+        DYNAMIC_TYPE_CIPHER       = 31,
+        DYNAMIC_TYPE_RNG          = 32,
+        DYNAMIC_TYPE_ARRAYS       = 33,
+        DYNAMIC_TYPE_DTLS_POOL    = 34,
+        DYNAMIC_TYPE_SOCKADDR     = 35,
+        DYNAMIC_TYPE_LIBZ         = 36,
+        DYNAMIC_TYPE_ECC          = 37,
+        DYNAMIC_TYPE_TMP_BUFFER   = 38,
+        DYNAMIC_TYPE_DTLS_MSG     = 39,
+        DYNAMIC_TYPE_X509         = 40,
+        DYNAMIC_TYPE_TLSX         = 41,
+        DYNAMIC_TYPE_OCSP         = 42,
+        DYNAMIC_TYPE_SIGNATURE    = 43,
+        DYNAMIC_TYPE_HASHES       = 44,
+        DYNAMIC_TYPE_SRP          = 45,
+        DYNAMIC_TYPE_COOKIE_PWD   = 46,
+        DYNAMIC_TYPE_USER_CRYPTO  = 47,
+        DYNAMIC_TYPE_OCSP_REQUEST = 48,
+        DYNAMIC_TYPE_X509_EXT     = 49,
+        DYNAMIC_TYPE_X509_STORE   = 50,
+        DYNAMIC_TYPE_X509_CTX     = 51,
+        DYNAMIC_TYPE_URL          = 52,
+        DYNAMIC_TYPE_DTLS_FRAG    = 53,
+        DYNAMIC_TYPE_DTLS_BUFFER  = 54,
+        DYNAMIC_TYPE_SESSION_TICK = 55,
+        DYNAMIC_TYPE_PKCS         = 56,
+        DYNAMIC_TYPE_MUTEX        = 57,
+        DYNAMIC_TYPE_PKCS7        = 58,
+        DYNAMIC_TYPE_AES          = 59,
+        DYNAMIC_TYPE_WOLF_BIGINT  = 60,
         DYNAMIC_TYPE_ASN1         = 61,
-        DYNAMIC_TYPE_LOG          = 62
+        DYNAMIC_TYPE_LOG          = 62,
+        DYNAMIC_TYPE_WRITEDUP     = 63,
+        DYNAMIC_TYPE_DH_BUFFER    = 64,
+        DYNAMIC_TYPE_HMAC         = 65,
+        DYNAMIC_TYPE_ASYNC        = 66,
+        DYNAMIC_TYPE_ASYNC_NUMA   = 67,
+        DYNAMIC_TYPE_ASYNC_NUMA64 = 68,
 	};
 
 	/* max error buffer string size */
@@ -396,7 +449,7 @@
 
 
     /* AESNI requires alignment and ARMASM gains some performance from it */
-    #if defined(WOLFSSL_AESNI) || defined(WOLFSSL_ARMASM)
+    #if defined(WOLFSSL_AESNI) || defined(WOLFSSL_ARMASM) || defined(USE_INTEL_SPEEDUP)
         #if !defined(ALIGN16)
             #if defined(__GNUC__)
                 #define ALIGN16 __attribute__ ( (aligned (16)))
@@ -409,6 +462,18 @@
             #endif
         #endif /* !ALIGN16 */
 
+        #if !defined (ALIGN32)
+            #if defined (__GNUC__)
+                #define ALIGN32 __attribute__ ( (aligned (32)))
+            #elif defined(_MSC_VER)
+                /* disable align warning, we want alignment ! */
+                #pragma warning(disable: 4324)
+                #define ALIGN32 __declspec (align (32))
+            #else
+                #define ALIGN32
+            #endif
+        #endif
+
        #if !defined(ALIGN32)
             #if defined(__GNUC__)
                 #define ALIGN32 __attribute__ ( (aligned (32)))
@@ -440,6 +505,8 @@
 
     #ifdef WOLFSSL_RIOT_OS
         #define EXIT_TEST(ret) exit(ret)
+    #elif defined(HAVE_STACK_SIZE)
+        #define EXIT_TEST(ret) return (void*)((size_t)(ret))
     #else
         #define EXIT_TEST(ret) return ret
     #endif
diff --git a/wolfssl/wolfcrypt/wolfevent.h b/wolfssl/wolfcrypt/wolfevent.h
index 5dbf16450..4691356bb 100644
--- a/wolfssl/wolfcrypt/wolfevent.h
+++ b/wolfssl/wolfcrypt/wolfevent.h
@@ -29,6 +29,9 @@
 #ifndef SINGLE_THREADED
     #include 
 #endif
+#ifdef HAVE_CAVIUM
+    #include <wolfssl/wolfcrypt/port/cavium/cavium_nitrox.h>
+#endif
 
 typedef struct WOLFSSL WOLFSSL;
 typedef struct WOLF_EVENT WOLF_EVENT;
@@ -38,13 +41,12 @@ typedef unsigned short WOLF_EVENT_FLAG;
 
 typedef enum WOLF_EVENT_TYPE {
     WOLF_EVENT_TYPE_NONE,
-    #ifdef WOLFSSL_ASYNC_CRYPT
-        WOLF_EVENT_TYPE_ASYNC_ANY,
-        WOLF_EVENT_TYPE_ASYNC_WOLFSSL,
-        WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT,
-        WOLF_EVENT_TYPE_ASYNC_FIRST = WOLF_EVENT_TYPE_ASYNC_WOLFSSL,
-        WOLF_EVENT_TYPE_ASYNC_LAST = WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT,
-    #endif
+#ifdef WOLFSSL_ASYNC_CRYPT
+    WOLF_EVENT_TYPE_ASYNC_WOLFSSL,    /* context is WOLFSSL* */
+    WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT,  /* context is WC_ASYNC_DEV */
+    WOLF_EVENT_TYPE_ASYNC_FIRST = WOLF_EVENT_TYPE_ASYNC_WOLFSSL,
+    WOLF_EVENT_TYPE_ASYNC_LAST = WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT,
+#endif /* WOLFSSL_ASYNC_CRYPT */
 } WOLF_EVENT_TYPE;
 
 struct WOLF_EVENT {
@@ -53,11 +55,20 @@ struct WOLF_EVENT {
     WOLF_EVENT*         prev;
 
     void*               context;
+    union {
+        void* ptr;
+#ifdef WOLFSSL_ASYNC_CRYPT
+        struct WC_ASYNC_DEV* async;
+#endif
+    } dev;
 #ifdef HAVE_CAVIUM
-    word64              reqId;
+    CavReqId            reqId;
 #endif
     int                 ret;    /* Async return code */
+    unsigned int        flags;
     WOLF_EVENT_TYPE     type;
+
+    /* event flags */
     WOLF_EVENT_FLAG     pending:1;
     WOLF_EVENT_FLAG     done:1;
     /* Future event flags can go here */
@@ -87,12 +98,16 @@ WOLFSSL_API int wolfEvent_Poll(WOLF_EVENT* event, WOLF_EVENT_FLAG flags);
 WOLFSSL_API int wolfEventQueue_Init(WOLF_EVENT_QUEUE* queue);
 WOLFSSL_API int wolfEventQueue_Push(WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event);
 WOLFSSL_API int wolfEventQueue_Pop(WOLF_EVENT_QUEUE* queue, WOLF_EVENT** event);
-WOLFSSL_API int wolfEventQueue_Remove(WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event);
 WOLFSSL_API int wolfEventQueue_Poll(WOLF_EVENT_QUEUE* queue, void* context_filter,
     WOLF_EVENT** events, int maxEvents, WOLF_EVENT_FLAG flags, int* eventCount);
 WOLFSSL_API int wolfEventQueue_Count(WOLF_EVENT_QUEUE* queue);
 WOLFSSL_API void wolfEventQueue_Free(WOLF_EVENT_QUEUE* queue);
 
+/* the queue mutex must be locked prior to calling these */
+WOLFSSL_API int wolfEventQueue_Add(WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event);
+WOLFSSL_API int wolfEventQueue_Remove(WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event);
+
+
 #endif /* HAVE_WOLF_EVENT */
 
 
diff --git a/wolfssl/wolfcrypt/wolfmath.h b/wolfssl/wolfcrypt/wolfmath.h
index e6a348653..e32efc1b2 100644
--- a/wolfssl/wolfcrypt/wolfmath.h
+++ b/wolfssl/wolfcrypt/wolfmath.h
@@ -19,15 +19,43 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
  */
 
+#if defined(HAVE_WOLF_BIGINT) && !defined(WOLF_BIGINT_DEFINED)
+    /* raw big integer */
+    typedef struct WC_BIGINT {
+        byte*   buf;
+        word32  len;
+        void*   heap;
+    } WC_BIGINT;
+
+    #define WOLF_BIGINT_DEFINED
+#endif
+
+
+/* only define functions if mp_int has been declared */
+#ifdef MP_INT_DEFINED
+
 #ifndef __WOLFMATH_H__
 #define __WOLFMATH_H__
 
+    /* common math functions */
+    int get_digit_count(mp_int* a);
+    mp_digit get_digit(mp_int* a, int n);
+    int get_rand_digit(WC_RNG* rng, mp_digit* d);
+    int mp_rand(mp_int* a, int digits, WC_RNG* rng);
 
-/* common math functions */
-WOLFSSL_LOCAL int get_digit_count(mp_int* a);
-WOLFSSL_LOCAL mp_digit get_digit(mp_int* a, int n);
-WOLFSSL_LOCAL int get_rand_digit(WC_RNG* rng, mp_digit* d);
-WOLFSSL_LOCAL int mp_rand(mp_int* a, int digits, WC_RNG* rng);
 
+    #ifdef HAVE_WOLF_BIGINT
+        void wc_bigint_init(WC_BIGINT* a);
+        int wc_bigint_alloc(WC_BIGINT* a, word32 sz);
+        int wc_bigint_from_unsigned_bin(WC_BIGINT* a, const byte* in, word32 inlen);
+        int wc_bigint_to_unsigned_bin(WC_BIGINT* a, byte* out, word32* outlen);
+        void wc_bigint_zero(WC_BIGINT* a);
+        void wc_bigint_free(WC_BIGINT* a);
+
+        int wc_mp_to_bigint(mp_int* src, WC_BIGINT* dst);
+        int wc_bigint_to_mp(WC_BIGINT* src, mp_int* dst);
+    #endif /* HAVE_WOLF_BIGINT */
 
 #endif /* __WOLFMATH_H__ */
+
+#endif /* MP_INT_DEFINED */