From c1640e8a3dcb418d9f3f609fe16323c3629cc4de Mon Sep 17 00:00:00 2001 From: David Garske Date: Fri, 7 Apr 2017 15:46:32 -0700 Subject: [PATCH] =?UTF-8?q?Intel=20QuickAssist=20(QAT)=20support=20and=20a?= =?UTF-8?q?sync=20enhancements/fixes:=20*=20Adds=20./configure=20"--with-i?= =?UTF-8?q?ntelqa=3D../QAT1.6=E2=80=9D,=20port=20files,=20memory=20managem?= =?UTF-8?q?ent=20and=20README.md=20(see=20wolfcrypt/src/port/intel/).=20*?= =?UTF-8?q?=20Added=20Intel=20QAT=20support=20for=20RSA=20public/private?= =?UTF-8?q?=20(CRT/non-CRT),=20AES=20CBC/GCM,=20ECDH/ECDSA,=20DH,=20DES3,?= =?UTF-8?q?=20SHA,=20SHA224,=20SHA256,=20SHA384,=20SHA512,=20MD5=20and=20H?= =?UTF-8?q?MAC.=20*=20wolfSSL=20async=20enabled=20all=20client=20and=20ser?= =?UTF-8?q?ver:=20PKI,=20Encrypt/Decrypt,=20Hashing/HMAC=20and=20Certifica?= =?UTF-8?q?te=20Sign/Verify.=20*=20wolfSSL=20async=20support=20in=20functi?= =?UTF-8?q?ons:=20Encrypt,=20Decrypt,=20VerifyMAC,=20BuildMessage,=20Confi?= =?UTF-8?q?rmSignature,=20DoCertificate,=20ParseCertRelative,=20and=20Make?= =?UTF-8?q?Signature.=20*=20wolfCrypt=20test=20and=20benchmark=20async=20s?= =?UTF-8?q?upport=20added=20for=20all=20HW=20acceleration.=20*=20wolfCrypt?= =?UTF-8?q?=20benchmark=20multi-threading=20support.=20*=20Added=20QuickAs?= =?UTF-8?q?sist=20memory=20overrides=20for=20XMALLOC,=20XFREE=20and=20XREA?= =?UTF-8?q?LLOC.=20XREALLOC=20determines=20if=20existing=20pointer=20needs?= =?UTF-8?q?=20reallocated=20for=20NUMA.=20*=20Refactor=20to=20make=20sure?= =?UTF-8?q?=20=E2=80=9Cheap=E2=80=9D=20is=20available=20for=20async=20dev?= =?UTF-8?q?=20init.=20*=20Added=20async=20support=20for=20all=20examples?= =?UTF-8?q?=20for=20connect,=20accept,=20read=20and=20write.=20*=20Added?= =?UTF-8?q?=20new=20WC=5FBIGINT=20(in=20wolfmath.c)=20for=20async=20hardwa?= =?UTF-8?q?re=20support.=20*=20Added=20async=20simulator=20tests=20for=20D?= =?UTF-8?q?ES3=20CBC,=20AES=20CBC/GCM.=20*=20Added=20QAT=20standalone=20bu?= 
=?UTF-8?q?ild=20for=20unit=20testing.=20*=20Added=20int=20return=20code?= =?UTF-8?q?=20to=20SHA=20and=20MD5=20functions.=20*=20Refactor=20of=20the?= =?UTF-8?q?=20async=20stack=20variable=20handling,=20so=20async=20operatio?= =?UTF-8?q?ns=20have=20generic=20args=20buffer=20area=20and=20cleanup=20fu?= =?UTF-8?q?nction=20pointer.=20*=20Combined=20duplicate=20code=20for=20asy?= =?UTF-8?q?nc=20push/pop=20handling.=20*=20Refactor=20internal.c=20to=20ad?= =?UTF-8?q?d=20AllocKey=20/=20FreeKey.=20*=20Refactor=20of=20hash=20init/f?= =?UTF-8?q?ree=20in=20TLS=20to=20use=20InitHashes=20and=20FreeHashes.=20*?= =?UTF-8?q?=20Refactor=20of=20the=20async=20event->context=20to=20use=20WO?= =?UTF-8?q?LF=5FEVENT=5FTYPE=5FASYNC=5FWOLFSSL=20for=20WOLFSSL*=20and=20WO?= =?UTF-8?q?LF=5FEVENT=5FTYPE=5FASYNC=5FWOLFCRYPT=20for=20WC=5FASYNC=5FDEV*?= =?UTF-8?q?.=20*=20Suppress=20error=20message=20for=20WC=5FPENDING=5FE.=20?= =?UTF-8?q?*=20Implemented=20"wolfSSL=5FEVP=5FMD=5FCTX=5Finit"=20to=20do?= =?UTF-8?q?=20memset.=20*=20Cleanup=20of=20the=20openssl=20compat=20CTX=20?= =?UTF-8?q?sizes=20when=20async=20is=20enabled.=20*=20Cleanup=20of=20AES,?= =?UTF-8?q?=20DES3,=20DH,=20SHA,=20MD5,=20DES3,=20DH,=20HMAC,=20MD5=20for?= =?UTF-8?q?=20consistency=20and=20readability.=20*=20Cleanup=20of=20the=20?= =?UTF-8?q?OPAQUE=5FLEN.=20*=20Cleanup=20to=20use=20ENCRYPT=5FLEN=20instea?= =?UTF-8?q?d=20of=20sizeof(ssl->arrays.preMasterSecret).=20*=20Changed=20s?= =?UTF-8?q?sl->arrays.preMasterSecret=20to=20use=20XMALLOC=20(accelerates?= =?UTF-8?q?=20HW=20operations)=20*=20Reduce=20verbosity=20with=20debug=20e?= =?UTF-8?q?nabled=20for=20"GetMyVersion",=20"wolfSSL=20Using=20RSA=20OAEP?= =?UTF-8?q?=20padding"=20and=20"wolfSSL=20Using=20RSA=20PKCSV15=20padding"?= =?UTF-8?q?.=20*=20Updated=20RSA=20un-padding=20error=20message=20so=20its?= =?UTF-8?q?=20different=20than=20one=20above=20it=20for=20better=20debuggi?= =?UTF-8?q?ng.=20*=20Added=20QAT=20async=20enables=20for=20each=20algorith?= 
=?UTF-8?q?m.=20*=20Refactor=20of=20the=20async=20init=20to=20use=20=5Fex.?= =?UTF-8?q?=20*=20Added=20WC=5FASYNC=5FTHRESH=5FNONE=20to=20allow=20bypass?= =?UTF-8?q?=20of=20the=20async=20thresholds=20for=20testing.=20*=20Reforma?= =?UTF-8?q?tted=20the=20benchmark=20results:=20PKI:=20"RSA=202048=20privat?= =?UTF-8?q?e=20HW=2018522=20ops=20took=201.003=20sec,=20avg=200.054=20ms,?= =?UTF-8?q?=2018467.763=20ops/sec"=20Crypto/Hashing:=20SHA-256=20SW=20350?= =?UTF-8?q?=20megs=20took=201.009=20seconds,=20346.946=20MB/s=20Cycles=20p?= =?UTF-8?q?er=20byte=20=3D=209.87=20*=20Added=20min=20execution=20time=20f?= =?UTF-8?q?or=20all=20benchmarks.=20*=20Moved=20wc=5F*GetHash=20and=20wc?= =?UTF-8?q?=5F*RestorePos=20to=20appropriate=20files=20so=20use=20of=20isC?= =?UTF-8?q?opy=20flag=20is=20local.=20*=20Fix=20for=20ECC=20sign=20status?= =?UTF-8?q?=20sometimes=20being=20invalid=20due=20to=20uninitialized=20ECC?= =?UTF-8?q?=20digest=20in=20benchmark.=20*=20Added=20new=20DECLARE=5FVAR/F?= =?UTF-8?q?REE=5FVAR=20and=20DECLARE=5FARRAY/FREE=5FARRAY=20macros=20for?= =?UTF-8?q?=20helping=20setup=20test/benchmark=20variables=20to=20accelera?= =?UTF-8?q?te=20async.=20*=20Added=20NO=5FSW=5FBENCH=20option=20to=20only?= =?UTF-8?q?=20run=20HW=20bench.=20*=20Added=20support=20for=20PRNG=20to=20?= =?UTF-8?q?use=20hardware=20SHA256=20if=20=5Fwc=20devId=20provided.=20*=20?= =?UTF-8?q?Fix=20to=20prevent=20curve=20tests=20from=20running=20against?= =?UTF-8?q?=20wrong=20curve=20sizes.=20Changed=20wc=5Fecc=5Fset=5Fcurve=20?= =?UTF-8?q?to=20match=20on=20exact=20size.=20*=20Added=20the=20wc=5F*GetHa?= =?UTF-8?q?sh=20calls=20to=20the=20wolfCrypt=20tests.=20*=20Added=20async?= =?UTF-8?q?=20hardware=20start/stop=20to=20wolfSSL=20init/cleanup.=20*=20R?= =?UTF-8?q?efactor=20to=20add=20wc=5F*Copy=20for=20hashing=20context=20(fo?= =?UTF-8?q?r=20async),=20which=20replaces=20wc=5F*RestorePos.=20*=20Fixes?= =?UTF-8?q?=20for=20building=20with=20TI=20hashing=20(including:=20SHA224,?= 
=?UTF-8?q?=20missing=20new=20API=E2=80=99s=20and=20building=20with=20dumm?= =?UTF-8?q?y=20build=20for=20non=20hw=20testing).=20Note:=20We=20need=20to?= =?UTF-8?q?=20add=20build=20test=20for=20this=20`./configure=20CFLAGS=3D"-?= =?UTF-8?q?DWOLFSSL=5FTI=5FHASH=20-DTI=5FDUMMY=5FBUILD=E2=80=9D`.=20*=20Ad?= =?UTF-8?q?ded=20arg=20checks=20on=20wc=5F*GetHash=20and=20wc=5F*Copy.=20*?= =?UTF-8?q?=20Cleanup=20of=20the=20BuildMD5,=20BuildSHA,=20BuildMD5=5FCert?= =?UTF-8?q?Verify=20and=20BuildSHA=5FCertVerify=20functions.=20*=20Added?= =?UTF-8?q?=20new=20./configure=20--enable-asyncthreads,=20to=20allow=20en?= =?UTF-8?q?able/disable=20of=20the=20async=20threading=20support.=20If=20-?= =?UTF-8?q?-enable-asynccrypt=20set=20this=20will=20be=20enabled=20by=20de?= =?UTF-8?q?fault=20if=20pthread=20is=20supported.=20Allows=20multi-threade?= =?UTF-8?q?d=20benchmarks=20with=20async=20simulator.=20*=20Added=20checks?= =?UTF-8?q?=20for=20all=20hashing=20to=20verify=20valid=20->buffLen.=20*?= =?UTF-8?q?=20Fix=20for=20SHA512=20scan-build=20warning=20about=20un-initi?= =?UTF-8?q?alized=20=E2=80=9CW=5FX=E2=80=9D.=20*=20Fix=20for=20valgrind=20?= =?UTF-8?q?un-initialized=20use=20of=20buffer=20in=20AllocDer=20(der->buff?= =?UTF-8?q?er)=20and=20BuildTlsFinished=20handshake=5Fhash.=20*=20Refactor?= =?UTF-8?q?=20of=20the=20benchmarking=20to=20use=20common=20function=20for?= =?UTF-8?q?=20start,=20check=20and=20finish=20of=20the=20stats.=20*=20Fixe?= =?UTF-8?q?d=20issue=20with=20ECC=20cache=20loading=20in=20multi-threading?= =?UTF-8?q?.=20*=20Fix=20bug=20with=20AESNI=20not=20aligned=20code=20that?= =?UTF-8?q?=20assumes=20XMALLOC=20is=2016-byte=20aligned.=20*=20Added=20ne?= =?UTF-8?q?w=20WC=5FASYNC=5FNO=5F=E2=80=A6=20options=20to=20allow=20disabl?= =?UTF-8?q?ing=20of=20individual=20async=20algorithms.=20New=20defines=20a?= =?UTF-8?q?re:=20WC=5FASYNC=5FNO=5FCRYPT,=20WC=5FASYNC=5FNO=5FPKI=20and=20?= =?UTF-8?q?WC=5FASYNC=5FNO=5FHASH.=20Additionally=20each=20algorithm=20has?= 
=?UTF-8?q?=20a=20WC=5FASYNC=5FNO=5F[ALGO]=20define.=20*=20Added=20?= =?UTF-8?q?=E2=80=9CwolfSSL=5FGetAllocators=E2=80=9D=20API=20and=20fixed?= =?UTF-8?q?=20the=20wolfCrypt=20memcb=5Ftest=20so=20it=20restores=20callba?= =?UTF-8?q?ck=20pointers=20after=20test=20is=20complete=20(fixes=20issue?= =?UTF-8?q?=20with=20using=20custom=20allocators=20and=20test=20breaking?= =?UTF-8?q?=20it).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .cproject | 2 +- .gitignore | 9 + IDE/ROWLEY-CROSSWORKS-ARM/wolfssl_ltc.hzp | 26 +- autogen.sh | 14 + configure.ac | 85 +- examples/client/client.c | 422 +- examples/echoclient/echoclient.c | 120 +- examples/echoserver/echoserver.c | 106 +- examples/server/server.c | 307 +- mcapi/crypto.h | 34 +- src/internal.c | 6621 +++++++++-------- src/keys.c | 257 +- src/ssl.c | 281 +- src/tls.c | 99 +- tests/api.c | 4 + tests/hash.c | 33 +- tests/suites.c | 31 +- tests/unit.c | 17 +- tirtos/README | 3 + wolfcrypt/benchmark/benchmark.c | 3454 +++++---- wolfcrypt/src/aes.c | 1081 +-- wolfcrypt/src/arc4.c | 46 +- wolfcrypt/src/asn.c | 1245 ++-- wolfcrypt/src/des3.c | 1657 +++-- wolfcrypt/src/dh.c | 236 +- wolfcrypt/src/ecc.c | 546 +- wolfcrypt/src/error.c | 3 + wolfcrypt/src/hash.c | 333 +- wolfcrypt/src/hmac.c | 952 ++- wolfcrypt/src/include.am | 7 + wolfcrypt/src/integer.c | 33 +- wolfcrypt/src/logging.c | 4 +- wolfcrypt/src/md5.c | 428 +- wolfcrypt/src/memory.c | 10 + wolfcrypt/src/misc.c | 10 +- wolfcrypt/src/pkcs12.c | 4 + wolfcrypt/src/pkcs7.c | 8 +- wolfcrypt/src/port/arm/armv8-aes.c | 27 +- wolfcrypt/src/port/arm/armv8-sha256.c | 45 +- wolfcrypt/src/port/cavium/README.md | 33 +- wolfcrypt/src/port/cavium/cavium_nitrox.c | 778 -- wolfcrypt/src/port/intel/README.md | 3 + wolfcrypt/src/port/nxp/ksdk_port.c | 1 + wolfcrypt/src/port/ti/ti-ccm.c | 55 +- wolfcrypt/src/port/ti/ti-hash.c | 206 +- wolfcrypt/src/random.c | 255 +- wolfcrypt/src/rsa.c | 436 +- wolfcrypt/src/sha.c | 135 +- wolfcrypt/src/sha256.c 
| 2520 ++++--- wolfcrypt/src/sha512.c | 1541 ++-- wolfcrypt/src/signature.c | 41 +- wolfcrypt/src/tfm.c | 60 +- wolfcrypt/src/wc_port.c | 52 +- wolfcrypt/src/wolfevent.c | 31 +- wolfcrypt/src/wolfmath.c | 140 + wolfcrypt/test/test.c | 1244 ++-- wolfssl/internal.h | 88 +- wolfssl/openssl/md5.h | 2 +- wolfssl/openssl/sha.h | 15 +- wolfssl/test.h | 4 +- wolfssl/wolfcrypt/aes.h | 33 +- wolfssl/wolfcrypt/arc4.h | 13 +- wolfssl/wolfcrypt/asn.h | 70 +- wolfssl/wolfcrypt/asn_public.h | 19 +- wolfssl/wolfcrypt/des3.h | 12 +- wolfssl/wolfcrypt/dh.h | 9 +- wolfssl/wolfcrypt/ecc.h | 23 +- wolfssl/wolfcrypt/error-crypt.h | 3 + wolfssl/wolfcrypt/hash.h | 29 +- wolfssl/wolfcrypt/hmac.h | 77 +- wolfssl/wolfcrypt/include.am | 14 +- wolfssl/wolfcrypt/integer.h | 17 +- wolfssl/wolfcrypt/md5.h | 38 +- wolfssl/wolfcrypt/mem_track.h | 4 +- wolfssl/wolfcrypt/memory.h | 14 +- wolfssl/wolfcrypt/port/cavium/cavium_nitrox.h | 165 - wolfssl/wolfcrypt/port/ti/ti-hash.h | 40 +- wolfssl/wolfcrypt/random.h | 5 +- wolfssl/wolfcrypt/rsa.h | 21 +- wolfssl/wolfcrypt/settings.h | 14 + wolfssl/wolfcrypt/sha.h | 44 +- wolfssl/wolfcrypt/sha256.h | 24 +- wolfssl/wolfcrypt/sha512.h | 18 + wolfssl/wolfcrypt/tfm.h | 22 +- wolfssl/wolfcrypt/types.h | 192 +- wolfssl/wolfcrypt/wolfevent.h | 33 +- wolfssl/wolfcrypt/wolfmath.h | 38 +- 87 files changed, 15069 insertions(+), 12162 deletions(-) mode change 100644 => 100755 src/internal.c mode change 100644 => 100755 src/ssl.c mode change 100644 => 100755 wolfcrypt/src/aes.c mode change 100644 => 100755 wolfcrypt/src/des3.c mode change 100644 => 100755 wolfcrypt/src/dh.c mode change 100644 => 100755 wolfcrypt/src/ecc.c mode change 100644 => 100755 wolfcrypt/src/hmac.c mode change 100644 => 100755 wolfcrypt/src/md5.c delete mode 100644 wolfcrypt/src/port/cavium/cavium_nitrox.c create mode 100644 wolfcrypt/src/port/intel/README.md mode change 100644 => 100755 wolfcrypt/src/rsa.c mode change 100644 => 100755 wolfcrypt/src/sha.c mode change 100644 => 100755 
wolfcrypt/src/sha256.c mode change 100644 => 100755 wolfcrypt/src/sha512.c mode change 100644 => 100755 wolfssl/wolfcrypt/aes.h delete mode 100644 wolfssl/wolfcrypt/port/cavium/cavium_nitrox.h mode change 100644 => 100755 wolfssl/wolfcrypt/types.h diff --git a/.cproject b/.cproject index b93835e25..1db9a198e 100644 --- a/.cproject +++ b/.cproject @@ -66,7 +66,7 @@ - + diff --git a/.gitignore b/.gitignore index f1fd0c9c9..fd5def3b6 100644 --- a/.gitignore +++ b/.gitignore @@ -44,6 +44,12 @@ src/async.c wolfssl/async.h wolfcrypt/src/async.c wolfssl/wolfcrypt/async.h +wolfcrypt/src/port/intel/quickassist.c +wolfcrypt/src/port/intel/quickassist_mem.c +wolfcrypt/src/port/cavium/cavium_nitrox.c +wolfssl/wolfcrypt/port/intel/quickassist.h +wolfssl/wolfcrypt/port/intel/quickassist_mem.h +wolfssl/wolfcrypt/port/cavium/cavium_nitrox.h ctaocrypt/benchmark/benchmark ctaocrypt/test/testctaocrypt wolfcrypt/benchmark/benchmark @@ -191,3 +197,6 @@ wrapper/CSharp/x64/ # Visual Studio Code Workspace Files *.vscode IDE/INTIME-RTOS/Debug_* + +# Binaries +wolfcrypt/src/port/intel/qat_test diff --git a/IDE/ROWLEY-CROSSWORKS-ARM/wolfssl_ltc.hzp b/IDE/ROWLEY-CROSSWORKS-ARM/wolfssl_ltc.hzp index 357ac26f3..30156bf33 100644 --- a/IDE/ROWLEY-CROSSWORKS-ARM/wolfssl_ltc.hzp +++ b/IDE/ROWLEY-CROSSWORKS-ARM/wolfssl_ltc.hzp @@ -300,12 +300,26 @@ recurse="Yes" /> - + + + + + + + + + + + + + + + + + + + + diff --git a/autogen.sh b/autogen.sh index 6b08f3cd5..e5ea530fa 100755 --- a/autogen.sh +++ b/autogen.sh @@ -22,6 +22,20 @@ if test -e .git; then # touch async crypt files touch ./wolfcrypt/src/async.c touch ./wolfssl/wolfcrypt/async.h + + # touch async port files + touch ./wolfcrypt/src/port/intel/quickassist.c + touch ./wolfcrypt/src/port/intel/quickassist_mem.c + touch ./wolfcrypt/src/port/cavium/cavium_nitrox.c + if [ ! 
-d ./wolfssl/wolfcrypt/port/intel ]; then + mkdir ./wolfssl/wolfcrypt/port/intel + fi + touch ./wolfssl/wolfcrypt/port/intel/quickassist.h + touch ./wolfssl/wolfcrypt/port/intel/quickassist_mem.h + if [ ! -d ./wolfssl/wolfcrypt/port/cavium ]; then + mkdir ./wolfssl/wolfcrypt/port/cavium + fi + touch ./wolfssl/wolfcrypt/port/cavium/cavium_nitrox.h else WARNINGS="all" fi diff --git a/configure.ac b/configure.ac index b86f2307f..b1cdc7cc4 100644 --- a/configure.ac +++ b/configure.ac @@ -2726,6 +2726,7 @@ AC_ARG_WITH([cavium], [ AC_MSG_CHECKING([for cavium]) CPPFLAGS="$CPPFLAGS -DHAVE_CAVIUM" + LIB_ADD="-lrt $LIB_ADD" if test "x$withval" == "xyes" ; then AC_MSG_ERROR([need a PATH for --with-cavium]) @@ -2742,6 +2743,8 @@ AC_ARG_WITH([cavium], if test "x$cavium_linked" == "xno" ; then AC_MSG_ERROR([cavium isn't found. If it's already installed, specify its path using --with-cavium=/dir/]) + else + AM_CFLAGS="$AM_CFLAGS -DHAVE_CAVIUM" fi AC_MSG_RESULT([yes]) enable_shared=no @@ -2758,6 +2761,7 @@ AC_ARG_WITH([cavium-v], [ AC_MSG_CHECKING([for cavium]) CPPFLAGS="$CPPFLAGS -DHAVE_CAVIUM -DHAVE_CAVIUM_V" + LIB_ADD="-lrt $LIB_ADD" if test "x$withval" == "xyes" ; then AC_MSG_ERROR([need a PATH for --with-cavium]) @@ -2766,7 +2770,7 @@ AC_ARG_WITH([cavium-v], trycaviumdir=$withval fi - LDFLAGS="$AM_LDFLAGS $trycaviumdir/utils/sample_tests/cavium_common.o $trycaviumdir/utils/sample_tests/cavium_sym_crypto.o $trycaviumdir/utils/sample_tests/cavium_asym_crypto.o" + LDFLAGS="$AM_LDFLAGS $trycaviumdir/api/obj/cavium_common.o $trycaviumdir/api/obj/cavium_sym_crypto.o $trycaviumdir/api/obj/cavium_asym_crypto.o" CPPFLAGS="$CPPFLAGS -I$trycaviumdir/include" #AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include "cavium_common.h"]], [[ CspShutdown(0); ]])],[ cavium_linked=yes ],[ cavium_linked=no ]) @@ -2774,6 +2778,8 @@ AC_ARG_WITH([cavium-v], if test "x$cavium_linked" == "xno" ; then AC_MSG_ERROR([cavium isn't found. 
If it's already installed, specify its path using --with-cavium-v=/dir/]) + else + AM_CFLAGS="$AM_CFLAGS -DHAVE_CAVIUM -DHAVE_CAVIUM_V" fi AC_MSG_RESULT([yes]) @@ -2791,6 +2797,46 @@ AC_ARG_WITH([cavium-v], AM_CONDITIONAL([BUILD_CAVIUM], [test "x$ENABLED_CAVIUM" = "xyes"]) +# Intel Quick Assist +tryqatdir="" +AC_ARG_WITH([intelqa], + [ --with-intelqa=PATH PATH to Intel QuickAssit (QAT) driver dir ], + [ + AC_MSG_CHECKING([for intelqa]) + CPPFLAGS="$CPPFLAGS -DHAVE_INTEL_QA -DDO_CRYPTO -DUSER_SPACE" + + if test "x$withval" == "xyes" ; then + AC_MSG_ERROR([need a PATH for --with-intelqa]) + fi + if test "x$withval" != "xno" ; then + tryqatdir=$withval + fi + + CPPFLAGS="$CPPFLAGS -I$tryqatdir/quickassist/include -I$tryqatdir/quickassist/include/lac -I$tryqatdir/quickassist/utilities/osal/include -I$tryqatdir/quickassist/utilities/osal/src/linux/user_space/include -I$tryqatdir/quickassist/lookaside/access_layer/include -I$tryqatdir/quickassist/lookaside/access_layer/src/common/include -I$srcdir/wolfssl -I$srcdir/wolfssl/wolfcrypt/port/intel" + LDFLAGS="$LDFLAGS -L$tryqatdir/build -Wl,-Map=output.map" + LIBS="$LIBS -licp_qa_al_s" + LIB_ADD="-ladf_proxy -losal -lrt $LIB_ADD" + + AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include "cpa_cy_common.h"]], [[ Cpa16U count = 0; cpaCyGetNumInstances(&count); ]])],[ intelqa_linked=yes ],[ intelqa_linked=no ]) + + if test "x$intelqa_linked" == "xno" ; then + AC_MSG_ERROR([Intel QuickAssist not found. 
+ If it's already installed, specify its path using --with-intelqa=/dir/]) + else + AM_CFLAGS="$AM_CFLAGS -DHAVE_INTEL_QA -DDO_CRYPTO -DUSER_SPACE" + fi + AC_MSG_RESULT([yes]) + + ENABLED_INTEL_QA=yes + ], + [ + ENABLED_INTEL_QA=no + ] +) + +AM_CONDITIONAL([BUILD_INTEL_QA], [test "x$ENABLED_INTEL_QA" = "xyes"]) + + # Fast RSA using Intel IPP ippdir="${srcdir}/IPP" ipplib="lib" # if autoconf guesses 32bit system changes lib directory @@ -2997,11 +3043,12 @@ AC_ARG_ENABLE([asynccrypt], if test "$ENABLED_ASYNCCRYPT" = "yes" then - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_ASYNC_CRYPT -DHAVE_WOLF_EVENT" + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_ASYNC_CRYPT -DHAVE_WOLF_EVENT -DHAVE_WOLF_BIGINT" - # if Cavium not enabled the use async simulator for testing - if test "x$ENABLED_CAVIUM" = "xno" + # if no async hardware then use simulator for testing + if test "x$ENABLED_CAVIUM" = "xno" && test "x$ENABLED_INTEL_QA" = "xno" then + # Async threading is Linux specific AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_ASYNC_CRYPT_TEST" fi fi @@ -3010,6 +3057,35 @@ AM_CONDITIONAL([BUILD_ASYNCCRYPT], [test "x$ENABLED_ASYNCCRYPT" = "xyes"]) AM_CONDITIONAL([BUILD_WOLFEVENT], [test "x$ENABLED_ASYNCCRYPT" = "xyes"]) +# check for async if using Intel QuckAssist or Cavium +if test "x$ENABLED_INTEL_QA" = "xyes" || test "x$ENABLED_CAVIUM" = "xyes" ; then + if test "x$ENABLED_ASYNCCRYPT" = "xno" ; then + AC_MSG_ERROR([Please enable enable asynchronous support using --enable-asynccrypt]) + fi +fi + + +# Asynchronous threading +AC_ARG_ENABLE([asyncthreads], + [ --enable-asyncthreads Enable Asynchronous Threading (default: enabled)], + [ ENABLED_ASYNCTHREADS=$enableval ], + [ ENABLED_ASYNCTHREADS=yes ] + ) + +if test "$ENABLED_ASYNCCRYPT" = "yes" && test "$ENABLED_ASYNCTHREADS" = "yes" +then + AX_PTHREAD([ENABLED_ASYNCTHREADS=yes],[ENABLED_ASYNCTHREADS=no]) +else + ENABLED_ASYNCTHREADS=no +fi + +if test "$ENABLED_ASYNCTHREADS" = "yes" +then + LIB_ADD="-lpthread $LIB_ADD" + AM_CFLAGS="$AM_CFLAGS -D_GNU_SOURCE" +else + 
AM_CFLAGS="$AM_CFLAGS -DWC_NO_ASYNC_THREADING" +fi # Session Export @@ -3515,6 +3591,7 @@ echo " * Cavium: $ENABLED_CAVIUM" echo " * ARM ASM: $ENABLED_ARMASM" echo " * AES Key Wrap: $ENABLED_AESKEYWRAP" echo " * Write duplicate: $ENABLED_WRITEDUP" +echo " * Intel Quick Assist: $ENABLED_INTEL_QA" echo "" echo "---" diff --git a/examples/client/client.c b/examples/client/client.c index 8af4123ff..7d7581d2e 100644 --- a/examples/client/client.c +++ b/examples/client/client.c @@ -42,18 +42,13 @@ #include -#if !defined(WOLFSSL_TRACK_MEMORY) && !defined(NO_MAIN_DRIVER) - /* in case memory tracker wants stats */ - #define WOLFSSL_TRACK_MEMORY -#endif - #include #include #include "examples/client/client.h" -#ifdef WOLFSSL_ASYNC_CRYPT +#ifndef HAVE_FIPS static int devId = INVALID_DEVID; #endif @@ -75,7 +70,7 @@ #endif -static void NonBlockingSSL_Connect(WOLFSSL* ssl) +static int NonBlockingSSL_Connect(WOLFSSL* ssl) { #ifndef WOLFSSL_CALLBACKS int ret = wolfSSL_connect(ssl); @@ -98,7 +93,7 @@ static void NonBlockingSSL_Connect(WOLFSSL* ssl) #ifdef WOLFSSL_ASYNC_CRYPT else if (error == WC_PENDING_E) { ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); - if (ret < 0) { break; } else if (ret == 0) { continue; } + if (ret < 0) break; } #endif @@ -110,11 +105,11 @@ static void NonBlockingSSL_Connect(WOLFSSL* ssl) } if ((select_ret == TEST_RECV_READY) || - (select_ret == TEST_ERROR_READY)) { + (select_ret == TEST_ERROR_READY) || error == WC_PENDING_E) { #ifndef WOLFSSL_CALLBACKS ret = wolfSSL_connect(ssl); #else - ret = wolfSSL_connect_ex(ssl,handShakeCB,timeoutCB,timeout); + ret = wolfSSL_connect_ex(ssl, handShakeCB, timeoutCB, timeout); #endif error = wolfSSL_get_error(ssl, 0); } @@ -131,8 +126,8 @@ static void NonBlockingSSL_Connect(WOLFSSL* ssl) error = SSL_FATAL_ERROR; } } - if (ret != SSL_SUCCESS) - err_sys("SSL_connect failed"); + + return ret; } @@ -166,7 +161,7 @@ static int ClientBenchmarkConnections(WOLFSSL_CTX* ctx, char* host, word16 port, /* time passed in number 
of connects give average */ int times = benchmark; int loops = resumeSession ? 2 : 1; - int i = 0; + int i = 0, err, ret; #ifndef NO_SESSION_CACHE WOLFSSL_SESSION* benchSession = NULL; #endif @@ -193,8 +188,23 @@ static int ClientBenchmarkConnections(WOLFSSL_CTX* ctx, char* host, word16 port, if (wolfSSL_set_fd(ssl, sockfd) != SSL_SUCCESS) { err_sys("error in setting fd"); } - if (wolfSSL_connect(ssl) != SSL_SUCCESS) + + do { + err = 0; /* reset error */ + ret = wolfSSL_connect(ssl); + if (ret != SSL_SUCCESS) { + err = wolfSSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); + if (ret != SSL_SUCCESS) { err_sys("SSL_connect failed"); + } wolfSSL_shutdown(ssl); #ifndef NO_SESSION_CACHE @@ -226,7 +236,7 @@ static int ClientBenchmarkThroughput(WOLFSSL_CTX* ctx, char* host, word16 port, double start, conn_time = 0, tx_time = 0, rx_time = 0; SOCKET_T sockfd; WOLFSSL* ssl; - int ret; + int ret = 0, err = 0; start = current_time(1); ssl = wolfSSL_new(ctx); @@ -236,7 +246,21 @@ static int ClientBenchmarkThroughput(WOLFSSL_CTX* ctx, char* host, word16 port, if (wolfSSL_set_fd(ssl, sockfd) != SSL_SUCCESS) { err_sys("error in setting fd"); } - if (wolfSSL_connect(ssl) == SSL_SUCCESS) { + + do { + err = 0; /* reset error */ + ret = wolfSSL_connect(ssl); + if (ret != SSL_SUCCESS) { + err = wolfSSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); + if (ret == SSL_SUCCESS) { /* Perform throughput test */ char *tx_buffer, *rx_buffer; @@ -244,14 +268,18 @@ static int ClientBenchmarkThroughput(WOLFSSL_CTX* ctx, char* host, word16 port, conn_time = current_time(0) - start; /* Allocate TX/RX buffers */ - tx_buffer = (char*)malloc(TEST_BUFFER_SIZE); - rx_buffer = 
(char*)malloc(TEST_BUFFER_SIZE); - if(tx_buffer && rx_buffer) { + tx_buffer = (char*)XMALLOC(TEST_BUFFER_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER); + rx_buffer = (char*)XMALLOC(TEST_BUFFER_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (tx_buffer && rx_buffer) { WC_RNG rng; /* Startup the RNG */ + #ifndef HAVE_FIPS + ret = wc_InitRng_ex(&rng, NULL, devId); + #else ret = wc_InitRng(&rng); - if(ret == 0) { + #endif + if (ret == 0) { int xfer_bytes; /* Generate random data to send */ @@ -263,7 +291,7 @@ static int ClientBenchmarkThroughput(WOLFSSL_CTX* ctx, char* host, word16 port, /* Perform TX and RX of bytes */ xfer_bytes = 0; - while(throughput > xfer_bytes) { + while (throughput > xfer_bytes) { int len, rx_pos, select_ret; /* Determine packet size */ @@ -271,10 +299,22 @@ static int ClientBenchmarkThroughput(WOLFSSL_CTX* ctx, char* host, word16 port, /* Perform TX */ start = current_time(1); - if (wolfSSL_write(ssl, tx_buffer, len) != len) { - int writeErr = wolfSSL_get_error(ssl, 0); - printf("wolfSSL_write error %d!\n", writeErr); - err_sys("wolfSSL_write failed"); + do { + err = 0; /* reset error */ + ret = wolfSSL_write(ssl, tx_buffer, len); + if (ret <= 0) { + err = wolfSSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); + if (ret != len) { + printf("SSL_write bench error %d!\n", err); + err_sys("SSL_write failed"); } tx_time += current_time(0) - start; @@ -283,13 +323,21 @@ static int ClientBenchmarkThroughput(WOLFSSL_CTX* ctx, char* host, word16 port, if (select_ret == TEST_RECV_READY) { start = current_time(1); rx_pos = 0; - while(rx_pos < len) { - ret = wolfSSL_read(ssl, &rx_buffer[rx_pos], len - rx_pos); - if(ret <= 0) { - int readErr = wolfSSL_get_error(ssl, 0); - if (readErr != SSL_ERROR_WANT_READ) { - printf("wolfSSL_read error %d!\n", readErr); - err_sys("wolfSSL_read failed"); + while (rx_pos < len) { 
+ ret = wolfSSL_read(ssl, &rx_buffer[rx_pos], + len - rx_pos); + if (ret <= 0) { + err = wolfSSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + else + #endif + if (err != SSL_ERROR_WANT_READ) { + printf("SSL_read bench error %d\n", err); + err_sys("SSL_read failed"); } } else { @@ -319,8 +367,8 @@ static int ClientBenchmarkThroughput(WOLFSSL_CTX* ctx, char* host, word16 port, else { err_sys("Client buffer malloc failed"); } - if(tx_buffer) free(tx_buffer); - if(rx_buffer) free(rx_buffer); + if(tx_buffer) XFREE(tx_buffer, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if(rx_buffer) XFREE(rx_buffer, NULL, DYNAMIC_TYPE_TMP_BUFFER); } else { err_sys("wolfSSL_connect failed"); @@ -412,7 +460,7 @@ static int StartTLS_Init(SOCKET_T* sockfd) /* Closes down the SMTP connection */ static int SMTP_Shutdown(WOLFSSL* ssl, int wc_shutdown) { - int ret; + int ret, err = 0; char tmpBuf[256]; if (ssl == NULL) @@ -423,13 +471,38 @@ static int SMTP_Shutdown(WOLFSSL* ssl, int wc_shutdown) XMEMSET(tmpBuf, 0, sizeof(tmpBuf)); /* C: QUIT */ - if (wolfSSL_write(ssl, starttlsCmd[5], (int)XSTRLEN(starttlsCmd[5])) != - (int)XSTRLEN(starttlsCmd[5])) + do { + ret = wolfSSL_write(ssl, starttlsCmd[5], (int)XSTRLEN(starttlsCmd[5])); + if (ret < 0) { + err = wolfSSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); + if (ret != (int)XSTRLEN(starttlsCmd[5])) { err_sys("failed to send SMTP QUIT command\n"); + } /* S: 221 2.0.0 Service closing transmission channel */ - if (wolfSSL_read(ssl, tmpBuf, sizeof(tmpBuf)) < 0) + do { + ret = wolfSSL_read(ssl, tmpBuf, sizeof(tmpBuf)); + if (ret < 0) { + err = wolfSSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if 
(ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); + if (ret < 0) { err_sys("failed to read SMTP closing down response\n"); + } printf("%s\n", tmpBuf); @@ -551,7 +624,6 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) #endif char reply[80]; - int input; int msgSz = (int)XSTRLEN(msg); int resumeSz = (int)XSTRLEN(resumeMsg); @@ -584,7 +656,6 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) #endif int scr = 0; /* allow secure renegotiation */ int forceScr = 0; /* force client initiaed scr */ - int trackMemory = 0; int useClientCert = 1; int fewerPackets = 0; int atomicUser = 0; @@ -623,7 +694,6 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) byte disableExtMasterSecret = 0; #endif - #ifdef HAVE_OCSP int useOcsp = 0; char* ocspUrl = NULL; @@ -632,6 +702,7 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) #ifdef HAVE_WNR const char* wnrConfigFile = wnrConfig; #endif + char buffer[WOLFSSL_MAX_ERROR_SZ]; int argc = ((func_args*)args)->argc; char** argv = ((func_args*)args)->argv; @@ -663,9 +734,9 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) StackTrap(); #ifndef WOLFSSL_VXWORKS - /* Not used: j, y, I, J, K, Q, Y */ + /* Not used: j, t, y, I, J, K, Q, Y */ while ((ch = mygetopt(argc, argv, "?" - "ab:c:defgh:ik:l:mnop:q:rstuv:wxz" + "ab:c:defgh:ik:l:mnop:q:rsuv:wxz" "A:B:CDE:F:GHL:M:NO:PRS:TUVW:XZ:")) != -1) { switch (ch) { case '?' 
: @@ -710,12 +781,6 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) usePsk = 1; break; - case 't' : - #ifdef USE_WOLFSSL_MEMORY - trackMemory = 1; - #endif - break; - #ifdef WOLFSSL_TRUST_PEER_CERT case 'E' : trustCert = myoptarg; @@ -1036,11 +1101,6 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) } } -#if defined(USE_WOLFSSL_MEMORY) && !defined(WOLFSSL_STATIC_MEMORY) - if (trackMemory) - InitMemoryTracker(); -#endif - #ifdef HAVE_WNR if (wc_InitNetRandom(wnrConfigFile, NULL, 5000) != 0) err_sys("can't load whitewood net random config file"); @@ -1272,9 +1332,8 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) #ifdef WOLFSSL_ASYNC_CRYPT ret = wolfAsync_DevOpen(&devId); - if (ret != 0) { - wolfSSL_CTX_free(ctx); - err_sys("Async device open failed"); + if (ret < 0) { + printf("Async device open failed\nRunning without async\n"); } wolfSSL_CTX_UseAsync(ctx, devId); #endif /* WOLFSSL_ASYNC_CRYPT */ @@ -1515,38 +1574,38 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) if (nonBlocking) { wolfSSL_set_using_nonblock(ssl, 1); tcp_set_nonblocking(&sockfd); - NonBlockingSSL_Connect(ssl); + ret = NonBlockingSSL_Connect(ssl); } else { do { -#ifdef WOLFSSL_ASYNC_CRYPT - if (err == WC_PENDING_E) { - ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); - if (ret < 0) { break; } else if (ret == 0) { continue; } - } -#endif - err = 0; /* Reset error */ + err = 0; /* reset error */ ret = wolfSSL_connect(ssl); if (ret != SSL_SUCCESS) { err = wolfSSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif } - } while (ret != SSL_SUCCESS && err == WC_PENDING_E); - - if (ret != SSL_SUCCESS) { - char buffer[WOLFSSL_MAX_ERROR_SZ]; - printf("err = %d, %s\n", err, wolfSSL_ERR_error_string(err, buffer)); - wolfSSL_free(ssl); - wolfSSL_CTX_free(ctx); - err_sys("wolfSSL_connect failed"); - /* see note at top of README */ - /* if you're getting an error 
here */ - } + } while (err == WC_PENDING_E); } #else timeout.tv_sec = DEFAULT_TIMEOUT_SEC; timeout.tv_usec = 0; - NonBlockingSSL_Connect(ssl); /* will keep retrying on timeout */ + ret = NonBlockingSSL_Connect(ssl); /* will keep retrying on timeout */ #endif + if (ret != SSL_SUCCESS) { + printf("wolfSSL_connect error %d, %s\n", err, + wolfSSL_ERR_error_string(err, buffer)); + wolfSSL_free(ssl); + wolfSSL_CTX_free(ctx); + err_sys("wolfSSL_connect failed"); + /* see note at top of README */ + /* if you're getting an error here */ + } + showPeer(ssl); #ifdef OPENSSL_EXTRA @@ -1626,7 +1685,6 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) " nonblocking yet"); } else { if (wolfSSL_Rehandshake(ssl) != SSL_SUCCESS) { - char buffer[WOLFSSL_MAX_ERROR_SZ]; err = wolfSSL_get_error(ssl, 0); printf("err = %d, %s\n", err, wolfSSL_ERR_error_string(err, buffer)); @@ -1651,30 +1709,70 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) /* allow some time for exporting the session */ #ifdef WOLFSSL_SESSION_EXPORT_DEBUG - #ifdef USE_WINDOWS_API - Sleep(500); - #elif defined(WOLFSSL_TIRTOS) - Task_sleep(1); - #else - sleep(1); - #endif +#ifdef USE_WINDOWS_API + Sleep(500); +#elif defined(WOLFSSL_TIRTOS) + Task_sleep(1); +#else + sleep(1); +#endif #endif /* WOLFSSL_SESSION_EXPORT_DEBUG */ - if (wolfSSL_write(ssl, msg, msgSz) != msgSz) { + + do { + err = 0; /* reset error */ + ret = wolfSSL_write(ssl, msg, msgSz); + if (ret <= 0) { + err = wolfSSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); + if (ret != msgSz) { + printf("SSL_write msg error %d, %s\n", err, + wolfSSL_ERR_error_string(err, buffer)); wolfSSL_free(ssl); wolfSSL_CTX_free(ctx); err_sys("SSL_write failed"); } - input = wolfSSL_read(ssl, reply, sizeof(reply)-1); - if (input > 0) { - reply[input] = 0; + do { + err = 0; /* reset error */ + ret = 
wolfSSL_read(ssl, reply, sizeof(reply)-1); + if (ret <= 0) { + err = wolfSSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); + if (ret > 0) { + reply[ret] = 0; printf("Server response: %s\n", reply); if (sendGET) { /* get html */ while (1) { - input = wolfSSL_read(ssl, reply, sizeof(reply)-1); - if (input > 0) { - reply[input] = 0; + do { + err = 0; /* reset error */ + ret = wolfSSL_read(ssl, reply, sizeof(reply)-1); + if (ret <= 0) { + err = wolfSSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); + if (ret > 0) { + reply[ret] = 0; printf("%s\n", reply); } else @@ -1682,13 +1780,13 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) } } } - else if (input < 0) { - int readErr = wolfSSL_get_error(ssl, 0); - if (readErr != SSL_ERROR_WANT_READ) { - printf("wolfSSL_read error %d!\n", readErr); + if (ret < 0) { + if (err != SSL_ERROR_WANT_READ) { + printf("SSL_read reply error %d, %s\n", err, + wolfSSL_ERR_error_string(err, buffer)); wolfSSL_free(ssl); wolfSSL_CTX_free(ctx); - err_sys("wolfSSL_read failed"); + err_sys("SSL_read failed"); } } @@ -1799,18 +1897,37 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) if (nonBlocking) { wolfSSL_set_using_nonblock(sslResume, 1); tcp_set_nonblocking(&sockfd); - NonBlockingSSL_Connect(sslResume); + ret = NonBlockingSSL_Connect(sslResume); } - else if (wolfSSL_connect(sslResume) != SSL_SUCCESS) { - wolfSSL_free(sslResume); - wolfSSL_CTX_free(ctx); - err_sys("SSL resume failed"); + else { + do { + err = 0; /* reset error */ + ret = wolfSSL_connect(sslResume); + if (ret != SSL_SUCCESS) { + err = wolfSSL_get_error(sslResume, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = 
wolfSSL_AsyncPoll(sslResume, + WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); } #else timeout.tv_sec = DEFAULT_TIMEOUT_SEC; timeout.tv_usec = 0; - NonBlockingSSL_Connect(ssl); /* will keep retrying on timeout */ + ret = NonBlockingSSL_Connect(ssl); /* will keep retrying on timeout */ #endif + if (ret != SSL_SUCCESS) { + printf("wolfSSL_connect resume error %d, %s\n", err, + wolfSSL_ERR_error_string(err, buffer)); + wolfSSL_free(sslResume); + wolfSSL_CTX_free(ctx); + err_sys("wolfSSL_connect resume failed"); + } + showPeer(sslResume); if (wolfSSL_session_reused(sslResume)) @@ -1847,7 +1964,22 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) #endif #endif /* WOLFSSL_SESSION_EXPORT_DEBUG */ - if (wolfSSL_write(sslResume, resumeMsg, resumeSz) != resumeSz) { + do { + err = 0; /* reset error */ + ret = wolfSSL_write(sslResume, resumeMsg, resumeSz); + if (ret <= 0) { + err = wolfSSL_get_error(sslResume, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(sslResume, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); + if (ret != resumeSz) { + printf("SSL_write resume error %d, %s\n", err, + wolfSSL_ERR_error_string(err, buffer)); wolfSSL_free(sslResume); wolfSSL_CTX_free(ctx); err_sys("SSL_write failed"); @@ -1855,26 +1987,50 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) if (nonBlocking) { /* give server a chance to bounce a message back to client */ - #ifdef USE_WINDOWS_API - Sleep(500); - #elif defined(WOLFSSL_TIRTOS) - Task_sleep(1); - #else - sleep(1); - #endif + #ifdef USE_WINDOWS_API + Sleep(500); + #elif defined(WOLFSSL_TIRTOS) + Task_sleep(1); + #else + sleep(1); + #endif } - input = wolfSSL_read(sslResume, reply, sizeof(reply)-1); - - if (input > 0) { - reply[input] = 0; + do { + err = 0; /* reset error */ + ret = wolfSSL_read(sslResume, reply, sizeof(reply)-1); + if (ret <= 0) { + err = wolfSSL_get_error(sslResume, 
0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(sslResume, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); + if (ret > 0) { + reply[ret] = 0; printf("Server resume response: %s\n", reply); if (sendGET) { /* get html */ while (1) { - input = wolfSSL_read(sslResume, reply, sizeof(reply)-1); - if (input > 0) { - reply[input] = 0; + do { + err = 0; /* reset error */ + ret = wolfSSL_read(sslResume, reply, sizeof(reply)-1); + if (ret <= 0) { + err = wolfSSL_get_error(sslResume, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(sslResume, + WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); + if (ret > 0) { + reply[ret] = 0; printf("%s\n", reply); } else @@ -1882,18 +2038,30 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) } } } - else if (input < 0) { - int readErr = wolfSSL_get_error(sslResume, 0); - if (readErr != SSL_ERROR_WANT_READ) { - printf("wolfSSL_read error %d!\n", readErr); + if (ret < 0) { + if (err != SSL_ERROR_WANT_READ) { + printf("SSL_read resume error %d, %s\n", err, + wolfSSL_ERR_error_string(err, buffer)); wolfSSL_free(sslResume); wolfSSL_CTX_free(ctx); - err_sys("wolfSSL_read failed"); + err_sys("SSL_read failed"); } } /* try to send session break */ - wolfSSL_write(sslResume, msg, msgSz); + do { + err = 0; /* reset error */ + ret = wolfSSL_write(sslResume, msg, msgSz); + if (ret <= 0) { + err = wolfSSL_get_error(sslResume, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(sslResume, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); ret = wolfSSL_shutdown(sslResume); if (wc_shutdown && ret == SSL_SHUTDOWN_NOT_DONE) @@ -1912,11 +2080,6 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) wolfAsync_DevClose(&devId); #endif -#if defined(USE_WOLFSSL_MEMORY) && 
!defined(WOLFSSL_STATIC_MEMORY) - if (trackMemory) - ShowMemoryTracker(); -#endif /* USE_WOLFSSL_MEMORY */ - /* There are use cases when these assignments are not read. To avoid * potential confusion those warnings have been handled here. */ @@ -1925,7 +2088,6 @@ THREAD_RETURN WOLFSSL_THREAD client_test(void* args) (void) verifyCert; (void) ourCert; (void) ourKey; - (void) trackMemory; #if !defined(WOLFSSL_TIRTOS) return 0; diff --git a/examples/echoclient/echoclient.c b/examples/echoclient/echoclient.c index fdceb7048..09f0286ec 100644 --- a/examples/echoclient/echoclient.c +++ b/examples/echoclient/echoclient.c @@ -23,7 +23,7 @@ #ifdef HAVE_CONFIG_H #include #endif - + #include /* let's use cyassl layer AND cyassl openssl layer */ @@ -35,7 +35,7 @@ #include #if !defined(WOLFSSL_MDK_ARM) - #include "cmsis_os.h" + #include "cmsis_os.h" #include "rl_net.h" #else #include "rtl.h" @@ -81,16 +81,17 @@ void echoclient_test(void* args) int argc = 0; char** argv = 0; word16 port = yasslPort; + char buffer[CYASSL_MAX_ERROR_SZ]; ((func_args*)args)->return_code = -1; /* error state */ - + #ifndef WOLFSSL_MDK_SHELL argc = ((func_args*)args)->argc; argv = ((func_args*)args)->argv; #endif if (argc >= 2) { - fin = fopen(argv[1], "r"); + fin = fopen(argv[1], "r"); inCreated = 1; } if (argc >= 3) { @@ -105,7 +106,7 @@ void echoclient_test(void* args) doDTLS = 1; #endif -#ifdef CYASSL_LEANPSK +#ifdef CYASSL_LEANPSK doPSK = 1; #endif @@ -173,15 +174,15 @@ void echoclient_test(void* args) #ifdef WOLFSSL_ASYNC_CRYPT ret = wolfAsync_DevOpen(&devId); - if (ret != 0) { - err_sys("Async device open failed"); + if (ret < 0) { + printf("Async device open failed\nRunning without async\n"); } wolfSSL_CTX_UseAsync(ctx, devId); #endif /* WOLFSSL_ASYNC_CRYPT */ ssl = SSL_new(ctx); tcp_connect(&sockfd, yasslIP, port, doDTLS, 0, ssl); - + SSL_set_fd(ssl, sockfd); #if defined(USE_WINDOWS_API) && defined(CYASSL_DTLS) && defined(NO_MAIN_DRIVER) /* let echoserver bind first, TODO: add Windows signal 
like pthreads does */ @@ -189,31 +190,46 @@ void echoclient_test(void* args) #endif do { -#ifdef WOLFSSL_ASYNC_CRYPT - if (err == WC_PENDING_E) { - ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); - if (ret < 0) { break; } else if (ret == 0) { continue; } - } -#endif err = 0; /* Reset error */ ret = SSL_connect(ssl); if (ret != SSL_SUCCESS) { err = SSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif } - } while (ret != SSL_SUCCESS && err == WC_PENDING_E); - + } while (err == WC_PENDING_E); if (ret != SSL_SUCCESS) { - char buffer[CYASSL_MAX_ERROR_SZ]; - printf("err = %d, %s\n", err, ERR_error_string(err, buffer)); + printf("SSL_connect error %d, %s\n", err, + ERR_error_string(err, buffer)); err_sys("SSL_connect failed"); } while (fgets(msg, sizeof(msg), fin) != 0) { - + sendSz = (int)XSTRLEN(msg); - if (SSL_write(ssl, msg, sendSz) != sendSz) + do { + err = 0; /* reset error */ + ret = SSL_write(ssl, msg, sendSz); + if (ret <= 0) { + err = SSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); + if (ret != sendSz) { + printf("SSL_write msg error %d, %s\n", err, + ERR_error_string(err, buffer)); err_sys("SSL_write failed"); + } if (strncmp(msg, "quit", 4) == 0) { fputs("sending server shutdown command: quit!\n", fout); @@ -225,29 +241,39 @@ void echoclient_test(void* args) break; } - #ifndef WOLFSSL_MDK_SHELL - while (sendSz) { - int got; - if ( (got = SSL_read(ssl, reply, sizeof(reply)-1)) > 0) { - reply[got] = 0; - fputs(reply, fout); - fflush(fout) ; - sendSz -= got; - } - else - break; - } - #else + #ifndef WOLFSSL_MDK_SHELL + while (sendSz) + #endif { - int got; - if ( (got = SSL_read(ssl, reply, sizeof(reply)-1)) > 0) { - reply[got] = 0; + do { + err = 0; /* reset error */ + ret = 
SSL_read(ssl, reply, sizeof(reply)-1); + if (ret <= 0) { + err = SSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); + if (ret > 0) { + reply[ret] = 0; fputs(reply, fout); fflush(fout) ; - sendSz -= got; + sendSz -= ret; + } + else { + printf("SSL_read msg error %d, %s\n", err, + ERR_error_string(err, buffer)); + err_sys("SSL_read failed"); + + #ifndef WOLFSSL_MDK_SHELL + break; + #endif } } - #endif } @@ -255,7 +281,19 @@ void echoclient_test(void* args) strncpy(msg, "break", 6); sendSz = (int)strlen(msg); /* try to tell server done */ - SSL_write(ssl, msg, sendSz); + do { + err = 0; /* reset error */ + ret = SSL_write(ssl, msg, sendSz); + if (ret <= 0) { + err = SSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); #else SSL_shutdown(ssl); #endif @@ -272,7 +310,7 @@ void echoclient_test(void* args) if (outCreated) fclose(fout); CloseSocket(sockfd); - ((func_args*)args)->return_code = 0; + ((func_args*)args)->return_code = 0; } @@ -311,7 +349,7 @@ void echoclient_test(void* args) return args.return_code; } - + #endif /* NO_MAIN_DRIVER */ diff --git a/examples/echoserver/echoserver.c b/examples/echoserver/echoserver.c index 1afd7d56c..efbab5276 100644 --- a/examples/echoserver/echoserver.c +++ b/examples/echoserver/echoserver.c @@ -36,8 +36,8 @@ #if !defined(WOLFSSL_MDK_ARM) #include "cmsis_os.h" - #include "rl_fs.h" - #include "rl_net.h" + #include "rl_fs.h" + #include "rl_net.h" #else #include "rtl.h" #include "wolfssl_MDK_ARM.h" @@ -91,6 +91,7 @@ THREAD_RETURN CYASSL_THREAD echoserver_test(void* args) word16 port; int argc = ((func_args*)args)->argc; char** argv = ((func_args*)args)->argv; + char buffer[CYASSL_MAX_ERROR_SZ]; #ifdef ECHO_OUT FILE* fout = 
stdout; @@ -232,8 +233,8 @@ THREAD_RETURN CYASSL_THREAD echoserver_test(void* args) #ifdef WOLFSSL_ASYNC_CRYPT ret = wolfAsync_DevOpen(&devId); - if (ret != 0) { - err_sys("Async device open failed"); + if (ret < 0) { + printf("Async device open failed\nRunning without async\n"); } wolfSSL_CTX_UseAsync(ctx, devId); #endif /* WOLFSSL_ASYNC_CRYPT */ @@ -283,23 +284,21 @@ THREAD_RETURN CYASSL_THREAD echoserver_test(void* args) #endif do { -#ifdef WOLFSSL_ASYNC_CRYPT - if (err == WC_PENDING_E) { - ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); - if (ret < 0) { break; } else if (ret == 0) { continue; } - } -#endif err = 0; /* Reset error */ ret = CyaSSL_accept(ssl); if (ret != SSL_SUCCESS) { err = CyaSSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif } - } while (ret != SSL_SUCCESS && err == WC_PENDING_E); - + } while (err == WC_PENDING_E); if (ret != SSL_SUCCESS) { - char buffer[CYASSL_MAX_ERROR_SZ]; - err = CyaSSL_get_error(ssl, 0); - printf("error = %d, %s\n", err, CyaSSL_ERR_error_string(err, buffer)); + printf("SSL_accept error = %d, %s\n", err, + CyaSSL_ERR_error_string(err, buffer)); printf("SSL_accept failed\n"); CyaSSL_free(ssl); CloseSocket(clientfd); @@ -321,7 +320,29 @@ THREAD_RETURN CYASSL_THREAD echoserver_test(void* args) write_ssl = ssl; #endif - while ( (echoSz = CyaSSL_read(ssl, command, sizeof(command)-1)) > 0) { + while (1) { + do { + err = 0; /* reset error */ + ret = CyaSSL_read(ssl, command, sizeof(command)-1); + if (ret <= 0) { + err = CyaSSL_get_error(ssl, 0); + #ifdef WOLFSSL_ASYNC_CRYPT + if (err == WC_PENDING_E) { + ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW); + if (ret < 0) break; + } + #endif + } + } while (err == WC_PENDING_E); + if (ret <= 0) { + if (err != SSL_ERROR_WANT_READ) { + printf("SSL_read echo error %d, %s!\n", err, + CyaSSL_ERR_error_string(err, buffer)); + } + break; + } + + echoSz = ret; 
if (firstRead == 1) { firstRead = 0; /* browser may send 1 byte 'G' to start */ @@ -334,7 +355,7 @@ THREAD_RETURN CYASSL_THREAD echoserver_test(void* args) strncpy(command, "GET", 4); /* fall through to normal GET */ } - + if ( strncmp(command, "quit", 4) == 0) { printf("client sent quit command: shutting down!\n"); shutDown = 1; @@ -356,7 +377,7 @@ THREAD_RETURN CYASSL_THREAD echoserver_test(void* args) char header[] = "\n
\n";
                 char body[]   = "greetings from wolfSSL\n";
                 char footer[] = "\r\n\r\n";
-            
+
                 strncpy(command, type, sizeof(type));
                 echoSz = sizeof(type) - 1;
 
@@ -367,18 +388,51 @@ THREAD_RETURN CYASSL_THREAD echoserver_test(void* args)
                 strncpy(&command[echoSz], footer, sizeof(footer));
                 echoSz += (int)sizeof(footer);
 
-                if (CyaSSL_write(write_ssl, command, echoSz) != echoSz)
-                    err_sys("SSL_write failed");
+                do { /* retry the GET reply while async crypto is pending */
+                    err = 0; /* reset error */
+                    ret = CyaSSL_write(write_ssl, command, echoSz);
+                    if (ret <= 0) { /* query the handle that was written */
+                        err = CyaSSL_get_error(write_ssl, 0);
+                    #ifdef WOLFSSL_ASYNC_CRYPT
+                        if (err == WC_PENDING_E) {
+                            ret = wolfSSL_AsyncPoll(write_ssl, WOLF_POLL_FLAG_CHECK_HW);
+                            if (ret < 0) break;
+                        }
+                    #endif
+                    }
+                } while (err == WC_PENDING_E);
+                if (ret != echoSz) {
+                    printf("SSL_write get error = %d, %s\n", err,
+                        CyaSSL_ERR_error_string(err, buffer));
+                    err_sys("SSL_write get failed");
+                }
                 break;
             }
             command[echoSz] = 0;
 
-            #ifdef ECHO_OUT
-                fputs(command, fout);
-            #endif
+        #ifdef ECHO_OUT
+            fputs(command, fout);
+        #endif
 
-            if (CyaSSL_write(write_ssl, command, echoSz) != echoSz)
-                err_sys("SSL_write failed");
+            do {
+                err = 0; /* reset error */
+                ret = CyaSSL_write(write_ssl, command, echoSz);
+                if (ret <= 0) {
+                    err = CyaSSL_get_error(write_ssl, 0);
+                #ifdef WOLFSSL_ASYNC_CRYPT
+                    if (err == WC_PENDING_E) {
+                        ret = wolfSSL_AsyncPoll(write_ssl, WOLF_POLL_FLAG_CHECK_HW);
+                        if (ret < 0) break;
+                    }
+                #endif
+                }
+            } while (err == WC_PENDING_E);
+
+            if (ret != echoSz) {
+                printf("SSL_write echo error = %d, %s\n", err,
+                        CyaSSL_ERR_error_string(err, buffer));
+                err_sys("SSL_write echo failed");
+            }
         }
 #ifndef CYASSL_DTLS
         CyaSSL_shutdown(ssl);
@@ -461,7 +515,7 @@ THREAD_RETURN CYASSL_THREAD echoserver_test(void* args)
         return args.return_code;
     }
 
-        
+
 #endif /* NO_MAIN_DRIVER */
 
 
diff --git a/examples/server/server.c b/examples/server/server.c
index dece767bf..229236b63 100644
--- a/examples/server/server.c
+++ b/examples/server/server.c
@@ -30,11 +30,6 @@
     #include    /* ecc_fp_free */
 #endif
 
-#if !defined(WOLFSSL_TRACK_MEMORY) && !defined(NO_MAIN_DRIVER)
-    /* in case memory tracker wants stats */
-    #define WOLFSSL_TRACK_MEMORY
-#endif
-
 #if defined(WOLFSSL_MDK_ARM) || defined(WOLFSSL_KEIL_TCP_NET)
         #include 
         #include 
@@ -97,25 +92,35 @@ static int NonBlockingSSL_Accept(SSL* ssl)
 #endif
     int error = SSL_get_error(ssl, 0);
     SOCKET_T sockfd = (SOCKET_T)CyaSSL_get_fd(ssl);
-    int select_ret;
+    int select_ret = 0;
 
     while (ret != SSL_SUCCESS && (error == SSL_ERROR_WANT_READ ||
-                                  error == SSL_ERROR_WANT_WRITE)) {
+                                  error == SSL_ERROR_WANT_WRITE ||
+                                  error == WC_PENDING_E)) {
         int currTimeout = 1;
 
         if (error == SSL_ERROR_WANT_READ) {
             /* printf("... server would read block\n"); */
-        } else {
+        }
+        else if (error == SSL_ERROR_WANT_WRITE) {
             /* printf("... server would write block\n"); */
         }
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        else if (error == WC_PENDING_E) {
+            ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW);
+            if (ret < 0) break;
+        }
+    #endif
 
-#ifdef CYASSL_DTLS
-        currTimeout = CyaSSL_dtls_get_current_timeout(ssl);
-#endif
-        select_ret = tcp_select(sockfd, currTimeout);
+        if (error != WC_PENDING_E) {
+        #ifdef CYASSL_DTLS
+            currTimeout = CyaSSL_dtls_get_current_timeout(ssl);
+        #endif
+            select_ret = tcp_select(sockfd, currTimeout);
+        }
 
         if ((select_ret == TEST_RECV_READY) ||
-                                        (select_ret == TEST_ERROR_READY)) {
+            (select_ret == TEST_ERROR_READY) || error == WC_PENDING_E) {
             #ifndef CYASSL_CALLBACKS
                 ret = SSL_accept(ssl);
             #else
@@ -127,12 +132,12 @@ static int NonBlockingSSL_Accept(SSL* ssl)
         else if (select_ret == TEST_TIMEOUT && !CyaSSL_dtls(ssl)) {
             error = SSL_ERROR_WANT_READ;
         }
-#ifdef CYASSL_DTLS
+    #ifdef CYASSL_DTLS
         else if (select_ret == TEST_TIMEOUT && CyaSSL_dtls(ssl) &&
                                             CyaSSL_dtls_got_timeout(ssl) >= 0) {
             error = SSL_ERROR_WANT_READ;
         }
-#endif
+    #endif
         else {
             error = SSL_FATAL_ERROR;
         }
@@ -144,60 +149,92 @@ static int NonBlockingSSL_Accept(SSL* ssl)
 /* Echo number of bytes specified by -e arg */
 int ServerEchoData(SSL* ssl, int clientfd, int echoData, int throughput)
 {
-    int ret = 0;
-    char* buffer = (char*)malloc(TEST_BUFFER_SIZE);
-    if(buffer) {
-        double start = 0, rx_time = 0, tx_time = 0;
-        int xfer_bytes = 0;
-        while((echoData && throughput == 0) || (!echoData && xfer_bytes < throughput)) {
-            int select_ret = tcp_select(clientfd, 1); /* Timeout=1 second */
-            if (select_ret == TEST_RECV_READY) {
-                int len = min(TEST_BUFFER_SIZE, throughput - xfer_bytes);
-                int rx_pos = 0;
-                if(throughput) {
-                    start = current_time(1);
-                }
-                while(rx_pos < len) {
-                    ret = SSL_read(ssl, &buffer[rx_pos], len - rx_pos);
-                    if (ret <= 0) {
-                        int readErr = SSL_get_error(ssl, 0);
-                        if (readErr != SSL_ERROR_WANT_READ) {
-                            printf("SSL_read error %d!\n", readErr);
-                            err_sys("SSL_read failed");
-                        }
-                    }
-                    else {
-                        rx_pos += ret;
-                    }
-                }
-                if(throughput) {
-                    rx_time += current_time(0) - start;
-                    start = current_time(1);
-                }
-                if (SSL_write(ssl, buffer, len) != len) {
-                    err_sys("SSL_write failed");
-                }
-                if(throughput) {
-                    tx_time += current_time(0) - start;
-                }
+    int ret = 0, err;
+    double start = 0, rx_time = 0, tx_time = 0;
+    int xfer_bytes = 0, select_ret, len, rx_pos;
+    char* buffer;
 
-                xfer_bytes += len;
+    buffer = (char*)malloc(TEST_BUFFER_SIZE);
+    if (!buffer) {
+        err_sys("Server buffer malloc failed");
+    }
+
+    while ((echoData && throughput == 0) ||
+          (!echoData && xfer_bytes < throughput))
+    {
+        select_ret = tcp_select(clientfd, 1); /* Timeout=1 second */
+        if (select_ret == TEST_RECV_READY) {
+
+            len = min(TEST_BUFFER_SIZE, throughput - xfer_bytes);
+            rx_pos = 0;
+
+            if (throughput) {
+                start = current_time(1);
             }
-        }
-        free(buffer);
 
-        if(throughput) {
-            printf("wolfSSL Server Benchmark %d bytes\n"
-                "\tRX      %8.3f ms (%8.3f MBps)\n"
-                "\tTX      %8.3f ms (%8.3f MBps)\n",
-                throughput,
-                tx_time * 1000, throughput / tx_time / 1024 / 1024,
-                rx_time * 1000, throughput / rx_time / 1024 / 1024
-            );
+            /* Read data */
+            while (rx_pos < len) {
+                ret = SSL_read(ssl, &buffer[rx_pos], len - rx_pos);
+                if (ret <= 0) {
+                    err = SSL_get_error(ssl, 0);
+                #ifdef WOLFSSL_ASYNC_CRYPT
+                    if (err == WC_PENDING_E) {
+                        ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW);
+                        if (ret < 0) break;
+                    }
+                    else
+                #endif
+                    if (err != SSL_ERROR_WANT_READ) {
+                        printf("SSL_read echo error %d\n", err);
+                        err_sys("SSL_read failed");
+                    }
+                }
+                else {
+                    rx_pos += ret;
+                }
+            }
+            if (throughput) {
+                rx_time += current_time(0) - start;
+                start = current_time(1);
+            }
+
+            /* Write data */
+            do {
+                err = 0; /* reset error */
+                ret = SSL_write(ssl, buffer, len);
+                if (ret <= 0) {
+                    err = SSL_get_error(ssl, 0);
+                #ifdef WOLFSSL_ASYNC_CRYPT
+                    if (err == WC_PENDING_E) {
+                        ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW);
+                        if (ret < 0) break;
+                    }
+                #endif
+                }
+            } while (err == WC_PENDING_E);
+            if (ret != len) {
+                printf("SSL_write echo error %d\n", err);
+                err_sys("SSL_write failed");
+            }
+
+            if (throughput) {
+                tx_time += current_time(0) - start;
+            }
+
+            xfer_bytes += len;
         }
     }
-    else {
-        err_sys("Server buffer malloc failed");
+
+    free(buffer);
+
+    if (throughput) {
+        printf("wolfSSL Server Benchmark %d bytes\n"
+            "\tRX      %8.3f ms (%8.3f MBps)\n"
+            "\tTX      %8.3f ms (%8.3f MBps)\n",
+            throughput, /* args follow the label order: RX, then TX */
+            rx_time * 1000, throughput / rx_time / 1024 / 1024,
+            tx_time * 1000, throughput / tx_time / 1024 / 1024
+        );
+    }
 
     return EXIT_SUCCESS;
@@ -300,7 +337,6 @@ THREAD_RETURN CYASSL_THREAD server_test(void* args)
     int    needDH = 0;
     int    useNtruKey   = 0;
     int    nonBlocking  = 0;
-    int    trackMemory  = 0;
     int    fewerPackets = 0;
     int    pkCallbacks  = 0;
     int    wc_shutdown     = 0;
@@ -349,6 +385,7 @@ THREAD_RETURN CYASSL_THREAD server_test(void* args)
 #ifdef HAVE_WNR
     const char* wnrConfigFile = wnrConfig;
 #endif
+    char buffer[CYASSL_MAX_ERROR_SZ];
 
 #ifdef WOLFSSL_STATIC_MEMORY
     #if (defined(HAVE_ECC) && !defined(ALT_ECC_SIZE)) \
@@ -392,9 +429,9 @@ THREAD_RETURN CYASSL_THREAD server_test(void* args)
 #ifdef WOLFSSL_VXWORKS
     useAnyAddr = 1;
 #else
-    /* Not Used: h, m, x, y, z, F, J, K, M, Q, T, U, V, W, X, Y */
+    /* Not Used: h, m, t, x, y, z, F, J, K, M, Q, T, U, V, W, X, Y */
     while ((ch = mygetopt(argc, argv, "?"
-                "abc:defgijk:l:nop:q:rstuv:w"
+                "abc:defgijk:l:nop:q:rsuv:w"
                 "A:B:C:D:E:GHIL:NO:PR:S:YZ:")) != -1) {
         switch (ch) {
             case '?' :
@@ -417,12 +454,6 @@ THREAD_RETURN CYASSL_THREAD server_test(void* args)
                 usePskPlus = 1;
                 break;
 
-            case 't' :
-            #ifdef USE_WOLFSSL_MEMORY
-                trackMemory = 1;
-            #endif
-                break;
-
             case 'n' :
                 useNtruKey = 1;
                 break;
@@ -633,11 +664,6 @@ THREAD_RETURN CYASSL_THREAD server_test(void* args)
         }
     }
 
-#if defined(USE_CYASSL_MEMORY) && !defined(WOLFSSL_STATIC_MEMORY)
-    if (trackMemory)
-        InitMemoryTracker();
-#endif
-
 #ifdef HAVE_WNR
     if (wc_InitNetRandom(wnrConfigFile, NULL, 5000) != 0)
         err_sys("can't load whitewood net random config file");
@@ -882,25 +908,26 @@ THREAD_RETURN CYASSL_THREAD server_test(void* args)
 
 #ifdef WOLFSSL_ASYNC_CRYPT
     ret = wolfAsync_DevOpen(&devId);
-    if (ret != 0) {
-        err_sys("Async device open failed");
+    if (ret < 0) {
+        printf("Async device open failed\nRunning without async\n");
     }
     wolfSSL_CTX_UseAsync(ctx, devId);
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
     while (1) {
         /* allow resume option */
-        if(resumeCount > 1) {
+        if (resumeCount > 1) {
             if (dtlsUDP == 0) {
                 SOCKADDR_IN_T client;
                 socklen_t client_len = sizeof(client);
                 clientfd = accept(sockfd, (struct sockaddr*)&client,
                                  (ACCEPT_THIRD_T)&client_len);
-            } else {
+            }
+            else {
                 tcp_listen(&sockfd, &port, useAnyAddr, dtlsUDP, dtlsSCTP);
                 clientfd = sockfd;
             }
-            if(WOLFSSL_SOCKET_IS_INVALID(clientfd)) {
+            if (WOLFSSL_SOCKET_IS_INVALID(clientfd)) {
                 err_sys("tcp accept failed");
             }
         }
@@ -1029,34 +1056,32 @@ THREAD_RETURN CYASSL_THREAD server_test(void* args)
         }
 #endif
 
-        do {
-#ifdef WOLFSSL_ASYNC_CRYPT
-            if (err == WC_PENDING_E) {
-                ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW);
-                if (ret < 0) { break; } else if (ret == 0) { continue; }
-            }
-#endif
-
-            err = 0; /* Reset error */
 #ifndef CYASSL_CALLBACKS
-            if (nonBlocking) {
-                ret = NonBlockingSSL_Accept(ssl);
-            }
-            else {
-                ret = SSL_accept(ssl);
-            }
-#else
+        if (nonBlocking) {
             ret = NonBlockingSSL_Accept(ssl);
+        }
+        else {
+            do {
+                err = 0; /* reset error */
+                ret = SSL_accept(ssl);
+                if (ret != SSL_SUCCESS) {
+                    err = SSL_get_error(ssl, 0);
+                #ifdef WOLFSSL_ASYNC_CRYPT
+                    if (err == WC_PENDING_E) {
+                        ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW);
+                        if (ret < 0) break;
+                    }
+                #endif
+                }
+            } while (err == WC_PENDING_E);
+        }
+#else
+        ret = NonBlockingSSL_Accept(ssl);
 #endif
-            if (ret != SSL_SUCCESS) {
-                err = SSL_get_error(ssl, 0);
-            }
-        } while (ret != SSL_SUCCESS && err == WC_PENDING_E);
-
         if (ret != SSL_SUCCESS) {
-            char buffer[CYASSL_MAX_ERROR_SZ];
             err = SSL_get_error(ssl, 0);
-            printf("error = %d, %s\n", err, ERR_error_string(err, buffer));
+            printf("SSL_accept error %d, %s\n", err,
+                                                ERR_error_string(err, buffer));
             err_sys("SSL_accept failed");
         }
 
@@ -1119,27 +1144,63 @@ THREAD_RETURN CYASSL_THREAD server_test(void* args)
             free(list);
         }
 #endif
-        if(echoData == 0 && throughput == 0) {
-            ret = SSL_read(ssl, input, sizeof(input)-1);
+        if (echoData == 0 && throughput == 0) {
+            const char* write_msg;
+            int write_msg_sz;
+
+            /* Read data */
+            do {
+                err = 0; /* reset error */
+                ret = SSL_read(ssl, input, sizeof(input)-1);
+                if (ret <= 0) {
+                    err = SSL_get_error(ssl, 0);
+
+                #ifdef WOLFSSL_ASYNC_CRYPT
+                    if (err == WC_PENDING_E) {
+                        ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW);
+                        if (ret < 0) break;
+                    }
+                    else
+                #endif
+                    if (err != SSL_ERROR_WANT_READ) {
+                        printf("SSL_read input error %d, %s\n", err,
+                                                ERR_error_string(err, buffer));
+                        err_sys("SSL_read failed");
+                    }
+                }
+            } while (err == WC_PENDING_E);
             if (ret > 0) {
-                input[ret] = 0;
+                input[ret] = 0; /* null terminate message */
                 printf("Client message: %s\n", input);
-
-            }
-            else if (ret < 0) {
-                int readErr = SSL_get_error(ssl, 0);
-                if (readErr != SSL_ERROR_WANT_READ)
-                    err_sys("SSL_read failed");
             }
 
+            /* Write data */
             if (!useWebServerMsg) {
-                if (SSL_write(ssl, msg, sizeof(msg)) != sizeof(msg))
-                    err_sys("SSL_write failed");
+                write_msg = msg;
+                write_msg_sz = sizeof(msg);
             }
             else {
-                if (SSL_write(ssl, webServerMsg, sizeof(webServerMsg))
-                                                        != sizeof(webServerMsg))
-                    err_sys("SSL_write failed");
+                write_msg = webServerMsg;
+                write_msg_sz = sizeof(webServerMsg);
+            }
+            do {
+                err = 0; /* reset error */
+                ret = SSL_write(ssl, write_msg, write_msg_sz);
+                if (ret <= 0) {
+                    err = SSL_get_error(ssl, 0);
+
+                #ifdef WOLFSSL_ASYNC_CRYPT
+                    if (err == WC_PENDING_E) {
+                        ret = wolfSSL_AsyncPoll(ssl, WOLF_POLL_FLAG_CHECK_HW);
+                        if (ret < 0) break;
+                    }
+                #endif
+                }
+            } while (err == WC_PENDING_E);
+            if (ret != write_msg_sz) {
+                printf("SSL_write msg error %d, %s\n", err,
+                                                ERR_error_string(err, buffer));
+                err_sys("SSL_write failed");
             }
         }
         else {
@@ -1199,11 +1260,6 @@ THREAD_RETURN CYASSL_THREAD server_test(void* args)
     ecc_fp_free();  /* free per thread cache */
 #endif
 
-#if defined(USE_WOLFSSL_MEMORY) && !defined(WOLFSSL_STATIC_MEMORY)
-    if (trackMemory)
-        ShowMemoryTracker();
-#endif
-
 #ifdef CYASSL_TIRTOS
     fdCloseSession(Task_self());
 #endif
@@ -1226,7 +1282,6 @@ THREAD_RETURN CYASSL_THREAD server_test(void* args)
     (void) useNtruKey;
     (void) ourDhParam;
     (void) ourCert;
-    (void) trackMemory;
 #ifndef CYASSL_TIRTOS
     return 0;
 #endif
diff --git a/mcapi/crypto.h b/mcapi/crypto.h
index 6db1dd211..4d71a3f26 100644
--- a/mcapi/crypto.h
+++ b/mcapi/crypto.h
@@ -34,7 +34,7 @@
 
 /* MD5 */
 typedef struct CRYPT_MD5_CTX {
-    int holder[24];   /* big enough to hold internal, but check on init */
+    int holder[28];   /* big enough to hold internal, but check on init */
 } CRYPT_MD5_CTX;
 
 int CRYPT_MD5_Initialize(CRYPT_MD5_CTX*);
@@ -42,13 +42,13 @@ int CRYPT_MD5_DataAdd(CRYPT_MD5_CTX*, const unsigned char*, unsigned int);
 int CRYPT_MD5_Finalize(CRYPT_MD5_CTX*, unsigned char*);
 
 enum {
-    CRYPT_MD5_DIGEST_SIZE = 16 
+    CRYPT_MD5_DIGEST_SIZE = 16
 };
 
 
 /* SHA */
 typedef struct CRYPT_SHA_CTX {
-    int holder[24];   /* big enough to hold internal, but check on init */
+    int holder[28];   /* big enough to hold internal, but check on init */
 } CRYPT_SHA_CTX;
 
 int CRYPT_SHA_Initialize(CRYPT_SHA_CTX*);
@@ -62,7 +62,7 @@ enum {
 
 /* SHA-256 */
 typedef struct CRYPT_SHA256_CTX {
-    int holder[28];   /* big enough to hold internal, but check on init */
+    int holder[32];   /* big enough to hold internal, but check on init */
 } CRYPT_SHA256_CTX;
 
 int CRYPT_SHA256_Initialize(CRYPT_SHA256_CTX*);
@@ -70,13 +70,13 @@ int CRYPT_SHA256_DataAdd(CRYPT_SHA256_CTX*, const unsigned char*, unsigned int);
 int CRYPT_SHA256_Finalize(CRYPT_SHA256_CTX*, unsigned char*);
 
 enum {
-    CRYPT_SHA256_DIGEST_SIZE = 32 
+    CRYPT_SHA256_DIGEST_SIZE = 32
 };
 
 
 /* SHA-384 */
 typedef struct CRYPT_SHA384_CTX {
-    long long holder[32];   /* big enough to hold internal, but check on init */
+    long long holder[36];   /* big enough to hold internal, but check on init */
 } CRYPT_SHA384_CTX;
 
 int CRYPT_SHA384_Initialize(CRYPT_SHA384_CTX*);
@@ -98,13 +98,13 @@ int CRYPT_SHA512_DataAdd(CRYPT_SHA512_CTX*, const unsigned char*, unsigned int);
 int CRYPT_SHA512_Finalize(CRYPT_SHA512_CTX*, unsigned char*);
 
 enum {
-    CRYPT_SHA512_DIGEST_SIZE = 64 
+    CRYPT_SHA512_DIGEST_SIZE = 64
 };
 
 
 /* HMAC */
 typedef struct CRYPT_HMAC_CTX {
-    long long holder[69];   /* big enough to hold internal, but check on init */
+    long long holder[72];   /* big enough to hold internal, but check on init */
 } CRYPT_HMAC_CTX;
 
 int CRYPT_HMAC_SetKey(CRYPT_HMAC_CTX*, int, const unsigned char*, unsigned int);
@@ -113,10 +113,10 @@ int CRYPT_HMAC_Finalize(CRYPT_HMAC_CTX*, unsigned char*);
 
 /* HMAC types */
 enum {
-    CRYPT_HMAC_SHA    = 1, 
-    CRYPT_HMAC_SHA256 = 2, 
-    CRYPT_HMAC_SHA384 = 5, 
-    CRYPT_HMAC_SHA512 = 4 
+    CRYPT_HMAC_SHA    = 1,
+    CRYPT_HMAC_SHA256 = 2,
+    CRYPT_HMAC_SHA384 = 5,
+    CRYPT_HMAC_SHA512 = 4
 };
 
 
@@ -128,7 +128,7 @@ int CRYPT_HUFFMAN_DeCompress(unsigned char*, unsigned int, const unsigned char*,
 
 /* flag to use static huffman */
 enum {
-    CRYPT_HUFFMAN_COMPRESS_STATIC = 1 
+    CRYPT_HUFFMAN_COMPRESS_STATIC = 1
 };
 
 
@@ -144,7 +144,7 @@ int CRYPT_RNG_BlockGenerate(CRYPT_RNG_CTX*, unsigned char*, unsigned int);
 
 /* TDES */
 typedef struct CRYPT_TDES_CTX {
-    int holder[100];   /* big enough to hold internal, but check on init */
+    int holder[104];   /* big enough to hold internal, but check on init */
 } CRYPT_TDES_CTX;
 
 int CRYPT_TDES_KeySet(CRYPT_TDES_CTX*, const unsigned char*,
@@ -158,13 +158,13 @@ int CRYPT_TDES_CBC_Decrypt(CRYPT_TDES_CTX*, unsigned char*,
 /* key direction flags for setup */
 enum {
     CRYPT_TDES_ENCRYPTION = 0,
-    CRYPT_TDES_DECRYPTION = 1 
+    CRYPT_TDES_DECRYPTION = 1
 };
 
 
 /* AES */
 typedef struct CRYPT_AES_CTX {
-    int holder[76];   /* big enough to hold internal, but check on init */
+    int holder[78];   /* big enough to hold internal, but check on init */
 } CRYPT_AES_CTX;
 
 /* key */
@@ -262,7 +262,7 @@ int CRYPT_ERROR_StringGet(int, char*);
 
 
 #ifdef __cplusplus
-    }  /* extern "C" */ 
+    }  /* extern "C" */
 #endif
 
 
diff --git a/src/internal.c b/src/internal.c
old mode 100644
new mode 100755
index 80a97d374..3435b8776
--- a/src/internal.c
+++ b/src/internal.c
@@ -120,15 +120,44 @@ WOLFSSL_CALLBACKS needs LARGE_STATIC_BUFFERS, please add LARGE_STATIC_BUFFERS
 #endif
 
 
-typedef enum {
+enum processReply {
     doProcessInit = 0,
 #ifndef NO_WOLFSSL_SERVER
     runProcessOldClientHello,
 #endif
     getRecordLayerHeader,
     getData,
+    decryptMessage,
+    verifyMessage,
     runProcessingOneMessage
-} processReply;
+};
+
+/* sub-states for build message (no trailing comma: not valid in C89) */
+enum buildMsgState {
+    BUILD_MSG_BEGIN = 0, /* InitSSL starts options.buildMsgState here */
+    BUILD_MSG_SIZE,
+    BUILD_MSG_HASH,
+    BUILD_MSG_VERIFY_MAC,
+    BUILD_MSG_ENCRYPT
+};
+
+/* sub-states for cipher operations (no trailing comma: not valid in C89) */
+enum cipherState {
+    CIPHER_STATE_BEGIN = 0, /* InitSSL sets encrypt/decrypt.state to this */
+    CIPHER_STATE_DO,
+    CIPHER_STATE_END
+};
+
+/* sub-states for send/do key share (key exchange) */
+enum keyShareState {
+    KEYSHARE_BEGIN = 0, /* InitSSL starts options.keyShareState here */
+    KEYSHARE_BUILD,
+    KEYSHARE_DO,
+    KEYSHARE_VERIFY,
+    KEYSHARE_FINALIZE,
+    KEYSHARE_END
+};
+
 
 #ifndef NO_OLD_TLS
 static int SSL_hmac(WOLFSSL* ssl, byte* digest, const byte* in, word32 sz,
@@ -1437,8 +1466,8 @@ void SSL_CtxResourceFree(WOLFSSL_CTX* ctx)
         XFREE(ctx->suites, ctx->heap, DYNAMIC_TYPE_SUITES);
 
 #ifndef NO_DH
-    XFREE(ctx->serverDH_G.buffer, ctx->heap, DYNAMIC_TYPE_DH);
-    XFREE(ctx->serverDH_P.buffer, ctx->heap, DYNAMIC_TYPE_DH);
+    XFREE(ctx->serverDH_G.buffer, ctx->heap, DYNAMIC_TYPE_DH_BUFFER);
+    XFREE(ctx->serverDH_P.buffer, ctx->heap, DYNAMIC_TYPE_DH_BUFFER);
 #endif /* !NO_DH */
 
 #ifdef SINGLE_THREADED
@@ -1601,31 +1630,25 @@ void FreeCiphers(WOLFSSL* ssl)
 {
     (void)ssl;
 #ifdef BUILD_ARC4
-    #ifdef WOLFSSL_ASYNC_CRYPT
-    if (ssl->devId != INVALID_DEVID) {
-        wc_Arc4AsyncFree(ssl->encrypt.arc4);
-        wc_Arc4AsyncFree(ssl->decrypt.arc4);
-    }
-    #endif
+    wc_Arc4Free(ssl->encrypt.arc4);
+    wc_Arc4Free(ssl->decrypt.arc4);
     XFREE(ssl->encrypt.arc4, ssl->heap, DYNAMIC_TYPE_CIPHER);
     XFREE(ssl->decrypt.arc4, ssl->heap, DYNAMIC_TYPE_CIPHER);
 #endif
 #ifdef BUILD_DES3
-    #ifdef WOLFSSL_ASYNC_CRYPT
-    if (ssl->devId != INVALID_DEVID) {
-        wc_Des3AsyncFree(ssl->encrypt.des3);
-        wc_Des3AsyncFree(ssl->decrypt.des3);
-    }
-    #endif
+    wc_Des3Free(ssl->encrypt.des3);
+    wc_Des3Free(ssl->decrypt.des3);
     XFREE(ssl->encrypt.des3, ssl->heap, DYNAMIC_TYPE_CIPHER);
     XFREE(ssl->decrypt.des3, ssl->heap, DYNAMIC_TYPE_CIPHER);
 #endif
 #ifdef BUILD_AES
-    #ifdef WOLFSSL_ASYNC_CRYPT
-    if (ssl->devId != INVALID_DEVID) {
-        wc_AesAsyncFree(ssl->encrypt.aes);
-        wc_AesAsyncFree(ssl->decrypt.aes);
-    }
+    wc_AesFree(ssl->encrypt.aes);
+    wc_AesFree(ssl->decrypt.aes);
+    #if defined(BUILD_AESGCM) || defined(HAVE_AESCCM)
+        XFREE(ssl->decrypt.additional, ssl->heap, DYNAMIC_TYPE_AES);
+        XFREE(ssl->decrypt.nonce, ssl->heap, DYNAMIC_TYPE_AES);
+        XFREE(ssl->encrypt.additional, ssl->heap, DYNAMIC_TYPE_AES);
+        XFREE(ssl->encrypt.nonce, ssl->heap, DYNAMIC_TYPE_AES);
     #endif
     XFREE(ssl->encrypt.aes, ssl->heap, DYNAMIC_TYPE_CIPHER);
     XFREE(ssl->decrypt.aes, ssl->heap, DYNAMIC_TYPE_CIPHER);
@@ -2697,11 +2720,7 @@ int RsaSign(WOLFSSL* ssl, const byte* in, word32 inSz, byte* out,
     /* Handle async pending response */
 #if defined(WOLFSSL_ASYNC_CRYPT)
     if (ret == WC_PENDING_E) {
-        ret = wolfAsync_EventInit(&ssl->event,
-            WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT, &key->asyncDev);
-        if (ret == 0) {
-            ret = WC_PENDING_E;
-        }
+        ret = wolfSSL_AsyncPush(ssl, &key->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
@@ -2741,11 +2760,7 @@ int RsaVerify(WOLFSSL* ssl, byte* in, word32 inSz,
     /* Handle async pending response */
 #if defined(WOLFSSL_ASYNC_CRYPT)
     if (ret == WC_PENDING_E) {
-        ret = wolfAsync_EventInit(&ssl->event,
-            WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT, &key->asyncDev);
-        if (ret == 0) {
-            ret = WC_PENDING_E;
-        }
+        ret = wolfSSL_AsyncPush(ssl, &key->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
@@ -2789,11 +2804,7 @@ int VerifyRsaSign(WOLFSSL* ssl, byte* verifySig, word32 sigSz,
     /* Handle async pending response */
 #if defined(WOLFSSL_ASYNC_CRYPT)
     if (ret == WC_PENDING_E) {
-        ret = wolfAsync_EventInit(&ssl->event,
-            WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT, &key->asyncDev);
-        if (ret == 0) {
-            ret = WC_PENDING_E;
-        }
+        ret = wolfSSL_AsyncPush(ssl, &key->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
@@ -2833,11 +2844,7 @@ int RsaDec(WOLFSSL* ssl, byte* in, word32 inSz, byte** out, word32* outSz,
     /* Handle async pending response */
 #if defined(WOLFSSL_ASYNC_CRYPT)
     if (ret == WC_PENDING_E) {
-        ret = wolfAsync_EventInit(&ssl->event,
-            WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT, &key->asyncDev);
-        if (ret == 0) {
-            ret = WC_PENDING_E;
-        }
+        ret = wolfSSL_AsyncPush(ssl, &key->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
@@ -2878,11 +2885,7 @@ int RsaEnc(WOLFSSL* ssl, const byte* in, word32 inSz, byte* out, word32* outSz,
     /* Handle async pending response */
 #if defined(WOLFSSL_ASYNC_CRYPT)
     if (ret == WC_PENDING_E) {
-        ret = wolfAsync_EventInit(&ssl->event,
-            WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT, &key->asyncDev);
-        if (ret == 0) {
-            ret =  WC_PENDING_E;
-        }
+        ret = wolfSSL_AsyncPush(ssl, &key->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
@@ -2927,11 +2930,7 @@ int EccSign(WOLFSSL* ssl, const byte* in, word32 inSz, byte* out,
     /* Handle async pending response */
 #if defined(WOLFSSL_ASYNC_CRYPT)
     if (ret == WC_PENDING_E) {
-        ret = wolfAsync_EventInit(&ssl->event,
-            WOLF_EVENT_TYPE_ASYNC_WOLFSSL, &key->asyncDev);
-        if (ret == 0) {
-            ret = WC_PENDING_E;
-        }
+        ret = wolfSSL_AsyncPush(ssl, &key->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
@@ -2944,7 +2943,7 @@ int EccVerify(WOLFSSL* ssl, const byte* in, word32 inSz, const byte* out,
     word32 outSz, ecc_key* key, byte* keyBuf, word32 keySz,
     void* ctx)
 {
-    int ret, verify;
+    int ret;
 
     (void)ssl;
     (void)keyBuf;
@@ -2956,27 +2955,23 @@ int EccVerify(WOLFSSL* ssl, const byte* in, word32 inSz, const byte* out,
 #ifdef HAVE_PK_CALLBACKS
     if (ssl->ctx->EccVerifyCb) {
         ret = ssl->ctx->EccVerifyCb(ssl, in, inSz, out, outSz, keyBuf, keySz,
-            &verify, ctx);
+            &ssl->eccVerifyRes, ctx);
     }
     else
 #endif /* HAVE_PK_CALLBACKS  */
     {
-        ret = wc_ecc_verify_hash(in, inSz, out, outSz, &verify, key);
+        ret = wc_ecc_verify_hash(in, inSz, out, outSz, &ssl->eccVerifyRes, key);
     }
 
     /* Handle async pending response */
 #if defined(WOLFSSL_ASYNC_CRYPT)
     if (ret == WC_PENDING_E) {
-        ret = wolfAsync_EventInit(&ssl->event,
-            WOLF_EVENT_TYPE_ASYNC_WOLFSSL, &key->asyncDev);
-        if (ret == 0) {
-            ret =  WC_PENDING_E;
-        }
+        ret = wolfSSL_AsyncPush(ssl, &key->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
     }
     else
 #endif /* WOLFSSL_ASYNC_CRYPT */
     {
-        ret = (ret != 0 || verify == 0) ? VERIFY_SIGN_ERROR : 0;
+        ret = (ret != 0 || ssl->eccVerifyRes == 0) ? VERIFY_SIGN_ERROR : 0;
     }
 
     WOLFSSL_LEAVE("EccVerify", ret);
@@ -3016,10 +3011,10 @@ int EccVerify(WOLFSSL* ssl, const byte* in, word32 inSz, const byte* out,
         }
         else if (ssl->options.side == WOLFSSL_SERVER_END) {
             if (ssl->specs.static_ecdh) {
-                if (ssl->sigKey == NULL) {
+                if (ssl->hsKey == NULL) {
                     return NO_PRIVATE_KEY;
                 }
-                tmpKey = (struct ecc_key*)ssl->sigKey;
+                tmpKey = (struct ecc_key*)ssl->hsKey;
             }
             else {
                 if (!ssl->eccTempKeyPresent) {
@@ -3071,11 +3066,8 @@ int EccSharedSecret(WOLFSSL* ssl, ecc_key* priv_key, ecc_key* pub_key,
     /* Handle async pending response */
 #if defined(WOLFSSL_ASYNC_CRYPT)
     if (ret == WC_PENDING_E) {
-        ret = wolfAsync_EventInit(&ssl->event,
-            WOLF_EVENT_TYPE_ASYNC_WOLFSSL, &priv_key->asyncDev);
-        if (ret == 0) {
-            ret = WC_PENDING_E;
-        }
+        ret = wolfSSL_AsyncPush(ssl, &priv_key->asyncDev,
+                                                    WC_ASYNC_FLAG_CALL_AGAIN);
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
@@ -3111,11 +3103,7 @@ int EccMakeKey(WOLFSSL* ssl, ecc_key* key, ecc_key* peer)
     /* Handle async pending response */
 #if defined(WOLFSSL_ASYNC_CRYPT)
     if (ret == WC_PENDING_E) {
-        ret = wolfAsync_EventInit(&ssl->event,
-            WOLF_EVENT_TYPE_ASYNC_WOLFSSL, &key->asyncDev);
-        if (ret == 0) {
-            ret = WC_PENDING_E;
-        }
+        ret = wolfSSL_AsyncPush(ssl, &key->asyncDev, WC_ASYNC_FLAG_NONE);
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
@@ -3131,50 +3119,49 @@ int EccMakeKey(WOLFSSL* ssl, ecc_key* key, ecc_key* peer)
 #if !defined(NO_CERTS) || !defined(NO_PSK)
 #if !defined(NO_DH)
 
-int DhGenKeyPair(WOLFSSL* ssl,
-    byte* p, word32 pSz,
-    byte* g, word32 gSz,
+int DhGenKeyPair(WOLFSSL* ssl, DhKey* dhKey,
     byte* priv, word32* privSz,
     byte* pub, word32* pubSz)
 {
     int ret;
-    DhKey dhKey;
 
-    ret = wc_InitDhKey(&dhKey);
-    if (ret == 0) {
-        ret = wc_DhSetKey(&dhKey, p, pSz, g, gSz);
-        if (ret == 0) {
-            ret = wc_DhGenerateKeyPair(&dhKey, ssl->rng, priv, privSz, pub, pubSz);
-        }
-        wc_FreeDhKey(&dhKey);
+    WOLFSSL_ENTER("DhGenKeyPair");
+
+    ret = wc_DhGenerateKeyPair(dhKey, ssl->rng, priv, privSz, pub, pubSz);
+
+    /* Handle async pending response */
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    if (ret == WC_PENDING_E) {
+        ret = wolfSSL_AsyncPush(ssl, &dhKey->asyncDev, WC_ASYNC_FLAG_NONE);
     }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+    WOLFSSL_LEAVE("DhGenKeyPair", ret);
 
     return ret;
 }
 
-int DhAgree(WOLFSSL* ssl,
-    byte* p, word32 pSz,
-    byte* g, word32 gSz,
-    byte* priv, word32* privSz,
-    byte* pub, word32* pubSz,
+int DhAgree(WOLFSSL* ssl, DhKey* dhKey,
+    const byte* priv, word32 privSz,
     const byte* otherPub, word32 otherPubSz,
     byte* agree, word32* agreeSz)
 {
     int ret;
-    DhKey dhKey;
 
-    ret = wc_InitDhKey(&dhKey);
-    if (ret == 0) {
-        ret = wc_DhSetKey(&dhKey, p, pSz, g, gSz);
-        if (ret == 0 && pub) {
-            /* for DH, encSecret is Yc, agree is pre-master */
-            ret = wc_DhGenerateKeyPair(&dhKey, ssl->rng, priv, privSz, pub, pubSz);
-        }
-        if (ret == 0) {
-            ret = wc_DhAgree(&dhKey, agree, agreeSz, priv, *privSz, otherPub, otherPubSz);
-        }
-        wc_FreeDhKey(&dhKey);
+    (void)ssl;
+
+    WOLFSSL_ENTER("DhAgree");
+
+    ret = wc_DhAgree(dhKey, agree, agreeSz, priv, privSz, otherPub, otherPubSz);
+
+    /* Handle async pending response */
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    if (ret == WC_PENDING_E) {
+        ret = wolfSSL_AsyncPush(ssl, &dhKey->asyncDev, WC_ASYNC_FLAG_NONE);
     }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+    WOLFSSL_LEAVE("DhAgree", ret);
 
     return ret;
 }
@@ -3368,6 +3355,75 @@ int SetSSL_CTX(WOLFSSL* ssl, WOLFSSL_CTX* ctx, int writeDup)
     return SSL_SUCCESS;
 }
 
+static int InitHashes(WOLFSSL* ssl)
+{
+    int ret = 0; /* returned as-is if every hash below is compiled out */
+    /* Allocate zeroed ssl->hsHashes; return MEMORY_E if that fails. */
+    ssl->hsHashes = (HS_Hashes*)XMALLOC(sizeof(HS_Hashes), ssl->heap,
+                                                           DYNAMIC_TYPE_HASHES);
+    if (ssl->hsHashes == NULL) {
+        WOLFSSL_MSG("HS_Hashes Memory error");
+        return MEMORY_E;
+    }
+    XMEMSET(ssl->hsHashes, 0, sizeof(HS_Hashes));
+    /* Init each enabled hash; the first non-zero result is returned. */
+#ifndef NO_OLD_TLS
+#ifndef NO_MD5
+    ret = wc_InitMd5_ex(&ssl->hsHashes->hashMd5, ssl->heap, ssl->devId);
+    if (ret != 0)
+        return ret;
+#endif
+#ifndef NO_SHA
+    ret = wc_InitSha_ex(&ssl->hsHashes->hashSha, ssl->heap, ssl->devId);
+    if (ret != 0)
+        return ret;
+#endif
+#endif /* !NO_OLD_TLS */
+#ifndef NO_SHA256
+    ret = wc_InitSha256_ex(&ssl->hsHashes->hashSha256, ssl->heap, ssl->devId);
+    if (ret != 0)
+        return ret;
+#endif
+#ifdef WOLFSSL_SHA384
+    ret = wc_InitSha384_ex(&ssl->hsHashes->hashSha384, ssl->heap, ssl->devId);
+    if (ret != 0)
+        return ret;
+#endif
+#ifdef WOLFSSL_SHA512
+    ret = wc_InitSha512_ex(&ssl->hsHashes->hashSha512, ssl->heap, ssl->devId);
+    if (ret != 0)
+        return ret;
+#endif
+
+    return ret;
+}
+
+static void FreeHashes(WOLFSSL* ssl)
+{ /* Free each compiled-in handshake hash, then ssl->hsHashes itself. */
+    if (ssl->hsHashes) { /* nothing to do when hsHashes is NULL */
+#ifndef NO_OLD_TLS
+    #ifndef NO_MD5
+        wc_Md5Free(&ssl->hsHashes->hashMd5);
+    #endif
+    #ifndef NO_SHA
+        wc_ShaFree(&ssl->hsHashes->hashSha);
+    #endif
+#endif /* !NO_OLD_TLS */
+    #ifndef NO_SHA256
+        wc_Sha256Free(&ssl->hsHashes->hashSha256);
+    #endif
+    #ifdef WOLFSSL_SHA384
+        wc_Sha384Free(&ssl->hsHashes->hashSha384);
+    #endif
+    #ifdef WOLFSSL_SHA512
+        wc_Sha512Free(&ssl->hsHashes->hashSha512);
+    #endif
+
+        XFREE(ssl->hsHashes, ssl->heap, DYNAMIC_TYPE_HASHES);
+        ssl->hsHashes = NULL; /* makes a repeated call a no-op */
+    }
+}
+
 
 /* init everything to 0, NULL, default values before calling anything that may
    fail so that destructor has a "good" state to cleanup
@@ -3488,6 +3544,7 @@ int InitSSL(WOLFSSL* ssl, WOLFSSL_CTX* ctx, int writeDup)
 
     ssl->rfd = -1;   /* set to invalid descriptor */
     ssl->wfd = -1;
+    ssl->devId = ctx->devId; /* device for async HW (from wolfAsync_DevOpen) */
 
     ssl->IOCB_ReadCtx  = &ssl->rfd;  /* prevent invalid pointer access if not */
     ssl->IOCB_WriteCtx = &ssl->wfd;  /* correctly set */
@@ -3497,12 +3554,17 @@ int InitSSL(WOLFSSL* ssl, WOLFSSL_CTX* ctx, int writeDup)
     ssl->IOCB_WriteCtx = &ssl->nxCtx;  /* and write */
 #endif
 
+    /* initialize states */
     ssl->options.serverState = NULL_STATE;
     ssl->options.clientState = NULL_STATE;
     ssl->options.connectState = CONNECT_BEGIN;
     ssl->options.acceptState  = ACCEPT_BEGIN;
     ssl->options.handShakeState  = NULL_STATE;
     ssl->options.processReply = doProcessInit;
+    ssl->options.keyShareState = KEYSHARE_BEGIN;
+    ssl->options.buildMsgState = BUILD_MSG_BEGIN;
+    ssl->encrypt.state = CIPHER_STATE_BEGIN;
+    ssl->decrypt.state = CIPHER_STATE_BEGIN;
 
 #ifdef WOLFSSL_DTLS
     #ifdef WOLFSSL_SCTP
@@ -3564,11 +3626,16 @@ int InitSSL(WOLFSSL* ssl, WOLFSSL_CTX* ctx, int writeDup)
         /* arrays */
         ssl->arrays = (Arrays*)XMALLOC(sizeof(Arrays), ssl->heap,
                                                            DYNAMIC_TYPE_ARRAYS);
-        if (ssl->arrays == NULL) {
-            WOLFSSL_MSG("Arrays Memory error");
-            return MEMORY_E;
-        }
-        XMEMSET(ssl->arrays, 0, sizeof(Arrays));
+    if (ssl->arrays == NULL) {
+        WOLFSSL_MSG("Arrays Memory error");
+        return MEMORY_E;
+    }
+    XMEMSET(ssl->arrays, 0, sizeof(Arrays));
+    ssl->arrays->preMasterSecret = (byte*)XMALLOC(ENCRYPT_LEN, ssl->heap,
+        DYNAMIC_TYPE_TMP_BUFFER);
+    if (ssl->arrays->preMasterSecret == NULL) {
+        return MEMORY_E;
+    }
 
         /* suites */
         ssl->suites = (Suites*)XMALLOC(sizeof(Suites), ssl->heap,
@@ -3602,7 +3669,7 @@ int InitSSL(WOLFSSL* ssl, WOLFSSL_CTX* ctx, int writeDup)
 
         /* FIPS RNG API does not accept a heap hint */
 #ifndef HAVE_FIPS
-        if ( (ret = wc_InitRng_ex(ssl->rng, ssl->heap)) != 0) {
+        if ( (ret = wc_InitRng_ex(ssl->rng, ssl->heap, ssl->devId)) != 0) {
             WOLFSSL_MSG("RNG Init error");
             return ret;
         }
@@ -3620,43 +3687,37 @@ int InitSSL(WOLFSSL* ssl, WOLFSSL_CTX* ctx, int writeDup)
     }
 
     /* hsHashes */
-    ssl->hsHashes = (HS_Hashes*)XMALLOC(sizeof(HS_Hashes), ssl->heap,
-                                                           DYNAMIC_TYPE_HASHES);
-    if (ssl->hsHashes == NULL) {
-        WOLFSSL_MSG("HS_Hashes Memory error");
-        return MEMORY_E;
-    }
-    XMEMSET(ssl->hsHashes, 0, sizeof(HS_Hashes));
+    ret = InitHashes(ssl);
+    if (ret != 0)
+        return ret;
 
-#ifndef NO_OLD_TLS
-#ifndef NO_MD5
-    wc_InitMd5(&ssl->hsHashes->hashMd5);
+#ifdef SINGLE_THREADED
+    ssl->rng = ctx->rng;   /* CTX may have one, if so use it */
+#endif
+
+    if (ssl->rng == NULL) {
+        /* RNG */
+        ssl->rng = (WC_RNG*)XMALLOC(sizeof(WC_RNG), ssl->heap,DYNAMIC_TYPE_RNG);
+        if (ssl->rng == NULL) {
+            WOLFSSL_MSG("RNG Memory error");
+            return MEMORY_E;
+        }
+        XMEMSET(ssl->rng, 0, sizeof(WC_RNG));
+        ssl->options.weOwnRng = 1;
+
+        /* FIPS RNG API does not accept a heap hint */
+#ifndef HAVE_FIPS
+        if ( (ret = wc_InitRng_ex(ssl->rng, ssl->heap, ssl->devId)) != 0) {
+            WOLFSSL_MSG("RNG Init error");
+            return ret;
+        }
+#else
+        if ( (ret = wc_InitRng(ssl->rng)) != 0) {
+            WOLFSSL_MSG("RNG Init error");
+            return ret;
+        }
 #endif
-#ifndef NO_SHA
-    ret = wc_InitSha(&ssl->hsHashes->hashSha);
-    if (ret != 0) {
-        return ret;
     }
-#endif
-#endif
-#ifndef NO_SHA256
-    ret = wc_InitSha256(&ssl->hsHashes->hashSha256);
-    if (ret != 0) {
-        return ret;
-    }
-#endif
-#ifdef WOLFSSL_SHA384
-    ret = wc_InitSha384(&ssl->hsHashes->hashSha384);
-    if (ret != 0) {
-        return ret;
-    }
-#endif
-#ifdef WOLFSSL_SHA512
-    ret = wc_InitSha512(&ssl->hsHashes->hashSha512);
-    if (ret != 0) {
-        return ret;
-    }
-#endif
 
 #if defined(WOLFSSL_DTLS) && !defined(NO_WOLFSSL_SERVER)
     if (ssl->options.dtls && ssl->options.side == WOLFSSL_SERVER_END) {
@@ -3683,12 +3744,16 @@ int InitSSL(WOLFSSL* ssl, WOLFSSL_CTX* ctx, int writeDup)
 /* free use of temporary arrays */
 void FreeArrays(WOLFSSL* ssl, int keep)
 {
-    if (ssl->arrays && keep) {
-        /* keeps session id for user retrieval */
-        XMEMCPY(ssl->session.sessionID, ssl->arrays->sessionID, ID_LEN);
-        ssl->session.sessionIDSz = ssl->arrays->sessionIDSz;
-    }
     if (ssl->arrays) {
+        if (keep) {
+            /* keeps session id for user retrieval */
+            XMEMCPY(ssl->session.sessionID, ssl->arrays->sessionID, ID_LEN);
+            ssl->session.sessionIDSz = ssl->arrays->sessionIDSz;
+        }
+        if (ssl->arrays->preMasterSecret) {
+            XFREE(ssl->arrays->preMasterSecret, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            ssl->arrays->preMasterSecret = NULL;
+        }
         XFREE(ssl->arrays->pendingMsg, ssl->heap, DYNAMIC_TYPE_ARRAYS);
         ssl->arrays->pendingMsg = NULL;
         ForceZero(ssl->arrays, sizeof(Arrays)); /* clear arrays struct */
@@ -3697,6 +3762,110 @@ void FreeArrays(WOLFSSL* ssl, int keep)
     ssl->arrays = NULL;
 }
 
+void FreeKey(WOLFSSL* ssl, int type, void** pKey)
+{ /* Frees the RSA/ECC/DH key held in *pKey and resets *pKey to NULL. */
+    if (ssl && pKey && *pKey) { /* NULL argument or empty slot: no-op */
+        switch (type) {
+        #ifndef NO_RSA
+            case DYNAMIC_TYPE_RSA:
+                wc_FreeRsaKey((RsaKey*)*pKey);
+                break;
+        #endif /* ! NO_RSA */
+        #ifdef HAVE_ECC
+            case DYNAMIC_TYPE_ECC:
+                wc_ecc_free((ecc_key*)*pKey);
+                break;
+        #endif /* HAVE_ECC */
+        #ifndef NO_DH
+            case DYNAMIC_TYPE_DH:
+                wc_FreeDhKey((DhKey*)*pKey);
+                break;
+        #endif /* !NO_DH */
+            default: /* other types get no wc_* teardown, only the XFREE */
+                break;
+        }
+        XFREE(*pKey, ssl->heap, type); /* type doubles as the memory tag */
+
+        /* Reset pointer so the slot reads as empty afterwards */
+        *pKey = NULL;
+    }
+}
+
+int AllocKey(WOLFSSL* ssl, int type, void** pKey)
+{ /* Allocates and initializes an RSA, ECC or DH key into the empty *pKey. */
+    int ret = BAD_FUNC_ARG;
+    int sz = 0;
+
+    if (ssl == NULL || pKey == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    /* Refuse to overwrite a key that is already stored in *pKey */
+    if (*pKey != NULL) {
+        WOLFSSL_MSG("Key already present!");
+        return BAD_STATE_E;
+    }
+
+    /* Determine size; a type whose case is compiled out hits default */
+    switch (type) {
+    #ifndef NO_RSA
+        case DYNAMIC_TYPE_RSA:
+            sz = sizeof(RsaKey);
+            break;
+    #endif /* ! NO_RSA */
+    #ifdef HAVE_ECC
+        case DYNAMIC_TYPE_ECC:
+            sz = sizeof(ecc_key);
+            break;
+    #endif /* HAVE_ECC */
+    #ifndef NO_DH
+        case DYNAMIC_TYPE_DH:
+            sz = sizeof(DhKey);
+            break;
+    #endif /* !NO_DH */
+        default:
+            return BAD_FUNC_ARG;
+    }
+
+    if (sz == 0) {
+        return NOT_COMPILED_IN;
+    }
+
+    /* Allocate memory for key */
+    *pKey = XMALLOC(sz, ssl->heap, type);
+    if (*pKey == NULL) {
+        return MEMORY_E;
+    }
+
+    /* Initialize key, passing along ssl->heap and ssl->devId */
+    switch (type) {
+    #ifndef NO_RSA
+        case DYNAMIC_TYPE_RSA:
+            ret = wc_InitRsaKey_ex((RsaKey*)*pKey, ssl->heap, ssl->devId);
+            break;
+    #endif /* ! NO_RSA */
+    #ifdef HAVE_ECC
+        case DYNAMIC_TYPE_ECC:
+            ret = wc_ecc_init_ex((ecc_key*)*pKey, ssl->heap, ssl->devId);
+            break;
+    #endif /* HAVE_ECC */
+    #ifndef NO_DH
+        case DYNAMIC_TYPE_DH:
+            ret = wc_InitDhKey_ex((DhKey*)*pKey, ssl->heap, ssl->devId);
+            break;
+    #endif /* !NO_DH */
+        default:
+            return BAD_FUNC_ARG;
+    }
+
+    /* On init error release the key again via FreeKey */
+    if (ret != 0) {
+        FreeKey(ssl, type, pKey);
+    }
+
+    return ret; /* result of the key init call selected above */
+}
+
 static void FreeKeyExchange(WOLFSSL* ssl)
 {
     /* Cleanup signature buffer */
@@ -3713,33 +3882,21 @@ static void FreeKeyExchange(WOLFSSL* ssl)
         ssl->buffers.digest.length = 0;
     }
 
-    /* Free sigKey */
-    if (ssl->sigKey) {
-        switch (ssl->sigType)
-        {
-        #ifndef NO_RSA
-            case DYNAMIC_TYPE_RSA:
-            {
-                wc_FreeRsaKey((RsaKey*)ssl->sigKey);
-                XFREE(ssl->sigKey, ssl->heap, DYNAMIC_TYPE_RSA);
-                break;
-            }
-        #endif /* ! NO_RSA */
-        #ifdef HAVE_ECC
-            case DYNAMIC_TYPE_ECC:
-            {
-                wc_ecc_free((ecc_key*)ssl->sigKey);
-                XFREE(ssl->sigKey, ssl->heap, DYNAMIC_TYPE_ECC);
-                break;
-            }
-        #endif /* HAVE_ECC */
-            default:
-                break;
-        }
-        /* Reset type and pointer */
-        ssl->sigType = 0;
-        ssl->sigKey = NULL;
+    /* Free handshake key */
+    FreeKey(ssl, ssl->hsType, &ssl->hsKey);
+
+#ifndef NO_DH
+    /* Free temp DH key */
+    FreeKey(ssl, DYNAMIC_TYPE_DH, (void**)&ssl->buffers.serverDH_Key);
+#endif
+
+    /* Cleanup async */
+#ifdef WOLFSSL_ASYNC_CRYPT
+    if (ssl->async.freeArgs) {
+        ssl->async.freeArgs(ssl, ssl->async.args);
+        ssl->async.freeArgs = NULL;
     }
+#endif
 }
 
 /* In case holding SSL object in array and don't want to free actual ssl */
@@ -3759,23 +3916,23 @@ void SSL_ResourceFree(WOLFSSL* ssl)
         XFREE(ssl->rng, ssl->heap, DYNAMIC_TYPE_RNG);
     }
     XFREE(ssl->suites, ssl->heap, DYNAMIC_TYPE_SUITES);
-    XFREE(ssl->hsHashes, ssl->heap, DYNAMIC_TYPE_HASHES);
+    FreeHashes(ssl);
     XFREE(ssl->buffers.domainName.buffer, ssl->heap, DYNAMIC_TYPE_DOMAIN);
 
     /* clear keys struct after session */
-    ForceZero(&(ssl->keys), sizeof(Keys));
+    ForceZero(&ssl->keys, sizeof(Keys));
 
 #ifndef NO_DH
     if (ssl->buffers.serverDH_Priv.buffer) {
         ForceZero(ssl->buffers.serverDH_Priv.buffer,
                                              ssl->buffers.serverDH_Priv.length);
     }
-    XFREE(ssl->buffers.serverDH_Priv.buffer, ssl->heap, DYNAMIC_TYPE_DH);
-    XFREE(ssl->buffers.serverDH_Pub.buffer, ssl->heap, DYNAMIC_TYPE_DH);
+    XFREE(ssl->buffers.serverDH_Priv.buffer, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
+    XFREE(ssl->buffers.serverDH_Pub.buffer, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
     /* parameters (p,g) may be owned by ctx */
     if (ssl->buffers.weOwnDH || ssl->options.side == WOLFSSL_CLIENT_END) {
-        XFREE(ssl->buffers.serverDH_G.buffer, ssl->heap, DYNAMIC_TYPE_DH);
-        XFREE(ssl->buffers.serverDH_P.buffer, ssl->heap, DYNAMIC_TYPE_DH);
+        XFREE(ssl->buffers.serverDH_G.buffer, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
+        XFREE(ssl->buffers.serverDH_P.buffer, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
     }
 #endif /* !NO_DH */
 #ifndef NO_CERTS
@@ -3783,10 +3940,8 @@ void SSL_ResourceFree(WOLFSSL* ssl)
     wolfSSL_UnloadCertsKeys(ssl);
 #endif
 #ifndef NO_RSA
-    if (ssl->peerRsaKey) {
-        wc_FreeRsaKey(ssl->peerRsaKey);
-        XFREE(ssl->peerRsaKey, ssl->heap, DYNAMIC_TYPE_RSA);
-    }
+    FreeKey(ssl, DYNAMIC_TYPE_RSA, (void**)&ssl->peerRsaKey);
+    ssl->peerRsaKeyPresent = 0;
 #endif
     if (ssl->buffers.inputBuffer.dynamicFlag)
         ShrinkInputBuffer(ssl, FORCED_FREE);
@@ -3815,21 +3970,12 @@ void SSL_ResourceFree(WOLFSSL* ssl)
     FreeStreams(ssl);
 #endif
 #ifdef HAVE_ECC
-    if (ssl->peerEccKey) {
-        if (ssl->peerEccKeyPresent)
-            wc_ecc_free(ssl->peerEccKey);
-        XFREE(ssl->peerEccKey, ssl->heap, DYNAMIC_TYPE_ECC);
-    }
-    if (ssl->peerEccDsaKey) {
-        if (ssl->peerEccDsaKeyPresent)
-            wc_ecc_free(ssl->peerEccDsaKey);
-        XFREE(ssl->peerEccDsaKey, ssl->heap, DYNAMIC_TYPE_ECC);
-    }
-    if (ssl->eccTempKey) {
-        if (ssl->eccTempKeyPresent)
-            wc_ecc_free(ssl->eccTempKey);
-        XFREE(ssl->eccTempKey, ssl->heap, DYNAMIC_TYPE_ECC);
-    }
+    FreeKey(ssl, DYNAMIC_TYPE_ECC, (void**)&ssl->peerEccKey);
+    ssl->peerEccKeyPresent = 0;
+    FreeKey(ssl, DYNAMIC_TYPE_ECC, (void**)&ssl->peerEccDsaKey);
+    ssl->peerEccDsaKeyPresent = 0;
+    FreeKey(ssl, DYNAMIC_TYPE_ECC, (void**)&ssl->eccTempKey);
+    ssl->eccTempKeyPresent = 0;
 #endif /* HAVE_ECC */
 #ifdef HAVE_PK_CALLBACKS
     #ifdef HAVE_ECC
@@ -3915,15 +4061,6 @@ void SSL_ResourceFree(WOLFSSL* ssl)
 /* Free any handshake resources no longer needed */
 void FreeHandshakeResources(WOLFSSL* ssl)
 {
-#ifndef NO_MD5
-    wc_Md5Free(&ssl->hsHashes->hashMd5);
-#endif
-#ifndef NO_SHA
-    wc_ShaFree(&ssl->hsHashes->hashSha);
-#endif
-#ifndef NO_SHA256
-    wc_Sha256Free(&ssl->hsHashes->hashSha256);
-#endif
 
 #ifdef HAVE_SECURE_RENEGOTIATION
     if (ssl->secure_renegotiation && ssl->secure_renegotiation->enabled) {
@@ -3941,8 +4078,7 @@ void FreeHandshakeResources(WOLFSSL* ssl)
     ssl->suites = NULL;
 
     /* hsHashes */
-    XFREE(ssl->hsHashes, ssl->heap, DYNAMIC_TYPE_HASHES);
-    ssl->hsHashes = NULL;
+    FreeHashes(ssl);
 
     /* RNG */
     if (ssl->specs.cipher_type == stream || ssl->options.tls1_1 == 0) {
@@ -3970,56 +4106,32 @@ void FreeHandshakeResources(WOLFSSL* ssl)
 
 #ifndef NO_RSA
     /* peerRsaKey */
-    if (ssl->peerRsaKey) {
-        wc_FreeRsaKey(ssl->peerRsaKey);
-        XFREE(ssl->peerRsaKey, ssl->heap, DYNAMIC_TYPE_RSA);
-        ssl->peerRsaKey = NULL;
-    }
+    FreeKey(ssl, DYNAMIC_TYPE_RSA, (void**)&ssl->peerRsaKey);
+    ssl->peerRsaKeyPresent = 0;
 #endif
 
 #ifdef HAVE_ECC
-    if (ssl->peerEccKey)
-    {
-        if (ssl->peerEccKeyPresent) {
-            wc_ecc_free(ssl->peerEccKey);
-            ssl->peerEccKeyPresent = 0;
-        }
-        XFREE(ssl->peerEccKey, ssl->heap, DYNAMIC_TYPE_ECC);
-        ssl->peerEccKey = NULL;
-    }
-    if (ssl->peerEccDsaKey)
-    {
-        if (ssl->peerEccDsaKeyPresent) {
-            wc_ecc_free(ssl->peerEccDsaKey);
-            ssl->peerEccDsaKeyPresent = 0;
-        }
-        XFREE(ssl->peerEccDsaKey, ssl->heap, DYNAMIC_TYPE_ECC);
-        ssl->peerEccDsaKey = NULL;
-    }
-    if (ssl->eccTempKey)
-    {
-        if (ssl->eccTempKeyPresent) {
-            wc_ecc_free(ssl->eccTempKey);
-            ssl->eccTempKeyPresent = 0;
-        }
-        XFREE(ssl->eccTempKey, ssl->heap, DYNAMIC_TYPE_ECC);
-        ssl->eccTempKey = NULL;
-    }
+    FreeKey(ssl, DYNAMIC_TYPE_ECC, (void**)&ssl->peerEccKey);
+    ssl->peerEccKeyPresent = 0;
+    FreeKey(ssl, DYNAMIC_TYPE_ECC, (void**)&ssl->peerEccDsaKey);
+    ssl->peerEccDsaKeyPresent = 0;
+    FreeKey(ssl, DYNAMIC_TYPE_ECC, (void**)&ssl->eccTempKey);
+    ssl->eccTempKeyPresent = 0;
 #endif /* HAVE_ECC */
 #ifndef NO_DH
     if (ssl->buffers.serverDH_Priv.buffer) {
         ForceZero(ssl->buffers.serverDH_Priv.buffer,
                                              ssl->buffers.serverDH_Priv.length);
     }
-    XFREE(ssl->buffers.serverDH_Priv.buffer, ssl->heap, DYNAMIC_TYPE_DH);
+    XFREE(ssl->buffers.serverDH_Priv.buffer, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
     ssl->buffers.serverDH_Priv.buffer = NULL;
-    XFREE(ssl->buffers.serverDH_Pub.buffer, ssl->heap, DYNAMIC_TYPE_DH);
+    XFREE(ssl->buffers.serverDH_Pub.buffer, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
     ssl->buffers.serverDH_Pub.buffer = NULL;
     /* parameters (p,g) may be owned by ctx */
     if (ssl->buffers.weOwnDH || ssl->options.side == WOLFSSL_CLIENT_END) {
-        XFREE(ssl->buffers.serverDH_G.buffer, ssl->heap, DYNAMIC_TYPE_DH);
+        XFREE(ssl->buffers.serverDH_G.buffer, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
         ssl->buffers.serverDH_G.buffer = NULL;
-        XFREE(ssl->buffers.serverDH_P.buffer, ssl->heap, DYNAMIC_TYPE_DH);
+        XFREE(ssl->buffers.serverDH_P.buffer, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
         ssl->buffers.serverDH_P.buffer = NULL;
     }
 #endif /* !NO_DH */
@@ -4588,7 +4700,7 @@ int DtlsMsgPoolSend(WOLFSSL* ssl, int sendOnlyFirstPacket)
                 output = ssl->buffers.outputBuffer.buffer +
                          ssl->buffers.outputBuffer.length;
                 sendSz = BuildMessage(ssl, output, sendSz, input, inputSz,
-                                      handshake, 0, 0);
+                                      handshake, 0, 0, 0);
                 if (sendSz < 0)
                     return BUILD_MSG_ERROR;
 
@@ -4909,7 +5021,9 @@ static int HashOutput(WOLFSSL* ssl, const byte* output, int sz, int ivSz)
 static int HashInput(WOLFSSL* ssl, const byte* input, int sz)
 {
     int ret = 0;
-    const byte* adj = input - HANDSHAKE_HEADER_SZ;
+    const byte* adj;
+
+    adj = input - HANDSHAKE_HEADER_SZ;
     sz += HANDSHAKE_HEADER_SZ;
 
     (void)adj;
@@ -5334,7 +5448,6 @@ int GrowInputBuffer(WOLFSSL* ssl, int size, int usedLength)
 /* check available size into output buffer, make room if needed */
 int CheckAvailableSize(WOLFSSL *ssl, int size)
 {
-
     if (size < 0) {
         WOLFSSL_MSG("CheckAvailableSize() called with negative number");
         return BAD_FUNC_ARG;
@@ -5549,96 +5662,93 @@ static const byte PAD2[PAD_MD5] =
 
 static int BuildMD5(WOLFSSL* ssl, Hashes* hashes, const byte* sender)
 {
-
+    int ret; /* first non-zero step result, else 0 */
     byte md5_result[MD5_DIGEST_SIZE];
-
 #ifdef WOLFSSL_SMALL_STACK
-        Md5* md5   = (Md5*)XMALLOC(sizeof(Md5), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-        Md5* md5_2 = (Md5*)XMALLOC(sizeof(Md5), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-
-        if (md5 == NULL || md5_2 == NULL) {
-            if (md5) {
-                XFREE(md5, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-            }
-            if (md5_2) {
-                XFREE(md5_2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-            }
-            return MEMORY_E;
-        }
+    Md5* md5 = (Md5*)XMALLOC(sizeof(Md5), ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (md5 == NULL)
+        return MEMORY_E;
 #else
-        Md5 md5[1];
-        Md5 md5_2[1];
+    Md5  md5[1];
 #endif
 
     /* make md5 inner */
-    md5[0] = ssl->hsHashes->hashMd5 ; /* Save current position */
-
-    wc_Md5Update(&ssl->hsHashes->hashMd5, sender, SIZEOF_SENDER);
-    wc_Md5Update(&ssl->hsHashes->hashMd5, ssl->arrays->masterSecret,SECRET_LEN);
-    wc_Md5Update(&ssl->hsHashes->hashMd5, PAD1, PAD_MD5);
-    wc_Md5GetHash(&ssl->hsHashes->hashMd5, md5_result);
-    wc_Md5RestorePos(&ssl->hsHashes->hashMd5, md5) ; /* Restore current position */
+    ret = wc_Md5Copy(&ssl->hsHashes->hashMd5, md5); /* work on a copy */
+    if (ret == 0)
+        ret = wc_Md5Update(md5, sender, SIZEOF_SENDER);
+    if (ret == 0)
+        ret = wc_Md5Update(md5, ssl->arrays->masterSecret,SECRET_LEN);
+    if (ret == 0)
+        ret = wc_Md5Update(md5, PAD1, PAD_MD5);
+    if (ret == 0)
+        ret = wc_Md5Final(md5, md5_result);
 
     /* make md5 outer */
-    wc_InitMd5(md5_2) ;
-    wc_Md5Update(md5_2, ssl->arrays->masterSecret,SECRET_LEN);
-    wc_Md5Update(md5_2, PAD2, PAD_MD5);
-    wc_Md5Update(md5_2, md5_result, MD5_DIGEST_SIZE);
-    wc_Md5Final(md5_2, hashes->md5);
+    if (ret == 0) { /* skipped entirely when the inner hash failed */
+        ret = wc_InitMd5_ex(md5, ssl->heap, ssl->devId); /* reuse md5 */
+        if (ret == 0) {
+            ret = wc_Md5Update(md5, ssl->arrays->masterSecret,SECRET_LEN);
+            if (ret == 0)
+                ret = wc_Md5Update(md5, PAD2, PAD_MD5);
+            if (ret == 0)
+                ret = wc_Md5Update(md5, md5_result, MD5_DIGEST_SIZE);
+            if (ret == 0)
+                ret = wc_Md5Final(md5, hashes->md5);
+            wc_Md5Free(md5); /* runs even if an update above failed */
+        }
+    }
 
 #ifdef WOLFSSL_SMALL_STACK
-    XFREE(md5, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    XFREE(md5_2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    XFREE(md5, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
 
-    return 0;
+    return ret;
 }
 
 
 /* calculate SHA hash for finished */
 static int BuildSHA(WOLFSSL* ssl, Hashes* hashes, const byte* sender)
 {
+    int ret; /* first non-zero step result, else 0 */
     byte sha_result[SHA_DIGEST_SIZE];
-
 #ifdef WOLFSSL_SMALL_STACK
-        Sha* sha = (Sha*)XMALLOC(sizeof(Sha), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-        Sha* sha2 = (Sha*)XMALLOC(sizeof(Sha), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-
-        if (sha == NULL || sha2 == NULL) {
-            if (sha) {
-                XFREE(sha, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-            }
-            if (sha2) {
-                XFREE(sha2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-            }
-            return MEMORY_E;
-        }
+    Sha* sha = (Sha*)XMALLOC(sizeof(Sha), ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (sha == NULL)
+        return MEMORY_E;
 #else
-        Sha sha[1];
-        Sha sha2[1] ;
+    Sha  sha[1];
 #endif
     /* make sha inner */
-    sha[0] = ssl->hsHashes->hashSha ; /* Save current position */
-
-    wc_ShaUpdate(&ssl->hsHashes->hashSha, sender, SIZEOF_SENDER);
-    wc_ShaUpdate(&ssl->hsHashes->hashSha, ssl->arrays->masterSecret,SECRET_LEN);
-    wc_ShaUpdate(&ssl->hsHashes->hashSha, PAD1, PAD_SHA);
-    wc_ShaGetHash(&ssl->hsHashes->hashSha, sha_result);
-    wc_ShaRestorePos(&ssl->hsHashes->hashSha, sha) ; /* Restore current position */
+    ret = wc_ShaCopy(&ssl->hsHashes->hashSha, sha); /* work on a copy */
+    if (ret == 0)
+        ret = wc_ShaUpdate(sha, sender, SIZEOF_SENDER);
+    if (ret == 0)
+        ret = wc_ShaUpdate(sha, ssl->arrays->masterSecret,SECRET_LEN);
+    if (ret == 0)
+        ret = wc_ShaUpdate(sha, PAD1, PAD_SHA);
+    if (ret == 0)
+        ret = wc_ShaFinal(sha, sha_result);
 
     /* make sha outer */
-    wc_InitSha(sha2) ;
-    wc_ShaUpdate(sha2, ssl->arrays->masterSecret,SECRET_LEN);
-    wc_ShaUpdate(sha2, PAD2, PAD_SHA);
-    wc_ShaUpdate(sha2, sha_result, SHA_DIGEST_SIZE);
-    wc_ShaFinal(sha2, hashes->sha);
+    if (ret == 0) { /* skipped entirely when the inner hash failed */
+        ret = wc_InitSha_ex(sha, ssl->heap, ssl->devId); /* reuse sha */
+        if (ret == 0) {
+            ret = wc_ShaUpdate(sha, ssl->arrays->masterSecret,SECRET_LEN);
+            if (ret == 0)
+                ret = wc_ShaUpdate(sha, PAD2, PAD_SHA);
+            if (ret == 0)
+                ret = wc_ShaUpdate(sha, sha_result, SHA_DIGEST_SIZE);
+            if (ret == 0)
+                ret = wc_ShaFinal(sha, hashes->sha);
+            wc_ShaFree(sha); /* runs even if an update above failed */
+        }
+    }
 
 #ifdef WOLFSSL_SMALL_STACK
-    XFREE(sha, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    XFREE(sha2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    XFREE(sha, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
 
-    return 0;
+    return ret;
 }
 #endif
 
@@ -5646,15 +5756,14 @@ static int BuildSHA(WOLFSSL* ssl, Hashes* hashes, const byte* sender)
 static int BuildFinished(WOLFSSL* ssl, Hashes* hashes, const byte* sender)
 {
     int ret = 0;
+#ifdef WOLFSSL_SHA384
 #ifdef WOLFSSL_SMALL_STACK
-    #ifdef WOLFSSL_SHA384
-        Sha384* sha384 = (Sha384*)XMALLOC(sizeof(Sha384), NULL,                                                                        DYNAMIC_TYPE_TMP_BUFFER);
-    #endif
+    Sha384* sha384 = (Sha384*)XMALLOC(sizeof(Sha384), ssl->heap,
+                                                DYNAMIC_TYPE_TMP_BUFFER);
 #else
-    #ifdef WOLFSSL_SHA384
-        Sha384 sha384[1];
-    #endif
-#endif
+    Sha384 sha384[1];
+#endif /* WOLFSSL_SMALL_STACK */
+#endif /* WOLFSSL_SHA384 */
 
 #ifdef WOLFSSL_SMALL_STACK
     if (ssl == NULL
@@ -5663,7 +5772,7 @@ static int BuildFinished(WOLFSSL* ssl, Hashes* hashes, const byte* sender)
     #endif
         ) {
     #ifdef WOLFSSL_SHA384
-        XFREE(sha384, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(sha384, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
     #endif
         return MEMORY_E;
     }
@@ -5695,9 +5804,9 @@ static int BuildFinished(WOLFSSL* ssl, Hashes* hashes, const byte* sender)
     #endif
     }
 
-#ifdef WOLFSSL_SMALL_STACK
 #ifdef WOLFSSL_SHA384
-    XFREE(sha384, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+#ifdef WOLFSSL_SMALL_STACK
+    XFREE(sha384, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
 #endif
 
@@ -6244,8 +6353,8 @@ static int MatchDomainName(const char* pattern, int len, const char* str)
 
         if (p == '*') {
             while (--len > 0 &&
-                         (p = (char)XTOLOWER((unsigned char)*pattern++)) == '*')
-                ;
+                (p = (char)XTOLOWER((unsigned char)*pattern++)) == '*') {
+            }
 
             if (len == 0)
                 p = '\0';
@@ -6598,727 +6707,952 @@ int CopyDecodedToX509(WOLFSSL_X509* x509, DecodedCert* dCert)
 
 #endif /* KEEP_PEER_CERT || SESSION_CERTS */
 
+typedef struct DoCertArgs { /* working state for DoCertificate() */
+    buffer*      certs; /* heap array of MAX_CHAIN_DEPTH entries */
+    DecodedCert* dCert; /* heap scratch cert, freed by FreeDoCertArgs */
+    char*  domain; /* heap buffer, freed by FreeDoCertArgs */
+    word32 idx; /* current read offset into the input buffer */
+    word32 begin; /* value of *inOutIdx when parsing started */
+    int    totalCerts; /* number of certs in certs buffer */
+    int    count; /* set to totalCerts once the chain is loaded */
+    int    dCertInit; /* non-zero while dCert needs FreeDecodedCert() */
+    int    certIdx; /* index into certs of the cert being parsed */
+#ifdef WOLFSSL_TRUST_PEER_CERT
+    byte haveTrustPeer; /* was cert verified by loaded trusted peer cert */
+#endif
+} DoCertArgs;
+
+static void FreeDoCertArgs(WOLFSSL* ssl, void* pArgs)
+{ /* frees each heap member of a DoCertArgs and clears its pointer */
+    DoCertArgs* args = (DoCertArgs*)pArgs;
+
+    (void)ssl; /* presumably for builds where XFREE drops heap - confirm */
+
+    if (args->domain) {
+        XFREE(args->domain, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        args->domain = NULL;
+    }
+    if (args->certs) {
+        XFREE(args->certs, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        args->certs = NULL;
+    }
+    if (args->dCert) {
+        if (args->dCertInit) { /* only tear down an initialized cert */
+            FreeDecodedCert(args->dCert);
+            args->dCertInit = 0;
+        }
+        XFREE(args->dCert, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        args->dCert = NULL;
+    }
+}
 
 static int DoCertificate(WOLFSSL* ssl, byte* input, word32* inOutIdx,
                                                                 word32 size)
 {
-    word32 listSz;
-    word32 begin = *inOutIdx;
-    int    ret = 0;
-    int    anyError = 0;
-    int    totalCerts = 0;    /* number of certs in certs buffer */
-    int    count;
-    buffer certs[MAX_CHAIN_DEPTH];
-
-#ifdef WOLFSSL_SMALL_STACK
-    char*                  domain = NULL;
-    DecodedCert*           dCert  = NULL;
-    WOLFSSL_X509_STORE_CTX* store  = NULL;
+    int ret = 0, lastErr = 0;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    DoCertArgs* args = (DoCertArgs*)ssl->async.args;
+    typedef char args_test[sizeof(ssl->async.args) >= sizeof(*args) ? 1 : -1];
+    (void)sizeof(args_test);
 #else
-    char                   domain[ASN_NAME_MAX];
-    DecodedCert            dCert[1];
-    WOLFSSL_X509_STORE_CTX  store[1];
+    DoCertArgs  args[1];
 #endif
 
-#ifdef WOLFSSL_TRUST_PEER_CERT
-    byte haveTrustPeer = 0; /* was cert verified by loaded trusted peer cert */
-#endif
+    WOLFSSL_ENTER("DoCertificate");
 
-    #ifdef WOLFSSL_CALLBACKS
-        if (ssl->hsInfoOn) AddPacketName("Certificate", &ssl->handShakeInfo);
-        if (ssl->toInfoOn) AddLateName("Certificate", &ssl->timeoutInfo);
+#ifdef WOLFSSL_ASYNC_CRYPT
+    ret = wolfSSL_AsyncPop(ssl, &ssl->options.keyShareState);
+    if (ret != WC_NOT_PENDING_E) {
+        /* Check for error */
+        if (ret < 0)
+            goto exit_dc;
+    }
+    else
+#endif
+    {
+        /* Reset state */
+        ret = 0;
+        ssl->options.keyShareState = KEYSHARE_BEGIN;
+        XMEMSET(args, 0, sizeof(DoCertArgs));
+        args->idx = *inOutIdx;
+        args->begin = *inOutIdx;
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        ssl->async.freeArgs = FreeDoCertArgs;
     #endif
-
-    if ((*inOutIdx - begin) + OPAQUE24_LEN > size)
-        return BUFFER_ERROR;
-
-    c24to32(input + *inOutIdx, &listSz);
-    *inOutIdx += OPAQUE24_LEN;
-
-    if (listSz > MAX_RECORD_SIZE)
-        return BUFFER_E;
-
-    if ((*inOutIdx - begin) + listSz != size)
-        return BUFFER_ERROR;
-
-    WOLFSSL_MSG("Loading peer's cert chain");
-    /* first put cert chain into buffer so can verify top down
-       we're sent bottom up */
-    while (listSz) {
-        word32 certSz;
-
-        if (totalCerts >= MAX_CHAIN_DEPTH) {
-        #ifdef OPENSSL_EXTRA
-            ssl->peerVerifyRet = X509_V_ERR_CERT_CHAIN_TOO_LONG;
-        #endif
-            return MAX_CHAIN_ERROR;
-        }
-
-        if ((*inOutIdx - begin) + OPAQUE24_LEN > size)
-            return BUFFER_ERROR;
-
-        c24to32(input + *inOutIdx, &certSz);
-        *inOutIdx += OPAQUE24_LEN;
-
-        if ((*inOutIdx - begin) + certSz > size)
-            return BUFFER_ERROR;
-
-        certs[totalCerts].length = certSz;
-        certs[totalCerts].buffer = input + *inOutIdx;
-
-#ifdef SESSION_CERTS
-        if (ssl->session.chain.count < MAX_CHAIN_DEPTH &&
-                                       certSz < MAX_X509_SIZE) {
-            ssl->session.chain.certs[ssl->session.chain.count].length = certSz;
-            XMEMCPY(ssl->session.chain.certs[ssl->session.chain.count].buffer,
-                    input + *inOutIdx, certSz);
-            ssl->session.chain.count++;
-        } else {
-            WOLFSSL_MSG("Couldn't store chain cert for session");
-        }
-#endif
-
-        *inOutIdx += certSz;
-        listSz -= certSz + CERT_HEADER_SZ;
-
-        totalCerts++;
-        WOLFSSL_MSG("\tPut another cert into chain");
     }
 
-    count = totalCerts;
-
-#ifdef WOLFSSL_SMALL_STACK
-    dCert = (DecodedCert*)XMALLOC(sizeof(DecodedCert), NULL,
-                                                       DYNAMIC_TYPE_TMP_BUFFER);
-    if (dCert == NULL)
-        return MEMORY_E;
-#endif
-
-#ifdef WOLFSSL_TRUST_PEER_CERT
-    /* if using trusted peer certs check before verify chain and CA test */
-    if (count > 0) {
-        TrustedPeerCert* tp = NULL;
-
-        InitDecodedCert(dCert, certs[0].buffer, certs[0].length, ssl->heap);
-        ret = ParseCertRelative(dCert, CERT_TYPE, 0, ssl->ctx->cm);
-        #ifndef NO_SKID
-            if (dCert->extAuthKeyIdSet) {
-                tp = GetTrustedPeer(ssl->ctx->cm, dCert->extSubjKeyId,
-                                                                 WC_MATCH_SKID);
-            }
-            else { /* if the cert has no SKID try to match by name */
-                tp = GetTrustedPeer(ssl->ctx->cm, dCert->subjectHash,
-                                                                 WC_MATCH_NAME);
-            }
-        #else /* NO_SKID */
-            tp = GetTrustedPeer(ssl->ctx->cm, dCert->subjectHash,
-                                                                 WC_MATCH_NAME);
-        #endif /* NO SKID */
-        WOLFSSL_MSG("Checking for trusted peer cert");
-
-        if (tp == NULL) {
-            /* no trusted peer cert */
-            WOLFSSL_MSG("No matching trusted peer cert. Checking CAs");
-            FreeDecodedCert(dCert);
-        } else if (MatchTrustedPeer(tp, dCert)){
-            WOLFSSL_MSG("Found matching trusted peer cert");
-            haveTrustPeer = 1;
-        } else {
-            WOLFSSL_MSG("Trusted peer cert did not match!");
-            FreeDecodedCert(dCert);
-        }
-    }
-    if (!haveTrustPeer) { /* do not verify chain if trusted peer cert found */
-#endif /* WOLFSSL_TRUST_PEER_CERT */
-
-    /* verify up to peer's first */
-    while (count > 1) {
-        buffer myCert = certs[count - 1];
-        byte* subjectHash;
-
-        InitDecodedCert(dCert, myCert.buffer, myCert.length, ssl->heap);
-        ret = ParseCertRelative(dCert, CERT_TYPE, !ssl->options.verifyNone,
-                                ssl->ctx->cm);
-        #ifndef NO_SKID
-            subjectHash = dCert->extSubjKeyId;
-        #else
-            subjectHash = dCert->subjectHash;
-        #endif
-
-        /* Check key sizes for certs. Is redundent check since ProcessBuffer
-           also performs this check. */
-        if (!ssl->options.verifyNone) {
-            switch (dCert->keyOID) {
-                #ifndef NO_RSA
-                case RSAk:
-                    if (ssl->options.minRsaKeySz < 0 ||
-                         dCert->pubKeySize < (word16)ssl->options.minRsaKeySz) {
-                        WOLFSSL_MSG("RSA key size in cert chain error");
-                        ret = RSA_KEY_SIZE_E;
-                    }
-                    break;
-                #endif /* !NO_RSA */
-                #ifdef HAVE_ECC
-                case ECDSAk:
-                    if (ssl->options.minEccKeySz < 0 ||
-                        dCert->pubKeySize < (word16)ssl->options.minEccKeySz) {
-                        WOLFSSL_MSG("ECC key size in cert chain error");
-                        ret = ECC_KEY_SIZE_E;
-                    }
-                    break;
-                #endif /* HAVE_ECC */
-
-                default:
-                    WOLFSSL_MSG("Key size not checked");
-                    break; /* key not being checked for size if not in switch */
-            }
-        }
-
-        if (ret == 0 && dCert->isCA == 0) {
-            WOLFSSL_MSG("Chain cert is not a CA, not adding as one");
-        }
-        else if (ret == 0 && ssl->options.verifyNone) {
-            WOLFSSL_MSG("Chain cert not verified by option, not adding as CA");
-        }
-        else if (ret == 0 && !AlreadySigner(ssl->ctx->cm, subjectHash)) {
-            DerBuffer* add = NULL;
-            ret = AllocDer(&add, myCert.length, CA_TYPE, ssl->heap);
-            if (ret < 0) {
-            #ifdef WOLFSSL_SMALL_STACK
-                XFREE(dCert, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-            #endif
-                return ret;
-            }
-
-            WOLFSSL_MSG("Adding CA from chain");
-
-            XMEMCPY(add->buffer, myCert.buffer, myCert.length);
-
-            /* already verified above */
-            ret = AddCA(ssl->ctx->cm, &add, WOLFSSL_CHAIN_CA, 0);
-            if (ret == 1) ret = 0;   /* SSL_SUCCESS for external */
-        }
-        else if (ret != 0) {
-            WOLFSSL_MSG("Failed to verify CA from chain");
-        #ifdef OPENSSL_EXTRA
-            ssl->peerVerifyRet = X509_V_ERR_INVALID_CA;
-        #endif
-        }
-        else {
-            WOLFSSL_MSG("Verified CA from chain and already had it");
-        }
-
-#if defined(HAVE_OCSP) || defined(HAVE_CRL)
-        if (ret == 0) {
-            int doCrlLookup = 1;
-
-#ifdef HAVE_OCSP
-        #ifdef HAVE_CERTIFICATE_STATUS_REQUEST_V2
-            if (ssl->status_request_v2)
-                ret = TLSX_CSR2_InitRequests(ssl->extensions, dCert, 0,
-                                                                     ssl->heap);
-            else /* skips OCSP and force CRL check */
-        #endif
-            if (ssl->ctx->cm->ocspEnabled && ssl->ctx->cm->ocspCheckAll) {
-                WOLFSSL_MSG("Doing Non Leaf OCSP check");
-                ret = CheckCertOCSP(ssl->ctx->cm->ocsp, dCert, NULL);
-                doCrlLookup = (ret == OCSP_CERT_UNKNOWN);
-                if (ret != 0) {
-                    doCrlLookup = 0;
-                    WOLFSSL_MSG("\tOCSP Lookup not ok");
-                }
-            }
-#endif /* HAVE_OCSP */
-
-#ifdef HAVE_CRL
-            if (ret == 0 && doCrlLookup && ssl->ctx->cm->crlEnabled
-                                                 && ssl->ctx->cm->crlCheckAll) {
-                WOLFSSL_MSG("Doing Non Leaf CRL check");
-                ret = CheckCertCRL(ssl->ctx->cm->crl, dCert);
-
-                if (ret != 0) {
-                    WOLFSSL_MSG("\tCRL check not ok");
-                }
-            }
-#else
-            (void)doCrlLookup;
-#endif /* HAVE_CRL */
-        }
-#endif /* HAVE_OCSP || HAVE_CRL */
-
-        if (ret != 0 && anyError == 0)
-            anyError = ret;   /* save error from last time */
-
-        FreeDecodedCert(dCert);
-        count--;
-    }
-
-#ifdef WOLFSSL_TRUST_PEER_CERT
-    } /* end of if (haveTrustPeer) -- a check for if already verified */
-#endif
-
-    /* peer's, may not have one if blank client cert sent by TLSv1.2 */
-    if (count) {
-        buffer myCert = certs[0];
-        int    fatal  = 0;
-
-        WOLFSSL_MSG("Verifying Peer's cert");
-
-#ifdef WOLFSSL_TRUST_PEER_CERT
-        if (!haveTrustPeer) { /* do not parse again if previously verified */
-#endif
-        InitDecodedCert(dCert, myCert.buffer, myCert.length, ssl->heap);
-        ret = ParseCertRelative(dCert, CERT_TYPE, !ssl->options.verifyNone,
-                                ssl->ctx->cm);
-#ifdef WOLFSSL_TRUST_PEER_CERT
-        }
-#endif
-
-        if (ret == 0) {
-            WOLFSSL_MSG("Verified Peer's cert");
-        #ifdef OPENSSL_EXTRA
-            ssl->peerVerifyRet = X509_V_OK;
-        #endif
-            fatal = 0;
-        }
-        else if (ret == ASN_PARSE_E) {
-            WOLFSSL_MSG("Got Peer cert ASN PARSE ERROR, fatal");
-            fatal = 1;
-        }
-        else {
-            WOLFSSL_MSG("Failed to verify Peer's cert");
-        #ifdef OPENSSL_EXTRA
-            ssl->peerVerifyRet = X509_V_ERR_UNABLE_TO_VERIFY_LEAF_SIGNATURE;
-        #endif
-            if (ssl->verifyCallback) {
-                WOLFSSL_MSG("\tCallback override available, will continue");
-                fatal = 0;
-            }
-            else {
-                WOLFSSL_MSG("\tNo callback override available, fatal");
-                fatal = 1;
-            }
-        }
-
-#ifdef HAVE_SECURE_RENEGOTIATION
-        if (fatal == 0 && ssl->secure_renegotiation
-                       && ssl->secure_renegotiation->enabled) {
-
-            if (IsEncryptionOn(ssl, 0)) {
-                /* compare against previous time */
-                if (XMEMCMP(dCert->subjectHash,
-                            ssl->secure_renegotiation->subject_hash,
-                            SHA_DIGEST_SIZE) != 0) {
-                    WOLFSSL_MSG("Peer sent different cert during scr, fatal");
-                    fatal = 1;
-                    ret   = SCR_DIFFERENT_CERT_E;
-                }
-            }
-
-            /* cache peer's hash */
-            if (fatal == 0) {
-                XMEMCPY(ssl->secure_renegotiation->subject_hash,
-                        dCert->subjectHash, SHA_DIGEST_SIZE);
-            }
-        }
-#endif
-
-#if defined(HAVE_OCSP) || defined(HAVE_CRL)
-        if (fatal == 0) {
-            int doLookup = 1;
-
-            if (ssl->options.side == WOLFSSL_CLIENT_END) {
-#ifdef HAVE_CERTIFICATE_STATUS_REQUEST
-                if (ssl->status_request) {
-                    fatal = TLSX_CSR_InitRequest(ssl->extensions, dCert,
-                                                                     ssl->heap);
-                    doLookup = 0;
-                }
-#endif
-#ifdef HAVE_CERTIFICATE_STATUS_REQUEST_V2
-                if (ssl->status_request_v2) {
-                    fatal = TLSX_CSR2_InitRequests(ssl->extensions, dCert, 1,
-                                                                     ssl->heap);
-                    doLookup = 0;
-                }
-#endif
-            }
-
-#ifdef HAVE_OCSP
-            if (doLookup && ssl->ctx->cm->ocspEnabled) {
-                WOLFSSL_MSG("Doing Leaf OCSP check");
-                ret = CheckCertOCSP(ssl->ctx->cm->ocsp, dCert, NULL);
-                doLookup = (ret == OCSP_CERT_UNKNOWN);
-                if (ret != 0) {
-                    WOLFSSL_MSG("\tOCSP Lookup not ok");
-                    fatal = 0;
-        #ifdef OPENSSL_EXTRA
-                    ssl->peerVerifyRet = X509_V_ERR_CERT_REJECTED;
-        #endif
-                }
-            }
-#endif /* HAVE_OCSP */
-
-#ifdef HAVE_CRL
-            if (doLookup && ssl->ctx->cm->crlEnabled) {
-                WOLFSSL_MSG("Doing Leaf CRL check");
-                ret = CheckCertCRL(ssl->ctx->cm->crl, dCert);
-                if (ret != 0) {
-                    WOLFSSL_MSG("\tCRL check not ok");
-                    fatal = 0;
-        #ifdef OPENSSL_EXTRA
-                    ssl->peerVerifyRet = X509_V_ERR_CERT_REJECTED;
-        #endif
-                }
-            }
-#endif /* HAVE_CRL */
-            (void)doLookup;
-        }
-#endif /* HAVE_OCSP || HAVE_CRL */
-
-#ifdef KEEP_PEER_CERT
+    switch(ssl->options.keyShareState)
+    {
+        case KEYSHARE_BEGIN:
         {
-            /* set X509 format for peer cert even if fatal */
-            int copyRet = CopyDecodedToX509(&ssl->peerCert, dCert);
-            if (copyRet == MEMORY_E)
-                fatal = 1;
-        }
-#endif
+            word32 listSz;
 
-#ifndef IGNORE_KEY_EXTENSIONS
-        if (dCert->extKeyUsageSet) {
-            if ((ssl->specs.kea == rsa_kea) &&
-                (ssl->options.side == WOLFSSL_CLIENT_END) &&
-                (dCert->extKeyUsage & KEYUSE_KEY_ENCIPHER) == 0) {
-                ret = KEYUSE_ENCIPHER_E;
-            }
-            if ((ssl->specs.sig_algo == rsa_sa_algo ||
-                    (ssl->specs.sig_algo == ecc_dsa_sa_algo &&
-                         !ssl->specs.static_ecdh)) &&
-                (dCert->extKeyUsage & KEYUSE_DIGITAL_SIG) == 0) {
-                WOLFSSL_MSG("KeyUse Digital Sig not set");
-                ret = KEYUSE_SIGNATURE_E;
-            }
-        }
-
-        if (dCert->extExtKeyUsageSet) {
-            if (ssl->options.side == WOLFSSL_CLIENT_END) {
-                if ((dCert->extExtKeyUsage &
-                        (EXTKEYUSE_ANY | EXTKEYUSE_SERVER_AUTH)) == 0) {
-                    WOLFSSL_MSG("ExtKeyUse Server Auth not set");
-                    ret = EXTKEYUSE_AUTH_E;
-                }
-            }
-            else {
-                if ((dCert->extExtKeyUsage &
-                        (EXTKEYUSE_ANY | EXTKEYUSE_CLIENT_AUTH)) == 0) {
-                    WOLFSSL_MSG("ExtKeyUse Client Auth not set");
-                    ret = EXTKEYUSE_AUTH_E;
-                }
-            }
-        }
-#endif /* IGNORE_KEY_EXTENSIONS */
-
-        if (fatal) {
-            FreeDecodedCert(dCert);
-        #ifdef WOLFSSL_SMALL_STACK
-            XFREE(dCert, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        #ifdef WOLFSSL_CALLBACKS
+            if (ssl->hsInfoOn)
+                AddPacketName("Certificate", &ssl->handShakeInfo);
+            if (ssl->toInfoOn)
+                AddLateName("Certificate", &ssl->timeoutInfo);
         #endif
-            ssl->error = ret;
+
+            /* allocate buffer for certs */
+            args->certs = (buffer*)XMALLOC(sizeof(buffer) * MAX_CHAIN_DEPTH,
+                                            ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            if (args->certs == NULL) {
+                ERROR_OUT(MEMORY_E, exit_dc);
+            }
+
+            if ((args->idx - args->begin) + OPAQUE24_LEN > size) {
+                ERROR_OUT(BUFFER_ERROR, exit_dc);
+            }
+
+            c24to32(input + args->idx, &listSz);
+            args->idx += OPAQUE24_LEN;
+
+            if (listSz > MAX_RECORD_SIZE) {
+                ERROR_OUT(BUFFER_ERROR, exit_dc);
+            }
+
+            if ((args->idx - args->begin) + listSz != size) {
+                ERROR_OUT(BUFFER_ERROR, exit_dc);
+            }
+
+            WOLFSSL_MSG("Loading peer's cert chain");
+            /* first put cert chain into buffer so can verify top down
+               we're sent bottom up */
+            while (listSz) {
+                word32 certSz;
+
+                if (args->totalCerts >= MAX_CHAIN_DEPTH) {
+                #ifdef OPENSSL_EXTRA
+                    ssl->peerVerifyRet = X509_V_ERR_CERT_CHAIN_TOO_LONG;
+                #endif
+                    ERROR_OUT(MAX_CHAIN_ERROR, exit_dc);
+                }
+
+                if ((args->idx - args->begin) + OPAQUE24_LEN > size) {
+                    ERROR_OUT(BUFFER_ERROR, exit_dc);
+                }
+
+                c24to32(input + args->idx, &certSz);
+                args->idx += OPAQUE24_LEN;
+
+                if ((args->idx - args->begin) + certSz > size) {
+                    ERROR_OUT(BUFFER_ERROR, exit_dc);
+                }
+
+                args->certs[args->totalCerts].length = certSz;
+                args->certs[args->totalCerts].buffer = input + args->idx;
+
+            #ifdef SESSION_CERTS
+                if (ssl->session.chain.count < MAX_CHAIN_DEPTH &&
+                                               certSz < MAX_X509_SIZE) {
+                    ssl->session.chain.certs[
+                        ssl->session.chain.count].length = certSz;
+                    XMEMCPY(ssl->session.chain.certs[
+                        ssl->session.chain.count].buffer,
+                            input + args->idx, certSz);
+                    ssl->session.chain.count++;
+                }
+                else {
+                    WOLFSSL_MSG("Couldn't store chain cert for session");
+                }
+            #endif /* SESSION_CERTS */
+
+                args->idx += certSz;
+                listSz -= certSz + CERT_HEADER_SZ;
+
+                args->totalCerts++;
+                WOLFSSL_MSG("\tPut another cert into chain");
+            } /* while (listSz) */
+
+            args->count = args->totalCerts;
+            args->certIdx = 0;
+
+            args->dCertInit = 0;
+            args->dCert = (DecodedCert*)XMALLOC(sizeof(DecodedCert), ssl->heap,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+            if (args->dCert == NULL) {
+                ERROR_OUT(MEMORY_E, exit_dc);
+            }
+
+            /* Advance state and proceed */
+            ssl->options.keyShareState = KEYSHARE_BUILD;
+        } /* case KEYSHARE_BEGIN */
+
+        case KEYSHARE_BUILD:
+        {
+            if (args->count > 0) {
+            #ifdef WOLFSSL_TRUST_PEER_CERT
+                if (args->certIdx == 0) {
+                    /* if using trusted peer certs check before verify chain
+                       and CA test */
+                    TrustedPeerCert* tp;
+
+                    if (!args->dCertInit) {
+                        InitDecodedCert(args->dCert,
+                            args->certs[args->certIdx].buffer,
+                            args->certs[args->certIdx].length, ssl->heap);
+                        args->dCert->sigCtx.devId = ssl->devId; /* setup async dev */
+                        args->dCertInit = 1;
+                    }
+
+                    ret = ParseCertRelative(args->dCert, CERT_TYPE, 0,
+                                                                ssl->ctx->cm);
+                    if (ret != 0) {
+                    #ifdef WOLFSSL_ASYNC_CRYPT
+                        if (ret == WC_PENDING_E) {
+                            ret = wolfSSL_AsyncPush(ssl,
+                                args->dCert->sigCtx.asyncDev,
+                                WC_ASYNC_FLAG_CALL_AGAIN);
+                        }
+                    #endif
+                        goto exit_dc;
+                    }
+
+                #ifndef NO_SKID
+                    if (args->dCert->extAuthKeyIdSet) {
+                        tp = GetTrustedPeer(ssl->ctx->cm,
+                                    args->dCert->extSubjKeyId, WC_MATCH_SKID);
+                    }
+                    else { /* if the cert has no SKID try to match by name */
+                        tp = GetTrustedPeer(ssl->ctx->cm,
+                                    args->dCert->subjectHash, WC_MATCH_NAME);
+                    }
+                #else /* NO_SKID */
+                    tp = GetTrustedPeer(ssl->ctx->cm, args->dCert->subjectHash,
+                                                                 WC_MATCH_NAME);
+                #endif /* NO SKID */
+                    WOLFSSL_MSG("Checking for trusted peer cert");
+
+                    if (tp == NULL) {
+                        /* no trusted peer cert */
+                        WOLFSSL_MSG("No matching trusted peer cert. "
+                            "Checking CAs");
+                        FreeDecodedCert(args->dCert);
+                        args->dCertInit = 0;
+                    } else if (MatchTrustedPeer(tp, args->dCert)){
+                        WOLFSSL_MSG("Found matching trusted peer cert");
+                        haveTrustPeer = 1;
+                    } else {
+                        WOLFSSL_MSG("Trusted peer cert did not match!");
+                        FreeDecodedCert(args->dCert);
+                        args->dCertInit = 0;
+                    }
+                }
+            #endif /* WOLFSSL_TRUST_PEER_CERT */
+
+                /* verify up to peer's first */
+                /* do not verify chain if trusted peer cert found */
+                while (args->count > 1
+                #ifdef WOLFSSL_TRUST_PEER_CERT
+                    && !haveTrustPeer
+                #endif /* WOLFSSL_TRUST_PEER_CERT */
+                ) {
+                    byte* subjectHash;
+
+                    args->certIdx = args->count - 1;
+
+                    if (!args->dCertInit) {
+                        InitDecodedCert(args->dCert,
+                            args->certs[args->certIdx].buffer,
+                            args->certs[args->certIdx].length, ssl->heap);
+                        args->dCert->sigCtx.devId = ssl->devId; /* setup async dev */
+                        args->dCertInit = 1;
+                    }
+
+                    ret = ParseCertRelative(args->dCert, CERT_TYPE,
+                                    !ssl->options.verifyNone, ssl->ctx->cm);
+                    if (ret != 0) {
+                    #ifdef WOLFSSL_ASYNC_CRYPT
+                        if (ret == WC_PENDING_E) {
+                            ret = wolfSSL_AsyncPush(ssl,
+                                args->dCert->sigCtx.asyncDev,
+                                WC_ASYNC_FLAG_CALL_AGAIN);
+                        }
+                    #endif
+                        goto exit_dc;
+                    }
+
+                #ifndef NO_SKID
+                    subjectHash = args->dCert->extSubjKeyId;
+                #else
+                    subjectHash = args->dCert->subjectHash;
+                #endif
+
+                    /* Check key sizes for certs. Is redundant check since
+                       ProcessBuffer also performs this check. */
+                    if (!ssl->options.verifyNone) {
+                        switch (args->dCert->keyOID) {
+                        #ifndef NO_RSA
+                            case RSAk:
+                                if (ssl->options.minRsaKeySz < 0 ||
+                                        args->dCert->pubKeySize <
+                                         (word16)ssl->options.minRsaKeySz) {
+                                    WOLFSSL_MSG(
+                                        "RSA key size in cert chain error");
+                                    ret = RSA_KEY_SIZE_E;
+                                }
+                                break;
+                        #endif /* !NO_RSA */
+                        #ifdef HAVE_ECC
+                            case ECDSAk:
+                                if (ssl->options.minEccKeySz < 0 ||
+                                        args->dCert->pubKeySize <
+                                         (word16)ssl->options.minEccKeySz) {
+                                    WOLFSSL_MSG(
+                                        "ECC key size in cert chain error");
+                                    ret = ECC_KEY_SIZE_E;
+                                }
+                                break;
+                        #endif /* HAVE_ECC */
+                            default:
+                                WOLFSSL_MSG("Key size not checked");
+                                /* key not being checked for size if not in
+                                   switch */
+                                break;
+                        } /* switch (dCert->keyOID) */
+                    } /* if (!ssl->options.verifyNone) */
+
+                    if (ret == 0 && args->dCert->isCA == 0) {
+                        WOLFSSL_MSG("Chain cert is not a CA, not adding as one");
+                    }
+                    else if (ret == 0 && ssl->options.verifyNone) {
+                        WOLFSSL_MSG("Chain cert not verified by option, not adding as CA");
+                    }
+                    else if (ret == 0 && !AlreadySigner(ssl->ctx->cm, subjectHash)) {
+                        DerBuffer* add = NULL;
+                        ret = AllocDer(&add, args->certs[args->certIdx].length,
+                                                            CA_TYPE, ssl->heap);
+                        if (ret < 0)
+                            goto exit_dc;
+
+                        WOLFSSL_MSG("Adding CA from chain");
+
+                        XMEMCPY(add->buffer, args->certs[args->certIdx].buffer,
+                                             args->certs[args->certIdx].length);
+
+                        /* already verified above */
+                        ret = AddCA(ssl->ctx->cm, &add, WOLFSSL_CHAIN_CA, 0);
+                        if (ret == 1) {
+                            ret = 0;   /* SSL_SUCCESS for external */
+                        }
+                    }
+                    else if (ret != 0) {
+                        WOLFSSL_MSG("Failed to verify CA from chain");
+                    #ifdef OPENSSL_EXTRA
+                        ssl->peerVerifyRet = X509_V_ERR_INVALID_CA;
+                    #endif
+                    }
+                    else {
+                        WOLFSSL_MSG("Verified CA from chain and already had it");
+                    }
+
+            #if defined(HAVE_OCSP) || defined(HAVE_CRL)
+                    if (ret == 0) {
+                        int doCrlLookup = 1;
+                #ifdef HAVE_OCSP
+                    #ifdef HAVE_CERTIFICATE_STATUS_REQUEST_V2
+                        if (ssl->status_request_v2) {
+                            ret = TLSX_CSR2_InitRequests(ssl->extensions,
+                                                    args->dCert, 0, ssl->heap);
+                        }
+                        else /* skips OCSP and force CRL check */
+                    #endif /* HAVE_CERTIFICATE_STATUS_REQUEST_V2 */
+                        if (ssl->ctx->cm->ocspEnabled &&
+                                            ssl->ctx->cm->ocspCheckAll) {
+                            WOLFSSL_MSG("Doing Non Leaf OCSP check");
+                            ret = CheckCertOCSP(ssl->ctx->cm->ocsp, args->dCert,
+                                                                          NULL);
+                            doCrlLookup = (ret == OCSP_CERT_UNKNOWN);
+                            if (ret != 0) {
+                                doCrlLookup = 0;
+                                WOLFSSL_MSG("\tOCSP Lookup not ok");
+                            }
+                        }
+                #endif /* HAVE_OCSP */
+
+                #ifdef HAVE_CRL
+                        if (ret == 0 && doCrlLookup &&
+                                    ssl->ctx->cm->crlEnabled &&
+                                                ssl->ctx->cm->crlCheckAll) {
+                            WOLFSSL_MSG("Doing Non Leaf CRL check");
+                            ret = CheckCertCRL(ssl->ctx->cm->crl, args->dCert);
+                            if (ret != 0) {
+                                WOLFSSL_MSG("\tCRL check not ok");
+                            }
+                        }
+                #endif /* HAVE_CRL */
+                        (void)doCrlLookup;
+                    }
+            #endif /* HAVE_OCSP || HAVE_CRL */
+
+                    if (ret != 0 && lastErr == 0) {
+                        lastErr = ret;   /* save error from last time */
+                    }
+
+                    FreeDecodedCert(args->dCert);
+                    args->dCertInit = 0;
+                    args->count--;
+                } /* while (count > 1 && !haveTrustPeer) */
+            } /* if (count > 0) */
+
+            /* Check for error */
+            if (ret != 0) {
+                goto exit_dc;
+            }
+
+            /* Advance state and proceed */
+            ssl->options.keyShareState = KEYSHARE_DO;
+        } /* case KEYSHARE_BUILD */
+
+        case KEYSHARE_DO:
+        {
+            /* peer's, may not have one if blank client cert sent by TLSv1.2 */
+            if (args->count > 0) {
+                int fatal  = 0;
+
+                WOLFSSL_MSG("Verifying Peer's cert");
+
+                args->certIdx = 0;
+
+                if (!args->dCertInit) {
+                    InitDecodedCert(args->dCert,
+                        args->certs[args->certIdx].buffer,
+                        args->certs[args->certIdx].length, ssl->heap);
+                    args->dCertInit = 1;
+                }
+
+            #ifdef WOLFSSL_TRUST_PEER_CERT
+                if (!haveTrustPeer)
+            #endif
+                { /* only parse if not already present in dCert from above */
+                    ret = ParseCertRelative(args->dCert, CERT_TYPE,
+                                    !ssl->options.verifyNone, ssl->ctx->cm);
+                    if (ret != 0) {
+                    #ifdef WOLFSSL_ASYNC_CRYPT
+                        if (ret == WC_PENDING_E) {
+                            ret = wolfSSL_AsyncPush(ssl,
+                                args->dCert->sigCtx.asyncDev,
+                                WC_ASYNC_FLAG_CALL_AGAIN);
+                        }
+                    #endif
+                        goto exit_dc;
+                    }
+                }
+
+                if (ret == 0) {
+                    WOLFSSL_MSG("Verified Peer's cert");
+                #ifdef OPENSSL_EXTRA
+                    ssl->peerVerifyRet = X509_V_OK;
+                #endif
+                    fatal = 0;
         #ifdef OPENSSL_EXTRA
-            ssl->peerVerifyRet = X509_V_ERR_CERT_REJECTED;
+                    ssl->peerVerifyRet = X509_V_ERR_CERT_REJECTED;
         #endif
-            return ret;
-        }
-        ssl->options.havePeerCert = 1;
-
-#ifdef WOLFSSL_SMALL_STACK
-        domain = (char*)XMALLOC(ASN_NAME_MAX, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-        if (domain == NULL) {
-            FreeDecodedCert(dCert);
-            XFREE(dCert, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-            return MEMORY_E;
-        }
-#endif
-        /* store for callback use */
-        if (dCert->subjectCNLen < ASN_NAME_MAX) {
-            XMEMCPY(domain, dCert->subjectCN, dCert->subjectCNLen);
-            domain[dCert->subjectCNLen] = '\0';
-        }
-        else
-            domain[0] = '\0';
-
-        if (!ssl->options.verifyNone && ssl->buffers.domainName.buffer) {
-            if (MatchDomainName(dCert->subjectCN, dCert->subjectCNLen,
-                                (char*)ssl->buffers.domainName.buffer) == 0) {
-                WOLFSSL_MSG("DomainName match on common name failed");
-                if (CheckAltNames(dCert,
-                                 (char*)ssl->buffers.domainName.buffer) == 0 ) {
-                    WOLFSSL_MSG("DomainName match on alt names failed too");
-                    ret = DOMAIN_NAME_MISMATCH; /* try to get peer key still */
                 }
+                else if (ret == ASN_PARSE_E) {
+                    WOLFSSL_MSG("Got Peer cert ASN PARSE ERROR, fatal");
+                    fatal = 1;
+                }
+                else {
+                    WOLFSSL_MSG("Failed to verify Peer's cert");
+                #ifdef OPENSSL_EXTRA
+                    ssl->peerVerifyRet = X509_V_ERR_UNABLE_TO_VERIFY_LEAF_SIGNATURE;
+                #endif
+                    if (ssl->verifyCallback) {
+                        WOLFSSL_MSG(
+                            "\tCallback override available, will continue");
+                        fatal = 0;
+                    }
+                    else {
+                        WOLFSSL_MSG("\tNo callback override available, fatal");
+                        fatal = 1;
+                    }
+                }
+
+            #ifdef HAVE_SECURE_RENEGOTIATION
+                if (fatal == 0 && ssl->secure_renegotiation
+                               && ssl->secure_renegotiation->enabled) {
+
+                    if (IsEncryptionOn(ssl, 0)) {
+                        /* compare against previous time */
+                        if (XMEMCMP(args->dCert->subjectHash,
+                                    ssl->secure_renegotiation->subject_hash,
+                                    SHA_DIGEST_SIZE) != 0) {
+                            WOLFSSL_MSG(
+                                "Peer sent different cert during scr, fatal");
+                            fatal = 1;
+                            ret   = SCR_DIFFERENT_CERT_E;
+                        }
+                    }
+
+                    /* cache peer's hash */
+                    if (fatal == 0) {
+                        XMEMCPY(ssl->secure_renegotiation->subject_hash,
+                                args->dCert->subjectHash, SHA_DIGEST_SIZE);
+                    }
+                }
+            #endif /* HAVE_SECURE_RENEGOTIATION */
+
+            #if defined(HAVE_OCSP) || defined(HAVE_CRL)
+                if (fatal == 0) {
+                    int doLookup = 1;
+
+                    if (ssl->options.side == WOLFSSL_CLIENT_END) {
+                #ifdef HAVE_CERTIFICATE_STATUS_REQUEST
+                        if (ssl->status_request) {
+                            fatal = TLSX_CSR_InitRequest(ssl->extensions,
+                                                    args->dCert, ssl->heap);
+                            doLookup = 0;
+                        }
+                #endif /* HAVE_CERTIFICATE_STATUS_REQUEST */
+                #ifdef HAVE_CERTIFICATE_STATUS_REQUEST_V2
+                        if (ssl->status_request_v2) {
+                            fatal = TLSX_CSR2_InitRequests(ssl->extensions,
+                                                    args->dCert, 1, ssl->heap);
+                            doLookup = 0;
+                        }
+                #endif /* HAVE_CERTIFICATE_STATUS_REQUEST_V2 */
+                    }
+
+                #ifdef HAVE_OCSP
+                    if (doLookup && ssl->ctx->cm->ocspEnabled) {
+                        WOLFSSL_MSG("Doing Leaf OCSP check");
+                        ret = CheckCertOCSP(ssl->ctx->cm->ocsp,
+                                                            args->dCert, NULL);
+                        doLookup = (ret == OCSP_CERT_UNKNOWN);
+                        if (ret != 0) {
+                            WOLFSSL_MSG("\tOCSP Lookup not ok");
+                            fatal = 0;
+                        #ifdef OPENSSL_EXTRA
+                            ssl->peerVerifyRet = X509_V_ERR_CERT_REJECTED;
+                        #endif
+                        }
+                    }
+                #endif /* HAVE_OCSP */
+
+                #ifdef HAVE_CRL
+                    if (doLookup && ssl->ctx->cm->crlEnabled) {
+                        WOLFSSL_MSG("Doing Leaf CRL check");
+                        ret = CheckCertCRL(ssl->ctx->cm->crl, args->dCert);
+                        if (ret != 0) {
+                            WOLFSSL_MSG("\tCRL check not ok");
+                            fatal = 0;
+                        #ifdef OPENSSL_EXTRA
+                            ssl->peerVerifyRet = X509_V_ERR_CERT_REJECTED;
+                        #endif
+                        }
+                    }
+                #endif /* HAVE_CRL */
+                    (void)doLookup;
+                }
+            #endif /* HAVE_OCSP || HAVE_CRL */
+
+            #ifdef KEEP_PEER_CERT
+                {
+                    /* set X509 format for peer cert even if fatal */
+                    int copyRet = CopyDecodedToX509(&ssl->peerCert,
+                                                                args->dCert);
+                    if (copyRet == MEMORY_E)
+                        fatal = 1;
+                }
+            #endif /* KEEP_PEER_CERT */
+
+            #ifndef IGNORE_KEY_EXTENSIONS
+                if (args->dCert->extKeyUsageSet) {
+                    if ((ssl->specs.kea == rsa_kea) &&
+                        (ssl->options.side == WOLFSSL_CLIENT_END) &&
+                        (args->dCert->extKeyUsage & KEYUSE_KEY_ENCIPHER) == 0) {
+                        ret = KEYUSE_ENCIPHER_E;
+                    }
+                    if ((ssl->specs.sig_algo == rsa_sa_algo ||
+                            (ssl->specs.sig_algo == ecc_dsa_sa_algo &&
+                                 !ssl->specs.static_ecdh)) &&
+                        (args->dCert->extKeyUsage & KEYUSE_DIGITAL_SIG) == 0) {
+                        WOLFSSL_MSG("KeyUse Digital Sig not set");
+                        ret = KEYUSE_SIGNATURE_E;
+                    }
+                }
+
+                if (args->dCert->extExtKeyUsageSet) {
+                    if (ssl->options.side == WOLFSSL_CLIENT_END) {
+                        if ((args->dCert->extExtKeyUsage &
+                                (EXTKEYUSE_ANY | EXTKEYUSE_SERVER_AUTH)) == 0) {
+                            WOLFSSL_MSG("ExtKeyUse Server Auth not set");
+                            ret = EXTKEYUSE_AUTH_E;
+                        }
+                    }
+                    else {
+                        if ((args->dCert->extExtKeyUsage &
+                                (EXTKEYUSE_ANY | EXTKEYUSE_CLIENT_AUTH)) == 0) {
+                            WOLFSSL_MSG("ExtKeyUse Client Auth not set");
+                            ret = EXTKEYUSE_AUTH_E;
+                        }
+                    }
+                }
+            #endif /* IGNORE_KEY_EXTENSIONS */
+
+                if (fatal) {
+                    ssl->error = ret;
+                #ifdef OPENSSL_EXTRA
+                    ssl->peerVerifyRet = X509_V_ERR_CERT_REJECTED;
+                #endif
+                    goto exit_dc;
+                }
+
+                ssl->options.havePeerCert = 1;
+            } /* if (count > 0) */
+
+            /* Check for error */
+            if (ret != 0) {
+                goto exit_dc;
             }
-        }
 
-        /* decode peer key */
-        switch (dCert->keyOID) {
-        #ifndef NO_RSA
-            case RSAk:
-                {
-                    word32 idx = 0;
-                    int    keyRet = 0;
+            /* Advance state and proceed */
+            ssl->options.keyShareState = KEYSHARE_VERIFY;
+        } /* case KEYSHARE_DO */
+
+        case KEYSHARE_VERIFY:
+        {
+            if (args->count > 0) {
+                args->domain = (char*)XMALLOC(ASN_NAME_MAX, ssl->heap,
+                                                    DYNAMIC_TYPE_TMP_BUFFER);
+                if (args->domain == NULL) {
+                    ERROR_OUT(MEMORY_E, exit_dc);
+                }
+
+                /* store for callback use */
+                if (args->dCert->subjectCNLen < ASN_NAME_MAX) {
+                    XMEMCPY(args->domain, args->dCert->subjectCN, args->dCert->subjectCNLen);
+                    args->domain[args->dCert->subjectCNLen] = '\0';
+                }
+                else {
+                    args->domain[0] = '\0';
+                }
+
+                if (!ssl->options.verifyNone && ssl->buffers.domainName.buffer) {
+                    if (MatchDomainName(args->dCert->subjectCN,
+                                args->dCert->subjectCNLen,
+                                (char*)ssl->buffers.domainName.buffer) == 0) {
+                        WOLFSSL_MSG("DomainName match on common name failed");
+                        if (CheckAltNames(args->dCert,
+                                 (char*)ssl->buffers.domainName.buffer) == 0 ) {
+                            WOLFSSL_MSG(
+                                "DomainName match on alt names failed too");
+                            /* try to get peer key still */
+                            ret = DOMAIN_NAME_MISMATCH;
+                        }
+                    }
+                }
+
+                /* decode peer key */
+                switch (args->dCert->keyOID) {
+                #ifndef NO_RSA
+                    case RSAk:
+                    {
+                        word32 keyIdx = 0;
+                        int keyRet = 0;
 
-                    if (ssl->peerRsaKey == NULL) {
-                        ssl->peerRsaKey = (RsaKey*)XMALLOC(sizeof(RsaKey),
-                                                   ssl->heap, DYNAMIC_TYPE_RSA);
                         if (ssl->peerRsaKey == NULL) {
-                            WOLFSSL_MSG("PeerRsaKey Memory error");
-                            keyRet = MEMORY_E;
-                        } else {
+                            keyRet = AllocKey(ssl, DYNAMIC_TYPE_RSA,
+                                                (void**)&ssl->peerRsaKey);
+                        } else if (ssl->peerRsaKeyPresent) {
+                            /* don't leak on reuse */
+                            wc_FreeRsaKey(ssl->peerRsaKey);
+                            ssl->peerRsaKeyPresent = 0;
                             keyRet = wc_InitRsaKey_ex(ssl->peerRsaKey,
-                                                       ssl->heap, ssl->devId);
+                                                    ssl->heap, ssl->devId);
                         }
-                    } else if (ssl->peerRsaKeyPresent) {
-                        /* don't leak on reuse */
-                        wc_FreeRsaKey(ssl->peerRsaKey);
-                        ssl->peerRsaKeyPresent = 0;
-                        keyRet = wc_InitRsaKey_ex(ssl->peerRsaKey, ssl->heap, ssl->devId);
-                    }
 
-                    if (keyRet != 0 || wc_RsaPublicKeyDecode(dCert->publicKey,
-                               &idx, ssl->peerRsaKey, dCert->pubKeySize) != 0) {
-                        ret = PEER_KEY_ERROR;
-                    }
-                    else {
-                        ssl->peerRsaKeyPresent = 1;
-                        #ifdef HAVE_PK_CALLBACKS
-                            #ifndef NO_RSA
-                                ssl->buffers.peerRsaKey.buffer =
-                                       (byte*)XMALLOC(dCert->pubKeySize,
-                                               ssl->heap, DYNAMIC_TYPE_RSA);
-                                if (ssl->buffers.peerRsaKey.buffer == NULL)
-                                    ret = MEMORY_ERROR;
-                                else {
-                                    XMEMCPY(ssl->buffers.peerRsaKey.buffer,
-                                           dCert->publicKey, dCert->pubKeySize);
-                                    ssl->buffers.peerRsaKey.length =
-                                            dCert->pubKeySize;
-                                }
-                            #endif /* NO_RSA */
-                        #endif /*HAVE_PK_CALLBACKS */
-                    }
+                        if (keyRet != 0 || wc_RsaPublicKeyDecode(
+                                args->dCert->publicKey, &keyIdx, ssl->peerRsaKey,
+                                                args->dCert->pubKeySize) != 0) {
+                            ret = PEER_KEY_ERROR;
+                        }
+                        else {
+                            ssl->peerRsaKeyPresent = 1;
+                    #ifdef HAVE_PK_CALLBACKS
+                        #ifndef NO_RSA
+                            ssl->buffers.peerRsaKey.buffer =
+                                   (byte*)XMALLOC(args->dCert->pubKeySize,
+                                                ssl->heap, DYNAMIC_TYPE_RSA);
+                            if (ssl->buffers.peerRsaKey.buffer == NULL) {
+                                ret = MEMORY_ERROR;
+                            }
+                            else {
+                                XMEMCPY(ssl->buffers.peerRsaKey.buffer,
+                                        args->dCert->publicKey,
+                                        args->dCert->pubKeySize);
+                                ssl->buffers.peerRsaKey.length =
+                                    args->dCert->pubKeySize;
+                            }
+                        #endif /* NO_RSA */
+                    #endif /* HAVE_PK_CALLBACKS */
+                        }
 
-                    /* check size of peer RSA key */
-                    if (ret == 0 && ssl->peerRsaKeyPresent &&
-                                              !ssl->options.verifyNone &&
-                                              wc_RsaEncryptSize(ssl->peerRsaKey)
+                        /* check size of peer RSA key */
+                        if (ret == 0 && ssl->peerRsaKeyPresent &&
+                                          !ssl->options.verifyNone &&
+                                          wc_RsaEncryptSize(ssl->peerRsaKey)
                                               < ssl->options.minRsaKeySz) {
-                        ret = RSA_KEY_SIZE_E;
-                        WOLFSSL_MSG("Peer RSA key is too small");
-                    }
-
-                }
-                break;
-        #endif /* NO_RSA */
-        #ifdef HAVE_NTRU
-            case NTRUk:
-                {
-                    if (dCert->pubKeySize > sizeof(ssl->peerNtruKey)) {
-                        ret = PEER_KEY_ERROR;
-                    }
-                    else {
-                        XMEMCPY(ssl->peerNtruKey, dCert->publicKey,
-                                                             dCert->pubKeySize);
-                        ssl->peerNtruKeyLen = (word16)dCert->pubKeySize;
-                        ssl->peerNtruKeyPresent = 1;
-                    }
-                }
-                break;
-        #endif /* HAVE_NTRU */
-        #ifdef HAVE_ECC
-            case ECDSAk:
-                {
-                    int curveId;
-                    if (ssl->peerEccDsaKey == NULL) {
-                        /* alloc/init on demand */
-                        ssl->peerEccDsaKey = (ecc_key*)XMALLOC(sizeof(ecc_key),
-                                              ssl->heap, DYNAMIC_TYPE_ECC);
-                        if (ssl->peerEccDsaKey == NULL) {
-                            WOLFSSL_MSG("PeerEccDsaKey Memory error");
-                            return MEMORY_E;
+                            ret = RSA_KEY_SIZE_E;
+                            WOLFSSL_MSG("Peer RSA key is too small");
                         }
-                        wc_ecc_init_ex(ssl->peerEccDsaKey, ssl->heap,
-                                                                ssl->devId);
-                    } else if (ssl->peerEccDsaKeyPresent) {
-                        /* don't leak on reuse */
-                        wc_ecc_free(ssl->peerEccDsaKey);
-                        ssl->peerEccDsaKeyPresent = 0;
-                        wc_ecc_init_ex(ssl->peerEccDsaKey, ssl->heap,
-                                                                ssl->devId);
+                        break;
                     }
+                #endif /* NO_RSA */
+                #ifdef HAVE_NTRU
+                    case NTRUk:
+                    {
+                        if (args->dCert->pubKeySize > sizeof(ssl->peerNtruKey)) {
+                            ret = PEER_KEY_ERROR;
+                        }
+                        else {
+                            XMEMCPY(ssl->peerNtruKey, args->dCert->publicKey,
+                                                      args->dCert->pubKeySize);
+                            ssl->peerNtruKeyLen =
+                                (word16)args->dCert->pubKeySize;
+                            ssl->peerNtruKeyPresent = 1;
+                        }
+                        break;
+                    }
+                #endif /* HAVE_NTRU */
+                #ifdef HAVE_ECC
+                    case ECDSAk:
+                    {
+                        int curveId;
+                        if (ssl->peerEccDsaKey == NULL) {
+                            /* alloc/init on demand */
+                            ret = AllocKey(ssl, DYNAMIC_TYPE_ECC,
+                                    (void**)&ssl->peerEccDsaKey);
+                        } else if (ssl->peerEccDsaKeyPresent) {
+                            /* don't leak on reuse */
+                            wc_ecc_free(ssl->peerEccDsaKey);
+                            ssl->peerEccDsaKeyPresent = 0;
+                            ret = wc_ecc_init_ex(ssl->peerEccDsaKey,
+                                                    ssl->heap, ssl->devId);
+                        }
+                        if (ret != 0) {
+                            break;
+                        }
 
-                    curveId = wc_ecc_get_oid(dCert->keyOID, NULL, NULL);
-                    if (wc_ecc_import_x963_ex(dCert->publicKey,
-                        dCert->pubKeySize, ssl->peerEccDsaKey, curveId) != 0) {
-                        ret = PEER_KEY_ERROR;
-                    }
-                    else {
-                        ssl->peerEccDsaKeyPresent = 1;
-                        #ifdef HAVE_PK_CALLBACKS
-                            #ifdef HAVE_ECC
-                                ssl->buffers.peerEccDsaKey.buffer =
-                                       (byte*)XMALLOC(dCert->pubKeySize,
-                                               ssl->heap, DYNAMIC_TYPE_ECC);
-                                if (ssl->buffers.peerEccDsaKey.buffer == NULL)
-                                    ret = MEMORY_ERROR;
-                                else {
-                                    XMEMCPY(ssl->buffers.peerEccDsaKey.buffer,
-                                           dCert->publicKey, dCert->pubKeySize);
-                                    ssl->buffers.peerEccDsaKey.length =
-                                            dCert->pubKeySize;
-                                }
-                            #endif /* HAVE_ECC */
-                        #endif /*HAVE_PK_CALLBACKS */
-                    }
+                        curveId = wc_ecc_get_oid(args->dCert->keyOID, NULL, NULL);
+                        if (wc_ecc_import_x963_ex(args->dCert->publicKey,
+                                    args->dCert->pubKeySize, ssl->peerEccDsaKey,
+                                                            curveId) != 0) {
+                            ret = PEER_KEY_ERROR;
+                        }
+                        else {
+                            ssl->peerEccDsaKeyPresent = 1;
+                    #ifdef HAVE_PK_CALLBACKS
+                        #ifdef HAVE_ECC
+                            ssl->buffers.peerEccDsaKey.buffer =
+                                   (byte*)XMALLOC(args->dCert->pubKeySize,
+                                           ssl->heap, DYNAMIC_TYPE_ECC);
+                            if (ssl->buffers.peerEccDsaKey.buffer == NULL)
+                                ret = MEMORY_ERROR;
+                            else {
+                                XMEMCPY(ssl->buffers.peerEccDsaKey.buffer,
+                                        args->dCert->publicKey,
+                                        args->dCert->pubKeySize);
+                                ssl->buffers.peerEccDsaKey.length =
+                                        args->dCert->pubKeySize;
+                            }
+                        #endif /* HAVE_ECC */
+                    #endif /*HAVE_PK_CALLBACKS */
+                        }
 
-                    /* check size of peer ECC key */
-                    if (ret == 0 && ssl->peerEccDsaKeyPresent &&
+                        /* check size of peer ECC key */
+                        if (ret == 0 && ssl->peerEccDsaKeyPresent &&
                                               !ssl->options.verifyNone &&
                                               wc_ecc_size(ssl->peerEccDsaKey)
                                               < ssl->options.minEccKeySz) {
-                        ret = ECC_KEY_SIZE_E;
-                        WOLFSSL_MSG("Peer ECC key is too small");
+                            ret = ECC_KEY_SIZE_E;
+                            WOLFSSL_MSG("Peer ECC key is too small");
+                        }
+                        break;
                     }
-
+                #endif /* HAVE_ECC */
+                    default:
+                        break;
                 }
-                break;
-        #endif /* HAVE_ECC */
-            default:
-                break;
-        }
 
-        FreeDecodedCert(dCert);
-    }
+                FreeDecodedCert(args->dCert);
+                args->dCertInit = 0;
 
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(dCert, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-
-    store = (WOLFSSL_X509_STORE_CTX*)XMALLOC(sizeof(WOLFSSL_X509_STORE_CTX),
-                                                 NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (store == NULL) {
-        XFREE(domain, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-        return MEMORY_E;
-    }
-#endif
-    XMEMSET(store, 0, sizeof(WOLFSSL_X509_STORE_CTX));
-
-    if (anyError != 0 && ret == 0)
-        ret = anyError;
-
-    if (ret != 0) {
-        if (!ssl->options.verifyNone) {
-            int why = bad_certificate;
-
-            if (ret == ASN_AFTER_DATE_E || ret == ASN_BEFORE_DATE_E)
-                why = certificate_expired;
-            if (ssl->verifyCallback) {
-                int ok;
-
-                store->error = ret;
-                store->error_depth = totalCerts;
-                store->discardSessionCerts = 0;
-                store->domain = domain;
-                store->userCtx = ssl->verifyCbCtx;
-                store->certs = certs;
-                store->totalCerts = totalCerts;
-#ifdef KEEP_PEER_CERT
-                if (ssl->peerCert.subject.sz > 0)
-                    store->current_cert = &ssl->peerCert;
-                else
-                    store->current_cert = NULL;
-#else
-                store->current_cert = NULL;
-#endif
-#if defined(HAVE_EX_DATA) || defined(HAVE_FORTRESS)
-                store->ex_data = ssl;
-#endif
-                ok = ssl->verifyCallback(0, store);
-                if (ok) {
-                    WOLFSSL_MSG("Verify callback overriding error!");
-                    ret = 0;
+                /* release since we don't need it anymore */
+                if (args->dCert) {
+                    XFREE(args->dCert, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                    args->dCert = NULL;
                 }
-                #ifdef SESSION_CERTS
-                if (store->discardSessionCerts) {
-                    WOLFSSL_MSG("Verify callback requested discard sess certs");
-                    ssl->session.chain.count = 0;
-                }
-                #endif
-            }
+            } /* if (count > 0) */
+
+            /* Check for error */
             if (ret != 0) {
-                SendAlert(ssl, alert_fatal, why);   /* try to send */
-                ssl->options.isClosed = 1;
+                goto exit_dc;
             }
+
+            /* Advance state and proceed */
+            ssl->options.keyShareState = KEYSHARE_FINALIZE;
+        } /* case KEYSHARE_VERIFY */
+
+        case KEYSHARE_FINALIZE:
+        {
+        #ifdef WOLFSSL_SMALL_STACK
+            WOLFSSL_X509_STORE_CTX* store = (WOLFSSL_X509_STORE_CTX*)XMALLOC(
+                                    sizeof(WOLFSSL_X509_STORE_CTX), ssl->heap,
+                                                    DYNAMIC_TYPE_TMP_BUFFER);
+            if (store == NULL) {
+                ERROR_OUT(MEMORY_E, exit_dc);
+            }
+        #else
+            WOLFSSL_X509_STORE_CTX  store[1];
+        #endif
+
+            XMEMSET(store, 0, sizeof(WOLFSSL_X509_STORE_CTX));
+
+            /* load last error */
+            if (lastErr != 0 && ret == 0) {
+                ret = lastErr;
+            }
+
+            if (ret != 0) {
+                if (!ssl->options.verifyNone) {
+                    int why = bad_certificate;
+
+                    if (ret == ASN_AFTER_DATE_E || ret == ASN_BEFORE_DATE_E) {
+                        why = certificate_expired;
+                    }
+                    if (ssl->verifyCallback) {
+                        int ok;
+
+                        store->error = ret;
+                        store->error_depth = args->totalCerts;
+                        store->discardSessionCerts = 0;
+                        store->domain = args->domain;
+                        store->userCtx = ssl->verifyCbCtx;
+                        store->certs = args->certs;
+                        store->totalCerts = args->totalCerts;
+                    #ifdef KEEP_PEER_CERT
+                        if (ssl->peerCert.subject.sz > 0)
+                            store->current_cert = &ssl->peerCert;
+                        else
+                            store->current_cert = NULL;
+                    #else
+                        store->current_cert = NULL;
+                    #endif /* KEEP_PEER_CERT */
+                    #if defined(HAVE_EX_DATA) || defined(HAVE_FORTRESS)
+                        store->ex_data = ssl;
+                    #endif
+                        ok = ssl->verifyCallback(0, store);
+                        if (ok) {
+                            WOLFSSL_MSG("Verify callback overriding error!");
+                            ret = 0;
+                        }
+                    #ifdef SESSION_CERTS
+                        if (store->discardSessionCerts) {
+                            WOLFSSL_MSG("Verify callback requested discard sess certs");
+                            ssl->session.chain.count = 0;
+                        }
+                    #endif /* SESSION_CERTS */
+                    }
+                    if (ret != 0) {
+                        SendAlert(ssl, alert_fatal, why);   /* try to send */
+                        ssl->options.isClosed = 1;
+                    }
+                }
+                ssl->error = ret;
+            }
+        #ifdef WOLFSSL_ALWAYS_VERIFY_CB
+            else {
+                if (ssl->verifyCallback) {
+                    int ok;
+
+                    store->error = ret;
+                #ifdef WOLFSSL_WPAS
+                    store->error_depth = 0;
+                #else
+                    store->error_depth = args->totalCerts;
+                #endif
+                    store->discardSessionCerts = 0;
+                    store->domain = args->domain;
+                    store->userCtx = ssl->verifyCbCtx;
+                    store->certs = args->certs;
+                    store->totalCerts = args->totalCerts;
+                #ifdef KEEP_PEER_CERT
+                    if (ssl->peerCert.subject.sz > 0)
+                        store->current_cert = &ssl->peerCert;
+                    else
+                        store->current_cert = NULL;
+                #endif
+                    store->ex_data = ssl;
+
+                    ok = ssl->verifyCallback(1, store);
+                    if (!ok) {
+                        WOLFSSL_MSG("Verify callback overriding valid certificate!");
+                        ret = -1;
+                        SendAlert(ssl, alert_fatal, bad_certificate);
+                        ssl->options.isClosed = 1;
+                    }
+                #ifdef SESSION_CERTS
+                    if (store->discardSessionCerts) {
+                        WOLFSSL_MSG("Verify callback requested discard sess certs");
+                        ssl->session.chain.count = 0;
+                    }
+                #endif /* SESSION_CERTS */
+                }
+            }
+        #endif /* WOLFSSL_ALWAYS_VERIFY_CB */
+
+            if (ssl->options.verifyNone &&
+                                      (ret == CRL_MISSING || ret == CRL_CERT_REVOKED)) {
+                WOLFSSL_MSG("Ignoring CRL problem based on verify setting");
+                ret = ssl->error = 0;
+            }
+
+            if (ret == 0 && ssl->options.side == WOLFSSL_CLIENT_END) {
+                ssl->options.serverState = SERVER_CERT_COMPLETE;
+            }
+
+            if (IsEncryptionOn(ssl, 0)) {
+                args->idx += ssl->keys.padSz;
+            }
+
+        #ifdef WOLFSSL_SMALL_STACK
+            XFREE(store, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        #endif
+            /* Advance state and proceed */
+            ssl->options.keyShareState = KEYSHARE_END;
+        } /* case KEYSHARE_FINALIZE */
+
+        case KEYSHARE_END:
+        {
+            /* Set final index */
+            *inOutIdx = args->idx;
+
+            break;
         }
-        ssl->error = ret;
+        default:
+            ret = INPUT_CASE_ERROR;
+            break;
+    } /* switch(ssl->options.keyShareState) */
+
+exit_dc:
+
+    WOLFSSL_LEAVE("DoCertificate", ret);
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    /* Handle WC_PENDING_E */
+    if (ret == WC_PENDING_E) {
+        /* Mark message as not received so it can be processed again */
+        ssl->msgsReceived.got_certificate = 0;
+
+        return ret;
     }
-#ifdef WOLFSSL_ALWAYS_VERIFY_CB
-    else {
-        if (ssl->verifyCallback) {
-            int ok;
+#endif /* WOLFSSL_ASYNC_CRYPT */
 
-            store->error = ret;
-#ifdef WOLFSSL_WPAS
-            store->error_depth = 0;
-#else
-            store->error_depth = totalCerts;
-#endif
-            store->discardSessionCerts = 0;
-            store->domain = domain;
-            store->userCtx = ssl->verifyCbCtx;
-            store->certs = certs;
-            store->totalCerts = totalCerts;
-#ifdef KEEP_PEER_CERT
-            if (ssl->peerCert.subject.sz > 0)
-                store->current_cert = &ssl->peerCert;
-            else
-                store->current_cert = NULL;
-#endif
-            store->ex_data = ssl;
-
-            ok = ssl->verifyCallback(1, store);
-            if (!ok) {
-                WOLFSSL_MSG("Verify callback overriding valid certificate!");
-                ret = -1;
-                SendAlert(ssl, alert_fatal, bad_certificate);
-                ssl->options.isClosed = 1;
-            }
-            #ifdef SESSION_CERTS
-            if (store->discardSessionCerts) {
-                WOLFSSL_MSG("Verify callback requested discard sess certs");
-                ssl->session.chain.count = 0;
-            }
-            #endif
-        }
-    }
-#endif
-
-    if (ssl->options.verifyNone &&
-                              (ret == CRL_MISSING || ret == CRL_CERT_REVOKED)) {
-        WOLFSSL_MSG("Ignoring CRL problem based on verify setting");
-        ret = ssl->error = 0;
-    }
-
-    if (ret == 0 && ssl->options.side == WOLFSSL_CLIENT_END)
-        ssl->options.serverState = SERVER_CERT_COMPLETE;
-
-    if (IsEncryptionOn(ssl, 0)) {
-        *inOutIdx += ssl->keys.padSz;
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(store,  NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    XFREE(domain, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+    FreeDoCertArgs(ssl, args);
+    FreeKeyExchange(ssl);
 
     return ret;
 }
@@ -7385,9 +7719,9 @@ static int DoCertificateStatus(WOLFSSL* ssl, byte* input, word32* inOutIdx,
                 return BAD_CERTIFICATE_STATUS_ERROR; /* not expected */
 
             #ifdef WOLFSSL_SMALL_STACK
-                status = (CertStatus*)XMALLOC(sizeof(CertStatus), NULL,
+                status = (CertStatus*)XMALLOC(sizeof(CertStatus), ssl->heap,
                                                        DYNAMIC_TYPE_TMP_BUFFER);
-                response = (OcspResponse*)XMALLOC(sizeof(OcspResponse), NULL,
+                response = (OcspResponse*)XMALLOC(sizeof(OcspResponse), ssl->heap,
                                                        DYNAMIC_TYPE_TMP_BUFFER);
 
                 if (status == NULL || response == NULL) {
@@ -7416,8 +7750,8 @@ static int DoCertificateStatus(WOLFSSL* ssl, byte* input, word32* inOutIdx,
             *inOutIdx += status_length;
 
             #ifdef WOLFSSL_SMALL_STACK
-                XFREE(status,   NULL, DYNAMIC_TYPE_TMP_BUFFER);
-                XFREE(response, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+                XFREE(status,   ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                XFREE(response, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
             #endif
 
         }
@@ -7450,16 +7784,16 @@ static int DoCertificateStatus(WOLFSSL* ssl, byte* input, word32* inOutIdx,
             } while(0);
 
             #ifdef WOLFSSL_SMALL_STACK
-                status = (CertStatus*)XMALLOC(sizeof(CertStatus), NULL,
+                status = (CertStatus*)XMALLOC(sizeof(CertStatus), ssl->heap,
                                                        DYNAMIC_TYPE_TMP_BUFFER);
-                response = (OcspResponse*)XMALLOC(sizeof(OcspResponse), NULL,
+                response = (OcspResponse*)XMALLOC(sizeof(OcspResponse), ssl->heap,
                                                        DYNAMIC_TYPE_TMP_BUFFER);
 
                 if (status == NULL || response == NULL) {
                     if (status)
-                        XFREE(status, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+                        XFREE(status, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
                     if (response)
-                        XFREE(response, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+                        XFREE(response, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
 
                     return MEMORY_ERROR;
                 }
@@ -8048,12 +8382,12 @@ static int DoHandShakeMsgType(WOLFSSL* ssl, byte* input, word32* inOutIdx,
 
     case server_hello_done:
         WOLFSSL_MSG("processing server hello done");
-        #ifdef WOLFSSL_CALLBACKS
-            if (ssl->hsInfoOn)
-                AddPacketName("ServerHelloDone", &ssl->handShakeInfo);
-            if (ssl->toInfoOn)
-                AddLateName("ServerHelloDone", &ssl->timeoutInfo);
-        #endif
+    #ifdef WOLFSSL_CALLBACKS
+        if (ssl->hsInfoOn)
+            AddPacketName("ServerHelloDone", &ssl->handShakeInfo);
+        if (ssl->toInfoOn)
+            AddLateName("ServerHelloDone", &ssl->timeoutInfo);
+    #endif
         ssl->options.serverState = SERVER_HELLODONE_COMPLETE;
         if (IsEncryptionOn(ssl, 0)) {
             *inOutIdx += ssl->keys.padSz;
@@ -8806,185 +9140,250 @@ static int ChachaAEADDecrypt(WOLFSSL* ssl, byte* plain, const byte* input,
 #endif /* HAVE_AEAD */
 
 
-static INLINE int Encrypt(WOLFSSL* ssl, byte* out, const byte* input, word16 sz)
+static INLINE int EncryptDo(WOLFSSL* ssl, byte* out, const byte* input,
+    word16 sz, int asyncOkay)
 {
     int ret = 0;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    WC_ASYNC_DEV* asyncDev = NULL;
+    word32 event_flags = WC_ASYNC_FLAG_CALL_AGAIN;
+#else
+    (void)asyncOkay;
+#endif
 
     (void)out;
     (void)input;
     (void)sz;
 
-    if (ssl->encrypt.setup == 0) {
-        WOLFSSL_MSG("Encrypt ciphers not setup");
-        return ENCRYPT_ERROR;
-    }
-
-#ifdef HAVE_FUZZER
-    if (ssl->fuzzerCb)
-        ssl->fuzzerCb(ssl, input, sz, FUZZ_ENCRYPT, ssl->fuzzerCtx);
-#endif
-
     switch (ssl->specs.bulk_cipher_algorithm) {
-        #ifdef BUILD_ARC4
-            case wolfssl_rc4:
-                wc_Arc4Process(ssl->encrypt.arc4, out, input, sz);
-                break;
+    #ifdef BUILD_ARC4
+        case wolfssl_rc4:
+            wc_Arc4Process(ssl->encrypt.arc4, out, input, sz);
+            break;
+    #endif
+
+    #ifdef BUILD_DES3
+        case wolfssl_triple_des:
+            ret = wc_Des3_CbcEncrypt(ssl->encrypt.des3, out, input, sz);
+        #ifdef WOLFSSL_ASYNC_CRYPT
+            if (ret == WC_PENDING_E) {
+                asyncDev = &ssl->encrypt.des3->asyncDev;
+                if (asyncOkay)
+                    ret = wolfSSL_AsyncPush(ssl, asyncDev, event_flags);
+            }
         #endif
+            break;
+    #endif
 
-        #ifdef BUILD_DES3
-            case wolfssl_triple_des:
-                ret = wc_Des3_CbcEncrypt(ssl->encrypt.des3, out, input, sz);
+    #ifdef BUILD_AES
+        case wolfssl_aes:
+            ret = wc_AesCbcEncrypt(ssl->encrypt.aes, out, input, sz);
+        #ifdef WOLFSSL_ASYNC_CRYPT
+            if (ret == WC_PENDING_E) {
+                asyncDev = &ssl->encrypt.aes->asyncDev;
+                if (asyncOkay)
+                    ret = wolfSSL_AsyncPush(ssl, asyncDev, event_flags);
                 break;
+            }
         #endif
+            break;
+    #endif
 
-        #ifdef BUILD_AES
-            case wolfssl_aes:
-                ret = wc_AesCbcEncrypt(ssl->encrypt.aes, out, input, sz);
-                break;
+    #if defined(BUILD_AESGCM) || defined(HAVE_AESCCM)
+        case wolfssl_aes_gcm:
+        case wolfssl_aes_ccm:/* GCM AEAD macros use same size as CCM */
+        {
+            wc_AesAuthEncryptFunc aes_auth_fn;
+            const byte* additionalSrc = input - 5;
+        #if defined(BUILD_AESGCM) && defined(HAVE_AESCCM)
+            aes_auth_fn = (ssl->specs.bulk_cipher_algorithm == wolfssl_aes_gcm)
+                            ? wc_AesGcmEncrypt : wc_AesCcmEncrypt;
+        #elif defined(BUILD_AESGCM)
+            aes_auth_fn = wc_AesGcmEncrypt;
+        #else
+            aes_auth_fn = wc_AesCcmEncrypt;
         #endif
 
-        #ifdef BUILD_AESGCM
-            case wolfssl_aes_gcm:
-                {
-                    byte additional[AEAD_AUTH_DATA_SZ];
-                    byte nonce[AESGCM_NONCE_SZ];
-                    const byte* additionalSrc = input - 5;
+            XMEMSET(ssl->encrypt.additional, 0, AEAD_AUTH_DATA_SZ);
 
-                    XMEMSET(additional, 0, AEAD_AUTH_DATA_SZ);
+            /* sequence number field is 64-bits */
+            WriteSEQ(ssl, CUR_ORDER, ssl->encrypt.additional);
 
-                    /* sequence number field is 64-bits */
-                    WriteSEQ(ssl, CUR_ORDER, additional);
-
-                    /* Store the type, version. Unfortunately, they are in
-                     * the input buffer ahead of the plaintext. */
-                    #ifdef WOLFSSL_DTLS
-                        if (ssl->options.dtls) {
-                            additionalSrc -= DTLS_HANDSHAKE_EXTRA;
-                        }
-                    #endif
-                    XMEMCPY(additional + AEAD_TYPE_OFFSET, additionalSrc, 3);
-
-                    /* Store the length of the plain text minus the explicit
-                     * IV length minus the authentication tag size. */
-                    c16toa(sz - AESGCM_EXP_IV_SZ - ssl->specs.aead_mac_size,
-                                                additional + AEAD_LEN_OFFSET);
-                    XMEMCPY(nonce,
-                                 ssl->keys.aead_enc_imp_IV, AESGCM_IMP_IV_SZ);
-                    XMEMCPY(nonce + AESGCM_IMP_IV_SZ,
-                                     ssl->keys.aead_exp_IV, AESGCM_EXP_IV_SZ);
-                    ret = wc_AesGcmEncrypt(ssl->encrypt.aes,
-                               out + AESGCM_EXP_IV_SZ, input + AESGCM_EXP_IV_SZ,
-                               sz - AESGCM_EXP_IV_SZ - ssl->specs.aead_mac_size,
-                                 nonce, AESGCM_NONCE_SZ,
-                                 out + sz - ssl->specs.aead_mac_size,
-                                 ssl->specs.aead_mac_size,
-                                 additional, AEAD_AUTH_DATA_SZ);
-                    AeadIncrementExpIV(ssl);
-                    ForceZero(nonce, AESGCM_NONCE_SZ);
-                    #ifdef WOLFSSL_DTLS
-                        if (ssl->options.dtls)
-                            DtlsSEQIncrement(ssl, CUR_ORDER);
-                    #endif
-                }
-                break;
+            /* Store the type, version. Unfortunately, they are in
+             * the input buffer ahead of the plaintext. */
+        #ifdef WOLFSSL_DTLS
+            if (ssl->options.dtls) {
+                additionalSrc -= DTLS_HANDSHAKE_EXTRA;
+            }
         #endif
+            XMEMCPY(ssl->encrypt.additional + AEAD_TYPE_OFFSET,
+                                                        additionalSrc, 3);
 
-        #ifdef HAVE_AESCCM
-            /* AEAD CCM uses same size as macros for AESGCM */
-            case wolfssl_aes_ccm:
-                {
-                    byte additional[AEAD_AUTH_DATA_SZ];
-                    byte nonce[AESGCM_NONCE_SZ];
-                    const byte* additionalSrc = input - 5;
-
-                    XMEMSET(additional, 0, AEAD_AUTH_DATA_SZ);
-
-                    /* sequence number field is 64-bits */
-                    WriteSEQ(ssl, CUR_ORDER, additional);
-
-                    /* Store the type, version. Unfortunately, they are in
-                     * the input buffer ahead of the plaintext. */
-                    #ifdef WOLFSSL_DTLS
-                        if (ssl->options.dtls) {
-                            additionalSrc -= DTLS_HANDSHAKE_EXTRA;
-                        }
-                    #endif
-                    XMEMCPY(additional + AEAD_TYPE_OFFSET, additionalSrc, 3);
-
-                    /* Store the length of the plain text minus the explicit
-                     * IV length minus the authentication tag size. */
-                    c16toa(sz - AESGCM_EXP_IV_SZ - ssl->specs.aead_mac_size,
-                                                additional + AEAD_LEN_OFFSET);
-                    XMEMCPY(nonce,
-                                 ssl->keys.aead_enc_imp_IV, AESGCM_IMP_IV_SZ);
-                    XMEMCPY(nonce + AESGCM_IMP_IV_SZ,
-                                     ssl->keys.aead_exp_IV, AESGCM_EXP_IV_SZ);
-                    ret = wc_AesCcmEncrypt(ssl->encrypt.aes,
-                        out + AESGCM_EXP_IV_SZ, input + AESGCM_EXP_IV_SZ,
-                            sz - AESGCM_EXP_IV_SZ - ssl->specs.aead_mac_size,
-                        nonce, AESGCM_NONCE_SZ,
-                        out + sz - ssl->specs.aead_mac_size,
-                        ssl->specs.aead_mac_size,
-                        additional, AEAD_AUTH_DATA_SZ);
-                    AeadIncrementExpIV(ssl);
-                    ForceZero(nonce, AESGCM_NONCE_SZ);
-                    #ifdef WOLFSSL_DTLS
-                        if (ssl->options.dtls)
-                            DtlsSEQIncrement(ssl, CUR_ORDER);
-                    #endif
-                }
-                break;
+            /* Store the length of the plain text minus the explicit
+             * IV length minus the authentication tag size. */
+            c16toa(sz - AESGCM_EXP_IV_SZ - ssl->specs.aead_mac_size,
+                                ssl->encrypt.additional + AEAD_LEN_OFFSET);
+            XMEMCPY(ssl->encrypt.nonce,
+                                ssl->keys.aead_enc_imp_IV, AESGCM_IMP_IV_SZ);
+            XMEMCPY(ssl->encrypt.nonce + AESGCM_IMP_IV_SZ,
+                                ssl->keys.aead_exp_IV, AESGCM_EXP_IV_SZ);
+            ret = aes_auth_fn(ssl->encrypt.aes,
+                    out + AESGCM_EXP_IV_SZ, input + AESGCM_EXP_IV_SZ,
+                    sz - AESGCM_EXP_IV_SZ - ssl->specs.aead_mac_size,
+                    ssl->encrypt.nonce, AESGCM_NONCE_SZ,
+                    out + sz - ssl->specs.aead_mac_size,
+                    ssl->specs.aead_mac_size,
+                    ssl->encrypt.additional, AEAD_AUTH_DATA_SZ);
+        #ifdef WOLFSSL_ASYNC_CRYPT
+            if (ret == WC_PENDING_E) {
+                asyncDev = &ssl->encrypt.aes->asyncDev;
+                if (asyncOkay)
+                    ret = wolfSSL_AsyncPush(ssl, asyncDev, event_flags);
+            }
         #endif
+        }
+        break;
+    #endif /* BUILD_AESGCM || HAVE_AESCCM */
 
-        #ifdef HAVE_CAMELLIA
-            case wolfssl_camellia:
-                wc_CamelliaCbcEncrypt(ssl->encrypt.cam, out, input, sz);
-                break;
-        #endif
+    #ifdef HAVE_CAMELLIA
+        case wolfssl_camellia:
+            wc_CamelliaCbcEncrypt(ssl->encrypt.cam, out, input, sz);
+            break;
+    #endif
 
-        #ifdef HAVE_HC128
-            case wolfssl_hc128:
-                ret = wc_Hc128_Process(ssl->encrypt.hc128, out, input, sz);
-                break;
-        #endif
+    #ifdef HAVE_HC128
+        case wolfssl_hc128:
+            ret = wc_Hc128_Process(ssl->encrypt.hc128, out, input, sz);
+            break;
+    #endif
 
-        #ifdef BUILD_RABBIT
-            case wolfssl_rabbit:
-                ret = wc_RabbitProcess(ssl->encrypt.rabbit, out, input, sz);
-                break;
-        #endif
+    #ifdef BUILD_RABBIT
+        case wolfssl_rabbit:
+            ret = wc_RabbitProcess(ssl->encrypt.rabbit, out, input, sz);
+            break;
+    #endif
 
-        #if defined(HAVE_CHACHA) && defined(HAVE_POLY1305)
-            case wolfssl_chacha:
-                ret = ChachaAEADEncrypt(ssl, out, input, sz);
-                break;
-        #endif
+    #if defined(HAVE_CHACHA) && defined(HAVE_POLY1305)
+        case wolfssl_chacha:
+            ret = ChachaAEADEncrypt(ssl, out, input, sz);
+            break;
+    #endif
 
-        #ifdef HAVE_NULL_CIPHER
-            case wolfssl_cipher_null:
-                if (input != out) {
-                    XMEMMOVE(out, input, sz);
-                }
-                break;
-        #endif
+    #ifdef HAVE_NULL_CIPHER
+        case wolfssl_cipher_null:
+            if (input != out) {
+                XMEMMOVE(out, input, sz);
+            }
+            break;
+    #endif
 
-        #ifdef HAVE_IDEA
-            case wolfssl_idea:
-                ret = wc_IdeaCbcEncrypt(ssl->encrypt.idea, out, input, sz);
-                break;
-        #endif
+    #ifdef HAVE_IDEA
+        case wolfssl_idea:
+            ret = wc_IdeaCbcEncrypt(ssl->encrypt.idea, out, input, sz);
+            break;
+    #endif
 
-            default:
-                WOLFSSL_MSG("wolfSSL Encrypt programming error");
-                ret = ENCRYPT_ERROR;
+        default:
+            WOLFSSL_MSG("wolfSSL Encrypt programming error");
+            ret = ENCRYPT_ERROR;
     }
 
+#ifdef WOLFSSL_ASYNC_CRYPT
+    /* if async is not okay, then block */
+    if (ret == WC_PENDING_E && !asyncOkay) {
+        ret = wc_AsyncWait(ret, asyncDev, event_flags);
+    }
+#endif
+
     return ret;
 }
 
+static INLINE int Encrypt(WOLFSSL* ssl, byte* out, const byte* input, word16 sz,
+    int asyncOkay)
+{
+    int ret = 0;
 
+    if (asyncOkay && ssl->error == WC_PENDING_E) {
+        ssl->error = 0; /* clear async */
+    }
 
-static INLINE int Decrypt(WOLFSSL* ssl, byte* plain, const byte* input,
+    switch (ssl->encrypt.state) {
+        case CIPHER_STATE_BEGIN:
+        {
+            if (ssl->encrypt.setup == 0) {
+                WOLFSSL_MSG("Encrypt ciphers not setup");
+                return ENCRYPT_ERROR;
+            }
+
+        #ifdef HAVE_FUZZER
+            if (ssl->fuzzerCb)
+                ssl->fuzzerCb(ssl, input, sz, FUZZ_ENCRYPT, ssl->fuzzerCtx);
+        #endif
+
+        #if defined(BUILD_AESGCM) || defined(HAVE_AESCCM)
+            /* make sure AES GCM/CCM memory is allocated */
+            /* free for these happens in FreeCiphers */
+            if (ssl->specs.bulk_cipher_algorithm == wolfssl_aes_ccm ||
+                ssl->specs.bulk_cipher_algorithm == wolfssl_aes_gcm) {
+                /* make sure auth iv and auth are allocated */
+                if (ssl->encrypt.additional == NULL)
+                    ssl->encrypt.additional = (byte*)XMALLOC(AEAD_AUTH_DATA_SZ,
+                                                   ssl->heap, DYNAMIC_TYPE_AES);
+                if (ssl->encrypt.nonce == NULL)
+                    ssl->encrypt.nonce = (byte*)XMALLOC(AESGCM_NONCE_SZ,
+                                                   ssl->heap, DYNAMIC_TYPE_AES);
+                if (ssl->encrypt.additional == NULL ||
+                         ssl->encrypt.nonce == NULL) {
+                    return MEMORY_E;
+                }
+            }
+        #endif /* BUILD_AESGCM || HAVE_AESCCM */
+
+            /* Advance state and proceed */
+            ssl->encrypt.state = CIPHER_STATE_DO;
+        } /* fall through to CIPHER_STATE_DO */
+        case CIPHER_STATE_DO:
+        {
+            ret = EncryptDo(ssl, out, input, sz, asyncOkay);
+
+            /* Advance state first so a pending return resumes at END */
+            ssl->encrypt.state = CIPHER_STATE_END;
+
+            /* If pending, leave now; the next call enters CIPHER_STATE_END */
+            if (ret == WC_PENDING_E) {
+                return ret;
+            }
+        } /* fall through to CIPHER_STATE_END */
+
+        case CIPHER_STATE_END:
+        {
+        #if defined(BUILD_AESGCM) || defined(HAVE_AESCCM)
+            if (ssl->specs.bulk_cipher_algorithm == wolfssl_aes_ccm ||
+                ssl->specs.bulk_cipher_algorithm == wolfssl_aes_gcm)
+            {
+                /* finalize authentication cipher */
+                AeadIncrementExpIV(ssl);
+
+                if (ssl->encrypt.nonce)
+                    ForceZero(ssl->encrypt.nonce, AESGCM_NONCE_SZ);
+
+            #ifdef WOLFSSL_DTLS
+                if (ssl->options.dtls)
+                    DtlsSEQIncrement(ssl, CUR_ORDER);
+            #endif
+            }
+        #endif /* BUILD_AESGCM || HAVE_AESCCM */
+            break;
+        }
+    }
+
+    /* Reset state */
+    ssl->encrypt.state = CIPHER_STATE_BEGIN;
+
+    return ret;
+}
+
+static INLINE int DecryptDo(WOLFSSL* ssl, byte* plain, const byte* input,
                            word16 sz)
 {
     int ret = 0;
@@ -8993,144 +9392,224 @@ static INLINE int Decrypt(WOLFSSL* ssl, byte* plain, const byte* input,
     (void)input;
     (void)sz;
 
-    if (ssl->decrypt.setup == 0) {
-        WOLFSSL_MSG("Decrypt ciphers not setup");
-        return DECRYPT_ERROR;
+    switch (ssl->specs.bulk_cipher_algorithm)
+    {
+    #ifdef BUILD_ARC4
+        case wolfssl_rc4:
+            wc_Arc4Process(ssl->decrypt.arc4, plain, input, sz);
+            break;
+    #endif
+
+    #ifdef BUILD_DES3
+        case wolfssl_triple_des:
+            ret = wc_Des3_CbcDecrypt(ssl->decrypt.des3, plain, input, sz);
+        #ifdef WOLFSSL_ASYNC_CRYPT
+            if (ret == WC_PENDING_E) {
+                ret = wolfSSL_AsyncPush(ssl, &ssl->decrypt.des3->asyncDev,
+                                                    WC_ASYNC_FLAG_CALL_AGAIN);
+            }
+        #endif
+            break;
+    #endif
+
+    #ifdef BUILD_AES
+        case wolfssl_aes:
+            ret = wc_AesCbcDecrypt(ssl->decrypt.aes, plain, input, sz);
+        #ifdef WOLFSSL_ASYNC_CRYPT
+            if (ret == WC_PENDING_E) {
+                ret = wolfSSL_AsyncPush(ssl, &ssl->decrypt.aes->asyncDev,
+                                                    WC_ASYNC_FLAG_CALL_AGAIN);
+            }
+        #endif
+            break;
+    #endif
+
+    #if defined(BUILD_AESGCM) || defined(HAVE_AESCCM)
+        case wolfssl_aes_gcm:
+        case wolfssl_aes_ccm: /* GCM AEAD macros use same size as CCM */
+        {
+            wc_AesAuthDecryptFunc aes_auth_fn;
+        #if defined(BUILD_AESGCM) && defined(HAVE_AESCCM)
+            aes_auth_fn = (ssl->specs.bulk_cipher_algorithm == wolfssl_aes_gcm)
+                            ? wc_AesGcmDecrypt : wc_AesCcmDecrypt;
+        #elif defined(BUILD_AESGCM)
+            aes_auth_fn = wc_AesGcmDecrypt;
+        #else
+            aes_auth_fn = wc_AesCcmDecrypt;
+        #endif
+
+            XMEMSET(ssl->decrypt.additional, 0, AEAD_AUTH_DATA_SZ);
+
+            /* sequence number field is 64-bits */
+            WriteSEQ(ssl, PEER_ORDER, ssl->decrypt.additional);
+
+            ssl->decrypt.additional[AEAD_TYPE_OFFSET] = ssl->curRL.type;
+            ssl->decrypt.additional[AEAD_VMAJ_OFFSET] = ssl->curRL.pvMajor;
+            ssl->decrypt.additional[AEAD_VMIN_OFFSET] = ssl->curRL.pvMinor;
+
+            c16toa(sz - AESGCM_EXP_IV_SZ - ssl->specs.aead_mac_size,
+                                    ssl->decrypt.additional + AEAD_LEN_OFFSET);
+            XMEMCPY(ssl->decrypt.nonce, ssl->keys.aead_dec_imp_IV,
+                                                            AESGCM_IMP_IV_SZ);
+            XMEMCPY(ssl->decrypt.nonce + AESGCM_IMP_IV_SZ, input,
+                                                            AESGCM_EXP_IV_SZ);
+            if ((ret = aes_auth_fn(ssl->decrypt.aes,
+                        plain + AESGCM_EXP_IV_SZ,
+                        input + AESGCM_EXP_IV_SZ,
+                           sz - AESGCM_EXP_IV_SZ - ssl->specs.aead_mac_size,
+                        ssl->decrypt.nonce, AESGCM_NONCE_SZ,
+                        input + sz - ssl->specs.aead_mac_size,
+                        ssl->specs.aead_mac_size,
+                        ssl->decrypt.additional, AEAD_AUTH_DATA_SZ)) < 0) {
+            #ifdef WOLFSSL_ASYNC_CRYPT
+                if (ret == WC_PENDING_E) {
+                    ret = wolfSSL_AsyncPush(ssl,
+                        &ssl->decrypt.aes->asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+                    break;
+                }
+            #endif
+            }
+        }
+        break;
+    #endif /* BUILD_AESGCM || HAVE_AESCCM */
+
+    #ifdef HAVE_CAMELLIA
+        case wolfssl_camellia:
+            wc_CamelliaCbcDecrypt(ssl->decrypt.cam, plain, input, sz);
+            break;
+    #endif
+
+    #ifdef HAVE_HC128
+        case wolfssl_hc128:
+            ret = wc_Hc128_Process(ssl->decrypt.hc128, plain, input, sz);
+            break;
+    #endif
+
+    #ifdef BUILD_RABBIT
+        case wolfssl_rabbit:
+            ret = wc_RabbitProcess(ssl->decrypt.rabbit, plain, input, sz);
+            break;
+    #endif
+
+    #if defined(HAVE_CHACHA) && defined(HAVE_POLY1305)
+        case wolfssl_chacha:
+            ret = ChachaAEADDecrypt(ssl, plain, input, sz);
+            break;
+    #endif
+
+    #ifdef HAVE_NULL_CIPHER
+        case wolfssl_cipher_null:
+            if (input != plain) {
+                XMEMMOVE(plain, input, sz);
+            }
+            break;
+    #endif
+
+    #ifdef HAVE_IDEA
+        case wolfssl_idea:
+            ret = wc_IdeaCbcDecrypt(ssl->decrypt.idea, plain, input, sz);
+            break;
+    #endif
+
+        default:
+            WOLFSSL_MSG("wolfSSL Decrypt programming error");
+            ret = DECRYPT_ERROR;
     }
 
-    switch (ssl->specs.bulk_cipher_algorithm) {
-        #ifdef BUILD_ARC4
-            case wolfssl_rc4:
-                wc_Arc4Process(ssl->decrypt.arc4, plain, input, sz);
-                break;
-        #endif
+    return ret;
+}
 
-        #ifdef BUILD_DES3
-            case wolfssl_triple_des:
-                ret = wc_Des3_CbcDecrypt(ssl->decrypt.des3, plain, input, sz);
-                break;
-        #endif
+static INLINE int Decrypt(WOLFSSL* ssl, byte* plain, const byte* input,
+                           word16 sz)
+{
+    int ret = 0;
 
-        #ifdef BUILD_AES
-            case wolfssl_aes:
-                ret = wc_AesCbcDecrypt(ssl->decrypt.aes, plain, input, sz);
-                break;
-        #endif
+#ifdef WOLFSSL_ASYNC_CRYPT
+    ret = wolfSSL_AsyncPop(ssl, &ssl->decrypt.state);
+    if (ret != WC_NOT_PENDING_E) {
+        /* check for still pending */
+        if (ret == WC_PENDING_E)
+            return ret;
 
-        #ifdef BUILD_AESGCM
-            case wolfssl_aes_gcm:
-            {
-                byte additional[AEAD_AUTH_DATA_SZ];
-                byte nonce[AESGCM_NONCE_SZ];
+        ssl->error = 0; /* clear async */
 
-                XMEMSET(additional, 0, AEAD_AUTH_DATA_SZ);
+        /* let failures through so CIPHER_STATE_END logic is run */
+    }
+    else
+#endif
+    {
+        /* Reset state */
+        ret = 0;
+        ssl->decrypt.state = CIPHER_STATE_BEGIN;
+    }
 
-                /* sequence number field is 64-bits */
-                WriteSEQ(ssl, PEER_ORDER, additional);
-
-                additional[AEAD_TYPE_OFFSET] = ssl->curRL.type;
-                additional[AEAD_VMAJ_OFFSET] = ssl->curRL.pvMajor;
-                additional[AEAD_VMIN_OFFSET] = ssl->curRL.pvMinor;
-
-                c16toa(sz - AESGCM_EXP_IV_SZ - ssl->specs.aead_mac_size,
-                                        additional + AEAD_LEN_OFFSET);
-                XMEMCPY(nonce, ssl->keys.aead_dec_imp_IV, AESGCM_IMP_IV_SZ);
-                XMEMCPY(nonce + AESGCM_IMP_IV_SZ, input, AESGCM_EXP_IV_SZ);
-                if (wc_AesGcmDecrypt(ssl->decrypt.aes,
-                            plain + AESGCM_EXP_IV_SZ,
-                            input + AESGCM_EXP_IV_SZ,
-                               sz - AESGCM_EXP_IV_SZ - ssl->specs.aead_mac_size,
-                            nonce, AESGCM_NONCE_SZ,
-                            input + sz - ssl->specs.aead_mac_size,
-                            ssl->specs.aead_mac_size,
-                            additional, AEAD_AUTH_DATA_SZ) < 0) {
-                    if (!ssl->options.dtls)
-                        SendAlert(ssl, alert_fatal, bad_record_mac);
-                    ret = VERIFY_MAC_ERROR;
-                }
-                ForceZero(nonce, AESGCM_NONCE_SZ);
+    switch (ssl->decrypt.state) {
+        case CIPHER_STATE_BEGIN:
+        {
+            if (ssl->decrypt.setup == 0) {
+                WOLFSSL_MSG("Decrypt ciphers not setup");
+                return DECRYPT_ERROR;
             }
-            break;
-        #endif
 
-        #ifdef HAVE_AESCCM
-            /* AESGCM AEAD macros use same size as AESCCM */
-            case wolfssl_aes_ccm:
-            {
-                byte additional[AEAD_AUTH_DATA_SZ];
-                byte nonce[AESGCM_NONCE_SZ];
-
-                XMEMSET(additional, 0, AEAD_AUTH_DATA_SZ);
-
-                /* sequence number field is 64-bits */
-                WriteSEQ(ssl, PEER_ORDER, additional);
-
-                additional[AEAD_TYPE_OFFSET] = ssl->curRL.type;
-                additional[AEAD_VMAJ_OFFSET] = ssl->curRL.pvMajor;
-                additional[AEAD_VMIN_OFFSET] = ssl->curRL.pvMinor;
-
-                c16toa(sz - AESGCM_EXP_IV_SZ - ssl->specs.aead_mac_size,
-                                        additional + AEAD_LEN_OFFSET);
-                XMEMCPY(nonce, ssl->keys.aead_dec_imp_IV, AESGCM_IMP_IV_SZ);
-                XMEMCPY(nonce + AESGCM_IMP_IV_SZ, input, AESGCM_EXP_IV_SZ);
-                if (wc_AesCcmDecrypt(ssl->decrypt.aes,
-                            plain + AESGCM_EXP_IV_SZ,
-                            input + AESGCM_EXP_IV_SZ,
-                               sz - AESGCM_EXP_IV_SZ - ssl->specs.aead_mac_size,
-                            nonce, AESGCM_NONCE_SZ,
-                            input + sz - ssl->specs.aead_mac_size,
-                            ssl->specs.aead_mac_size,
-                            additional, AEAD_AUTH_DATA_SZ) < 0) {
-                    if (!ssl->options.dtls)
-                        SendAlert(ssl, alert_fatal, bad_record_mac);
-                    ret = VERIFY_MAC_ERROR;
+        #if defined(BUILD_AESGCM) || defined(HAVE_AESCCM)
+            /* make sure AES GCM/CCM memory is allocated */
+            /* free for these happens in FreeCiphers */
+            if (ssl->specs.bulk_cipher_algorithm == wolfssl_aes_ccm ||
+                ssl->specs.bulk_cipher_algorithm == wolfssl_aes_gcm) {
+                /* make sure auth iv and auth are allocated */
+                if (ssl->decrypt.additional == NULL)
+                    ssl->decrypt.additional = (byte*)XMALLOC(AEAD_AUTH_DATA_SZ,
+                                                   ssl->heap, DYNAMIC_TYPE_AES);
+                if (ssl->decrypt.nonce == NULL)
+                    ssl->decrypt.nonce = (byte*)XMALLOC(AESGCM_NONCE_SZ,
+                                                   ssl->heap, DYNAMIC_TYPE_AES);
+                if (ssl->decrypt.additional == NULL ||
+                         ssl->decrypt.nonce == NULL) {
+                    return MEMORY_E;
                 }
-                ForceZero(nonce, AESGCM_NONCE_SZ);
             }
+        #endif /* BUILD_AESGCM || HAVE_AESCCM */
+
+            /* Advance state and proceed */
+            ssl->decrypt.state = CIPHER_STATE_DO;
+        }
+        case CIPHER_STATE_DO:
+        {
+            ret = DecryptDo(ssl, plain, input, sz);
+
+            /* Advance state */
+            ssl->decrypt.state = CIPHER_STATE_END;
+
+            /* If pending, then leave and return will resume below */
+            if (ret == WC_PENDING_E) {
+                return ret;
+            }
+        }
+
+        case CIPHER_STATE_END:
+        {
+        #if defined(BUILD_AESGCM) || defined(HAVE_AESCCM)
+            /* make sure AES GCM/CCM nonce is cleared */
+            if (ssl->specs.bulk_cipher_algorithm == wolfssl_aes_ccm ||
+                ssl->specs.bulk_cipher_algorithm == wolfssl_aes_gcm) {
+                if (ssl->decrypt.nonce)
+                    ForceZero(ssl->decrypt.nonce, AESGCM_NONCE_SZ);
+
+                if (ret < 0)
+                    ret = VERIFY_MAC_ERROR;
+            }
+        #endif /* BUILD_AESGCM || HAVE_AESCCM */
             break;
-        #endif
+        }
+    }
 
-        #ifdef HAVE_CAMELLIA
-            case wolfssl_camellia:
-                wc_CamelliaCbcDecrypt(ssl->decrypt.cam, plain, input, sz);
-                break;
-        #endif
+    /* Reset state */
+    ssl->decrypt.state = CIPHER_STATE_BEGIN;
 
-        #ifdef HAVE_HC128
-            case wolfssl_hc128:
-                ret = wc_Hc128_Process(ssl->decrypt.hc128, plain, input, sz);
-                break;
-        #endif
-
-        #ifdef BUILD_RABBIT
-            case wolfssl_rabbit:
-                ret = wc_RabbitProcess(ssl->decrypt.rabbit, plain, input, sz);
-                break;
-        #endif
-
-        #if defined(HAVE_CHACHA) && defined(HAVE_POLY1305)
-            case wolfssl_chacha:
-                ret = ChachaAEADDecrypt(ssl, plain, input, sz);
-                break;
-        #endif
-
-        #ifdef HAVE_NULL_CIPHER
-            case wolfssl_cipher_null:
-                if (input != plain) {
-                    XMEMMOVE(plain, input, sz);
-                }
-                break;
-        #endif
-
-        #ifdef HAVE_IDEA
-            case wolfssl_idea:
-                ret = wc_IdeaCbcDecrypt(ssl->decrypt.idea, plain, input, sz);
-                break;
-        #endif
-
-            default:
-                WOLFSSL_MSG("wolfSSL Decrypt programming error");
-                ret = DECRYPT_ERROR;
+    /* handle mac error case */
+    if (ret == VERIFY_MAC_ERROR) {
+        if (!ssl->options.dtls)
+            SendAlert(ssl, alert_fatal, bad_record_mac);
     }
 
     return ret;
@@ -9183,11 +9662,11 @@ static INLINE void Md5Rounds(int rounds, const byte* data, int sz)
     Md5 md5;
     int i;
 
-    wc_InitMd5(&md5);
+    wc_InitMd5(&md5);   /* no error check on purpose, dummy round */
 
     for (i = 0; i < rounds; i++)
         wc_Md5Update(&md5, data, sz);
-    wc_Md5Free(&md5) ; /* in case needed to release resources */
+    wc_Md5Free(&md5); /* in case needed to release resources */
 }
 
 
@@ -9202,7 +9681,7 @@ static INLINE void ShaRounds(int rounds, const byte* data, int sz)
 
     for (i = 0; i < rounds; i++)
         wc_ShaUpdate(&sha, data, sz);
-    wc_ShaFree(&sha) ; /* in case needed to release resources */
+    wc_ShaFree(&sha); /* in case needed to release resources */
 }
 #endif
 
@@ -9220,7 +9699,7 @@ static INLINE void Sha256Rounds(int rounds, const byte* data, int sz)
         wc_Sha256Update(&sha256, data, sz);
         /* no error check on purpose, dummy round */
     }
-    wc_Sha256Free(&sha256) ; /* in case needed to release resources */
+    wc_Sha256Free(&sha256); /* in case needed to release resources */
 }
 
 #endif
@@ -9239,7 +9718,7 @@ static INLINE void Sha384Rounds(int rounds, const byte* data, int sz)
         wc_Sha384Update(&sha384, data, sz);
         /* no error check on purpose, dummy round */
     }
-    wc_Sha384Free(&sha384) ; /* in case needed to release resources */
+    wc_Sha384Free(&sha384); /* in case needed to release resources */
 }
 
 #endif
@@ -9258,7 +9737,7 @@ static INLINE void Sha512Rounds(int rounds, const byte* data, int sz)
         wc_Sha512Update(&sha512, data, sz);
         /* no error check on purpose, dummy round */
     }
-    wc_Sha512Free(&sha512) ; /* in case needed to release resources */
+    wc_Sha512Free(&sha512); /* in case needed to release resources */
 }
 
 #endif
@@ -9424,9 +9903,11 @@ static int TimingPadVerify(WOLFSSL* ssl, const byte* input, int padLen, int t,
         return VERIFY_MAC_ERROR;
     }
 
+    /* treat any failure as verify MAC error */
     if (ret != 0)
-        return VERIFY_MAC_ERROR;
-    return 0;
+        ret = VERIFY_MAC_ERROR;
+
+    return ret;
 }
 
 
@@ -9502,9 +9983,10 @@ static int DoAlert(WOLFSSL* ssl, byte* input, word32* inOutIdx, int* type,
         if (ssl->hsInfoOn)
             AddPacketName("Alert", &ssl->handShakeInfo);
         if (ssl->toInfoOn)
-            /* add record header back on to info + 2 byte level, data */
+            /* add record header back on to info + alert bytes level/code */
             AddPacketInfo("Alert", &ssl->timeoutInfo, input + *inOutIdx -
-                          RECORD_HEADER_SZ, 2 + RECORD_HEADER_SZ, ssl->heap);
+                          RECORD_HEADER_SZ, RECORD_HEADER_SZ + ALERT_SIZE,
+                          ssl->heap);
     #endif
 
     /* make sure can read the message */
@@ -9702,10 +10184,10 @@ int ProcessReply(WOLFSSL* ssl)
 
             readSz = RECORD_HEADER_SZ;
 
-            #ifdef WOLFSSL_DTLS
-                if (ssl->options.dtls)
-                    readSz = DTLS_RECORD_HEADER_SZ;
-            #endif
+        #ifdef WOLFSSL_DTLS
+            if (ssl->options.dtls)
+                readSz = DTLS_RECORD_HEADER_SZ;
+        #endif
 
             /* get header or return error */
             if (!ssl->options.dtls) {
@@ -9716,9 +10198,10 @@ int ProcessReply(WOLFSSL* ssl)
                 /* read ahead may already have header */
                 used = ssl->buffers.inputBuffer.length -
                        ssl->buffers.inputBuffer.idx;
-                if (used < readSz)
+                if (used < readSz) {
                     if ((ret = GetInputData(ssl, readSz)) < 0)
                         return ret;
+                }
             #endif
             }
 
@@ -9735,15 +10218,15 @@ int ProcessReply(WOLFSSL* ssl)
 
                 /* sanity checks before getting size at front */
                 if (ssl->buffers.inputBuffer.buffer[
-                          ssl->buffers.inputBuffer.idx + 2] != OLD_HELLO_ID) {
+                          ssl->buffers.inputBuffer.idx + OPAQUE16_LEN] != OLD_HELLO_ID) {
                     WOLFSSL_MSG("Not a valid old client hello");
                     return PARSE_ERROR;
                 }
 
                 if (ssl->buffers.inputBuffer.buffer[
-                          ssl->buffers.inputBuffer.idx + 3] != SSLv3_MAJOR &&
+                          ssl->buffers.inputBuffer.idx + OPAQUE24_LEN] != SSLv3_MAJOR &&
                     ssl->buffers.inputBuffer.buffer[
-                          ssl->buffers.inputBuffer.idx + 3] != DTLS_MAJOR) {
+                          ssl->buffers.inputBuffer.idx + OPAQUE24_LEN] != DTLS_MAJOR) {
                     WOLFSSL_MSG("Not a valid version in old client hello");
                     return PARSE_ERROR;
                 }
@@ -9839,14 +10322,13 @@ int ProcessReply(WOLFSSL* ssl)
 #endif
             }
 
-            ssl->options.processReply = runProcessingOneMessage;
+            ssl->options.processReply = decryptMessage;
             startIdx = ssl->buffers.inputBuffer.idx;  /* in case > 1 msg per */
 
-        /* the record layer is here */
-        case runProcessingOneMessage:
+        /* decrypt message */
+        case decryptMessage:
 
-            if (IsEncryptionOn(ssl, 0) && ssl->keys.decryptedCur == 0)
-            {
+            if (IsEncryptionOn(ssl, 0) && ssl->keys.decryptedCur == 0) {
                 ret = SanityCheckCipherText(ssl, ssl->curSize);
                 if (ret < 0)
                     return ret;
@@ -9860,12 +10342,6 @@ int ProcessReply(WOLFSSL* ssl)
                                   ssl->buffers.inputBuffer.idx,
                                   ssl->curSize, ssl->curRL.type, 1,
                                   &ssl->keys.padSz, ssl->DecryptVerifyCtx);
-                    if (ssl->options.tls1_1 && ssl->specs.cipher_type == block)
-                        ssl->buffers.inputBuffer.idx += ssl->specs.block_size;
-                        /* go past TLSv1.1 IV */
-                    if (ssl->specs.cipher_type == aead &&
-                            ssl->specs.bulk_cipher_algorithm != wolfssl_chacha)
-                        ssl->buffers.inputBuffer.idx += AESGCM_EXP_IV_SZ;
                 #endif /* ATOMIC_USER */
                 }
                 else {
@@ -9874,46 +10350,72 @@ int ProcessReply(WOLFSSL* ssl)
                                   ssl->buffers.inputBuffer.buffer +
                                   ssl->buffers.inputBuffer.idx,
                                   ssl->curSize);
-                    if (ret < 0) {
-                        WOLFSSL_MSG("Decrypt failed");
-                        WOLFSSL_ERROR(ret);
-                        #ifdef WOLFSSL_DTLS
-                            /* If in DTLS mode, if the decrypt fails for any
-                             * reason, pretend the datagram never happened. */
-                            if (ssl->options.dtls) {
-                                ssl->options.processReply = doProcessInit;
-                                ssl->buffers.inputBuffer.idx =
-                                                ssl->buffers.inputBuffer.length;
-                            }
-                        #endif /* WOLFSSL_DTLS */
-                        return DECRYPT_ERROR;
-                    }
+                }
+            #ifdef WOLFSSL_ASYNC_CRYPT
+                if (ret == WC_PENDING_E)
+                    return ret;
+            #endif
+
+                if (ret == 0) {
+                    /* handle success */
                     if (ssl->options.tls1_1 && ssl->specs.cipher_type == block)
                         ssl->buffers.inputBuffer.idx += ssl->specs.block_size;
                         /* go past TLSv1.1 IV */
                     if (ssl->specs.cipher_type == aead &&
                             ssl->specs.bulk_cipher_algorithm != wolfssl_chacha)
                         ssl->buffers.inputBuffer.idx += AESGCM_EXP_IV_SZ;
+                }
+                else {
+                    WOLFSSL_MSG("Decrypt failed");
+                    WOLFSSL_ERROR(ret);
+                #ifdef WOLFSSL_DTLS
+                    /* If in DTLS mode, if the decrypt fails for any
+                     * reason, pretend the datagram never happened. */
+                    if (ssl->options.dtls) {
+                        ssl->options.processReply = doProcessInit;
+                        ssl->buffers.inputBuffer.idx =
+                                        ssl->buffers.inputBuffer.length;
+                    }
+                #endif /* WOLFSSL_DTLS */
+                    return DECRYPT_ERROR;
+                }
+            }
 
+            ssl->options.processReply = verifyMessage;
+
+        /* verify digest of message */
+        case verifyMessage:
+
+            if (IsEncryptionOn(ssl, 0) && ssl->keys.decryptedCur == 0) {
+                if (!atomicUser) {
                     ret = VerifyMac(ssl, ssl->buffers.inputBuffer.buffer +
                                     ssl->buffers.inputBuffer.idx,
                                     ssl->curSize, ssl->curRL.type,
                                     &ssl->keys.padSz);
+                    if (ret < 0) {
+                        if (ret == WC_PENDING_E)
+                            return ret;
+
+                        WOLFSSL_MSG("VerifyMac failed");
+                        WOLFSSL_ERROR(ret);
+                        return DECRYPT_ERROR;
+                    }
                 }
-                if (ret < 0) {
-                    WOLFSSL_MSG("VerifyMac failed");
-                    WOLFSSL_ERROR(ret);
-                    return DECRYPT_ERROR;
-                }
+
                 ssl->keys.encryptSz    = ssl->curSize;
                 ssl->keys.decryptedCur = 1;
             }
 
-            #ifdef WOLFSSL_DTLS
+            ssl->options.processReply = runProcessingOneMessage;
+
+        /* the record layer is here */
+        case runProcessingOneMessage:
+
+        #ifdef WOLFSSL_DTLS
             if (IsDtlsNotSctpMode(ssl)) {
                 DtlsUpdateWindow(ssl);
             }
-            #endif /* WOLFSSL_DTLS */
+        #endif /* WOLFSSL_DTLS */
 
             WOLFSSL_MSG("received record layer msg");
 
@@ -9927,12 +10429,12 @@ int ProcessReply(WOLFSSL* ssl)
                                             ssl->buffers.inputBuffer.length);
                     }
                     else {
-#ifdef WOLFSSL_DTLS
+                    #ifdef WOLFSSL_DTLS
                         ret = DoDtlsHandShakeMsg(ssl,
                                             ssl->buffers.inputBuffer.buffer,
                                             &ssl->buffers.inputBuffer.idx,
                                             ssl->buffers.inputBuffer.length);
-#endif
+                    #endif
                     }
                     if (ret != 0)
                         return ret;
@@ -9959,7 +10461,7 @@ int ProcessReply(WOLFSSL* ssl)
                             return ret;
                         }
                         else {
-#ifdef WOLFSSL_DTLS
+                        #ifdef WOLFSSL_DTLS
                         /* Check for duplicate CCS message in DTLS mode.
                          * DTLS allows for duplicate messages, and it should be
                          * skipped. Also skip if out of order. */
@@ -9979,7 +10481,7 @@ int ProcessReply(WOLFSSL* ssl)
                             }
                             ssl->buffers.inputBuffer.idx++;
                             break;
-#endif /* WOLFSSL_DTLS */
+                        #endif /* WOLFSSL_DTLS */
                         }
                     }
 
@@ -10143,9 +10645,10 @@ int SendChangeCipher(WOLFSSL* ssl)
 
         input[0] = 1;  /* turn it on */
         sendSz = BuildMessage(ssl, output, sendSz, input, inputSz,
-                              change_cipher_spec, 0, 0);
-        if (sendSz < 0)
+                              change_cipher_spec, 0, 0, 0);
+        if (sendSz < 0) {
             return sendSz;
+        }
     }
 
     #ifdef WOLFSSL_DTLS
@@ -10204,109 +10707,174 @@ static int SSL_hmac(WOLFSSL* ssl, byte* digest, const byte* in, word32 sz,
     WriteSEQ(ssl, verify, seq);
 
     if (ssl->specs.mac_algorithm == md5_mac) {
-        wc_InitMd5(&md5);
-        /* inner */
-        wc_Md5Update(&md5, macSecret, digestSz);
-        wc_Md5Update(&md5, PAD1, padSz);
-        wc_Md5Update(&md5, seq, SEQ_SZ);
-        wc_Md5Update(&md5, conLen, sizeof(conLen));
-        /* in buffer */
-        wc_Md5Update(&md5, in, sz);
-        wc_Md5Final(&md5, result);
-        /* outer */
-        wc_Md5Update(&md5, macSecret, digestSz);
-        wc_Md5Update(&md5, PAD2, padSz);
-        wc_Md5Update(&md5, result, digestSz);
-        wc_Md5Final(&md5, digest);
-    }
-    else {
-        ret = wc_InitSha(&sha);
+        ret =  wc_InitMd5_ex(&md5, ssl->heap, ssl->devId);
         if (ret != 0)
             return ret;
+
         /* inner */
-        wc_ShaUpdate(&sha, macSecret, digestSz);
-        wc_ShaUpdate(&sha, PAD1, padSz);
-        wc_ShaUpdate(&sha, seq, SEQ_SZ);
-        wc_ShaUpdate(&sha, conLen, sizeof(conLen));
+        ret =  wc_Md5Update(&md5, macSecret, digestSz);
+        ret += wc_Md5Update(&md5, PAD1, padSz);
+        ret += wc_Md5Update(&md5, seq, SEQ_SZ);
+        ret += wc_Md5Update(&md5, conLen, sizeof(conLen));
         /* in buffer */
-        wc_ShaUpdate(&sha, in, sz);
-        wc_ShaFinal(&sha, result);
+        ret += wc_Md5Update(&md5, in, sz);
+        if (ret != 0)
+            return VERIFY_MAC_ERROR;
+        ret = wc_Md5Final(&md5, result);
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        /* TODO: Make non-blocking */
+        if (ret == WC_PENDING_E) {
+            ret = wc_AsyncWait(ret, &md5.asyncDev, WC_ASYNC_FLAG_NONE);
+        }
+    #endif
+        if (ret != 0)
+            return VERIFY_MAC_ERROR;
+
         /* outer */
-        wc_ShaUpdate(&sha, macSecret, digestSz);
-        wc_ShaUpdate(&sha, PAD2, padSz);
-        wc_ShaUpdate(&sha, result, digestSz);
-        wc_ShaFinal(&sha, digest);
+        ret =  wc_Md5Update(&md5, macSecret, digestSz);
+        ret += wc_Md5Update(&md5, PAD2, padSz);
+        ret += wc_Md5Update(&md5, result, digestSz);
+        if (ret != 0)
+            return VERIFY_MAC_ERROR;
+        ret =  wc_Md5Final(&md5, digest);
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        /* TODO: Make non-blocking */
+        if (ret == WC_PENDING_E) {
+            ret = wc_AsyncWait(ret, &md5.asyncDev, WC_ASYNC_FLAG_NONE);
+        }
+    #endif
+        if (ret != 0)
+            return VERIFY_MAC_ERROR;
+
+        wc_Md5Free(&md5);
+    }
+    else {
+        ret =  wc_InitSha_ex(&sha, ssl->heap, ssl->devId);
+        if (ret != 0)
+            return ret;
+
+        /* inner */
+        ret =  wc_ShaUpdate(&sha, macSecret, digestSz);
+        ret += wc_ShaUpdate(&sha, PAD1, padSz);
+        ret += wc_ShaUpdate(&sha, seq, SEQ_SZ);
+        ret += wc_ShaUpdate(&sha, conLen, sizeof(conLen));
+        /* in buffer */
+        ret += wc_ShaUpdate(&sha, in, sz);
+        if (ret != 0)
+            return VERIFY_MAC_ERROR;
+        ret = wc_ShaFinal(&sha, result);
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        /* TODO: Make non-blocking */
+        if (ret == WC_PENDING_E) {
+            ret = wc_AsyncWait(ret, &sha.asyncDev, WC_ASYNC_FLAG_NONE);
+        }
+    #endif
+        if (ret != 0)
+            return VERIFY_MAC_ERROR;
+
+        /* outer */
+        ret =  wc_ShaUpdate(&sha, macSecret, digestSz);
+        ret += wc_ShaUpdate(&sha, PAD2, padSz);
+        ret += wc_ShaUpdate(&sha, result, digestSz);
+        if (ret != 0)
+            return VERIFY_MAC_ERROR;
+        ret =  wc_ShaFinal(&sha, digest);
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        /* TODO: Make non-blocking */
+        if (ret == WC_PENDING_E) {
+            ret = wc_AsyncWait(ret, &sha.asyncDev, WC_ASYNC_FLAG_NONE);
+        }
+    #endif
+        if (ret != 0)
+            return VERIFY_MAC_ERROR;
+
+        wc_ShaFree(&sha);
     }
     return 0;
 }
 
 #ifndef NO_CERTS
-static void BuildMD5_CertVerify(WOLFSSL* ssl, byte* digest)
+static int BuildMD5_CertVerify(WOLFSSL* ssl, byte* digest)
 {
+    int ret;
     byte md5_result[MD5_DIGEST_SIZE];
-
 #ifdef WOLFSSL_SMALL_STACK
-        Md5* md5   = (Md5*)XMALLOC(sizeof(Md5), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-        Md5* md5_2 = (Md5*)XMALLOC(sizeof(Md5), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    Md5* md5 = (Md5*)XMALLOC(sizeof(Md5), ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
 #else
-        Md5 md5[1];
-        Md5 md5_2[1];
+    Md5  md5[1];
 #endif
 
     /* make md5 inner */
-    md5[0] = ssl->hsHashes->hashMd5 ; /* Save current position */
-    wc_Md5Update(&ssl->hsHashes->hashMd5, ssl->arrays->masterSecret,SECRET_LEN);
-    wc_Md5Update(&ssl->hsHashes->hashMd5, PAD1, PAD_MD5);
-    wc_Md5GetHash(&ssl->hsHashes->hashMd5, md5_result);
-    wc_Md5RestorePos(&ssl->hsHashes->hashMd5, md5) ; /* Restore current position */
+    ret = wc_Md5Copy(&ssl->hsHashes->hashMd5, md5); /* Save current position */
+    if (ret == 0)
+        ret = wc_Md5Update(md5, ssl->arrays->masterSecret,SECRET_LEN);
+    if (ret == 0)
+        ret = wc_Md5Update(md5, PAD1, PAD_MD5);
+    if (ret == 0)
+        ret = wc_Md5Final(md5, md5_result);
 
     /* make md5 outer */
-    wc_InitMd5(md5_2) ;
-    wc_Md5Update(md5_2, ssl->arrays->masterSecret, SECRET_LEN);
-    wc_Md5Update(md5_2, PAD2, PAD_MD5);
-    wc_Md5Update(md5_2, md5_result, MD5_DIGEST_SIZE);
-
-    wc_Md5Final(md5_2, digest);
+    if (ret == 0) {
+        ret = wc_InitMd5_ex(md5, ssl->heap, ssl->devId);
+        if (ret == 0) {
+            ret = wc_Md5Update(md5, ssl->arrays->masterSecret, SECRET_LEN);
+            if (ret == 0)
+                ret = wc_Md5Update(md5, PAD2, PAD_MD5);
+            if (ret == 0)
+                ret = wc_Md5Update(md5, md5_result, MD5_DIGEST_SIZE);
+            if (ret == 0)
+                ret = wc_Md5Final(md5, digest);
+            wc_Md5Free(md5);
+        }
+    }
 
 #ifdef WOLFSSL_SMALL_STACK
-    XFREE(md5, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    XFREE(md5_2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    XFREE(md5, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
+
+    return ret;
 }
 
 
-static void BuildSHA_CertVerify(WOLFSSL* ssl, byte* digest)
+static int BuildSHA_CertVerify(WOLFSSL* ssl, byte* digest)
 {
+    int ret;
     byte sha_result[SHA_DIGEST_SIZE];
-
 #ifdef WOLFSSL_SMALL_STACK
-        Sha* sha   = (Sha*)XMALLOC(sizeof(Sha), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-        Sha* sha2 = (Sha*)XMALLOC(sizeof(Sha), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    Sha* sha = (Sha*)XMALLOC(sizeof(Sha), ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
 #else
-        Sha sha[1];
-        Sha sha2[1];
+    Sha  sha[1];
 #endif
 
     /* make sha inner */
-    sha[0] = ssl->hsHashes->hashSha ; /* Save current position */
-    wc_ShaUpdate(&ssl->hsHashes->hashSha, ssl->arrays->masterSecret,SECRET_LEN);
-    wc_ShaUpdate(&ssl->hsHashes->hashSha, PAD1, PAD_SHA);
-    wc_ShaGetHash(&ssl->hsHashes->hashSha, sha_result);
-    wc_ShaRestorePos(&ssl->hsHashes->hashSha, sha) ; /* Restore current position */
+    ret = wc_ShaCopy(&ssl->hsHashes->hashSha, sha); /* Save current position */
+    if (ret == 0)
+        ret = wc_ShaUpdate(sha, ssl->arrays->masterSecret,SECRET_LEN);
+    if (ret == 0)
+        ret = wc_ShaUpdate(sha, PAD1, PAD_SHA);
+    if (ret == 0)
+        ret = wc_ShaFinal(sha, sha_result);
 
     /* make sha outer */
-    wc_InitSha(sha2) ;
-    wc_ShaUpdate(sha2, ssl->arrays->masterSecret,SECRET_LEN);
-    wc_ShaUpdate(sha2, PAD2, PAD_SHA);
-    wc_ShaUpdate(sha2, sha_result, SHA_DIGEST_SIZE);
-
-    wc_ShaFinal(sha2, digest);
+    if (ret == 0) {
+        ret = wc_InitSha_ex(sha, ssl->heap, ssl->devId);
+        if (ret == 0) {
+            ret = wc_ShaUpdate(sha, ssl->arrays->masterSecret,SECRET_LEN);
+            if (ret == 0)
+                ret = wc_ShaUpdate(sha, PAD2, PAD_SHA);
+            if (ret == 0)
+                ret = wc_ShaUpdate(sha, sha_result, SHA_DIGEST_SIZE);
+            if (ret == 0)
+                ret = wc_ShaFinal(sha, digest);
+            wc_ShaFree(sha);
+        }
+    }
 
 #ifdef WOLFSSL_SMALL_STACK
-    XFREE(sha, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    XFREE(sha2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    XFREE(sha, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
 
+    return ret;
 }
 #endif /* NO_CERTS */
 #endif /* NO_OLD_TLS */
@@ -10322,10 +10890,14 @@ static int BuildCertHashes(WOLFSSL* ssl, Hashes* hashes)
 
     if (ssl->options.tls) {
     #if !defined(NO_MD5) && !defined(NO_OLD_TLS)
-        wc_Md5GetHash(&ssl->hsHashes->hashMd5, hashes->md5);
+        ret = wc_Md5GetHash(&ssl->hsHashes->hashMd5, hashes->md5);
+        if (ret != 0)
+            return ret;
     #endif
     #if !defined(NO_SHA)
-        wc_ShaGetHash(&ssl->hsHashes->hashSha, hashes->sha);
+        ret = wc_ShaGetHash(&ssl->hsHashes->hashSha, hashes->sha);
+        if (ret != 0)
+            return ret;
     #endif
         if (IsAtLeastTLSv1_2(ssl)) {
             #ifndef NO_SHA256
@@ -10350,11 +10922,15 @@ static int BuildCertHashes(WOLFSSL* ssl, Hashes* hashes)
     }
     else {
     #if !defined(NO_MD5) && !defined(NO_OLD_TLS)
-        BuildMD5_CertVerify(ssl, hashes->md5);
+        ret = BuildMD5_CertVerify(ssl, hashes->md5);
+        if (ret != 0)
+            return ret;
     #endif
     #if !defined(NO_SHA) && (!defined(NO_OLD_TLS) || \
                               defined(WOLFSSL_ALLOW_TLS_SHA1))
-        BuildSHA_CertVerify(ssl, hashes->sha);
+        ret = BuildSHA_CertVerify(ssl, hashes->sha);
+        if (ret != 0)
+            return ret;
     #endif
     }
 
@@ -10363,133 +10939,200 @@ static int BuildCertHashes(WOLFSSL* ssl, Hashes* hashes)
 
 #endif /* WOLFSSL_LEANPSK */
 
+/* Persistable BuildMessage arguments */
+typedef struct BuildMsgArgs {
+    word32 digestSz;  /* MAC size: hash_size, or truncated HMAC size */
+    word32 sz;        /* total record size; returned on success */
+    word32 pad;       /* block cipher pad value (pad + 1 bytes written) */
+    word32 idx;       /* current write offset into output */
+    word32 headerSz;  /* record header size, incl. DTLS extra */
+    word16 size;      /* sz - headerSz: record body length */
+    word32 ivSz;      /* TLSv1.1  IV */
+    byte   iv[AES_BLOCK_SIZE]; /* max size */
+} BuildMsgArgs;
+
+static void FreeBuildMsgArgs(WOLFSSL* ssl, void* pArgs)
+{
+    BuildMsgArgs* args = (BuildMsgArgs*)pArgs;
+
+    (void)ssl;  /* both parameters are unused; casts silence warnings */
+    (void)args;
+
+    /* nothing to free: BuildMsgArgs has no pointer members */
+}
+
 /* Build SSL Message, encrypted */
 int BuildMessage(WOLFSSL* ssl, byte* output, int outSz, const byte* input,
-                 int inSz, int type, int hashOutput, int sizeOnly)
+             int inSz, int type, int hashOutput, int sizeOnly, int asyncOkay)
 {
-    word32 digestSz;
-    word32 sz = RECORD_HEADER_SZ + inSz;
-    word32 pad  = 0, i;
-    word32 idx  = RECORD_HEADER_SZ;
-    word32 ivSz = 0;      /* TLSv1.1  IV */
-    word32 headerSz = RECORD_HEADER_SZ;
-    word16 size;
-    byte               iv[AES_BLOCK_SIZE];                  /* max size */
-    int ret        = 0;
-    int atomicUser = 0;
+    int ret = 0;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    BuildMsgArgs* args = (BuildMsgArgs*)ssl->async.args;
+    typedef char args_test[sizeof(ssl->async.args) >= sizeof(*args) ? 1 : -1];
+    (void)sizeof(args_test);
+#else
+    BuildMsgArgs  args[1];
+#endif
 
-    if (ssl == NULL) {
+    WOLFSSL_ENTER("BuildMessage");
+    /* output may be NULL (sizeOnly); checked in BUILD_MSG_BEGIN */
+    if (ssl == NULL) {
         return BAD_FUNC_ARG;
     }
 
-    if (!sizeOnly && (output == NULL || input == NULL) ) {
-        return BAD_FUNC_ARG;
-    }
-
-    /* catch mistaken sizeOnly parameter */
-    if (sizeOnly && (output || input) ) {
-        WOLFSSL_MSG("BuildMessage with sizeOnly doesn't need input or output");
-        return BAD_FUNC_ARG;
-    }
-
-    digestSz = ssl->specs.hash_size;
-#ifdef HAVE_TRUNCATED_HMAC
-    if (ssl->truncated_hmac)
-        digestSz = min(TRUNCATED_HMAC_SZ, digestSz);
-#endif
-    sz += digestSz;
-
-#ifdef WOLFSSL_DTLS
-    if (ssl->options.dtls) {
-        sz       += DTLS_RECORD_EXTRA;
-        idx      += DTLS_RECORD_EXTRA;
-        headerSz += DTLS_RECORD_EXTRA;
+    ret = WC_NOT_PENDING_E;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    if (asyncOkay) {
+        ret = wolfSSL_AsyncPop(ssl, &ssl->options.buildMsgState);
+        if (ret != WC_NOT_PENDING_E) {
+            /* Check for error */
+            if (ret < 0)
+                goto exit_buildmsg;
+        }
     }
 #endif
 
-#ifdef ATOMIC_USER
-    if (ssl->ctx->MacEncryptCb)
-        atomicUser = 1;
-#endif
+    /* Reset state */
+    if (ret == WC_NOT_PENDING_E) {
+        ret = 0;
+        ssl->options.buildMsgState = BUILD_MSG_BEGIN;
+        XMEMSET(args, 0, sizeof(BuildMsgArgs));
 
-    if (ssl->specs.cipher_type == block) {
-        word32 blockSz = ssl->specs.block_size;
-        if (ssl->options.tls1_1) {
-            ivSz = blockSz;
-            sz  += ivSz;
+        args->sz = RECORD_HEADER_SZ + inSz;
+        args->idx  = RECORD_HEADER_SZ;
+        args->headerSz = RECORD_HEADER_SZ;
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        ssl->async.freeArgs = FreeBuildMsgArgs;
+    #endif
+    }
 
-            if (ivSz > (word32)sizeof(iv))
-                return BUFFER_E;
-
-            if (!sizeOnly) {
-                ret = wc_RNG_GenerateBlock(ssl->rng, iv, ivSz);
-                if (ret != 0)
-                    return ret;
+    switch (ssl->options.buildMsgState) {
+        case BUILD_MSG_BEGIN:
+        {
+            /* catch mistaken sizeOnly parameter */
+            if (!sizeOnly && (output == NULL || input == NULL) ) {
+                return BAD_FUNC_ARG;
+            }
+            if (sizeOnly && (output || input) ) {
+                WOLFSSL_MSG("BuildMessage w/sizeOnly doesn't need input/output");
+                return BAD_FUNC_ARG;
             }
 
+            ssl->options.buildMsgState = BUILD_MSG_SIZE;
         }
-        sz += 1;       /* pad byte */
-        pad = (sz - headerSz) % blockSz;
-        pad = blockSz - pad;
-        sz += pad;
-    }
 
-#ifdef HAVE_AEAD
-    if (ssl->specs.cipher_type == aead) {
-        if (ssl->specs.bulk_cipher_algorithm != wolfssl_chacha)
-            ivSz = AESGCM_EXP_IV_SZ;
+        case BUILD_MSG_SIZE:
+        {
+            args->digestSz = ssl->specs.hash_size;
+        #ifdef HAVE_TRUNCATED_HMAC
+            if (ssl->truncated_hmac)
+                args->digestSz = min(TRUNCATED_HMAC_SZ, args->digestSz);
+        #endif
+            args->sz += args->digestSz;
 
-        sz += (ivSz + ssl->specs.aead_mac_size - digestSz);
-        if (!sizeOnly) {
-            XMEMCPY(iv, ssl->keys.aead_exp_IV, AESGCM_EXP_IV_SZ);
+        #ifdef WOLFSSL_DTLS
+            if (ssl->options.dtls) {
+                args->sz       += DTLS_RECORD_EXTRA;
+                args->idx      += DTLS_RECORD_EXTRA;
+                args->headerSz += DTLS_RECORD_EXTRA;
+            }
+        #endif
+
+            if (ssl->specs.cipher_type == block) {
+                word32 blockSz = ssl->specs.block_size;
+                if (ssl->options.tls1_1) {
+                    args->ivSz = blockSz;
+                    args->sz  += args->ivSz;
+
+                    if (args->ivSz > (word32)sizeof(args->iv))
+                        ERROR_OUT(BUFFER_E, exit_buildmsg);
+                }
+                args->sz += 1;       /* pad byte */
+                args->pad = (args->sz - args->headerSz) % blockSz;
+                args->pad = blockSz - args->pad;
+                args->sz += args->pad;
+            }
+
+        #ifdef HAVE_AEAD
+            if (ssl->specs.cipher_type == aead) {
+                if (ssl->specs.bulk_cipher_algorithm != wolfssl_chacha)
+                    args->ivSz = AESGCM_EXP_IV_SZ;
+
+                args->sz += (args->ivSz + ssl->specs.aead_mac_size - args->digestSz);
+            }
+        #endif
+
+            /* done with size calculations */
+            if (sizeOnly)
+                goto exit_buildmsg;
+
+            if (args->sz > (word32)outSz) {
+                WOLFSSL_MSG("Oops, want to write past output buffer size");
+                ERROR_OUT(BUFFER_E, exit_buildmsg);
+            }
+
+            if (args->ivSz > 0) {
+                ret = wc_RNG_GenerateBlock(ssl->rng, args->iv, args->ivSz);
+                if (ret != 0)
+                    goto exit_buildmsg;
+
+            }
+
+        #ifdef HAVE_AEAD
+            if (ssl->specs.cipher_type == aead) {
+                if (ssl->specs.bulk_cipher_algorithm != wolfssl_chacha)
+                    XMEMCPY(args->iv, ssl->keys.aead_exp_IV, AESGCM_EXP_IV_SZ);
+            }
+        #endif
+
+            args->size = (word16)(args->sz - args->headerSz);    /* include mac and digest */
+            AddRecordHeader(output, args->size, (byte)type, ssl);
+
+            /* write to output */
+            if (args->ivSz) {
+                XMEMCPY(output + args->idx, args->iv,
+                                        min(args->ivSz, sizeof(args->iv)));
+                args->idx += args->ivSz;
+            }
+            XMEMCPY(output + args->idx, input, inSz);
+            args->idx += inSz;
+
+            ssl->options.buildMsgState = BUILD_MSG_HASH;
         }
-    }
-#endif
-    /* done with size calculations */
-    if (sizeOnly) {
-        return sz;
-    }
-    if (sz > (word32)outSz) {
-        WOLFSSL_MSG("Oops, want to write past output buffer size");
-        return BUFFER_E;
-    }
-    size = (word16)(sz - headerSz);    /* include mac and digest */
-    AddRecordHeader(output, size, (byte)type, ssl);
+        case BUILD_MSG_HASH:
+        {
+            word32 i;
 
-    /* write to output */
-    if (ivSz) {
-        XMEMCPY(output + idx, iv, min(ivSz, sizeof(iv)));
-        idx += ivSz;
-    }
-    XMEMCPY(output + idx, input, inSz);
-    idx += inSz;
+            if (type == handshake && hashOutput) {
+                ret = HashOutput(ssl, output, args->headerSz + inSz, args->ivSz);
+                if (ret != 0)
+                    goto exit_buildmsg;
+            }
+            if (ssl->specs.cipher_type == block) {
+                word32 tmpIdx = args->idx + args->digestSz;
 
-    if (type == handshake && hashOutput) {
-        ret = HashOutput(ssl, output, headerSz + inSz, ivSz);
-        if (ret != 0)
-            return ret;
-    }
+                for (i = 0; i <= args->pad; i++)
+                    output[tmpIdx++] = (byte)args->pad; /* pad byte gets pad value */
+            }
 
-    if (ssl->specs.cipher_type == block) {
-        word32 tmpIdx = idx + digestSz;
+            ssl->options.buildMsgState = BUILD_MSG_VERIFY_MAC;
+        }
+        case BUILD_MSG_VERIFY_MAC:
+        {
+            /* User Record Layer Callback handling */
+        #ifdef ATOMIC_USER
+            if (ssl->ctx->MacEncryptCb) {
+                ret = ssl->ctx->MacEncryptCb(ssl, output + args->idx,
+                                output + args->headerSz + args->ivSz, inSz, type, 0,
+                                output + args->headerSz, output + args->headerSz, args->size,
+                                ssl->MacEncryptCtx);
+                goto exit_buildmsg;
+            }
+        #endif
 
-        for (i = 0; i <= pad; i++)
-            output[tmpIdx++] = (byte)pad; /* pad byte gets pad value too */
-    }
-
-    if (atomicUser) {   /* User Record Layer Callback handling */
-#ifdef ATOMIC_USER
-        if ( (ret = ssl->ctx->MacEncryptCb(ssl, output + idx,
-                        output + headerSz + ivSz, inSz, type, 0,
-                        output + headerSz, output + headerSz, size,
-                        ssl->MacEncryptCtx)) != 0)
-            return ret;
-#endif
-    }
-    else {
-        if (ssl->specs.cipher_type != aead) {
-#ifdef HAVE_TRUNCATED_HMAC
-            if (ssl->truncated_hmac && ssl->specs.hash_size > digestSz) {
+            if (ssl->specs.cipher_type != aead) {
+        #ifdef HAVE_TRUNCATED_HMAC
+            if (ssl->truncated_hmac && ssl->specs.hash_size > args->digestSz) {
             #ifdef WOLFSSL_SMALL_STACK
                 byte* hmac = NULL;
             #else
@@ -10497,36 +11140,61 @@ int BuildMessage(WOLFSSL* ssl, byte* output, int outSz, const byte* input,
             #endif
 
             #ifdef WOLFSSL_SMALL_STACK
-                hmac = (byte*)XMALLOC(MAX_DIGEST_SIZE, NULL,
+                hmac = (byte*)XMALLOC(MAX_DIGEST_SIZE, ssl->heap,
                                                        DYNAMIC_TYPE_TMP_BUFFER);
                 if (hmac == NULL)
-                    return MEMORY_E;
+                    ERROR_OUT(MEMORY_E, exit_buildmsg);
             #endif
 
-                ret = ssl->hmac(ssl, hmac, output + headerSz + ivSz, inSz,
+                ret = ssl->hmac(ssl, hmac, output + args->headerSz + args->ivSz, inSz,
                                                                        type, 0);
-                XMEMCPY(output + idx, hmac, digestSz);
+                XMEMCPY(output + args->idx, hmac, args->digestSz);
 
             #ifdef WOLFSSL_SMALL_STACK
-                XFREE(hmac, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+                XFREE(hmac, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
             #endif
-            } else
-#endif
-                ret = ssl->hmac(ssl, output+idx, output + headerSz + ivSz, inSz,
-                                                                       type, 0);
-                #ifdef WOLFSSL_DTLS
-                    if (ssl->options.dtls)
-                        DtlsSEQIncrement(ssl, CUR_ORDER);
-                #endif
-        }
-        if (ret != 0)
-            return ret;
+            }
+            else
+        #endif
+                ret = ssl->hmac(ssl, output + args->idx, output + args->headerSz + args->ivSz,
+                                                                inSz, type, 0);
+            #ifdef WOLFSSL_DTLS
+                if (ssl->options.dtls)
+                    DtlsSEQIncrement(ssl, CUR_ORDER);
+            #endif
+            }
+            if (ret != 0)
+                goto exit_buildmsg;
 
-        if ( (ret = Encrypt(ssl, output + headerSz, output+headerSz,size)) != 0)
-            return ret;
+            ssl->options.buildMsgState = BUILD_MSG_ENCRYPT;
+        }
+        case BUILD_MSG_ENCRYPT:
+        {
+            ret = Encrypt(ssl, output + args->headerSz, output + args->headerSz, args->size,
+                asyncOkay);
+            break;
+        }
     }
 
-    return sz;
+exit_buildmsg:
+
+    WOLFSSL_LEAVE("BuildMessage", ret);
+
+    if (ret == WC_PENDING_E) {
+        return ret;
+    }
+
+    /* make sure build message state is reset */
+    ssl->options.buildMsgState = BUILD_MSG_BEGIN;
+
+    /* return sz on success */
+    if (ret == 0)
+        ret = args->sz;
+
+    /* Final cleanup */
+    FreeBuildMsgArgs(ssl, args);
+
+    return ret;
 }
 
 
@@ -10595,7 +11263,7 @@ int SendFinished(WOLFSSL* ssl)
     #endif
 
     sendSz = BuildMessage(ssl, output, outputSz, input, headerSz + finishedSz,
-                          handshake, 1, 0);
+                                                          handshake, 1, 0, 0);
     if (sendSz < 0)
         return BUILD_MSG_ERROR;
 
@@ -10825,7 +11493,7 @@ int SendCertificate(WOLFSSL* ssl)
             }
 
             sendSz = BuildMessage(ssl, output, sendSz, input, inputSz,
-                                  handshake, 1, 0);
+                                                          handshake, 1, 0, 0);
 
             if (inputSz > 0)
                 XFREE(input, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
@@ -10834,26 +11502,26 @@ int SendCertificate(WOLFSSL* ssl)
                 return sendSz;
         }
         else {
-            #ifdef WOLFSSL_DTLS
-                if (ssl->options.dtls)
-                    DtlsSEQIncrement(ssl, CUR_ORDER);
-            #endif
+        #ifdef WOLFSSL_DTLS
+            if (ssl->options.dtls)
+                DtlsSEQIncrement(ssl, CUR_ORDER);
+        #endif
         }
 
-        #ifdef WOLFSSL_DTLS
-            if (IsDtlsNotSctpMode(ssl)) {
-                if ((ret = DtlsMsgPoolSave(ssl, output, sendSz)) != 0)
-                    return ret;
-            }
-        #endif
+    #ifdef WOLFSSL_DTLS
+        if (IsDtlsNotSctpMode(ssl)) {
+            if ((ret = DtlsMsgPoolSave(ssl, output, sendSz)) != 0)
+                return ret;
+        }
+    #endif
 
-        #ifdef WOLFSSL_CALLBACKS
-            if (ssl->hsInfoOn)
-                AddPacketName("Certificate", &ssl->handShakeInfo);
-            if (ssl->toInfoOn)
-                AddPacketInfo("Certificate", &ssl->timeoutInfo, output, sendSz,
-                               ssl->heap);
-        #endif
+    #ifdef WOLFSSL_CALLBACKS
+        if (ssl->hsInfoOn)
+            AddPacketName("Certificate", &ssl->handShakeInfo);
+        if (ssl->toInfoOn)
+            AddPacketInfo("Certificate", &ssl->timeoutInfo, output, sendSz,
+                           ssl->heap);
+    #endif
 
         ssl->buffers.outputBuffer.length += sendSz;
         if (!ssl->options.groupMessages)
@@ -11028,7 +11696,7 @@ static int BuildCertificateStatus(WOLFSSL* ssl, byte type, buffer* status,
 
             XMEMCPY(input, output + RECORD_HEADER_SZ, inputSz);
             sendSz = BuildMessage(ssl, output, sendSz, input, inputSz,
-                                                               handshake, 1, 0);
+                                                           handshake, 1, 0, 0);
             XFREE(input, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
 
             if (sendSz < 0)
@@ -11078,13 +11746,13 @@ int SendCertificateStatus(WOLFSSL* ssl)
 
     (void) ssl;
 
-    #ifdef HAVE_CERTIFICATE_STATUS_REQUEST
-        status_type = ssl->status_request;
-    #endif
+#ifdef HAVE_CERTIFICATE_STATUS_REQUEST
+    status_type = ssl->status_request;
+#endif
 
-    #ifdef HAVE_CERTIFICATE_STATUS_REQUEST_V2
-        status_type = status_type ? status_type : ssl->status_request_v2;
-    #endif
+#ifdef HAVE_CERTIFICATE_STATUS_REQUEST_V2
+    status_type = status_type ? status_type : ssl->status_request_v2;
+#endif
 
     switch (status_type) {
 
@@ -11092,7 +11760,8 @@ int SendCertificateStatus(WOLFSSL* ssl)
     #if defined(HAVE_CERTIFICATE_STATUS_REQUEST) \
      || defined(HAVE_CERTIFICATE_STATUS_REQUEST_V2)
         /* case WOLFSSL_CSR_OCSP: */
-        case WOLFSSL_CSR2_OCSP: {
+        case WOLFSSL_CSR2_OCSP:
+        {
             OcspRequest* request = ssl->ctx->certOcspRequest;
             buffer response;
 
@@ -11114,15 +11783,15 @@ int SendCertificateStatus(WOLFSSL* ssl)
                 if (der->buffer == NULL || der->length == 0)
                     return 0;
 
-                #ifdef WOLFSSL_SMALL_STACK
-                    cert = (DecodedCert*)XMALLOC(sizeof(DecodedCert), NULL,
-                                                       DYNAMIC_TYPE_TMP_BUFFER);
-                    if (cert == NULL)
-                        return MEMORY_E;
-                #endif
+            #ifdef WOLFSSL_SMALL_STACK
+                cert = (DecodedCert*)XMALLOC(sizeof(DecodedCert), ssl->heap,
+                                                   DYNAMIC_TYPE_TMP_BUFFER);
+                if (cert == NULL)
+                    return MEMORY_E;
+            #endif
 
                 InitDecodedCert(cert, der->buffer, der->length, ssl->heap);
-
+                /* TODO: Setup async support here */
                 if ((ret = ParseCertRelative(cert, CERT_TYPE, VERIFY,
                                                           ssl->ctx->cm)) != 0) {
                     WOLFSSL_MSG("ParseCert failed");
@@ -11133,9 +11802,9 @@ int SendCertificateStatus(WOLFSSL* ssl)
                     if (request == NULL) {
                         FreeDecodedCert(cert);
 
-                        #ifdef WOLFSSL_SMALL_STACK
-                            XFREE(cert, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-                        #endif
+                    #ifdef WOLFSSL_SMALL_STACK
+                        XFREE(cert, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                    #endif
 
                         return MEMORY_E;
                     }
@@ -11154,9 +11823,9 @@ int SendCertificateStatus(WOLFSSL* ssl)
 
                 FreeDecodedCert(cert);
 
-                #ifdef WOLFSSL_SMALL_STACK
-                    XFREE(cert, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-                #endif
+            #ifdef WOLFSSL_SMALL_STACK
+                XFREE(cert, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            #endif
             }
 
             if (ret == 0) {
@@ -11184,14 +11853,16 @@ int SendCertificateStatus(WOLFSSL* ssl)
 
             if (request != ssl->ctx->certOcspRequest)
                 XFREE(request, ssl->heap, DYNAMIC_TYPE_OCSP_REQUEST);
+
+            break;
         }
-        break;
 
     #endif /* HAVE_CERTIFICATE_STATUS_REQUEST    */
            /* HAVE_CERTIFICATE_STATUS_REQUEST_V2 */
 
     #if defined HAVE_CERTIFICATE_STATUS_REQUEST_V2
-        case WOLFSSL_CSR2_OCSP_MULTI: {
+        case WOLFSSL_CSR2_OCSP_MULTI:
+        {
             OcspRequest* request = ssl->ctx->certOcspRequest;
             buffer responses[1 + MAX_CHAIN_DEPTH];
             int i = 0;
@@ -11214,15 +11885,15 @@ int SendCertificateStatus(WOLFSSL* ssl)
                 if (der->buffer == NULL || der->length == 0)
                     return 0;
 
-                #ifdef WOLFSSL_SMALL_STACK
-                    cert = (DecodedCert*)XMALLOC(sizeof(DecodedCert), NULL,
-                                                   DYNAMIC_TYPE_TMP_BUFFER);
-                    if (cert == NULL)
-                        return MEMORY_E;
-                #endif
+            #ifdef WOLFSSL_SMALL_STACK
+                cert = (DecodedCert*)XMALLOC(sizeof(DecodedCert), ssl->heap,
+                                               DYNAMIC_TYPE_TMP_BUFFER);
+                if (cert == NULL)
+                    return MEMORY_E;
+            #endif
 
                 InitDecodedCert(cert, der->buffer, der->length, ssl->heap);
-
+                /* TODO: Setup async support here */
                 if ((ret = ParseCertRelative(cert, CERT_TYPE, VERIFY,
                                                           ssl->ctx->cm)) != 0) {
                     WOLFSSL_MSG("ParseCert failed");
@@ -11233,9 +11904,9 @@ int SendCertificateStatus(WOLFSSL* ssl)
                     if (request == NULL) {
                         FreeDecodedCert(cert);
 
-                        #ifdef WOLFSSL_SMALL_STACK
-                            XFREE(cert, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-                        #endif
+                    #ifdef WOLFSSL_SMALL_STACK
+                        XFREE(cert, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                    #endif
 
                         return MEMORY_E;
                     }
@@ -11255,9 +11926,9 @@ int SendCertificateStatus(WOLFSSL* ssl)
 
                 FreeDecodedCert(cert);
 
-                #ifdef WOLFSSL_SMALL_STACK
-                    XFREE(cert, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-                #endif
+            #ifdef WOLFSSL_SMALL_STACK
+                XFREE(cert, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            #endif
             }
 
             if (ret == 0) {
@@ -11281,11 +11952,11 @@ int SendCertificateStatus(WOLFSSL* ssl)
                                               || ssl->buffers.weOwnCertChain)) {
                 buffer der;
                 word32 idx = 0;
-                #ifdef WOLFSSL_SMALL_STACK
-                    DecodedCert* cert = NULL;
-                #else
-                    DecodedCert  cert[1];
-                #endif
+            #ifdef WOLFSSL_SMALL_STACK
+                DecodedCert* cert = NULL;
+            #else
+                DecodedCert  cert[1];
+            #endif
 
                 XMEMSET(&der, 0, sizeof(buffer));
 
@@ -11307,7 +11978,7 @@ int SendCertificateStatus(WOLFSSL* ssl)
                         break;
 
                     InitDecodedCert(cert, der.buffer, der.length, ssl->heap);
-
+                    /* TODO: Setup async support here */
                     if ((ret = ParseCertRelative(cert, CERT_TYPE, VERIFY,
                                                       ssl->ctx->cm)) != 0) {
                         WOLFSSL_MSG("ParseCert failed");
@@ -11357,9 +12028,9 @@ int SendCertificateStatus(WOLFSSL* ssl)
                     FreeDecodedCert(cert);
                 }
 
-                #ifdef WOLFSSL_SMALL_STACK
-                    XFREE(cert, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-                #endif
+            #ifdef WOLFSSL_SMALL_STACK
+                XFREE(cert, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            #endif
             }
             else {
                 while (ret == 0 &&
@@ -11388,14 +12059,14 @@ int SendCertificateStatus(WOLFSSL* ssl)
                         XFREE(responses[i].buffer, ssl->heap,
                                                        DYNAMIC_TYPE_TMP_BUFFER);
             }
-        }
-        break;
 
+            break;
+        }
     #endif /* HAVE_CERTIFICATE_STATUS_REQUEST_V2 */
     #endif /* NO_WOLFSSL_SERVER */
 
         default:
-        break;
+            break;
     }
 
     return ret;
@@ -11496,9 +12167,12 @@ int SendData(WOLFSSL* ssl, const void* data, int sz)
         }
 #endif
         sendSz = BuildMessage(ssl, out, outputSz, sendBuffer, buffSz,
-                              application_data, 0, 0);
-        if (sendSz < 0)
+                                                  application_data, 0, 0, 1);
+        if (sendSz < 0) {
+            if (sendSz == WC_PENDING_E)
+                ssl->error = sendSz;
             return BUILD_MSG_ERROR;
+        }
 
         ssl->buffers.outputBuffer.length += sendSz;
 
@@ -11532,8 +12206,10 @@ int ReceiveData(WOLFSSL* ssl, byte* output, int sz, int peek)
 
     WOLFSSL_ENTER("ReceiveData()");
 
-    if (ssl->error == WANT_READ || ssl->error == WC_PENDING_E)
+    /* reset error state */
+    if (ssl->error == WANT_READ || ssl->error == WC_PENDING_E) {
         ssl->error = 0;
+    }
 
 #ifdef WOLFSSL_DTLS
     if (ssl->options.dtls) {
@@ -11682,7 +12358,7 @@ int SendAlert(WOLFSSL* ssl, int severity, int type)
        other side may not be able to handle it */
     if (IsEncryptionOn(ssl, 1) && ssl->options.handShakeDone)
         sendSz = BuildMessage(ssl, output, outputSz, input, ALERT_SIZE,
-                              alert, 0, 0);
+                                                          alert, 0, 0, 0);
     else {
 
         AddRecordHeader(output, ALERT_SIZE, alert, ssl);
@@ -13465,7 +14141,7 @@ Set the enabled cipher suites.
 
 @param [out] suites Suites structure.
 @param [in]  list   List of cipher suites, only supports full name from
-                    cipher_name[] delimited by ':'.
+                    cipher_names[] delimited by ':'.
 
 @return true on success, else false.
 */
@@ -13572,7 +14248,7 @@ static void PickHashSigAlgo(WOLFSSL* ssl,
     }
 
     /* i+1 since peek a byte ahead for type */
-    for (i = 0; (i+1) < hashSigAlgoSz; i += 2) {
+    for (i = 0; (i+1) < hashSigAlgoSz; i += HELLO_EXT_SIGALGO_SZ) {
         if (hashSigAlgo[i+1] == ssl->specs.sig_algo) {
             if (hashSigAlgo[i] == sha_mac) {
                 break;
@@ -13698,7 +14374,7 @@ static void PickHashSigAlgo(WOLFSSL* ssl,
                 XMEMCPY(info->packets[info->numberPackets].value, data, sz);
             else {
                 info->packets[info->numberPackets].bufferValue =
-                           XMALLOC(sz, heap, DYNAMIC_TYPE_INFO);
+                                    (byte*)XMALLOC(sz, heap, DYNAMIC_TYPE_INFO);
                 if (!info->packets[info->numberPackets].bufferValue)
                     /* let next alloc catch, just don't fill, not fatal here  */
                     info->packets[info->numberPackets].valueSz = 0;
@@ -13835,23 +14511,23 @@ static void PickHashSigAlgo(WOLFSSL* ssl,
         output[idx++] = ssl->version.minor;
         ssl->chVersion = ssl->version;  /* store in case changed */
 
-            /* then random */
+        /* then random */
         if (ssl->options.connectState == CONNECT_BEGIN) {
             ret = wc_RNG_GenerateBlock(ssl->rng, output + idx, RAN_LEN);
             if (ret != 0)
                 return ret;
 
-                /* store random */
+            /* store random */
             XMEMCPY(ssl->arrays->clientRandom, output + idx, RAN_LEN);
         } else {
 #ifdef WOLFSSL_DTLS
-                /* send same random on hello again */
+            /* send same random on hello again */
             XMEMCPY(output + idx, ssl->arrays->clientRandom, RAN_LEN);
 #endif
         }
         idx += RAN_LEN;
 
-            /* then session id */
+        /* then session id */
         output[idx++] = (byte)idSz;
         if (idSz) {
             XMEMCPY(output + idx, ssl->session.sessionID,
@@ -13859,7 +14535,7 @@ static void PickHashSigAlgo(WOLFSSL* ssl,
             idx += ssl->session.sessionIDSz;
         }
 
-            /* then DTLS cookie */
+        /* then DTLS cookie */
 #ifdef WOLFSSL_DTLS
         if (ssl->options.dtls) {
             byte cookieSz = ssl->arrays->cookieSz;
@@ -13871,13 +14547,13 @@ static void PickHashSigAlgo(WOLFSSL* ssl,
             }
         }
 #endif
-            /* then cipher suites */
+        /* then cipher suites */
         c16toa(ssl->suites->suiteSz, output + idx);
-        idx += 2;
+        idx += OPAQUE16_LEN;
         XMEMCPY(output + idx, &ssl->suites->suites, ssl->suites->suiteSz);
         idx += ssl->suites->suiteSz;
 
-            /* last, compression */
+        /* last, compression */
         output[idx++] = COMP_LEN;
         if (ssl->options.usingCompression)
             output[idx++] = ZLIB_COMPRESSION;
@@ -13932,7 +14608,7 @@ static void PickHashSigAlgo(WOLFSSL* ssl,
 
             XMEMCPY(input, output + RECORD_HEADER_SZ, inputSz);
             sendSz = BuildMessage(ssl, output, sendSz, input, inputSz,
-                                  handshake, 1, 0);
+                                  handshake, 1, 0, 0);
             XFREE(input, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
 
             if (sendSz < 0)
@@ -14489,59 +15165,59 @@ static void PickHashSigAlgo(WOLFSSL* ssl,
 #endif /* HAVE_ECC */
 
 
+/* Persistable DoServerKeyExchange arguments */
+typedef struct DskeArgs {
+    byte*  output; /* not allocated */
+#if !defined(NO_DH) || defined(HAVE_ECC)
+    byte*  verifySig; /* released by FreeDskeArgs when non-NULL */
+#endif
+    word32 idx;    /* current read offset into input */
+    word32 begin;  /* value of *inOutIdx on entry */
+#ifndef NO_RSA
+    int    typeH;
+#endif
+#if !defined(NO_DH) || defined(HAVE_ECC)
+    word16 verifySigSz;
+#endif
+    word16 sigSz;
+    byte   sigAlgo; /* starts as ssl->specs.sig_algo */
+} DskeArgs;
+
+static void FreeDskeArgs(WOLFSSL* ssl, void* pArgs)
+{
+    DskeArgs* args = (DskeArgs*)pArgs;
+
+    (void)ssl;  /* unused when the verifySig block is compiled out */
+    (void)args;
+
+#if !defined(NO_DH) || defined(HAVE_ECC)
+    if (args->verifySig) {
+        XFREE(args->verifySig, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        args->verifySig = NULL; /* a repeated call then skips the free */
+    }
+#endif
+}
+
 static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                                word32* inOutIdx, word32 size)
 {
-    int    ret = 0;
-    word16 length = 0;
-    word32 idx = *inOutIdx, begin = *inOutIdx;
-#ifndef NO_RSA
-    int    typeH = 0;
+    int ret = 0;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    DskeArgs* args = (DskeArgs*)ssl->async.args;
+    typedef char args_test[sizeof(ssl->async.args) >= sizeof(*args) ? 1 : -1];
+    (void)sizeof(args_test);
+#else
+    DskeArgs  args[1];
 #endif
-    byte*  output  = NULL;
-    byte   sigAlgo = ssl->specs.sig_algo;
-    word16 sigSz = 0;
-#if !defined(NO_DH) || defined(HAVE_ECC)
-    byte*  verifySig = NULL;
-#endif
-
-    (void)output;
-    (void)sigAlgo;
-    (void)sigSz;
 
     WOLFSSL_ENTER("DoServerKeyExchange");
 
 #ifdef WOLFSSL_ASYNC_CRYPT
-    ret = wolfAsync_EventPop(&ssl->event, WOLF_EVENT_TYPE_ASYNC_ANY);
+    ret = wolfSSL_AsyncPop(ssl, &ssl->options.keyShareState);
     if (ret != WC_NOT_PENDING_E) {
-        WOLF_EVENT_TYPE eType = ssl->event.type;
-
-        /* Clear event */
-        XMEMSET(&ssl->event, 0, sizeof(ssl->event));
-
         /* Check for error */
-        if (ret < 0) {
+        if (ret < 0)
             goto exit_dske;
-        }
-        else  {
-            /* Restore variables needed for async */
-            idx = ssl->async.idx;
-            length = ssl->async.length;
-            output = ssl->async.output;
-            sigSz = ssl->async.sigSz;
-        #ifndef NO_RSA
-            typeH = ssl->async.hashAlgo;
-        #endif
-            sigAlgo = ssl->async.sigAlgo;
-        #if !defined(NO_DH) || defined(HAVE_ECC)
-            verifySig = ssl->async.data;
-        #endif
-
-            /* Advance key share state if not wolfCrypt */
-            if (eType == WOLF_EVENT_TYPE_ASYNC_WOLFSSL) {
-                ssl->options.keyShareState++;
-            }
-        }
     }
     else
 #endif
@@ -14549,6 +15225,13 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
         /* Reset state */
         ret = 0;
         ssl->options.keyShareState = KEYSHARE_BEGIN;
+        XMEMSET(args, 0, sizeof(DskeArgs));
+        args->idx = *inOutIdx;
+        args->begin = *inOutIdx;
+        args->sigAlgo = ssl->specs.sig_algo;
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        ssl->async.freeArgs = FreeDskeArgs;
+    #endif
     }
 
     switch(ssl->options.keyShareState)
@@ -14568,38 +15251,42 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                 case psk_kea:
                 {
                     int srvHintLen;
+                    word16 length;
 
-                    if ((idx - begin) + OPAQUE16_LEN > size) {
+                    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
-                    ato16(input + idx, &length);
-                    idx += OPAQUE16_LEN;
+                    ato16(input + args->idx, &length);
+                    args->idx += OPAQUE16_LEN;
 
-                    if ((idx - begin) + length > size) {
+                    if ((args->idx - args->begin) + length > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
                     /* get PSK server hint from the wire */
                     srvHintLen = min(length, MAX_PSK_ID_LEN - 1);
-                    XMEMCPY(ssl->arrays->server_hint, input + idx, srvHintLen);
+                    XMEMCPY(ssl->arrays->server_hint, input + args->idx,
+                                                                    srvHintLen);
                     ssl->arrays->server_hint[srvHintLen] = 0;
-                    idx += length;
+                    args->idx += length;
                     break;
                 }
             #endif /* !NO_PSK */
             #ifndef NO_DH
                 case diffie_hellman_kea:
                 {
+                    word16 length;
+
                     /* p */
-                    if ((idx - begin) + OPAQUE16_LEN > size) {
+                    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
-                    ato16(input + idx, &length);
-                    idx += OPAQUE16_LEN;
+                    ato16(input + args->idx, &length);
+                    args->idx += OPAQUE16_LEN;
 
-                    if ((idx - begin) + length > size) {
+                    if ((args->idx - args->begin) + length > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
@@ -14610,7 +15297,7 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                     }
 
                     ssl->buffers.serverDH_P.buffer =
-                        (byte*)XMALLOC(length, ssl->heap, DYNAMIC_TYPE_DH);
+                        (byte*)XMALLOC(length, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
                     if (ssl->buffers.serverDH_P.buffer) {
                         ssl->buffers.serverDH_P.length = length;
                     }
@@ -14618,25 +15305,26 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                         ERROR_OUT(MEMORY_ERROR, exit_dske);
                     }
 
-                    XMEMCPY(ssl->buffers.serverDH_P.buffer, input + idx, length);
-                    idx += length;
+                    XMEMCPY(ssl->buffers.serverDH_P.buffer, input + args->idx,
+                                                                        length);
+                    args->idx += length;
 
                     ssl->options.dhKeySz = length;
 
                     /* g */
-                    if ((idx - begin) + OPAQUE16_LEN > size) {
+                    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
-                    ato16(input + idx, &length);
-                    idx += OPAQUE16_LEN;
+                    ato16(input + args->idx, &length);
+                    args->idx += OPAQUE16_LEN;
 
-                    if ((idx - begin) + length > size) {
+                    if ((args->idx - args->begin) + length > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
                     ssl->buffers.serverDH_G.buffer =
-                        (byte*)XMALLOC(length, ssl->heap, DYNAMIC_TYPE_DH);
+                        (byte*)XMALLOC(length, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
                     if (ssl->buffers.serverDH_G.buffer) {
                         ssl->buffers.serverDH_G.length = length;
                     }
@@ -14644,23 +15332,24 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                         ERROR_OUT(MEMORY_ERROR, exit_dske);
                     }
 
-                    XMEMCPY(ssl->buffers.serverDH_G.buffer, input + idx, length);
-                    idx += length;
+                    XMEMCPY(ssl->buffers.serverDH_G.buffer, input + args->idx,
+                                                                        length);
+                    args->idx += length;
 
                     /* pub */
-                    if ((idx - begin) + OPAQUE16_LEN > size) {
+                    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
-                    ato16(input + idx, &length);
-                    idx += OPAQUE16_LEN;
+                    ato16(input + args->idx, &length);
+                    args->idx += OPAQUE16_LEN;
 
-                    if ((idx - begin) + length > size) {
+                    if ((args->idx - args->begin) + length > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
                     ssl->buffers.serverDH_Pub.buffer =
-                        (byte*)XMALLOC(length, ssl->heap, DYNAMIC_TYPE_DH);
+                        (byte*)XMALLOC(length, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
                     if (ssl->buffers.serverDH_Pub.buffer) {
                         ssl->buffers.serverDH_Pub.length = length;
                     }
@@ -14668,8 +15357,9 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                         ERROR_OUT(MEMORY_ERROR, exit_dske);
                     }
 
-                    XMEMCPY(ssl->buffers.serverDH_Pub.buffer, input + idx, length);
-                    idx += length;
+                    XMEMCPY(ssl->buffers.serverDH_Pub.buffer, input + args->idx,
+                                                                        length);
+                    args->idx += length;
                     break;
                 }
             #endif /* !NO_DH */
@@ -14678,25 +15368,27 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                 {
                     byte b;
                     int curveId, curveOid;
+                    word16 length;
 
-                    if ((idx - begin) + ENUM_LEN + OPAQUE16_LEN + OPAQUE8_LEN > size) {
+                    if ((args->idx - args->begin) + ENUM_LEN + OPAQUE16_LEN +
+                                                        OPAQUE8_LEN > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
-                    b = input[idx++];
+                    b = input[args->idx++];
                     if (b != named_curve) {
                         ERROR_OUT(ECC_CURVETYPE_ERROR, exit_dske);
                     }
 
-                    idx += 1;   /* curve type, eat leading 0 */
-                    b = input[idx++];
+                    args->idx += 1;   /* curve type, eat leading 0 */
+                    b = input[args->idx++];
                     if ((curveOid = CheckCurveId(b)) < 0) {
                         ERROR_OUT(ECC_CURVE_ERROR, exit_dske);
                     }
                     ssl->ecdhCurveOID = curveOid;
 
-                    length = input[idx++];
-                    if ((idx - begin) + length > size) {
+                    length = input[args->idx++];
+                    if ((args->idx - args->begin) + length > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
@@ -14723,12 +15415,12 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                     }
 
                     curveId = wc_ecc_get_oid(curveOid, NULL, NULL);
-                    if (wc_ecc_import_x963_ex(input + idx, length,
+                    if (wc_ecc_import_x963_ex(input + args->idx, length,
                                         ssl->peerEccKey, curveId) != 0) {
                         ERROR_OUT(ECC_PEERKEY_ERROR, exit_dske);
                     }
 
-                    idx += length;
+                    args->idx += length;
                     ssl->peerEccKeyPresent = 1;
                     break;
                 }
@@ -14737,33 +15429,35 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                 case dhe_psk_kea:
                 {
                     int srvHintLen;
+                    word16 length;
 
-                    if ((idx - begin) + OPAQUE16_LEN > size) {
+                    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
-                    ato16(input + idx, &length);
-                    idx += OPAQUE16_LEN;
+                    ato16(input + args->idx, &length);
+                    args->idx += OPAQUE16_LEN;
 
-                    if ((idx - begin) + length > size) {
+                    if ((args->idx - args->begin) + length > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
                     /* get PSK server hint from the wire */
                     srvHintLen = min(length, MAX_PSK_ID_LEN - 1);
-                    XMEMCPY(ssl->arrays->server_hint, input + idx, srvHintLen);
+                    XMEMCPY(ssl->arrays->server_hint, input + args->idx,
+                                                                srvHintLen);
                     ssl->arrays->server_hint[srvHintLen] = 0;
-                    idx += length;
+                    args->idx += length;
 
                     /* p */
-                    if ((idx - begin) + OPAQUE16_LEN > size) {
+                    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
-                    ato16(input + idx, &length);
-                    idx += OPAQUE16_LEN;
+                    ato16(input + args->idx, &length);
+                    args->idx += OPAQUE16_LEN;
 
-                    if ((idx - begin) + length > size) {
+                    if ((args->idx - args->begin) + length > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
@@ -14774,7 +15468,7 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                     }
 
                     ssl->buffers.serverDH_P.buffer = (byte*)XMALLOC(length,
-                                                ssl->heap, DYNAMIC_TYPE_DH);
+                                                ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
                     if (ssl->buffers.serverDH_P.buffer) {
                         ssl->buffers.serverDH_P.length = length;
                     }
@@ -14782,25 +15476,26 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                         ERROR_OUT(MEMORY_ERROR, exit_dske);
                     }
 
-                    XMEMCPY(ssl->buffers.serverDH_P.buffer, input + idx, length);
-                    idx += length;
+                    XMEMCPY(ssl->buffers.serverDH_P.buffer, input + args->idx,
+                                                                        length);
+                    args->idx += length;
 
                     ssl->options.dhKeySz = length;
 
                     /* g */
-                    if ((idx - begin) + OPAQUE16_LEN > size) {
+                    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
-                    ato16(input + idx, &length);
-                    idx += OPAQUE16_LEN;
+                    ato16(input + args->idx, &length);
+                    args->idx += OPAQUE16_LEN;
 
-                    if ((idx - begin) + length > size) {
+                    if ((args->idx - args->begin) + length > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
                     ssl->buffers.serverDH_G.buffer = (byte*)XMALLOC(length,
-                                                ssl->heap, DYNAMIC_TYPE_DH);
+                                                ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
                     if (ssl->buffers.serverDH_G.buffer) {
                         ssl->buffers.serverDH_G.length = length;
                     }
@@ -14808,23 +15503,24 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                         ERROR_OUT(MEMORY_ERROR, exit_dske);
                     }
 
-                    XMEMCPY(ssl->buffers.serverDH_G.buffer, input + idx, length);
-                    idx += length;
+                    XMEMCPY(ssl->buffers.serverDH_G.buffer, input + args->idx,
+                                                                        length);
+                    args->idx += length;
 
                     /* pub */
-                    if ((idx - begin) + OPAQUE16_LEN > size) {
+                    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
-                    ato16(input + idx, &length);
-                    idx += OPAQUE16_LEN;
+                    ato16(input + args->idx, &length);
+                    args->idx += OPAQUE16_LEN;
 
-                    if ((idx - begin) + length > size) {
+                    if ((args->idx - args->begin) + length > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
                     ssl->buffers.serverDH_Pub.buffer = (byte*)XMALLOC(length,
-                                                ssl->heap, DYNAMIC_TYPE_DH);
+                                                ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
                     if (ssl->buffers.serverDH_Pub.buffer) {
                         ssl->buffers.serverDH_Pub.length = length;
                     }
@@ -14832,8 +15528,9 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                         ERROR_OUT(MEMORY_ERROR, exit_dske);
                     }
 
-                    XMEMCPY(ssl->buffers.serverDH_Pub.buffer, input + idx, length);
-                    idx += length;
+                    XMEMCPY(ssl->buffers.serverDH_Pub.buffer, input + args->idx,
+                                                                        length);
+                    args->idx += length;
                     break;
                 }
             #endif /* !NO_DH || !NO_PSK */
@@ -14843,75 +15540,78 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                     byte b;
                     int curveOid, curveId;
                     int srvHintLen;
+                    word16 length;
 
-                    if ((idx - begin) + OPAQUE16_LEN > size) {
+                    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
-                    ato16(input + idx, &length);
-                    idx += OPAQUE16_LEN;
+                    ato16(input + args->idx, &length);
+                    args->idx += OPAQUE16_LEN;
 
-                    if ((idx - begin) + length > size) {
+                    if ((args->idx - args->begin) + length > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
                     /* get PSK server hint from the wire */
                     srvHintLen = min(length, MAX_PSK_ID_LEN - 1);
-                    XMEMCPY(ssl->arrays->server_hint, input + idx, srvHintLen);
+                    XMEMCPY(ssl->arrays->server_hint, input + args->idx, srvHintLen);
                     ssl->arrays->server_hint[srvHintLen] = 0;
 
-                    idx += length;
+                    args->idx += length;
 
-                    if ((idx - begin) + ENUM_LEN + OPAQUE16_LEN +
+                    if ((args->idx - args->begin) + ENUM_LEN + OPAQUE16_LEN +
                         OPAQUE8_LEN > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
                     /* Check curve name and ID */
-                    b = input[idx++];
+                    b = input[args->idx++];
                     if (b != named_curve) {
                         ERROR_OUT(ECC_CURVETYPE_ERROR, exit_dske);
                     }
 
-                    idx += 1;   /* curve type, eat leading 0 */
-                    b = input[idx++];
+                    args->idx += 1;   /* curve type, eat leading 0 */
+                    b = input[args->idx++];
                     if ((curveOid = CheckCurveId(b)) < 0) {
                         ERROR_OUT(ECC_CURVE_ERROR, exit_dske);
                     }
 
-                    length = input[idx++];
-                    if ((idx - begin) + length > size) {
+                    length = input[args->idx++];
+                    if ((args->idx - args->begin) + length > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
                     if (ssl->peerEccKey == NULL) {
                         /* alloc/init on demand */
                         ssl->peerEccKey = (ecc_key*)XMALLOC(sizeof(ecc_key),
-                                                     ssl->heap, DYNAMIC_TYPE_ECC);
+                                                   ssl->heap, DYNAMIC_TYPE_ECC);
                         if (ssl->peerEccKey == NULL) {
                             WOLFSSL_MSG("PeerEccKey Memory error");
                             ERROR_OUT(MEMORY_E, exit_dske);
                         }
-                        ret = wc_ecc_init_ex(ssl->peerEccKey, ssl->heap, ssl->devId);
+                        ret = wc_ecc_init_ex(ssl->peerEccKey, ssl->heap,
+                                                                    ssl->devId);
                         if (ret != 0) {
                             goto exit_dske;
                         }
                     } else if (ssl->peerEccKeyPresent) {  /* don't leak on reuse */
                         wc_ecc_free(ssl->peerEccKey);
                         ssl->peerEccKeyPresent = 0;
-                        ret = wc_ecc_init_ex(ssl->peerEccKey, ssl->heap, ssl->devId);
+                        ret = wc_ecc_init_ex(ssl->peerEccKey, ssl->heap,
+                                                                    ssl->devId);
                         if (ret != 0) {
                             goto exit_dske;
                         }
                     }
 
                     curveId = wc_ecc_get_oid(curveOid, NULL, NULL);
-                    if (wc_ecc_import_x963_ex(input + idx, length,
+                    if (wc_ecc_import_x963_ex(input + args->idx, length,
                         ssl->peerEccKey, curveId) != 0) {
                         ERROR_OUT(ECC_PEERKEY_ERROR, exit_dske);
                     }
 
-                    idx += length;
+                    args->idx += length;
                     ssl->peerEccKeyPresent = 1;
                     break;
                 }
@@ -14955,34 +15655,35 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                         break;
                     }
 
-                    verifySz = (word16)(idx - begin);
+                    verifySz = (word16)(args->idx - args->begin);
                     if (verifySz > MAX_DH_SZ) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
                     if (IsAtLeastTLSv1_2(ssl)) {
-                        if ((idx - begin) + ENUM_LEN + ENUM_LEN > size) {
+                        if ((args->idx - args->begin) + ENUM_LEN + ENUM_LEN >
+                                                                        size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dske);
                         }
 
-                        hashAlgo = input[idx++];
-                        sigAlgo  = input[idx++];
+                        hashAlgo = input[args->idx++];
+                        args->sigAlgo  = input[args->idx++];
 
                         switch (hashAlgo) {
                             case sha512_mac:
-                                #ifdef WOLFSSL_SHA512
-                                    hashType = WC_HASH_TYPE_SHA512;
-                                #endif
+                            #ifdef WOLFSSL_SHA512
+                                hashType = WC_HASH_TYPE_SHA512;
+                            #endif
                                 break;
                             case sha384_mac:
-                                #ifdef WOLFSSL_SHA384
-                                    hashType = WC_HASH_TYPE_SHA384;
-                                #endif
+                            #ifdef WOLFSSL_SHA384
+                                hashType = WC_HASH_TYPE_SHA384;
+                            #endif
                                 break;
                             case sha256_mac:
-                                #ifndef NO_SHA256
-                                    hashType = WC_HASH_TYPE_SHA256;
-                                #endif
+                            #ifndef NO_SHA256
+                                hashType = WC_HASH_TYPE_SHA256;
+                            #endif
                                 break;
                             case sha_mac:
                                 #if !defined(NO_SHA) && \
@@ -15003,7 +15704,7 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                         /* only using sha and md5 for rsa */
                         #ifndef NO_OLD_TLS
                             hashType = WC_HASH_TYPE_SHA;
-                            if (sigAlgo == rsa_sa_algo) {
+                            if (args->sigAlgo == rsa_sa_algo) {
                                 hashType = WC_HASH_TYPE_MD5_SHA;
                             }
                         #else
@@ -15011,18 +15712,18 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                         #endif
                     }
                 #ifndef NO_RSA
-                    typeH = wc_HashGetOID(hashType);
+                    args->typeH = wc_HashGetOID(hashType);
                 #endif
 
                     /* signature */
-                    if ((idx - begin) + OPAQUE16_LEN > size) {
+                    if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
-                    ato16(input + idx, &length);
-                    idx += OPAQUE16_LEN;
+                    ato16(input + args->idx, &args->verifySigSz);
+                    args->idx += OPAQUE16_LEN;
 
-                    if ((idx - begin) + length > size) {
+                    if ((args->idx - args->begin) + args->verifySigSz > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dske);
                     }
 
@@ -15049,7 +15750,7 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                     XMEMCPY(&ssl->buffers.sig.buffer[RAN_LEN],
                         ssl->arrays->serverRandom, RAN_LEN);
                     XMEMCPY(&ssl->buffers.sig.buffer[RAN_LEN * 2],
-                        input + begin, verifySz); /* message */
+                        input + args->begin, verifySz); /* message */
 
                     /* Perform hash */
                     ret = wc_Hash(hashType,
@@ -15059,7 +15760,7 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                         goto exit_dske;
                     }
 
-                    switch (sigAlgo)
+                    switch (args->sigAlgo)
                     {
                     #ifndef NO_RSA
                         case rsa_sa_algo:
@@ -15083,7 +15784,7 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
 
                     default:
                         ret = ALGO_ID_E;
-                    } /* switch (sigAlgo) */
+                    } /* switch (args->sigAlgo) */
 
             #endif /* NO_DH && !HAVE_ECC */
                     break;
@@ -15123,23 +15824,24 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                         break;
                     }
 
-                    if (verifySig == NULL) {
-                        verifySig = (byte*)XMALLOC(length, ssl->heap,
-                                                    DYNAMIC_TYPE_TMP_BUFFER);
-                        if (!verifySig) {
+                    if (args->verifySig == NULL) {
+                        args->verifySig = (byte*)XMALLOC(args->verifySigSz,
+                                            ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                        if (args->verifySig == NULL) {
                             ERROR_OUT(MEMORY_E, exit_dske);
                         }
-                        XMEMCPY(verifySig, input + idx, length);
+                        XMEMCPY(args->verifySig, input + args->idx,
+                                                            args->verifySigSz);
                     }
 
-                    switch (sigAlgo)
+                    switch (args->sigAlgo)
                     {
                     #ifndef NO_RSA
                         case rsa_sa_algo:
                         {
                             ret = RsaVerify(ssl,
-                                verifySig, length,
-                                &output,
+                                args->verifySig, args->verifySigSz,
+                                &args->output,
                                 ssl->peerRsaKey,
                             #ifdef HAVE_PK_CALLBACKS
                                 ssl->buffers.peerRsaKey.buffer,
@@ -15151,7 +15853,7 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                             );
 
                             if (ret >= 0) {
-                                sigSz = (word16)ret;
+                                args->sigSz = (word16)ret;
                                 ret = 0;
                             }
                             break;
@@ -15161,7 +15863,7 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                         case ecc_dsa_sa_algo:
                         {
                             ret = EccVerify(ssl,
-                                verifySig, length,
+                                args->verifySig, args->verifySigSz,
                                 ssl->buffers.digest.buffer,
                                 ssl->buffers.digest.length,
                                 ssl->peerEccDsaKey,
@@ -15173,6 +15875,7 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                                 NULL, 0, NULL
                             #endif
                             );
+
                             break;
                         }
                     #endif /* HAVE_ECC */
@@ -15219,9 +15922,9 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                     }
 
                     /* increment index after verify is done */
-                    idx += length;
+                    args->idx += args->verifySigSz;
 
-                    switch(sigAlgo)
+                    switch(args->sigAlgo)
                     {
                     #ifndef NO_RSA
                         case rsa_sa_algo:
@@ -15244,9 +15947,9 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
 
                                 encSigSz = wc_EncodeSignature(encodedSig,
                                     ssl->buffers.digest.buffer,
-                                    ssl->buffers.digest.length, typeH);
-                                if (encSigSz != sigSz || !output ||
-                                    XMEMCMP(output, encodedSig,
+                                    ssl->buffers.digest.length, args->typeH);
+                                if (encSigSz != args->sigSz || !args->output ||
+                                    XMEMCMP(args->output, encodedSig,
                                             min(encSigSz, MAX_ENCODED_SIG_SZ)) != 0) {
                                     ret = VERIFY_SIGN_ERROR;
                                 }
@@ -15257,9 +15960,11 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                                     goto exit_dske;
                                 }
                             }
-                            else if (sigSz != FINISHED_SZ || !output ||
-                                XMEMCMP(output, ssl->buffers.digest.buffer,
-                                                        FINISHED_SZ) != 0) {
+                            else if (args->sigSz != FINISHED_SZ ||
+                                    !args->output ||
+                                    XMEMCMP(args->output,
+                                            ssl->buffers.digest.buffer,
+                                            FINISHED_SZ) != 0) {
                                 ERROR_OUT(VERIFY_SIGN_ERROR, exit_dske);
                             }
                             break;
@@ -15292,7 +15997,7 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
         case KEYSHARE_FINALIZE:
         {
             if (IsEncryptionOn(ssl, 0)) {
-                idx += ssl->keys.padSz;
+                args->idx += ssl->keys.padSz;
             }
 
             /* QSH extensions */
@@ -15302,17 +16007,17 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
                 int    qshSz;
 
                 /* extension name */
-                ato16(input + idx, &name);
-                idx += OPAQUE16_LEN;
+                ato16(input + args->idx, &name);
+                args->idx += OPAQUE16_LEN;
 
                 if (name == TLSX_QUANTUM_SAFE_HYBRID) {
                     /* if qshSz is larger than 0 it is the length of
                        buffer used */
-                    if ((qshSz = TLSX_QSHCipher_Parse(ssl, input + idx,
+                    if ((qshSz = TLSX_QSHCipher_Parse(ssl, input + args->idx,
                                                        size, 0)) < 0) {
                         ERROR_OUT(qshSz, exit_dske);
                     }
-                    idx += qshSz;
+                    args->idx += qshSz;
                 }
                 else {
                     /* unknown extension sent server ignored handshake */
@@ -15333,7 +16038,7 @@ static int DoServerKeyExchange(WOLFSSL* ssl, const byte* input,
         case KEYSHARE_END:
         {
             /* return index */
-            *inOutIdx = idx;
+            *inOutIdx = args->idx;
 
             ssl->options.serverState = SERVER_KEYEXCHANGE_COMPLETE;
             break;
@@ -15346,44 +16051,18 @@ exit_dske:
 
     WOLFSSL_LEAVE("DoServerKeyExchange", ret);
 
-    /* Handle cleanup for stack variables here */
-
 #ifdef WOLFSSL_ASYNC_CRYPT
-    /* Handle WC_PENDING_E */
+    /* Handle async operation */
     if (ret == WC_PENDING_E) {
-        /* Store variables needed for async */
-        XMEMSET(&ssl->async, 0, sizeof(ssl->async));
-        ssl->async.idx = idx;
-        ssl->async.length = length;
-        ssl->async.output = output;
-        ssl->async.sigSz = sigSz;
-    #ifndef NO_RSA
-        ssl->async.hashAlgo = typeH;
-    #endif
-        ssl->async.sigAlgo = sigAlgo;
-    #if !defined(NO_DH) || defined(HAVE_ECC)
-        ssl->async.data = verifySig;
-    #endif
-
         /* Mark message as not recevied so it can process again */
         ssl->msgsReceived.got_server_key_exchange = 0;
 
-        /* Push event to queue */
-        ret = wolfAsync_EventQueuePush(&ssl->ctx->event_queue, &ssl->event);
-        if (ret == 0) {
-            return WC_PENDING_E;
-        }
+        return ret;
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
-#if !defined(NO_DH) || defined(HAVE_ECC)
-    if (verifySig) {
-        XFREE(verifySig, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
-        verifySig = NULL;
-    }
-#endif
-
     /* Final cleanup */
+    FreeDskeArgs(ssl, args);
     FreeKeyExchange(ssl);
 
     return ret;
@@ -15524,6 +16203,11 @@ static int QSH_Encrypt(QSHKey* key, byte* in, word32 szIn,
     int ret = 0;
     word16 size = *szOut;
 
+    (void)in;
+    (void)szIn;
+    (void)out;
+    (void)szOut;
+
     WOLFSSL_MSG("Encrypting QSH key material");
 
     switch (key->name) {
@@ -15546,12 +16230,16 @@ static int QSH_Encrypt(QSHKey* key, byte* in, word32 szIn,
 
 
 /* Decrypt using Quantum Safe Handshake algorithms */
-int QSH_Decrypt(QSHKey* key, byte* in, word32 szIn,
-                                                       byte* out, word16* szOut)
+int QSH_Decrypt(QSHKey* key, byte* in, word32 szIn, byte* out, word16* szOut)
 {
     int ret = 0;
     word16 size = *szOut;
 
+    (void)in;
+    (void)szIn;
+    (void)out;
+    (void)szOut;
+
     WOLFSSL_MSG("Decrypting QSH key material");
 
     switch (key->name) {
@@ -15578,12 +16266,14 @@ int QSH_Decrypt(QSHKey* key, byte* in, word32 szIn,
  */
 static word32 QSH_MaxSecret(QSHKey* key)
 {
+    int ret = 0;
+#ifdef HAVE_NTRU
     byte isNtru = 0;
     word16 inSz = 48;
     word16 outSz;
     DRBG_HANDLE drbg = 0;
     byte bufIn[48];
-    int ret = 0;
+#endif
 
     if (key == NULL || key->pub.length == 0)
         return 0;
@@ -15605,6 +16295,7 @@ static word32 QSH_MaxSecret(QSHKey* key)
             return 0;
     }
 
+#ifdef HAVE_NTRU
     if (isNtru) {
         ret = ntru_crypto_drbg_external_instantiate(GetEntropy, &drbg);
         if (ret != DRBG_OK)
@@ -15615,10 +16306,11 @@ static word32 QSH_MaxSecret(QSHKey* key)
             return NTRU_ENCRYPT_ERROR;
         }
         ntru_crypto_drbg_uninstantiate(drbg);
-        return outSz;
+        ret = outSz;
     }
+#endif
 
-    return 0;
+    return ret;
 }
 
 /* Generate the secret byte material for pms
@@ -15759,47 +16451,51 @@ static word32 QSH_KeyExchangeWrite(WOLFSSL* ssl, byte isServer)
 #endif /* HAVE_QSH */
 
 
+typedef struct SckeArgs { /* working state for SendClientKeyExchange */
+    byte*  output; /* not allocated; points into encSecret or the output buffer */
+    byte*  encSecret; /* XMALLOC'd in KEYSHARE_BUILD; freed by FreeSckeArgs */
+    byte*  input; /* XMALLOC'd copy handed to BuildMessage; freed by FreeSckeArgs */
+    word32 encSz; /* starts as MAX_ENCRYPT_SZ, then size of data in encSecret */
+    word32 length; /* public key size in the dhe_psk / ecdhe_psk cases */
+    int    sendSz; /* size of the message being sent */
+    int    inputSz; /* size of input: idx - RECORD_HEADER_SZ */
+} SckeArgs;
+/* Frees SckeArgs buffers; void* pArgs fits the ssl->async.freeArgs hook. */
+static void FreeSckeArgs(WOLFSSL* ssl, void* pArgs)
+{
+    SckeArgs* args = (SckeArgs*)pArgs;
+
+    (void)ssl; /* NOTE(review): presumably for builds where XFREE ignores heap */
+
+    if (args->encSecret) {
+        XFREE(args->encSecret, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        args->encSecret = NULL; /* cleared so a repeated call does not free twice */
+    }
+    if (args->input) {
+        XFREE(args->input, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        args->input = NULL; /* cleared so a repeated call does not free twice */
+    }
+}
+
 int SendClientKeyExchange(WOLFSSL* ssl)
 {
     int ret = 0;
-    int sendSz = 0;
-    word32 length_lcl = 0;
-    word32* length = &length_lcl;
-    byte* output = NULL;
-    byte* encSecret = NULL;
-    word32 encSz = 0;
-
-    (void)length;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    SckeArgs* args = (SckeArgs*)ssl->async.args;
+    typedef char args_test[sizeof(ssl->async.args) >= sizeof(*args) ? 1 : -1];
+    (void)sizeof(args_test);
+#else
+    SckeArgs  args[1];
+#endif
 
     WOLFSSL_ENTER("SendClientKeyExchange");
 
 #ifdef WOLFSSL_ASYNC_CRYPT
-    /* use async pointer for length */
-    length = &ssl->async.length;
-
-    ret = wolfAsync_EventPop(&ssl->event, WOLF_EVENT_TYPE_ASYNC_ANY);
+    ret = wolfSSL_AsyncPop(ssl, &ssl->options.keyShareState);
     if (ret != WC_NOT_PENDING_E) {
-        WOLF_EVENT_TYPE eType = ssl->event.type;
-
-        /* Clear event */
-        XMEMSET(&ssl->event, 0, sizeof(ssl->event));
-
         /* Check for error */
-        if (ret < 0) {
+        if (ret < 0)
             goto exit_scke;
-        }
-        else {
-            /* Restore variables needed for async */
-            output = ssl->async.output;
-            sendSz = ssl->async.sendSz;
-            encSecret = ssl->async.data;
-            encSz = ssl->async.sigSz;
-
-            /* Advance key share state if not wolfCrypt */
-            if (eType == WOLF_EVENT_TYPE_ASYNC_WOLFSSL) {
-                ssl->options.keyShareState++;
-            }
-        }
     }
     else
 #endif
@@ -15807,6 +16503,10 @@ int SendClientKeyExchange(WOLFSSL* ssl)
         /* Reset state */
         ret = 0;
         ssl->options.keyShareState = KEYSHARE_BEGIN;
+        XMEMSET(args, 0, sizeof(SckeArgs));
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        ssl->async.freeArgs = FreeSckeArgs;
+    #endif
     }
 
     switch(ssl->options.keyShareState)
@@ -15877,20 +16577,14 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                 #endif
 
                     /* create private key */
-                    ssl->sigKey = XMALLOC(sizeof(ecc_key),
-                                               ssl->heap, DYNAMIC_TYPE_ECC);
-                    if (ssl->sigKey == NULL) {
-                        ERROR_OUT(MEMORY_E, exit_scke);
-                    }
-                    ssl->sigType = DYNAMIC_TYPE_ECC;
-
-                    ret = wc_ecc_init_ex((ecc_key*)ssl->sigKey, ssl->heap,
-                                                                ssl->devId);
+                    ssl->hsType = DYNAMIC_TYPE_ECC;
+                    ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
                     if (ret != 0) {
                         goto exit_scke;
                     }
-                    ret = EccMakeKey(ssl, (ecc_key*)ssl->sigKey,
-                                                            ssl->peerEccKey);
+
+                    ret = EccMakeKey(ssl, (ecc_key*)ssl->hsKey, ssl->peerEccKey);
+
                     break;
             #endif /* HAVE_ECC && !NO_PSK */
             #ifdef HAVE_NTRU
@@ -15933,19 +16627,14 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                     }
 
                     /* create private key */
-                    ssl->sigKey = XMALLOC(sizeof(ecc_key),
-                                               ssl->heap, DYNAMIC_TYPE_ECC);
-                    if (ssl->sigKey == NULL) {
-                        ERROR_OUT(MEMORY_E, exit_scke);
-                    }
-                    ssl->sigType = DYNAMIC_TYPE_ECC;
-
-                    ret = wc_ecc_init_ex((ecc_key*)ssl->sigKey, ssl->heap,
-                                                                ssl->devId);
+                    ssl->hsType = DYNAMIC_TYPE_ECC;
+                    ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
                     if (ret != 0) {
                         goto exit_scke;
                     }
-                    ret = EccMakeKey(ssl, (ecc_key*)ssl->sigKey, peerKey);
+
+                    ret = EccMakeKey(ssl, (ecc_key*)ssl->hsKey, peerKey);
+
                     break;
                 }
             #endif /* HAVE_ECC */
@@ -15965,10 +16654,10 @@ int SendClientKeyExchange(WOLFSSL* ssl)
 
         case KEYSHARE_BUILD:
         {
-            encSz = MAX_ENCRYPT_SZ;
-            encSecret = (byte*)XMALLOC(MAX_ENCRYPT_SZ, ssl->heap,
-                                                   DYNAMIC_TYPE_TMP_BUFFER);
-            if (encSecret == NULL) {
+            args->encSz = MAX_ENCRYPT_SZ;
+            args->encSecret = (byte*)XMALLOC(args->encSz, ssl->heap,
+                                                    DYNAMIC_TYPE_TMP_BUFFER);
+            if (args->encSecret == NULL) {
                 ERROR_OUT(MEMORY_E, exit_scke);
             }
 
@@ -15998,6 +16687,26 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                     if (ssl->buffers.sig.buffer == NULL) {
                         ERROR_OUT(MEMORY_E, exit_scke);
                     }
+
+                    ret = AllocKey(ssl, DYNAMIC_TYPE_DH,
+                                            (void**)&ssl->buffers.serverDH_Key);
+                    if (ret != 0) {
+                        goto exit_scke;
+                    }
+
+                    ret = wc_DhSetKey(ssl->buffers.serverDH_Key,
+                        ssl->buffers.serverDH_P.buffer,
+                        ssl->buffers.serverDH_P.length,
+                        ssl->buffers.serverDH_G.buffer,
+                        ssl->buffers.serverDH_G.length);
+                    if (ret != 0) {
+                        goto exit_scke;
+                    }
+
+                    /* for DH, encSecret is Yc, agree is pre-master */
+                    ret = DhGenKeyPair(ssl, ssl->buffers.serverDH_Key,
+                        ssl->buffers.sig.buffer, &ssl->buffers.sig.length,
+                        args->encSecret, &args->encSz);
                     break;
                 }
             #endif /* !NO_DH */
@@ -16012,23 +16721,24 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                         ssl->arrays->psk_keySz > MAX_PSK_KEY_LEN) {
                         ERROR_OUT(PSK_KEY_ERROR, exit_scke);
                     }
-                    encSz = (word32)XSTRLEN(ssl->arrays->client_identity);
-                    if (encSz > MAX_PSK_ID_LEN) {
+                    args->encSz = (word32)XSTRLEN(ssl->arrays->client_identity);
+                    if (args->encSz > MAX_PSK_ID_LEN) {
                         ERROR_OUT(CLIENT_ID_ERROR, exit_scke);
                     }
-                    XMEMCPY(encSecret,
-                        ssl->arrays->client_identity, encSz);
+                    XMEMCPY(args->encSecret, ssl->arrays->client_identity,
+                                                                args->encSz);
 
                     /* make psk pre master secret */
                     /* length of key + length 0s + length of key + key */
                     c16toa((word16)ssl->arrays->psk_keySz, pms);
-                    pms += 2;
+                    pms += OPAQUE16_LEN;
                     XMEMSET(pms, 0, ssl->arrays->psk_keySz);
                     pms += ssl->arrays->psk_keySz;
                     c16toa((word16)ssl->arrays->psk_keySz, pms);
-                    pms += 2;
+                    pms += OPAQUE16_LEN;
                     XMEMCPY(pms, ssl->arrays->psk_key, ssl->arrays->psk_keySz);
-                    ssl->arrays->preMasterSz = ssl->arrays->psk_keySz * 2 + 4;
+                    ssl->arrays->preMasterSz = (ssl->arrays->psk_keySz * 2) +
+                        (2 * OPAQUE16_LEN);
                     ForceZero(ssl->arrays->psk_key, ssl->arrays->psk_keySz);
                     ssl->arrays->psk_keySz = 0; /* No further need */
                     break;
@@ -16038,7 +16748,7 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                 case dhe_psk_kea:
                 {
                     word32 esSz = 0;
-                    output = encSecret;
+                    args->output = args->encSecret;
 
                     ssl->arrays->psk_keySz = ssl->options.client_psk_cb(ssl,
                          ssl->arrays->server_hint, ssl->arrays->client_identity,
@@ -16060,13 +16770,33 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                         ERROR_OUT(MEMORY_E, exit_scke);
                     }
 
-                    c16toa((word16)esSz, output);
-                    output += OPAQUE16_LEN;
-                    XMEMCPY(output, ssl->arrays->client_identity, esSz);
-                    output += esSz;
-                    encSz = esSz + OPAQUE16_LEN;
+                    c16toa((word16)esSz, args->output);
+                    args->output += OPAQUE16_LEN;
+                    XMEMCPY(args->output, ssl->arrays->client_identity, esSz);
+                    args->output += esSz;
+                    args->encSz = esSz + OPAQUE16_LEN;
 
-                    *length = 0;
+                    args->length = 0;
+
+                    ret = AllocKey(ssl, DYNAMIC_TYPE_DH,
+                                            (void**)&ssl->buffers.serverDH_Key);
+                    if (ret != 0) {
+                        goto exit_scke;
+                    }
+
+                    ret = wc_DhSetKey(ssl->buffers.serverDH_Key,
+                        ssl->buffers.serverDH_P.buffer,
+                        ssl->buffers.serverDH_P.length,
+                        ssl->buffers.serverDH_G.buffer,
+                        ssl->buffers.serverDH_G.length);
+                    if (ret != 0) {
+                        goto exit_scke;
+                    }
+
+                    /* for DH, encSecret is Yc, agree is pre-master */
+                    ret = DhGenKeyPair(ssl, ssl->buffers.serverDH_Key,
+                        ssl->buffers.sig.buffer, &ssl->buffers.sig.length,
+                        args->output + OPAQUE16_LEN, &args->length);
                     break;
                 }
             #endif /* !NO_DH && !NO_PSK */
@@ -16074,7 +16804,7 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                 case ecdhe_psk_kea:
                 {
                     word32 esSz = 0;
-                    output = encSecret;
+                    args->output = args->encSecret;
 
                     /* Send PSK client identity */
                     ssl->arrays->psk_keySz = ssl->options.client_psk_cb(ssl,
@@ -16091,14 +16821,18 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                     }
 
                     /* place size and identity in output buffer sz:identity */
-                    c16toa((word16)esSz, output);
-                    output += OPAQUE16_LEN;
-                    XMEMCPY(output, ssl->arrays->client_identity, esSz);
-                    output += esSz;
-                    encSz = esSz + OPAQUE16_LEN;
+                    c16toa((word16)esSz, args->output);
+                    args->output += OPAQUE16_LEN;
+                    XMEMCPY(args->output, ssl->arrays->client_identity, esSz);
+                    args->output += esSz;
+                    args->encSz = esSz + OPAQUE16_LEN;
 
                     /* length is used for public key size */
-                    *length = MAX_ENCRYPT_SZ;
+                    args->length = MAX_ENCRYPT_SZ;
+
+                    /* Create shared ECC key leaving room at the beginning
+                       of buffer for size of shared key. */
+                    ssl->arrays->preMasterSz = ENCRYPT_LEN - OPAQUE16_LEN;
 
                 #ifdef HAVE_PK_CALLBACKS
                     /* if callback then use it for shared secret */
@@ -16107,12 +16841,13 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                     }
                 #endif
 
-                    /* Place ECC key in buffer, leaving room for size */
-                    ret = wc_ecc_export_x963((ecc_key*)ssl->sigKey,
-                                            output + OPAQUE8_LEN, length);
+                    /* Place ECC key in output buffer, leaving room for size */
+                    ret = wc_ecc_export_x963((ecc_key*)ssl->hsKey,
+                                    args->output + OPAQUE8_LEN, &args->length);
                     if (ret != 0) {
                         ERROR_OUT(ECC_EXPORT_ERROR, exit_scke);
                     }
+
                     break;
                 }
             #endif /* HAVE_ECC && !NO_PSK */
@@ -16126,14 +16861,16 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                     }
 
                     ssl->arrays->preMasterSz = SECRET_LEN;
-                    encSz = MAX_ENCRYPT_SZ;
+                    args->encSz = MAX_ENCRYPT_SZ;
                     break;
                 }
             #endif /* HAVE_NTRU */
             #ifdef HAVE_ECC
                 case ecc_diffie_hellman_kea:
                 {
-                #ifdef HAVE_PK_CALLBACKS
+                    ssl->arrays->preMasterSz = ENCRYPT_LEN;
+
+                 #ifdef HAVE_PK_CALLBACKS
                     /* if callback then use it for shared secret */
                     if (ssl->ctx->EccSharedSecretCb != NULL) {
                         break;
@@ -16141,8 +16878,8 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                 #endif
 
                     /* Place ECC key in buffer, leaving room for size */
-                    ret = wc_ecc_export_x963((ecc_key*)ssl->sigKey,
-                                        encSecret + OPAQUE8_LEN, &encSz);
+                    ret = wc_ecc_export_x963((ecc_key*)ssl->hsKey,
+                                args->encSecret + OPAQUE8_LEN, &args->encSz);
                     if (ret != 0) {
                         ERROR_OUT(ECC_EXPORT_ERROR, exit_scke);
                     }
@@ -16172,7 +16909,7 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                 {
                     ret = RsaEnc(ssl,
                         ssl->arrays->preMasterSecret, SECRET_LEN,
-                        encSecret, &encSz,
+                        args->encSecret, &args->encSz,
                         ssl->peerRsaKey,
                     #if defined(HAVE_PK_CALLBACKS)
                         ssl->buffers.peerRsaKey.buffer,
@@ -16182,19 +16919,15 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                         NULL, 0, NULL
                     #endif
                     );
+
                     break;
                 }
             #endif /* !NO_RSA */
             #ifndef NO_DH
                 case diffie_hellman_kea:
                 {
-                    ret = DhAgree(ssl,
-                        ssl->buffers.serverDH_P.buffer,
-                        ssl->buffers.serverDH_P.length,
-                        ssl->buffers.serverDH_G.buffer,
-                        ssl->buffers.serverDH_G.length,
-                        ssl->buffers.sig.buffer, &ssl->buffers.sig.length,
-                        encSecret, &encSz,
+                    ret = DhAgree(ssl, ssl->buffers.serverDH_Key,
+                        ssl->buffers.sig.buffer, ssl->buffers.sig.length,
                         ssl->buffers.serverDH_Pub.buffer,
                         ssl->buffers.serverDH_Pub.length,
                         ssl->arrays->preMasterSecret,
@@ -16211,13 +16944,8 @@ int SendClientKeyExchange(WOLFSSL* ssl)
             #if !defined(NO_DH) && !defined(NO_PSK)
                 case dhe_psk_kea:
                 {
-                    ret = DhAgree(ssl,
-                        ssl->buffers.serverDH_P.buffer,
-                        ssl->buffers.serverDH_P.length,
-                        ssl->buffers.serverDH_G.buffer,
-                        ssl->buffers.serverDH_G.length,
-                        ssl->buffers.sig.buffer, &ssl->buffers.sig.length,
-                        output + OPAQUE16_LEN, length,
+                    ret = DhAgree(ssl, ssl->buffers.serverDH_Key,
+                        ssl->buffers.sig.buffer, ssl->buffers.sig.length,
                         ssl->buffers.serverDH_Pub.buffer,
                         ssl->buffers.serverDH_Pub.length,
                         ssl->arrays->preMasterSecret + OPAQUE16_LEN,
@@ -16228,13 +16956,9 @@ int SendClientKeyExchange(WOLFSSL* ssl)
             #if defined(HAVE_ECC) && !defined(NO_PSK)
                 case ecdhe_psk_kea:
                 {
-                    /* Create shared ECC key leaving room at the begining
-                       of buffer for size of shared key. */
-                    ssl->arrays->preMasterSz = ENCRYPT_LEN - OPAQUE16_LEN;
-
-                    ret = EccSharedSecret(ssl,
-                        (ecc_key*)ssl->sigKey, ssl->peerEccKey,
-                        output + OPAQUE8_LEN, length,
+                    ecc_key* key = (ecc_key*)ssl->hsKey;
+                    ret = EccSharedSecret(ssl, key, ssl->peerEccKey,
+                        args->output + OPAQUE8_LEN, &args->length,
                         ssl->arrays->preMasterSecret + OPAQUE16_LEN,
                         &ssl->arrays->preMasterSz,
                         WOLFSSL_CLIENT_END,
@@ -16261,8 +16985,8 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                                                   ssl->peerNtruKey,
                                                   ssl->arrays->preMasterSz,
                                                   ssl->arrays->preMasterSecret,
-                                                  (word16*)&encSz,
-                                                  encSecret);
+                                                  (word16*)&args->encSz,
+                                                  args->encSecret);
                     ntru_crypto_drbg_uninstantiate(drbg);
                     if (rc != NTRU_OK) {
                         ERROR_OUT(NTRU_ENCRYPT_ERROR, exit_scke);
@@ -16274,14 +16998,13 @@ int SendClientKeyExchange(WOLFSSL* ssl)
             #ifdef HAVE_ECC
                 case ecc_diffie_hellman_kea:
                 {
+                    ecc_key* key = (ecc_key*)ssl->hsKey;
                     ecc_key* peerKey = (ssl->specs.static_ecdh) ?
                                 ssl->peerEccDsaKey : ssl->peerEccKey;
 
-                    ssl->arrays->preMasterSz = ENCRYPT_LEN;
-
                     ret = EccSharedSecret(ssl,
-                        (ecc_key*)ssl->sigKey, peerKey,
-                        encSecret + OPAQUE8_LEN, &encSz,
+                        key, peerKey,
+                        args->encSecret + OPAQUE8_LEN, &args->encSz,
                         ssl->arrays->preMasterSecret,
                         &ssl->arrays->preMasterSz,
                         WOLFSSL_CLIENT_END,
@@ -16291,6 +17014,7 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                         NULL
                     #endif
                     );
+
                     break;
                 }
             #endif /* HAVE_ECC */
@@ -16333,15 +17057,15 @@ int SendClientKeyExchange(WOLFSSL* ssl)
             #if !defined(NO_DH) && !defined(NO_PSK)
                 case dhe_psk_kea:
                 {
-                    byte*  pms = ssl->arrays->preMasterSecret;
+                    byte* pms = ssl->arrays->preMasterSecret;
 
                     /* validate args */
-                    if (output == NULL || *length == 0) {
+                    if (args->output == NULL || args->length == 0) {
                         ERROR_OUT(BAD_FUNC_ARG, exit_scke);
                     }
 
-                    c16toa((word16)*length, output);
-                    encSz += *length + OPAQUE16_LEN;
+                    c16toa((word16)args->length, args->output);
+                    args->encSz += args->length + OPAQUE16_LEN;
                     c16toa((word16)ssl->arrays->preMasterSz, pms);
                     ssl->arrays->preMasterSz += OPAQUE16_LEN;
                     pms += ssl->arrays->preMasterSz;
@@ -16364,13 +17088,13 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                     byte* pms = ssl->arrays->preMasterSecret;
 
                     /* validate args */
-                    if (output == NULL || *length > ENCRYPT_LEN) {
+                    if (args->output == NULL || args->length > ENCRYPT_LEN) {
                         ERROR_OUT(BAD_FUNC_ARG, exit_scke);
                     }
 
                     /* place size of public key in output buffer */
-                    *output = (byte)*length;
-                    encSz += *length + OPAQUE8_LEN;
+                    *args->output = (byte)args->length;
+                    args->encSz += args->length + OPAQUE8_LEN;
 
                     /* Create pre master secret is the concatination of
                        eccSize + eccSharedKey + pskSize + pskKey */
@@ -16399,8 +17123,8 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                 case ecc_diffie_hellman_kea:
                 {
                     /* place size of public key in buffer */
-                    *encSecret = (byte)encSz;
-                    encSz += OPAQUE8_LEN;
+                    *args->encSecret = (byte)args->encSz;
+                    args->encSz += OPAQUE8_LEN;
                     break;
                 }
             #endif /* HAVE_ECC */
@@ -16440,50 +17164,50 @@ int SendClientKeyExchange(WOLFSSL* ssl)
                 tlsSz = 0;
             }
 
-            idx    = HANDSHAKE_HEADER_SZ + RECORD_HEADER_SZ;
-            sendSz = encSz + tlsSz + idx;
+            idx = HANDSHAKE_HEADER_SZ + RECORD_HEADER_SZ;
+            args->sendSz = args->encSz + tlsSz + idx;
 
         #ifdef WOLFSSL_DTLS
             if (ssl->options.dtls) {
                 idx    += DTLS_HANDSHAKE_EXTRA + DTLS_RECORD_EXTRA;
-                sendSz += DTLS_HANDSHAKE_EXTRA + DTLS_RECORD_EXTRA;
+                args->sendSz += DTLS_HANDSHAKE_EXTRA + DTLS_RECORD_EXTRA;
             }
         #endif
 
             if (IsEncryptionOn(ssl, 1)) {
-                sendSz += MAX_MSG_EXTRA;
+                args->sendSz += MAX_MSG_EXTRA;
             }
 
         #ifdef HAVE_QSH
-            encSz += qshSz;
-            sendSz += qshSz;
+            args->encSz += qshSz;
+            args->sendSz += qshSz;
         #endif
 
             /* check for available size */
-            if ((ret = CheckAvailableSize(ssl, sendSz)) != 0) {
+            if ((ret = CheckAvailableSize(ssl, args->sendSz)) != 0) {
                 goto exit_scke;
             }
 
             /* get output buffer */
-            output = ssl->buffers.outputBuffer.buffer +
-                     ssl->buffers.outputBuffer.length;
+            args->output = ssl->buffers.outputBuffer.buffer +
+                           ssl->buffers.outputBuffer.length;
 
         #ifdef HAVE_QSH
             if (ssl->peerQSHKeyPresent) {
                 byte idxSave = idx;
-                idx = sendSz - qshSz;
+                idx = args->sendSz - qshSz;
 
                 if (QSH_KeyExchangeWrite(ssl, 0) != 0) {
                     ERROR_OUT(MEMORY_E, exit_scke);
                 }
 
                 /* extension type */
-                c16toa(TLSX_QUANTUM_SAFE_HYBRID, output + idx);
+                c16toa(TLSX_QUANTUM_SAFE_HYBRID, args->output + idx);
                 idx += OPAQUE16_LEN;
 
                 /* write to output and check amount written */
-                if (TLSX_QSHPK_Write(ssl->QSH_secret->list, output + idx)
-                                                     > qshSz - OPAQUE16_LEN) {
+                if (TLSX_QSHPK_Write(ssl->QSH_secret->list,
+                            args->output + idx) > qshSz - OPAQUE16_LEN) {
                     ERROR_OUT(MEMORY_E, exit_scke);
                 }
 
@@ -16491,64 +17215,31 @@ int SendClientKeyExchange(WOLFSSL* ssl)
             }
         #endif
 
-            AddHeaders(output, encSz + tlsSz, client_key_exchange, ssl);
+            AddHeaders(args->output, args->encSz + tlsSz, client_key_exchange, ssl);
 
         #ifdef HAVE_QSH
             if (ssl->peerQSHKeyPresent) {
-                encSz -= qshSz;
+                args->encSz -= qshSz;
             }
         #endif
             if (tlsSz) {
-                c16toa((word16)encSz, &output[idx]);
-                idx += 2;
+                c16toa((word16)args->encSz, &args->output[idx]);
+                idx += OPAQUE16_LEN;
             }
-            XMEMCPY(output + idx, encSecret, encSz);
-            idx += encSz;
+            XMEMCPY(args->output + idx, args->encSecret, args->encSz);
+            idx += args->encSz;
 
             if (IsEncryptionOn(ssl, 1)) {
-                byte* input;
-                int   inputSz = idx-RECORD_HEADER_SZ; /* buildmsg adds rechdr */
-
-                input = (byte*)XMALLOC(inputSz, ssl->heap,
-                                       DYNAMIC_TYPE_TMP_BUFFER);
-                if (input == NULL) {
+                args->inputSz = idx - RECORD_HEADER_SZ; /* buildmsg adds rechdr */
+                args->input = (byte*)XMALLOC(args->inputSz, ssl->heap,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+                if (args->input == NULL) {
                     ERROR_OUT(MEMORY_E, exit_scke);
                 }
 
-                XMEMCPY(input, output + RECORD_HEADER_SZ, inputSz);
-                sendSz = BuildMessage(ssl, output, sendSz, input, inputSz,
-                                      handshake, 1, 0);
-                XFREE(input, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
-                if (sendSz < 0) {
-                    ERROR_OUT(sendSz, exit_scke);
-                }
+                XMEMCPY(args->input, args->output + RECORD_HEADER_SZ,
+                                                                args->inputSz);
             }
-            else {
-                #ifdef WOLFSSL_DTLS
-                    if (ssl->options.dtls)
-                        DtlsSEQIncrement(ssl, CUR_ORDER);
-                #endif
-                ret = HashOutput(ssl, output, sendSz, 0);
-                if (ret != 0) {
-                    goto exit_scke;
-                }
-            }
-
-        #ifdef WOLFSSL_DTLS
-            if (IsDtlsNotSctpMode(ssl)) {
-                if ((ret = DtlsMsgPoolSave(ssl, output, sendSz)) != 0) {
-                    goto exit_scke;
-                }
-            }
-        #endif
-
-        #ifdef WOLFSSL_CALLBACKS
-            if (ssl->hsInfoOn)
-                AddPacketName("ClientKeyExchange", &ssl->handShakeInfo);
-            if (ssl->toInfoOn)
-                AddPacketInfo("ClientKeyExchange", &ssl->timeoutInfo,
-                              output, sendSz, ssl->heap);
-        #endif
 
             /* Check for error */
             if (ret != 0) {
@@ -16561,7 +17252,50 @@ int SendClientKeyExchange(WOLFSSL* ssl)
 
         case KEYSHARE_END:
         {
-            ssl->buffers.outputBuffer.length += sendSz;
+            if (IsEncryptionOn(ssl, 1)) {
+                ret = BuildMessage(ssl, args->output, args->sendSz,
+                            args->input, args->inputSz, handshake, 1, 0, 1);
+            #ifdef WOLFSSL_ASYNC_CRYPT
+                if (ret == WC_PENDING_E)
+                    goto exit_scke;
+            #endif
+                XFREE(args->input, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                args->input = NULL; /* make sure it's not double free'd on cleanup */
+
+                if (ret >= 0) {
+                    args->sendSz = ret;
+                    ret = 0;
+                }
+            }
+            else {
+            #ifdef WOLFSSL_DTLS
+                if (ssl->options.dtls)
+                    DtlsSEQIncrement(ssl, CUR_ORDER);
+            #endif
+                ret = HashOutput(ssl, args->output, args->sendSz, 0);
+            }
+
+            if (ret != 0) {
+                goto exit_scke;
+            }
+
+        #ifdef WOLFSSL_DTLS
+            if (IsDtlsNotSctpMode(ssl)) {
+                if ((ret = DtlsMsgPoolSave(ssl, args->output, args->sendSz)) != 0) {
+                    goto exit_scke;
+                }
+            }
+        #endif
+
+        #ifdef WOLFSSL_CALLBACKS
+            if (ssl->hsInfoOn)
+                AddPacketName("ClientKeyExchange", &ssl->handShakeInfo);
+            if (ssl->toInfoOn)
+                AddPacketInfo("ClientKeyExchange", &ssl->timeoutInfo,
+                              args->output, args->sendSz, ssl->heap);
+        #endif
+
+            ssl->buffers.outputBuffer.length += args->sendSz;
 
             if (!ssl->options.groupMessages) {
                 ret = SendBuffered(ssl);
@@ -16583,39 +17317,18 @@ exit_scke:
 
     WOLFSSL_LEAVE("SendClientKeyExchange", ret);
 
-    /* Handle cleanup for stack variables here */
-
-
 #ifdef WOLFSSL_ASYNC_CRYPT
-    /* Handle WC_PENDING_E */
-    if (ret == WC_PENDING_E) {
-        /* Store variables needed for async */
-        length_lcl = ssl->async.length;
-        XMEMSET(&ssl->async, 0, sizeof(ssl->async));
-        ssl->async.output = output;
-        ssl->async.sendSz = sendSz;
-        ssl->async.data = encSecret;
-        ssl->async.sigSz = encSz;
-        ssl->async.length = length_lcl;
-
-        /* Push event to queue */
-        ret = wolfAsync_EventQueuePush(&ssl->ctx->event_queue, &ssl->event);
-        if (ret == 0) {
-            return WC_PENDING_E;
-        }
-    }
+    /* Handle async operation */
+    if (ret == WC_PENDING_E)
+        return ret;
 #endif
 
     /* No further need for PMS */
     ForceZero(ssl->arrays->preMasterSecret, ssl->arrays->preMasterSz);
     ssl->arrays->preMasterSz = 0;
 
-    if (encSecret) {
-        XFREE(encSecret, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
-        encSecret = NULL;
-    }
-
     /* Final cleanup */
+    FreeSckeArgs(ssl, args);
     FreeKeyExchange(ssl);
 
     return ret;
@@ -16624,47 +17337,58 @@ exit_scke:
 
 #ifndef NO_CERTS
 
+typedef struct ScvArgs { /* working state for SendCertificateVerify */
+    byte*  output; /* not allocated, points into ssl->buffers.outputBuffer */
+#ifndef NO_RSA
+    byte*  verifySig; /* XMALLOC'd copy of the RSA signature, see FreeScvArgs */
+#endif
+    byte*  verify; /* not allocated, &output[idx] */
+    byte*  input; /* XMALLOC'd copy of the message given to BuildMessage */
+    word32 idx; /* key decode index, then offset of verify within output */
+    word32 extraSz; /* 0, or HASH_SIG_SIZE for the TLS 1.2 hash/sig bytes */
+    word32 sigSz; /* RSA signature size, starts as ENCRYPT_LEN */
+    int    sendSz; /* size reserved for, then actual size of, the message */
+    int    length; /* signature length used for the header and sendSz */
+    int    inputSz; /* sendSz - RECORD_HEADER_SZ */
+} ScvArgs;
+
+static void FreeScvArgs(WOLFSSL* ssl, void* pArgs)
+{
+    ScvArgs* args = (ScvArgs*)pArgs;
+    /* release the buffers owned by args (verifySig, input), reset to NULL */
+    (void)ssl; /* NOTE(review): presumably for builds where XFREE ignores heap */
+
+#ifndef NO_RSA
+    if (args->verifySig) {
+        XFREE(args->verifySig, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        args->verifySig = NULL; /* a repeated call will skip this buffer */
+    }
+#endif
+    if (args->input) {
+        XFREE(args->input, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        args->input = NULL; /* a repeated call will skip this buffer */
+    }
+}
+
 int SendCertificateVerify(WOLFSSL* ssl)
 {
-    byte*  output = NULL;
-    int    sendSz = 0, length = 0, ret;
-    byte*  verify = NULL;
-    word32 idx = 0;
-    word32 extraSz = 0;
-#ifndef NO_RSA
-    byte*  verifySig = NULL;
+    int ret = 0;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    ScvArgs* args = (ScvArgs*)ssl->async.args;
+    typedef char args_test[sizeof(ssl->async.args) >= sizeof(*args) ? 1 : -1];
+    (void)sizeof(args_test);
+#else
+    ScvArgs  args[1];
 #endif
 
     WOLFSSL_ENTER("SendCertificateVerify");
 
 #ifdef WOLFSSL_ASYNC_CRYPT
-    ret = wolfAsync_EventPop(&ssl->event, WOLF_EVENT_TYPE_ASYNC_ANY);
+    ret = wolfSSL_AsyncPop(ssl, &ssl->options.keyShareState);
     if (ret != WC_NOT_PENDING_E) {
-        WOLF_EVENT_TYPE eType = ssl->event.type;
-
-        /* Clear event */
-        XMEMSET(&ssl->event, 0, sizeof(ssl->event));
-
         /* Check for error */
-        if (ret < 0) {
+        if (ret < 0)
             goto exit_scv;
-        }
-        else  {
-            /* Restore variables needed for async */
-            output = ssl->async.output;
-            sendSz = ssl->async.sendSz;
-            extraSz = ssl->async.sigSz;
-            length = ssl->async.length;
-            idx = ssl->async.idx;
-        #ifndef NO_RSA
-            verifySig = ssl->async.data;
-        #endif
-
-            /* Advance key share state if not wolfCrypt */
-            if (eType == WOLF_EVENT_TYPE_ASYNC_WOLFSSL) {
-                ssl->options.keyShareState++;
-            }
-        }
     }
     else
 #endif
@@ -16672,6 +17396,10 @@ int SendCertificateVerify(WOLFSSL* ssl)
         /* Reset state */
         ret = 0;
         ssl->options.keyShareState = KEYSHARE_BEGIN;
+        XMEMSET(args, 0, sizeof(ScvArgs));
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        ssl->async.freeArgs = FreeScvArgs;
+    #endif
     }
 
     switch(ssl->options.keyShareState)
@@ -16682,19 +17410,19 @@ int SendCertificateVerify(WOLFSSL* ssl)
                 return 0;  /* sent blank cert, can't verify */
             }
 
-            sendSz = MAX_CERT_VERIFY_SZ;
+            args->sendSz = MAX_CERT_VERIFY_SZ;
             if (IsEncryptionOn(ssl, 1)) {
-                sendSz += MAX_MSG_EXTRA;
+                args->sendSz += MAX_MSG_EXTRA;
             }
 
             /* check for available size */
-            if ((ret = CheckAvailableSize(ssl, sendSz)) != 0) {
+            if ((ret = CheckAvailableSize(ssl, args->sendSz)) != 0) {
                 goto exit_scv;
             }
 
             /* get output buffer */
-            output = ssl->buffers.outputBuffer.buffer +
-                     ssl->buffers.outputBuffer.length;
+            args->output = ssl->buffers.outputBuffer.buffer +
+                           ssl->buffers.outputBuffer.length;
 
             /* Advance state and proceed */
             ssl->options.keyShareState = KEYSHARE_BUILD;
@@ -16710,30 +17438,30 @@ int SendCertificateVerify(WOLFSSL* ssl)
                 goto exit_scv;
             }
 
-        #ifndef NO_RSA
-            ssl->sigKey = (RsaKey*)XMALLOC(sizeof(RsaKey), ssl->heap,
-                                                            DYNAMIC_TYPE_RSA);
-            if (ssl->sigKey == NULL) {
-                ERROR_OUT(MEMORY_E, exit_scv);
+            /* make sure private key exists */
+            if (ssl->buffers.key == NULL || ssl->buffers.key->buffer == NULL) {
+                WOLFSSL_MSG("Private key missing!");
+                ERROR_OUT(NO_PRIVATE_KEY, exit_scv);
             }
-            ssl->sigType = DYNAMIC_TYPE_RSA;
 
-            ret = wc_InitRsaKey_ex((RsaKey*)ssl->sigKey, ssl->heap, ssl->devId);
+        #ifndef NO_RSA
+            ssl->hsType = DYNAMIC_TYPE_RSA;
+            ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
             if (ret != 0) {
                 goto exit_scv;
             }
 
             WOLFSSL_MSG("Trying RSA client cert");
 
-            ret = wc_RsaPrivateKeyDecode(ssl->buffers.key->buffer, &idx,
-                        (RsaKey*)ssl->sigKey, ssl->buffers.key->length);
+            ret = wc_RsaPrivateKeyDecode(ssl->buffers.key->buffer, &args->idx,
+                                (RsaKey*)ssl->hsKey, ssl->buffers.key->length);
             if (ret == 0) {
-                keySz = wc_RsaEncryptSize((RsaKey*)ssl->sigKey);
+                keySz = wc_RsaEncryptSize((RsaKey*)ssl->hsKey);
                 if (keySz < 0) { /* check if keySz has error case */
                     ERROR_OUT(keySz, exit_scv);
                 }
 
-                length = (word32)keySz;
+                args->length = (word32)keySz;
                 if (keySz < ssl->options.minRsaKeySz) {
                     WOLFSSL_MSG("RSA key size too small");
                     ERROR_OUT(RSA_KEY_SIZE_E, exit_scv);
@@ -16743,41 +17471,31 @@ int SendCertificateVerify(WOLFSSL* ssl)
         #endif /* !NO_RSA */
             {
         #ifdef HAVE_ECC
-                if (ssl->sigKey) {
-                    XFREE(ssl->sigKey, ssl->heap, DYNAMIC_TYPE_RSA);
-                }
-                ssl->sigKey = (ecc_key*)XMALLOC(sizeof(ecc_key), ssl->heap,
-                                                            DYNAMIC_TYPE_ECC);
-                if (ssl->sigKey == NULL) {
-                    ERROR_OUT(MEMORY_E, exit_scv);
-                }
-                ssl->sigType = DYNAMIC_TYPE_ECC;
+            #ifndef NO_RSA
+                FreeKey(ssl, ssl->hsType, (void**)&ssl->hsKey);
+            #endif /* !NO_RSA */
 
-                ret = wc_ecc_init_ex((ecc_key*)ssl->sigKey, ssl->heap, ssl->devId);
+                ssl->hsType = DYNAMIC_TYPE_ECC;
+                ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
                 if (ret != 0) {
                     goto exit_scv;
                 }
 
                 WOLFSSL_MSG("Trying ECC client cert, RSA didn't work");
 
-                if (ssl->buffers.key == NULL) {
-                    WOLFSSL_MSG("ECC Key missing");
-                    ERROR_OUT(NO_PRIVATE_KEY, exit_scv);
-                }
-
-                idx = 0;
-                ret = wc_EccPrivateKeyDecode(ssl->buffers.key->buffer, &idx,
-                            (ecc_key*)ssl->sigKey, ssl->buffers.key->length);
+                args->idx = 0;
+                ret = wc_EccPrivateKeyDecode(ssl->buffers.key->buffer,
+                    &args->idx, (ecc_key*)ssl->hsKey, ssl->buffers.key->length);
                 if (ret != 0) {
                     WOLFSSL_MSG("Bad client cert type");
                     goto exit_scv;
                 }
 
                 WOLFSSL_MSG("Using ECC client cert");
-                length = MAX_ENCODED_SIG_SZ;
+                args->length = MAX_ENCODED_SIG_SZ;
 
                 /* check minimum size of ECC key */
-                keySz = wc_ecc_size((ecc_key*)ssl->sigKey);
+                keySz = wc_ecc_size((ecc_key*)ssl->hsKey);
                 if (keySz < ssl->options.minEccKeySz) {
                     WOLFSSL_MSG("ECC key size too small");
                     ERROR_OUT(ECC_KEY_SIZE_E, exit_scv);
@@ -16786,9 +17504,9 @@ int SendCertificateVerify(WOLFSSL* ssl)
             }
 
             /* idx is used to track verify pointer offset to output */
-            idx = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
-            verify = &output[RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ];
-            extraSz = 0;  /* tls 1.2 hash/sig */
+            args->idx = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
+            args->verify = &args->output[RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ];
+            args->extraSz = 0;  /* tls 1.2 hash/sig */
 
             /* build encoded signature buffer */
             ssl->buffers.sig.length = MAX_ENCODED_SIG_SZ;
@@ -16800,8 +17518,8 @@ int SendCertificateVerify(WOLFSSL* ssl)
 
         #ifdef WOLFSSL_DTLS
             if (ssl->options.dtls) {
-                idx += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
-                verify += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+                args->idx += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+                args->verify += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
             }
         #endif
 
@@ -16822,37 +17540,41 @@ int SendCertificateVerify(WOLFSSL* ssl)
     #endif /* !NO_OLD_TLS */
 
             if (IsAtLeastTLSv1_2(ssl)) {
-                verify[0] = ssl->suites->hashAlgo;
-                verify[1] = (ssl->sigType == DYNAMIC_TYPE_ECC) ?
+                args->verify[0] = ssl->suites->hashAlgo;
+                args->verify[1] = (ssl->hsType == DYNAMIC_TYPE_ECC) ?
                                                 ecc_dsa_sa_algo : rsa_sa_algo;
-                extraSz = HASH_SIG_SIZE;
+                args->extraSz = HASH_SIG_SIZE;
 
                 switch (ssl->suites->hashAlgo) {
                 #ifndef NO_SHA
                     case sha_mac:
                         ssl->buffers.digest.length = SHA_DIGEST_SIZE;
-                        ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha;
+                        ssl->buffers.digest.buffer =
+                            ssl->hsHashes->certHashes.sha;
                         typeH    = SHAh;
                         break;
                 #endif /* NO_SHA */
                 #ifndef NO_SHA256
                     case sha256_mac:
                         ssl->buffers.digest.length = SHA256_DIGEST_SIZE;
-                        ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha256;
+                        ssl->buffers.digest.buffer =
+                            ssl->hsHashes->certHashes.sha256;
                         typeH    = SHA256h;
                         break;
                 #endif /* !NO_SHA256 */
                 #ifdef WOLFSSL_SHA384
                     case sha384_mac:
                         ssl->buffers.digest.length = SHA384_DIGEST_SIZE;
-                        ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha384;
+                        ssl->buffers.digest.buffer =
+                            ssl->hsHashes->certHashes.sha384;
                         typeH    = SHA384h;
                         break;
                 #endif /* WOLFSSL_SHA384 */
                 #ifdef WOLFSSL_SHA512
                     case sha512_mac:
                         ssl->buffers.digest.length = SHA512_DIGEST_SIZE;
-                        ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha512;
+                        ssl->buffers.digest.buffer =
+                            ssl->hsHashes->certHashes.sha512;
                         typeH    = SHA512h;
                         break;
                 #endif /* WOLFSSL_SHA512 */
@@ -16871,9 +17593,9 @@ int SendCertificateVerify(WOLFSSL* ssl)
             }
 
         #ifndef NO_RSA
-            if (ssl->sigType == DYNAMIC_TYPE_RSA) {
+            if (ssl->hsType == DYNAMIC_TYPE_RSA) {
                 ssl->buffers.sig.length = FINISHED_SZ;
-                ssl->sigLen = ENCRYPT_LEN;
+                args->sigSz = ENCRYPT_LEN;
 
                 if (IsAtLeastTLSv1_2(ssl)) {
                     ssl->buffers.sig.length = wc_EncodeSignature(
@@ -16881,7 +17603,8 @@ int SendCertificateVerify(WOLFSSL* ssl)
                             ssl->buffers.digest.length, typeH);
                 }
 
-                c16toa((word16)length, verify + extraSz); /* prepend hdr */
+                /* prepend hdr */
+                c16toa((word16)args->length, args->verify + args->extraSz);
             }
         #endif /* !NO_RSA */
 
@@ -16892,11 +17615,13 @@ int SendCertificateVerify(WOLFSSL* ssl)
         case KEYSHARE_DO:
         {
         #ifdef HAVE_ECC
-           if (ssl->sigType == DYNAMIC_TYPE_ECC) {
+           if (ssl->hsType == DYNAMIC_TYPE_ECC) {
+                ecc_key* key = (ecc_key*)ssl->hsKey;
+
                 ret = EccSign(ssl,
                     ssl->buffers.digest.buffer, ssl->buffers.digest.length,
                     ssl->buffers.sig.buffer, &ssl->buffers.sig.length,
-                    (ecc_key*)ssl->sigKey,
+                    key,
             #if defined(HAVE_PK_CALLBACKS)
                     ssl->buffers.key->buffer,
                     ssl->buffers.key->length,
@@ -16908,14 +17633,16 @@ int SendCertificateVerify(WOLFSSL* ssl)
             }
         #endif /* HAVE_ECC */
         #ifndef NO_RSA
-            if (ssl->sigType == DYNAMIC_TYPE_RSA) {
+            if (ssl->hsType == DYNAMIC_TYPE_RSA) {
+                RsaKey* key = (RsaKey*)ssl->hsKey;
+
                 /* restore verify pointer */
-                verify = &output[idx];
+                args->verify = &args->output[args->idx];
 
                 ret = RsaSign(ssl,
                     ssl->buffers.sig.buffer, ssl->buffers.sig.length,
-                    verify + extraSz + VERIFY_HEADER, &ssl->sigLen,
-                    (RsaKey*)ssl->sigKey,
+                    args->verify + args->extraSz + VERIFY_HEADER, &args->sigSz,
+                    key,
                     ssl->buffers.key->buffer,
                     ssl->buffers.key->length,
                 #ifdef HAVE_PK_CALLBACKS
@@ -16939,33 +17666,38 @@ int SendCertificateVerify(WOLFSSL* ssl)
         case KEYSHARE_VERIFY:
         {
             /* restore verify pointer */
-            verify = &output[idx];
+            args->verify = &args->output[args->idx];
 
         #ifdef HAVE_ECC
-            if (ssl->sigType == DYNAMIC_TYPE_ECC) {
-                length = ssl->buffers.sig.length;
-                c16toa((word16)ssl->buffers.sig.length, verify + extraSz); /* prepend hdr */
-                XMEMCPY(verify + extraSz + VERIFY_HEADER,
+            if (ssl->hsType == DYNAMIC_TYPE_ECC) {
+                args->length = ssl->buffers.sig.length;
+                /* prepend hdr */
+                c16toa((word16)ssl->buffers.sig.length, args->verify +
+                                                                args->extraSz);
+                XMEMCPY(args->verify + args->extraSz + VERIFY_HEADER,
                         ssl->buffers.sig.buffer, ssl->buffers.sig.length);
             }
         #endif /* HAVE_ECC */
         #ifndef NO_RSA
-            if (ssl->sigType == DYNAMIC_TYPE_RSA) {
-                if (verifySig == NULL) {
-                    verifySig = (byte*)XMALLOC(ssl->sigLen, ssl->heap,
+            if (ssl->hsType == DYNAMIC_TYPE_RSA) {
+                RsaKey* key = (RsaKey*)ssl->hsKey;
+
+                if (args->verifySig == NULL) {
+                    args->verifySig = (byte*)XMALLOC(args->sigSz, ssl->heap,
                                       DYNAMIC_TYPE_TMP_BUFFER);
-                    if (verifySig == NULL) {
+                    if (args->verifySig == NULL) {
                         ERROR_OUT(MEMORY_E, exit_scv);
                     }
-                    XMEMCPY(verifySig, verify + extraSz + VERIFY_HEADER,
-                                                                ssl->sigLen);
+                    XMEMCPY(args->verifySig, args->verify + args->extraSz +
+                                                    VERIFY_HEADER, args->sigSz);
                 }
 
                 /* check for signature faults */
                 ret = VerifyRsaSign(ssl,
-                    verifySig, ssl->sigLen,
+                    args->verifySig, args->sigSz,
                     ssl->buffers.sig.buffer, ssl->buffers.sig.length,
-                    (RsaKey*)ssl->sigKey);
+                    key
+                );
             }
         #endif /* !NO_RSA */
 
@@ -16980,51 +17712,33 @@ int SendCertificateVerify(WOLFSSL* ssl)
 
         case KEYSHARE_FINALIZE:
         {
-            AddHeaders(output, length + extraSz + VERIFY_HEADER,
-                                                   certificate_verify, ssl);
+            if (args->output == NULL) {
+                ERROR_OUT(BUFFER_ERROR, exit_scv);
+            }
+            AddHeaders(args->output, args->length + args->extraSz +
+                                        VERIFY_HEADER, certificate_verify, ssl);
 
-            sendSz = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ + length +
-                                                     extraSz + VERIFY_HEADER;
+            args->sendSz = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ +
+                                    args->length + args->extraSz + VERIFY_HEADER;
 
         #ifdef WOLFSSL_DTLS
             if (ssl->options.dtls) {
-                sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+                args->sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
             }
         #endif
 
             if (IsEncryptionOn(ssl, 1)) {
-                byte* input;
-                int   inputSz = sendSz - RECORD_HEADER_SZ;
+                args->inputSz = args->sendSz - RECORD_HEADER_SZ;
                                 /* build msg adds rec hdr */
-                input = (byte*)XMALLOC(inputSz, ssl->heap,
-                                       DYNAMIC_TYPE_TMP_BUFFER);
-                if (input == NULL) {
+                args->input = (byte*)XMALLOC(args->inputSz, ssl->heap,
+                                                       DYNAMIC_TYPE_TMP_BUFFER);
+                if (args->input == NULL) {
                     ERROR_OUT(MEMORY_E, exit_scv);
                 }
 
-                XMEMCPY(input, output + RECORD_HEADER_SZ, inputSz);
-                sendSz = BuildMessage(ssl, output,
-                                      MAX_CERT_VERIFY_SZ +MAX_MSG_EXTRA,
-                                      input, inputSz, handshake, 1, 0);
-                XFREE(input, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
-
-                if (sendSz < 0) {
-                    ret = sendSz;
-                }
+                XMEMCPY(args->input, args->output + RECORD_HEADER_SZ,
+                                                                args->inputSz);
             }
-            else {
-                #ifdef WOLFSSL_DTLS
-                    if (ssl->options.dtls)
-                        DtlsSEQIncrement(ssl, CUR_ORDER);
-                #endif
-                ret = HashOutput(ssl, output, sendSz, 0);
-            }
-
-        #ifdef WOLFSSL_DTLS
-            if (IsDtlsNotSctpMode(ssl)) {
-                ret = DtlsMsgPoolSave(ssl, output, sendSz);
-            }
-        #endif
 
             /* Check for error */
             if (ret != 0) {
@@ -17037,15 +17751,52 @@ int SendCertificateVerify(WOLFSSL* ssl)
 
         case KEYSHARE_END:
         {
+            if (IsEncryptionOn(ssl, 1)) {
+                ret = BuildMessage(ssl, args->output,
+                                      MAX_CERT_VERIFY_SZ + MAX_MSG_EXTRA,
+                                      args->input, args->inputSz, handshake,
+                                      1, 0, 1);
+            #ifdef WOLFSSL_ASYNC_CRYPT
+                if (ret == WC_PENDING_E)
+                    goto exit_scv;
+            #endif
+
+                XFREE(args->input, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                args->input = NULL;  /* make sure its not double free'd on cleanup */
+
+                if (ret >= 0) {
+                    args->sendSz = ret;
+                    ret = 0;
+                }
+            }
+            else {
+            #ifdef WOLFSSL_DTLS
+                if (ssl->options.dtls)
+                    DtlsSEQIncrement(ssl, CUR_ORDER);
+            #endif
+                ret = HashOutput(ssl, args->output, args->sendSz, 0);
+            }
+
+            if (ret != 0) {
+                goto exit_scv;
+            }
+
+        #ifdef WOLFSSL_DTLS
+            if (IsDtlsNotSctpMode(ssl)) {
+                ret = DtlsMsgPoolSave(ssl, args->output, args->sendSz);
+            }
+        #endif
+
+
         #ifdef WOLFSSL_CALLBACKS
             if (ssl->hsInfoOn)
                 AddPacketName("CertificateVerify", &ssl->handShakeInfo);
             if (ssl->toInfoOn)
                 AddPacketInfo("CertificateVerify", &ssl->timeoutInfo,
-                              output, sendSz, ssl->heap);
+                              args->output, args->sendSz, ssl->heap);
         #endif
 
-            ssl->buffers.outputBuffer.length += sendSz;
+            ssl->buffers.outputBuffer.length += args->sendSz;
 
             if (!ssl->options.groupMessages) {
                 ret = SendBuffered(ssl);
@@ -17060,43 +17811,19 @@ exit_scv:
 
     WOLFSSL_LEAVE("SendCertificateVerify", ret);
 
-    /* Handle cleanup for stack variables here */
-
-
 #ifdef WOLFSSL_ASYNC_CRYPT
-    /* Handle WC_PENDING_E */
+    /* Handle async operation */
     if (ret == WC_PENDING_E) {
-        /* Store variables needed for async */
-        XMEMSET(&ssl->async, 0, sizeof(ssl->async));
-        ssl->async.output = output;
-        ssl->async.sendSz = sendSz;
-        ssl->async.sigSz = extraSz;
-        ssl->async.length = length;
-        ssl->async.idx = idx;
-    #ifndef NO_RSA
-        ssl->async.data = verifySig;
-    #endif
-
-        /* Push event to queue */
-        ret = wolfAsync_EventQueuePush(&ssl->ctx->event_queue, &ssl->event);
-        if (ret == 0) {
-            return WC_PENDING_E;
-        }
+        return ret;
     }
-#endif
-
-#ifndef NO_RSA
-    if (verifySig) {
-        XFREE(verifySig, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
-        verifySig = NULL;
-    }
-#endif
+#endif /* WOLFSSL_ASYNC_CRYPT */
 
     /* Digest is not allocated, so do this to prevent free */
     ssl->buffers.digest.buffer = NULL;
     ssl->buffers.digest.length = 0;
 
     /* Final cleanup */
+    FreeScvArgs(ssl, args);
     FreeKeyExchange(ssl);
 
     return ret;
@@ -17436,67 +18163,79 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
 #endif /* HAVE_ECC */
 
+    typedef struct SskeArgs { /* working state for SendServerKeyExchange */
+        byte*  output; /* not allocated */
+    #if defined(HAVE_ECC) || (!defined(NO_DH) && !defined(NO_RSA))
+        byte*  sigDataBuf; /* freed by FreeSskeArgs */
+    #endif
+    #if defined(HAVE_ECC)
+        byte*  exportBuf; /* freed by FreeSskeArgs */
+    #endif
+    #ifndef NO_RSA
+        byte*  verifySig; /* freed by FreeSskeArgs */
+    #endif
+        word32 idx;
+        word32 tmpSigSz;
+        word32 length;
+        word32 sigSz;
+    #if defined(HAVE_ECC) || (!defined(NO_DH) && !defined(NO_RSA))
+        word32 sigDataSz; /* presumably the size of sigDataBuf - TODO confirm */
+    #endif
+    #if defined(HAVE_ECC)
+        word32 exportSz; /* presumably the size of exportBuf - TODO confirm */
+    #endif
+    #ifdef HAVE_QSH
+        word32 qshSz; /* QSH_KeyGetSize(ssl) when ssl->peerQSHKeyPresent */
+    #endif
+        int    sendSz;
+    } SskeArgs;
+
+    static void FreeSskeArgs(WOLFSSL* ssl, void* pArgs)
+    {
+        SskeArgs* args = (SskeArgs*)pArgs;
+        /* release the buffers owned by args and reset each pointer to NULL */
+        (void)ssl; /* NOTE(review): presumably for builds where XFREE ignores heap */
+
+    #if defined(HAVE_ECC)
+        if (args->exportBuf) {
+            XFREE(args->exportBuf, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            args->exportBuf = NULL;
+        }
+    #endif
+    #if defined(HAVE_ECC) || (!defined(NO_DH) && !defined(NO_RSA))
+        if (args->sigDataBuf) {
+            XFREE(args->sigDataBuf, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            args->sigDataBuf = NULL;
+        }
+    #endif
+    #ifndef NO_RSA
+        if (args->verifySig) {
+            XFREE(args->verifySig, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+            args->verifySig = NULL;
+        }
+    #endif
+        (void)args; /* keeps args referenced when every #if section is compiled out */
+    }
 
     int SendServerKeyExchange(WOLFSSL* ssl)
     {
         int ret;
-        int sendSz = 0;
-        byte *output = NULL;
-        word32 idx = 0, sigSz = 0, length = 0;
-    #if defined(HAVE_ECC) || (!defined(NO_DH) && !defined(NO_RSA))
-        byte *sigDataBuf = NULL;
-        word32 sigDataSz = 0;
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        SskeArgs* args = (SskeArgs*)ssl->async.args;
+        typedef char args_test[sizeof(ssl->async.args) >= sizeof(*args) ? 1 : -1];
+        (void)sizeof(args_test);
+    #else
+        SskeArgs  args[1];
     #endif
-    #if defined(HAVE_ECC)
-        byte *exportBuf = NULL;
-        word32 exportSz = 0;
-    #endif
-
-    #ifdef HAVE_QSH
-        word32 qshSz = 0;
-        if (ssl->peerQSHKeyPresent) {
-            qshSz = QSH_KeyGetSize(ssl);
-        }
-    #endif
-    #ifndef NO_RSA
-        byte* verifySig = NULL;
-    #endif
-
-        (void)ssl;
-        (void)sigSz;
-        (void)length;
-        (void)idx;
 
         WOLFSSL_ENTER("SendServerKeyExchange");
 
     #ifdef WOLFSSL_ASYNC_CRYPT
-        ret = wolfAsync_EventPop(&ssl->event, WOLF_EVENT_TYPE_ASYNC_ANY);
+        ret = wolfSSL_AsyncPop(ssl, &ssl->options.keyShareState);
         if (ret != WC_NOT_PENDING_E) {
-            WOLF_EVENT_TYPE eType = ssl->event.type;
-
-            /* Clear event */
-            XMEMSET(&ssl->event, 0, sizeof(ssl->event));
-
             /* Check for error */
-            if (ret < 0) {
+            if (ret < 0)
                 goto exit_sske;
-            }
-            else  {
-                /* Restore variables needed for async */
-                output = ssl->async.output;
-                sendSz = ssl->async.sendSz;
-                idx = ssl->async.idx;
-                sigSz = ssl->async.sigSz;
-                length = ssl->async.length;
-            #ifndef NO_RSA
-                verifySig = ssl->async.data;
-            #endif
-
-                /* Advance key share state if not wolfCrypt */
-                if (eType == WOLF_EVENT_TYPE_ASYNC_WOLFSSL) {
-                    ssl->options.keyShareState++;
-                }
-            }
         }
         else
     #endif
@@ -17504,12 +18243,22 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
             /* Reset state */
             ret = 0;
             ssl->options.keyShareState = KEYSHARE_BEGIN;
+            XMEMSET(args, 0, sizeof(SskeArgs));
+        #ifdef WOLFSSL_ASYNC_CRYPT
+            ssl->async.freeArgs = FreeSskeArgs;
+        #endif
         }
 
         switch(ssl->options.keyShareState)
         {
             case KEYSHARE_BEGIN:
             {
+            #ifdef HAVE_QSH
+                if (ssl->peerQSHKeyPresent) {
+                    args->qshSz = QSH_KeyGetSize(ssl);
+                }
+            #endif
+
                 /* Do some checks / debug msgs */
                 switch(ssl->specs.kea)
                 {
@@ -17528,7 +18277,9 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                             ERROR_OUT(0, exit_sske);
                         }
 
-                        if (!ssl->buffers.key->buffer) {
+                        /* make sure private key exists */
+                        if (ssl->buffers.key == NULL ||
+                                            ssl->buffers.key->buffer == NULL) {
                             ERROR_OUT(NO_PRIVATE_KEY, exit_sske);
                         }
 
@@ -17565,8 +18316,8 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         if (ssl->buffers.serverDH_Pub.buffer == NULL) {
                             /* Free'd in SSL_ResourceFree and FreeHandshakeResources */
                             ssl->buffers.serverDH_Pub.buffer = (byte*)XMALLOC(
-                                    ssl->buffers.serverDH_P.length + 2, ssl->heap,
-                                    DYNAMIC_TYPE_DH);
+                                    ssl->buffers.serverDH_P.length + OPAQUE16_LEN,
+                                    ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
                             if (ssl->buffers.serverDH_Pub.buffer == NULL) {
                                 ERROR_OUT(MEMORY_E, exit_sske);
                             }
@@ -17575,8 +18326,8 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         if (ssl->buffers.serverDH_Priv.buffer == NULL) {
                             /* Free'd in SSL_ResourceFree and FreeHandshakeResources */
                             ssl->buffers.serverDH_Priv.buffer = (byte*)XMALLOC(
-                                    ssl->buffers.serverDH_P.length + 2, ssl->heap,
-                                    DYNAMIC_TYPE_DH);
+                                    ssl->buffers.serverDH_P.length + OPAQUE16_LEN,
+                                    ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
                             if (ssl->buffers.serverDH_Priv.buffer == NULL) {
                                 ERROR_OUT(MEMORY_E, exit_sske);
                             }
@@ -17585,18 +18336,29 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         ssl->options.dhKeySz =
                                 (word16)ssl->buffers.serverDH_P.length;
 
-                        ret = DhGenKeyPair(ssl,
+                        ret = AllocKey(ssl, DYNAMIC_TYPE_DH,
+                                            (void**)&ssl->buffers.serverDH_Key);
+                        if (ret != 0) {
+                            goto exit_sske;
+                        }
+
+                        ret = wc_DhSetKey(ssl->buffers.serverDH_Key,
                             ssl->buffers.serverDH_P.buffer,
                             ssl->buffers.serverDH_P.length,
                             ssl->buffers.serverDH_G.buffer,
-                            ssl->buffers.serverDH_G.length,
+                            ssl->buffers.serverDH_G.length);
+                        if (ret != 0) {
+                            goto exit_sske;
+                        }
+
+                        ret = DhGenKeyPair(ssl, ssl->buffers.serverDH_Key,
                             ssl->buffers.serverDH_Priv.buffer,
                             &ssl->buffers.serverDH_Priv.length,
                             ssl->buffers.serverDH_Pub.buffer,
                             &ssl->buffers.serverDH_Pub.length);
                         break;
                     }
-                #endif /* !defined(NO_DH) && (!defined(NO_PSK) || !defined(NO_RSA)) */
+                #endif /* !NO_DH && (!NO_PSK || !NO_RSA) */
                 #if defined(HAVE_ECC) && !defined(NO_PSK)
                     case ecdhe_psk_kea:
                         /* Fall through to create temp ECC key */
@@ -17607,19 +18369,16 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         /* need ephemeral key now, create it if missing */
                         if (ssl->eccTempKey == NULL) {
                             /* alloc/init on demand */
-                            ssl->eccTempKey = (ecc_key*)XMALLOC(sizeof(ecc_key),
-                                                         ssl->heap, DYNAMIC_TYPE_ECC);
-                            if (ssl->eccTempKey == NULL) {
-                                WOLFSSL_MSG("EccTempKey Memory error");
-                                ERROR_OUT(MEMORY_E, exit_sske);
-                            }
-                            ret = wc_ecc_init_ex(ssl->eccTempKey, ssl->heap, ssl->devId);
-                            if (ret != 0)
+                            ret = AllocKey(ssl, DYNAMIC_TYPE_ECC,
+                                (void**)&ssl->eccTempKey);
+                            if (ret != 0) {
                                 goto exit_sske;
+                            }
                         }
 
                         if (ssl->eccTempKeyPresent == 0) {
-                            /* TODO: Need to first do wc_EccPrivateKeyDecode, then we know curve dp */
+                            /* TODO: Need to first do wc_EccPrivateKeyDecode,
+                                then we know curve dp */
                             ret = EccMakeKey(ssl, ssl->eccTempKey, NULL);
                             if (ret == 0 || ret == WC_PENDING_E) {
                                 ssl->eccTempKeyPresent = 1;
@@ -17653,52 +18412,58 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 #ifndef NO_PSK
                     case psk_kea:
                     {
-                        idx = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
+                        args->idx = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
 
                         if (ssl->arrays->server_hint[0] == 0) {
                             ERROR_OUT(0, exit_sske); /* don't send */
                         }
 
                         /* include size part */
-                        length = (word32)XSTRLEN(ssl->arrays->server_hint);
-                        if (length > MAX_PSK_ID_LEN) {
+                        args->length = (word32)XSTRLEN(ssl->arrays->server_hint);
+                        if (args->length > MAX_PSK_ID_LEN) {
                             ERROR_OUT(SERVER_HINT_ERROR, exit_sske);
                         }
 
-                        length += HINT_LEN_SZ;
-                        sendSz = length + HANDSHAKE_HEADER_SZ + RECORD_HEADER_SZ;
+                        args->length += HINT_LEN_SZ;
+                        args->sendSz = args->length + HANDSHAKE_HEADER_SZ +
+                                                            RECORD_HEADER_SZ;
 
                     #ifdef HAVE_QSH
-                        length += qshSz;
-                        sendSz += qshSz;
+                        args->length += args->qshSz;
+                        args->sendSz += args->qshSz;
                     #endif
 
                     #ifdef WOLFSSL_DTLS
                         if (ssl->options.dtls) {
-                            sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
-                            idx    += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+                            args->sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+                            args->idx    += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
                         }
                     #endif
                         /* check for available size */
-                        if ((ret = CheckAvailableSize(ssl, sendSz)) != 0) {
+                        if ((ret = CheckAvailableSize(ssl, args->sendSz)) != 0) {
                             goto exit_sske;
                         }
 
                         /* get ouput buffer */
-                        output = ssl->buffers.outputBuffer.buffer +
-                                 ssl->buffers.outputBuffer.length;
+                        args->output = ssl->buffers.outputBuffer.buffer +
+                                       ssl->buffers.outputBuffer.length;
 
-                        AddHeaders(output, length, server_key_exchange, ssl);
+                        AddHeaders(args->output, args->length,
+                                                    server_key_exchange, ssl);
 
                         /* key data */
                     #ifdef HAVE_QSH
-                        c16toa((word16)(length - qshSz - HINT_LEN_SZ), output + idx);
+                        c16toa((word16)(args->length - args->qshSz -
+                                        HINT_LEN_SZ), args->output + args->idx);
                     #else
-                        c16toa((word16)(length - HINT_LEN_SZ), output + idx);
+                        c16toa((word16)(args->length - HINT_LEN_SZ),
+                                                      args->output + args->idx);
                     #endif
 
-                        idx += HINT_LEN_SZ;
-                        XMEMCPY(output + idx, ssl->arrays->server_hint, length - HINT_LEN_SZ);
+                        args->idx += HINT_LEN_SZ;
+                        XMEMCPY(args->output + args->idx,
+                                ssl->arrays->server_hint,
+                                args->length - HINT_LEN_SZ);
                         break;
                     }
                 #endif /* !NO_PSK */
@@ -17707,8 +18472,8 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                     {
                         word32 hintLen;
 
-                        idx = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
-                        length = LENGTH_SZ * 3 + /* p, g, pub */
+                        args->idx = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
+                        args->length = LENGTH_SZ * 3 + /* p, g, pub */
                                  ssl->buffers.serverDH_P.length +
                                  ssl->buffers.serverDH_G.length +
                                  ssl->buffers.serverDH_Pub.length;
@@ -17718,58 +18483,67 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         if (hintLen > MAX_PSK_ID_LEN) {
                             ERROR_OUT(SERVER_HINT_ERROR, exit_sske);
                         }
-                        length += hintLen + HINT_LEN_SZ;
-                        sendSz = length + HANDSHAKE_HEADER_SZ + RECORD_HEADER_SZ;
+                        args->length += hintLen + HINT_LEN_SZ;
+                        args->sendSz = args->length + HANDSHAKE_HEADER_SZ +
+                                                            RECORD_HEADER_SZ;
 
                     #ifdef HAVE_QSH
-                        length += qshSz;
-                        sendSz += qshSz;
+                        args->length += args->qshSz;
+                        args->sendSz += args->qshSz;
                     #endif
                     #ifdef WOLFSSL_DTLS
                         if (ssl->options.dtls) {
-                            sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
-                            idx    += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+                            args->sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+                            args->idx    += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
                         }
                     #endif
 
                         /* check for available size */
-                        if ((ret = CheckAvailableSize(ssl, sendSz)) != 0) {
+                        if ((ret = CheckAvailableSize(ssl, args->sendSz)) != 0) {
                             goto exit_sske;
                         }
 
                         /* get ouput buffer */
-                        output = ssl->buffers.outputBuffer.buffer +
-                                 ssl->buffers.outputBuffer.length;
+                        args->output = ssl->buffers.outputBuffer.buffer +
+                                       ssl->buffers.outputBuffer.length;
 
-                        AddHeaders(output, length, server_key_exchange, ssl);
+                        AddHeaders(args->output, args->length,
+                                                    server_key_exchange, ssl);
 
                         /* key data */
-                        c16toa((word16)hintLen, output + idx);
-                        idx += HINT_LEN_SZ;
-                        XMEMCPY(output + idx, ssl->arrays->server_hint, hintLen);
-                        idx += hintLen;
+                        c16toa((word16)hintLen, args->output + args->idx);
+                        args->idx += HINT_LEN_SZ;
+                        XMEMCPY(args->output + args->idx,
+                                            ssl->arrays->server_hint, hintLen);
+                        args->idx += hintLen;
 
                         /* add p, g, pub */
-                        c16toa((word16)ssl->buffers.serverDH_P.length, output + idx);
-                        idx += LENGTH_SZ;
-                        XMEMCPY(output + idx, ssl->buffers.serverDH_P.buffer,
-                                              ssl->buffers.serverDH_P.length);
-                        idx += ssl->buffers.serverDH_P.length;
+                        c16toa((word16)ssl->buffers.serverDH_P.length,
+                            args->output + args->idx);
+                        args->idx += LENGTH_SZ;
+                        XMEMCPY(args->output + args->idx,
+                                ssl->buffers.serverDH_P.buffer,
+                                ssl->buffers.serverDH_P.length);
+                        args->idx += ssl->buffers.serverDH_P.length;
 
                         /*  g */
-                        c16toa((word16)ssl->buffers.serverDH_G.length, output + idx);
-                        idx += LENGTH_SZ;
-                        XMEMCPY(output + idx, ssl->buffers.serverDH_G.buffer,
-                                              ssl->buffers.serverDH_G.length);
-                        idx += ssl->buffers.serverDH_G.length;
+                        c16toa((word16)ssl->buffers.serverDH_G.length,
+                            args->output + args->idx);
+                        args->idx += LENGTH_SZ;
+                        XMEMCPY(args->output + args->idx,
+                                ssl->buffers.serverDH_G.buffer,
+                                ssl->buffers.serverDH_G.length);
+                        args->idx += ssl->buffers.serverDH_G.length;
 
                         /*  pub */
-                        c16toa((word16)ssl->buffers.serverDH_Pub.length, output + idx);
-                        idx += LENGTH_SZ;
-                        XMEMCPY(output + idx, ssl->buffers.serverDH_Pub.buffer,
-                                              ssl->buffers.serverDH_Pub.length);
+                        c16toa((word16)ssl->buffers.serverDH_Pub.length,
+                            args->output + args->idx);
+                        args->idx += LENGTH_SZ;
+                        XMEMCPY(args->output + args->idx,
+                                ssl->buffers.serverDH_Pub.buffer,
+                                ssl->buffers.serverDH_Pub.length);
                         /* No need to update idx, since sizes are already set */
-                        /* idx += ssl->buffers.serverDH_Pub.length; */
+                        /* args->idx += ssl->buffers.serverDH_Pub.length; */
                         break;
                     }
                 #endif /* !defined(NO_DH) && !defined(NO_PSK) */
@@ -17779,59 +18553,62 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         word32 hintLen;
 
                         /* curve type, named curve, length(1) */
-                        idx = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
-                        length = ENUM_LEN + CURVE_LEN + ENUM_LEN;
+                        args->idx = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
+                        args->length = ENUM_LEN + CURVE_LEN + ENUM_LEN;
 
-                        exportSz = MAX_EXPORT_ECC_SZ;
-                        exportBuf = (byte*)XMALLOC(exportSz, ssl->heap,
-                                                      DYNAMIC_TYPE_TMP_BUFFER);
-                        if (exportBuf == NULL) {
+                        args->exportSz = MAX_EXPORT_ECC_SZ;
+                        args->exportBuf = (byte*)XMALLOC(args->exportSz,
+                                            ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                        if (args->exportBuf == NULL) {
                             ERROR_OUT(MEMORY_E, exit_sske);
                         }
-                        if (wc_ecc_export_x963(ssl->eccTempKey, exportBuf, &exportSz) != 0) {
+                        if (wc_ecc_export_x963(ssl->eccTempKey, args->exportBuf,
+                                                      &args->exportSz) != 0) {
                             ERROR_OUT(ECC_EXPORT_ERROR, exit_sske);
                         }
-                        length += exportSz;
+                        args->length += args->exportSz;
 
                         /* include size part */
                         hintLen = (word32)XSTRLEN(ssl->arrays->server_hint);
                         if (hintLen > MAX_PSK_ID_LEN) {
                             ERROR_OUT(SERVER_HINT_ERROR, exit_sske);
                         }
-                        length += hintLen + HINT_LEN_SZ;
-                        sendSz = length + HANDSHAKE_HEADER_SZ + RECORD_HEADER_SZ;
+                        args->length += hintLen + HINT_LEN_SZ;
+                        args->sendSz = args->length + HANDSHAKE_HEADER_SZ + RECORD_HEADER_SZ;
 
                     #ifdef HAVE_QSH
-                        length += qshSz;
-                        sendSz += qshSz;
+                        args->length += args->qshSz;
+                        args->sendSz += args->qshSz;
                     #endif
                     #ifdef WOLFSSL_DTLS
                         if (ssl->options.dtls) {
-                            sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
-                            idx    += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+                            args->sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+                            args->idx    += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
                         }
                     #endif
                         /* check for available size */
-                        if ((ret = CheckAvailableSize(ssl, sendSz)) != 0) {
+                        if ((ret = CheckAvailableSize(ssl, args->sendSz)) != 0) {
                             goto exit_sske;
                         }
 
                         /* get output buffer */
-                        output = ssl->buffers.outputBuffer.buffer +
-                                 ssl->buffers.outputBuffer.length;
+                        args->output = ssl->buffers.outputBuffer.buffer +
+                                       ssl->buffers.outputBuffer.length;
 
                         /* key data */
-                        c16toa((word16)hintLen, output + idx);
-                        idx += HINT_LEN_SZ;
-                        XMEMCPY(output + idx, ssl->arrays->server_hint, hintLen);
-                        idx += hintLen;
+                        c16toa((word16)hintLen, args->output + args->idx);
+                        args->idx += HINT_LEN_SZ;
+                        XMEMCPY(args->output + args->idx,
+                                            ssl->arrays->server_hint, hintLen);
+                        args->idx += hintLen;
 
                         /* ECC key exchange data */
-                        output[idx++] = named_curve;
-                        output[idx++] = 0x00;          /* leading zero */
-                        output[idx++] = SetCurveId(ssl->eccTempKey);
-                        output[idx++] = (byte)exportSz;
-                        XMEMCPY(output + idx, exportBuf, exportSz);
+                        args->output[args->idx++] = named_curve;
+                        args->output[args->idx++] = 0x00;          /* leading zero */
+                        args->output[args->idx++] = SetCurveId(ssl->eccTempKey);
+                        args->output[args->idx++] = (byte)args->exportSz;
+                        XMEMCPY(args->output + args->idx, args->exportBuf,
+                                                                args->exportSz);
                         break;
                     }
                 #endif /* HAVE_ECC && !NO_PSK */
@@ -17841,23 +18618,24 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         enum wc_HashType hashType = WC_HASH_TYPE_NONE;
 
                         /* curve type, named curve, length(1) */
-                        idx = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
-                        length = ENUM_LEN + CURVE_LEN + ENUM_LEN;
+                        args->idx = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
+                        args->length = ENUM_LEN + CURVE_LEN + ENUM_LEN;
 
                         /* Export temp ECC key and add to length */
-                        exportSz = MAX_EXPORT_ECC_SZ;
-                        exportBuf = (byte*)XMALLOC(exportSz, ssl->heap,
-                                                       DYNAMIC_TYPE_TMP_BUFFER);
-                        if (exportBuf == NULL) {
+                        args->exportSz = MAX_EXPORT_ECC_SZ;
+                        args->exportBuf = (byte*)XMALLOC(args->exportSz,
+                                            ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                        if (args->exportBuf == NULL) {
                             ERROR_OUT(MEMORY_E, exit_sske);
                         }
-                        if (wc_ecc_export_x963(ssl->eccTempKey, exportBuf, &exportSz) != 0) {
+                        if (wc_ecc_export_x963(ssl->eccTempKey, args->exportBuf,
+                                                        &args->exportSz) != 0) {
                             ERROR_OUT(ECC_EXPORT_ERROR, exit_sske);
                         }
-                        length += exportSz;
+                        args->length += args->exportSz;
 
-                        preSigSz  = length;
-                        preSigIdx = idx;
+                        preSigSz  = args->length;
+                        preSigIdx = args->idx;
 
                         switch(ssl->specs.sig_algo)
                         {
@@ -17867,31 +18645,26 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                                 word32 i = 0;
                                 int    keySz;
 
-                                ssl->sigKey = XMALLOC(sizeof(RsaKey), ssl->heap,
-                                                              DYNAMIC_TYPE_RSA);
-                                if (ssl->sigKey == NULL) {
-                                    ERROR_OUT(MEMORY_E, exit_sske);
-                                }
-                                ssl->sigType = DYNAMIC_TYPE_RSA;
-
-                                ret = wc_InitRsaKey_ex((RsaKey*)ssl->sigKey,
-                                                     ssl->heap, ssl->devId);
+                                ssl->hsType = DYNAMIC_TYPE_RSA;
+                                ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
                                 if (ret != 0) {
                                     goto exit_sske;
                                 }
 
-                                ret = wc_RsaPrivateKeyDecode(ssl->buffers.key->buffer,
-                                                      &i, (RsaKey*)ssl->sigKey,
-                                                      ssl->buffers.key->length);
+                                ret = wc_RsaPrivateKeyDecode(
+                                    ssl->buffers.key->buffer,
+                                    &i,
+                                    (RsaKey*)ssl->hsKey,
+                                    ssl->buffers.key->length);
                                 if (ret != 0) {
                                     goto exit_sske;
                                 }
-                                keySz = wc_RsaEncryptSize((RsaKey*)ssl->sigKey);
+                                keySz = wc_RsaEncryptSize((RsaKey*)ssl->hsKey);
                                 if (keySz < 0) { /* test if keySz has error */
                                     ERROR_OUT(keySz, exit_sske);
                                 }
 
-                                sigSz = (word32)keySz;
+                                args->tmpSigSz = (word32)keySz;
                                 if (keySz < ssl->options.minRsaKeySz) {
                                     WOLFSSL_MSG("RSA signature key size too small");
                                     ERROR_OUT(RSA_KEY_SIZE_E, exit_sske);
@@ -17902,27 +18675,27 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                             case ecc_dsa_sa_algo:
                             {
                                 word32 i = 0;
-                                ssl->sigKey = XMALLOC(sizeof(ecc_key),
-                                                   ssl->heap, DYNAMIC_TYPE_ECC);
-                                if (ssl->sigKey == NULL) {
-                                    ERROR_OUT(MEMORY_E, exit_sske);
-                                }
-                                ssl->sigType = DYNAMIC_TYPE_ECC;
 
-                                ret = wc_ecc_init_ex((ecc_key*)ssl->sigKey, ssl->heap, ssl->devId);
-                                if (ret != 0)
-                                    goto exit_sske;
-
-                                ret = wc_EccPrivateKeyDecode(ssl->buffers.key->buffer,
-                                                      &i, (ecc_key*)ssl->sigKey,
-                                                      ssl->buffers.key->length);
+                                ssl->hsType = DYNAMIC_TYPE_ECC;
+                                ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
                                 if (ret != 0) {
                                     goto exit_sske;
                                 }
-                                sigSz = wc_ecc_sig_size((ecc_key*)ssl->sigKey);  /* worst case estimate */
+
+                                ret = wc_EccPrivateKeyDecode(
+                                    ssl->buffers.key->buffer,
+                                    &i,
+                                    (ecc_key*)ssl->hsKey,
+                                    ssl->buffers.key->length);
+                                if (ret != 0) {
+                                    goto exit_sske;
+                                }
+                                /* worst case estimate */
+                                args->tmpSigSz = wc_ecc_sig_size(
+                                    (ecc_key*)ssl->hsKey);
 
                                 /* check the minimum ECC key size */
-                                if (wc_ecc_size((ecc_key*)ssl->sigKey) <
+                                if (wc_ecc_size((ecc_key*)ssl->hsKey) <
                                         ssl->options.minEccKeySz) {
                                     WOLFSSL_MSG("ECC key size too small");
                                     ret = ECC_KEY_SIZE_E;
@@ -17935,66 +18708,66 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         } /* switch(ssl->specs.sig_algo) */
 
                         /* sig length */
-                        length += LENGTH_SZ;
-                        length += sigSz;
+                        args->length += LENGTH_SZ;
+                        args->length += args->tmpSigSz;
 
                         if (IsAtLeastTLSv1_2(ssl)) {
-                            length += HASH_SIG_SIZE;
+                            args->length += HASH_SIG_SIZE;
                         }
 
-                        sendSz = length + HANDSHAKE_HEADER_SZ + RECORD_HEADER_SZ;
+                        args->sendSz = args->length + HANDSHAKE_HEADER_SZ + RECORD_HEADER_SZ;
 
                     #ifdef HAVE_QSH
-                        length += qshSz;
-                        sendSz += qshSz;
+                        args->length += args->qshSz;
+                        args->sendSz += args->qshSz;
                     #endif
                     #ifdef WOLFSSL_DTLS
                         if (ssl->options.dtls) {
-                            sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
-                            idx    += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
-                            preSigIdx = idx;
+                            args->sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+                            args->idx    += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+                            preSigIdx = args->idx;
                         }
                     #endif
                         /* check for available size */
-                        if ((ret = CheckAvailableSize(ssl, sendSz)) != 0) {
+                        if ((ret = CheckAvailableSize(ssl, args->sendSz)) != 0) {
                             goto exit_sske;
                         }
 
                         /* get ouput buffer */
-                        output = ssl->buffers.outputBuffer.buffer +
-                                 ssl->buffers.outputBuffer.length;
+                        args->output = ssl->buffers.outputBuffer.buffer +
+                                       ssl->buffers.outputBuffer.length;
 
                         /* record and message headers will be added below, when we're sure
                            of the sig length */
 
                         /* key exchange data */
-                        output[idx++] = named_curve;
-                        output[idx++] = 0x00;          /* leading zero */
-                        output[idx++] = SetCurveId(ssl->eccTempKey);
-                        output[idx++] = (byte)exportSz;
-                        XMEMCPY(output + idx, exportBuf, exportSz);
-                        idx += exportSz;
+                        args->output[args->idx++] = named_curve;
+                        args->output[args->idx++] = 0x00;          /* leading zero */
+                        args->output[args->idx++] = SetCurveId(ssl->eccTempKey);
+                        args->output[args->idx++] = (byte)args->exportSz;
+                        XMEMCPY(args->output + args->idx, args->exportBuf, args->exportSz);
+                        args->idx += args->exportSz;
 
                         /* Determine hash type */
                         if (IsAtLeastTLSv1_2(ssl)) {
-                            output[idx++] = ssl->suites->hashAlgo;
-                            output[idx++] = ssl->suites->sigAlgo;
+                            args->output[args->idx++] = ssl->suites->hashAlgo;
+                            args->output[args->idx++] = ssl->suites->sigAlgo;
 
                             switch (ssl->suites->hashAlgo) {
                                 case sha512_mac:
-                                    #ifdef WOLFSSL_SHA512
-                                        hashType = WC_HASH_TYPE_SHA512;
-                                    #endif
+                                #ifdef WOLFSSL_SHA512
+                                    hashType = WC_HASH_TYPE_SHA512;
+                                #endif
                                     break;
                                 case sha384_mac:
-                                    #ifdef WOLFSSL_SHA384
-                                        hashType = WC_HASH_TYPE_SHA384;
-                                    #endif
+                                #ifdef WOLFSSL_SHA384
+                                    hashType = WC_HASH_TYPE_SHA384;
+                                #endif
                                     break;
                                 case sha256_mac:
-                                    #ifndef NO_SHA256
-                                        hashType = WC_HASH_TYPE_SHA256;
-                                    #endif
+                                #ifndef NO_SHA256
+                                    hashType = WC_HASH_TYPE_SHA256;
+                                #endif
                                     break;
                                 case sha_mac:
                                     #if !defined(NO_SHA) && \
@@ -18028,37 +18801,42 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
                     #ifdef HAVE_FUZZER
                         if (ssl->fuzzerCb) {
-                            ssl->fuzzerCb(ssl, output + preSigIdx, preSigSz,
-                                                           FUZZ_SIGNATURE, ssl->fuzzerCtx);
+                            ssl->fuzzerCb(ssl, args->output + preSigIdx,
+                                preSigSz, FUZZ_SIGNATURE, ssl->fuzzerCtx);
                         }
                     #endif
 
                         /* Assemble buffer to hash for signature */
-                        sigDataSz = RAN_LEN + RAN_LEN + preSigSz;
-                        sigDataBuf = (byte*)XMALLOC(sigDataSz, ssl->heap,
-                                                       DYNAMIC_TYPE_TMP_BUFFER);
-                        if (sigDataBuf == NULL) {
+                        args->sigDataSz = RAN_LEN + RAN_LEN + preSigSz;
+                        args->sigDataBuf = (byte*)XMALLOC(args->sigDataSz,
+                                            ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                        if (args->sigDataBuf == NULL) {
                             ERROR_OUT(MEMORY_E, exit_sske);
                         }
-                        XMEMCPY(sigDataBuf, ssl->arrays->clientRandom, RAN_LEN);
-                        XMEMCPY(sigDataBuf+RAN_LEN, ssl->arrays->serverRandom, RAN_LEN);
-                        XMEMCPY(sigDataBuf+RAN_LEN+RAN_LEN, output + preSigIdx, preSigSz);
+                        XMEMCPY(args->sigDataBuf, ssl->arrays->clientRandom,
+                                                                       RAN_LEN);
+                        XMEMCPY(args->sigDataBuf+RAN_LEN,
+                                            ssl->arrays->serverRandom, RAN_LEN);
+                        XMEMCPY(args->sigDataBuf+RAN_LEN+RAN_LEN,
+                                args->output + preSigIdx, preSigSz);
 
                         ssl->buffers.sig.length = wc_HashGetDigestSize(hashType);
-                        ssl->buffers.sig.buffer = (byte*)XMALLOC(ssl->buffers.sig.length,
+                        ssl->buffers.sig.buffer = (byte*)XMALLOC(
+                                            ssl->buffers.sig.length,
                                             ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
                         if (ssl->buffers.sig.buffer == NULL) {
                             ERROR_OUT(MEMORY_E, exit_sske);
                         }
 
                         /* Perform hash */
-                        ret = wc_Hash(hashType, sigDataBuf, sigDataSz,
+                        ret = wc_Hash(hashType,
+                            args->sigDataBuf, args->sigDataSz,
                             ssl->buffers.sig.buffer, ssl->buffers.sig.length);
                         if (ret != 0) {
                             goto exit_sske;
                         }
 
-                        ssl->sigLen = sigSz;
+                        args->sigSz = args->tmpSigSz;
 
                         /* Sign hash to create signature */
                         switch (ssl->specs.sig_algo)
@@ -18078,19 +18856,19 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
                                     switch (ssl->suites->hashAlgo) {
                                         case sha512_mac:
-                                            #ifdef WOLFSSL_SHA512
-                                                typeH    = SHA512h;
-                                            #endif
+                                        #ifdef WOLFSSL_SHA512
+                                            typeH    = SHA512h;
+                                        #endif
                                             break;
                                         case sha384_mac:
-                                            #ifdef WOLFSSL_SHA384
-                                                typeH    = SHA384h;
-                                            #endif
+                                        #ifdef WOLFSSL_SHA384
+                                            typeH    = SHA384h;
+                                        #endif
                                             break;
                                         case sha256_mac:
-                                            #ifndef NO_SHA256
-                                                typeH    = SHA256h;
-                                            #endif
+                                        #ifndef NO_SHA256
+                                            typeH    = SHA256h;
+                                        #endif
                                             break;
                                         case sha_mac:
                                             #if !defined(NO_SHA) && \
@@ -18103,8 +18881,10 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                                             break;
                                     }
 
-                                    ssl->buffers.sig.length = wc_EncodeSignature(encodedSig,
-                                        ssl->buffers.sig.buffer, ssl->buffers.sig.length, typeH);
+                                    ssl->buffers.sig.length =
+                                        wc_EncodeSignature(encodedSig,
+                                        ssl->buffers.sig.buffer,
+                                        ssl->buffers.sig.length, typeH);
 
                                     /* Replace sig buffer with new one */
                                     XFREE(ssl->buffers.sig.buffer, ssl->heap,
@@ -18113,8 +18893,9 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                                 }
 
                                 /* write sig size here */
-                                c16toa((word16)ssl->sigLen, output + idx);
-                                idx += LENGTH_SZ;
+                                c16toa((word16)args->sigSz,
+                                    args->output + args->idx);
+                                args->idx += LENGTH_SZ;
                                 break;
                             }
                         #endif /* !NO_RSA */
@@ -18131,50 +18912,46 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                     {
                         enum wc_HashType hashType = WC_HASH_TYPE_NONE;
 
-                        idx = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
-                        length = LENGTH_SZ * 3;  /* p, g, pub */
-                        length += ssl->buffers.serverDH_P.length +
-                                  ssl->buffers.serverDH_G.length +
-                                  ssl->buffers.serverDH_Pub.length;
+                        args->idx = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
+                        args->length = LENGTH_SZ * 3;  /* p, g, pub */
+                        args->length += ssl->buffers.serverDH_P.length +
+                                        ssl->buffers.serverDH_G.length +
+                                        ssl->buffers.serverDH_Pub.length;
 
-                        preSigIdx = idx;
-                        preSigSz  = length;
+                        preSigIdx = args->idx;
+                        preSigSz  = args->length;
 
                         if (!ssl->options.usingAnon_cipher) {
                             word32   i = 0;
                             int      keySz;
 
-                            ssl->sigKey = (RsaKey*)XMALLOC(sizeof(RsaKey), ssl->heap,
-                                                              DYNAMIC_TYPE_RSA);
-                            if (ssl->sigKey == NULL) {
-                                ERROR_OUT(MEMORY_E, exit_sske);
+                            /* make sure private key exists */
+                            if (ssl->buffers.key == NULL ||
+                                            ssl->buffers.key->buffer == NULL) {
+                                ERROR_OUT(NO_PRIVATE_KEY, exit_sske);
                             }
-                            ssl->sigType = DYNAMIC_TYPE_RSA;
 
-                            ret = wc_InitRsaKey_ex((RsaKey*)ssl->sigKey,
-                                                        ssl->heap, ssl->devId);
+                            ssl->hsType = DYNAMIC_TYPE_RSA;
+                            ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
                             if (ret != 0) {
                                 goto exit_sske;
                             }
 
                             /* sig length */
-                            length += LENGTH_SZ;
+                            args->length += LENGTH_SZ;
 
-                            if (!ssl->buffers.key->buffer) {
-                                ERROR_OUT(NO_PRIVATE_KEY, exit_sske);
-                            }
-
-                            ret = wc_RsaPrivateKeyDecode(ssl->buffers.key->buffer, &i,
-                                                         (RsaKey*)ssl->sigKey, ssl->buffers.key->length);
+                            ret = wc_RsaPrivateKeyDecode(
+                                ssl->buffers.key->buffer, &i,
+                                (RsaKey*)ssl->hsKey, ssl->buffers.key->length);
                             if (ret != 0) {
                                 goto exit_sske;
                             }
-                            keySz = wc_RsaEncryptSize((RsaKey*)ssl->sigKey);
+                            keySz = wc_RsaEncryptSize((RsaKey*)ssl->hsKey);
                             if (keySz < 0) { /* test if keySz has error */
                                 ERROR_OUT(keySz, exit_sske);
                             }
-                            sigSz = (word32)keySz;
-                            length += sigSz;
+                            args->tmpSigSz = (word32)keySz;
+                            args->length += args->tmpSigSz;
 
                             if (keySz < ssl->options.minRsaKeySz) {
                                 WOLFSSL_MSG("RSA key size too small");
@@ -18182,60 +18959,68 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                             }
 
                             if (IsAtLeastTLSv1_2(ssl)) {
-                                length += HASH_SIG_SIZE;
+                                args->length += HASH_SIG_SIZE;
                             }
                         }
 
-                        sendSz = length + HANDSHAKE_HEADER_SZ + RECORD_HEADER_SZ;
+                        args->sendSz = args->length + HANDSHAKE_HEADER_SZ +
+                                                            RECORD_HEADER_SZ;
 
                     #ifdef HAVE_QSH
-                        length += qshSz;
-                        sendSz += qshSz;
+                        args->length += args->qshSz;
+                        args->sendSz += args->qshSz;
                     #endif
                     #ifdef WOLFSSL_DTLS
                         if (ssl->options.dtls) {
-                            sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
-                            idx    += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
-                            preSigIdx = idx;
+                            args->sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+                            args->idx    += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+                            preSigIdx = args->idx;
                         }
                     #endif
 
                         /* check for available size */
-                        if ((ret = CheckAvailableSize(ssl, sendSz)) != 0) {
+                        if ((ret = CheckAvailableSize(ssl, args->sendSz)) != 0) {
                             goto exit_sske;
                         }
 
                         /* get ouput buffer */
-                        output = ssl->buffers.outputBuffer.buffer +
-                                 ssl->buffers.outputBuffer.length;
+                        args->output = ssl->buffers.outputBuffer.buffer +
+                                       ssl->buffers.outputBuffer.length;
 
-                        AddHeaders(output, length, server_key_exchange, ssl);
+                        AddHeaders(args->output, args->length,
+                                                    server_key_exchange, ssl);
 
                         /* add p, g, pub */
-                        c16toa((word16)ssl->buffers.serverDH_P.length, output + idx);
-                        idx += LENGTH_SZ;
-                        XMEMCPY(output + idx, ssl->buffers.serverDH_P.buffer,
+                        c16toa((word16)ssl->buffers.serverDH_P.length,
+                                                    args->output + args->idx);
+                        args->idx += LENGTH_SZ;
+                        XMEMCPY(args->output + args->idx,
+                                              ssl->buffers.serverDH_P.buffer,
                                               ssl->buffers.serverDH_P.length);
-                        idx += ssl->buffers.serverDH_P.length;
+                        args->idx += ssl->buffers.serverDH_P.length;
 
                         /*  g */
-                        c16toa((word16)ssl->buffers.serverDH_G.length, output + idx);
-                        idx += LENGTH_SZ;
-                        XMEMCPY(output + idx, ssl->buffers.serverDH_G.buffer,
+                        c16toa((word16)ssl->buffers.serverDH_G.length,
+                                                    args->output + args->idx);
+                        args->idx += LENGTH_SZ;
+                        XMEMCPY(args->output + args->idx,
+                                              ssl->buffers.serverDH_G.buffer,
                                               ssl->buffers.serverDH_G.length);
-                        idx += ssl->buffers.serverDH_G.length;
+                        args->idx += ssl->buffers.serverDH_G.length;
 
                         /*  pub */
-                        c16toa((word16)ssl->buffers.serverDH_Pub.length, output + idx);
-                        idx += LENGTH_SZ;
-                        XMEMCPY(output + idx, ssl->buffers.serverDH_Pub.buffer,
+                        c16toa((word16)ssl->buffers.serverDH_Pub.length,
+                                                    args->output + args->idx);
+                        args->idx += LENGTH_SZ;
+                        XMEMCPY(args->output + args->idx,
+                                              ssl->buffers.serverDH_Pub.buffer,
                                               ssl->buffers.serverDH_Pub.length);
-                        idx += ssl->buffers.serverDH_Pub.length;
+                        args->idx += ssl->buffers.serverDH_Pub.length;
 
                     #ifdef HAVE_FUZZER
                         if (ssl->fuzzerCb) {
-                            ssl->fuzzerCb(ssl, output + preSigIdx, preSigSz,
-                                                           FUZZ_SIGNATURE, ssl->fuzzerCtx);
+                            ssl->fuzzerCb(ssl, args->output + preSigIdx,
+                                preSigSz, FUZZ_SIGNATURE, ssl->fuzzerCtx);
                         }
                     #endif
 
@@ -18245,24 +19030,24 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
                         /* Determine hash type */
                         if (IsAtLeastTLSv1_2(ssl)) {
-                            output[idx++] = ssl->suites->hashAlgo;
-                            output[idx++] = ssl->suites->sigAlgo;
+                            args->output[args->idx++] = ssl->suites->hashAlgo;
+                            args->output[args->idx++] = ssl->suites->sigAlgo;
 
                             switch (ssl->suites->hashAlgo) {
                                 case sha512_mac:
-                                    #ifdef WOLFSSL_SHA512
-                                        hashType = WC_HASH_TYPE_SHA512;
-                                    #endif
+                                #ifdef WOLFSSL_SHA512
+                                    hashType = WC_HASH_TYPE_SHA512;
+                                #endif
                                     break;
                                 case sha384_mac:
-                                    #ifdef WOLFSSL_SHA384
-                                        hashType = WC_HASH_TYPE_SHA384;
-                                    #endif
+                                #ifdef WOLFSSL_SHA384
+                                    hashType = WC_HASH_TYPE_SHA384;
+                                #endif
                                     break;
                                 case sha256_mac:
-                                    #ifndef NO_SHA256
-                                        hashType = WC_HASH_TYPE_SHA256;
-                                    #endif
+                                #ifndef NO_SHA256
+                                    hashType = WC_HASH_TYPE_SHA256;
+                                #endif
                                     break;
                                 case sha_mac:
                                     #if !defined(NO_SHA) && \
@@ -18292,19 +19077,22 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         }
 
                         /* signature size */
-                        c16toa((word16)sigSz, output + idx);
-                        idx += LENGTH_SZ;
+                        c16toa((word16)args->tmpSigSz, args->output + args->idx);
+                        args->idx += LENGTH_SZ;
 
                         /* Assemble buffer to hash for signature */
-                        sigDataSz = RAN_LEN + RAN_LEN + preSigSz;
-                        sigDataBuf = (byte*)XMALLOC(sigDataSz, ssl->heap,
-                                                       DYNAMIC_TYPE_TMP_BUFFER);
-                        if (sigDataBuf == NULL) {
+                        args->sigDataSz = RAN_LEN + RAN_LEN + preSigSz;
+                        args->sigDataBuf = (byte*)XMALLOC(args->sigDataSz,
+                                            ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                        if (args->sigDataBuf == NULL) {
                             ERROR_OUT(MEMORY_E, exit_sske);
                         }
-                        XMEMCPY(sigDataBuf, ssl->arrays->clientRandom, RAN_LEN);
-                        XMEMCPY(sigDataBuf+RAN_LEN, ssl->arrays->serverRandom, RAN_LEN);
-                        XMEMCPY(sigDataBuf+RAN_LEN+RAN_LEN, output + preSigIdx, preSigSz);
+                        XMEMCPY(args->sigDataBuf, ssl->arrays->clientRandom,
+                                                                    RAN_LEN);
+                        XMEMCPY(args->sigDataBuf+RAN_LEN,
+                                        ssl->arrays->serverRandom, RAN_LEN);
+                        XMEMCPY(args->sigDataBuf+RAN_LEN+RAN_LEN,
+                            args->output + preSigIdx, preSigSz);
 
                         ssl->buffers.sig.length = wc_HashGetDigestSize(hashType);
                         ssl->buffers.sig.buffer = (byte*)XMALLOC(
@@ -18315,13 +19103,14 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         }
 
                         /* Perform hash */
-                        ret = wc_Hash(hashType, sigDataBuf, sigDataSz,
+                        ret = wc_Hash(hashType,
+                            args->sigDataBuf, args->sigDataSz,
                             ssl->buffers.sig.buffer, ssl->buffers.sig.length);
                         if (ret != 0) {
                             goto exit_sske;
                         }
 
-                        ssl->sigLen = sigSz;
+                        args->sigSz = args->tmpSigSz;
 
                         /* Sign hash to create signature */
                         switch (ssl->suites->sigAlgo)
@@ -18341,19 +19130,19 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
                                     switch (ssl->suites->hashAlgo) {
                                         case sha512_mac:
-                                            #ifdef WOLFSSL_SHA512
-                                                typeH    = SHA512h;
-                                            #endif
+                                        #ifdef WOLFSSL_SHA512
+                                            typeH    = SHA512h;
+                                        #endif
                                             break;
                                         case sha384_mac:
-                                            #ifdef WOLFSSL_SHA384
-                                                typeH    = SHA384h;
-                                            #endif
+                                        #ifdef WOLFSSL_SHA384
+                                            typeH    = SHA384h;
+                                        #endif
                                             break;
                                         case sha256_mac:
-                                            #ifndef NO_SHA256
-                                                typeH    = SHA256h;
-                                            #endif
+                                        #ifndef NO_SHA256
+                                            typeH    = SHA256h;
+                                        #endif
                                             break;
                                         case sha_mac:
                                             #if !defined(NO_SHA) && \
@@ -18366,8 +19155,10 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                                             break;
                                     }
 
-                                    ssl->buffers.sig.length = wc_EncodeSignature(encodedSig,
-                                        ssl->buffers.sig.buffer, ssl->buffers.sig.length, typeH);
+                                    ssl->buffers.sig.length =
+                                    wc_EncodeSignature(encodedSig,
+                                        ssl->buffers.sig.buffer,
+                                        ssl->buffers.sig.length, typeH);
 
                                     /* Replace sig buffer with new one */
                                     XFREE(ssl->buffers.sig.buffer, ssl->heap,
@@ -18423,12 +19214,14 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         #ifndef NO_RSA
                             case rsa_sa_algo:
                             {
+                                RsaKey* key = (RsaKey*)ssl->hsKey;
+
                                 ret = RsaSign(ssl,
                                     ssl->buffers.sig.buffer,
                                     ssl->buffers.sig.length,
-                                    output + idx,
-                                    &ssl->sigLen,
-                                    (RsaKey*)ssl->sigKey,
+                                    args->output + args->idx,
+                                    &args->sigSz,
+                                    key,
                                     ssl->buffers.key->buffer,
                                     ssl->buffers.key->length,
                             #ifdef HAVE_PK_CALLBACKS
@@ -18442,12 +19235,14 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         #endif /* !NO_RSA */
                             case ecc_dsa_sa_algo:
                             {
+                                ecc_key* key = (ecc_key*)ssl->hsKey;
+
                                 ret = EccSign(ssl,
                                     ssl->buffers.sig.buffer,
                                     ssl->buffers.sig.length,
-                                    output + LENGTH_SZ + idx,
-                                    &ssl->sigLen,
-                                    (ecc_key*)ssl->sigKey,
+                                    args->output + LENGTH_SZ + args->idx,
+                                    &args->sigSz,
+                                    key,
                             #if defined(HAVE_PK_CALLBACKS)
                                     ssl->buffers.key->buffer,
                                     ssl->buffers.key->length,
@@ -18471,6 +19266,8 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         #ifndef NO_RSA
                             case rsa_sa_algo:
                             {
+                                RsaKey* key = (RsaKey*)ssl->hsKey;
+
                                 if (ssl->options.usingAnon_cipher) {
                                     break;
                                 }
@@ -18478,9 +19275,9 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                                 ret = RsaSign(ssl,
                                     ssl->buffers.sig.buffer,
                                     ssl->buffers.sig.length,
-                                    output + idx,
-                                    &ssl->sigLen,
-                                    (RsaKey*)ssl->sigKey,
+                                    args->output + args->idx,
+                                    &args->sigSz,
+                                    key,
                                     ssl->buffers.key->buffer,
                                     ssl->buffers.key->length,
                                 #ifdef HAVE_PK_CALLBACKS
@@ -18541,35 +19338,41 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         #ifndef NO_RSA
                             case rsa_sa_algo:
                             {
-                                if (verifySig == NULL) {
-                                    if (ssl->sigLen == 0) {
+                                RsaKey* key = (RsaKey*)ssl->hsKey;
+
+                                if (args->verifySig == NULL) {
+                                    if (args->sigSz == 0) {
                                         ERROR_OUT(BAD_COND_E, exit_sske);
                                     }
-                                    verifySig = (byte*)XMALLOC(ssl->sigLen, ssl->heap,
-                                                      DYNAMIC_TYPE_TMP_BUFFER);
-                                    if (!verifySig) {
+                                    args->verifySig = (byte*)XMALLOC(
+                                                    args->sigSz, ssl->heap,
+                                                    DYNAMIC_TYPE_TMP_BUFFER);
+                                    if (!args->verifySig) {
                                         ERROR_OUT(MEMORY_E, exit_sske);
                                     }
-                                    XMEMCPY(verifySig, output + idx, ssl->sigLen);
+                                    XMEMCPY(args->verifySig,
+                                        args->output + args->idx, args->sigSz);
                                 }
 
                                 /* check for signature faults */
                                 ret = VerifyRsaSign(ssl,
-                                    verifySig, ssl->sigLen,
+                                    args->verifySig, args->sigSz,
                                     ssl->buffers.sig.buffer,
                                     ssl->buffers.sig.length,
-                                    (RsaKey*)ssl->sigKey);
+                                    key
+                                );
                                 break;
                             }
                         #endif
                             case ecc_dsa_sa_algo:
                             {
                                 /* Now that we know the real sig size, write it. */
-                                c16toa((word16)ssl->sigLen, output + idx);
+                                c16toa((word16)args->sigSz,
+                                                    args->output + args->idx);
 
                                 /* And adjust length and sendSz from estimates */
-                                length += ssl->sigLen - sigSz;
-                                sendSz += ssl->sigLen - sigSz;
+                                args->length += args->sigSz - args->tmpSigSz;
+                                args->sendSz += args->sigSz - args->tmpSigSz;
                                 break;
                             }
                             default:
@@ -18586,28 +19389,33 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         #ifndef NO_RSA
                             case rsa_sa_algo:
                             {
+                                RsaKey* key = (RsaKey*)ssl->hsKey;
+
                                 if (ssl->options.usingAnon_cipher) {
                                     break;
                                 }
 
-                                if (verifySig == NULL) {
-                                    if (ssl->sigLen == 0) {
+                                if (args->verifySig == NULL) {
+                                    if (args->sigSz == 0) {
                                         ERROR_OUT(BAD_COND_E, exit_sske);
                                     }
-                                    verifySig = (byte*)XMALLOC(ssl->sigLen, ssl->heap,
+                                    args->verifySig = (byte*)XMALLOC(
+                                                      args->sigSz, ssl->heap,
                                                       DYNAMIC_TYPE_TMP_BUFFER);
-                                    if (!verifySig) {
+                                    if (!args->verifySig) {
                                         ERROR_OUT(MEMORY_E, exit_sske);
                                     }
-                                    XMEMCPY(verifySig, output + idx, ssl->sigLen);
+                                    XMEMCPY(args->verifySig,
+                                        args->output + args->idx, args->sigSz);
                                 }
 
                                 /* check for signature faults */
                                 ret = VerifyRsaSign(ssl,
-                                    verifySig, ssl->sigLen,
+                                    args->verifySig, args->sigSz,
                                     ssl->buffers.sig.buffer,
                                     ssl->buffers.sig.length,
-                                    (RsaKey*)ssl->sigKey);
+                                    key
+                                );
                                 break;
                             }
                         #endif
@@ -18630,19 +19438,21 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
             {
             #ifdef HAVE_QSH
                 if (ssl->peerQSHKeyPresent) {
-                    if (qshSz > 0) {
-                        idx = sendSz - qshSz;
+                    if (args->qshSz > 0) {
+                        args->idx = args->sendSz - args->qshSz;
                         if (QSH_KeyExchangeWrite(ssl, 1) != 0) {
                             ERROR_OUT(MEMORY_E, exit_sske);
                         }
 
                         /* extension type */
-                        c16toa(TLSX_QUANTUM_SAFE_HYBRID, output + idx);
-                        idx += OPAQUE16_LEN;
+                        c16toa(TLSX_QUANTUM_SAFE_HYBRID,
+                                                    args->output + args->idx);
+                        args->idx += OPAQUE16_LEN;
 
                         /* write to output and check amount written */
-                        if (TLSX_QSHPK_Write(ssl->QSH_secret->list, output + idx)
-                                                          > qshSz - OPAQUE16_LEN) {
+                        if (TLSX_QSHPK_Write(ssl->QSH_secret->list,
+                            args->output + args->idx) >
+                                                args->qshSz - OPAQUE16_LEN) {
                             ERROR_OUT(MEMORY_E, exit_sske);
                         }
                     }
@@ -18653,8 +19463,9 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 if (ssl->specs.kea == ecdhe_psk_kea ||
                     ssl->specs.kea == ecc_diffie_hellman_kea) {
                     /* Check output to make sure it was set */
-                    if (output) {
-                        AddHeaders(output, length, server_key_exchange, ssl);
+                    if (args->output) {
+                        AddHeaders(args->output, args->length,
+                                                    server_key_exchange, ssl);
                     }
                     else {
                         ERROR_OUT(BUFFER_ERROR, exit_sske);
@@ -18664,7 +19475,7 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
             #ifdef WOLFSSL_DTLS
                 if (IsDtlsNotSctpMode(ssl)) {
-                    if ((ret = DtlsMsgPoolSave(ssl, output, sendSz)) != 0) {
+                    if ((ret = DtlsMsgPoolSave(ssl, args->output, args->sendSz)) != 0) {
                         goto exit_sske;
                     }
                 }
@@ -18673,7 +19484,7 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                     DtlsSEQIncrement(ssl, CUR_ORDER);
             #endif
 
-                ret = HashOutput(ssl, output, sendSz, 0);
+                ret = HashOutput(ssl, args->output, args->sendSz, 0);
                 if (ret != 0) {
                     goto exit_sske;
                 }
@@ -18683,8 +19494,8 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                     AddPacketName("ServerKeyExchange", &ssl->handShakeInfo);
                 }
                 if (ssl->toInfoOn) {
-                    AddPacketInfo("ServerKeyExchange", &ssl->timeoutInfo, output,
-                                                                sendSz, ssl->heap);
+                    AddPacketInfo("ServerKeyExchange", &ssl->timeoutInfo,
+                        args->output, args->sendSz, ssl->heap);
                 }
             #endif
 
@@ -18699,7 +19510,7 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
             case KEYSHARE_END:
             {
-                ssl->buffers.outputBuffer.length += sendSz;
+                ssl->buffers.outputBuffer.length += args->sendSz;
                 if (!ssl->options.groupMessages) {
                     ret = SendBuffered(ssl);
                 }
@@ -18715,51 +19526,14 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
         WOLFSSL_LEAVE("SendServerKeyExchange", ret);
 
-        /* Handle cleanup for stack variables here */
-    #if defined(HAVE_ECC)
-        if (exportBuf) {
-            XFREE(exportBuf, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
-            exportBuf = NULL;
-        }
-    #endif
-    #if defined(HAVE_ECC) || (!defined(NO_DH) && !defined(NO_RSA))
-        if (sigDataBuf) {
-            XFREE(sigDataBuf, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
-            sigDataBuf = NULL;
-        }
-    #endif
-
-
     #ifdef WOLFSSL_ASYNC_CRYPT
-        /* Handle WC_PENDING_E */
-        if (ret == WC_PENDING_E) {
-            /* Store variables needed for async */
-            XMEMSET(&ssl->async, 0, sizeof(ssl->async));
-            ssl->async.output = output;
-            ssl->async.sendSz = sendSz;
-            ssl->async.idx = idx;
-            ssl->async.length = length;
-            ssl->async.sigSz = sigSz;
-        #ifndef NO_RSA
-            ssl->async.data = verifySig;
-        #endif
-
-            /* Push event to queue */
-            ret = wolfAsync_EventQueuePush(&ssl->ctx->event_queue, &ssl->event);
-            if (ret == 0) {
-                return WC_PENDING_E;
-            }
-        }
-    #endif
-
-    #ifndef NO_RSA
-        if (verifySig) {
-            XFREE(verifySig, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
-            verifySig = NULL;
-        }
-    #endif
+        /* Handle async operation */
+        if (ret == WC_PENDING_E)
+            return ret;
+    #endif /* WOLFSSL_ASYNC_CRYPT */
 
         /* Final cleanup */
+        FreeSskeArgs(ssl, args);
         FreeKeyExchange(ssl);
 
         return ret;
@@ -19051,7 +19825,7 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
         /* suite size */
         ato16(&input[idx], &clSuites.suiteSz);
-        idx += 2;
+        idx += OPAQUE16_LEN;
 
         if (clSuites.suiteSz > WOLFSSL_MAX_SUITE_SZ)
             return BUFFER_ERROR;
@@ -19059,14 +19833,14 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
         /* session size */
         ato16(&input[idx], &sessionSz);
-        idx += 2;
+        idx += OPAQUE16_LEN;
 
         if (sessionSz > ID_LEN)
             return BUFFER_ERROR;
 
         /* random size */
         ato16(&input[idx], &randomSz);
-        idx += 2;
+        idx += OPAQUE16_LEN;
 
         if (randomSz > RAN_LEN)
             return BUFFER_ERROR;
@@ -19075,10 +19849,10 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
         for (i = 0, j = 0; i < clSuites.suiteSz; i += 3) {
             byte first = input[idx++];
             if (!first) { /* implicit: skip sslv2 type */
-                XMEMCPY(&clSuites.suites[j], &input[idx], 2);
-                j += 2;
+                XMEMCPY(&clSuites.suites[j], &input[idx], SUITE_LEN);
+                j += SUITE_LEN;
             }
-            idx += 2;
+            idx += SUITE_LEN;
         }
         clSuites.suiteSz = j;
 
@@ -19169,6 +19943,8 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
         byte            peerCookieSz = 0;
         byte            cookieType;
         byte            cookieSz = 0;
+
+        XMEMSET(&cookieHmac, 0, sizeof(Hmac));
 #endif /* WOLFSSL_DTLS */
 
 #ifdef WOLFSSL_CALLBACKS
@@ -19612,51 +20388,45 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
 #if !defined(NO_RSA) || defined(HAVE_ECC)
 
+    typedef struct DcvArgs { /* DoCertificateVerify working variables */
+        byte*  output; /* not allocated; passed by address to RsaVerify */
+        word32 sendSz;   /* RsaVerify return value when it is >= 0 */
+        word16 sz;       /* 16-bit length read from input via ato16 */
+        word32 sigSz;    /* wc_EncodeSignature return value */
+        word32 idx;      /* current read offset into input */
+        word32 begin;    /* *inOutIdx at entry; base for bounds checks */
+        byte   hashAlgo; /* sha_mac unless read from input (TLS 1.2+) */
+        byte   sigAlgo;  /* anonymous_sa_algo unless read from input */
+    } DcvArgs;
+
+    static void FreeDcvArgs(WOLFSSL* ssl, void* pArgs)
+    { /* set as ssl->async.freeArgs in async builds; called at final cleanup */
+        DcvArgs* args = (DcvArgs*)pArgs;
+
+        (void)ssl;  /* unused */
+        (void)args; /* DcvArgs.output is not allocated: nothing to free */
+    }
+
     static int DoCertificateVerify(WOLFSSL* ssl, byte* input,
                                 word32* inOutIdx, word32 size)
     {
-        int         ret = 0;
-        byte*       output = NULL;
-        word32      sendSz = 0;
-        word16      sz = 0;
-        word32      sigSz = 0;
-        byte        hashAlgo = sha_mac;
-        byte        sigAlgo = anonymous_sa_algo;
-        word32      idx = *inOutIdx, begin = *inOutIdx;
+        int ret = 0;
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        DcvArgs* args = (DcvArgs*)ssl->async.args;
+        typedef char args_test[sizeof(ssl->async.args) >= sizeof(*args) ? 1 : -1];
+        (void)sizeof(args_test);
+    #else
+        DcvArgs  args[1];
+    #endif
 
         WOLFSSL_ENTER("DoCertificateVerify");
 
-        (void)sigSz;
-        (void)output;
-        (void)sendSz;
-
     #ifdef WOLFSSL_ASYNC_CRYPT
-        ret = wolfAsync_EventPop(&ssl->event, WOLF_EVENT_TYPE_ASYNC_ANY);
+        ret = wolfSSL_AsyncPop(ssl, &ssl->options.keyShareState);
         if (ret != WC_NOT_PENDING_E) {
-            WOLF_EVENT_TYPE eType = ssl->event.type;
-
-            /* Clear event */
-            XMEMSET(&ssl->event, 0, sizeof(ssl->event));
-
             /* Check for error */
-            if (ret < 0) {
+            if (ret < 0)
                 goto exit_dcv;
-            }
-            else  {
-                /* Restore variables needed for async */
-                output = ssl->async.output;
-                sendSz = ssl->async.sendSz;
-                idx = ssl->async.idx;
-                sigSz = ssl->async.sigSz;
-                sz = ssl->async.length;
-                sigAlgo = ssl->async.sigAlgo;
-                hashAlgo = ssl->async.hashAlgo;
-
-                /* Advance key share state if not wolfCrypt */
-                if (eType == WOLF_EVENT_TYPE_ASYNC_WOLFSSL) {
-                    ssl->options.keyShareState++;
-                }
-            }
         }
         else
     #endif
@@ -19664,6 +20434,14 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
             /* Reset state */
             ret = 0;
             ssl->options.keyShareState = KEYSHARE_BEGIN;
+            XMEMSET(args, 0, sizeof(DcvArgs));
+            args->hashAlgo = sha_mac;
+            args->sigAlgo = anonymous_sa_algo;
+            args->idx = *inOutIdx;
+            args->begin = *inOutIdx;
+        #ifdef WOLFSSL_ASYNC_CRYPT
+            ssl->async.freeArgs = FreeDcvArgs;
+        #endif
         }
 
         switch(ssl->options.keyShareState)
@@ -19684,22 +20462,23 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
             case KEYSHARE_BUILD:
             {
                 if (IsAtLeastTLSv1_2(ssl)) {
-                    if ((idx - begin) + ENUM_LEN + ENUM_LEN > size) {
+                    if ((args->idx - args->begin) + ENUM_LEN + ENUM_LEN > size) {
                         ERROR_OUT(BUFFER_ERROR, exit_dcv);
                     }
 
-                    hashAlgo = input[idx++];
-                    sigAlgo  = input[idx++];
+                    args->hashAlgo = input[args->idx++];
+                    args->sigAlgo  = input[args->idx++];
                 }
 
-                if ((idx - begin) + OPAQUE16_LEN > size) {
+                if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                     ERROR_OUT(BUFFER_ERROR, exit_dcv);
                 }
 
-                ato16(input + idx, &sz);
-                idx += OPAQUE16_LEN;
+                ato16(input + args->idx, &args->sz);
+                args->idx += OPAQUE16_LEN;
 
-                if ((idx - begin) + sz > size || sz > ENCRYPT_LEN) {
+                if ((args->idx - args->begin) + args->sz > size ||
+                                                    args->sz > ENCRYPT_LEN) {
                     ERROR_OUT(BUFFER_ERROR, exit_dcv);
                 }
 
@@ -19726,27 +20505,29 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 #endif
 
                     if (IsAtLeastTLSv1_2(ssl)) {
-                        if (sigAlgo != ecc_dsa_sa_algo) {
+                        if (args->sigAlgo != ecc_dsa_sa_algo) {
                             WOLFSSL_MSG("Oops, peer sent ECC key but not in verify");
                         }
 
-                        if (hashAlgo == sha256_mac) {
+                        switch (args->hashAlgo) {
+                            case sha256_mac:
                             #ifndef NO_SHA256
                                 ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha256;
                                 ssl->buffers.digest.length = SHA256_DIGEST_SIZE;
                             #endif
-                        }
-                        else if (hashAlgo == sha384_mac) {
+                                break;
+                            case sha384_mac:
                             #ifdef WOLFSSL_SHA384
                                 ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha384;
                                 ssl->buffers.digest.length = SHA384_DIGEST_SIZE;
                             #endif
-                        }
-                        else if (hashAlgo == sha512_mac) {
+                                break;
+                            case sha512_mac:
                             #ifdef WOLFSSL_SHA512
                                 ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha512;
                                 ssl->buffers.digest.length = SHA512_DIGEST_SIZE;
                             #endif
+                                break;
                         }
                     }
                 }
@@ -19763,9 +20544,9 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                     WOLFSSL_MSG("Doing RSA peer cert verify");
 
                     ret = RsaVerify(ssl,
-                        input + idx,
-                        sz,
-                        &output,
+                        input + args->idx,
+                        args->sz,
+                        &args->output,
                         ssl->peerRsaKey,
                     #ifdef HAVE_PK_CALLBACKS
                         ssl->buffers.peerRsaKey.buffer,
@@ -19776,7 +20557,7 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                     #endif
                     );
                     if (ret >= 0) {
-                        sendSz = ret;
+                        args->sendSz = ret;
                         ret = 0;
                     }
                 }
@@ -19786,7 +20567,7 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                     WOLFSSL_MSG("Doing ECC peer cert verify");
 
                     ret = EccVerify(ssl,
-                        input + idx, sz,
+                        input + args->idx, args->sz,
                         ssl->buffers.digest.buffer, ssl->buffers.digest.length,
                         ssl->peerEccDsaKey,
                     #ifdef HAVE_PK_CALLBACKS
@@ -19815,11 +20596,11 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 if (ssl->peerRsaKey != NULL && ssl->peerRsaKeyPresent != 0) {
                     if (IsAtLeastTLSv1_2(ssl)) {
                     #ifdef WOLFSSL_SMALL_STACK
-                        byte*  encodedSig = NULL;
+                        byte* encodedSig = NULL;
                     #else
-                        byte   encodedSig[MAX_ENCODED_SIG_SZ];
+                        byte  encodedSig[MAX_ENCODED_SIG_SZ];
                     #endif
-                        int    typeH = SHAh;
+                        int   typeH = SHAh;
 
                     /* make sure a default is defined */
                     #if !defined(NO_SHA)
@@ -19839,56 +20620,58 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                     #endif
 
                     #ifdef WOLFSSL_SMALL_STACK
-                        encodedSig = (byte*)XMALLOC(MAX_ENCODED_SIG_SZ, NULL,
-                                                               DYNAMIC_TYPE_TMP_BUFFER);
+                        encodedSig = (byte*)XMALLOC(MAX_ENCODED_SIG_SZ,
+                                            ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
                         if (encodedSig == NULL) {
                             ERROR_OUT(MEMORY_E, exit_dcv);
                         }
                     #endif
 
-                        if (sigAlgo != rsa_sa_algo) {
+                        if (args->sigAlgo != rsa_sa_algo) {
                             WOLFSSL_MSG("Oops, peer sent RSA key but not in verify");
                         }
 
-                        switch (hashAlgo) {
-                        #ifndef NO_SHA256
+                        switch (args->hashAlgo) {
                             case sha256_mac:
+                            #ifndef NO_SHA256
                                 typeH    = SHA256h;
                                 ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha256;
                                 ssl->buffers.digest.length = SHA256_DIGEST_SIZE;
+                            #endif /* !NO_SHA256 */
                                 break;
-                        #endif /* !NO_SHA256 */
-                        #ifdef WOLFSSL_SHA384
                             case sha384_mac:
+                            #ifdef WOLFSSL_SHA384
                                 typeH    = SHA384h;
                                 ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha384;
                                 ssl->buffers.digest.length = SHA384_DIGEST_SIZE;
+                            #endif /* WOLFSSL_SHA384 */
                                 break;
-                        #endif /* WOLFSSL_SHA384 */
-                        #ifdef WOLFSSL_SHA512
                             case sha512_mac:
+                            #ifdef WOLFSSL_SHA512
                                 typeH    = SHA512h;
                                 ssl->buffers.digest.buffer = ssl->hsHashes->certHashes.sha512;
                                 ssl->buffers.digest.length = SHA512_DIGEST_SIZE;
+                            #endif /* WOLFSSL_SHA512 */
                                 break;
-                        #endif /* WOLFSSL_SHA512 */
                         } /* switch */
 
-                        sigSz = wc_EncodeSignature(encodedSig,
-                            ssl->buffers.digest.buffer, ssl->buffers.digest.length,
-                                                                            typeH);
+                        args->sigSz = wc_EncodeSignature(encodedSig,
+                            ssl->buffers.digest.buffer,
+                            ssl->buffers.digest.length, typeH);
 
-                        if (sendSz != sigSz || !output || XMEMCMP(output,
-                                encodedSig, min(sigSz, MAX_ENCODED_SIG_SZ)) != 0) {
+                        if (args->sendSz != args->sigSz || !args->output ||
+                            XMEMCMP(args->output, encodedSig,
+                                min(args->sigSz, MAX_ENCODED_SIG_SZ)) != 0) {
                             ret = VERIFY_CERT_ERROR;
                         }
 
                     #ifdef WOLFSSL_SMALL_STACK
-                        XFREE(encodedSig, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+                        XFREE(encodedSig, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
                     #endif
                     }
                     else {
-                        if (sendSz != FINISHED_SZ || !output || XMEMCMP(output,
+                        if (args->sendSz != FINISHED_SZ || !args->output ||
+                            XMEMCMP(args->output,
                                 &ssl->hsHashes->certHashes, FINISHED_SZ) != 0) {
                             ret = VERIFY_CERT_ERROR;
                         }
@@ -19905,8 +20688,8 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 ssl->options.havePeerVerify = 1;
 
                 /* Set final index */
-                idx += sz;
-                *inOutIdx = idx;
+                args->idx += args->sz;
+                *inOutIdx = args->idx;
 
                 /* Advance state and proceed */
                 ssl->options.keyShareState = KEYSHARE_END;
@@ -19924,30 +20707,13 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
         WOLFSSL_LEAVE("DoCertificateVerify", ret);
 
-        /* Handle cleanup for stack variables here */
-
-
     #ifdef WOLFSSL_ASYNC_CRYPT
-        /* Handle WC_PENDING_E */
+        /* Handle async operation */
         if (ret == WC_PENDING_E) {
-            /* Store variables needed for async */
-            XMEMSET(&ssl->async, 0, sizeof(ssl->async));
-            ssl->async.output = output;
-            ssl->async.sendSz = sendSz;
-            ssl->async.idx = idx;
-            ssl->async.sigSz = sigSz;
-            ssl->async.length = sz;
-            ssl->async.sigAlgo = sigAlgo;
-            ssl->async.hashAlgo = hashAlgo;
-
             /* Mark message as not recevied so it can process again */
             ssl->msgsReceived.got_certificate_verify = 0;
 
-            /* Push event to queue */
-            ret = wolfAsync_EventQueuePush(&ssl->ctx->event_queue, &ssl->event);
-            if (ret == 0) {
-                return WC_PENDING_E;
-            }
+            return ret;
         }
     #endif /* WOLFSSL_ASYNC_CRYPT */
 
@@ -19956,6 +20722,7 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
         ssl->buffers.digest.length = 0;
 
         /* Final cleanup */
+        FreeDcvArgs(ssl, args);
         FreeKeyExchange(ssl);
 
         return ret;
@@ -19965,14 +20732,15 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
     int SendServerHelloDone(WOLFSSL* ssl)
     {
-        byte              *output;
-        int                sendSz = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
-        int                ret;
+        byte* output;
+        int   sendSz = RECORD_HEADER_SZ + HANDSHAKE_HEADER_SZ;
+        int   ret;
+
+    #ifdef WOLFSSL_DTLS
+        if (ssl->options.dtls)
+            sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
+    #endif
 
-        #ifdef WOLFSSL_DTLS
-            if (ssl->options.dtls)
-                sendSz += DTLS_RECORD_EXTRA + DTLS_HANDSHAKE_EXTRA;
-        #endif
         /* check for available size */
         if ((ret = CheckAvailableSize(ssl, sendSz)) != 0)
             return ret;
@@ -19983,27 +20751,27 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
         AddHeaders(output, 0, server_hello_done, ssl);
 
-        #ifdef WOLFSSL_DTLS
-            if (IsDtlsNotSctpMode(ssl)) {
-                if ((ret = DtlsMsgPoolSave(ssl, output, sendSz)) != 0)
-                    return 0;
-            }
+    #ifdef WOLFSSL_DTLS
+        if (IsDtlsNotSctpMode(ssl)) {
+            if ((ret = DtlsMsgPoolSave(ssl, output, sendSz)) != 0)
+                return 0;
+        }
 
-            if (ssl->options.dtls)
-                DtlsSEQIncrement(ssl, CUR_ORDER);
-        #endif
+        if (ssl->options.dtls)
+            DtlsSEQIncrement(ssl, CUR_ORDER);
+    #endif
 
         ret = HashOutput(ssl, output, sendSz, 0);
             if (ret != 0)
                 return ret;
 
-#ifdef WOLFSSL_CALLBACKS
+    #ifdef WOLFSSL_CALLBACKS
         if (ssl->hsInfoOn)
             AddPacketName("ServerHelloDone", &ssl->handShakeInfo);
         if (ssl->toInfoOn)
             AddPacketInfo("ServerHelloDone", &ssl->timeoutInfo, output, sendSz,
                           ssl->heap);
-#endif
+    #endif
         ssl->options.serverState = SERVER_HELLODONE_COMPLETE;
 
         ssl->buffers.outputBuffer.length += sendSz;
@@ -20276,49 +21044,42 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
     }
 #endif /* WOLFSSL_DTLS */
 
+    typedef struct DckeArgs { /* DoClientKeyExchange working variables */
+        byte*  output; /* not allocated */
+        word32 length; /* e.g. RSA: key size from wc_RsaEncryptSize */
+        word32 idx;    /* current read offset into input */
+        word32 begin;  /* *inOutIdx at entry; base for bounds checks */
+        word32 sigSz;
+    } DckeArgs;
+
+    static void FreeDckeArgs(WOLFSSL* ssl, void* pArgs)
+    { /* set as ssl->async.freeArgs in async builds */
+        DckeArgs* args = (DckeArgs*)pArgs;
+
+        (void)ssl;  /* unused */
+        (void)args; /* DckeArgs.output is not allocated: nothing to free */
+    }
+
     static int DoClientKeyExchange(WOLFSSL* ssl, byte* input, word32* inOutIdx,
                                                                     word32 size)
     {
-        int    ret;
-        word32 length = 0;
-        word32 idx = *inOutIdx, begin = *inOutIdx;
-        byte*  output_lcl = NULL;
-        byte** output = &output_lcl;
-
-        /* suppress possible compiler warnings */
-        (void)input;
-        (void)size;
-        (void)length;
-        (void)idx;
-        (void)output;
+        int ret;
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        DckeArgs* args = (DckeArgs*)ssl->async.args;
+        typedef char args_test[sizeof(ssl->async.args) >= sizeof(*args) ? 1 : -1];
+        (void)sizeof(args_test);
+    #else
+        DckeArgs  args[1];
+    #endif
 
         WOLFSSL_ENTER("DoClientKeyExchange");
 
     #ifdef WOLFSSL_ASYNC_CRYPT
-        /* use async pointer for output */
-        output = &ssl->async.output;
-
-        ret = wolfAsync_EventPop(&ssl->event, WOLF_EVENT_TYPE_ASYNC_ANY);
+        ret = wolfSSL_AsyncPop(ssl, &ssl->options.keyShareState);
         if (ret != WC_NOT_PENDING_E) {
-            WOLF_EVENT_TYPE eType = ssl->event.type;
-
-            /* Clear event */
-            XMEMSET(&ssl->event, 0, sizeof(ssl->event));
-
             /* Check for error */
-            if (ret < 0) {
+            if (ret < 0)
                 goto exit_dcke;
-            }
-            else  {
-                /* Restore variables needed for async */
-                idx = ssl->async.idx;
-                length = ssl->async.length;
-
-                /* Advance key share state if not wolfCrypt */
-                if (eType == WOLF_EVENT_TYPE_ASYNC_WOLFSSL) {
-                    ssl->options.keyShareState++;
-                }
-            }
         }
         else
     #endif /* WOLFSSL_ASYNC_CRYPT */
@@ -20326,6 +21087,12 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
             /* Reset state */
             ret = 0;
             ssl->options.keyShareState = KEYSHARE_BEGIN;
+            XMEMSET(args, 0, sizeof(DckeArgs));
+            args->idx = *inOutIdx;
+            args->begin = *inOutIdx;
+        #ifdef WOLFSSL_ASYNC_CRYPT
+            ssl->async.freeArgs = FreeDckeArgs;
+        #endif
         }
 
         /* Do Client Key Exchange State Machine */
@@ -20356,7 +21123,7 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
                 if (ssl->options.verifyPeer && ssl->options.failNoCertxPSK) {
                     if (!ssl->options.havePeerCert &&
-                                                 !ssl->options.usingPSK_cipher){
+                                             !ssl->options.usingPSK_cipher) {
                         WOLFSSL_MSG("client didn't present peer cert");
                         return NO_PEER_CERT;
                     }
@@ -20376,7 +21143,9 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 #ifndef NO_RSA
                     case rsa_kea:
                     {
-                        if (!ssl->buffers.key->buffer) {
+                        /* make sure private key exists */
+                        if (ssl->buffers.key == NULL ||
+                                            ssl->buffers.key->buffer == NULL) {
                             ERROR_OUT(NO_PRIVATE_KEY, exit_dcke);
                         }
                         break;
@@ -20396,7 +21165,9 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 #ifdef HAVE_NTRU
                     case ntru_kea:
                     {
-                        if (!ssl->buffers.key->buffer) {
+                        /* make sure private key exists */
+                        if (ssl->buffers.key == NULL ||
+                                            ssl->buffers.key->buffer == NULL) {
                             ERROR_OUT(NO_PRIVATE_KEY, exit_dcke);
                         }
                         break;
@@ -20459,29 +21230,22 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         word32 i = 0;
                         int    keySz;
 
-                        ssl->sigKey = XMALLOC(sizeof(RsaKey), ssl->heap,
-                                                              DYNAMIC_TYPE_RSA);
-                        if (ssl->sigKey == NULL) {
-                            ERROR_OUT(MEMORY_E, exit_dcke);
-                        }
-                        ssl->sigType = DYNAMIC_TYPE_RSA;
-
-                        ret = wc_InitRsaKey_ex((RsaKey*)ssl->sigKey, ssl->heap,
-                                                                   ssl->devId);
+                        ssl->hsType = DYNAMIC_TYPE_RSA;
+                        ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
                         if (ret != 0) {
                             goto exit_dcke;
                         }
 
                         ret = wc_RsaPrivateKeyDecode(ssl->buffers.key->buffer,
-                            &i, (RsaKey*)ssl->sigKey, ssl->buffers.key->length);
+                            &i, (RsaKey*)ssl->hsKey, ssl->buffers.key->length);
                         if (ret != 0) {
                             goto exit_dcke;
                         }
-                        keySz = wc_RsaEncryptSize((RsaKey*)ssl->sigKey);
+                        keySz = wc_RsaEncryptSize((RsaKey*)ssl->hsKey);
                         if (keySz < 0) { /* test if keySz has error */
                             ERROR_OUT(keySz, exit_dcke);
                         }
-                        length = (word32)keySz;
+                        args->length = (word32)keySz;
 
                         if (keySz < ssl->options.minRsaKeySz) {
                             WOLFSSL_MSG("Peer RSA key is too small");
@@ -20492,25 +21256,25 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         if (ssl->options.tls) {
                             word16 check;
 
-                            if ((idx - begin) + OPAQUE16_LEN > size) {
+                            if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                                 ERROR_OUT(BUFFER_ERROR, exit_dcke);
                             }
 
-                            ato16(input + idx, &check);
-                            idx += OPAQUE16_LEN;
+                            ato16(input + args->idx, &check);
+                            args->idx += OPAQUE16_LEN;
 
-                            if ((word32)check != length) {
+                            if ((word32)check != args->length) {
                                 WOLFSSL_MSG("RSA explicit size doesn't match");
                                 ERROR_OUT(RSA_PRIVATE_ERROR, exit_dcke);
                             }
                         }
 
-                        if ((idx - begin) + length > size) {
+                        if ((args->idx - args->begin) + args->length > size) {
                             WOLFSSL_MSG("RSA message too big");
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
-                        *output = NULL;
+                        args->output = NULL;
                         break;
                     } /* rsa_kea */
                 #endif /* !NO_RSA */
@@ -20520,25 +21284,27 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         byte* pms = ssl->arrays->preMasterSecret;
                         word16 ci_sz;
 
-                        if ((idx - begin) + OPAQUE16_LEN > size) {
+                        if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
-                        ato16(input + idx, &ci_sz);
-                        idx += OPAQUE16_LEN;
+                        ato16(input + args->idx, &ci_sz);
+                        args->idx += OPAQUE16_LEN;
 
                         if (ci_sz > MAX_PSK_ID_LEN) {
                             ERROR_OUT(CLIENT_ID_ERROR, exit_dcke);
                         }
 
-                        if ((idx - begin) + ci_sz > size) {
+                        if ((args->idx - args->begin) + ci_sz > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
-                        XMEMCPY(ssl->arrays->client_identity, input + idx, ci_sz);
-                        idx += ci_sz;
+                        XMEMCPY(ssl->arrays->client_identity,
+                                                    input + args->idx, ci_sz);
+                        args->idx += ci_sz;
 
-                        ssl->arrays->client_identity[min(ci_sz, MAX_PSK_ID_LEN-1)] = 0;
+                        ssl->arrays->client_identity[
+                                        min(ci_sz, MAX_PSK_ID_LEN-1)] = 0;
                         ssl->arrays->psk_keySz = ssl->options.server_psk_cb(ssl,
                             ssl->arrays->client_identity, ssl->arrays->psk_key,
                             MAX_PSK_KEY_LEN);
@@ -20560,7 +21326,8 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         pms += OPAQUE16_LEN;
 
                         XMEMCPY(pms, ssl->arrays->psk_key, ssl->arrays->psk_keySz);
-                        ssl->arrays->preMasterSz = ssl->arrays->psk_keySz * 2 + 4;
+                        ssl->arrays->preMasterSz =
+                            (ssl->arrays->psk_keySz * 2) + (OPAQUE16_LEN * 2);
                         break;
                     }
                 #endif /* !NO_PSK */
@@ -20568,27 +21335,27 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                     case ntru_kea:
                     {
                         word16 cipherLen;
-                        word16 plainLen = sizeof(ssl->arrays->preMasterSecret);
+                        word16 plainLen = ENCRYPT_LEN;
 
-                        if ((idx - begin) + OPAQUE16_LEN > size) {
+                        if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
-                        ato16(input + idx, &cipherLen);
-                        idx += OPAQUE16_LEN;
+                        ato16(input + args->idx, &cipherLen);
+                        args->idx += OPAQUE16_LEN;
 
                         if (cipherLen > MAX_NTRU_ENCRYPT_SZ) {
                             ERROR_OUT(NTRU_KEY_ERROR, exit_dcke);
                         }
 
-                        if ((idx - begin) + cipherLen > size) {
+                        if ((args->idx - args->begin) + cipherLen > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
                         if (NTRU_OK != ntru_crypto_ntru_decrypt(
                                     (word16) ssl->buffers.key->length,
                                     ssl->buffers.key->buffer, cipherLen,
-                                    input + idx, &plainLen,
+                                    input + args->idx, &plainLen,
                                     ssl->arrays->preMasterSecret)) {
                             ERROR_OUT(NTRU_DECRYPT_ERROR, exit_dcke);
                         }
@@ -20597,7 +21364,7 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                             ERROR_OUT(NTRU_DECRYPT_ERROR, exit_dcke);
                         }
 
-                        idx += cipherLen;
+                        args->idx += cipherLen;
                         ssl->arrays->preMasterSz = plainLen;
                         break;
                     }
@@ -20611,14 +21378,8 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         if (ssl->specs.static_ecdh) {
                             word32 i = 0;
 
-                            ssl->sigKey = XMALLOC(sizeof(ecc_key), ssl->heap,
-                                                              DYNAMIC_TYPE_ECC);
-                            if (ssl->sigKey == NULL) {
-                                ERROR_OUT(MEMORY_E, exit_dcke);
-                            }
-                            ssl->sigType = DYNAMIC_TYPE_ECC;
-
-                            ret = wc_ecc_init_ex((ecc_key*)ssl->sigKey, ssl->heap, ssl->devId);
+                            ssl->hsType = DYNAMIC_TYPE_ECC;
+                            ret = AllocKey(ssl, ssl->hsType, &ssl->hsKey);
                             if (ret != 0) {
                                 goto exit_dcke;
                             }
@@ -20626,10 +21387,10 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                             ret = wc_EccPrivateKeyDecode(
                                 ssl->buffers.key->buffer,
                                 &i,
-                                (ecc_key*)ssl->sigKey,
+                                (ecc_key*)ssl->hsKey,
                                 ssl->buffers.key->length);
                             if (ret == 0) {
-                                private_key = (ecc_key*)ssl->sigKey;
+                                private_key = (ecc_key*)ssl->hsKey;
                                 if (wc_ecc_size(private_key) <
                                                 ssl->options.minEccKeySz) {
                                     WOLFSSL_MSG("ECC key too small");
@@ -20639,16 +21400,18 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         }
 
                         /* import peer ECC key */
-                        if ((idx - begin) + OPAQUE8_LEN > size) {
+                        if ((args->idx - args->begin) + OPAQUE8_LEN > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
-                        length = input[idx++];
+                        args->length = input[args->idx++];
 
-                        if ((idx - begin) + length > size) {
+                        if ((args->idx - args->begin) + args->length > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
+                        ssl->arrays->preMasterSz = ENCRYPT_LEN;
+
                     #ifdef HAVE_PK_CALLBACKS
                         /* if callback then use it for shared secret */
                         if (ssl->ctx->EccSharedSecretCb != NULL) {
@@ -20664,14 +21427,8 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
                         if (ssl->peerEccKey == NULL) {
                             /* alloc/init on demand */
-                            ssl->peerEccKey = (ecc_key*)XMALLOC(
-                                sizeof(ecc_key), ssl->heap, DYNAMIC_TYPE_ECC);
-                            if (ssl->peerEccKey == NULL) {
-                                WOLFSSL_MSG("PeerEccKey Memory error");
-                                ERROR_OUT(MEMORY_E, exit_dcke);
-                            }
-                            ret = wc_ecc_init_ex(ssl->peerEccKey, ssl->heap,
-                                                                ssl->devId);
+                            ret = AllocKey(ssl, DYNAMIC_TYPE_ECC,
+                                (void**)&ssl->peerEccKey);
                             if (ret != 0) {
                                 goto exit_dcke;
                             }
@@ -20685,12 +21442,16 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                             }
                         }
 
-                        if (wc_ecc_import_x963_ex(input + idx, length,
-                                ssl->peerEccKey, private_key->dp->id)) {
+                        if (wc_ecc_import_x963_ex(input + args->idx, args->length,
+                                        ssl->peerEccKey, private_key->dp->id)) {
                             ERROR_OUT(ECC_PEERKEY_ERROR, exit_dcke);
                         }
 
                         ssl->peerEccKeyPresent = 1;
+
+                        if (ret != 0) {
+                            goto exit_dcke;
+                        }
                         break;
                     }
                 #endif /* HAVE_ECC */
@@ -20699,18 +21460,30 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                     {
                         word16 clientPubSz;
 
-                        if ((idx - begin) + OPAQUE16_LEN > size) {
+                        if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
-                        ato16(input + idx, &clientPubSz);
-                        idx += OPAQUE16_LEN;
+                        ato16(input + args->idx, &clientPubSz);
+                        args->idx += OPAQUE16_LEN;
 
-                        if ((idx - begin) + clientPubSz > size) {
+                        if ((args->idx - args->begin) + clientPubSz > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
-                        ssl->sigLen = clientPubSz;
+                        args->sigSz = clientPubSz;
+
+                        ret = AllocKey(ssl, DYNAMIC_TYPE_DH,
+                                            (void**)&ssl->buffers.serverDH_Key);
+                        if (ret != 0) {
+                            goto exit_dcke;
+                        }
+
+                        ret = wc_DhSetKey(ssl->buffers.serverDH_Key,
+                            ssl->buffers.serverDH_P.buffer,
+                            ssl->buffers.serverDH_P.length,
+                            ssl->buffers.serverDH_G.buffer,
+                            ssl->buffers.serverDH_G.length);
                         break;
                     }
                 #endif /* !NO_DH */
@@ -20720,38 +21493,52 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         word16 clientSz;
 
                         /* Read in the PSK hint */
-                        if ((idx - begin) + OPAQUE16_LEN > size) {
+                        if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
-                        ato16(input + idx, &clientSz);
-                        idx += OPAQUE16_LEN;
+                        ato16(input + args->idx, &clientSz);
+                        args->idx += OPAQUE16_LEN;
                         if (clientSz > MAX_PSK_ID_LEN) {
                             ERROR_OUT(CLIENT_ID_ERROR, exit_dcke);
                         }
 
-                        if ((idx - begin) + clientSz > size) {
+                        if ((args->idx - args->begin) + clientSz > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
-                        XMEMCPY(ssl->arrays->client_identity, input + idx, clientSz);
-                        idx += clientSz;
+                        XMEMCPY(ssl->arrays->client_identity, input + args->idx,
+                                                                    clientSz);
+                        args->idx += clientSz;
                         ssl->arrays->client_identity[
                             min(clientSz, MAX_PSK_ID_LEN-1)] = 0;
 
                         /* Read in the DHE business */
-                        if ((idx - begin) + OPAQUE16_LEN > size) {
+                        if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
-                        ato16(input + idx, &clientSz);
-                        idx += OPAQUE16_LEN;
+                        ato16(input + args->idx, &clientSz);
+                        args->idx += OPAQUE16_LEN;
 
-                        if ((idx - begin) + clientSz > size) {
+                        if ((args->idx - args->begin) + clientSz > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
-                        ssl->sigLen = clientSz;
+                        args->sigSz = clientSz;
+
+                        ret = AllocKey(ssl, DYNAMIC_TYPE_DH,
+                                            (void**)&ssl->buffers.serverDH_Key);
+                        if (ret != 0) {
+                            goto exit_dcke;
+                        }
+
+                        ret = wc_DhSetKey(ssl->buffers.serverDH_Key,
+                            ssl->buffers.serverDH_P.buffer,
+                            ssl->buffers.serverDH_P.length,
+                            ssl->buffers.serverDH_G.buffer,
+                            ssl->buffers.serverDH_G.length);
+
                         break;
                     }
                 #endif /* !NO_DH && !NO_PSK */
@@ -20761,36 +21548,38 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         word16 clientSz;
 
                         /* Read in the PSK hint */
-                        if ((idx - begin) + OPAQUE16_LEN > size) {
+                        if ((args->idx - args->begin) + OPAQUE16_LEN > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
-                        ato16(input + idx, &clientSz);
-                        idx += OPAQUE16_LEN;
+                        ato16(input + args->idx, &clientSz);
+                        args->idx += OPAQUE16_LEN;
                         if (clientSz > MAX_PSK_ID_LEN) {
                             ERROR_OUT(CLIENT_ID_ERROR, exit_dcke);
                         }
-                        if ((idx - begin) + clientSz > size) {
+                        if ((args->idx - args->begin) + clientSz > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
                         XMEMCPY(ssl->arrays->client_identity,
-                                                       input + idx, clientSz);
-                        idx += clientSz;
+                                                   input + args->idx, clientSz);
+                        args->idx += clientSz;
                         ssl->arrays->client_identity[
                             min(clientSz, MAX_PSK_ID_LEN-1)] = 0;
 
                         /* import peer ECC key */
-                        if ((idx - begin) + OPAQUE8_LEN > size) {
+                        if ((args->idx - args->begin) + OPAQUE8_LEN > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
-                        length = input[idx++];
+                        args->length = input[args->idx++];
 
-                        if ((idx - begin) + length > size) {
+                        if ((args->idx - args->begin) + args->length > size) {
                             ERROR_OUT(BUFFER_ERROR, exit_dcke);
                         }
 
+                        args->sigSz = ENCRYPT_LEN - OPAQUE16_LEN;
+
                     #ifdef HAVE_PK_CALLBACKS
                         /* if callback then use it for shared secret */
                         if (ssl->ctx->EccSharedSecretCb != NULL) {
@@ -20805,14 +21594,8 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
                         if (ssl->peerEccKey == NULL) {
                             /* alloc/init on demand */
-                            ssl->peerEccKey = (ecc_key*)XMALLOC(
-                                sizeof(ecc_key), ssl->heap, DYNAMIC_TYPE_ECC);
-                            if (ssl->peerEccKey == NULL) {
-                                WOLFSSL_MSG("PeerEccKey Memory error");
-                                ERROR_OUT(MEMORY_E, exit_dcke);
-                            }
-                            ret = wc_ecc_init_ex(ssl->peerEccKey, ssl->heap,
-                                                                ssl->devId);
+                            ret = AllocKey(ssl, DYNAMIC_TYPE_ECC,
+                                (void**)&ssl->peerEccKey);
                             if (ret != 0) {
                                 goto exit_dcke;
                             }
@@ -20826,9 +21609,8 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                                 goto exit_dcke;
                             }
                         }
-
-                        if (wc_ecc_import_x963_ex(input + idx, length,
-                                ssl->peerEccKey, ssl->eccTempKey->dp->id)) {
+                        if (wc_ecc_import_x963_ex(input + args->idx, args->length,
+                                 ssl->peerEccKey, ssl->eccTempKey->dp->id)) {
                             ERROR_OUT(ECC_PEERKEY_ERROR, exit_dcke);
                         }
 
@@ -20855,12 +21637,13 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 #ifndef NO_RSA
                     case rsa_kea:
                     {
+                        RsaKey* key = (RsaKey*)ssl->hsKey;
                         ret = RsaDec(ssl,
-                            input + idx,
-                            length,
-                            output,
-                            &ssl->sigLen,
-                            (RsaKey*)ssl->sigKey,
+                            input + args->idx,
+                            args->length,
+                            &args->output,
+                            &args->sigSz,
+                            key,
                         #if defined(HAVE_PK_CALLBACKS)
                             ssl->buffers.key->buffer,
                             ssl->buffers.key->length,
@@ -20889,15 +21672,13 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                     {
                         ecc_key* private_key = ssl->eccTempKey;
                         if (ssl->specs.static_ecdh) {
-                            private_key = (ecc_key*)ssl->sigKey;
+                            private_key = (ecc_key*)ssl->hsKey;
                         }
 
-                        ssl->arrays->preMasterSz = ENCRYPT_LEN;
-
                         /* Generate shared secret */
                         ret = EccSharedSecret(ssl,
                             private_key, ssl->peerEccKey,
-                            input + idx, &length,
+                            input + args->idx, &args->length,
                             ssl->arrays->preMasterSecret,
                             &ssl->arrays->preMasterSz,
                             WOLFSSL_SERVER_END,
@@ -20913,19 +21694,11 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 #ifndef NO_DH
                     case diffie_hellman_kea:
                     {
-                        word16 clientPubSz = (word16)ssl->sigLen;
-
-                        ret = DhAgree(ssl,
-                            ssl->buffers.serverDH_P.buffer,
-                            ssl->buffers.serverDH_P.length,
-                            ssl->buffers.serverDH_G.buffer,
-                            ssl->buffers.serverDH_G.length,
+                        ret = DhAgree(ssl, ssl->buffers.serverDH_Key,
                             ssl->buffers.serverDH_Priv.buffer,
-                            &ssl->buffers.serverDH_Priv.length,
-                            NULL,
-                            0,
-                            input + idx,
-                            clientPubSz,
+                            ssl->buffers.serverDH_Priv.length,
+                            input + args->idx,
+                            (word16)args->sigSz,
                             ssl->arrays->preMasterSecret,
                             &ssl->arrays->preMasterSz);
                         break;
@@ -20934,21 +21707,12 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 #if !defined(NO_DH) && !defined(NO_PSK)
                     case dhe_psk_kea:
                     {
-                        byte* pms = ssl->arrays->preMasterSecret;
-                        word16 clientSz = ssl->sigLen;
-
-                        ret = DhAgree(ssl,
-                            ssl->buffers.serverDH_P.buffer,
-                            ssl->buffers.serverDH_P.length,
-                            ssl->buffers.serverDH_G.buffer,
-                            ssl->buffers.serverDH_G.length,
+                        ret = DhAgree(ssl, ssl->buffers.serverDH_Key,
                             ssl->buffers.serverDH_Priv.buffer,
-                            &ssl->buffers.serverDH_Priv.length,
-                            NULL,
-                            0,
-                            input + idx,
-                            clientSz,
-                            pms + OPAQUE16_LEN,
+                            ssl->buffers.serverDH_Priv.length,
+                            input + args->idx,
+                            (word16)args->sigSz,
+                            ssl->arrays->preMasterSecret + OPAQUE16_LEN,
                             &ssl->arrays->preMasterSz);
                         break;
                     }
@@ -20956,14 +21720,12 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                 #if defined(HAVE_ECC) && !defined(NO_PSK)
                     case ecdhe_psk_kea:
                     {
-                        ssl->sigLen = ENCRYPT_LEN - OPAQUE16_LEN;
-
                         /* Generate shared secret */
                         ret = EccSharedSecret(ssl,
                             ssl->eccTempKey, ssl->peerEccKey,
-                            input + idx, &length,
+                            input + args->idx, &args->length,
                             ssl->arrays->preMasterSecret + OPAQUE16_LEN,
-                            &ssl->sigLen,
+                            &args->sigSz,
                             WOLFSSL_SERVER_END,
                         #ifdef HAVE_PK_CALLBACKS
                             ssl->EccSharedSecretCtx
@@ -20994,10 +21756,10 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                     case rsa_kea:
                     {
                         /* Add the signature length to idx */
-                        idx += length;
+                        args->idx += args->length;
 
-                        if (ssl->sigLen == SECRET_LEN && *output != NULL) {
-                            XMEMCPY(ssl->arrays->preMasterSecret, *output, SECRET_LEN);
+                        if (args->sigSz == SECRET_LEN && args->output != NULL) {
+                            XMEMCPY(ssl->arrays->preMasterSecret, args->output, SECRET_LEN);
                             if (ssl->arrays->preMasterSecret[0] != ssl->chVersion.major ||
                                 ssl->arrays->preMasterSecret[1] != ssl->chVersion.minor) {
                                 ERROR_OUT(PMS_VERSION_ERROR, exit_dcke);
@@ -21025,15 +21787,14 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                     case ecc_diffie_hellman_kea:
                     {
                         /* skip past the imported peer key */
-                        idx += length;
+                        args->idx += args->length;
                         break;
                     }
                 #endif /* HAVE_ECC */
                 #ifndef NO_DH
                     case diffie_hellman_kea:
                     {
-                        word16 clientPubSz = (word16)ssl->sigLen;
-                        idx += clientPubSz;
+                        args->idx += (word16)args->sigSz;
                         break;
                     }
                 #endif /* !NO_DH */
@@ -21041,9 +21802,9 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                     case dhe_psk_kea:
                     {
                         byte* pms = ssl->arrays->preMasterSecret;
-                        word16 clientSz = ssl->sigLen;
+                        word16 clientSz = (word16)args->sigSz;
 
-                        idx += clientSz;
+                        args->idx += clientSz;
                         c16toa((word16)ssl->arrays->preMasterSz, pms);
                         ssl->arrays->preMasterSz += OPAQUE16_LEN;
                         pms += ssl->arrays->preMasterSz;
@@ -21062,8 +21823,10 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                         c16toa((word16) ssl->arrays->psk_keySz, pms);
                         pms += OPAQUE16_LEN;
 
-                        XMEMCPY(pms, ssl->arrays->psk_key, ssl->arrays->psk_keySz);
-                        ssl->arrays->preMasterSz += ssl->arrays->psk_keySz + OPAQUE16_LEN;
+                        XMEMCPY(pms, ssl->arrays->psk_key,
+                                                    ssl->arrays->psk_keySz);
+                        ssl->arrays->preMasterSz += ssl->arrays->psk_keySz +
+                                                                OPAQUE16_LEN;
                         break;
                     }
                 #endif /* !NO_DH && !NO_PSK */
@@ -21071,13 +21834,14 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
                     case ecdhe_psk_kea:
                     {
                         byte* pms = ssl->arrays->preMasterSecret;
+                        word16 clientSz = (word16)args->sigSz;
 
                         /* skip past the imported peer key */
-                        idx += length;
+                        args->idx += args->length;
 
                         /* Add preMasterSecret */
-                        c16toa((word16)ssl->sigLen, pms);
-                        ssl->arrays->preMasterSz += OPAQUE16_LEN + ssl->sigLen;
+                        c16toa(clientSz, pms);
+                        ssl->arrays->preMasterSz += OPAQUE16_LEN + clientSz;
                         pms += ssl->arrays->preMasterSz;
 
                         /* Use the PSK hint to look up the PSK and add it to the
@@ -21120,19 +21884,19 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
                 if (ssl->options.haveQSH) {
                     /* extension name */
-                    ato16(input + idx, &name);
-                    idx += OPAQUE16_LEN;
+                    ato16(input + args->idx, &name);
+                    args->idx += OPAQUE16_LEN;
 
                     if (name == TLSX_QUANTUM_SAFE_HYBRID) {
                         int    qshSz;
                         /* if qshSz is larger than 0 it is the
                            length of buffer used */
                         if ((qshSz = TLSX_QSHCipher_Parse(ssl,
-                                input + idx,
-                                size - idx + begin, 1)) < 0) {
+                                input + args->idx,
+                                size - args->idx + args->begin, 1)) < 0) {
                             ERROR_OUT(qshSz, exit_dcke);
                         }
-                        idx += qshSz;
+                        args->idx += qshSz;
                     }
                     else {
                         /* unknown extension sent client ignored handshake */
@@ -21154,7 +21918,7 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
             case KEYSHARE_END:
             {
                 /* Set final index */
-                *inOutIdx = idx;
+                *inOutIdx = args->idx;
 
                 ssl->options.clientState = CLIENT_KEYEXCHANGE_COMPLETE;
             #ifndef NO_CERTS
@@ -21172,27 +21936,13 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 
         WOLFSSL_LEAVE("DoClientKeyExchange", ret);
 
-        /* Handle cleanup for stack variables here */
-
-
     #ifdef WOLFSSL_ASYNC_CRYPT
-        /* Handle WC_PENDING_E */
+        /* Handle async operation */
         if (ret == WC_PENDING_E) {
-            /* Store variables needed for async */
-            output_lcl = ssl->async.output;
-            XMEMSET(&ssl->async, 0, sizeof(ssl->async));
-            ssl->async.idx = idx;
-            ssl->async.length = length;
-            ssl->async.output = output_lcl;
-
             /* Mark message as not recevied so it can process again */
             ssl->msgsReceived.got_client_key_exchange = 0;
 
-            /* Push event to queue */
-            ret = wolfAsync_EventQueuePush(&ssl->ctx->event_queue, &ssl->event);
-            if (ret == 0) {
-                return WC_PENDING_E;
-            }
+            return ret;
         }
     #endif /* WOLFSSL_ASYNC_CRYPT */
 
@@ -21201,6 +21951,7 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
         ssl->arrays->preMasterSz = 0;
 
         /* Final cleanup */
+        FreeDckeArgs(ssl, args);
         FreeKeyExchange(ssl);
 
         return ret;
@@ -21226,6 +21977,82 @@ int DoSessionTicket(WOLFSSL* ssl, const byte* input, word32* inOutIdx,
 #endif /* HAVE_STUNNEL */
 #endif /* NO_WOLFSSL_SERVER */
 
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+int wolfSSL_AsyncPop(WOLFSSL* ssl, byte* state)
+{
+    /* Polls the async event attached to this session, if any. Returns
+     * BAD_FUNC_ARG for a NULL ssl, WC_NOT_PENDING_E when no async device
+     * is attached, otherwise the wolfAsync_EventPop() result. */
+    int ret = WC_NOT_PENDING_E;
+    WC_ASYNC_DEV* dev;
+    WOLF_EVENT* evt;
+
+    if (ssl == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    dev = ssl->async.dev;
+    if (dev != NULL) {
+        evt = &dev->event;
+        ret = wolfAsync_EventPop(evt, WOLF_EVENT_TYPE_ASYNC_WOLFSSL);
+
+        /* any result other than pending / not-pending releases the event */
+        if (ret != WC_PENDING_E && ret != WC_NOT_PENDING_E) {
+            /* move the caller's state forward unless the event is flagged
+             * to be called again */
+            if (state != NULL &&
+                    (evt->flags & WC_ASYNC_FLAG_CALL_AGAIN) == 0) {
+                (*state)++;
+            }
+
+            /* wipe the event */
+            XMEMSET(evt, 0, sizeof(WOLF_EVENT));
+
+            /* detach the async device from the session */
+            ssl->async.dev = NULL;
+        }
+    }
+
+    WOLFSSL_LEAVE("wolfSSL_AsyncPop", ret);
+
+    return ret;
+}
+
+int wolfSSL_AsyncPush(WOLFSSL* ssl, WC_ASYNC_DEV* asyncDev, word32 flags)
+{
+    /* Registers asyncDev's event with the session and queues it on the
+     * context event queue. Returns WC_PENDING_E when both steps succeed,
+     * BAD_FUNC_ARG on a NULL argument, otherwise the failing step's code. */
+    int ret;
+    WOLF_EVENT* evt;
+
+    if (ssl == NULL || asyncDev == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    evt = &asyncDev->event;
+    ret = wolfAsync_EventInit(evt, WOLF_EVENT_TYPE_ASYNC_WOLFSSL, ssl, flags);
+
+    if (ret == 0) {
+        /* remember the device so wolfSSL_AsyncPop can find the event */
+        ssl->async.dev = asyncDev;
+        ret = wolfAsync_EventQueuePush(&ssl->ctx->event_queue, evt);
+
+        /* a queued event is reported to the caller as "pending" */
+        if (ret == 0) {
+            ret = WC_PENDING_E;
+        }
+    }
+
+    WOLFSSL_LEAVE("wolfSSL_AsyncPush", ret);
+
+    return ret;
+}
+
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+
 #undef ERROR_OUT
 
 #endif /* WOLFCRYPT_ONLY */
diff --git a/src/keys.c b/src/keys.c
index c16bd3799..5d9c374aa 100644
--- a/src/keys.c
+++ b/src/keys.c
@@ -1053,7 +1053,7 @@ int SetCipherSpecs(WOLFSSL* ssl)
         return UNSUPPORTED_SUITE;
     }   /* switch */
     }   /* if     */
-    if (ssl->options.cipherSuite0 != ECC_BYTE && 
+    if (ssl->options.cipherSuite0 != ECC_BYTE &&
             ssl->options.cipherSuite0 != CHACHA_BYTE) {   /* normal suites */
     switch (ssl->options.cipherSuite) {
 
@@ -1653,7 +1653,7 @@ int SetCipherSpecs(WOLFSSL* ssl)
 
         break;
 #endif
-            
+
 #ifdef BUILD_TLS_RSA_WITH_HC_128_SHA
         case TLS_RSA_WITH_HC_128_SHA :
             ssl->specs.bulk_cipher_algorithm = wolfssl_hc128;
@@ -1667,7 +1667,7 @@ int SetCipherSpecs(WOLFSSL* ssl)
             ssl->specs.key_size              = HC_128_KEY_SIZE;
             ssl->specs.block_size            = 0;
             ssl->specs.iv_size               = HC_128_IV_SIZE;
-            
+
             break;
 #endif
 
@@ -1684,7 +1684,7 @@ int SetCipherSpecs(WOLFSSL* ssl)
             ssl->specs.key_size              = HC_128_KEY_SIZE;
             ssl->specs.block_size            = 0;
             ssl->specs.iv_size               = HC_128_IV_SIZE;
-            
+
             break;
 #endif
 
@@ -1701,7 +1701,7 @@ int SetCipherSpecs(WOLFSSL* ssl)
             ssl->specs.key_size              = AES_128_KEY_SIZE;
             ssl->specs.iv_size               = AES_IV_SIZE;
             ssl->specs.block_size            = AES_BLOCK_SIZE;
-            
+
             break;
 #endif
 
@@ -1718,7 +1718,7 @@ int SetCipherSpecs(WOLFSSL* ssl)
             ssl->specs.key_size              = AES_256_KEY_SIZE;
             ssl->specs.iv_size               = AES_IV_SIZE;
             ssl->specs.block_size            = AES_BLOCK_SIZE;
-            
+
             break;
 #endif
 
@@ -1827,7 +1827,7 @@ int SetCipherSpecs(WOLFSSL* ssl)
 
         break;
 #endif
-    
+
 #ifdef BUILD_TLS_RSA_WITH_CAMELLIA_256_CBC_SHA
     case TLS_RSA_WITH_CAMELLIA_256_CBC_SHA :
         ssl->specs.bulk_cipher_algorithm = wolfssl_camellia;
@@ -1978,7 +1978,7 @@ int SetCipherSpecs(WOLFSSL* ssl)
             ssl->specs.key_size              = IDEA_KEY_SIZE;
             ssl->specs.block_size            = IDEA_BLOCK_SIZE;
             ssl->specs.iv_size               = IDEA_IV_SIZE;
-            
+
             break;
 #endif
 
@@ -2049,7 +2049,7 @@ static int SetPrefix(byte* sha_input, int idx)
         break;
     default:
         WOLFSSL_MSG("Set Prefix error, bad input");
-        return 0; 
+        return 0;
     }
     return 1;
 }
@@ -2070,22 +2070,20 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
             dec->arc4 = (Arc4*)XMALLOC(sizeof(Arc4), heap, DYNAMIC_TYPE_CIPHER);
         if (dec && dec->arc4 == NULL)
             return MEMORY_E;
-#ifdef WOLFSSL_ASYNC_CRYPT
-        if (devId != INVALID_DEVID) {
-            if (enc) {
-                if (wc_Arc4AsyncInit(enc->arc4, devId) != 0) {
-                    WOLFSSL_MSG("Arc4AsyncInit failed in SetKeys");
-                    return ASYNC_INIT_E;
-                }
-            }
-            if (dec) {
-                if (wc_Arc4AsyncInit(dec->arc4, devId) != 0) {
-                    WOLFSSL_MSG("Arc4AsyncInit failed in SetKeys");
-                    return ASYNC_INIT_E;
-                }
+
+        if (enc) {
+            if (wc_Arc4Init(enc->arc4, heap, devId) != 0) {
+                WOLFSSL_MSG("Arc4Init failed in SetKeys");
+                return ASYNC_INIT_E;
             }
         }
-#endif
+        if (dec) {
+            if (wc_Arc4Init(dec->arc4, heap, devId) != 0) {
+                WOLFSSL_MSG("Arc4Init failed in SetKeys");
+                return ASYNC_INIT_E;
+            }
+        }
+
         if (side == WOLFSSL_CLIENT_END) {
             if (enc)
                 wc_Arc4SetKey(enc->arc4, keys->client_write_key, sz);
@@ -2103,9 +2101,9 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
         if (dec)
             dec->setup = 1;
     }
-#endif
+#endif /* BUILD_ARC4 */
+
 
-    
 #if defined(HAVE_CHACHA) && defined(HAVE_POLY1305)
     /* Check that the max implicit iv size is suffecient */
     #if (AEAD_MAX_IMP_SZ < 12) /* CHACHA20_IMP_IV_SZ */
@@ -2165,7 +2163,8 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
         if (dec)
             dec->setup = 1;
     }
-#endif
+#endif /* HAVE_CHACHA && HAVE_POLY1305 */
+
 
 #ifdef HAVE_HC128
     /* check that buffer sizes are sufficient */
@@ -2214,8 +2213,8 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
         if (dec)
             dec->setup = 1;
     }
-#endif
-    
+#endif /* HAVE_HC128 */
+
 #ifdef BUILD_RABBIT
     /* check that buffer sizes are sufficient */
     #if (MAX_WRITE_IV_SZ < 8) /* RABBIT_IV_SIZE */
@@ -2263,8 +2262,8 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
         if (dec)
             dec->setup = 1;
     }
-#endif
-    
+#endif /* BUILD_RABBIT */
+
 #ifdef BUILD_DES3
     /* check that buffer sizes are sufficient */
     #if (MAX_WRITE_IV_SZ < 8) /* DES_IV_SIZE */
@@ -2274,30 +2273,34 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
     if (specs->bulk_cipher_algorithm == wolfssl_triple_des) {
         int desRet = 0;
 
-        if (enc && enc->des3 == NULL)
-            enc->des3 = (Des3*)XMALLOC(sizeof(Des3), heap, DYNAMIC_TYPE_CIPHER);
-        if (enc && enc->des3 == NULL)
-            return MEMORY_E;
-        if (dec && dec->des3 == NULL)
-            dec->des3 = (Des3*)XMALLOC(sizeof(Des3), heap, DYNAMIC_TYPE_CIPHER);
-        if (dec && dec->des3 == NULL)
-            return MEMORY_E;
-#ifdef WOLFSSL_ASYNC_CRYPT
-        if (devId != INVALID_DEVID) {
-            if (enc) {
-                if (wc_Des3AsyncInit(enc->des3, devId) != 0) {
-                    WOLFSSL_MSG("Des3AsyncInit failed in SetKeys");
-                    return ASYNC_INIT_E;
-                }
-            }
-            if (dec) {
-                if (wc_Des3AsyncInit(dec->des3, devId) != 0) {
-                    WOLFSSL_MSG("Des3AsyncInit failed in SetKeys");
-                    return ASYNC_INIT_E;
-                }
+        if (enc) {
+            if (enc->des3 == NULL)
+                enc->des3 = (Des3*)XMALLOC(sizeof(Des3), heap, DYNAMIC_TYPE_CIPHER);
+            if (enc->des3 == NULL)
+                return MEMORY_E;
+            XMEMSET(enc->des3, 0, sizeof(Des3));
+        }
+        if (dec) {
+            if (dec->des3 == NULL)
+                dec->des3 = (Des3*)XMALLOC(sizeof(Des3), heap, DYNAMIC_TYPE_CIPHER);
+            if (dec->des3 == NULL)
+                return MEMORY_E;
+            XMEMSET(dec->des3, 0, sizeof(Des3));
+        }
+
+        if (enc) {
+            if (wc_Des3Init(enc->des3, heap, devId) != 0) {
+                WOLFSSL_MSG("Des3Init failed in SetKeys");
+                return ASYNC_INIT_E;
             }
         }
-#endif
+        if (dec) {
+            if (wc_Des3Init(dec->des3, heap, devId) != 0) {
+                WOLFSSL_MSG("Des3Init failed in SetKeys");
+                return ASYNC_INIT_E;
+            }
+        }
+
         if (side == WOLFSSL_CLIENT_END) {
             if (enc) {
                 desRet = wc_Des3_SetKey(enc->des3, keys->client_write_key,
@@ -2327,7 +2330,7 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
         if (dec)
             dec->setup = 1;
     }
-#endif
+#endif /* BUILD_DES3 */
 
 #ifdef BUILD_AES
     /* check that buffer sizes are sufficient */
@@ -2338,30 +2341,33 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
     if (specs->bulk_cipher_algorithm == wolfssl_aes) {
         int aesRet = 0;
 
-        if (enc && enc->aes == NULL)
-            enc->aes = (Aes*)XMALLOC(sizeof(Aes), heap, DYNAMIC_TYPE_CIPHER);
-        if (enc && enc->aes == NULL)
-            return MEMORY_E;
-        if (dec && dec->aes == NULL)
-            dec->aes = (Aes*)XMALLOC(sizeof(Aes), heap, DYNAMIC_TYPE_CIPHER);
-        if (dec && dec->aes == NULL)
-            return MEMORY_E;
-#ifdef WOLFSSL_ASYNC_CRYPT
-        if (devId != INVALID_DEVID) {
-            if (enc) {
-                if (wc_AesAsyncInit(enc->aes, devId) != 0) {
-                    WOLFSSL_MSG("AesAsyncInit failed in SetKeys");
-                    return ASYNC_INIT_E;
-                }
-            }
-            if (dec) {
-                if (wc_AesAsyncInit(dec->aes, devId) != 0) {
-                    WOLFSSL_MSG("AesAsyncInit failed in SetKeys");
-                    return ASYNC_INIT_E;
-                }
+        if (enc) {
+            if (enc->aes == NULL)
+                enc->aes = (Aes*)XMALLOC(sizeof(Aes), heap, DYNAMIC_TYPE_CIPHER);
+            if (enc->aes == NULL)
+                return MEMORY_E;
+            XMEMSET(enc->aes, 0, sizeof(Aes));
+        }
+        if (dec) {
+            if (dec->aes == NULL)
+                dec->aes = (Aes*)XMALLOC(sizeof(Aes), heap, DYNAMIC_TYPE_CIPHER);
+            if (dec->aes == NULL)
+                return MEMORY_E;
+            XMEMSET(dec->aes, 0, sizeof(Aes));
+        }
+        if (enc) {
+            if (wc_AesInit(enc->aes, heap, devId) != 0) {
+                WOLFSSL_MSG("AesInit failed in SetKeys");
+                return ASYNC_INIT_E;
             }
         }
-#endif
+        if (dec) {
+            if (wc_AesInit(dec->aes, heap, devId) != 0) {
+                WOLFSSL_MSG("AesInit failed in SetKeys");
+                return ASYNC_INIT_E;
+            }
+        }
+
         if (side == WOLFSSL_CLIENT_END) {
             if (enc) {
                 aesRet = wc_AesSetKey(enc->aes, keys->client_write_key,
@@ -2395,7 +2401,7 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
         if (dec)
             dec->setup = 1;
     }
-#endif
+#endif /* BUILD_AES */
 
 #ifdef BUILD_AESGCM
     /* check that buffer sizes are sufficient */
@@ -2412,14 +2418,33 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
     if (specs->bulk_cipher_algorithm == wolfssl_aes_gcm) {
         int gcmRet;
 
-        if (enc && enc->aes == NULL)
-            enc->aes = (Aes*)XMALLOC(sizeof(Aes), heap, DYNAMIC_TYPE_CIPHER);
-        if (enc && enc->aes == NULL)
-            return MEMORY_E;
-        if (dec && dec->aes == NULL)
-            dec->aes = (Aes*)XMALLOC(sizeof(Aes), heap, DYNAMIC_TYPE_CIPHER);
-        if (dec && dec->aes == NULL)
-            return MEMORY_E;
+        if (enc) {
+            if (enc->aes == NULL)
+                enc->aes = (Aes*)XMALLOC(sizeof(Aes), heap, DYNAMIC_TYPE_CIPHER);
+            if (enc->aes == NULL)
+                return MEMORY_E;
+            XMEMSET(enc->aes, 0, sizeof(Aes));
+        }
+        if (dec) {
+            if (dec->aes == NULL)
+                dec->aes = (Aes*)XMALLOC(sizeof(Aes), heap, DYNAMIC_TYPE_CIPHER);
+            if (dec->aes == NULL)
+                return MEMORY_E;
+            XMEMSET(dec->aes, 0, sizeof(Aes));
+        }
+
+        if (enc) {
+            if (wc_AesInit(enc->aes, heap, devId) != 0) {
+                WOLFSSL_MSG("AesInit failed in SetKeys");
+                return ASYNC_INIT_E;
+            }
+        }
+        if (dec) {
+            if (wc_AesInit(dec->aes, heap, devId) != 0) {
+                WOLFSSL_MSG("AesInit failed in SetKeys");
+                return ASYNC_INIT_E;
+            }
+        }
 
         if (side == WOLFSSL_CLIENT_END) {
             if (enc) {
@@ -2458,7 +2483,7 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
         if (dec)
             dec->setup = 1;
     }
-#endif
+#endif /* BUILD_AESGCM */
 
 #ifdef HAVE_AESCCM
     /* check that buffer sizes are sufficient (CCM is same size as GCM) */
@@ -2475,14 +2500,33 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
     if (specs->bulk_cipher_algorithm == wolfssl_aes_ccm) {
         int CcmRet;
 
-        if (enc && enc->aes == NULL)
-            enc->aes = (Aes*)XMALLOC(sizeof(Aes), heap, DYNAMIC_TYPE_CIPHER);
-        if (enc && enc->aes == NULL)
-            return MEMORY_E;
-        if (dec && dec->aes == NULL)
-            dec->aes = (Aes*)XMALLOC(sizeof(Aes), heap, DYNAMIC_TYPE_CIPHER);
-        if (dec && dec->aes == NULL)
-            return MEMORY_E;
+        if (enc) {
+            if (enc->aes == NULL)
+                enc->aes = (Aes*)XMALLOC(sizeof(Aes), heap, DYNAMIC_TYPE_CIPHER);
+            if (enc->aes == NULL)
+                return MEMORY_E;
+            XMEMSET(enc->aes, 0, sizeof(Aes));
+        }
+        if (dec) {
+            if (dec->aes == NULL)
+                dec->aes = (Aes*)XMALLOC(sizeof(Aes), heap, DYNAMIC_TYPE_CIPHER);
+            if (dec->aes == NULL)
+                return MEMORY_E;
+            XMEMSET(dec->aes, 0, sizeof(Aes));
+        }
+
+        if (enc) {
+            if (wc_AesInit(enc->aes, heap, devId) != 0) {
+                WOLFSSL_MSG("AesInit failed in SetKeys");
+                return ASYNC_INIT_E;
+            }
+        }
+        if (dec) {
+            if (wc_AesInit(dec->aes, heap, devId) != 0) {
+                WOLFSSL_MSG("AesInit failed in SetKeys");
+                return ASYNC_INIT_E;
+            }
+        }
 
         if (side == WOLFSSL_CLIENT_END) {
             if (enc) {
@@ -2529,7 +2573,7 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
         if (dec)
             dec->setup = 1;
     }
-#endif
+#endif /* HAVE_AESCCM */
 
 #ifdef HAVE_CAMELLIA
     /* check that buffer sizes are sufficient */
@@ -2581,7 +2625,7 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
         if (dec)
             dec->setup = 1;
     }
-#endif
+#endif /* HAVE_CAMELLIA */
 
 #ifdef HAVE_IDEA
     /* check that buffer sizes are sufficient */
@@ -2635,7 +2679,7 @@ static int SetKeys(Ciphers* enc, Ciphers* dec, Keys* keys, CipherSpecs* specs,
         if (dec)
             dec->setup = 1;
     }
-#endif
+#endif /* HAVE_IDEA */
 
 #ifdef HAVE_NULL_CIPHER
     if (specs->bulk_cipher_algorithm == wolfssl_cipher_null) {
@@ -2686,6 +2730,7 @@ static int SetAuthKeys(OneTimeAuth* authentication, Keys* keys,
         (void)keys;
         (void)specs;
         (void)devId;
+        (void)authentication;
 
         return 0;
 }
@@ -2892,12 +2937,12 @@ int StoreKeys(WOLFSSL* ssl, const byte* keyData)
 #ifndef NO_OLD_TLS
 int DeriveKeys(WOLFSSL* ssl)
 {
-    int    length = 2 * ssl->specs.hash_size + 
+    int    length = 2 * ssl->specs.hash_size +
                     2 * ssl->specs.key_size  +
                     2 * ssl->specs.iv_size;
     int    rounds = (length + MD5_DIGEST_SIZE - 1 ) / MD5_DIGEST_SIZE, i;
     int    ret = 0;
-    
+
 #ifdef WOLFSSL_SMALL_STACK
     byte*  shaOutput;
     byte*  md5Input;
@@ -2913,9 +2958,9 @@ int DeriveKeys(WOLFSSL* ssl)
     Md5    md5[1];
     Sha    sha[1];
 #endif
-    
+
 #ifdef WOLFSSL_SMALL_STACK
-    shaOutput = (byte*)XMALLOC(SHA_DIGEST_SIZE, 
+    shaOutput = (byte*)XMALLOC(SHA_DIGEST_SIZE,
                                             NULL, DYNAMIC_TYPE_TMP_BUFFER);
     md5Input  = (byte*)XMALLOC(SECRET_LEN + SHA_DIGEST_SIZE,
                                             NULL, DYNAMIC_TYPE_TMP_BUFFER);
@@ -2925,7 +2970,7 @@ int DeriveKeys(WOLFSSL* ssl)
                                             NULL, DYNAMIC_TYPE_TMP_BUFFER);
     md5       =  (Md5*)XMALLOC(sizeof(Md5), NULL, DYNAMIC_TYPE_TMP_BUFFER);
     sha       =  (Sha*)XMALLOC(sizeof(Sha), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    
+
     if (shaOutput == NULL || md5Input == NULL || shaInput == NULL ||
         keyData   == NULL || md5      == NULL || sha      == NULL) {
         if (shaOutput) XFREE(shaOutput, NULL, DYNAMIC_TYPE_TMP_BUFFER);
@@ -2934,7 +2979,7 @@ int DeriveKeys(WOLFSSL* ssl)
         if (keyData)   XFREE(keyData,   NULL, DYNAMIC_TYPE_TMP_BUFFER);
         if (md5)       XFREE(md5,       NULL, DYNAMIC_TYPE_TMP_BUFFER);
         if (sha)       XFREE(sha,       NULL, DYNAMIC_TYPE_TMP_BUFFER);
-        
+
         return MEMORY_E;
     }
 #endif
@@ -3045,7 +3090,7 @@ static int MakeSslMasterSecret(WOLFSSL* ssl)
                                             NULL, DYNAMIC_TYPE_TMP_BUFFER);
     md5       =  (Md5*)XMALLOC(sizeof(Md5), NULL, DYNAMIC_TYPE_TMP_BUFFER);
     sha       =  (Sha*)XMALLOC(sizeof(Sha), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    
+
     if (shaOutput == NULL || md5Input == NULL || shaInput == NULL ||
                              md5      == NULL || sha      == NULL) {
         if (shaOutput) XFREE(shaOutput, NULL, DYNAMIC_TYPE_TMP_BUFFER);
@@ -3053,15 +3098,15 @@ static int MakeSslMasterSecret(WOLFSSL* ssl)
         if (shaInput)  XFREE(shaInput,  NULL, DYNAMIC_TYPE_TMP_BUFFER);
         if (md5)       XFREE(md5,       NULL, DYNAMIC_TYPE_TMP_BUFFER);
         if (sha)       XFREE(sha,       NULL, DYNAMIC_TYPE_TMP_BUFFER);
-        
+
         return MEMORY_E;
     }
 #endif
 
     wc_InitMd5(md5);
-    
+
     ret = wc_InitSha(sha);
-    
+
     if (ret == 0) {
         XMEMCPY(md5Input, ssl->arrays->preMasterSecret, pmsSz);
 
diff --git a/src/ssl.c b/src/ssl.c
old mode 100644
new mode 100755
index e13c6604c..751cef13d
--- a/src/ssl.c
+++ b/src/ssl.c
@@ -333,7 +333,7 @@ int wolfSSL_CTX_new_rng(WOLFSSL_CTX* ctx)
     }
 
 #ifndef HAVE_FIPS
-    ret = wc_InitRng_ex(rng, ctx->heap);
+    ret = wc_InitRng_ex(rng, ctx->heap, ctx->devId);
 #else
     ret = wc_InitRng(rng);
 #endif
@@ -852,38 +852,38 @@ int wolfSSL_GetObjectSize(void)
     printf("sizeof suites           = %lu\n", sizeof(Suites));
     printf("sizeof ciphers(2)       = %lu\n", sizeof(Ciphers));
 #ifndef NO_RC4
-    printf("    sizeof arc4         = %lu\n", sizeof(Arc4));
+    printf("\tsizeof arc4         = %lu\n", sizeof(Arc4));
 #endif
-    printf("    sizeof aes          = %lu\n", sizeof(Aes));
+    printf("\tsizeof aes          = %lu\n", sizeof(Aes));
 #ifndef NO_DES3
-    printf("    sizeof des3         = %lu\n", sizeof(Des3));
+    printf("\tsizeof des3         = %lu\n", sizeof(Des3));
 #endif
 #ifndef NO_RABBIT
-    printf("    sizeof rabbit       = %lu\n", sizeof(Rabbit));
+    printf("\tsizeof rabbit       = %lu\n", sizeof(Rabbit));
 #endif
 #ifdef HAVE_CHACHA
-    printf("    sizeof chacha       = %lu\n", sizeof(ChaCha));
+    printf("\tsizeof chacha       = %lu\n", sizeof(ChaCha));
 #endif
     printf("sizeof cipher specs     = %lu\n", sizeof(CipherSpecs));
     printf("sizeof keys             = %lu\n", sizeof(Keys));
     printf("sizeof Hashes(2)        = %lu\n", sizeof(Hashes));
 #ifndef NO_MD5
-    printf("    sizeof MD5          = %lu\n", sizeof(Md5));
+    printf("\tsizeof MD5          = %lu\n", sizeof(Md5));
 #endif
 #ifndef NO_SHA
-    printf("    sizeof SHA          = %lu\n", sizeof(Sha));
+    printf("\tsizeof SHA          = %lu\n", sizeof(Sha));
 #endif
 #ifdef WOLFSSL_SHA224
     printf("    sizeof SHA224       = %lu\n", sizeof(Sha224));
 #endif
 #ifndef NO_SHA256
-    printf("    sizeof SHA256       = %lu\n", sizeof(Sha256));
+    printf("\tsizeof SHA256       = %lu\n", sizeof(Sha256));
 #endif
 #ifdef WOLFSSL_SHA384
-    printf("    sizeof SHA384       = %lu\n", sizeof(Sha384));
+    printf("\tsizeof SHA384       = %lu\n", sizeof(Sha384));
 #endif
 #ifdef WOLFSSL_SHA384
-    printf("    sizeof SHA512       = %lu\n", sizeof(Sha512));
+    printf("\tsizeof SHA512       = %lu\n", sizeof(Sha512));
 #endif
     printf("sizeof Buffers          = %lu\n", sizeof(Buffers));
     printf("sizeof Options          = %lu\n", sizeof(Options));
@@ -1069,7 +1069,7 @@ int wolfSSL_GetOutputSize(WOLFSSL* ssl, int inSz)
     if (inSz > maxSize)
         return INPUT_SIZE_E;
 
-    return BuildMessage(ssl, NULL, 0, NULL, inSz, application_data, 0, 1);
+    return BuildMessage(ssl, NULL, 0, NULL, inSz, application_data, 0, 1, 0);
 }
 
 
@@ -1144,24 +1144,24 @@ int wolfSSL_SetTmpDH(WOLFSSL* ssl, const unsigned char* p, int pSz,
         return SIDE_ERROR;
 
     if (ssl->buffers.serverDH_P.buffer && ssl->buffers.weOwnDH) {
-        XFREE(ssl->buffers.serverDH_P.buffer, ssl->heap, DYNAMIC_TYPE_DH);
+        XFREE(ssl->buffers.serverDH_P.buffer, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
         ssl->buffers.serverDH_P.buffer = NULL;
     }
     if (ssl->buffers.serverDH_G.buffer && ssl->buffers.weOwnDH) {
-        XFREE(ssl->buffers.serverDH_G.buffer, ssl->heap, DYNAMIC_TYPE_DH);
+        XFREE(ssl->buffers.serverDH_G.buffer, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
         ssl->buffers.serverDH_G.buffer = NULL;
     }
 
     ssl->buffers.weOwnDH = 1;  /* SSL owns now */
     ssl->buffers.serverDH_P.buffer = (byte*)XMALLOC(pSz, ssl->heap,
-                                                    DYNAMIC_TYPE_DH);
+                                                    DYNAMIC_TYPE_DH_BUFFER);
     if (ssl->buffers.serverDH_P.buffer == NULL)
         return MEMORY_E;
 
     ssl->buffers.serverDH_G.buffer = (byte*)XMALLOC(gSz, ssl->heap,
-                                                    DYNAMIC_TYPE_DH);
+                                                    DYNAMIC_TYPE_DH_BUFFER);
     if (ssl->buffers.serverDH_G.buffer == NULL) {
-        XFREE(ssl->buffers.serverDH_P.buffer, ssl->heap, DYNAMIC_TYPE_DH);
+        XFREE(ssl->buffers.serverDH_P.buffer, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
         ssl->buffers.serverDH_P.buffer = NULL;
         return MEMORY_E;
     }
@@ -1198,16 +1198,16 @@ int wolfSSL_CTX_SetTmpDH(WOLFSSL_CTX* ctx, const unsigned char* p, int pSz,
     if (pSz < ctx->minDhKeySz)
         return DH_KEY_SIZE_E;
 
-    XFREE(ctx->serverDH_P.buffer, ctx->heap, DYNAMIC_TYPE_DH);
-    XFREE(ctx->serverDH_G.buffer, ctx->heap, DYNAMIC_TYPE_DH);
+    XFREE(ctx->serverDH_P.buffer, ctx->heap, DYNAMIC_TYPE_DH_BUFFER);
+    XFREE(ctx->serverDH_G.buffer, ctx->heap, DYNAMIC_TYPE_DH_BUFFER);
 
-    ctx->serverDH_P.buffer = (byte*)XMALLOC(pSz, ctx->heap, DYNAMIC_TYPE_DH);
+    ctx->serverDH_P.buffer = (byte*)XMALLOC(pSz, ctx->heap, DYNAMIC_TYPE_DH_BUFFER);
     if (ctx->serverDH_P.buffer == NULL)
        return MEMORY_E;
 
-    ctx->serverDH_G.buffer = (byte*)XMALLOC(gSz, ctx->heap, DYNAMIC_TYPE_DH);
+    ctx->serverDH_G.buffer = (byte*)XMALLOC(gSz, ctx->heap, DYNAMIC_TYPE_DH_BUFFER);
     if (ctx->serverDH_G.buffer == NULL) {
-        XFREE(ctx->serverDH_P.buffer, ctx->heap, DYNAMIC_TYPE_DH);
+        XFREE(ctx->serverDH_P.buffer, ctx->heap, DYNAMIC_TYPE_DH_BUFFER);
         return MEMORY_E;
     }
 
@@ -1534,7 +1534,7 @@ int wolfSSL_UseOCSPStapling(WOLFSSL* ssl, byte status_type, byte options)
         return BAD_FUNC_ARG;
 
     return TLSX_UseCertificateStatusRequest(&ssl->extensions, status_type,
-                                                            options, ssl->heap);
+                                                options, ssl->heap, ssl->devId);
 }
 
 
@@ -1545,7 +1545,7 @@ int wolfSSL_CTX_UseOCSPStapling(WOLFSSL_CTX* ctx, byte status_type,
         return BAD_FUNC_ARG;
 
     return TLSX_UseCertificateStatusRequest(&ctx->extensions, status_type,
-                                                            options, ctx->heap);
+                                                options, ctx->heap, ctx->devId);
 }
 
 #endif /* HAVE_CERTIFICATE_STATUS_REQUEST */
@@ -1558,7 +1558,7 @@ int wolfSSL_UseOCSPStaplingV2(WOLFSSL* ssl, byte status_type, byte options)
         return BAD_FUNC_ARG;
 
     return TLSX_UseCertificateStatusRequestV2(&ssl->extensions, status_type,
-                                                            options, ssl->heap);
+                                                options, ssl->heap, ssl->devId);
 }
 
 
@@ -1569,7 +1569,7 @@ int wolfSSL_CTX_UseOCSPStaplingV2(WOLFSSL_CTX* ctx,
         return BAD_FUNC_ARG;
 
     return TLSX_UseCertificateStatusRequestV2(&ctx->extensions, status_type,
-                                                            options, ctx->heap);
+                                                options, ctx->heap, ctx->devId);
 }
 
 #endif /* HAVE_CERTIFICATE_STATUS_REQUEST_V2 */
@@ -1871,26 +1871,28 @@ int wolfSSL_Rehandshake(WOLFSSL* ssl)
 
 #ifndef NO_OLD_TLS
 #ifndef NO_MD5
-    wc_InitMd5(&ssl->hsHashes->hashMd5);
+    ret = wc_InitMd5_ex(&ssl->hsHashes->hashMd5, ssl->heap, ssl->devId);
+    if (ret !=0)
+        return ret;
 #endif
 #ifndef NO_SHA
-    ret = wc_InitSha(&ssl->hsHashes->hashSha);
+    ret = wc_InitSha_ex(&ssl->hsHashes->hashSha, ssl->heap, ssl->devId);
     if (ret !=0)
         return ret;
 #endif
 #endif /* NO_OLD_TLS */
 #ifndef NO_SHA256
-    ret = wc_InitSha256(&ssl->hsHashes->hashSha256);
+    ret = wc_InitSha256_ex(&ssl->hsHashes->hashSha256, ssl->heap, ssl->devId);
     if (ret !=0)
         return ret;
 #endif
 #ifdef WOLFSSL_SHA384
-    ret = wc_InitSha384(&ssl->hsHashes->hashSha384);
+    ret = wc_InitSha384_ex(&ssl->hsHashes->hashSha384, ssl->heap, ssl->devId);
     if (ret !=0)
         return ret;
 #endif
 #ifdef WOLFSSL_SHA512
-    ret = wc_InitSha512(&ssl->hsHashes->hashSha512);
+    ret = wc_InitSha512_ex(&ssl->hsHashes->hashSha512, ssl->heap, ssl->devId);
     if (ret !=0)
         return ret;
 #endif
@@ -2537,6 +2539,7 @@ int AllocDer(DerBuffer** pDer, word32 length, int type, void* heap)
         if (*pDer == NULL) {
             return MEMORY_ERROR;
         }
+        XMEMSET(*pDer, 0, sizeof(DerBuffer) + length);
 
         der = *pDer;
         der->type = type;
@@ -3207,20 +3210,29 @@ int AlreadySigner(WOLFSSL_CERT_MANAGER* cm, byte* hash)
 {
     Signer* signers;
     int     ret = 0;
-    word32  row = HashSigner(hash);
+    word32  row;
 
-    if (wc_LockMutex(&cm->caLock) != 0)
-        return  ret;
+    if (cm == NULL || hash == NULL) {
+        return ret;
+    }
+
+    row = HashSigner(hash);
+
+    if (wc_LockMutex(&cm->caLock) != 0) {
+        return ret;
+    }
     signers = cm->caTable[row];
     while (signers) {
         byte* subjectHash;
-        #ifndef NO_SKID
-            subjectHash = signers->subjectKeyIdHash;
-        #else
-            subjectHash = signers->subjectNameHash;
-        #endif
+
+    #ifndef NO_SKID
+        subjectHash = signers->subjectKeyIdHash;
+    #else
+        subjectHash = signers->subjectNameHash;
+    #endif
+
         if (XMEMCMP(hash, subjectHash, SIGNER_DIGEST_SIZE) == 0) {
-            ret = 1;
+            ret = 1; /* success */
             break;
         }
         signers = signers->next;
@@ -3425,7 +3437,7 @@ int AddTrustedPeer(WOLFSSL_CERT_MANAGER* cm, DerBuffer** pDer, int verify)
         XFREE(cert, NULL, DYNAMIC_TYPE_TMP_BUFFER);
         return ret;
     }
-    WOLFSSL_MSG("    Parsed new trusted peer cert");
+    WOLFSSL_MSG("\tParsed new trusted peer cert");
 
     peerCert = (TrustedPeerCert*)XMALLOC(sizeof(TrustedPeerCert), cm->heap,
                                                              DYNAMIC_TYPE_CERT);
@@ -3455,7 +3467,7 @@ int AddTrustedPeer(WOLFSSL_CERT_MANAGER* cm, DerBuffer** pDer, int verify)
     #endif
 
     if (AlreadyTrustedPeer(cm, subjectHash)) {
-        WOLFSSL_MSG("    Already have this CA, not adding again");
+        WOLFSSL_MSG("\tAlready have this CA, not adding again");
         (void)ret;
     }
     else {
@@ -3510,7 +3522,7 @@ int AddTrustedPeer(WOLFSSL_CERT_MANAGER* cm, DerBuffer** pDer, int verify)
                 wc_UnLockMutex(&cm->tpLock);
             }
             else {
-                WOLFSSL_MSG("    Trusted Peer Cert Mutex Lock failed");
+                WOLFSSL_MSG("\tTrusted Peer Cert Mutex Lock failed");
                 FreeDecodedCert(cert);
                 XFREE(cert, cm->heap, DYNAMIC_TYPE_TMP_BUFFER);
                 FreeTrustedPeer(peerCert, cm->heap);
@@ -3518,12 +3530,12 @@ int AddTrustedPeer(WOLFSSL_CERT_MANAGER* cm, DerBuffer** pDer, int verify)
             }
         }
 
-    WOLFSSL_MSG("    Freeing parsed trusted peer cert");
+    WOLFSSL_MSG("\tFreeing parsed trusted peer cert");
     FreeDecodedCert(cert);
     XFREE(cert, cm->heap, DYNAMIC_TYPE_TMP_BUFFER);
-    WOLFSSL_MSG("    Freeing der trusted peer cert");
+    WOLFSSL_MSG("\tFreeing der trusted peer cert");
     FreeDer(&der);
-    WOLFSSL_MSG("        OK Freeing der trusted peer cert");
+    WOLFSSL_MSG("\t\tOK Freeing der trusted peer cert");
     WOLFSSL_LEAVE("AddTrustedPeer", ret);
 
     return SSL_SUCCESS;
@@ -3558,7 +3570,7 @@ int AddCA(WOLFSSL_CERT_MANAGER* cm, DerBuffer** pDer, int type, int verify)
 
     InitDecodedCert(cert, der->buffer, der->length, cm->heap);
     ret = ParseCert(cert, CA_TYPE, verify, cm);
-    WOLFSSL_MSG("    Parsed new CA");
+    WOLFSSL_MSG("\tParsed new CA");
 
 #ifndef NO_SKID
     subjectHash = cert->extSubjKeyId;
@@ -3574,7 +3586,7 @@ int AddCA(WOLFSSL_CERT_MANAGER* cm, DerBuffer** pDer, int type, int verify)
                 if (cm->minRsaKeySz < 0 ||
                                    cert->pubKeySize < (word16)cm->minRsaKeySz) {
                     ret = RSA_KEY_SIZE_E;
-                    WOLFSSL_MSG("    CA RSA key size error");
+                    WOLFSSL_MSG("\tCA RSA key size error");
                 }
                 break;
             #endif /* !NO_RSA */
@@ -3583,19 +3595,19 @@ int AddCA(WOLFSSL_CERT_MANAGER* cm, DerBuffer** pDer, int type, int verify)
                 if (cm->minEccKeySz < 0 ||
                                    cert->pubKeySize < (word16)cm->minEccKeySz) {
                     ret = ECC_KEY_SIZE_E;
-                    WOLFSSL_MSG("    CA ECC key size error");
+                    WOLFSSL_MSG("\tCA ECC key size error");
                 }
                 break;
             #endif /* HAVE_ECC */
 
             default:
-                WOLFSSL_MSG("    No key size check done on CA");
+                WOLFSSL_MSG("\tNo key size check done on CA");
                 break; /* no size check if key type is not in switch */
         }
     }
 
     if (ret == 0 && cert->isCA == 0 && type != WOLFSSL_USER_CA) {
-        WOLFSSL_MSG("    Can't add as CA if not actually one");
+        WOLFSSL_MSG("\tCan't add as CA if not actually one");
         ret = NOT_CA_ERROR;
     }
 #ifndef ALLOW_INVALID_CERTSIGN
@@ -3603,12 +3615,12 @@ int AddCA(WOLFSSL_CERT_MANAGER* cm, DerBuffer** pDer, int type, int verify)
              (cert->extKeyUsage & KEYUSE_KEY_CERT_SIGN) == 0) {
         /* Intermediate CA certs are required to have the keyCertSign
         * extension set. User loaded root certs are not. */
-        WOLFSSL_MSG("    Doesn't have key usage certificate signing");
+        WOLFSSL_MSG("\tDoesn't have key usage certificate signing");
         ret = NOT_CA_ERROR;
     }
 #endif
     else if (ret == 0 && AlreadySigner(cm, subjectHash)) {
-        WOLFSSL_MSG("    Already have this CA, not adding again");
+        WOLFSSL_MSG("\tAlready have this CA, not adding again");
         (void)ret;
     }
     else if (ret == 0) {
@@ -3662,21 +3674,21 @@ int AddCA(WOLFSSL_CERT_MANAGER* cm, DerBuffer** pDer, int type, int verify)
                     cm->caCacheCallback(der->buffer, (int)der->length, type);
             }
             else {
-                WOLFSSL_MSG("    CA Mutex Lock failed");
+                WOLFSSL_MSG("\tCA Mutex Lock failed");
                 ret = BAD_MUTEX_E;
                 FreeSigner(signer, cm->heap);
             }
         }
     }
 
-    WOLFSSL_MSG("    Freeing Parsed CA");
+    WOLFSSL_MSG("\tFreeing Parsed CA");
     FreeDecodedCert(cert);
 #ifdef WOLFSSL_SMALL_STACK
     XFREE(cert, NULL, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
-    WOLFSSL_MSG("    Freeing der CA");
+    WOLFSSL_MSG("\tFreeing der CA");
     FreeDer(pDer);
-    WOLFSSL_MSG("        OK Freeing der CA");
+    WOLFSSL_MSG("\t\tOK Freeing der CA");
 
     WOLFSSL_LEAVE("AddCA", ret);
 
@@ -4318,6 +4330,7 @@ int ProcessBuffer(WOLFSSL_CTX* ctx, const unsigned char* buff,
     int           rsaKey = 0;
     int           resetSuites = 0;
     void*         heap = ctx ? ctx->heap : ((ssl) ? ssl->heap : NULL);
+    int           devId = ctx ? ctx->devId : ((ssl) ? ssl->devId : INVALID_DEVID);
 #ifdef WOLFSSL_SMALL_STACK
     EncryptedInfo* info = NULL;
 #else
@@ -4344,6 +4357,7 @@ int ProcessBuffer(WOLFSSL_CTX* ctx, const unsigned char* buff,
         return MEMORY_E;
 #endif
 
+    XMEMSET(info, 0, sizeof(EncryptedInfo));
     info->set      = 0;
     info->ctx      = ctx;
     info->consumed = 0;
@@ -4528,7 +4542,7 @@ int ProcessBuffer(WOLFSSL_CTX* ctx, const unsigned char* buff,
                 return MEMORY_E;
         #endif
 
-            ret = wc_InitRsaKey(key, 0);
+            ret = wc_InitRsaKey_ex(key, heap, devId);
             if (ret == 0) {
                 if (wc_RsaPrivateKeyDecode(der->buffer, &idx, key, der->length)
                     != 0) {
@@ -4562,9 +4576,9 @@ int ProcessBuffer(WOLFSSL_CTX* ctx, const unsigned char* buff,
                         resetSuites = 1;
                     }
                 }
-            }
 
-            wc_FreeRsaKey(key);
+                wc_FreeRsaKey(key);
+            }
 
         #ifdef WOLFSSL_SMALL_STACK
             XFREE(key, heap, DYNAMIC_TYPE_TMP_BUFFER);
@@ -4580,7 +4594,11 @@ int ProcessBuffer(WOLFSSL_CTX* ctx, const unsigned char* buff,
             word32  idx = 0;
             ecc_key key;
 
-            wc_ecc_init(&key);
+            ret = wc_ecc_init_ex(&key, heap, devId);
+            if (ret != 0) {
+                return ret;
+            }
+
             if (wc_EccPrivateKeyDecode(der->buffer, &idx, &key,
                                                         der->length) != 0) {
                 wc_ecc_free(&key);
@@ -8101,31 +8119,38 @@ int wolfSSL_DTLS_SetCookieSecret(WOLFSSL* ssl,
                 if (IsDtlsNotSctpMode(ssl)) {
                     /* re-init hashes, exclude first hello and verify request */
 #ifndef NO_OLD_TLS
-                    wc_InitMd5(&ssl->hsHashes->hashMd5);
-                    if ( (ssl->error = wc_InitSha(&ssl->hsHashes->hashSha))
-                                                                         != 0) {
+                    if ( (ssl->error = wc_InitMd5_ex(&ssl->hsHashes->hashMd5,
+                                                 ssl->heap, ssl->devId)) != 0) {
+                        WOLFSSL_ERROR(ssl->error);
+                        return SSL_FATAL_ERROR;
+                    }
+                    if ( (ssl->error = wc_InitSha_ex(&ssl->hsHashes->hashSha,
+                                                 ssl->heap, ssl->devId)) != 0) {
                         WOLFSSL_ERROR(ssl->error);
                         return SSL_FATAL_ERROR;
                     }
 #endif
                     if (IsAtLeastTLSv1_2(ssl)) {
                         #ifndef NO_SHA256
-                            if ( (ssl->error = wc_InitSha256(
-                                            &ssl->hsHashes->hashSha256)) != 0) {
+                            if ( (ssl->error = wc_InitSha256_ex(
+                                            &ssl->hsHashes->hashSha256,
+                                            ssl->heap, ssl->devId)) != 0) {
                                 WOLFSSL_ERROR(ssl->error);
                                 return SSL_FATAL_ERROR;
                             }
                         #endif
                         #ifdef WOLFSSL_SHA384
-                            if ( (ssl->error = wc_InitSha384(
-                                            &ssl->hsHashes->hashSha384)) != 0) {
+                            if ( (ssl->error = wc_InitSha384_ex(
+                                            &ssl->hsHashes->hashSha384,
+                                            ssl->heap, ssl->devId)) != 0) {
                                 WOLFSSL_ERROR(ssl->error);
                                 return SSL_FATAL_ERROR;
                             }
                         #endif
                         #ifdef WOLFSSL_SHA512
-                            if ( (ssl->error = wc_InitSha512(
-                                            &ssl->hsHashes->hashSha512)) != 0) {
+                            if ( (ssl->error = wc_InitSha512_ex(
+                                            &ssl->hsHashes->hashSha512,
+                                            ssl->heap, ssl->devId)) != 0) {
                                 WOLFSSL_ERROR(ssl->error);
                                 return SSL_FATAL_ERROR;
                             }
@@ -8633,15 +8658,6 @@ int wolfSSL_Cleanup(void)
     if (wc_FreeMutex(&count_mutex) != 0)
         ret = BAD_MUTEX_E;
 
-#ifdef HAVE_ECC
-    #ifdef FP_ECC
-        wc_ecc_fp_free();
-    #endif
-    #ifdef ECC_CACHE_CURVE
-        wc_ecc_curve_cache_free();
-    #endif
-#endif
-
     if (wolfCrypt_Cleanup() != 0) {
         WOLFSSL_MSG("Error with wolfCrypt_Cleanup call");
         ret = WC_CLEANUP_E;
@@ -10955,10 +10971,21 @@ int wolfSSL_set_compression(WOLFSSL* ssl)
         (void)type;
 
         WOLFSSL_ENTER("wolfSSL_EVP_BytesToKey");
-        wc_InitMd5(md5);
+
+        if (wc_InitMd5(md5) != 0) {
+        #ifdef WOLFSSL_SMALL_STACK
+            XFREE(md5, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        #endif
+            return 0;
+        }
 
         /* only support MD5 for now */
-        if (XSTRNCMP(md, "MD5", 3) != 0) return 0;
+        if (XSTRNCMP(md, "MD5", 3) != 0) {
+        #ifdef WOLFSSL_SMALL_STACK
+            XFREE(md5, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        #endif
+            return 0;
+        }
 
         /* only support CBC DES and AES for now */
         #ifndef NO_DES3
@@ -11137,11 +11164,13 @@ size_t wolfSSL_get_client_random(const WOLFSSL* ssl, unsigned char* out,
 #ifndef NO_MD5
     void wolfSSL_MD5_Init(WOLFSSL_MD5_CTX* md5)
     {
+        int ret;
         typedef char md5_test[sizeof(MD5_CTX) >= sizeof(Md5) ? 1 : -1];
         (void)sizeof(md5_test);
 
         WOLFSSL_ENTER("MD5_Init");
-        wc_InitMd5((Md5*)md5);
+        ret = wc_InitMd5((Md5*)md5);
+        (void)ret; /* void API: init failure cannot be reported to caller */
     }
 
 
@@ -11492,8 +11521,7 @@ int wolfSSL_EVP_MD_type(const WOLFSSL_EVP_MD *md)
     void wolfSSL_EVP_MD_CTX_init(WOLFSSL_EVP_MD_CTX* ctx)
     {
         WOLFSSL_ENTER("EVP_CIPHER_MD_CTX_init");
-        (void)ctx;
-        /* do nothing */
+        if (ctx) XMEMSET(ctx, 0, sizeof(WOLFSSL_EVP_MD_CTX)); /* NULL: no-op */
     }
 
     const WOLFSSL_EVP_MD *wolfSSL_EVP_MD_CTX_md(const WOLFSSL_EVP_MD_CTX *ctx)
@@ -12270,6 +12298,14 @@ int wolfSSL_EVP_MD_type(const WOLFSSL_EVP_MD *md)
             return BAD_FUNC_ARG;
         }
 
+
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        /* compile-time check: WC_ASYNC_DEV_SIZE >= sizeof(WC_ASYNC_DEV) */
+        typedef char async_test[WC_ASYNC_DEV_SIZE >= sizeof(WC_ASYNC_DEV) ?
+                                                                        1 : -1];
+        (void)sizeof(async_test);
+    #endif
+
         if (XSTRNCMP(type, "SHA256", 6) == 0) {
              ctx->macType = SHA256;
              wolfSSL_SHA256_Init(&(ctx->hash.sha256));
@@ -12451,6 +12487,7 @@ int wolfSSL_EVP_MD_type(const WOLFSSL_EVP_MD *md)
             return NULL;
     #endif
 
+        XMEMSET(hmac, 0, sizeof(Hmac));
         if (wc_HmacSetKey(hmac, type, (const byte*)key, key_len) == 0)
             if (wc_HmacUpdate(hmac, d, n) == 0)
                 if (wc_HmacFinal(hmac, md) == 0) {
@@ -15573,13 +15610,13 @@ long wolfSSL_set_tmp_dh(WOLFSSL *ssl, WOLFSSL_DH *dh)
     if (pSz <= 0 || gSz <= 0)
         return SSL_FATAL_ERROR;
 
-    p = (byte*)XMALLOC(pSz, ssl->heap, DYNAMIC_TYPE_DH);
+    p = (byte*)XMALLOC(pSz, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
     if (!p)
         return MEMORY_E;
 
-    g = (byte*)XMALLOC(gSz, ssl->heap, DYNAMIC_TYPE_DH);
+    g = (byte*)XMALLOC(gSz, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
     if (!g) {
-        XFREE(p, ssl->heap, DYNAMIC_TYPE_DH);
+        XFREE(p, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
         return MEMORY_E;
     }
 
@@ -15589,8 +15626,8 @@ long wolfSSL_set_tmp_dh(WOLFSSL *ssl, WOLFSSL_DH *dh)
     if (pSz >= 0 && gSz >= 0) /* Conversion successful */
         ret = wolfSSL_SetTmpDH(ssl, p, pSz, g, gSz);
 
-    XFREE(p, ssl->heap, DYNAMIC_TYPE_DH);
-    XFREE(g, ssl->heap, DYNAMIC_TYPE_DH);
+    XFREE(p, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
+    XFREE(g, ssl->heap, DYNAMIC_TYPE_DH_BUFFER);
 
     return pSz > 0 && gSz > 0 ? ret : SSL_FATAL_ERROR;
 }
@@ -16683,7 +16720,7 @@ void wolfSSL_BN_free(WOLFSSL_BIGNUM* bn)
     WOLFSSL_MSG("wolfSSL_BN_free");
     if (bn) {
         if (bn->internal) {
-            mp_clear((mp_int*)bn->internal);
+            mp_forcezero((mp_int*)bn->internal);
             XFREE(bn->internal, NULL, DYNAMIC_TYPE_BIGINT);
             bn->internal = NULL;
         }
@@ -18874,6 +18911,7 @@ void wolfSSL_HMAC_Init(WOLFSSL_HMAC_CTX* ctx, const void* key, int keylen,
 
     if (key && keylen) {
         WOLFSSL_MSG("keying hmac");
+        XMEMSET(&ctx->hmac, 0, sizeof(Hmac));
         wc_HmacSetKey(&ctx->hmac, ctx->type, (const byte*)key, (word32)keylen);
         /* OpenSSL compat, no error */
     }
@@ -20400,6 +20438,7 @@ int wolfSSL_EC_POINT_mul(const WOLFSSL_EC_GROUP *group, WOLFSSL_EC_POINT *r,
                          const WOLFSSL_BIGNUM *m, WOLFSSL_BN_CTX *ctx)
 {
     mp_int a, prime;
+    int ret;
 
     (void)ctx;
     (void)n;
@@ -20416,43 +20455,41 @@ int wolfSSL_EC_POINT_mul(const WOLFSSL_EC_GROUP *group, WOLFSSL_EC_POINT *r,
         WOLFSSL_MSG("No ECPoint internal set, do it");
 
         if (SetECPointInternal((WOLFSSL_EC_POINT *)q) != SSL_SUCCESS) {
-            WOLFSSL_MSG("SetECPointInternal failed");
+            WOLFSSL_MSG("SetECPointInternal q failed");
             return SSL_FAILURE;
         }
     }
 
     /* read the curve prime and a */
     if (mp_init_multi(&prime, &a, NULL, NULL, NULL, NULL) != MP_OKAY) {
-        WOLFSSL_MSG("wolfSSL_EC_POINT_mul init 'prime/A' failed");
-        return SSL_FAILURE;
-    }
-    if (mp_read_radix(&prime, ecc_sets[group->curve_idx].prime, 16) != MP_OKAY){
-        WOLFSSL_MSG("wolfSSL_EC_POINT_mul read 'prime' curve value failed");
-        return SSL_FAILURE;
-    }
-    if (mp_read_radix(&a, ecc_sets[group->curve_idx].Af, 16) != MP_OKAY){
-        WOLFSSL_MSG("wolfSSL_EC_POINT_mul read 'A' curve value failed");
         return SSL_FAILURE;
     }
 
+    ret = mp_read_radix(&prime, ecc_sets[group->curve_idx].prime, 16);
+    if (ret == MP_OKAY)
+        ret = mp_read_radix(&a, ecc_sets[group->curve_idx].Af, 16);
+
     /* r = q * m % prime */
-    if (wc_ecc_mulmod((mp_int*)m->internal, (ecc_point*)q->internal,
-                      (ecc_point*)r->internal, &a, &prime, 1) != MP_OKAY) {
-        WOLFSSL_MSG("ecc_mulmod failure");
-        mp_clear(&prime);
-        return SSL_FAILURE;
-    }
+    if (ret == MP_OKAY)
+        ret = wc_ecc_mulmod((mp_int*)m->internal, (ecc_point*)q->internal,
+                      (ecc_point*)r->internal, &a, &prime, 1);
 
     mp_clear(&a);
     mp_clear(&prime);
 
-    /* set the external value for the computed point */
-    if (SetECPointInternal(r) != SSL_SUCCESS) {
-        WOLFSSL_MSG("SetECPointInternal failed");
-        return SSL_FAILURE;
+    if (ret != MP_OKAY) {
+        ret = SSL_FAILURE;
     }
 
-    return SSL_SUCCESS;
+    /* set the external value for the computed point */
+    if (ret != SSL_FAILURE) {
+        ret = SetECPointInternal(r);
+        if (ret != SSL_SUCCESS) {
+            WOLFSSL_MSG("SetECPointInternal r failed");
+        }
+    }
+
+    return ret;
 }
 
 void wolfSSL_EC_POINT_clear_free(WOLFSSL_EC_POINT *p)
@@ -20666,8 +20703,8 @@ WOLFSSL_ECDSA_SIG *wolfSSL_ECDSA_do_sign(const unsigned char *d, int dlen,
                 }
 
             }
-            mp_clear(&sig_r);
-            mp_clear(&sig_s);
+            mp_free(&sig_r);
+            mp_free(&sig_s);
         }
     }
 
@@ -22479,13 +22516,13 @@ long wolfSSL_CTX_set_tmp_dh(WOLFSSL_CTX* ctx, WOLFSSL_DH* dh)
     if(pSz <= 0 || gSz <= 0)
         return SSL_FATAL_ERROR;
 
-    p = (byte*)XMALLOC(pSz, ctx->heap, DYNAMIC_TYPE_DH);
+    p = (byte*)XMALLOC(pSz, ctx->heap, DYNAMIC_TYPE_DH_BUFFER);
     if(!p)
         return MEMORY_E;
 
-    g = (byte*)XMALLOC(gSz, ctx->heap, DYNAMIC_TYPE_DH);
+    g = (byte*)XMALLOC(gSz, ctx->heap, DYNAMIC_TYPE_DH_BUFFER);
     if(!g) {
-        XFREE(p, ctx->heap, DYNAMIC_TYPE_DH);
+        XFREE(p, ctx->heap, DYNAMIC_TYPE_DH_BUFFER);
         return MEMORY_E;
     }
 
@@ -22495,8 +22532,8 @@ long wolfSSL_CTX_set_tmp_dh(WOLFSSL_CTX* ctx, WOLFSSL_DH* dh)
     if(pSz >= 0 && gSz >= 0) /* Conversion successful */
         ret = wolfSSL_CTX_SetTmpDH(ctx, p, pSz, g, gSz);
 
-    XFREE(p, ctx->heap, DYNAMIC_TYPE_DH);
-    XFREE(g, ctx->heap, DYNAMIC_TYPE_DH);
+    XFREE(p, ctx->heap, DYNAMIC_TYPE_DH_BUFFER);
+    XFREE(g, ctx->heap, DYNAMIC_TYPE_DH_BUFFER);
 
     return pSz > 0 && gSz > 0 ? ret : SSL_FATAL_ERROR;
 }
@@ -23359,15 +23396,15 @@ int wolfSSL_AsyncPoll(WOLFSSL* ssl, WOLF_EVENT_FLAG flags)
         return BAD_FUNC_ARG;
     }
 
-    /* not filtering on "ssl", since its the asyncDev */
-    ret = wolfAsync_EventQueuePoll(&ssl->ctx->event_queue, NULL,
+    ret = wolfAsync_EventQueuePoll(&ssl->ctx->event_queue, ssl,
         events, sizeof(events)/sizeof(events), flags, &eventCount);
-    if (ret == 0 && eventCount > 0) {
-        ret = 1; /* Success */
+    if (ret == 0) {
+        ret = eventCount;
     }
 
     return ret;
 }
+
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
 #ifdef OPENSSL_EXTRA
diff --git a/src/tls.c b/src/tls.c
index a522f523e..8158091f0 100755
--- a/src/tls.c
+++ b/src/tls.c
@@ -47,6 +47,8 @@
 #ifdef HAVE_QSH
     static int TLSX_AddQSHKey(QSHKey** list, QSHKey* key);
     static byte* TLSX_QSHKeyFind_Pub(QSHKey* qsh, word16* pubLen, word16 name);
+#endif
+#if defined(HAVE_NTRU) || defined(HAVE_QSH)
     static int TLSX_CreateNtruKey(WOLFSSL* ssl, int type);
 #endif
 
@@ -72,6 +74,7 @@
     #define P_HASH_MAX_SIZE SHA256_DIGEST_SIZE
 #endif
 
+
 /* compute p_hash for MD5, SHA-1, SHA-256, or SHA-384 for TLSv1 PRF */
 static int p_hash(byte* result, word32 resLen, const byte* secret,
                    word32 secLen, const byte* seed, word32 seedLen, int hash)
@@ -146,6 +149,7 @@ static int p_hash(byte* result, word32 resLen, const byte* secret,
 
     lastTime = times - 1;
 
+    XMEMSET(hmac, 0, sizeof(Hmac));
     if ((ret = wc_HmacSetKey(hmac, hash, secret, secLen)) == 0) {
         if ((ret = wc_HmacUpdate(hmac, seed, seedLen)) == 0) { /* A0 = seed */
             if ((ret = wc_HmacFinal(hmac, previous)) == 0) {   /* A1 */
@@ -388,21 +392,28 @@ int BuildTlsFinished(WOLFSSL* ssl, Hashes* hashes, const byte* sender)
 {
     int         ret;
     const byte* side;
-    byte        handshake_hash[HSHASH_SZ];
+    byte*       handshake_hash;
     word32      hashSz = HSHASH_SZ;
 
+    handshake_hash = (byte*)XMALLOC(hashSz, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+    if (handshake_hash == NULL)
+        return MEMORY_E;
+
     ret = BuildTlsHandshakeHash(ssl, handshake_hash, &hashSz);
-    if (ret < 0)
-        return ret;
+    if (ret == 0) {
+        if ( XSTRNCMP((const char*)sender, (const char*)client, SIZEOF_SENDER) == 0)
+            side = tls_client;
+        else
+            side = tls_server;
 
-    if ( XSTRNCMP((const char*)sender, (const char*)client, SIZEOF_SENDER) == 0)
-        side = tls_client;
-    else
-        side = tls_server;
+        ret = PRF((byte*)hashes, TLS_FINISHED_SZ, ssl->arrays->masterSecret,
+                   SECRET_LEN, side, FINISHED_LABEL_SZ, handshake_hash, hashSz,
+                   IsAtLeastTLSv1_2(ssl), ssl->specs.mac_algorithm);
+    }
 
-    return PRF((byte*)hashes, TLS_FINISHED_SZ, ssl->arrays->masterSecret,
-               SECRET_LEN, side, FINISHED_LABEL_SZ, handshake_hash, hashSz,
-               IsAtLeastTLSv1_2(ssl), ssl->specs.mac_algorithm);
+    XFREE(handshake_hash, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+
+    return ret;
 }
 
 
@@ -533,20 +544,27 @@ int MakeTlsMasterSecret(WOLFSSL* ssl)
 {
     int    ret;
 #ifdef HAVE_EXTENDED_MASTER
-    byte   handshake_hash[HSHASH_SZ];
-    word32 hashSz = HSHASH_SZ;
-
     if (ssl->options.haveEMS) {
+        byte*  handshake_hash;
+        word32 hashSz = HSHASH_SZ;
+
+        handshake_hash = (byte*)XMALLOC(HSHASH_SZ, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        if (handshake_hash == NULL)
+            return MEMORY_E;
 
         ret = BuildTlsHandshakeHash(ssl, handshake_hash, &hashSz);
-        if (ret < 0)
+        if (ret < 0) {
+            XFREE(handshake_hash, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
             return ret;
+        }
 
         ret = wolfSSL_MakeTlsExtendedMasterSecret(
                 ssl->arrays->masterSecret, SECRET_LEN,
                 ssl->arrays->preMasterSecret, ssl->arrays->preMasterSz,
                 handshake_hash, hashSz,
                 IsAtLeastTLSv1_2(ssl), ssl->specs.mac_algorithm);
+
+        XFREE(handshake_hash, ssl->heap, DYNAMIC_TYPE_TMP_BUFFER);
     } else
 #endif
     ret = wolfSSL_MakeTlsMasterSecret(ssl->arrays->masterSecret, SECRET_LEN,
@@ -790,6 +808,7 @@ int TLS_hmac(WOLFSSL* ssl, byte* digest, const byte* in, word32 sz,
 
     wolfSSL_SetTlsHmacInner(ssl, myInner, sz, content, verify);
 
+    XMEMSET(&hmac, 0, sizeof(Hmac));
     ret = wc_HmacSetKey(&hmac, wolfSSL_GetHmacType(ssl),
                      wolfSSL_GetMacSecret(ssl, verify), ssl->specs.hash_size);
     if (ret != 0)
@@ -2105,7 +2124,8 @@ static int TLSX_CSR_Parse(WOLFSSL* ssl, byte* input, word16 length,
 
             /* enable extension at ssl level */
             ret = TLSX_UseCertificateStatusRequest(&ssl->extensions,
-                                     csr->status_type, csr->options, ssl->heap);
+                                     csr->status_type, csr->options, ssl->heap,
+                                     ssl->devId);
             if (ret != SSL_SUCCESS)
                 return ret;
 
@@ -2181,7 +2201,7 @@ static int TLSX_CSR_Parse(WOLFSSL* ssl, byte* input, word16 length,
 
         /* accept the first good status_type and return */
         ret = TLSX_UseCertificateStatusRequest(&ssl->extensions, status_type,
-                                                                  0, ssl->heap);
+                                                      0, ssl->heap, ssl->devId);
         if (ret != SSL_SUCCESS)
             return ret; /* throw error */
 
@@ -2267,7 +2287,7 @@ int TLSX_CSR_ForceRequest(WOLFSSL* ssl)
 }
 
 int TLSX_UseCertificateStatusRequest(TLSX** extensions, byte status_type,
-                                                       byte options, void* heap)
+                                           byte options, void* heap, int devId)
 {
     CertificateStatusRequest* csr = NULL;
     int ret = 0;
@@ -2290,11 +2310,13 @@ int TLSX_UseCertificateStatusRequest(TLSX** extensions, byte status_type,
             if (options & WOLFSSL_CSR_OCSP_USE_NONCE) {
                 WC_RNG rng;
 
-#ifdef WOLFSSL_STATIC_MEMORY
-                if (wc_InitRng_ex(&rng, heap) == 0) {
-#else
-                if (wc_InitRng(&rng) == 0) {
-#endif
+            #ifndef HAVE_FIPS
+                ret = wc_InitRng_ex(&rng, heap, devId);
+            #else
+                ret = wc_InitRng(&rng);
+                (void)devId;
+            #endif
+                if (ret == 0) {
                     if (wc_RNG_GenerateBlock(&rng, csr->request.ocsp.nonce,
                                                         MAX_OCSP_NONCE_SZ) == 0)
                         csr->request.ocsp.nonceSz = MAX_OCSP_NONCE_SZ;
@@ -2467,7 +2489,7 @@ static int TLSX_CSR2_Parse(WOLFSSL* ssl, byte* input, word16 length,
             /* enable extension at ssl level */
             for (; csr2; csr2 = csr2->next) {
                 ret = TLSX_UseCertificateStatusRequestV2(&ssl->extensions,
-                                   csr2->status_type, csr2->options, ssl->heap);
+                       csr2->status_type, csr2->options, ssl->heap, ssl->devId);
                 if (ret != SSL_SUCCESS)
                     return ret;
 
@@ -2566,7 +2588,7 @@ static int TLSX_CSR2_Parse(WOLFSSL* ssl, byte* input, word16 length,
 
             /* accept the first good status_type and return */
             ret = TLSX_UseCertificateStatusRequestV2(&ssl->extensions,
-                                                     status_type, 0, ssl->heap);
+                                         status_type, 0, ssl->heap, ssl->devId);
             if (ret != SSL_SUCCESS)
                 return ret; /* throw error */
 
@@ -2679,7 +2701,7 @@ int TLSX_CSR2_ForceRequest(WOLFSSL* ssl)
 }
 
 int TLSX_UseCertificateStatusRequestV2(TLSX** extensions, byte status_type,
-                                                       byte options, void* heap)
+                                           byte options, void* heap, int devId)
 {
     TLSX* extension = NULL;
     CertificateStatusRequestItemV2* csr2 = NULL;
@@ -2709,11 +2731,13 @@ int TLSX_UseCertificateStatusRequestV2(TLSX** extensions, byte status_type,
             if (options & WOLFSSL_CSR2_OCSP_USE_NONCE) {
                 WC_RNG rng;
 
-#ifdef WOLFSSL_STATIC_MEMORY
-                if (wc_InitRng_ex(&rng, heap) == 0) {
-#else
-                if (wc_InitRng(&rng) == 0) {
-#endif
+            #ifndef HAVE_FIPS
+                ret = wc_InitRng_ex(&rng, heap, devId);
+            #else
+                ret = wc_InitRng(&rng);
+                (void)devId;
+            #endif
+                if (ret == 0) {
                     if (wc_RNG_GenerateBlock(&rng, csr2->request.ocsp[0].nonce,
                                                         MAX_OCSP_NONCE_SZ) == 0)
                         csr2->request.ocsp[0].nonceSz = MAX_OCSP_NONCE_SZ;
@@ -3569,10 +3593,12 @@ int TLSX_UseSessionTicket(TLSX** extensions, SessionTicket* ticket, void* heap)
 /* Quantum-Safe-Hybrid                                                        */
 /******************************************************************************/
 
-#ifdef HAVE_QSH
+#if defined(HAVE_NTRU) && defined(HAVE_QSH)
 static WC_RNG* rng;
 static wolfSSL_Mutex* rngMutex;
+#endif
 
+#ifdef HAVE_QSH
 static void TLSX_QSH_FreeAll(QSHScheme* list, void* heap)
 {
     QSHScheme* current;
@@ -4444,6 +4470,8 @@ static int TLSX_CreateQSHKey(WOLFSSL* ssl, int type)
 {
     int ret;
 
+    (void)ssl;
+
     switch (type) {
 #ifdef HAVE_NTRU
         case WOLFSSL_NTRU_EESS439:
@@ -4492,10 +4520,11 @@ static int TLSX_AddQSHKey(QSHKey** list, QSHKey* key)
 }
 
 
-#ifdef HAVE_NTRU
+#if defined(HAVE_NTRU) || defined(HAVE_QSH)
 int TLSX_CreateNtruKey(WOLFSSL* ssl, int type)
 {
-    int ret;
+    int ret = -1;
+#ifdef HAVE_NTRU
     int ntruType;
 
     /* variable declarations for NTRU*/
@@ -4558,6 +4587,10 @@ int TLSX_CreateNtruKey(WOLFSSL* ssl, int type)
     temp->next = NULL;
 
     TLSX_AddQSHKey(&ssl->QSH_Key, temp);
+#endif
+
+    (void)ssl;
+    (void)type;
 
     return ret;
 }
diff --git a/tests/api.c b/tests/api.c
index 4925c14b4..37b1f41d1 100644
--- a/tests/api.c
+++ b/tests/api.c
@@ -2663,7 +2663,9 @@ static void test_wolfSSL_ERR_peek_last_error_line(void)
     tcp_ready ready;
     func_args client_args;
     func_args server_args;
+#ifndef SINGLE_THREADED
     THREAD_TYPE serverThread;
+#endif
     callback_functions client_cb;
     callback_functions server_cb;
     int         line = 0;
@@ -2689,10 +2691,12 @@ static void test_wolfSSL_ERR_peek_last_error_line(void)
     client_args.signal    = &ready;
     client_args.callbacks = &client_cb;
 
+#ifndef SINGLE_THREADED
     start_thread(test_server_nofail, &server_args, &serverThread);
     wait_tcp_ready(&server_args);
     test_client_nofail(&client_args);
     join_thread(serverThread);
+#endif
 
     FreeTcpReady(&ready);
 
diff --git a/tests/hash.c b/tests/hash.c
index 9167cda16..4a714ca08 100644
--- a/tests/hash.c
+++ b/tests/hash.c
@@ -673,6 +673,10 @@ int hmac_md5_test(void)
     test_hmac[1] = b;
     test_hmac[2] = c;
 
+    ret = wc_HmacInit(&hmac, NULL, INVALID_DEVID);
+    if (ret != 0)
+        return -20009;
+
     for (i = 0; i < times; ++i) {
 #if defined(HAVE_FIPS)
         if (i == 1)
@@ -693,6 +697,8 @@ int hmac_md5_test(void)
             return -20 - i;
     }
 
+    wc_HmacFree(&hmac);
+
     return 0;
 }
 #endif
@@ -743,6 +749,10 @@ int hmac_sha_test(void)
     test_hmac[1] = b;
     test_hmac[2] = c;
 
+    ret = wc_HmacInit(&hmac, NULL, INVALID_DEVID);
+    if (ret != 0)
+        return -20009;
+
     for (i = 0; i < times; ++i) {
 #if defined(HAVE_FIPS)
         if (i == 1)
@@ -763,6 +773,8 @@ int hmac_sha_test(void)
             return -20 - i;
     }
 
+    wc_HmacFree(&hmac);
+
     return 0;
 }
 #endif
@@ -813,6 +825,10 @@ int hmac_sha224_test(void)
     test_hmac[1] = b;
     test_hmac[2] = c;
 
+    ret = wc_HmacInit(&hmac, NULL, INVALID_DEVID);
+    if (ret != 0)
+        return -20009;
+
     for (i = 0; i < times; ++i) {
 #if defined(HAVE_FIPS) || defined(HAVE_CAVIUM)
         if (i == 1)
@@ -831,11 +847,10 @@ int hmac_sha224_test(void)
 
         if (XMEMCMP(hash, test_hmac[i].output, SHA224_DIGEST_SIZE) != 0)
             return -20 - i;
-#ifdef WOLFSSL_ASYNC_CRYPT
-        wc_HmacAsyncFree(&hmac);
-#endif
     }
 
+    wc_HmacFree(&hmac);
+
     return 0;
 }
 #endif
@@ -890,6 +905,10 @@ int hmac_sha256_test(void)
     test_hmac[1] = b;
     test_hmac[2] = c;
 
+    ret = wc_HmacInit(&hmac, NULL, INVALID_DEVID);
+    if (ret != 0)
+        return -20009;
+
     for (i = 0; i < times; ++i) {
 #if defined(HAVE_FIPS)
         if (i == 1)
@@ -910,6 +929,8 @@ int hmac_sha256_test(void)
             return -20 - i;
     }
 
+    wc_HmacFree(&hmac);
+
     return 0;
 }
 #endif
@@ -967,6 +988,10 @@ int hmac_sha384_test(void)
     test_hmac[1] = b;
     test_hmac[2] = c;
 
+    ret = wc_HmacInit(&hmac, NULL, INVALID_DEVID);
+    if (ret != 0)
+        return -20009;
+
     for (i = 0; i < times; ++i) {
 #if defined(HAVE_FIPS)
         if (i == 1)
@@ -987,6 +1012,8 @@ int hmac_sha384_test(void)
             return -20 - i;
     }
 
+    wc_HmacFree(&hmac);
+
     return 0;
 }
 #endif
diff --git a/tests/suites.c b/tests/suites.c
index 8192ed3a1..694d362ea 100644
--- a/tests/suites.c
+++ b/tests/suites.c
@@ -56,6 +56,10 @@ static char flagSep[] = " ";
 #endif
 static char forceDefCipherListFlag[] = "-H";
 
+#ifdef WOLFSSL_ASYNC_CRYPT
+    static int devId = INVALID_DEVID;
+#endif
+
 
 #ifndef WOLFSSL_ALLOW_SSLV3
 /* if the protocol version is sslv3 return 1, else 0 */
@@ -533,17 +537,25 @@ int SuiteTest(void)
                                                    memory, sizeof(memory), 0, 1)
             != SSL_SUCCESS) {
         printf("unable to load static memory and create ctx");
-        exit(EXIT_FAILURE);
+        args.return_code = EXIT_FAILURE; goto exit;
     }
 #endif
 
+#ifdef WOLFSSL_ASYNC_CRYPT
+    if (wolfAsync_DevOpen(&devId) < 0) {
+        printf("Async device open failed");
+        args.return_code = EXIT_FAILURE; goto exit;
+    }
+    wolfSSL_CTX_UseAsync(cipherSuiteCtx, devId);
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
     /* default case */
     args.argc = 1;
     printf("starting default cipher suite tests\n");
     test_harness(&args);
     if (args.return_code != 0) {
         printf("error from script %d\n", args.return_code);
-        exit(EXIT_FAILURE);
+        args.return_code = EXIT_FAILURE; goto exit;
     }
 
     /* any extra cases will need another argument */
@@ -556,7 +568,7 @@ int SuiteTest(void)
     test_harness(&args);
     if (args.return_code != 0) {
         printf("error from script %d\n", args.return_code);
-        exit(EXIT_FAILURE);
+        args.return_code = EXIT_FAILURE; goto exit;
     }
 #endif
 #ifdef WOLFSSL_SCTP
@@ -566,7 +578,7 @@ int SuiteTest(void)
     test_harness(&args);
     if (args.return_code != 0) {
         printf("error from script %d\n", args.return_code);
-        exit(EXIT_FAILURE);
+        args.return_code = EXIT_FAILURE; goto exit;
     }
 #endif
 #ifndef WC_STRICT_SIG
@@ -577,7 +589,7 @@ int SuiteTest(void)
     test_harness(&args);
     if (args.return_code != 0) {
         printf("error from script %d\n", args.return_code);
-        exit(EXIT_FAILURE);
+        args.return_code = EXIT_FAILURE; goto exit;
     }
 #endif /* HAVE_RSA and HAVE_ECC */
 #endif /* !WC_STRICT_SIG */
@@ -588,7 +600,7 @@ int SuiteTest(void)
     test_harness(&args);
     if (args.return_code != 0) {
         printf("error from script %d\n", args.return_code);
-        exit(EXIT_FAILURE);
+        args.return_code = EXIT_FAILURE; goto exit;
     }
 #endif
 
@@ -599,15 +611,20 @@ int SuiteTest(void)
     test_harness(&args);
     if (args.return_code != 0) {
         printf("error from script %d\n", args.return_code);
-        exit(EXIT_FAILURE);
+        args.return_code = EXIT_FAILURE; goto exit;
     }
 #endif
 
+exit:
     printf(" End Cipher Suite Tests\n");
 
     wolfSSL_CTX_free(cipherSuiteCtx);
     wolfSSL_Cleanup();
 
+#ifdef WOLFSSL_ASYNC_CRYPT
+    wolfAsync_DevClose(&devId);
+#endif
+
     return args.return_code;
 }
 
diff --git a/tests/unit.c b/tests/unit.c
index c007fbb64..39a76ddf2 100644
--- a/tests/unit.c
+++ b/tests/unit.c
@@ -45,16 +45,12 @@ int main(int argc, char** argv)
 
 int unit_test(int argc, char** argv)
 {
-    int ret;
+    int ret = 0;
 
     (void)argc;
     (void)argv;
     printf("starting unit tests...\n");
 
-#if defined(USE_WOLFSSL_MEMORY) && defined(WOLFSSL_TRACK_MEMORY)
-    InitMemoryTracker();
-#endif
-
 #if defined(DEBUG_WOLFSSL) && !defined(HAVE_VALGRIND)
     wolfSSL_Debugging_ON();
 #endif
@@ -72,28 +68,25 @@ int unit_test(int argc, char** argv)
 
     if ( (ret = HashTest()) != 0){
         printf("hash test failed with %d\n", ret);
-        return ret;
+        goto exit;
     }
 
 #ifndef SINGLE_THREADED
     if ( (ret = SuiteTest()) != 0){
         printf("suite test failed with %d\n", ret);
-        return ret;
+        goto exit;
     }
 #endif
 
     SrpTest();
 
+exit:
 #ifdef HAVE_WNR
     if (wc_FreeNetRandom() < 0)
         err_sys("Failed to free netRandom context");
 #endif /* HAVE_WNR */
 
-#if defined(USE_WOLFSSL_MEMORY) && defined(WOLFSSL_TRACK_MEMORY)
-    ShowMemoryTracker();
-#endif
-
-    return 0;
+    return ret;
 }
 
 
diff --git a/tirtos/README b/tirtos/README
index 6001f5664..dc7fbb114 100644
--- a/tirtos/README
+++ b/tirtos/README
@@ -7,6 +7,9 @@ library and the example applications.
 Also read TI-RTOS Getting Started Guide and TI-RTOS User Guide to learn more
 about TI-RTOS (http://www.ti.com/tool/ti-rtos).
 
+For more information see:
+(https://github.com/wolfSSL/wolfssl-examples/blob/master/tirtos_ccs_examples/README.md)
+
 ## Example Application
 
 A simple "TCP echo server with TLS" example application is provided with TI-RTOS
diff --git a/wolfcrypt/benchmark/benchmark.c b/wolfcrypt/benchmark/benchmark.c
index 140865bfa..9e7b41ecf 100644
--- a/wolfcrypt/benchmark/benchmark.c
+++ b/wolfcrypt/benchmark/benchmark.c
@@ -106,37 +106,34 @@
 #ifdef WOLFSSL_ASYNC_CRYPT
     #include 
 #endif
-#if defined(WOLFSSL_ASYNC_CRYPT) || defined(HAVE_ECC)
-    static int devId = INVALID_DEVID;
-#endif
 
 #ifdef HAVE_WNR
     const char* wnrConfigFile = "wnr-example.conf";
 #endif
 
 #if defined(WOLFSSL_MDK_ARM)
-    extern FILE * wolfSSL_fopen(const char *fname, const char *mode) ;
+    extern FILE * wolfSSL_fopen(const char *fname, const char *mode);
     #define fopen wolfSSL_fopen
 #endif
 
 #if defined(__GNUC__) && defined(__x86_64__) && !defined(NO_ASM)
     #define HAVE_GET_CYCLES
     static INLINE word64 get_intel_cycles(void);
-    static word64 total_cycles;
+    static THREAD_LS_T word64 total_cycles;
     #define INIT_CYCLE_COUNTER
     #define BEGIN_INTEL_CYCLES total_cycles = get_intel_cycles();
     #define END_INTEL_CYCLES   total_cycles = get_intel_cycles() - total_cycles;
     #define SHOW_INTEL_CYCLES  printf(" Cycles per byte = %6.2f", \
-                               (float)total_cycles / (numBlocks*sizeof(plain)));
+                               (float)total_cycles / (count*BENCH_SIZE));
 #elif defined(LINUX_CYCLE_COUNT)
     #include 
     #include 
     #include 
 
-    static word64 begin_cycles;
-    static word64 total_cycles;
-    static int cycles = -1;
-    static struct perf_event_attr atr;
+    static THREAD_LS_T word64 begin_cycles;
+    static THREAD_LS_T word64 total_cycles;
+    static THREAD_LS_T int cycles = -1;
+    static THREAD_LS_T struct perf_event_attr atr;
 
     #define INIT_CYCLE_COUNTER do { \
         atr.type   = PERF_TYPE_HARDWARE; \
@@ -151,7 +148,7 @@
     } while (0);
 
     #define SHOW_INTEL_CYCLES  printf(" Cycles per byte = %6.2f", \
-                               (float)total_cycles / (numBlocks*sizeof(plain)));
+                               (float)total_cycles / (count*BENCH_SIZE));
 
 #else
     #define INIT_CYCLE_COUNTER
@@ -184,44 +181,36 @@
 
 #include "wolfcrypt/benchmark/benchmark.h"
 
-#ifdef USE_WOLFSSL_MEMORY
-    #include "wolfssl/wolfcrypt/mem_track.h"
-#endif
-
-
-void bench_des(void);
+void bench_des(int);
 void bench_idea(void);
-void bench_arc4(void);
+void bench_arc4(int);
 void bench_hc128(void);
 void bench_rabbit(void);
 void bench_chacha(void);
 void bench_chacha20_poly1305_aead(void);
-void bench_aes(int);
-void bench_aesgcm(void);
+void bench_aescbc(int);
+void bench_aesgcm(int);
 void bench_aesccm(void);
 void bench_aesctr(void);
 void bench_poly1305(void);
 void bench_camellia(void);
 
-void bench_md5(void);
-void bench_sha(void);
-void bench_sha224(void);
-void bench_sha256(void);
-void bench_sha384(void);
-void bench_sha512(void);
+void bench_md5(int);
+void bench_sha(int);
+void bench_sha224(int);
+void bench_sha256(int);
+void bench_sha384(int);
+void bench_sha512(int);
 void bench_ripemd(void);
 void bench_cmac(void);
 void bench_scrypt(void);
 
-void bench_rsa(void);
-#ifdef WOLFSSL_ASYNC_CRYPT
-    void bench_rsa_async(void);
-#endif
-void bench_rsaKeyGen(void);
-void bench_dh(void);
+void bench_rsaKeyGen(int);
+void bench_rsa(int);
+void bench_dh(int);
 #ifdef HAVE_ECC
-void bench_eccKeyGen(void);
-void bench_eccKeyAgree(void);
+void bench_eccMakeKey(int);
+void bench_ecc(int);
     #ifdef HAVE_ECC_ENCRYPT
     void bench_eccEncrypt(void);
     #endif
@@ -253,32 +242,206 @@ void bench_rng(void);
 #if defined(DEBUG_WOLFSSL) && !defined(HAVE_VALGRIND) && \
         !defined(HAVE_STACK_SIZE)
     WOLFSSL_API int wolfSSL_Debugging_ON();
+    WOLFSSL_API void wolfSSL_Debugging_OFF(void);
 #endif
 
 #if !defined(NO_RSA) || !defined(NO_DH) \
                         || defined(WOLFSSL_KEYGEN) || defined(HAVE_ECC) \
                         || defined(HAVE_CURVE25519) || defined(HAVE_ED25519)
     #define HAVE_LOCAL_RNG
-    static WC_RNG rng;
+    static THREAD_LS_T WC_RNG rng;
 #endif
 
-/* use kB instead of mB for embedded benchmarking */
-#ifdef BENCH_EMBEDDED
-    static byte plain [1024];
+
+
+/* Asynchronous helper macros */
+static THREAD_LS_T int devId = INVALID_DEVID;
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    static THREAD_LS_T WOLF_EVENT_QUEUE eventQueue;
+    static THREAD_LS_T int asyncPending;
+
+    #define BENCH_ASYNC_GET_DEV(obj)      (&(obj)->asyncDev)
+    #define BENCH_ASYNC_GET_NAME(doAsync) ((doAsync) ? "HW" : "SW") /* whole expansion parenthesized so it is safe inside larger expressions */
+    #define BENCH_ASYNC_IS_PEND()         (asyncPending > 0)
+    #define BENCH_MAX_PENDING             (WOLF_ASYNC_MAX_PENDING)
+
+#ifndef WC_NO_ASYNC_THREADING
+    typedef struct ThreadData {
+        pthread_t thread_id;
+    } ThreadData;
+    static ThreadData* g_threadData;
+    static int g_threadCount;
+#endif
+
+    static INLINE int bench_async_begin(void) {
+        /* zero the pending-op counter, then init the event queue; returns wolfEventQueue_Init's result */
+        asyncPending = 0;
+        return wolfEventQueue_Init(&eventQueue);
+    }
+
+    static INLINE void bench_async_end(void) {
+        /* free the event queue that bench_async_begin() initialized */
+        wolfEventQueue_Free(&eventQueue);
+    }
+
+    static INLINE void bench_async_complete(int* ret, WC_ASYNC_DEV* asyncDev,
+        int* times)
+    {   /* record a finished op: *ret receives the event's result code */
+        *ret = asyncDev->event.ret;
+        if (*ret >= 0) {
+            (*times)++;               /* only non-negative results are counted */
+            asyncDev->event.done = 0; /* reset done flag */
+        }
+    }
+
+    static INLINE int bench_async_check(int* ret, WC_ASYNC_DEV* asyncDev,
+        int callAgain, int* times, int limit)
+    {
+        int allowNext = 0; /* return value: nonzero tells the caller to issue the op on this slot */
+
+        /* callAgain == 0: a done event is final, so record its result here */
+        if (!callAgain) {
+            if (asyncDev->event.done) {
+                /* operation completed */
+                bench_async_complete(ret, asyncDev, times);
+            }
+        }
+        /* callAgain != 0: a done event means the algo has to be called again */
+        else {
+            if (asyncDev->event.done) {
+                allowNext = 1;
+            }
+        }
+
+        if (asyncDev->event.pending == 0 &&
+                (*times + asyncPending) < limit) { /* slot not pending and completed + in-flight ops still below limit */
+            allowNext = 1;
+        }
+
+        return allowNext;
+    }
+
+    static INLINE int bench_async_handle(int* ret, WC_ASYNC_DEV* asyncDev,
+        int callAgain, int* times)
+    {   /* post-process an op's return code held in *ret; returns 1 if *ret ends up >= 0, else 0 */
+        if (*ret == WC_PENDING_E) {
+            *ret = wc_AsyncHandle(asyncDev, &eventQueue,
+                callAgain ? WC_ASYNC_FLAG_CALL_AGAIN : WC_ASYNC_FLAG_NONE);
+            if (*ret == 0)
+                asyncPending++; /* one more op for bench_async_poll() to wait on */
+        }
+        else if (*ret >= 0) {
+            /* operation completed */
+            bench_async_complete(ret, asyncDev, times);
+        }
+
+        return (*ret >= 0) ? 1 : 0;
+    }
+
+    static INLINE void bench_async_poll(void)
+    {
+        /* no-op when nothing is pending; otherwise poll until asyncDone is nonzero */
+        if (asyncPending > 0) {
+            int ret, asyncDone = 0;
+            do {
+                ret = wolfAsync_EventQueuePoll(&eventQueue, NULL, NULL, 0,
+                                       WOLF_POLL_FLAG_CHECK_HW, &asyncDone);
+                if (ret != 0) {
+                    printf("Async poll failed %d\n", ret);
+                    return; /* NOTE(review): asyncPending is left unchanged on this path */
+                }
+            } while (asyncDone == 0);
+            asyncPending -= asyncDone; /* completed events are no longer pending */
+        }
+    }
+
 #else
-    static byte plain [1024*1024];
+    #define BENCH_MAX_PENDING             (1)
+    #define BENCH_ASYNC_GET_NAME(doAsync) ""
+    #define BENCH_ASYNC_GET_DEV(obj)      NULL
+    #define BENCH_ASYNC_IS_PEND()         (0)
+
+    #define bench_async_begin()
+    #define bench_async_end()             (void)doAsync;
+
+    static INLINE int bench_async_check(int* ret, void* asyncDev,
+        int callAgain, int* times, int limit)
+    {   /* non-async build: every argument is ignored */
+        (void)ret;
+        (void)asyncDev;
+        (void)callAgain;
+        (void)times;
+        (void)limit;
+
+        return 1; /* always let the caller run the operation */
+    }
+
+    static INLINE int bench_async_handle(int* ret, void* asyncDev,
+        int callAgain, int* times)
+    {   /* non-async build: count the op when *ret >= 0; returns 1 in that case, 0 otherwise */
+        (void)asyncDev;
+        (void)callAgain;
+
+        if (*ret >= 0) {
+            /* operation completed */
+            (*times)++;
+            return 1;
+        }
+        return 0;
+    }
+    #define bench_async_poll()
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+
+
+/* minimum runtime for each benchmark */
+#define BENCH_MIN_RUNTIME_SEC   1.0f
+
+
+#if defined(HAVE_AESGCM) || defined(HAVE_AESCCM)
+    #define AES_AUTH_ADD_SZ 13
+    #define AES_AUTH_TAG_SZ 16
+    #define BENCH_CIPHER_ADD AES_AUTH_TAG_SZ
+#endif
+#ifndef BENCH_CIPHER_ADD
+    #define BENCH_CIPHER_ADD 0
 #endif
 
 
 /* use kB instead of mB for embedded benchmarking */
 #ifdef BENCH_EMBEDDED
-    static byte cipher[1024];
+    enum BenchmarkBounds {
+        numBlocks  = 25, /* how many kB to test (en/de)cryption */
+        scryptCnt  = 1,
+        ntimes     = 2,
+        genTimes   = BENCH_MAX_PENDING,
+        agreeTimes = 2
+    };
+    static const char blockType[] = "kB";   /* used in printf output */
+    #define BENCH_SIZE (1024ul)
 #else
-    static byte cipher[1024*1024];
+    enum BenchmarkBounds {
+        numBlocks  = 5, /* how many megs to test (en/de)cryption */
+        scryptCnt  = 10,
+        ntimes     = 100,
+        genTimes   = BENCH_MAX_PENDING, /* must be at least BENCH_MAX_PENDING */
+        agreeTimes = 100
+    };
+    static const char blockType[] = "megs"; /* used in printf output */
+    #define BENCH_SIZE (1024*1024ul)
 #endif
 
 
-static const XGEN_ALIGN byte key[] =
+/* globals for cipher tests */
+#ifdef WOLFSSL_ASYNC_CRYPT
+    static byte* bench_plain = NULL;
+    static byte* bench_cipher = NULL;
+#else
+    static byte bench_plain[BENCH_SIZE];
+    static byte bench_cipher[BENCH_SIZE];
+#endif
+static const XGEN_ALIGN byte bench_key_buf[] =
 {
     0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,
     0xfe,0xde,0xba,0x98,0x76,0x54,0x32,0x10,
@@ -286,13 +449,14 @@ static const XGEN_ALIGN byte key[] =
     0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef
 };
 
-static const XGEN_ALIGN byte iv[] =
+static const XGEN_ALIGN byte bench_iv_buf[] =
 {
     0x12,0x34,0x56,0x78,0x90,0xab,0xcd,0xef,
     0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
     0x11,0x21,0x31,0x41,0x51,0x61,0x71,0x81
 };
-
+static byte* bench_key = (byte*)bench_key_buf;
+static byte* bench_iv = (byte*)bench_iv_buf;
 
 #ifdef WOLFSSL_STATIC_MEMORY
     #ifdef BENCH_EMBEDDED
@@ -302,56 +466,222 @@ static const XGEN_ALIGN byte iv[] =
     #endif
 #endif
 
-#ifdef HAVE_STACK_SIZE
-THREAD_RETURN WOLFSSL_THREAD benchmark_test(void* args)
-#else
-int benchmark_test(void *args)
-#endif
-{
-    (void)args;
 
-#ifdef WOLFSSL_STATIC_MEMORY
-    if (wc_LoadStaticMemory(&HEAP_HINT, gBenchMemory, sizeof(gBenchMemory),
-                                                WOLFMEM_GENERAL, 1) != 0) {
-        printf("unable to load static memory");
-        exit(EXIT_FAILURE);
+/******************************************************************************/
+/* Begin Stats Functions */
+/******************************************************************************/
+#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_NO_ASYNC_THREADING)
+    typedef enum bench_stat_type {
+        BENCH_STAT_ASYM,
+        BENCH_STAT_SYM,
+    } bench_stat_type_t;
+    typedef struct bench_stats {
+        struct bench_stats* next;
+        struct bench_stats* prev;
+        const char* algo;
+        const char* desc;
+        double perfsec;
+        int strength;
+        int doAsync;
+        int finishCount;
+        bench_stat_type_t type;
+    } bench_stats_t;
+    static bench_stats_t* bench_stats_head;
+    static bench_stats_t* bench_stats_tail;
+    static pthread_mutex_t bench_lock = PTHREAD_MUTEX_INITIALIZER;
+
+    static bench_stats_t* bench_stats_add(bench_stat_type_t type,
+        const char* algo, int strength, const char* desc, int doAsync,
+        double perfsec)
+    {   /* merge one thread's result into the shared record for this benchmark; returns the record or NULL if allocation failed */
+        bench_stats_t* stat;
+
+        pthread_mutex_lock(&bench_lock); /* held while the list is searched and the record updated */
+
+        /* locate existing in list */
+        for (stat = bench_stats_head; stat != NULL; stat = stat->next) {
+            /* match on algo, strength, desc and doAsync; the strings are compared by pointer, not content */
+            if (stat->algo == algo && stat->strength == strength && stat->desc == desc && stat->doAsync == doAsync) {
+                break;
+            }
+        }
+
+        if (stat == NULL) {
+            /* allocate new and put on list */
+            stat = (bench_stats_t*)XMALLOC(sizeof(bench_stats_t), NULL, DYNAMIC_TYPE_INFO);
+            if (stat) {
+                XMEMSET(stat, 0, sizeof(bench_stats_t));
+
+                /* add to list */
+                stat->next = NULL;
+                if (bench_stats_tail == NULL)  {
+                    bench_stats_head = stat;
+                }
+                else {
+                    bench_stats_tail->next = stat;
+                    stat->prev = bench_stats_tail;
+                }
+                bench_stats_tail = stat; /* add to the end either way */
+            }
+        }
+
+        if (stat) {
+            int isLast = 0;
+            stat->type = type;
+            stat->algo = algo;
+            stat->strength = strength;
+            stat->desc = desc;
+            stat->doAsync = doAsync;
+            stat->perfsec += perfsec; /* accumulates the rate reported by each caller */
+            stat->finishCount++;
+
+            if (stat->finishCount == g_threadCount) {
+                isLast = 1; /* this caller brought the count up to g_threadCount */
+            }
+
+            pthread_mutex_unlock(&bench_lock);
+
+            /* spin until all g_threadCount reports are in; NOTE(review): finishCount is read here without bench_lock */
+            while (stat->finishCount < g_threadCount) {
+                wc_AsyncThreadYield();
+            }
+
+            /* only the caller that reported last prints the combined figure */
+            if (isLast) {
+                if (stat->type == BENCH_STAT_SYM) {
+                    printf("%-8s%s %8.3f MB/s\n", stat->desc,
+                        BENCH_ASYNC_GET_NAME(stat->doAsync), stat->perfsec);
+                }
+                else {
+                    printf("%-5s %4d %-9s %s %.3f ops/sec\n",
+                        stat->algo, stat->strength, stat->desc,
+                        BENCH_ASYNC_GET_NAME(stat->doAsync), stat->perfsec);
+                }
+            }
+
+            (void)blockType;
+        }
+        else {
+            pthread_mutex_unlock(&bench_lock); /* allocation failed: nothing recorded */
+        }
+
+        return stat;
     }
-#endif
+#endif /* WOLFSSL_ASYNC_CRYPT && !WC_NO_ASYNC_THREADING */
 
-#if defined(USE_WOLFSSL_MEMORY) && defined(WOLFSSL_TRACK_MEMORY)
-    InitMemoryTracker();
+static INLINE void bench_stats_init(void)
+{   /* empty the stats list (threaded async builds only) and run INIT_CYCLE_COUNTER */
+#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_NO_ASYNC_THREADING)
+    bench_stats_head = NULL;
+    bench_stats_tail = NULL;
 #endif
-
     INIT_CYCLE_COUNTER
+}
 
-#if defined(DEBUG_WOLFSSL) && !defined(HAVE_VALGRIND)
-    wolfSSL_Debugging_ON();
+static INLINE void bench_stats_start(int* count, double* start)
+{   /* begin a timed run: zero *count and store the current time in *start */
+    *count = 0;
+    *start = current_time(1);
+    BEGIN_INTEL_CYCLES
+}
+
+static INLINE int bench_stats_sym_check(double start)
+{   /* nonzero while less than BENCH_MIN_RUNTIME_SEC has elapsed since start */
+    return ((current_time(0) - start) < BENCH_MIN_RUNTIME_SEC);
+}
+
+static void bench_stats_sym_finish(const char* desc, int doAsync, int count, double start)
+{   /* end a timed symmetric run: print count, elapsed time and throughput for desc */
+    double total, persec;
+
+    END_INTEL_CYCLES
+    total = current_time(0) - start;
+
+    persec = 1 / total * count; /* blocks per second */
+#ifdef BENCH_EMBEDDED
+    /* since using kB, convert to MB/s */
+    persec = persec / 1024;
 #endif
 
-    (void)plain;
-    (void)cipher;
-    (void)key;
-    (void)iv;
+    printf("%-8s%s %5d %s took %5.3f seconds, %8.3f MB/s",
+        desc, BENCH_ASYNC_GET_NAME(doAsync), count, blockType, total, persec);
+    SHOW_INTEL_CYCLES
+    printf("\n");
+    (void)doAsync; /* otherwise unused when BENCH_ASYNC_GET_NAME expands to "" */
 
+#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_NO_ASYNC_THREADING)
+    /* Add to thread stats */
+    bench_stats_add(BENCH_STAT_SYM, NULL, 0, desc, doAsync, persec);
+#endif
+}
+
+static void bench_stats_asym_finish(const char* algo, int strength,
+    const char* desc, int doAsync, int count, double start)
+{   /* end a timed public-key run: print op count, elapsed time, per-op latency and ops/sec */
+    double total, each, opsSec, milliEach;
+
+    total = current_time(0) - start;
+    each  = total / count;     /* seconds per operation */
+    opsSec = count / total;    /* operations per second */
+    milliEach = each * 1000;   /* milliseconds per operation */
+
+    printf("%-5s %4d %-9s %s %6d ops took %5.3f sec, avg %5.3f ms,"
+        " %.3f ops/sec\n", algo, strength, desc, BENCH_ASYNC_GET_NAME(doAsync),
+        count, total, milliEach, opsSec);
+    (void)doAsync; /* otherwise unused when BENCH_ASYNC_GET_NAME expands to "" */
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_NO_ASYNC_THREADING)
+    /* Add to thread stats */
+    bench_stats_add(BENCH_STAT_ASYM, algo, strength, desc, doAsync, opsSec);
+#endif
+}
+
+static INLINE void bench_stats_free(void)
+{   /* release every record on the stats list; no-op outside threaded async builds */
+#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_NO_ASYNC_THREADING)
+    bench_stats_t* stat;
+    for (stat = bench_stats_head; stat != NULL; ) {
+        bench_stats_t* next = stat->next; /* read the link before the node is freed */
+        XFREE(stat, NULL, DYNAMIC_TYPE_INFO);
+        stat = next;
+    }
+    bench_stats_head = NULL;
+    bench_stats_tail = NULL;
+#endif
+}
+/******************************************************************************/
+/* End Stats Functions */
+/******************************************************************************/
+
+
+static void* benchmarks_do(void* args)
+{
 #ifdef WOLFSSL_ASYNC_CRYPT
-    if (wolfAsync_DevOpen(&devId) != 0) {
-        printf("Async device open failed\n");
-        exit(-1);
+#ifndef WC_NO_ASYNC_THREADING
+    ThreadData* threadData = (ThreadData*)args;
+
+    if (wolfAsync_DevOpenThread(&devId, &threadData->thread_id) < 0)
+#else
+    if (wolfAsync_DevOpen(&devId) < 0)
+#endif
+    {
+        printf("Async device open failed\nRunning without async\n");
     }
 #endif /* WOLFSSL_ASYNC_CRYPT */
 
+    (void)args;
+
 #if defined(HAVE_LOCAL_RNG)
     {
         int rngRet;
 
 #ifndef HAVE_FIPS
-        rngRet = wc_InitRng_ex(&rng, HEAP_HINT);
+        rngRet = wc_InitRng_ex(&rng, HEAP_HINT, INVALID_DEVID);
 #else
         rngRet = wc_InitRng(&rng);
 #endif
         if (rngRet < 0) {
             printf("InitRNG failed\n");
-            EXIT_TEST(rngRet);
         }
     }
 #endif
@@ -361,11 +691,20 @@ int benchmark_test(void *args)
 #endif /* WC_NO_RNG */
 #ifndef NO_AES
 #ifdef HAVE_AES_CBC
-    bench_aes(0);
-    bench_aes(1);
+    #ifndef NO_SW_BENCH
+        bench_aescbc(0);
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
+        bench_aescbc(1);
+    #endif
 #endif
 #ifdef HAVE_AESGCM
-    bench_aesgcm();
+    #ifndef NO_SW_BENCH
+        bench_aesgcm(0);
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
+        bench_aesgcm(1);
+    #endif
 #endif
 #ifdef WOLFSSL_AES_COUNTER
     bench_aesctr();
@@ -379,7 +718,12 @@ int benchmark_test(void *args)
     bench_camellia();
 #endif
 #ifndef NO_RC4
-    bench_arc4();
+    #ifndef NO_SW_BENCH
+        bench_arc4(0);
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ARC4)
+        bench_arc4(1);
+    #endif
 #endif
 #ifdef HAVE_HC128
     bench_hc128();
@@ -394,34 +738,67 @@ int benchmark_test(void *args)
     bench_chacha20_poly1305_aead();
 #endif
 #ifndef NO_DES3
-    bench_des();
+    #ifndef NO_SW_BENCH
+        bench_des(0);
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES)
+        bench_des(1);
+    #endif
 #endif
 #ifdef HAVE_IDEA
     bench_idea();
 #endif
 
-    printf("\n");
-
 #ifndef NO_MD5
-    bench_md5();
+    #ifndef NO_SW_BENCH
+        bench_md5(0);
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5)
+        bench_md5(1);
+    #endif
 #endif
 #ifdef HAVE_POLY1305
     bench_poly1305();
 #endif
 #ifndef NO_SHA
-    bench_sha();
+    #ifndef NO_SW_BENCH
+        bench_sha(0);
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA)
+        bench_sha(1);
+    #endif
 #endif
 #ifdef WOLFSSL_SHA224
-    bench_sha224();
+    #ifndef NO_SW_BENCH
+        bench_sha224(0);
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224)
+        bench_sha224(1);
+    #endif
 #endif
 #ifndef NO_SHA256
-    bench_sha256();
+    #ifndef NO_SW_BENCH
+        bench_sha256(0);
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
+        bench_sha256(1);
+    #endif
 #endif
 #ifdef WOLFSSL_SHA384
-    bench_sha384();
+    #ifndef NO_SW_BENCH
+        bench_sha384(0);
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384)
+        bench_sha384(1);
+    #endif
 #endif
 #ifdef WOLFSSL_SHA512
-    bench_sha512();
+    #ifndef NO_SW_BENCH
+        bench_sha512(0);
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512)
+        bench_sha512(1);
+    #endif
 #endif
 #ifdef WOLFSSL_RIPEMD
     bench_ripemd();
@@ -433,26 +810,37 @@ int benchmark_test(void *args)
     bench_cmac();
 #endif
 
-    printf("\n");
-
 #ifdef HAVE_SCRYPT
     bench_scrypt();
 #endif
 
-    printf("\n");
-
 #ifndef NO_RSA
-    bench_rsa();
-    #ifdef WOLFSSL_ASYNC_CRYPT
-        bench_rsa_async();
-    #endif
     #ifdef WOLFSSL_KEY_GEN
-        bench_rsaKeyGen();
+        #ifndef NO_SW_BENCH
+            bench_rsaKeyGen(0);
+        #endif
+        #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA)
+            /* async supported in simulator only */
+            #ifdef WOLFSSL_ASYNC_CRYPT_TEST
+                bench_rsaKeyGen(1);
+            #endif
+        #endif
+    #endif
+    #ifndef NO_SW_BENCH
+        bench_rsa(0);
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA)
+        bench_rsa(1);
     #endif
 #endif
 
 #ifndef NO_DH
-    bench_dh();
+    #ifndef NO_SW_BENCH
+        bench_dh(0);
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH)
+        bench_dh(1);
+    #endif
 #endif
 
 #ifdef HAVE_NTRU
@@ -461,18 +849,24 @@ int benchmark_test(void *args)
 #endif
 
 #ifdef HAVE_ECC
-    bench_eccKeyGen();
-    bench_eccKeyAgree();
+    #ifndef NO_SW_BENCH
+        bench_eccMakeKey(0);
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+        /* async supported in simulator only */
+        #ifdef WOLFSSL_ASYNC_CRYPT_TEST
+            bench_eccMakeKey(1);
+        #endif
+    #endif
+    #ifndef NO_SW_BENCH
+        bench_ecc(0);
+    #endif
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+        bench_ecc(1);
+    #endif
     #ifdef HAVE_ECC_ENCRYPT
         bench_eccEncrypt();
     #endif
-
-    #if defined(FP_ECC)
-        wc_ecc_fp_free();
-    #endif
-    #ifdef ECC_CACHE_CURVE
-        wc_ecc_curve_cache_free();
-    #endif
 #endif
 
 #ifdef HAVE_CURVE25519
@@ -495,139 +889,176 @@ int benchmark_test(void *args)
     wolfAsync_DevClose(&devId);
 #endif
 
-#if defined(USE_WOLFSSL_MEMORY) && defined(WOLFSSL_TRACK_MEMORY)
-    ShowMemoryTracker();
-#endif
-
-    EXIT_TEST(0);
+    return NULL;
 }
 
 
-#ifndef NO_MAIN_DRIVER
-int main(int argc, char** argv)
+/* so embedded projects can pull in tests on their own */
+#ifdef HAVE_STACK_SIZE
+THREAD_RETURN WOLFSSL_THREAD benchmark_test(void* args)
+#else
+int benchmark_test(void *args)
+#endif
 {
-    int ret;
+    int ret = 0;
 
-    (void)argc;
-    (void)argv;
-
-#ifdef HAVE_WNR
-    if (wc_InitNetRandom(wnrConfigFile, NULL, 5000) != 0) {
-        printf("Whitewood netRandom config init failed\n");
-        exit(-1);
+#ifdef WOLFSSL_STATIC_MEMORY
+    ret = wc_LoadStaticMemory(&HEAP_HINT, gBenchMemory, sizeof(gBenchMemory),
+                                                            WOLFMEM_GENERAL, 1);
+    if (ret != 0) {
+        printf("unable to load static memory %d\n", ret);
+        EXIT_TEST(EXIT_FAILURE);
     }
-#endif /* HAVE_WNR */
+#endif /* WOLFSSL_STATIC_MEMORY */
+
+    (void)args;
 
     wolfCrypt_Init();
 
-#ifdef HAVE_STACK_SIZE
-    ret = StackSizeCheck(NULL, benchmark_test);
-#else
-    ret = benchmark_test(NULL);
+    bench_stats_init();
+
+#if defined(DEBUG_WOLFSSL) && !defined(HAVE_VALGRIND)
+    wolfSSL_Debugging_ON();
 #endif
 
-    if (wolfCrypt_Cleanup() != 0) {
-        printf("Error with wolfCrypt_Cleanup!\n");
-        exit(-1);
-    }
+    printf("wolfCrypt Benchmark (min %.1f sec each)\n", BENCH_MIN_RUNTIME_SEC);
 
 #ifdef HAVE_WNR
-    if (wc_FreeNetRandom() < 0) {
-        printf("Failed to free netRandom context\n");
-        exit(-1);
+    ret = wc_InitNetRandom(wnrConfigFile, NULL, 5000);
+    if (ret != 0) {
+        printf("Whitewood netRandom config init failed %d\n", ret);
+        EXIT_TEST(EXIT_FAILURE);
     }
 #endif /* HAVE_WNR */
 
-    return ret;
-}
-#endif /* NO_MAIN_DRIVER */
-
-
-#ifdef BENCH_EMBEDDED
-enum BenchmarkBounds {
-    numBlocks  = 25, /* how many kB to test (en/de)cryption */
-    scryptCnt  = 1,
-    ntimes     = 1,
-    genTimes   = 5,  /* public key iterations */
-    agreeTimes = 5
-};
-static const char blockType[] = "kB";   /* used in printf output */
-#else
-enum BenchmarkBounds {
-    numBlocks  = 50,  /* how many megs to test (en/de)cryption */
-    scryptCnt  = 10,
+    /* setup bench plain, cipher, key and iv globals */
 #ifdef WOLFSSL_ASYNC_CRYPT
-    ntimes     = 1000,
-    genTimes   = 1000,
-    agreeTimes = 1000
+    bench_plain = (byte*)XMALLOC(BENCH_SIZE+BENCH_CIPHER_ADD, HEAP_HINT, DYNAMIC_TYPE_WOLF_BIGINT);
+    bench_cipher = (byte*)XMALLOC(BENCH_SIZE+BENCH_CIPHER_ADD, HEAP_HINT, DYNAMIC_TYPE_WOLF_BIGINT);
+    bench_key = (byte*)XMALLOC(sizeof(bench_key_buf), HEAP_HINT, DYNAMIC_TYPE_WOLF_BIGINT);
+    bench_iv = (byte*)XMALLOC(sizeof(bench_iv_buf), HEAP_HINT, DYNAMIC_TYPE_WOLF_BIGINT);
+    if (bench_plain == NULL || bench_cipher == NULL || bench_key == NULL || bench_iv == NULL) {
+        printf("Benchmark cipher buffer alloc failed!\n");
+        EXIT_TEST(EXIT_FAILURE);
+    }
+    XMEMCPY(bench_key, bench_key_buf, sizeof(bench_key_buf));
+    XMEMCPY(bench_iv, bench_iv_buf, sizeof(bench_iv_buf));
+    XMEMSET(bench_plain, 0, BENCH_SIZE+BENCH_CIPHER_ADD);
+    XMEMSET(bench_cipher, 0, BENCH_SIZE+BENCH_CIPHER_ADD);
+#endif
+    (void)bench_plain;
+    (void)bench_cipher;
+    (void)bench_key;
+    (void)bench_iv;
+
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && !defined(WC_NO_ASYNC_THREADING)
+{
+    int i;
+    int numCpus = wc_AsyncGetNumberOfCpus();
+
+    printf("CPUs: %d\n", numCpus);
+
+    g_threadData = (ThreadData*)XMALLOC(sizeof(ThreadData) * numCpus,
+        HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+    if (g_threadData == NULL) {
+        printf("Thread data alloc failed!\n");
+        EXIT_TEST(EXIT_FAILURE);
+    }
+    g_threadCount = numCpus;
+
+    /* Create threads */
+    for (i = 0; i < numCpus; i++) {
+        ret = wc_AsyncThreadCreate(&g_threadData[i].thread_id,
+            benchmarks_do, &g_threadData[i]);
+        if (ret != 0) {
+            printf("Error creating benchmark thread %d\n", ret);
+            EXIT_TEST(EXIT_FAILURE);
+        }
+    }
+
+    /* Wait for all threads to finish */
+    for (i = 0; i < numCpus; i++) {
+        wc_AsyncThreadJoin(&g_threadData[i].thread_id);
+    }
+
+    XFREE(g_threadData, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
+}
 #else
-    ntimes     = 100,
-    genTimes   = 100,
-    agreeTimes = 100
+    benchmarks_do(NULL);
 #endif
-};
-static const char blockType[] = "megs"; /* used in printf output */
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    XFREE(bench_plain, HEAP_HINT, DYNAMIC_TYPE_WOLF_BIGINT);
+    XFREE(bench_cipher, HEAP_HINT, DYNAMIC_TYPE_WOLF_BIGINT);
+    XFREE(bench_key, HEAP_HINT, DYNAMIC_TYPE_WOLF_BIGINT);
+    XFREE(bench_iv, HEAP_HINT, DYNAMIC_TYPE_WOLF_BIGINT);
 #endif
 
+#ifdef HAVE_WNR
+    ret = wc_FreeNetRandom();
+    if (ret < 0) {
+        printf("Failed to free netRandom context %d\n", ret);
+        EXIT_TEST(EXIT_FAILURE);
+    }
+#endif
+
+    bench_stats_free();
+
+	if (wolfCrypt_Cleanup() != 0) {
+        printf("error with wolfCrypt_Cleanup\n");
+    }
+
+    EXIT_TEST(ret);
+}
+
+
 #ifndef WC_NO_RNG
 void bench_rng(void)
 {
-    int    ret, i;
-    double start, total, persec;
-    int pos, len, remain;
-#ifndef HAVE_LOCAL_RNG
-    WC_RNG rng;
-#endif
+    int    ret, i, count;
+    double start;
+    int    pos, len, remain;
+    WC_RNG myrng; /* local RNG instance; freed before returning */
 
-#ifndef HAVE_LOCAL_RNG
 #ifndef HAVE_FIPS
-    ret = wc_InitRng_ex(&rng, HEAP_HINT);
+    ret = wc_InitRng_ex(&myrng, HEAP_HINT, devId);
 #else
-    ret = wc_InitRng(&rng);
+    ret = wc_InitRng(&myrng);
 #endif
     if (ret < 0) {
-        printf("InitRNG failed\n");
+        printf("InitRNG failed %d\n", ret);
         return;
     }
-#endif
 
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < numBlocks; i++) {
+            /* fill BENCH_SIZE bytes of bench_plain in chunks of at most RNG_MAX_BLOCK_LEN */
+            pos = 0;
+            remain = (int)BENCH_SIZE;
+            while (remain > 0) {
+                len = remain;
+                if (len > RNG_MAX_BLOCK_LEN)
+                    len = RNG_MAX_BLOCK_LEN;
+                ret = wc_RNG_GenerateBlock(&myrng, &bench_plain[pos], len);
+                if (ret < 0)
+                    goto exit_rng;
 
-    for(i = 0; i < numBlocks; i++) {
-        /* Split request to handle large RNG request */
-        pos = 0;
-        remain = (int)sizeof(plain);
-        while (remain > 0) {
-            len = remain;
-            if (len > RNG_MAX_BLOCK_LEN)
-                len = RNG_MAX_BLOCK_LEN;
-            ret = wc_RNG_GenerateBlock(&rng, &plain[pos], len);
-            if (ret < 0) {
-                printf("wc_RNG_GenerateBlock failed %d\n", ret);
-                break;
+                remain -= len;
+                pos += len;
             }
-            remain -= len;
-            pos += len;
         }
+        count += i; /* i == numBlocks after a full pass */
+    } while (bench_stats_sym_check(start));
+exit_rng:
+    bench_stats_sym_finish("RNG", 0, count, start);
+
+    if (ret < 0) {
+        printf("wc_RNG_GenerateBlock failed %d\n", ret);
     }
 
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-    printf("RNG      %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                                  blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
-
-#ifndef HAVE_LOCAL_RNG
-    wc_FreeRng(&rng);
-#endif
+    wc_FreeRng(&myrng);
 }
 #endif /* WC_NO_RNG */
 
@@ -635,146 +1066,180 @@ void bench_rng(void)
 #ifndef NO_AES
 
 #ifdef HAVE_AES_CBC
-void bench_aes(int show)
+void bench_aescbc(int doAsync) /* doAsync picks devId (nonzero) or INVALID_DEVID (0) for wc_AesInit */
 {
-    Aes    enc;
-    double start, total, persec;
-    int    i;
-    int    ret;
+    int    ret, i, count = 0, times;
+    Aes    enc[BENCH_MAX_PENDING];
+    double start;
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    if ((ret = wc_AesAsyncInit(&enc, devId)) != 0) {
-        printf("wc_AesAsyncInit failed, ret = %d\n", ret);
-        return;
+    bench_async_begin();
+
+    /* zero every context up front; the exit path calls wc_AesFree on all of them */
+    XMEMSET(enc, 0, sizeof(enc));
+
+    /* init each context and load the 16-byte key for encryption */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        if ((ret = wc_AesInit(&enc[i], HEAP_HINT,
+                                doAsync ? devId : INVALID_DEVID)) != 0) {
+            printf("AesInit failed, ret = %d\n", ret);
+            goto exit;
+        }
+
+        ret = wc_AesSetKey(&enc[i], bench_key, 16, bench_iv, AES_ENCRYPTION);
+        if (ret != 0) {
+            printf("AesSetKey failed, ret = %d\n", ret);
+            goto exit;
+        }
     }
-#endif
 
-    ret = wc_AesSetKey(&enc, key, 16, iv, AES_ENCRYPTION);
-    if (ret != 0) {
-        printf("AesSetKey failed, ret = %d\n", ret);
-        return;
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < numBlocks || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
+
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times, numBlocks)) {
+                    ret = wc_AesCbcEncrypt(&enc[i], bench_plain, bench_cipher,
+                        BENCH_SIZE);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times)) {
+                        goto exit_aes_enc;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+    } while (bench_stats_sym_check(start));
+exit_aes_enc:
+    bench_stats_sym_finish("AES-Enc", doAsync, count, start);
+
+    if (ret < 0) {
+        goto exit;
     }
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
-
-    for(i = 0; i < numBlocks; i++)
-        wc_AesCbcEncrypt(&enc, plain, cipher, sizeof(plain));
-
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-
-    if (show) {
-        printf("AES enc  %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                                  blockType, total, persec);
-        SHOW_INTEL_CYCLES
-        printf("\n");
-    }
-#ifdef WOLFSSL_ASYNC_CRYPT
-    wc_AesAsyncFree(&enc);
-    if ((ret = wc_AesAsyncInit(&enc, devId)) != 0) {
-        printf("wc_AesAsyncInit failed, ret = %d\n", ret);
-        return;
-    }
-#endif
 
 #ifdef HAVE_AES_DECRYPT
-    ret = wc_AesSetKey(&enc, key, 16, iv, AES_DECRYPTION);
-    if (ret != 0) {
-        printf("AesSetKey failed, ret = %d\n", ret);
-        return;
+    /* re-key every context for decryption */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        ret = wc_AesSetKey(&enc[i], bench_key, 16, bench_iv, AES_DECRYPTION);
+        if (ret != 0) {
+            printf("AesSetKey failed, ret = %d\n", ret);
+            goto exit;
+        }
     }
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
 
-    for(i = 0; i < numBlocks; i++)
-        wc_AesCbcDecrypt(&enc, plain, cipher, sizeof(plain));
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < numBlocks || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
 
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times, numBlocks)) {
+                    ret = wc_AesCbcDecrypt(&enc[i], bench_plain, bench_cipher,
+                        BENCH_SIZE);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times)) {
+                        goto exit_aes_dec;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+    } while (bench_stats_sym_check(start));
+exit_aes_dec:
+    bench_stats_sym_finish("AES-Dec", doAsync, count, start);
 
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-
-    if (show) {
-        printf("AES dec  %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                                  blockType, total, persec);
-        SHOW_INTEL_CYCLES
-        printf("\n");
-    }
 #endif /* HAVE_AES_DECRYPT */
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    wc_AesAsyncFree(&enc);
-#endif
+exit:
+
+    if (ret < 0) {
+        printf("bench_aescbc failed: %d\n", ret);
+    }
+
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        wc_AesFree(&enc[i]);
+    }
+
+    bench_async_end();
 }
 #endif /* HAVE_AES_CBC */
 
-#if defined(HAVE_AESGCM) || defined(HAVE_AESCCM)
-    static byte additional[13];
-    static byte tag[16];
-#endif
-
-
 #ifdef HAVE_AESGCM
-void bench_aesgcm(void)
+void bench_aesgcm(int doAsync)
 {
-    Aes    enc;
-    double start, total, persec;
-    int    i;
+    int    ret, i, count = 0, times;
+    Aes    enc[BENCH_MAX_PENDING];
+    double start;
 
-    wc_AesGcmSetKey(&enc, key, 16);
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
+    DECLARE_VAR(bench_additional, byte, AES_AUTH_ADD_SZ, HEAP_HINT);
+    DECLARE_VAR(bench_tag, byte, AES_AUTH_TAG_SZ, HEAP_HINT);
 
-    for(i = 0; i < numBlocks; i++)
-        wc_AesGcmEncrypt(&enc, cipher, plain, sizeof(plain), iv, 12,
-                        tag, 16, additional, 13);
+    bench_async_begin();
 
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
+    /* clear for done cleanup */
+    XMEMSET(enc, 0, sizeof(enc));
+#ifdef WOLFSSL_ASYNC_CRYPT
+    if (bench_additional)
 #endif
-
-    printf("AES-GCM  %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
-
-#if 0
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
-
-    for(i = 0; i < numBlocks; i++)
-        wc_AesGcmDecrypt(&enc, plain, cipher, sizeof(cipher), iv, 12,
-                        tag, 16, additional, 13);
-
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
+    {   XMEMSET(bench_additional, 0, AES_AUTH_ADD_SZ); }
+#ifdef WOLFSSL_ASYNC_CRYPT
+    if (bench_tag)
 #endif
+    {   XMEMSET(bench_tag, 0, AES_AUTH_TAG_SZ); }
 
-    printf("AES-GCM Decrypt %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
-#endif
+    /* init keys */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        if ((ret = wc_AesInit(&enc[i], HEAP_HINT,
+                        doAsync ? devId : INVALID_DEVID)) != 0) {
+            printf("AesInit failed, ret = %d\n", ret);
+            goto exit;
+        }
+
+        ret = wc_AesGcmSetKey(&enc[i], bench_key, 16);
+        if (ret != 0) {
+            printf("AesGcmSetKey failed, ret = %d\n", ret);
+            goto exit;
+        }
+    }
+
+    /* GCM uses same routine in backend for both encrypt and decrypt */
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < numBlocks || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
+
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times, numBlocks)) {
+                    ret = wc_AesGcmEncrypt(&enc[i], bench_cipher,
+                        bench_plain, BENCH_SIZE,
+                        bench_iv, 12, bench_tag, AES_AUTH_TAG_SZ,
+                        bench_additional, AES_AUTH_ADD_SZ);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times)) {
+                        goto exit_aes_gcm;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+    } while (bench_stats_sym_check(start));
+exit_aes_gcm:
+    bench_stats_sym_finish("AES-GCM", doAsync, count, start);
+
+exit:
+
+    if (ret < 0) {
+        printf("bench_aesgcm failed: %d\n", ret);
+    }
+
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        wc_AesFree(&enc[i]);
+    }
+
+    FREE_VAR(bench_additional, HEAP_HINT);
+    FREE_VAR(bench_tag, HEAP_HINT);
+
+    bench_async_end();
 }
 #endif /* HAVE_AESGCM */
 
@@ -783,29 +1248,19 @@ void bench_aesgcm(void)
 void bench_aesctr(void)
 {
     Aes    enc;
-    double start, total, persec;
-    int    i;
+    double start;
+    int    i, count;
 
-    wc_AesSetKeyDirect(&enc, key, AES_BLOCK_SIZE, iv, AES_ENCRYPTION);
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
+    wc_AesSetKeyDirect(&enc, bench_key, AES_BLOCK_SIZE, bench_iv, AES_ENCRYPTION);
 
-    for(i = 0; i < numBlocks; i++)
-        wc_AesCtrEncrypt(&enc, plain, cipher, sizeof(plain));
-
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-
-    printf("AES-CTR  %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < numBlocks; i++) {
+            wc_AesCtrEncrypt(&enc, bench_plain, bench_cipher, BENCH_SIZE);
+        }
+        count += i;
+    } while (bench_stats_sym_check(start));
+    bench_stats_sym_finish("AES-CTR", 0, count, start);
 }
 #endif /* WOLFSSL_AES_COUNTER */
 
@@ -814,34 +1269,30 @@ void bench_aesctr(void)
 void bench_aesccm(void)
 {
     Aes    enc;
-    double start, total, persec;
-    int    i;
-    int    ret;
+    double start;
+    int    ret, i, count;
 
-    if ((ret = wc_AesCcmSetKey(&enc, key, 16)) != 0) {
+    DECLARE_VAR(bench_additional, byte, AES_AUTH_ADD_SZ, HEAP_HINT);
+    DECLARE_VAR(bench_tag, byte, AES_AUTH_TAG_SZ, HEAP_HINT);
+
+    if ((ret = wc_AesCcmSetKey(&enc, bench_key, 16)) != 0) {
         printf("wc_AesCcmSetKey failed, ret = %d\n", ret);
         return;
     }
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
 
-    for(i = 0; i < numBlocks; i++)
-        wc_AesCcmEncrypt(&enc, cipher, plain, sizeof(plain), iv, 12,
-                        tag, 16, additional, 13);
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < numBlocks; i++) {
+            wc_AesCcmEncrypt(&enc, bench_cipher, bench_plain, BENCH_SIZE,
+                bench_iv, 12, bench_tag, AES_AUTH_TAG_SZ,
+                bench_additional, AES_AUTH_ADD_SZ);
+        }
+        count += i;
+    } while (bench_stats_sym_check(start));
+    bench_stats_sym_finish("AES-CCM", 0, count, start);
 
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-
-    printf("AES-CCM  %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    FREE_VAR(bench_additional, HEAP_HINT);
+    FREE_VAR(bench_tag, HEAP_HINT);
 }
 #endif /* HAVE_AESCCM */
 #endif /* !NO_AES */
@@ -850,38 +1301,26 @@ void bench_aesccm(void)
 #ifdef HAVE_POLY1305
 void bench_poly1305()
 {
-    Poly1305    enc;
-    byte   mac[16];
-    double start, total, persec;
-    int    i;
-    int    ret;
+    Poly1305 enc;
+    byte     mac[16];
+    double   start;
+    int      ret, i, count;
 
-
-    ret = wc_Poly1305SetKey(&enc, key, 32);
+    ret = wc_Poly1305SetKey(&enc, bench_key, 32);
     if (ret != 0) {
         printf("Poly1305SetKey failed, ret = %d\n", ret);
         return;
     }
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
 
-    for(i = 0; i < numBlocks; i++)
-        wc_Poly1305Update(&enc, plain, sizeof(plain));
-
-    wc_Poly1305Final(&enc, mac);
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-
-    printf("POLY1305 %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                                  blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < numBlocks; i++) {
+            wc_Poly1305Update(&enc, bench_plain, BENCH_SIZE);
+        }
+        wc_Poly1305Final(&enc, mac);
+        count += i;
+    } while (bench_stats_sym_check(start));
+    bench_stats_sym_finish("POLY1305", 0, count, start);
 }
 #endif /* HAVE_POLY1305 */
 
@@ -890,178 +1329,197 @@ void bench_poly1305()
 void bench_camellia(void)
 {
     Camellia cam;
-    double start, total, persec;
-    int    i, ret;
+    double   start;
+    int      ret, i, count;
 
-    ret = wc_CamelliaSetKey(&cam, key, 16, iv);
+    ret = wc_CamelliaSetKey(&cam, bench_key, 16, bench_iv);
     if (ret != 0) {
         printf("CamelliaSetKey failed, ret = %d\n", ret);
         return;
     }
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
 
-    for(i = 0; i < numBlocks; i++)
-        wc_CamelliaCbcEncrypt(&cam, plain, cipher, sizeof(plain));
-
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-
-    printf("Camellia %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < numBlocks; i++) {
+            wc_CamelliaCbcEncrypt(&cam, bench_plain, bench_cipher,
+                BENCH_SIZE);
+        }
+        count += i;
+    } while (bench_stats_sym_check(start));
+    bench_stats_sym_finish("Camellia", 0, count, start);
 }
 #endif
 
 
 #ifndef NO_DES3
-void bench_des(void)
+void bench_des(int doAsync)
 {
-    Des3   enc;
-    double start, total, persec;
-    int    i, ret;
+    int    ret, i, count = 0, times;
+    Des3   enc[BENCH_MAX_PENDING];
+    double start;
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    if (wc_Des3AsyncInit(&enc, devId) != 0)
-        printf("des3 async init failed\n");
-#endif
-    ret = wc_Des3_SetKey(&enc, key, iv, DES_ENCRYPTION);
-    if (ret != 0) {
-        printf("Des3_SetKey failed, ret = %d\n", ret);
-        return;
+    bench_async_begin();
+
+    /* clear for done cleanup */
+    XMEMSET(enc, 0, sizeof(enc));
+
+    /* init keys */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        if ((ret = wc_Des3Init(&enc[i], HEAP_HINT,
+                                doAsync ? devId : INVALID_DEVID)) != 0) {
+            printf("Des3Init failed, ret = %d\n", ret);
+            goto exit;
+        }
+
+        ret = wc_Des3_SetKey(&enc[i], bench_key, bench_iv, DES_ENCRYPTION);
+        if (ret != 0) {
+            printf("Des3_SetKey failed, ret = %d\n", ret);
+            goto exit;
+        }
     }
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
 
-    for(i = 0; i < numBlocks; i++)
-        wc_Des3_CbcEncrypt(&enc, plain, cipher, sizeof(plain));
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < numBlocks || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
 
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times, numBlocks)) {
+                    ret = wc_Des3_CbcEncrypt(&enc[i], bench_plain, bench_cipher,
+                        BENCH_SIZE);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times)) {
+                        goto exit_3des;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+    } while (bench_stats_sym_check(start));
+exit_3des:
+    bench_stats_sym_finish("3DES", doAsync, count, start);
 
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
+exit:
 
-    printf("3DES     %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
-#ifdef WOLFSSL_ASYNC_CRYPT
-    wc_Des3AsyncFree(&enc);
-#endif
+    if (ret < 0) {
+        printf("bench_des failed: %d\n", ret);
+    }
+
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        wc_Des3Free(&enc[i]);
+    }
+
+    bench_async_end();
 }
-#endif
+#endif /* !NO_DES3 */
 
 
 #ifdef HAVE_IDEA
 void bench_idea(void)
 {
     Idea   enc;
-    double start, total, persec;
-    int    i, ret;
+    double start;
+    int    ret, i, count;
 
-    ret = wc_IdeaSetKey(&enc, key, IDEA_KEY_SIZE, iv, IDEA_ENCRYPTION);
+    ret = wc_IdeaSetKey(&enc, bench_key, IDEA_KEY_SIZE, bench_iv,
+        IDEA_ENCRYPTION);
     if (ret != 0) {
         printf("Des3_SetKey failed, ret = %d\n", ret);
         return;
     }
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
 
-    for(i = 0; i < numBlocks; i++)
-        wc_IdeaCbcEncrypt(&enc, plain, cipher, sizeof(plain));
-
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-
-    printf("IDEA     %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < numBlocks; i++) {
+            wc_IdeaCbcEncrypt(&enc, bench_plain, bench_cipher, BENCH_SIZE);
+        }
+        count += i;
+    } while (bench_stats_sym_check(start));
+    bench_stats_sym_finish("IDEA", 0, count, start);
 }
 #endif /* HAVE_IDEA */
 
 
 #ifndef NO_RC4
-void bench_arc4(void)
+void bench_arc4(int doAsync)
 {
-    Arc4   enc;
-    double start, total, persec;
-    int    i;
+    int    ret, i, count = 0, times;
+    Arc4   enc[BENCH_MAX_PENDING];
+    double start;
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    if (wc_Arc4AsyncInit(&enc, devId) != 0)
-        printf("arc4 async init failed\n");
-#endif
+    bench_async_begin();
 
-    wc_Arc4SetKey(&enc, key, 16);
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
+    /* clear for done cleanup */
+    XMEMSET(enc, 0, sizeof(enc));
 
-    for(i = 0; i < numBlocks; i++)
-        wc_Arc4Process(&enc, cipher, plain, sizeof(plain));
+    /* init keys */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        if ((ret = wc_Arc4Init(&enc[i], HEAP_HINT,
+                            doAsync ? devId : INVALID_DEVID)) != 0) {
+            printf("Arc4Init failed, ret = %d\n", ret);
+            goto exit;
+        }
 
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
+        ret = wc_Arc4SetKey(&enc[i], bench_key, 16);
+        if (ret != 0) {
+            printf("Arc4SetKey failed, ret = %d\n", ret);
+            goto exit;
+        }
+    }
 
-    printf("ARC4     %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
-#ifdef WOLFSSL_ASYNC_CRYPT
-    wc_Arc4AsyncFree(&enc);
-#endif
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < numBlocks || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
+
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times, numBlocks)) {
+                    ret = wc_Arc4Process(&enc[i], bench_cipher, bench_plain,
+                        BENCH_SIZE);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, &times)) {
+                        goto exit_arc4;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+    } while (bench_stats_sym_check(start));
+exit_arc4:
+    bench_stats_sym_finish("ARC4", doAsync, count, start);
+
+exit:
+
+    if (ret < 0) {
+        printf("bench_arc4 failed: %d\n", ret);
+    }
+
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        wc_Arc4Free(&enc[i]);
+    }
+
+    bench_async_end();
 }
-#endif
+#endif /* !NO_RC4 */
 
 
 #ifdef HAVE_HC128
 void bench_hc128(void)
 {
     HC128  enc;
-    double start, total, persec;
-    int    i;
+    double start;
+    int    i, count;
 
-    wc_Hc128_SetKey(&enc, key, iv);
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
+    wc_Hc128_SetKey(&enc, bench_key, bench_iv);
 
-    for(i = 0; i < numBlocks; i++)
-        wc_Hc128_Process(&enc, cipher, plain, sizeof(plain));
-
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-
-    printf("HC128    %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < numBlocks; i++) {
+            wc_Hc128_Process(&enc, bench_cipher, bench_plain, BENCH_SIZE);
+        }
+        count += i;
+    } while (bench_stats_sym_check(start));
+    bench_stats_sym_finish("HC128", 0, count, start);
 }
 #endif /* HAVE_HC128 */
 
@@ -1069,29 +1527,20 @@ void bench_hc128(void)
 #ifndef NO_RABBIT
 void bench_rabbit(void)
 {
-    Rabbit  enc;
-    double start, total, persec;
-    int    i;
+    Rabbit enc;
+    double start;
+    int    i, count;
 
-    wc_RabbitSetKey(&enc, key, iv);
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
+    wc_RabbitSetKey(&enc, bench_key, bench_iv);
 
-    for(i = 0; i < numBlocks; i++)
-        wc_RabbitProcess(&enc, cipher, plain, sizeof(plain));
-
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-
-    printf("RABBIT   %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < numBlocks; i++) {
+            wc_RabbitProcess(&enc, bench_cipher, bench_plain, BENCH_SIZE);
+        }
+        count += i;
+    } while (bench_stats_sym_check(start));
+    bench_stats_sym_finish("RABBIT", 0, count, start);
 }
 #endif /* NO_RABBIT */
 
@@ -1100,315 +1549,496 @@ void bench_rabbit(void)
 void bench_chacha(void)
 {
     ChaCha enc;
-    double start, total, persec;
-    int    i;
+    double start;
+    int    i, count;
 
-    wc_Chacha_SetKey(&enc, key, 16);
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
-
-    for (i = 0; i < numBlocks; i++) {
-        wc_Chacha_SetIV(&enc, iv, 0);
-        wc_Chacha_Process(&enc, cipher, plain, sizeof(plain));
-    }
-
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-
-    printf("CHACHA   %d %s took %5.3f seconds, %8.3f MB/s", numBlocks, blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    wc_Chacha_SetKey(&enc, bench_key, 16);
 
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < numBlocks; i++) {
+            wc_Chacha_SetIV(&enc, bench_iv, 0);
+            wc_Chacha_Process(&enc, bench_cipher, bench_plain, BENCH_SIZE);
+        }
+        count += i;
+    } while (bench_stats_sym_check(start));
+    bench_stats_sym_finish("CHACHA", 0, count, start);
 }
 #endif /* HAVE_CHACHA*/
 
 #if defined(HAVE_CHACHA) && defined(HAVE_POLY1305)
 void bench_chacha20_poly1305_aead(void)
 {
-    double start, total, persec;
-    int    i;
+    double start;
+    int    i, count;
 
     byte authTag[CHACHA20_POLY1305_AEAD_AUTHTAG_SIZE];
-    XMEMSET( authTag, 0, sizeof( authTag ) );
-
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
-
-    for (i = 0; i < numBlocks; i++)
-    {
-        wc_ChaCha20Poly1305_Encrypt(key, iv, NULL, 0, plain, sizeof(plain),
-                                    cipher, authTag );
-    }
-
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-
-    printf("CHA-POLY %d %s took %5.3f seconds, %8.3f MB/s",
-           numBlocks, blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    XMEMSET(authTag, 0, sizeof(authTag));
 
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < numBlocks; i++) {
+            wc_ChaCha20Poly1305_Encrypt(bench_key, bench_iv, NULL, 0,
+                bench_plain, BENCH_SIZE, bench_cipher, authTag);
+        }
+        count += i;
+    } while (bench_stats_sym_check(start));
+    bench_stats_sym_finish("CHA-POLY", 0, count, start);
 }
 #endif /* HAVE_CHACHA && HAVE_POLY1305 */
 
 
 #ifndef NO_MD5
-void bench_md5(void)
+void bench_md5(int doAsync)
 {
-    Md5    hash;
-    byte   digest[MD5_DIGEST_SIZE];
-    double start, total, persec;
-    int    i;
+    Md5    hash[BENCH_MAX_PENDING];
+    double start;
+    int    ret, i, count = 0, times;
+    DECLARE_ARRAY(digest, byte, BENCH_MAX_PENDING, MD5_DIGEST_SIZE, HEAP_HINT);
 
-    wc_InitMd5(&hash);
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
+    bench_async_begin();
 
-    for(i = 0; i < numBlocks; i++)
-        wc_Md5Update(&hash, plain, sizeof(plain));
+    /* clear for done cleanup */
+    XMEMSET(hash, 0, sizeof(hash));
 
-    wc_Md5Final(&hash, digest);
+    /* init keys */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        ret = wc_InitMd5_ex(&hash[i], HEAP_HINT,
+                    doAsync ? devId : INVALID_DEVID);
+        if (ret != 0) {
+            printf("InitMd5_ex failed, ret = %d\n", ret);
+            goto exit;
+        }
+    }
 
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < numBlocks || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
+
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks)) {
+                    ret = wc_Md5Update(&hash[i], bench_plain,
+                        BENCH_SIZE);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times)) {
+                        goto exit_md5;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+
+        times = 0;
+        do {
+            bench_async_poll();
+
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks)) {
+                    ret = wc_Md5Final(&hash[i], digest[i]);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times)) {
+                        goto exit_md5;
+                    }
+                }
+            } /* for i */
+        } while (BENCH_ASYNC_IS_PEND());
+    } while (bench_stats_sym_check(start));
+exit_md5:
+    bench_stats_sym_finish("MD5", doAsync, count, start);
+
+exit:
+
+    if (ret < 0) {
+        printf("bench_md5 failed: %d\n", ret);
+    }
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        wc_Md5Free(&hash[i]);
+    }
 #endif
 
-    printf("MD5      %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT);
+
+    bench_async_end();
 }
-#endif /* NO_MD5 */
+#endif /* !NO_MD5 */
 
 
 #ifndef NO_SHA
-void bench_sha(void)
+void bench_sha(int doAsync)
 {
-    Sha    hash;
-    byte   digest[SHA_DIGEST_SIZE];
-    double start, total, persec;
-    int    i, ret;
+    Sha    hash[BENCH_MAX_PENDING];
+    double start;
+    int    ret, i, count = 0, times;
+    DECLARE_ARRAY(digest, byte, BENCH_MAX_PENDING, SHA_DIGEST_SIZE, HEAP_HINT);
 
-    ret = wc_InitSha(&hash);
-    if (ret != 0) {
-        printf("InitSha failed, ret = %d\n", ret);
-        return;
+    bench_async_begin();
+
+    /* clear for done cleanup */
+    XMEMSET(hash, 0, sizeof(hash));
+
+    /* init keys */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        ret = wc_InitSha_ex(&hash[i], HEAP_HINT,
+            doAsync ? devId : INVALID_DEVID);
+        if (ret != 0) {
+            printf("InitSha failed, ret = %d\n", ret);
+            goto exit;
+        }
     }
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
 
-    for(i = 0; i < numBlocks; i++)
-        wc_ShaUpdate(&hash, plain, sizeof(plain));
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < numBlocks || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
 
-    wc_ShaFinal(&hash, digest);
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks)) {
+                    ret = wc_ShaUpdate(&hash[i], bench_plain,
+                        BENCH_SIZE);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times)) {
+                        goto exit_sha;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
 
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
+        times = 0;
+        do {
+            bench_async_poll();
+
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks)) {
+                    ret = wc_ShaFinal(&hash[i], digest[i]);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times)) {
+                        goto exit_sha;
+                    }
+                }
+            } /* for i */
+        } while (BENCH_ASYNC_IS_PEND());
+    } while (bench_stats_sym_check(start));
+exit_sha:
+    bench_stats_sym_finish("SHA", doAsync, count, start);
+
+exit:
+
+    if (ret < 0) {
+        printf("bench_sha failed: %d\n", ret);
+    }
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        wc_ShaFree(&hash[i]);
+    }
 #endif
 
-    printf("SHA      %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT);
+
+    bench_async_end();
 }
 #endif /* NO_SHA */
 
 
 #ifdef WOLFSSL_SHA224
-void bench_sha224(void)
+void bench_sha224(int doAsync)
 {
-    Sha224 hash;
-    byte   digest[SHA224_DIGEST_SIZE];
-    double start, total, persec;
-    int    i, ret;
+    Sha224 hash[BENCH_MAX_PENDING];
+    double start;
+    int    ret, i, count = 0, times;
+    DECLARE_ARRAY(digest, byte, BENCH_MAX_PENDING, SHA224_DIGEST_SIZE, HEAP_HINT);
 
-    ret = wc_InitSha224(&hash);
-    if (ret != 0) {
-        printf("InitSha224 failed, ret = %d\n", ret);
-        return;
-    }
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
+    bench_async_begin();
 
-    for(i = 0; i < numBlocks; i++) {
-        ret = wc_Sha224Update(&hash, plain, sizeof(plain));
+    /* clear for done cleanup */
+    XMEMSET(hash, 0, sizeof(hash));
+
+    /* init keys */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        ret = wc_InitSha224_ex(&hash[i], HEAP_HINT,
+            doAsync ? devId : INVALID_DEVID);
         if (ret != 0) {
-            printf("Sha224Update failed, ret = %d\n", ret);
-            return;
+            printf("InitSha224_ex failed, ret = %d\n", ret);
+            goto exit;
         }
     }
 
-    ret = wc_Sha224Final(&hash, digest);
-    if (ret != 0) {
-        printf("Sha224Final failed, ret = %d\n", ret);
-        return;
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < numBlocks || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
+
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks)) {
+                    ret = wc_Sha224Update(&hash[i], bench_plain,
+                        BENCH_SIZE);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times)) {
+                        goto exit_sha224;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+
+        times = 0;
+        do {
+            bench_async_poll();
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks)) {
+                    ret = wc_Sha224Final(&hash[i], digest[i]);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times)) {
+                        goto exit_sha224;
+                    }
+                }
+            } /* for i */
+        } while (BENCH_ASYNC_IS_PEND());
+    } while (bench_stats_sym_check(start));
+exit_sha224:
+    bench_stats_sym_finish("SHA-224", doAsync, count, start);
+
+exit:
+
+    if (ret < 0) {
+        printf("bench_sha224 failed: %d\n", ret);
     }
 
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        wc_Sha224Free(&hash[i]);
+    }
 #endif
 
-    printf("SHA-224  %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT);
+
+    bench_async_end();
 }
 #endif
 
 #ifndef NO_SHA256
-void bench_sha256(void)
+void bench_sha256(int doAsync)
 {
-    Sha256 hash;
-    byte   digest[SHA256_DIGEST_SIZE];
-    double start, total, persec;
-    int    i, ret;
+    Sha256 hash[BENCH_MAX_PENDING];
+    double start;
+    int    ret, i, count = 0, times;
+    DECLARE_ARRAY(digest, byte, BENCH_MAX_PENDING, SHA256_DIGEST_SIZE, HEAP_HINT);
 
-    ret = wc_InitSha256(&hash);
-    if (ret != 0) {
-        printf("InitSha256 failed, ret = %d\n", ret);
-        return;
-    }
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
+    bench_async_begin();
 
-    for(i = 0; i < numBlocks; i++) {
-        ret = wc_Sha256Update(&hash, plain, sizeof(plain));
+    /* clear for done cleanup */
+    XMEMSET(hash, 0, sizeof(hash));
+
+    /* init keys */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        ret = wc_InitSha256_ex(&hash[i], HEAP_HINT,
+            doAsync ? devId : INVALID_DEVID);
         if (ret != 0) {
-            printf("Sha256Update failed, ret = %d\n", ret);
-            return;
+            printf("InitSha256_ex failed, ret = %d\n", ret);
+            goto exit;
         }
     }
 
-    ret = wc_Sha256Final(&hash, digest);
-    if (ret != 0) {
-        printf("Sha256Final failed, ret = %d\n", ret);
-        return;
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < numBlocks || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
+
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks)) {
+                    ret = wc_Sha256Update(&hash[i], bench_plain,
+                        BENCH_SIZE);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times)) {
+                        goto exit_sha256;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+
+        times = 0;
+        do {
+            bench_async_poll();
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks)) {
+                    ret = wc_Sha256Final(&hash[i], digest[i]);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times)) {
+                        goto exit_sha256;
+                    }
+                }
+            } /* for i */
+        } while (BENCH_ASYNC_IS_PEND());
+    } while (bench_stats_sym_check(start));
+exit_sha256:
+    bench_stats_sym_finish("SHA-256", doAsync, count, start);
+
+exit:
+
+    if (ret < 0) {
+        printf("bench_sha256 failed: %d\n", ret);
     }
 
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        wc_Sha256Free(&hash[i]);
+    }
 #endif
 
-    printf("SHA-256  %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT);
+
+    bench_async_end();
 }
 #endif
 
 #ifdef WOLFSSL_SHA384
-void bench_sha384(void)
+void bench_sha384(int doAsync)
 {
-    Sha384 hash;
-    byte   digest[SHA384_DIGEST_SIZE];
-    double start, total, persec;
-    int    i, ret;
+    Sha384 hash[BENCH_MAX_PENDING];
+    double start;
+    int    ret, i, count = 0, times;
+    DECLARE_ARRAY(digest, byte, BENCH_MAX_PENDING, SHA384_DIGEST_SIZE, HEAP_HINT);
 
-    ret = wc_InitSha384(&hash);
-    if (ret != 0) {
-        printf("InitSha384 failed, ret = %d\n", ret);
-        return;
-    }
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
+    bench_async_begin();
 
-    for(i = 0; i < numBlocks; i++) {
-        ret = wc_Sha384Update(&hash, plain, sizeof(plain));
+    /* clear for done cleanup */
+    XMEMSET(hash, 0, sizeof(hash));
+
+    /* init hashes */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        ret = wc_InitSha384_ex(&hash[i], HEAP_HINT,
+            doAsync ? devId : INVALID_DEVID);
         if (ret != 0) {
-            printf("Sha384Update failed, ret = %d\n", ret);
-            return;
+            printf("InitSha384_ex failed, ret = %d\n", ret);
+            goto exit;
         }
     }
 
-    ret = wc_Sha384Final(&hash, digest);
-    if (ret != 0) {
-        printf("Sha384Final failed, ret = %d\n", ret);
-        return;
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < numBlocks || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
+
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks)) {
+                    ret = wc_Sha384Update(&hash[i], bench_plain,
+                        BENCH_SIZE);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times)) {
+                        goto exit_sha384;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+
+        times = 0;
+        do {
+            bench_async_poll();
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks)) {
+                    ret = wc_Sha384Final(&hash[i], digest[i]);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times)) {
+                        goto exit_sha384;
+                    }
+                }
+            } /* for i */
+        } while (BENCH_ASYNC_IS_PEND());
+    } while (bench_stats_sym_check(start));
+exit_sha384:
+    bench_stats_sym_finish("SHA-384", doAsync, count, start);
+
+exit:
+
+    if (ret < 0) {
+        printf("bench_sha384 failed: %d\n", ret);
     }
 
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        wc_Sha384Free(&hash[i]);
+    }
 #endif
 
-    printf("SHA-384  %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT);
+
+    bench_async_end();
 }
 #endif
 
 #ifdef WOLFSSL_SHA512
-void bench_sha512(void)
+void bench_sha512(int doAsync)
 {
-    Sha512 hash;
-    byte   digest[SHA512_DIGEST_SIZE];
-    double start, total, persec;
-    int    i, ret;
+    Sha512 hash[BENCH_MAX_PENDING];
+    double start;
+    int    ret, i, count = 0, times;
+    DECLARE_ARRAY(digest, byte, BENCH_MAX_PENDING, SHA512_DIGEST_SIZE, HEAP_HINT);
 
-    ret = wc_InitSha512(&hash);
-    if (ret != 0) {
-        printf("InitSha512 failed, ret = %d\n", ret);
-        return;
-    }
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
+    bench_async_begin();
 
-    for(i = 0; i < numBlocks; i++) {
-        ret = wc_Sha512Update(&hash, plain, sizeof(plain));
+    /* clear for done cleanup */
+    XMEMSET(hash, 0, sizeof(hash));
+
+    /* init hashes */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        ret = wc_InitSha512_ex(&hash[i], HEAP_HINT,
+            doAsync ? devId : INVALID_DEVID);
         if (ret != 0) {
-            printf("Sha512Update failed, ret = %d\n", ret);
-            return;
+            printf("InitSha512_ex failed, ret = %d\n", ret);
+            goto exit;
         }
     }
 
-    ret = wc_Sha512Final(&hash, digest);
-    if (ret != 0) {
-        printf("Sha512Final failed, ret = %d\n", ret);
-        return;
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < numBlocks || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
+
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks)) {
+                    ret = wc_Sha512Update(&hash[i], bench_plain,
+                        BENCH_SIZE);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times)) {
+                        goto exit_sha512;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+
+        times = 0;
+        do {
+            bench_async_poll();
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, numBlocks)) {
+                    ret = wc_Sha512Final(&hash[i], digest[i]);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times)) {
+                        goto exit_sha512;
+                    }
+                }
+            } /* for i */
+        } while (BENCH_ASYNC_IS_PEND());
+    } while (bench_stats_sym_check(start));
+exit_sha512:
+    bench_stats_sym_finish("SHA-512", doAsync, count, start);
+
+exit:
+
+    if (ret < 0) {
+        printf("bench_sha512 failed: %d\n", ret);
     }
 
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        wc_Sha512Free(&hash[i]);
+    }
 #endif
 
-    printf("SHA-512  %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT);
+
+    bench_async_end();
 }
 #endif
 
@@ -1417,30 +2047,20 @@ void bench_ripemd(void)
 {
     RipeMd hash;
     byte   digest[RIPEMD_DIGEST_SIZE];
-    double start, total, persec;
-    int    i;
+    double start;
+    int    i, count;
 
     wc_InitRipeMd(&hash);
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
 
-    for(i = 0; i < numBlocks; i++)
-        wc_RipeMdUpdate(&hash, plain, sizeof(plain));
-
-    wc_RipeMdFinal(&hash, digest);
-
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-
-    printf("RIPEMD   %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < numBlocks; i++) {
+            wc_RipeMdUpdate(&hash, bench_plain, BENCH_SIZE);
+        }
+        wc_RipeMdFinal(&hash, digest);
+        count += i;
+    } while (bench_stats_sym_check(start));
+    bench_stats_sym_finish("RIPEMD", 0, count, start);
 }
 #endif
 
@@ -1450,43 +2070,32 @@ void bench_blake2(void)
 {
     Blake2b b2b;
     byte    digest[64];
-    double  start, total, persec;
-    int     i, ret;
+    double  start;
+    int     ret, i, count;
 
     ret = wc_InitBlake2b(&b2b, 64);
     if (ret != 0) {
         printf("InitBlake2b failed, ret = %d\n", ret);
         return;
     }
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
 
-    for(i = 0; i < numBlocks; i++) {
-        ret = wc_Blake2bUpdate(&b2b, plain, sizeof(plain));
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < numBlocks; i++) {
+            ret = wc_Blake2bUpdate(&b2b, bench_plain, BENCH_SIZE);
+            if (ret != 0) {
+                printf("Blake2bUpdate failed, ret = %d\n", ret);
+                return;
+            }
+        }
+        ret = wc_Blake2bFinal(&b2b, digest, 64);
         if (ret != 0) {
-            printf("Blake2bUpdate failed, ret = %d\n", ret);
+            printf("Blake2bFinal failed, ret = %d\n", ret);
             return;
         }
-    }
-
-    ret = wc_Blake2bFinal(&b2b, digest, 64);
-    if (ret != 0) {
-        printf("Blake2bFinal failed, ret = %d\n", ret);
-        return;
-    }
-
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-
-    printf("BLAKE2b  %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+        count += i;
+    } while (bench_stats_sym_check(start));
+    bench_stats_sym_finish("BLAKE2b", 0, count, start);
 }
 #endif
 
@@ -1498,43 +2107,32 @@ void bench_cmac(void)
     Cmac    cmac;
     byte    digest[AES_BLOCK_SIZE];
     word32  digestSz = sizeof(digest);
-    double  start, total, persec;
-    int     i, ret;
+    double  start;
+    int     ret, i, count;
 
-    ret = wc_InitCmac(&cmac, key, 16, WC_CMAC_AES, NULL);
+    ret = wc_InitCmac(&cmac, bench_key, 16, WC_CMAC_AES, NULL);
     if (ret != 0) {
         printf("InitCmac failed, ret = %d\n", ret);
         return;
     }
-    start = current_time(1);
-    BEGIN_INTEL_CYCLES
 
-    for(i = 0; i < numBlocks; i++) {
-        ret = wc_CmacUpdate(&cmac, plain, sizeof(plain));
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < numBlocks; i++) {
+            ret = wc_CmacUpdate(&cmac, bench_plain, BENCH_SIZE);
+            if (ret != 0) {
+                printf("CmacUpdate failed, ret = %d\n", ret);
+                return;
+            }
+        }
+        ret = wc_CmacFinal(&cmac, digest, &digestSz);
         if (ret != 0) {
-            printf("CmacUpdate failed, ret = %d\n", ret);
+            printf("CmacFinal failed, ret = %d\n", ret);
             return;
         }
-    }
-
-    ret = wc_CmacFinal(&cmac, digest, &digestSz);
-    if (ret != 0) {
-        printf("CmacFinal failed, ret = %d\n", ret);
-        return;
-    }
-
-    END_INTEL_CYCLES
-    total = current_time(0) - start;
-    persec = 1 / total * numBlocks;
-#ifdef BENCH_EMBEDDED
-    /* since using kB, convert to MB/s */
-    persec = persec / 1024;
-#endif
-
-    printf("AES-CMAC %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
-                                              blockType, total, persec);
-    SHOW_INTEL_CYCLES
-    printf("\n");
+        count += i;
+    } while (bench_stats_sym_check(start));
+    bench_stats_sym_finish("AES-CMAC", 0, count, start);
 }
 
 #endif /* WOLFSSL_CMAC */
@@ -1547,27 +2145,86 @@ void bench_scrypt(void)
     double start, total, each, milliEach;
     int    ret, i;
 
-    start = current_time(1);
-    for (i = 0; i < scryptCnt; i++) {
-        ret = wc_scrypt(derived, (byte*)"pleaseletmein", 13,
-                        (byte*)"SodiumChloride", 14, 14, 8, 1, sizeof(derived));
-        if (ret != 0) {
-            printf("scrypt failed, ret = %d\n", ret);
-            return;
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < scryptCnt; i++) {
+            ret = wc_scrypt(derived, (byte*)"pleaseletmein", 13,
+                            (byte*)"SodiumChloride", 14, 14, 8, 1, sizeof(derived));
+            if (ret != 0) {
+                printf("scrypt failed, ret = %d\n", ret);
+                goto exit;
+            }
         }
-    }
-    total = current_time(0) - start;
-    each  = total / scryptCnt;   /* per second   */
-    milliEach = each * 1000; /* milliseconds */
-
-    printf("scrypt   %6.3f milliseconds, avg over %d"
-           " iterations\n", milliEach, scryptCnt);
+        count += i;
+    } while (bench_stats_sym_check(start));
+exit:
+    bench_stats_asym_finish("scrypt", 0, "", 0, count, start);
 }
 
 #endif /* HAVE_SCRYPT */
 
 #ifndef NO_RSA
 
+#if defined(WOLFSSL_KEY_GEN)
+void bench_rsaKeyGen(int doAsync)
+{
+    RsaKey genKey[BENCH_MAX_PENDING];
+    double start;
+    int    ret, i, count = 0, times;
+    int    k, keySz;
+    const int  keySizes[2] = {1024, 2048};
+    const long rsa_e_val = 65537;
+
+    bench_async_begin();
+
+    /* clear for done cleanup */
+    XMEMSET(genKey, 0, sizeof(genKey));
+
+    for (k = 0; k < (int)(sizeof(keySizes)/sizeof(int)); k++) {
+        keySz = keySizes[k];
+
+        bench_stats_start(&count, &start);
+        do {
+            /* while free pending slots in queue, submit ops */
+            for (times = 0; times < genTimes || BENCH_ASYNC_IS_PEND(); ) {
+                bench_async_poll();
+
+                for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                    if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 0, &times, genTimes)) {
+
+                        wc_FreeRsaKey(&genKey[i]);
+                        ret = wc_InitRsaKey_ex(&genKey[i], HEAP_HINT,
+                            doAsync ? devId : INVALID_DEVID);
+                        if (ret < 0) {
+                            goto exit;
+                        }
+
+                        ret = wc_MakeRsaKey(&genKey[i], keySz, rsa_e_val, &rng);
+                        if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 0, &times)) {
+                            goto exit;
+                        }
+                    }
+                } /* for i */
+            } /* for times */
+            count += times;
+        } while (bench_stats_sym_check(start));
+    exit:
+        bench_stats_asym_finish("RSA", keySz, "key gen", doAsync, count, start);
+
+        if (ret < 0) {
+            printf("bench_rsaKeyGen failed: %d\n", ret);
+            break;
+        }
+    }
+
+    /* cleanup */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        wc_FreeRsaKey(&genKey[i]);
+    }
+
+    bench_async_end();
+}
+#endif /* WOLFSSL_KEY_GEN */
 
 #if !defined(USE_CERT_BUFFERS_1024) && !defined(USE_CERT_BUFFERS_2048)
     #if defined(WOLFSSL_MDK_SHELL)
@@ -1581,21 +2238,23 @@ void bench_scrypt(void)
     #endif
 #endif
 
-void bench_rsa(void)
+#define RSA_BUF_SIZE 256  /* for up to 2048 bit */
+
+void bench_rsa(int doAsync)
 {
-    int    i;
-    int    ret;
-    size_t bytes;
-    word32 idx = 0;
+    int         ret, i, times, count = 0;
+    size_t      bytes;
+    word32      idx = 0;
     const byte* tmp;
+    const char* messageStr = "Everyone gets Friday off.";
+    const int   len = (int)XSTRLEN((char*)messageStr);
+    double      start = 0.0f;
+    RsaKey      rsaKey[BENCH_MAX_PENDING];
+    int         rsaKeySz = RSA_BUF_SIZE * 8; /* used in printf */
 
-    const byte message[] = "Everyone gets Friday off.";
-    byte      enc[256];  /* for up to 2048 bit */
-    const int len = (int)strlen((char*)message);
-    double    start, total, each, milliEach;
-
-    RsaKey rsaKey;
-    int    rsaKeySz = 2048; /* used in printf */
+    DECLARE_VAR_INIT(message, byte, len, messageStr, HEAP_HINT);
+    DECLARE_ARRAY(enc, byte, BENCH_MAX_PENDING, RSA_BUF_SIZE, HEAP_HINT);
+    DECLARE_ARRAY(out, byte, BENCH_MAX_PENDING, RSA_BUF_SIZE, HEAP_HINT);
 
 #ifdef USE_CERT_BUFFERS_1024
     tmp = rsa_key_der_1024;
@@ -1608,271 +2267,112 @@ void bench_rsa(void)
     #error "need a cert buffer size"
 #endif /* USE_CERT_BUFFERS */
 
-    if ((ret = wc_InitRsaKey(&rsaKey, HEAP_HINT)) < 0) {
-        printf("InitRsaKey failed! %d\n", ret);
-        return;
-    }
-
-    /* decode the private key */
-    ret = wc_RsaPrivateKeyDecode(tmp, &idx, &rsaKey, (word32)bytes);
-
-    start = current_time(1);
-
-    for (i = 0; i < ntimes; i++) {
-        ret = wc_RsaPublicEncrypt(message, len, enc, sizeof(enc),
-                                                        &rsaKey, &rng);
-        if (ret < 0) {
-            break;
-        }
-    } /* for ntimes */
-
-    total = current_time(0) - start;
-    each  = total / ntimes;   /* per second   */
-    milliEach = each * 1000; /* milliseconds */
-
-    printf("RSA %d public          %6.3f milliseconds, avg over %d"
-           " iterations\n", rsaKeySz, milliEach, ntimes);
-
-    if (ret < 0) {
-        printf("Rsa Public Encrypt failed! %d\n", ret);
-        return;
-    }
-
-#ifdef WC_RSA_BLINDING
-    wc_RsaSetRNG(&rsaKey, &rng);
-#endif
-    start = current_time(1);
-
-    /* capture resulting encrypt length */
-    idx = ret;
-
-    for (i = 0; i < ntimes; i++) {
-        byte  out[256];  /* for up to 2048 bit */
-
-        ret = wc_RsaPrivateDecrypt(enc, idx, out, sizeof(out), &rsaKey);
-        if (ret < 0 && ret != WC_PENDING_E) {
-            break;
-        }
-    } /* for ntimes */
-
-    total = current_time(0) - start;
-    each  = total / ntimes;   /* per second   */
-    milliEach = each * 1000; /* milliseconds */
-
-    printf("RSA %d private         %6.3f milliseconds, avg over %d"
-           " iterations\n", rsaKeySz, milliEach, ntimes);
-
-    wc_FreeRsaKey(&rsaKey);
-}
-
-
-#ifdef WOLFSSL_ASYNC_CRYPT
-void bench_rsa_async(void)
-{
-    int    i;
-    int    ret;
-    size_t bytes;
-    word32 idx = 0;
-    const byte* tmp;
-
-    const byte message[] = "Everyone gets Friday off.";
-    byte      enc[256];  /* for up to 2048 bit */
-    const int len = (int)strlen((char*)message);
-    double    start, total, each, milliEach;
-
-    RsaKey rsaKey[WOLF_ASYNC_MAX_PENDING];
-    int    rsaKeySz = 2048; /* used in printf */
-
-    WOLF_EVENT events[WOLF_ASYNC_MAX_PENDING];
-    WOLF_EVENT_QUEUE eventQueue;
-    int evtNum, asyncDone, asyncPend;
-
-#ifdef USE_CERT_BUFFERS_1024
-    tmp = rsa_key_der_1024;
-    bytes = sizeof_rsa_key_der_1024;
-    rsaKeySz = 1024;
-#elif defined(USE_CERT_BUFFERS_2048)
-    tmp = rsa_key_der_2048;
-    bytes = sizeof_rsa_key_der_2048;
-#else
-    #error "need a cert buffer size"
-#endif /* USE_CERT_BUFFERS */
-
-    /* init event queue */
-    ret = wolfEventQueue_Init(&eventQueue);
-    if (ret != 0) {
-        return;
-    }
+    bench_async_begin();
 
     /* clear for done cleanup */
-    XMEMSET(&events, 0, sizeof(events));
-    XMEMSET(&rsaKey, 0, sizeof(rsaKey));
+    XMEMSET(rsaKey, 0, sizeof(rsaKey));
 
-    /* init events and keys */
-    for (i = 0; i < WOLF_ASYNC_MAX_PENDING; i++) {
+    /* init keys */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
         /* setup an async context for each key */
-        if ((ret = wc_InitRsaKey_ex(&rsaKey[i], HEAP_HINT, devId)) < 0) {
-            goto done;
+        if ((ret = wc_InitRsaKey_ex(&rsaKey[i], HEAP_HINT,
+                                        doAsync ? devId : INVALID_DEVID)) < 0) {
+            goto exit;
         }
+
     #ifdef WC_RSA_BLINDING
         wc_RsaSetRNG(&rsaKey[i], &rng);
     #endif
-        if ((ret = wolfAsync_EventInit(&events[i],
-                WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT, &rsaKey[i].asyncDev)) != 0) {
-            goto done;
-        }
-        events[i].pending = 0; /* Reset pending flag */
 
         /* decode the private key */
         idx = 0;
         if ((ret = wc_RsaPrivateKeyDecode(tmp, &idx, &rsaKey[i],
                                                         (word32)bytes)) != 0) {
             printf("wc_RsaPrivateKeyDecode failed! %d\n", ret);
-            goto done;
+            goto exit;
         }
     }
 
-    /* begin public async RSA */
-    start = current_time(1);
+    /* begin public RSA */
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < ntimes || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
 
-    asyncPend = 0;
-    for (i = 0; i < ntimes; ) {
-
-        /* while free pending slots in queue, submit RSA operations */
-        for (evtNum = 0; evtNum < WOLF_ASYNC_MAX_PENDING; evtNum++) {
-            if (events[evtNum].done || (events[evtNum].pending == 0 &&
-                                                    (i + asyncPend) < ntimes))
-            {
-                /* check for event error */
-                if (events[evtNum].ret != WC_PENDING_E && events[evtNum].ret < 0) {
-                    printf("wc_RsaPublicEncrypt: Async event error: %d\n", events[evtNum].ret);
-                    goto done;
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&rsaKey[i]), 1, &times, ntimes)) {
+                    ret = wc_RsaPublicEncrypt(message, len, enc[i],
+                                            RSA_BUF_SIZE, &rsaKey[i], &rng);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&rsaKey[i]), 1, &times)) {
+                        goto exit_rsa_pub;
+                    }
                 }
-
-                ret = wc_RsaPublicEncrypt(message, len, enc, sizeof(enc),
-                                                        &rsaKey[evtNum], &rng);
-                if (ret == WC_PENDING_E) {
-                    ret = wc_RsaAsyncHandle(&rsaKey[evtNum], &eventQueue,
-                                                            &events[evtNum]);
-                    if (ret != 0) goto done;
-                    asyncPend++;
-                }
-                else if (ret >= 0) {
-                    /* operation completed */
-                    i++;
-                    asyncPend--;
-                    events[evtNum].done = 0;
-                }
-                else {
-                    printf("wc_RsaPublicEncrypt failed: %d\n", ret);
-                    goto done;
-                }
-            }
-        } /* for evtNum */
-
-        /* poll until there are events done */
-        if (asyncPend > 0) {
-            do {
-                ret = wolfAsync_EventQueuePoll(&eventQueue, NULL, NULL, 0,
-                                        WOLF_POLL_FLAG_CHECK_HW, &asyncDone);
-                if (ret != 0) goto done;
-            } while (asyncDone == 0);
-        }
-    } /* for ntimes */
-
-    total = current_time(0) - start;
-    each  = total / ntimes;   /* per second   */
-    milliEach = each * 1000; /* milliseconds */
-
-    printf("RSA %d public async    %6.3f milliseconds, avg over %d"
-           " iterations\n", rsaKeySz, milliEach, ntimes);
+            } /* for i */
+        } /* for times */
+        count += times;
+    } while (bench_stats_sym_check(start));
+exit_rsa_pub:
+    bench_stats_asym_finish("RSA", rsaKeySz, "public", doAsync, count, start);
 
     if (ret < 0) {
-        goto done;
+        goto exit;
     }
 
-
-    /* begin private async RSA */
-    start = current_time(1);
+#ifdef WOLFSSL_ASYNC_CRYPT
+    /* Clear events */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        XMEMSET(&rsaKey[i].asyncDev.event, 0, sizeof(WOLF_EVENT));
+    }
+    asyncPending = 0;
+#endif
 
     /* capture resulting encrypt length */
-    idx = sizeof(enc); /* fixed at 2048 bit */
+    idx = RSA_BUF_SIZE; /* fixed at 2048 bit */
 
-    asyncPend = 0;
-    for (i = 0; i < ntimes; ) {
-        byte  out[256];  /* for up to 2048 bit */
+    /* begin private RSA */
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < ntimes || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
 
-        /* while free pending slots in queue, submit RSA operations */
-        for (evtNum = 0; evtNum < WOLF_ASYNC_MAX_PENDING; evtNum++) {
-            if (events[evtNum].done || (events[evtNum].pending == 0 &&
-                                                    (i + asyncPend) < ntimes))
-            {
-                /* check for event error */
-                if (events[evtNum].ret != WC_PENDING_E && events[evtNum].ret < 0) {
-                    printf("wc_RsaPrivateDecrypt: Async event error: %d\n", events[evtNum].ret);
-                    goto done;
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&rsaKey[i]), 1, &times, ntimes)) {
+                    ret = wc_RsaPrivateDecrypt(enc[i], idx, out[i],
+                                                    RSA_BUF_SIZE, &rsaKey[i]);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&rsaKey[i]), 1, &times)) {
+                        goto exit;
+                    }
                 }
-
-                ret = wc_RsaPrivateDecrypt(enc, idx, out, sizeof(out),
-                                                            &rsaKey[evtNum]);
-                if (ret == WC_PENDING_E) {
-                    ret = wc_RsaAsyncHandle(&rsaKey[evtNum], &eventQueue,
-                                                            &events[evtNum]);
-                    if (ret != 0) goto done;
-                    asyncPend++;
-                }
-                else if (ret >= 0) {
-                    /* operation completed */
-                    i++;
-                    asyncPend--;
-                    events[evtNum].done = 0;
-                }
-                else {
-                    printf("wc_RsaPrivateDecrypt failed: %d\n", ret);
-                    goto done;
-                }
-            }
-        } /* for evtNum */
-
-        /* poll until there are events done */
-        if (asyncPend > 0) {
-            do {
-                ret = wolfAsync_EventQueuePoll(&eventQueue, NULL, NULL, 0,
-                                        WOLF_POLL_FLAG_CHECK_HW, &asyncDone);
-                if (ret != 0) goto done;
-            } while (asyncDone == 0);
-        }
-    } /* for ntimes */
-
-    total = current_time(0) - start;
-    each  = total / ntimes;   /* per second   */
-    milliEach = each * 1000; /* milliseconds */
-
-    printf("RSA %d private async   %6.3f milliseconds, avg over %d"
-           " iterations\n", rsaKeySz, milliEach, ntimes);
-
-done:
+            } /* for i */
+        } /* for times */
+        count += times;
+    } while (bench_stats_sym_check(start));
+exit:
+    bench_stats_asym_finish("RSA", rsaKeySz, "private", doAsync, count, start);
 
     if (ret < 0) {
-        printf("bench_rsa_async failed: %d\n", ret);
+        printf("bench_rsa failed: %d\n", ret);
     }
 
     /* cleanup */
-    for (i = 0; i < WOLF_ASYNC_MAX_PENDING; i++) {
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
         wc_FreeRsaKey(&rsaKey[i]);
     }
 
-    /* free event queue */
-    wolfEventQueue_Free(&eventQueue);
+    FREE_ARRAY(enc, BENCH_MAX_PENDING, HEAP_HINT);
+    FREE_ARRAY(out, BENCH_MAX_PENDING, HEAP_HINT);
+    FREE_VAR(message, HEAP_HINT);
+
+    bench_async_end();
 }
-#endif /* WOLFSSL_ASYNC_CRYPT */
 
 #endif /* !NO_RSA */
 
 
 #ifndef NO_DH
 
-
 #if !defined(USE_CERT_BUFFERS_1024) && !defined(USE_CERT_BUFFERS_2048)
     #if defined(WOLFSSL_MDK_SHELL)
         static char *certDHname = "certs/dh2048.der";
@@ -1887,27 +2387,35 @@ done:
     #endif
 #endif
 
-void bench_dh(void)
+#define BENCH_DH_KEY_SIZE  256 /* for 2048 bit */
+#define BENCH_DH_PRIV_SIZE (BENCH_DH_KEY_SIZE/8)
+
+void bench_dh(int doAsync)
 {
-    int    i ;
-    size_t bytes;
-    word32 idx = 0, pubSz, privSz = 0, pubSz2, privSz2, agreeSz;
+    int    ret, i;
+    int    count = 0, times;
     const byte* tmp = NULL;
-
-    byte   pub[256];    /* for 2048 bit */
-    byte   pub2[256];   /* for 2048 bit */
-    byte   agree[256];  /* for 2048 bit */
-    byte   priv[32];    /* for 2048 bit */
-    byte   priv2[32];   /* for 2048 bit */
-
-    double start, total, each, milliEach;
-    DhKey  dhKey;
+    double start = 0.0f;
+    DhKey  dhKey[BENCH_MAX_PENDING];
     int    dhKeySz = 2048; /* used in printf */
+#ifndef NO_ASN
+    size_t bytes;
+    word32 idx;
+#endif
+    word32 pubSz[BENCH_MAX_PENDING];
+    word32 privSz[BENCH_MAX_PENDING];
+    word32 pubSz2;
+    word32 privSz2;
+    word32 agreeSz[BENCH_MAX_PENDING];
+
+    DECLARE_ARRAY(pub, byte, BENCH_MAX_PENDING, BENCH_DH_KEY_SIZE, HEAP_HINT);
+    DECLARE_VAR(pub2, byte, BENCH_DH_KEY_SIZE, HEAP_HINT);
+    DECLARE_ARRAY(agree, byte, BENCH_MAX_PENDING, BENCH_DH_KEY_SIZE, HEAP_HINT);
+    DECLARE_ARRAY(priv, byte, BENCH_MAX_PENDING, BENCH_DH_PRIV_SIZE, HEAP_HINT);
+    DECLARE_VAR(priv2, byte, BENCH_DH_PRIV_SIZE, HEAP_HINT);
 
-    (void)idx;
     (void)tmp;
 
-
 #if defined(NO_ASN)
     dhKeySz = 1024;
     /* do nothing, but don't use default FILE */
@@ -1922,90 +2430,112 @@ void bench_dh(void)
     #error "need to define a cert buffer size"
 #endif /* USE_CERT_BUFFERS */
 
+    bench_async_begin();
 
-    if (wc_InitDhKey(&dhKey) != 0) {
-        printf("InitDhKey failed!\n");
-        return;
+    /* clear for done cleanup */
+    XMEMSET(dhKey, 0, sizeof(dhKey));
+
+    /* init keys */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        /* setup an async context for each key */
+        ret = wc_InitDhKey_ex(&dhKey[i], HEAP_HINT,
+                        doAsync ? devId : INVALID_DEVID);
+        if (ret != 0)
+            goto exit;
+
+        /* setup key */
+    #ifdef NO_ASN
+        ret = wc_DhSetKey(&dhKey[i], dh_p, sizeof(dh_p), dh_g, sizeof(dh_g));
+    #else
+        idx = 0;
+        ret = wc_DhKeyDecode(tmp, &idx, &dhKey[i], (word32)bytes);
+    #endif
+        if (ret != 0) {
+            printf("DhKeyDecode failed %d, can't benchmark\n", ret);
+            goto exit;
+        }
     }
 
-#ifdef NO_ASN
-    bytes = wc_DhSetKey(&dhKey, dh_p, sizeof(dh_p), dh_g, sizeof(dh_g));
-#else
-    bytes = wc_DhKeyDecode(tmp, &idx, &dhKey, (word32)bytes);
-#endif
-    if (bytes != 0) {
-        printf("dhekydecode failed, can't benchmark\n");
-        return;
+    /* Key Gen */
+    bench_stats_start(&count, &start);
+    do {
+        /* while free pending slots in queue, submit ops */
+        for (times = 0; times < genTimes || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
+
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&dhKey[i]), 0, &times, genTimes)) {
+                    privSz[i] = 0;
+                    ret = wc_DhGenerateKeyPair(&dhKey[i], &rng, priv[i], &privSz[i],
+                        pub[i], &pubSz[i]);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&dhKey[i]), 0, &times)) {
+                        goto exit_dh_gen;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+    } while (bench_stats_sym_check(start));
+exit_dh_gen:
+    bench_stats_asym_finish("DH", dhKeySz, "key gen", doAsync, count, start);
+
+    if (ret < 0) {
+        goto exit;
     }
 
-    start = current_time(1);
+    /* Generate key to use as other public */
+    ret = wc_DhGenerateKeyPair(&dhKey[0], &rng, priv2, &privSz2, pub2, &pubSz2);
+#ifdef WOLFSSL_ASYNC_CRYPT
+    ret = wc_AsyncWait(ret, &dhKey[0].asyncDev, WC_ASYNC_FLAG_NONE);
 
-    for (i = 0; i < ntimes; i++)
-        wc_DhGenerateKeyPair(&dhKey, &rng, priv, &privSz, pub, &pubSz);
-
-    total = current_time(0) - start;
-    each  = total / ntimes;   /* per second   */
-    milliEach = each * 1000; /* milliseconds */
-
-    printf("DH  %d key generation  %6.3f milliseconds, avg over %d"
-           " iterations\n", dhKeySz, milliEach, ntimes);
-
-    wc_DhGenerateKeyPair(&dhKey, &rng, priv2, &privSz2, pub2, &pubSz2);
-    start = current_time(1);
-
-    for (i = 0; i < ntimes; i++)
-        wc_DhAgree(&dhKey, agree, &agreeSz, priv, privSz, pub2, pubSz2);
-
-    total = current_time(0) - start;
-    each  = total / ntimes;   /* per second   */
-    milliEach = each * 1000; /* milliseconds */
-
-    printf("DH  %d key agreement   %6.3f milliseconds, avg over %d"
-           " iterations\n", dhKeySz, milliEach, ntimes);
-
-    wc_FreeDhKey(&dhKey);
-}
+    /* Clear events */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        XMEMSET(&dhKey[i].asyncDev.event, 0, sizeof(WOLF_EVENT));
+    }
+    asyncPending = 0;
 #endif
 
-#if defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA)
-void bench_rsaKeyGen(void)
-{
-    RsaKey genKey;
-    double start, total, each, milliEach;
-    int    i;
+    /* Key Agree */
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < agreeTimes || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
 
-    /* 1024 bit */
-    start = current_time(1);
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&dhKey[i]), 0, &times, agreeTimes)) {
+                    ret = wc_DhAgree(&dhKey[i], agree[i], &agreeSz[i], priv[i], privSz[i],
+                        pub2, pubSz2);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&dhKey[i]), 0, &times)) {
+                        goto exit;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+    } while (bench_stats_sym_check(start));
+exit:
+    bench_stats_asym_finish("DH", dhKeySz, "key agree", doAsync, count, start);
 
-    for(i = 0; i < genTimes; i++) {
-        wc_InitRsaKey(&genKey, HEAP_HINT);
-        wc_MakeRsaKey(&genKey, 1024, 65537, &rng);
-        wc_FreeRsaKey(&genKey);
+    if (ret < 0) {
+        printf("bench_dh failed: %d\n", ret);
     }
 
-    total = current_time(0) - start;
-    each  = total / genTimes;  /* per second  */
-    milliEach = each * 1000;   /* milliseconds */
-    printf("\n");
-    printf("RSA 1024 key generation  %6.3f milliseconds, avg over %d"
-           " iterations\n", milliEach, genTimes);
-
-    /* 2048 bit */
-    start = current_time(1);
-
-    for(i = 0; i < genTimes; i++) {
-        wc_InitRsaKey(&genKey, HEAP_HINT);
-        wc_MakeRsaKey(&genKey, 2048, 65537, &rng);
-        wc_FreeRsaKey(&genKey);
+    /* cleanup */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        wc_FreeDhKey(&dhKey[i]);
     }
 
-    total = current_time(0) - start;
-    each  = total / genTimes;  /* per second  */
-    milliEach = each * 1000;   /* milliseconds */
-    printf("RSA 2048 key generation  %6.3f milliseconds, avg over %d"
-           " iterations\n", milliEach, genTimes);
+    FREE_ARRAY(pub, BENCH_MAX_PENDING, HEAP_HINT);
+    FREE_VAR(pub2, HEAP_HINT);
+    FREE_ARRAY(priv, BENCH_MAX_PENDING, HEAP_HINT);
+    FREE_VAR(priv2, HEAP_HINT);
+    FREE_ARRAY(agree, BENCH_MAX_PENDING, HEAP_HINT);
+
+    bench_async_end();
 }
-#endif /* WOLFSSL_KEY_GEN */
+#endif /* !NO_DH */
+
 #ifdef HAVE_NTRU
 byte GetEntropy(ENTROPY_CMD cmd, byte* out);
 
@@ -2031,7 +2561,7 @@ byte GetEntropy(ENTROPY_CMD cmd, byte* out)
 void bench_ntru(void)
 {
     int    i;
-    double start, total, each, milliEach;
+    double start;
 
     byte   public_key[1027];
     word16 public_key_len = sizeof(public_key);
@@ -2056,7 +2586,6 @@ void bench_ntru(void)
         'w', 'o', 'l', 'f', 'S', 'S', 'L', ' ', 'N', 'T', 'R', 'U'
     };
 
-    printf("\n");
     for (ntruBits = 128; ntruBits < 257; ntruBits += 64) {
         switch (ntruBits) {
             case 128:
@@ -2111,8 +2640,8 @@ void bench_ntru(void)
             printf("NTRU error occurred requesting the buffer size needed\n");
             return;
         }
-        start = current_time(1);
 
+        bench_stats_start(&i, &start);
         for (i = 0; i < ntimes; i++) {
             ret = ntru_crypto_ntru_encrypt(drbg, public_key_len, public_key,
                     sizeof(aes_key), aes_key, &ciphertext_len, ciphertext);
@@ -2121,21 +2650,14 @@ void bench_ntru(void)
                 return;
             }
         }
-        ret = ntru_crypto_drbg_uninstantiate(drbg);
+        bench_stats_asym_finish("NTRU", ntruBits, "encryption", 0, i, start);
 
+        ret = ntru_crypto_drbg_uninstantiate(drbg);
         if (ret != DRBG_OK) {
             printf("NTRU error occurred uninstantiating the DRBG\n");
             return;
         }
 
-        total = current_time(0) - start;
-        each  = total / ntimes;   /* per second   */
-        milliEach = each * 1000; /* milliseconds */
-
-        printf("NTRU %d encryption took %6.3f milliseconds, avg over %d"
-           " iterations\n", ntruBits, milliEach, ntimes);
-
-
         ret = ntru_crypto_ntru_decrypt(private_key_len, private_key,
                 ciphertext_len, ciphertext, &plaintext_len, NULL);
 
@@ -2145,8 +2667,8 @@ void bench_ntru(void)
         }
 
         plaintext_len = sizeof(plaintext);
-        start = current_time(1);
 
+        bench_stats_start(&i, &start);
         for (i = 0; i < ntimes; i++) {
             ret = ntru_crypto_ntru_decrypt(private_key_len, private_key,
                                       ciphertext_len, ciphertext,
@@ -2157,20 +2679,14 @@ void bench_ntru(void)
                 return;
             }
         }
-
-        total = current_time(0) - start;
-        each  = total / ntimes;   /* per second   */
-        milliEach = each * 1000; /* milliseconds */
-
-        printf("NTRU %d decryption took %6.3f milliseconds, avg over %d"
-           " iterations\n", ntruBits, milliEach, ntimes);
+        bench_stats_asym_finish("NTRU", ntruBits, "decryption", 0, i, start);
     }
 
 }
 
 void bench_ntruKeyGen(void)
 {
-    double start, total, each, milliEach;
+    double start;
     int    i;
 
     byte   public_key[1027];
@@ -2209,15 +2725,14 @@ void bench_ntruKeyGen(void)
         /* set key sizes */
         ret = ntru_crypto_ntru_encrypt_keygen(drbg, type, &public_key_len,
                                                   NULL, &private_key_len, NULL);
-        start = current_time(1);
 
-        for(i = 0; i < genTimes; i++) {
+        bench_stats_start(&i, &start);
+        for (i = 0; i < genTimes; i++) {
             ret = ntru_crypto_ntru_encrypt_keygen(drbg, type, &public_key_len,
                                          public_key, &private_key_len,
                                          private_key);
         }
-
-        total = current_time(0) - start;
+        bench_stats_asym_finish("NTRU", ntruBits, "key gen", 0, i, start);
 
         if (ret != NTRU_OK) {
             printf("keygen failed\n");
@@ -2230,182 +2745,313 @@ void bench_ntruKeyGen(void)
             printf("NTRU drbg uninstantiate failed\n");
             return;
         }
-
-        each = total / genTimes;
-        milliEach = each * 1000;
-
-        printf("NTRU %d key generation  %6.3f milliseconds, avg over %d"
-            " iterations\n", ntruBits, milliEach, genTimes);
     }
 }
 #endif
 
 #ifdef HAVE_ECC
-void bench_eccKeyGen(void)
+#define BENCH_ECC_SIZE  32
+
+void bench_eccMakeKey(int doAsync)
 {
-    ecc_key genKey;
-    double start, total, each, milliEach;
-    int    i;
+    int ret, i, times, count;
+    const int keySize = BENCH_ECC_SIZE;
+    ecc_key genKey[BENCH_MAX_PENDING];
+    double start;
 
-    /* 256 bit */
-    start = current_time(1);
+    bench_async_begin();
 
-    for(i = 0; i < genTimes; i++) {
-        wc_ecc_init_ex(&genKey, HEAP_HINT, devId);
-        wc_ecc_make_key(&rng, 32, &genKey);
-        wc_ecc_free(&genKey);
+    /* clear for done cleanup */
+    XMEMSET(&genKey, 0, sizeof(genKey));
+
+    /* ECC Make Key */
+    bench_stats_start(&count, &start);
+    do {
+        /* while free pending slots in queue, submit ops */
+        for (times = 0; times < genTimes || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
+
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 0, &times, genTimes)) {
+
+                    wc_ecc_free(&genKey[i]);
+                    ret = wc_ecc_init_ex(&genKey[i], HEAP_HINT, doAsync ? devId : INVALID_DEVID);
+                    if (ret < 0) {
+                        goto exit;
+                    }
+
+                    ret = wc_ecc_make_key(&rng, keySize, &genKey[i]);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 0, &times)) {
+                        goto exit;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+    } while (bench_stats_sym_check(start));
+exit:
+    bench_stats_asym_finish("ECC", keySize * 8, "key gen", doAsync, count, start);
+
+    if (ret < 0) {
+        printf("bench_eccMakeKey failed: %d\n", ret);
     }
 
-    total = current_time(0) - start;
-    each  = total / genTimes;  /* per second  */
-    milliEach = each * 1000;   /* milliseconds */
-    printf("\n");
-    printf("ECC  256 key generation  %6.3f milliseconds, avg over %d"
-           " iterations\n", milliEach, genTimes);
+    /* cleanup */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        wc_ecc_free(&genKey[i]);
+    }
+
+    bench_async_end();
+}
+
+void bench_ecc(int doAsync)
+{
+    int ret, i, times, count;
+    const int keySize = BENCH_ECC_SIZE;
+    ecc_key genKey[BENCH_MAX_PENDING];
+#ifdef HAVE_ECC_DHE
+    ecc_key genKey2[BENCH_MAX_PENDING];
+#endif
+#if !defined(NO_ASN) && defined(HAVE_ECC_SIGN)
+#ifdef HAVE_ECC_VERIFY
+    int    verify[BENCH_MAX_PENDING];
+#endif
+#endif
+    word32 x[BENCH_MAX_PENDING];
+    double start;
+
+#ifdef HAVE_ECC_DHE
+    DECLARE_ARRAY(shared, byte, BENCH_MAX_PENDING, BENCH_ECC_SIZE, HEAP_HINT);
+#endif
+#if !defined(NO_ASN) && defined(HAVE_ECC_SIGN)
+    DECLARE_ARRAY(sig, byte, BENCH_MAX_PENDING, ECC_MAX_SIG_SIZE, HEAP_HINT);
+#endif
+    DECLARE_ARRAY(digest, byte, BENCH_MAX_PENDING, BENCH_ECC_SIZE, HEAP_HINT);
+
+    bench_async_begin();
+
+    /* zero the keys so the exit path can free every slot unconditionally */
+    XMEMSET(&genKey, 0, sizeof(genKey));
+#ifdef HAVE_ECC_DHE
+    XMEMSET(&genKey2, 0, sizeof(genKey2));
+#endif
+
+    /* init keys */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        /* setup a context for each key (device is used only if doAsync) */
+        if ((ret = wc_ecc_init_ex(&genKey[i], HEAP_HINT,
+                                    doAsync ? devId : INVALID_DEVID)) < 0) {
+            goto exit;
+        }
+        ret = wc_ecc_make_key(&rng, keySize, &genKey[i]);
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        ret = wc_AsyncWait(ret, &genKey[i].asyncDev, WC_ASYNC_FLAG_NONE);
+    #endif
+        if (ret < 0) {
+            goto exit;
+        }
+
+    #ifdef HAVE_ECC_DHE
+        if ((ret = wc_ecc_init_ex(&genKey2[i], HEAP_HINT, INVALID_DEVID)) < 0) {
+            goto exit;
+        }
+        if ((ret = wc_ecc_make_key(&rng, keySize, &genKey2[i])) < 0) {
+            goto exit;
+        }
+    #endif
+    }
+
+#ifdef HAVE_ECC_DHE
+#ifdef WOLFSSL_ASYNC_CRYPT
+    /* Clear events */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        XMEMSET(&genKey[i].asyncDev.event, 0, sizeof(WOLF_EVENT));
+    }
+    asyncPending = 0;
+#endif
+
+    /* ECC Shared Secret */
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < agreeTimes || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
+
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 1, &times, agreeTimes)) {
+                    x[i] = keySize;
+                    ret = wc_ecc_shared_secret(&genKey[i], &genKey2[i], shared[i], &x[i]);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 1, &times)) {
+                        goto exit_ecdhe;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+    } while (bench_stats_sym_check(start));
+exit_ecdhe:
+    bench_stats_asym_finish("ECDHE", keySize * 8, "agree", doAsync, count, start);
+
+    if (ret < 0) {
+        goto exit;
+    }
+#endif /* HAVE_ECC_DHE */
+
+#if !defined(NO_ASN) && defined(HAVE_ECC_SIGN)
+#ifdef WOLFSSL_ASYNC_CRYPT
+    /* Clear events */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        XMEMSET(&genKey[i].asyncDev.event, 0, sizeof(WOLF_EVENT));
+    }
+    asyncPending = 0;
+#endif
+
+    /* Init digest to sign */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        for (count = 0; count < keySize; count++) {
+            digest[i][count] = (byte)count;
+        }
+    }
+
+    /* ECC Sign */
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < agreeTimes || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
+
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 1, &times, agreeTimes)) {
+                    if (genKey[i].state == 0)
+                        x[i] = ECC_MAX_SIG_SIZE;
+                    ret = wc_ecc_sign_hash(digest[i], keySize, sig[i], &x[i],
+                                                            &rng, &genKey[i]);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 1, &times)) {
+                        goto exit_ecdsa_sign;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+    } while (bench_stats_sym_check(start));
+exit_ecdsa_sign:
+    bench_stats_asym_finish("ECDSA", keySize * 8, "sign", doAsync, count, start);
+
+    if (ret < 0) {
+        goto exit;
+    }
+
+#ifdef HAVE_ECC_VERIFY
+#ifdef WOLFSSL_ASYNC_CRYPT
+    /* Clear events */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        XMEMSET(&genKey[i].asyncDev.event, 0, sizeof(WOLF_EVENT));
+    }
+    asyncPending = 0;
+#endif
+
+    /* ECC Verify */
+    bench_stats_start(&count, &start);
+    do {
+        for (times = 0; times < agreeTimes || BENCH_ASYNC_IS_PEND(); ) {
+            bench_async_poll();
+
+            /* while free pending slots in queue, submit ops */
+            for (i = 0; i < BENCH_MAX_PENDING; i++) {
+                if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 1, &times, agreeTimes)) {
+                    if (genKey[i].state == 0)
+                        verify[i] = 0;
+                    ret = wc_ecc_verify_hash(sig[i], x[i], digest[i],
+                                        keySize, &verify[i], &genKey[i]);
+                    if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&genKey[i]), 1, &times)) {
+                        goto exit_ecdsa_verify;
+                    }
+                }
+            } /* for i */
+        } /* for times */
+        count += times;
+    } while (bench_stats_sym_check(start));
+exit_ecdsa_verify:
+    bench_stats_asym_finish("ECDSA", keySize * 8, "verify", doAsync, count, start);
+#endif /* HAVE_ECC_VERIFY */
+#endif /* !NO_ASN && HAVE_ECC_SIGN */
+
+exit:
+
+    if (ret < 0) {
+        printf("bench_ecc failed: %d\n", ret);
+    }
+
+    /* cleanup */
+    for (i = 0; i < BENCH_MAX_PENDING; i++) {
+        wc_ecc_free(&genKey[i]);
+    #ifdef HAVE_ECC_DHE
+        wc_ecc_free(&genKey2[i]);
+    #endif
+    }
+
+#ifdef HAVE_ECC_DHE
+    FREE_ARRAY(shared, BENCH_MAX_PENDING, HEAP_HINT);
+#endif
+#if !defined(NO_ASN) && defined(HAVE_ECC_SIGN)
+    FREE_ARRAY(sig, BENCH_MAX_PENDING, HEAP_HINT);
+#endif
+    FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT);
+
+    bench_async_end();
 }
 
 
-void bench_eccKeyAgree(void)
-{
-    ecc_key genKey, genKey2;
-    double start, total, each, milliEach;
-    int    i, ret;
-    byte   shared[32];
-#if !defined(NO_ASN) && !defined(NO_ECC_SIGN)
-    byte   sig[64+16];  /* der encoding too */
-#endif
-    byte   digest[32];
-    word32 x = 0;
-
-    wc_ecc_init_ex(&genKey, HEAP_HINT, devId);
-    wc_ecc_init_ex(&genKey2, HEAP_HINT, devId);
-
-    ret = wc_ecc_make_key(&rng, 32, &genKey);
-    if (ret != 0) {
-        printf("ecc_make_key failed\n");
-        return;
-    }
-    ret = wc_ecc_make_key(&rng, 32, &genKey2);
-    if (ret != 0) {
-        printf("ecc_make_key failed\n");
-        return;
-    }
-
-    /* 256 bit */
-    start = current_time(1);
-
-    for(i = 0; i < agreeTimes; i++) {
-        x = sizeof(shared);
-        ret = wc_ecc_shared_secret(&genKey, &genKey2, shared, &x);
-        if (ret != 0) {
-            printf("ecc_shared_secret failed\n");
-            return;
-        }
-    }
-
-    total = current_time(0) - start;
-    each  = total / agreeTimes;  /* per second  */
-    milliEach = each * 1000;   /* milliseconds */
-    printf("EC-DHE   key agreement   %6.3f milliseconds, avg over %d"
-           " iterations\n", milliEach, agreeTimes);
-
-    /* make dummy digest */
-    for (i = 0; i < (int)sizeof(digest); i++)
-        digest[i] = (byte)i;
-
-
-#if !defined(NO_ASN) && !defined(NO_ECC_SIGN)
-    start = current_time(1);
-
-    for(i = 0; i < agreeTimes; i++) {
-        x = sizeof(sig);
-        ret = wc_ecc_sign_hash(digest, sizeof(digest), sig, &x, &rng, &genKey);
-        if (ret != 0) {
-            printf("ecc_sign_hash failed\n");
-            return;
-        }
-    }
-
-    total = current_time(0) - start;
-    each  = total / agreeTimes;  /* per second  */
-    milliEach = each * 1000;   /* milliseconds */
-    printf("EC-DSA   sign   time     %6.3f milliseconds, avg over %d"
-           " iterations\n", milliEach, agreeTimes);
-
-    start = current_time(1);
-
-    for(i = 0; i < agreeTimes; i++) {
-        int verify = 0;
-        ret = wc_ecc_verify_hash(sig, x, digest, sizeof(digest), &verify, &genKey);
-        if (ret != 0) {
-            printf("ecc_verify_hash failed\n");
-            return;
-        }
-    }
-#endif
-
-    total = current_time(0) - start;
-    each  = total / agreeTimes;  /* per second  */
-    milliEach = each * 1000;     /* milliseconds */
-    printf("EC-DSA   verify time     %6.3f milliseconds, avg over %d"
-           " iterations\n", milliEach, agreeTimes);
-
-    wc_ecc_free(&genKey2);
-    wc_ecc_free(&genKey);
-}
 #ifdef HAVE_ECC_ENCRYPT
 void bench_eccEncrypt(void)
 {
     ecc_key userA, userB;
+    const int keySize = BENCH_ECC_SIZE;
     byte    msg[48];
     byte    out[80];
     word32  outSz   = sizeof(out);
-    word32  plainSz = sizeof(plain);
-    int     ret, i;
-    double start, total, each, milliEach;
+    word32  bench_plainSz = BENCH_SIZE;
+    int     ret, i, count;
+    double start;
 
     wc_ecc_init_ex(&userA, HEAP_HINT, devId);
     wc_ecc_init_ex(&userB, HEAP_HINT, devId);
 
-    wc_ecc_make_key(&rng, 32, &userA);
-    wc_ecc_make_key(&rng, 32, &userB);
+    wc_ecc_make_key(&rng, keySize, &userA);
+    wc_ecc_make_key(&rng, keySize, &userB);
 
     for (i = 0; i < (int)sizeof(msg); i++)
         msg[i] = i;
 
-    start = current_time(1);
-
-    for(i = 0; i < ntimes; i++) {
-        /* encrypt msg to B */
-        ret = wc_ecc_encrypt(&userA, &userB, msg, sizeof(msg), out, &outSz, NULL);
-        if (ret != 0) {
-            printf("wc_ecc_encrypt failed! %d\n", ret);
-            return;
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < ntimes; i++) {
+            /* encrypt msg to B */
+            ret = wc_ecc_encrypt(&userA, &userB, msg, sizeof(msg), out, &outSz, NULL);
+            if (ret != 0) {
+                printf("wc_ecc_encrypt failed! %d\n", ret);
+                goto exit_enc;
+            }
         }
-    }
+        count += i;
+    } while (bench_stats_sym_check(start));
+exit_enc:
+    bench_stats_asym_finish("ECC", keySize * 8, "encrypt", 0, count, start);
 
-    total = current_time(0) - start;
-    each  = total / ntimes;  /* per second  */
-    milliEach = each * 1000;   /* milliseconds */
-    printf("ECC      encrypt         %6.3f milliseconds, avg over %d"
-           " iterations\n", milliEach, ntimes);
-
-    start = current_time(1);
-
-    for(i = 0; i < ntimes; i++) {
-        /* decrypt msg from A */
-        ret = wc_ecc_decrypt(&userB, &userA, out, outSz, plain, &plainSz, NULL);
-        if (ret != 0) {
-            printf("wc_ecc_decrypt failed! %d\n", ret);
-            return;
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < ntimes; i++) {
+            /* decrypt msg from A */
+            ret = wc_ecc_decrypt(&userB, &userA, out, outSz, bench_plain, &bench_plainSz, NULL);
+            if (ret != 0) {
+                printf("wc_ecc_decrypt failed! %d\n", ret);
+                goto exit_dec;
+            }
         }
-    }
-
-    total = current_time(0) - start;
-    each  = total / ntimes;  /* per second  */
-    milliEach = each * 1000;   /* milliseconds */
-    printf("ECC      decrypt         %6.3f milliseconds, avg over %d"
-           " iterations\n", milliEach, ntimes);
+        count += i;
+    } while (bench_stats_sym_check(start));
+exit_dec:
+    bench_stats_asym_finish("ECC", keySize * 8, "decrypt", 0, count, start);
 
     /* cleanup */
     wc_ecc_free(&userB);
@@ -2418,31 +3064,27 @@ void bench_eccEncrypt(void)
 void bench_curve25519KeyGen(void)
 {
     curve25519_key genKey;
-    double start, total, each, milliEach;
-    int    i;
+    double start;
+    int    i, count;
 
-    /* 256 bit */
-    start = current_time(1);
-
-    for(i = 0; i < genTimes; i++) {
-        wc_curve25519_make_key(&rng, 32, &genKey);
-        wc_curve25519_free(&genKey);
-    }
-
-    total = current_time(0) - start;
-    each  = total / genTimes;  /* per second  */
-    milliEach = each * 1000;   /* milliseconds */
-    printf("\n");
-    printf("CURVE25519 256 key generation %6.3f milliseconds, avg over %d"
-           " iterations\n", milliEach, genTimes);
+    /* Key Gen */
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < genTimes; i++) {
+            wc_curve25519_make_key(&rng, 32, &genKey);
+            wc_curve25519_free(&genKey);
+        }
+        count += i;
+    } while (bench_stats_sym_check(start));
+    bench_stats_asym_finish("CURVE", 25519, "key gen", 0, count, start);
 }
 
 #ifdef HAVE_CURVE25519_SHARED_SECRET
 void bench_curve25519KeyAgree(void)
 {
     curve25519_key genKey, genKey2;
-    double start, total, each, milliEach;
-    int    i, ret;
+    double start;
+    int    ret, i, count;
     byte   shared[32];
     word32 x = 0;
 
@@ -2460,23 +3102,21 @@ void bench_curve25519KeyAgree(void)
         return;
     }
 
-    /* 256 bit */
-    start = current_time(1);
-
-    for(i = 0; i < agreeTimes; i++) {
-        x = sizeof(shared);
-        ret = wc_curve25519_shared_secret(&genKey, &genKey2, shared, &x);
-        if (ret != 0) {
-            printf("curve25519_shared_secret failed\n");
-            return;
+    /* Shared secret */
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < agreeTimes; i++) {
+            x = sizeof(shared);
+            ret = wc_curve25519_shared_secret(&genKey, &genKey2, shared, &x);
+            if (ret != 0) {
+                printf("curve25519_shared_secret failed\n");
+                goto exit;
+            }
         }
-    }
-
-    total = current_time(0) - start;
-    each  = total / agreeTimes;  /* per second  */
-    milliEach = each * 1000;   /* milliseconds */
-    printf("CURVE25519 key agreement      %6.3f milliseconds, avg over %d"
-           " iterations\n", milliEach, agreeTimes);
+        count += i;
+    } while (bench_stats_sym_check(start));
+exit:
+    bench_stats_asym_finish("CURVE", 25519, "key agree", 0, count, start);
 
     wc_curve25519_free(&genKey2);
     wc_curve25519_free(&genKey);
@@ -2488,24 +3128,20 @@ void bench_curve25519KeyAgree(void)
 void bench_ed25519KeyGen(void)
 {
     ed25519_key genKey;
-    double start, total, each, milliEach;
-    int    i;
+    double start;
+    int    i, count;
 
-    /* 256 bit */
-    start = current_time(1);
-
-    for(i = 0; i < genTimes; i++) {
-        wc_ed25519_init(&genKey);
-        wc_ed25519_make_key(&rng, 32, &genKey);
-        wc_ed25519_free(&genKey);
-    }
-
-    total = current_time(0) - start;
-    each  = total / genTimes;  /* per second  */
-    milliEach = each * 1000;   /* milliseconds */
-    printf("\n");
-    printf("ED25519  key generation  %6.3f milliseconds, avg over %d"
-           " iterations\n", milliEach, genTimes);
+    /* Key Gen */
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < genTimes; i++) {
+            wc_ed25519_init(&genKey);
+            wc_ed25519_make_key(&rng, 32, &genKey);
+            wc_ed25519_free(&genKey);
+        }
+        count += i;
+    } while (bench_stats_sym_check(start));
+    bench_stats_asym_finish("ED", 25519, "key gen", 0, count, start);
 }
 
 
@@ -2514,8 +3150,8 @@ void bench_ed25519KeySign(void)
     int    ret;
     ed25519_key genKey;
 #ifdef HAVE_ED25519_SIGN
-    double start, total, each, milliEach;
-    int    i;
+    double start;
+    int    i, count;
     byte   sig[ED25519_SIG_SIZE];
     byte   msg[512];
     word32 x = 0;
@@ -2534,41 +3170,37 @@ void bench_ed25519KeySign(void)
     for (i = 0; i < (int)sizeof(msg); i++)
         msg[i] = (byte)i;
 
-    start = current_time(1);
-
-    for(i = 0; i < agreeTimes; i++) {
-        x = sizeof(sig);
-        ret = wc_ed25519_sign_msg(msg, sizeof(msg), sig, &x, &genKey);
-        if (ret != 0) {
-            printf("ed25519_sign_msg failed\n");
-            return;
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < agreeTimes; i++) {
+            x = sizeof(sig);
+            ret = wc_ed25519_sign_msg(msg, sizeof(msg), sig, &x, &genKey);
+            if (ret != 0) {
+                printf("ed25519_sign_msg failed\n");
+                goto exit_ed_sign;
+            }
         }
-    }
-
-    total = current_time(0) - start;
-    each  = total / agreeTimes;  /* per second  */
-    milliEach = each * 1000;   /* milliseconds */
-    printf("ED25519  sign   time     %6.3f milliseconds, avg over %d"
-           " iterations\n", milliEach, agreeTimes);
+        count += i;
+    } while (bench_stats_sym_check(start));
+exit_ed_sign:
+    bench_stats_asym_finish("ED", 25519, "sign", 0, count, start);
 
 #ifdef HAVE_ED25519_VERIFY
-    start = current_time(1);
-
-    for(i = 0; i < agreeTimes; i++) {
-        int verify = 0;
-        ret = wc_ed25519_verify_msg(sig, x, msg, sizeof(msg), &verify,
-                                    &genKey);
-        if (ret != 0 || verify != 1) {
-            printf("ed25519_verify_msg failed\n");
-            return;
+    bench_stats_start(&count, &start);
+    do {
+        for (i = 0; i < agreeTimes; i++) {
+            int verify = 0;
+            ret = wc_ed25519_verify_msg(sig, x, msg, sizeof(msg), &verify,
+                                        &genKey);
+            if (ret != 0 || verify != 1) {
+                printf("ed25519_verify_msg failed\n");
+                goto exit_ed_verify;
+            }
         }
-    }
-
-    total = current_time(0) - start;
-    each  = total / agreeTimes;  /* per second  */
-    milliEach = each * 1000;     /* milliseconds */
-    printf("ED25519  verify time     %6.3f milliseconds, avg over %d"
-           " iterations\n", milliEach, agreeTimes);
+        count += i;
+    } while (bench_stats_sym_check(start));
+exit_ed_verify:
+    bench_stats_asym_finish("ED", 25519, "verify", 0, count, start);
 #endif /* HAVE_ED25519_VERIFY */
 #endif /* HAVE_ED25519_SIGN */
 
@@ -2705,6 +3337,26 @@ static INLINE word64 get_intel_cycles(void)
 }
 
 #endif /* HAVE_GET_CYCLES */
+
+#ifndef NO_MAIN_DRIVER
+
+int main(int argc, char** argv)
+{
+    int ret = 0;
+
+#ifdef HAVE_STACK_SIZE
+    ret = StackSizeCheck(NULL, benchmark_test);
+#else
+    ret = benchmark_test(NULL);
+#endif
+
+    (void)argc;
+    (void)argv;
+
+    return ret;
+}
+#endif /* !NO_MAIN_DRIVER */
+
 #else
     #ifndef NO_MAIN_DRIVER
         int main() { return 0; }
diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c
old mode 100644
new mode 100755
index 7b8c4b40a..e872ae8a6
--- a/wolfcrypt/src/aes.c
+++ b/wolfcrypt/src/aes.c
@@ -30,176 +30,175 @@
 
 #include 
 
+/* FIPS wrapper calls; users can also call the FIPS functions directly */
 #ifdef HAVE_FIPS
-int wc_AesSetKey(Aes* aes, const byte* key, word32 len, const byte* iv,
-                          int dir)
-{
-    return AesSetKey_fips(aes, key, len, iv, dir);
-}
+    int wc_AesSetKey(Aes* aes, const byte* key, word32 len, const byte* iv,
+                              int dir)
+    {
+        return AesSetKey_fips(aes, key, len, iv, dir);
+    }
+    int wc_AesSetIV(Aes* aes, const byte* iv)
+    {
+        return AesSetIV_fips(aes, iv);
+    }
+    #ifdef HAVE_AES_CBC
+        int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+        {
+            return AesCbcEncrypt_fips(aes, out, in, sz);
+        }
+        #ifdef HAVE_AES_DECRYPT
+            int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+            {
+                return AesCbcDecrypt_fips(aes, out, in, sz);
+            }
+        #endif /* HAVE_AES_DECRYPT */
+    #endif /* HAVE_AES_CBC */
 
+    /* AES-CTR */
+    #ifdef WOLFSSL_AES_COUNTER
+        void wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
+        {
+            AesCtrEncrypt(aes, out, in, sz);
+        }
+    #endif
 
-int wc_AesSetIV(Aes* aes, const byte* iv)
-{
-    return AesSetIV_fips(aes, iv);
-}
+    /* AES-DIRECT */
+    #if defined(WOLFSSL_AES_DIRECT)
+        void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in)
+        {
+            AesEncryptDirect(aes, out, in);
+        }
 
+        #ifdef HAVE_AES_DECRYPT
+            void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in)
+            {
+                AesDecryptDirect(aes, out, in);
+            }
+        #endif /* HAVE_AES_DECRYPT */
 
-#ifdef HAVE_AES_CBC
-int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
-{
-    return AesCbcEncrypt_fips(aes, out, in, sz);
-}
+        int wc_AesSetKeyDirect(Aes* aes, const byte* key, word32 len,
+                                        const byte* iv, int dir)
+        {
+            return AesSetKeyDirect(aes, key, len, iv, dir);
+        }
+    #endif /* WOLFSSL_AES_DIRECT */
 
-#ifdef HAVE_AES_DECRYPT
-int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
-{
-    return AesCbcDecrypt_fips(aes, out, in, sz);
-}
-#endif /* HAVE_AES_DECRYPT */
-#endif /* HAVE_AES_CBC */
+    /* AES-GCM */
+    #ifdef HAVE_AESGCM
+        int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
+        {
+            return AesGcmSetKey_fips(aes, key, len);
+        }
+        int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
+                                      const byte* iv, word32 ivSz,
+                                      byte* authTag, word32 authTagSz,
+                                      const byte* authIn, word32 authInSz)
+        {
+            return AesGcmEncrypt_fips(aes, out, in, sz, iv, ivSz, authTag,
+                authTagSz, authIn, authInSz);
+        }
 
-/* AES-CTR */
-#ifdef WOLFSSL_AES_COUNTER
-void wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
-{
-    AesCtrEncrypt(aes, out, in, sz);
-}
-#endif
+        #ifdef HAVE_AES_DECRYPT
+            int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
+                                          const byte* iv, word32 ivSz,
+                                          const byte* authTag, word32 authTagSz,
+                                          const byte* authIn, word32 authInSz)
+            {
+                return AesGcmDecrypt_fips(aes, out, in, sz, iv, ivSz, authTag,
+                    authTagSz, authIn, authInSz);
+            }
+        #endif /* HAVE_AES_DECRYPT */
 
-/* AES-DIRECT */
-#if defined(WOLFSSL_AES_DIRECT)
-void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in)
-{
-    AesEncryptDirect(aes, out, in);
-}
+        int wc_GmacSetKey(Gmac* gmac, const byte* key, word32 len)
+        {
+            return GmacSetKey(gmac, key, len);
+        }
+        int wc_GmacUpdate(Gmac* gmac, const byte* iv, word32 ivSz,
+                                      const byte* authIn, word32 authInSz,
+                                      byte* authTag, word32 authTagSz)
+        {
+            return GmacUpdate(gmac, iv, ivSz, authIn, authInSz,
+                              authTag, authTagSz);
+        }
+    #endif /* HAVE_AESGCM */
 
-#ifdef HAVE_AES_DECRYPT
-void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in)
-{
-    AesDecryptDirect(aes, out, in);
-}
-#endif /* HAVE_AES_DECRYPT */
+    /* AES-CCM */
+    #ifdef HAVE_AESCCM
+        void wc_AesCcmSetKey(Aes* aes, const byte* key, word32 keySz)
+        {
+            AesCcmSetKey(aes, key, keySz);
+        }
+        int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
+                                      const byte* nonce, word32 nonceSz,
+                                      byte* authTag, word32 authTagSz,
+                                      const byte* authIn, word32 authInSz)
+        {
+            /* sanity check on arguments */
+            if (aes == NULL || out == NULL || in == NULL || nonce == NULL
+                    || authTag == NULL || nonceSz < 7 || nonceSz > 13)
+                return BAD_FUNC_ARG;
 
-int wc_AesSetKeyDirect(Aes* aes, const byte* key, word32 len,
-                                const byte* iv, int dir)
-{
-    return AesSetKeyDirect(aes, key, len, iv, dir);
-}
-#endif
+            AesCcmEncrypt(aes, out, in, inSz, nonce, nonceSz, authTag,
+                authTagSz, authIn, authInSz);
+            return 0;
+        }
 
+        #ifdef HAVE_AES_DECRYPT
+            int  wc_AesCcmDecrypt(Aes* aes, byte* out,
+                const byte* in, word32 inSz,
+                const byte* nonce, word32 nonceSz,
+                const byte* authTag, word32 authTagSz,
+                const byte* authIn, word32 authInSz)
+            {
+                return AesCcmDecrypt(aes, out, in, inSz, nonce, nonceSz,
+                    authTag, authTagSz, authIn, authInSz);
+            }
+        #endif /* HAVE_AES_DECRYPT */
+    #endif /* HAVE_AESCCM */
 
-#ifdef HAVE_AESGCM
-int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len)
-{
-    return AesGcmSetKey_fips(aes, key, len);
-}
+    int  wc_AesInit(Aes* aes, void* h, int i)
+    {
+        (void)aes;
+        (void)h;
+        (void)i;
+        /* No-op in FIPS builds: the FIPS code does not support
+            AesInit(aes, h, i), so nothing is forwarded; always returns 0 */
+        return 0;
+    }
+    void wc_AesFree(Aes* aes)
+    {
+        (void)aes;
+        /* No-op in FIPS builds: the FIPS code does not support
+            AesFree(aes), so nothing is forwarded */
+    }
 
-
-int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
-                              const byte* iv, word32 ivSz,
-                              byte* authTag, word32 authTagSz,
-                              const byte* authIn, word32 authInSz)
-{
-    return AesGcmEncrypt_fips(aes, out, in, sz, iv, ivSz, authTag, authTagSz,
-                              authIn, authInSz);
-}
-
-#ifdef HAVE_AES_DECRYPT
-int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
-                              const byte* iv, word32 ivSz,
-                              const byte* authTag, word32 authTagSz,
-                              const byte* authIn, word32 authInSz)
-{
-    return AesGcmDecrypt_fips(aes, out, in, sz, iv, ivSz, authTag, authTagSz,
-                              authIn, authInSz);
-}
-#endif /* HAVE_AES_DECRYPT */
-
-int wc_GmacSetKey(Gmac* gmac, const byte* key, word32 len)
-{
-    return GmacSetKey(gmac, key, len);
-}
-
-
-int wc_GmacUpdate(Gmac* gmac, const byte* iv, word32 ivSz,
-                              const byte* authIn, word32 authInSz,
-                              byte* authTag, word32 authTagSz)
-{
-    return GmacUpdate(gmac, iv, ivSz, authIn, authInSz,
-                      authTag, authTagSz);
-}
-
-#endif /* HAVE_AESGCM */
-#ifdef HAVE_AESCCM
-int wc_AesCcmSetKey(Aes* aes, const byte* key, word32 keySz)
-{
-    AesCcmSetKey(aes, key, keySz);
-    return 0;
-}
-
-
-int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
-                              const byte* nonce, word32 nonceSz,
-                              byte* authTag, word32 authTagSz,
-                              const byte* authIn, word32 authInSz)
-{
-    /* sanity check on arguments */
-    if (aes == NULL || out == NULL || in == NULL || nonce == NULL
-            || authTag == NULL || nonceSz < 7 || nonceSz > 13)
-        return BAD_FUNC_ARG;
-
-    AesCcmEncrypt(aes, out, in, inSz, nonce, nonceSz, authTag, authTagSz,
-                  authIn, authInSz);
-    return 0;
-}
-
-#ifdef HAVE_AES_DECRYPT
-int  wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
-                              const byte* nonce, word32 nonceSz,
-                              const byte* authTag, word32 authTagSz,
-                              const byte* authIn, word32 authInSz)
-{
-    return AesCcmDecrypt(aes, out, in, inSz, nonce, nonceSz, authTag, authTagSz,
-                         authIn, authInSz);
-}
-#endif /* HAVE_AES_DECRYPT */
-#endif /* HAVE_AESCCM */
-
-#ifdef WOLFSSL_ASYNC_CRYPT
-int  wc_AesAsyncInit(Aes* aes, int i)
-{
-    return AesAsyncInit(aes, i);
-}
-
-void wc_AesAsyncFree(Aes* aes)
-{
-    AesAsyncFree(aes);
-}
-#endif
 #else /* HAVE_FIPS */
 
-#ifdef WOLFSSL_TI_CRYPT
-#include 
+
+#if defined(WOLFSSL_TI_CRYPT)
+    #include 
 #else
 
 #include 
 #include 
+
 #ifdef NO_INLINE
     #include 
 #else
     #define WOLFSSL_MISC_INCLUDED
     #include 
 #endif
+
 #ifdef DEBUG_AESNI
     #include 
 #endif
 
-
 #ifdef _MSC_VER
     /* 4127 warning constant while(1)  */
     #pragma warning(disable: 4127)
 #endif
 
+
 /* Define AES implementation includes and functions */
 #if defined(STM32F2_CRYPTO) || defined(STM32F4_CRYPTO)
      /* STM32F2/F4 hardware AES support for CBC, CTR modes */
@@ -416,13 +415,12 @@ void wc_AesAsyncFree(Aes* aes)
         return ret;
     }
     #endif /* HAVE_AES_DECRYPT */
+
 #elif defined(WOLFSSL_PIC32MZ_CRYPT)
     /* NOTE: no support for AES-CCM/Direct */
     #define DEBUG_WOLFSSL
     #include "wolfssl/wolfcrypt/port/pic32/pic32mz-crypt.h"
-#elif defined(HAVE_CAVIUM)
-    /* still leave SW crypto available */
-    #define NEED_AES_TABLES
+
 #elif defined(WOLFSSL_NRF51_AES)
     /* Use built-in AES hardware - AES 128 ECB Encrypt Only */
     #include "wolfssl/wolfcrypt/port/nrf51.h"
@@ -431,9 +429,176 @@ void wc_AesAsyncFree(Aes* aes)
     {
         return nrf51_aes_encrypt(inBlock, (byte*)aes->key, aes->rounds, outBlock);
     }
+
     #ifdef HAVE_AES_DECRYPT
         #error nRF51 AES Hardware does not support decrypt
     #endif /* HAVE_AES_DECRYPT */
+
+
+#elif defined(WOLFSSL_AESNI)
+
+    #define NEED_AES_TABLES
+
+    /* Each platform needs to query info type 1 from cpuid to see if aesni is
+     * supported. Also, let's setup a macro for proper linkage w/o ABI conflicts
+     */
+
+    #ifndef AESNI_ALIGN
+        #define AESNI_ALIGN 16
+    #endif
+
+    #ifndef _MSC_VER
+        #define cpuid(reg, func)\
+            __asm__ __volatile__ ("cpuid":\
+                 "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\
+                 "a" (func));
+
+        #define XASM_LINK(f) asm(f)
+    #else
+
+        #include 
+        #define cpuid(a,b) __cpuid((int*)a,b)
+
+        #define XASM_LINK(f)
+    #endif /* _MSC_VER */
+
+
+    /* Returns 1 if cpuid info type 1 has bit 25 (0x2000000) set in the third
+     * register (ECX), which is how AES-NI support is reported; 0 otherwise */
+    static int Check_CPU_support_AES(void)
+    {
+        unsigned int regs[4];  /* a,b,c,d land in slots 0,1,2,3 */
+
+        cpuid(regs, 1);
+
+        return ((regs[2] & 0x2000000) != 0) ? 1 : 0;
+    }
+
+    static int checkAESNI = 0;
+    static int haveAESNI  = 0;
+
+
+    /* Tell the C compiler these are asm functions, in case clang/gcc/llvm etc.
+       disagree on the ABI underscore prefix for symbol names */
+    #ifdef HAVE_AES_CBC
+        void AES_CBC_encrypt(const unsigned char* in, unsigned char* out,
+                             unsigned char* ivec, unsigned long length,
+                             const unsigned char* KS, int nr)
+                             XASM_LINK("AES_CBC_encrypt");
+
+        #ifdef HAVE_AES_DECRYPT
+            #if defined(WOLFSSL_AESNI_BY4)
+                void AES_CBC_decrypt_by4(const unsigned char* in, unsigned char* out,
+                                         unsigned char* ivec, unsigned long length,
+                                         const unsigned char* KS, int nr)
+                                         XASM_LINK("AES_CBC_decrypt_by4");
+            #elif defined(WOLFSSL_AESNI_BY6)
+                void AES_CBC_decrypt_by6(const unsigned char* in, unsigned char* out,
+                                         unsigned char* ivec, unsigned long length,
+                                         const unsigned char* KS, int nr)
+                                         XASM_LINK("AES_CBC_decrypt_by6");
+            #else /* WOLFSSL_AESNI_BYx */
+                void AES_CBC_decrypt_by8(const unsigned char* in, unsigned char* out,
+                                         unsigned char* ivec, unsigned long length,
+                                         const unsigned char* KS, int nr)
+                                         XASM_LINK("AES_CBC_decrypt_by8");
+            #endif /* WOLFSSL_AESNI_BYx */
+        #endif /* HAVE_AES_DECRYPT */
+    #endif /* HAVE_AES_CBC */
+
+    void AES_ECB_encrypt(const unsigned char* in, unsigned char* out,
+                         unsigned long length, const unsigned char* KS, int nr)
+                         XASM_LINK("AES_ECB_encrypt");
+
+    #ifdef HAVE_AES_DECRYPT
+        void AES_ECB_decrypt(const unsigned char* in, unsigned char* out,
+                             unsigned long length, const unsigned char* KS, int nr)
+                             XASM_LINK("AES_ECB_decrypt");
+    #endif
+
+    void AES_128_Key_Expansion(const unsigned char* userkey,
+                               unsigned char* key_schedule)
+                               XASM_LINK("AES_128_Key_Expansion");
+
+    void AES_192_Key_Expansion(const unsigned char* userkey,
+                               unsigned char* key_schedule)
+                               XASM_LINK("AES_192_Key_Expansion");
+
+    void AES_256_Key_Expansion(const unsigned char* userkey,
+                               unsigned char* key_schedule)
+                               XASM_LINK("AES_256_Key_Expansion");
+
+
+    /* Expand userKey (128/192/256 bits) into aes->key and set aes->rounds */
+    static int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
+                                   Aes* aes)
+    {
+        if (userKey == NULL || aes == NULL)
+            return BAD_FUNC_ARG;
+        switch (bits) {
+            case 128:
+                AES_128_Key_Expansion(userKey, (byte*)aes->key);
+                aes->rounds = 10;
+                break;
+            case 192:
+                AES_192_Key_Expansion(userKey, (byte*)aes->key);
+                aes->rounds = 12;
+                break;
+            case 256:
+                AES_256_Key_Expansion(userKey, (byte*)aes->key);
+                aes->rounds = 14;
+                break;
+            default:
+                return BAD_FUNC_ARG; /* unsupported key size */
+        }
+        return 0;
+    }
+
+    #ifdef HAVE_AES_DECRYPT
+        /* Build the AES-NI decryption key schedule for userKey in aes->key.
+         * The encryption schedule is expanded into a temporary Aes and then
+         * stored in reverse round order; every round key except the first
+         * and last is passed through _mm_aesimc_si128.
+         *
+         * userKey  raw key bytes
+         * bits     key size in bits (128, 192 or 256)
+         * aes      receives the schedule and the round count
+         * returns  0 on success; BAD_FUNC_ARG if userKey or aes is NULL or
+         *          bits is not a supported size
+         */
+        static int AES_set_decrypt_key(const unsigned char* userKey,
+                                                    const int bits, Aes* aes)
+        {
+            int nr;
+            int i;
+            Aes temp_key;
+            __m128i *Key_Schedule;
+            __m128i *Temp_Key_Schedule;
+
+            /* check arguments before deriving any pointer from aes */
+            if (!userKey || !aes)
+                return BAD_FUNC_ARG;
+
+            if (AES_set_encrypt_key(userKey,bits,&temp_key) == BAD_FUNC_ARG)
+                return BAD_FUNC_ARG;
+
+            Key_Schedule = (__m128i*)aes->key;
+            Temp_Key_Schedule = (__m128i*)temp_key.key;
+            nr = temp_key.rounds;
+            aes->rounds = nr;
+
+            /* first and last round keys trade places unchanged */
+            Key_Schedule[nr] = Temp_Key_Schedule[0];
+            Key_Schedule[0] = Temp_Key_Schedule[nr];
+
+            /* remaining round keys: reversed order, through AESIMC */
+            for (i = 1; i < nr; i++)
+                Key_Schedule[nr-i] = _mm_aesimc_si128(Temp_Key_Schedule[i]);
+
+            return 0;
+        }
+    #endif /* HAVE_AES_DECRYPT */
+
 #else
 
     /* using wolfCrypt software AES implementation */
@@ -441,6 +606,7 @@ void wc_AesAsyncFree(Aes* aes)
 #endif
 
 
+
 #ifdef NEED_AES_TABLES
 
 static const word32 rcon[] = {
@@ -1027,166 +1193,8 @@ static const byte Td4[256] =
 #define GETBYTE(x, y) (word32)((byte)((x) >> (8 * (y))))
 
 
-#ifdef WOLFSSL_AESNI
-
-/* Each platform needs to query info type 1 from cpuid to see if aesni is
- * supported. Also, let's setup a macro for proper linkage w/o ABI conflicts
- */
-
-#ifndef _MSC_VER
-
-    #define cpuid(reg, func)\
-        __asm__ __volatile__ ("cpuid":\
-             "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\
-             "a" (func));
-
-    #define XASM_LINK(f) asm(f)
-#else
-
-    #include 
-    #define cpuid(a,b) __cpuid((int*)a,b)
-
-    #define XASM_LINK(f)
-
-#endif /* _MSC_VER */
-
-
-static int Check_CPU_support_AES(void)
-{
-    unsigned int reg[4];  /* put a,b,c,d into 0,1,2,3 */
-    cpuid(reg, 1);        /* query info 1 */
-
-    if (reg[2] & 0x2000000)
-        return 1;
-
-    return 0;
-}
-
-static int checkAESNI = 0;
-static int haveAESNI  = 0;
-
-
-/* tell C compiler these are asm functions in case any mix up of ABI underscore
-   prefix between clang/gcc/llvm etc */
-#ifdef HAVE_AES_CBC
-void AES_CBC_encrypt(const unsigned char* in, unsigned char* out,
-                     unsigned char* ivec, unsigned long length,
-                     const unsigned char* KS, int nr)
-                     XASM_LINK("AES_CBC_encrypt");
-
-#ifdef HAVE_AES_DECRYPT
-    #if defined(WOLFSSL_AESNI_BY4)
-    void AES_CBC_decrypt_by4(const unsigned char* in, unsigned char* out,
-                             unsigned char* ivec, unsigned long length,
-                             const unsigned char* KS, int nr)
-                             XASM_LINK("AES_CBC_decrypt_by4");
-    #elif defined(WOLFSSL_AESNI_BY6)
-    void AES_CBC_decrypt_by6(const unsigned char* in, unsigned char* out,
-                             unsigned char* ivec, unsigned long length,
-                             const unsigned char* KS, int nr)
-                             XASM_LINK("AES_CBC_decrypt_by6");
-    #else /* WOLFSSL_AESNI_BYx */
-    void AES_CBC_decrypt_by8(const unsigned char* in, unsigned char* out,
-                             unsigned char* ivec, unsigned long length,
-                             const unsigned char* KS, int nr)
-                             XASM_LINK("AES_CBC_decrypt_by8");
-    #endif /* WOLFSSL_AESNI_BYx */
-#endif /* HAVE_AES_DECRYPT */
-#endif /* HAVE_AES_CBC */
-
-void AES_ECB_encrypt(const unsigned char* in, unsigned char* out,
-                     unsigned long length, const unsigned char* KS, int nr)
-                     XASM_LINK("AES_ECB_encrypt");
-
-#ifdef HAVE_AES_DECRYPT
-void AES_ECB_decrypt(const unsigned char* in, unsigned char* out,
-                     unsigned long length, const unsigned char* KS, int nr)
-                     XASM_LINK("AES_ECB_decrypt");
-#endif
-
-void AES_128_Key_Expansion(const unsigned char* userkey,
-                           unsigned char* key_schedule)
-                           XASM_LINK("AES_128_Key_Expansion");
-
-void AES_192_Key_Expansion(const unsigned char* userkey,
-                           unsigned char* key_schedule)
-                           XASM_LINK("AES_192_Key_Expansion");
-
-void AES_256_Key_Expansion(const unsigned char* userkey,
-                           unsigned char* key_schedule)
-                           XASM_LINK("AES_256_Key_Expansion");
-
-
-static int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
-                               Aes* aes)
-{
-    if (!userKey || !aes)
-        return BAD_FUNC_ARG;
-
-    if (bits == 128) {
-       AES_128_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 10;
-       return 0;
-    }
-    else if (bits == 192) {
-       AES_192_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 12;
-       return 0;
-    }
-    else if (bits == 256) {
-       AES_256_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 14;
-       return 0;
-    }
-    return BAD_FUNC_ARG;
-}
-
-#ifdef HAVE_AES_DECRYPT
-static int AES_set_decrypt_key(const unsigned char* userKey, const int bits,
-                               Aes* aes)
-{
-    int nr;
-    Aes temp_key;
-    __m128i *Key_Schedule = (__m128i*)aes->key;
-    __m128i *Temp_Key_Schedule = (__m128i*)temp_key.key;
-
-    if (!userKey || !aes)
-        return BAD_FUNC_ARG;
-
-    if (AES_set_encrypt_key(userKey,bits,&temp_key) == BAD_FUNC_ARG)
-        return BAD_FUNC_ARG;
-
-    nr = temp_key.rounds;
-    aes->rounds = nr;
-
-    Key_Schedule[nr] = Temp_Key_Schedule[0];
-    Key_Schedule[nr-1] = _mm_aesimc_si128(Temp_Key_Schedule[1]);
-    Key_Schedule[nr-2] = _mm_aesimc_si128(Temp_Key_Schedule[2]);
-    Key_Schedule[nr-3] = _mm_aesimc_si128(Temp_Key_Schedule[3]);
-    Key_Schedule[nr-4] = _mm_aesimc_si128(Temp_Key_Schedule[4]);
-    Key_Schedule[nr-5] = _mm_aesimc_si128(Temp_Key_Schedule[5]);
-    Key_Schedule[nr-6] = _mm_aesimc_si128(Temp_Key_Schedule[6]);
-    Key_Schedule[nr-7] = _mm_aesimc_si128(Temp_Key_Schedule[7]);
-    Key_Schedule[nr-8] = _mm_aesimc_si128(Temp_Key_Schedule[8]);
-    Key_Schedule[nr-9] = _mm_aesimc_si128(Temp_Key_Schedule[9]);
-
-    if(nr>10) {
-        Key_Schedule[nr-10] = _mm_aesimc_si128(Temp_Key_Schedule[10]);
-        Key_Schedule[nr-11] = _mm_aesimc_si128(Temp_Key_Schedule[11]);
-    }
-
-    if(nr>12) {
-        Key_Schedule[nr-12] = _mm_aesimc_si128(Temp_Key_Schedule[12]);
-        Key_Schedule[nr-13] = _mm_aesimc_si128(Temp_Key_Schedule[13]);
-    }
-
-    Key_Schedule[0] = Temp_Key_Schedule[nr];
-
-    return 0;
-}
-#endif /* HAVE_AES_DECRYPT */
-#endif /* WOLFSSL_AESNI */
-
-#if defined(HAVE_AES_CBC) || defined(WOLFSSL_AES_DIRECT) ||\
-    defined(HAVE_AESGCM)
 
+#if defined(HAVE_AES_CBC) || defined(WOLFSSL_AES_DIRECT) || defined(HAVE_AESGCM)
 
 #ifndef WC_CACHE_LINE_SZ
     #if defined(__x86_64__) || defined(_M_X64) || \
@@ -1220,12 +1228,13 @@ static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
     word32 s0, s1, s2, s3;
     word32 t0, t1, t2, t3;
     word32 r = aes->rounds >> 1;
-
     const word32* rk = aes->key;
+
     if (r > 7 || r == 0) {
         WOLFSSL_MSG("AesEncrypt encountered improper key, set it up");
         return;  /* stop instead of segfaulting, set up your keys! */
     }
+
 #ifdef WOLFSSL_AESNI
     if (haveAESNI && aes->use_aesni) {
         #ifdef DEBUG_AESNI
@@ -1238,16 +1247,19 @@ static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
         #endif
 
         /* check alignment, decrypt doesn't need alignment */
-        if ((wolfssl_word)inBlock % 16) {
+        if ((wolfssl_word)inBlock % AESNI_ALIGN) {
         #ifndef NO_WOLFSSL_ALLOC_ALIGN
             byte* tmp = (byte*)XMALLOC(AES_BLOCK_SIZE, aes->heap,
                                                       DYNAMIC_TYPE_TMP_BUFFER);
+            byte* tmp_align;
             if (tmp == NULL) return;
 
-            XMEMCPY(tmp, inBlock, AES_BLOCK_SIZE);
-            AES_ECB_encrypt(tmp, tmp, AES_BLOCK_SIZE, (byte*)aes->key,
+            tmp_align = tmp + (AESNI_ALIGN - ((size_t)tmp % AESNI_ALIGN));
+
+            XMEMCPY(tmp_align, inBlock, AES_BLOCK_SIZE);
+            AES_ECB_encrypt(tmp_align, tmp_align, AES_BLOCK_SIZE, (byte*)aes->key,
                             aes->rounds);
-            XMEMCPY(outBlock, tmp, AES_BLOCK_SIZE);
+            XMEMCPY(outBlock, tmp_align, AES_BLOCK_SIZE);
             XFREE(tmp, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
             return;
         #else
@@ -1277,12 +1289,12 @@ static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
     XMEMCPY(&s2, inBlock + 2 * sizeof(s0), sizeof(s2));
     XMEMCPY(&s3, inBlock + 3 * sizeof(s0), sizeof(s3));
 
-    #ifdef LITTLE_ENDIAN_ORDER
-        s0 = ByteReverseWord32(s0);
-        s1 = ByteReverseWord32(s1);
-        s2 = ByteReverseWord32(s2);
-        s3 = ByteReverseWord32(s3);
-    #endif
+#ifdef LITTLE_ENDIAN_ORDER
+    s0 = ByteReverseWord32(s0);
+    s1 = ByteReverseWord32(s1);
+    s2 = ByteReverseWord32(s2);
+    s3 = ByteReverseWord32(s3);
+#endif
 
     s0 ^= rk[0];
     s1 ^= rk[1];
@@ -1383,12 +1395,12 @@ static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
         rk[3];
 
     /* write out */
-    #ifdef LITTLE_ENDIAN_ORDER
-        s0 = ByteReverseWord32(s0);
-        s1 = ByteReverseWord32(s1);
-        s2 = ByteReverseWord32(s2);
-        s3 = ByteReverseWord32(s3);
-    #endif
+#ifdef LITTLE_ENDIAN_ORDER
+    s0 = ByteReverseWord32(s0);
+    s1 = ByteReverseWord32(s1);
+    s2 = ByteReverseWord32(s2);
+    s3 = ByteReverseWord32(s3);
+#endif
 
     XMEMCPY(outBlock,                  &s0, sizeof(s0));
     XMEMCPY(outBlock + sizeof(s0),     &s1, sizeof(s1));
@@ -1398,6 +1410,7 @@ static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
 }
 #endif /* HAVE_AES_CBC || WOLFSSL_AES_DIRECT || HAVE_AESGCM */
 
+
 #ifdef HAVE_AES_DECRYPT
 #if defined(HAVE_AES_CBC) || defined(WOLFSSL_AES_DIRECT)
 
@@ -1416,7 +1429,6 @@ static INLINE word32 PreFetchTd(void)
     return x;
 }
 
-
 /* load Td Table4 into cache by cache line stride */
 static INLINE word32 PreFetchTd4(void)
 {
@@ -1429,7 +1441,6 @@ static INLINE word32 PreFetchTd4(void)
     return x;
 }
 
-
 static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
 {
     word32 s0, s1, s2, s3;
@@ -1463,7 +1474,7 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
             printf("Skipping AES-NI\n");
         #endif
     }
-#endif
+#endif /* WOLFSSL_AESNI */
 
     /*
      * map byte array block to cipher state
@@ -1474,12 +1485,12 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
     XMEMCPY(&s2, inBlock + 2 * sizeof(s0), sizeof(s2));
     XMEMCPY(&s3, inBlock + 3 * sizeof(s0), sizeof(s3));
 
-    #ifdef LITTLE_ENDIAN_ORDER
-        s0 = ByteReverseWord32(s0);
-        s1 = ByteReverseWord32(s1);
-        s2 = ByteReverseWord32(s2);
-        s3 = ByteReverseWord32(s3);
-    #endif
+#ifdef LITTLE_ENDIAN_ORDER
+    s0 = ByteReverseWord32(s0);
+    s1 = ByteReverseWord32(s1);
+    s2 = ByteReverseWord32(s2);
+    s3 = ByteReverseWord32(s3);
+#endif
 
     s0 ^= rk[0];
     s1 ^= rk[1];
@@ -1581,12 +1592,12 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
         rk[3];
 
     /* write out */
-    #ifdef LITTLE_ENDIAN_ORDER
-        s0 = ByteReverseWord32(s0);
-        s1 = ByteReverseWord32(s1);
-        s2 = ByteReverseWord32(s2);
-        s3 = ByteReverseWord32(s3);
-    #endif
+#ifdef LITTLE_ENDIAN_ORDER
+    s0 = ByteReverseWord32(s0);
+    s1 = ByteReverseWord32(s1);
+    s2 = ByteReverseWord32(s2);
+    s3 = ByteReverseWord32(s3);
+#endif
 
     XMEMCPY(outBlock,                  &s0, sizeof(s0));
     XMEMCPY(outBlock + sizeof(s0),     &s1, sizeof(s1));
@@ -1598,10 +1609,11 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
 #endif /* NEED_AES_TABLES */
 
 
+
 /* wc_AesSetKey */
 #if defined(STM32F2_CRYPTO) || defined(STM32F4_CRYPTO)
 
-int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
+    int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
             const byte* iv, int dir)
     {
         word32 *rk = aes->key;
@@ -1611,6 +1623,7 @@ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
         if (!((keylen == 16) || (keylen == 24) || (keylen == 32)))
             return BAD_FUNC_ARG;
 
+        aes->keylen = keylen;
         aes->rounds = keylen/4 + 6;
         XMEMCPY(rk, userKey, keylen);
     #ifndef WOLFSSL_STM32_CUBEMX
@@ -1647,12 +1660,12 @@ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
 
     extern volatile unsigned char __MBAR[];
 
-    int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv,
-                  int dir)
+    int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
+        const byte* iv, int dir)
     {
         if (AESBuffIn == NULL) {
             #if defined (HAVE_THREADX)
-                int s1, s2, s3, s4, s5 ;
+			    int s1, s2, s3, s4, s5;
                 s5 = tx_byte_allocate(&mp_ncached,(void *)&secDesc,
                                       sizeof(SECdescriptorType), TX_NO_WAIT);
                 s1 = tx_byte_allocate(&mp_ncached, (void *)&AESBuffIn,
@@ -1679,6 +1692,7 @@ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
         if (aes == NULL)
             return BAD_FUNC_ARG;
 
+        aes->keylen = keylen;
         aes->rounds = keylen/4 + 6;
         XMEMCPY(aes->key, userKey, keylen);
 
@@ -1710,8 +1724,8 @@ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
         return wc_AesSetKey(aes, userKey, keylen, iv, dir);
     }
 #elif defined(FREESCALE_MMCAU)
-    int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv,
-                  int dir)
+    int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
+        const byte* iv, int dir)
     {
         int ret;
         byte *rk = (byte*)aes->key;
@@ -1728,6 +1742,7 @@ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
             aes->left = 0;
         #endif /* WOLFSSL_AES_COUNTER */
 
+        aes->keylen = keylen;
         aes->rounds = keylen/4 + 6;
 
         ret = wolfSSL_CryptHwMutexLock();
@@ -1746,9 +1761,10 @@ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
     {
         return wc_AesSetKey(aes, userKey, keylen, iv, dir);
     }
+
 #elif defined(WOLFSSL_NRF51_AES)
-    int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv,
-                  int dir)
+    int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
+        const byte* iv, int dir)
     {
         int ret;
 
@@ -1758,6 +1774,7 @@ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
         if (keylen != 16)
             return BAD_FUNC_ARG;
 
+        aes->keylen = keylen;
         aes->rounds = keylen/4 + 6;
         ret = nrf51_aes_set_key(userKey);
 
@@ -1769,6 +1786,7 @@ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
     {
         return wc_AesSetKey(aes, userKey, keylen, iv, dir);
     }
+
 #else
     static int wc_AesSetKeyLocal(Aes* aes, const byte* userKey, word32 keylen,
                 const byte* iv, int dir)
@@ -1776,25 +1794,25 @@ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
         word32 temp, *rk = aes->key;
         unsigned int i = 0;
 
-        #ifdef WOLFSSL_AESNI
-            aes->use_aesni = 0;
-        #endif /* WOLFSSL_AESNI */
-        #ifdef WOLFSSL_AES_COUNTER
-            aes->left = 0;
-        #endif /* WOLFSSL_AES_COUNTER */
+    #ifdef WOLFSSL_AESNI
+        aes->use_aesni = 0;
+    #endif /* WOLFSSL_AESNI */
+    #ifdef WOLFSSL_AES_COUNTER
+        aes->left = 0;
+    #endif /* WOLFSSL_AES_COUNTER */
 
+        aes->keylen = keylen;
         aes->rounds = keylen/4 + 6;
 
         XMEMCPY(rk, userKey, keylen);
-        #ifdef LITTLE_ENDIAN_ORDER
-            ByteReverseWords(rk, rk, keylen);
-        #endif
+    #ifdef LITTLE_ENDIAN_ORDER
+        ByteReverseWords(rk, rk, keylen);
+    #endif
 
         #ifdef WOLFSSL_PIC32MZ_CRYPT
         {
             word32 *akey1 = aes->key_ce;
-            word32 *areg = aes->iv_ce ;
-            aes->keylen = keylen ;
+            word32 *areg = aes->iv_ce;
             XMEMCPY(akey1, userKey, keylen);
             if (iv)
                 XMEMCPY(areg, iv, AES_BLOCK_SIZE);
@@ -1931,17 +1949,17 @@ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
         return wc_AesSetIV(aes, iv);
     }
 
-    int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv,
-                  int dir)
+    int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
+        const byte* iv, int dir)
     {
     #if defined(AES_MAX_KEY_SIZE)
         const word32 max_key_len = (AES_MAX_KEY_SIZE / 8);
     #endif
 
-        if (aes == NULL)
-            return BAD_FUNC_ARG;
-        if (!((keylen == 16) || (keylen == 24) || (keylen == 32)))
+        if (aes == NULL ||
+                !((keylen == 16) || (keylen == 24) || (keylen == 32))) {
             return BAD_FUNC_ARG;
+        }
 
     #if defined(AES_MAX_KEY_SIZE)
         /* Check key length */
@@ -1949,12 +1967,15 @@ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
             return BAD_FUNC_ARG;
         }
     #endif
+        aes->keylen = keylen;
+        aes->rounds = keylen/4 + 6;
 
-    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
         if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES) {
-            return NitroxAesSetKey(aes, userKey, keylen, iv);
+            aes->asyncKey = userKey;
+            aes->asyncIv = iv;
         }
-    #endif
+    #endif /* WOLFSSL_ASYNC_CRYPT */
 
     #ifdef WOLFSSL_AESNI
         if (checkAESNI == 0) {
@@ -1981,14 +2002,12 @@ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen,
     }
 
     #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER)
-
-    /* AES-CTR and AES-DIRECT need to use this for key setup, no aesni yet */
-    int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen,
-                        const byte* iv, int dir)
-    {
-        return wc_AesSetKeyLocal(aes, userKey, keylen, iv, dir);
-    }
-
+        /* AES-CTR and AES-DIRECT need to use this for key setup, no aesni yet */
+        int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen,
+                            const byte* iv, int dir)
+        {
+            return wc_AesSetKeyLocal(aes, userKey, keylen, iv, dir);
+        }
     #endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
 #endif /* wc_AesSetKey block */
 
@@ -2007,21 +2026,6 @@ int wc_AesSetIV(Aes* aes, const byte* iv)
     return 0;
 }
 
-
-/* set the heap hint for aes struct */
-int wc_InitAes_h(Aes* aes, void* h)
-{
-    if (aes == NULL)
-        return BAD_FUNC_ARG;
-
-    aes->heap = h;
-
-    return 0;
-}
-
-
-
-
 /* AES-DIRECT */
 #if defined(WOLFSSL_AES_DIRECT)
     #if defined(HAVE_COLDFIRE_SEC)
@@ -2404,7 +2408,7 @@ int wc_InitAes_h(Aes* aes, void* h)
 
 #elif defined(HAVE_COLDFIRE_SEC)
     static int wc_AesCbcCrypt(Aes* aes, byte* po, const byte* pi, word32 sz,
-                           word32 descHeader)
+        word32 descHeader)
     {
         #ifdef DEBUG_WOLFSSL
             int i; int stat1, stat2; int ret;
@@ -2426,9 +2430,9 @@ int wc_InitAes_h(Aes* aes, void* h)
         secDesc->pointer2 = (byte *)secReg; /* Initial Vector */
 
         switch(aes->rounds) {
-            case 10: secDesc->length3 = 16 ; break ;
-            case 12: secDesc->length3 = 24 ; break ;
-            case 14: secDesc->length3 = 32 ; break ;
+            case 10: secDesc->length3 = 16; break;
+            case 12: secDesc->length3 = 24; break;
+            case 14: secDesc->length3 = 32; break;
         }
         XMEMCPY(secKey, aes->key, secDesc->length3);
 
@@ -2613,21 +2617,22 @@ int wc_InitAes_h(Aes* aes, void* h)
         return 0;
     }
     #endif /* HAVE_AES_DECRYPT */
+
 #elif defined(WOLFSSL_PIC32MZ_CRYPT)
     /* core hardware crypt engine driver */
     static void wc_AesCrypt(Aes *aes, byte* out, const byte* in, word32 sz,
                                             int dir, int algo, int cryptoalgo)
     {
-        securityAssociation *sa_p ;
-        bufferDescriptor *bd_p ;
+        securityAssociation *sa_p;
+        bufferDescriptor *bd_p;
 
         volatile securityAssociation sa __attribute__((aligned (8)));
         volatile bufferDescriptor bd __attribute__((aligned (8)));
-        volatile int k ;
+        volatile int k;
 
         /* get uncached address */
-        sa_p = KVA0_TO_KVA1(&sa) ;
-        bd_p = KVA0_TO_KVA1(&bd) ;
+        sa_p = KVA0_TO_KVA1(&sa);
+        bd_p = KVA0_TO_KVA1(&bd);
 
         /* Sync cache and physical memory */
         if(PIC32MZ_IF_RAM(in)) {
@@ -2636,27 +2641,27 @@ int wc_InitAes_h(Aes* aes, void* h)
         XMEMSET((void *)KVA0_TO_KVA1(out), 0, sz);
         /* Set up the Security Association */
         XMEMSET((byte *)KVA0_TO_KVA1(&sa), 0, sizeof(sa));
-        sa_p->SA_CTRL.ALGO = algo ; /* AES */
+        sa_p->SA_CTRL.ALGO = algo; /* AES */
         sa_p->SA_CTRL.LNC = 1;
         sa_p->SA_CTRL.LOADIV = 1;
         sa_p->SA_CTRL.FB = 1;
-        sa_p->SA_CTRL.ENCTYPE = dir ; /* Encryption/Decryption */
+        sa_p->SA_CTRL.ENCTYPE = dir; /* Encryption/Decryption */
         sa_p->SA_CTRL.CRYPTOALGO = cryptoalgo;
 
         if(cryptoalgo == PIC32_CRYPTOALGO_AES_GCM){
             switch(aes->keylen) {
             case 32:
-                sa_p->SA_CTRL.KEYSIZE = PIC32_AES_KEYSIZE_256 ;
-                break ;
+                sa_p->SA_CTRL.KEYSIZE = PIC32_AES_KEYSIZE_256;
+                break;
             case 24:
-                sa_p->SA_CTRL.KEYSIZE = PIC32_AES_KEYSIZE_192 ;
-                break ;
+                sa_p->SA_CTRL.KEYSIZE = PIC32_AES_KEYSIZE_192;
+                break;
             case 16:
-                sa_p->SA_CTRL.KEYSIZE = PIC32_AES_KEYSIZE_128 ;
-                break ;
+                sa_p->SA_CTRL.KEYSIZE = PIC32_AES_KEYSIZE_128;
+                break;
             }
         } else
-            sa_p->SA_CTRL.KEYSIZE = PIC32_AES_KEYSIZE_128 ;
+            sa_p->SA_CTRL.KEYSIZE = PIC32_AES_KEYSIZE_128;
 
         ByteReverseWords(
         (word32 *)KVA0_TO_KVA1(sa.SA_ENCKEY + 8 - aes->keylen/sizeof(word32)),
@@ -2669,27 +2674,27 @@ int wc_InitAes_h(Aes* aes, void* h)
         bd_p->BD_CTRL.BUFLEN = sz;
         if(cryptoalgo == PIC32_CRYPTOALGO_AES_GCM) {
             if(sz % 0x10)
-                bd_p->BD_CTRL.BUFLEN = (sz/0x10 + 1) * 0x10 ;
+                bd_p->BD_CTRL.BUFLEN = (sz/0x10 + 1) * 0x10;
         }
         bd_p->BD_CTRL.LIFM = 1;
         bd_p->BD_CTRL.SA_FETCH_EN = 1;
         bd_p->BD_CTRL.LAST_BD = 1;
         bd_p->BD_CTRL.DESC_EN = 1;
 
-        bd_p->SA_ADDR = (unsigned int)KVA_TO_PA(&sa) ;
-        bd_p->SRCADDR = (unsigned int)KVA_TO_PA(in) ;
+        bd_p->SA_ADDR = (unsigned int)KVA_TO_PA(&sa);
+        bd_p->SRCADDR = (unsigned int)KVA_TO_PA(in);
         bd_p->DSTADDR = (unsigned int)KVA_TO_PA(out);
-        bd_p->MSGLEN = sz ;
+        bd_p->MSGLEN = sz;
 
         CECON = 1 << 6;
         while (CECON);
 
         /* Run the engine */
-        CEBDPADDR = (unsigned int)KVA_TO_PA(&bd) ;
+        CEBDPADDR = (unsigned int)KVA_TO_PA(&bd);
         CEINTEN = 0x07;
         CECON = 0x27;
 
-        WAIT_ENGINE ;
+        WAIT_ENGINE;
 
         if((cryptoalgo == PIC32_CRYPTOALGO_CBC) ||
            (cryptoalgo == PIC32_CRYPTOALGO_TCBC)||
@@ -2698,14 +2703,14 @@ int wc_InitAes_h(Aes* aes, void* h)
             if(dir == PIC32_ENCRYPTION) {
                 XMEMCPY((void *)aes->iv_ce,
                         (void*)KVA0_TO_KVA1(out + sz - AES_BLOCK_SIZE),
-                        AES_BLOCK_SIZE) ;
+                        AES_BLOCK_SIZE);
             } else {
                 ByteReverseWords((word32*)aes->iv_ce,
                         (word32 *)KVA0_TO_KVA1(in + sz - AES_BLOCK_SIZE),
                         AES_BLOCK_SIZE);
             }
         }
-        XMEMCPY((byte *)out, (byte *)KVA0_TO_KVA1(out), sz) ;
+        XMEMCPY((byte *)out, (byte *)KVA0_TO_KVA1(out), sz);
         ByteReverseWords((word32*)out, (word32 *)out, sz);
     }
 
@@ -2713,14 +2718,14 @@ int wc_InitAes_h(Aes* aes, void* h)
     {
         wc_AesCrypt(aes, out, in, sz, PIC32_ENCRYPTION, PIC32_ALGO_AES,
                                                       PIC32_CRYPTOALGO_RCBC );
-        return 0 ;
+        return 0;
     }
     #ifdef HAVE_AES_DECRYPT
     int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
     {
         wc_AesCrypt(aes, out, in, sz, PIC32_DECRYPTION, PIC32_ALGO_AES,
                                                       PIC32_CRYPTOALGO_RCBC);
-        return 0 ;
+        return 0;
     }
     #endif /* HAVE_AES_DECRYPT */
 
@@ -2729,10 +2734,28 @@ int wc_InitAes_h(Aes* aes, void* h)
     {
         word32 blocks = sz / AES_BLOCK_SIZE;
 
-    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
-        if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES)
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
+        /* if async and byte count above threshold */
+        if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES &&
+                                                sz >= WC_ASYNC_THRESH_AES_CBC) {
+        #if defined(HAVE_CAVIUM)
             return NitroxAesCbcEncrypt(aes, out, in, sz);
-    #endif
+        #elif defined(HAVE_INTEL_QA)
+            return IntelQaSymAesCbcEncrypt(&aes->asyncDev, out, in, sz,
+                aes->asyncKey, aes->keylen, aes->asyncIv, AES_BLOCK_SIZE);
+        #else /* WOLFSSL_ASYNC_CRYPT_TEST */
+            WC_ASYNC_TEST* testDev = &aes->asyncDev.test;
+            if (testDev->type == ASYNC_TEST_NONE) {
+                testDev->type = ASYNC_TEST_AES_CBC_ENCRYPT;
+                testDev->aes.aes = aes;
+                testDev->aes.out = out;
+                testDev->aes.in = in;
+                testDev->aes.sz = sz;
+                return WC_PENDING_E;
+            }
+        #endif
+        }
+    #endif /* WOLFSSL_ASYNC_CRYPT */
 
     #ifdef WOLFSSL_AESNI
         if (haveAESNI) {
@@ -2747,22 +2770,25 @@ int wc_InitAes_h(Aes* aes, void* h)
             #endif
 
             /* check alignment, decrypt doesn't need alignment */
-            if ((wolfssl_word)in % 16) {
+            if ((wolfssl_word)in % AESNI_ALIGN) {
             #ifndef NO_WOLFSSL_ALLOC_ALIGN
-                byte* tmp = (byte*)XMALLOC(sz, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
-                WOLFSSL_MSG("AES-CBC encrypt with bad alignment");
+                byte* tmp = (byte*)XMALLOC(sz + AESNI_ALIGN, aes->heap,
+                                                    DYNAMIC_TYPE_TMP_BUFFER);
+                byte* tmp_align;
                 if (tmp == NULL) return MEMORY_E;
 
-                XMEMCPY(tmp, in, sz);
-                AES_CBC_encrypt(tmp, tmp, (byte*)aes->reg, sz, (byte*)aes->key,
+                tmp_align = tmp + (AESNI_ALIGN - ((size_t)tmp % AESNI_ALIGN));
+                XMEMCPY(tmp_align, in, sz);
+                AES_CBC_encrypt(tmp_align, tmp_align, (byte*)aes->reg, sz, (byte*)aes->key,
                             aes->rounds);
                 /* store iv for next call */
-                XMEMCPY(aes->reg, tmp + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+                XMEMCPY(aes->reg, tmp_align + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
 
-                XMEMCPY(out, tmp, sz);
+                XMEMCPY(out, tmp_align, sz);
                 XFREE(tmp, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
                 return 0;
             #else
+                WOLFSSL_MSG("AES-CBC encrypt with bad alignment");
                 return BAD_ALIGN_E;
             #endif
             }
@@ -2791,11 +2817,28 @@ int wc_InitAes_h(Aes* aes, void* h)
     #ifdef HAVE_AES_DECRYPT
     int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
     {
-        word32 blocks = sz / AES_BLOCK_SIZE;
+        word32 blocks;
 
-    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
-        if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES) {
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
+        /* if async and byte count above threshold */
+        if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES &&
+                                                sz >= WC_ASYNC_THRESH_AES_CBC) {
+        #if defined(HAVE_CAVIUM)
             return NitroxAesCbcDecrypt(aes, out, in, sz);
+        #elif defined(HAVE_INTEL_QA)
+            return IntelQaSymAesCbcDecrypt(&aes->asyncDev, out, in, sz,
+                aes->asyncKey, aes->keylen, aes->asyncIv, AES_BLOCK_SIZE);
+        #else /* WOLFSSL_ASYNC_CRYPT_TEST */
+            WC_ASYNC_TEST* testDev = &aes->asyncDev.test;
+            if (testDev->type == ASYNC_TEST_NONE) {
+                testDev->type = ASYNC_TEST_AES_CBC_DECRYPT;
+                testDev->aes.aes = aes;
+                testDev->aes.out = out;
+                testDev->aes.in = in;
+                testDev->aes.sz = sz;
+                return WC_PENDING_E;
+            }
+        #endif
         }
     #endif
 
@@ -2829,6 +2872,7 @@ int wc_InitAes_h(Aes* aes, void* h)
         }
     #endif
 
+        blocks = sz / AES_BLOCK_SIZE;
         while (blocks--) {
             XMEMCPY(aes->tmp, in, AES_BLOCK_SIZE);
             wc_AesDecrypt(aes, (byte*)aes->tmp, out);
@@ -3016,58 +3060,58 @@ int wc_AesEcbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
     #elif defined(WOLFSSL_PIC32MZ_CRYPT)
         void wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
         {
-            int i ;
-            char out_block[AES_BLOCK_SIZE] ;
-            int odd ;
-            int even ;
-            char *tmp ; /* (char *)aes->tmp, for short */
+            int i;
+            char out_block[AES_BLOCK_SIZE];
+            int odd;
+            int even;
+            char *tmp; /* (char *)aes->tmp, for short */
 
-            tmp = (char *)aes->tmp ;
+            tmp = (char *)aes->tmp;
             if(aes->left) {
                 if((aes->left + sz) >= AES_BLOCK_SIZE){
-                    odd = AES_BLOCK_SIZE - aes->left ;
+                    odd = AES_BLOCK_SIZE - aes->left;
                 } else {
-                    odd = sz ;
+                    odd = sz;
                 }
-                XMEMCPY(tmp+aes->left, in, odd) ;
+                XMEMCPY(tmp+aes->left, in, odd);
                 if((odd+aes->left) == AES_BLOCK_SIZE){
                     wc_AesCrypt(aes, out_block, tmp, AES_BLOCK_SIZE,
                         PIC32_ENCRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_RCTR);
-                    XMEMCPY(out, out_block+aes->left, odd) ;
-                    aes->left = 0 ;
-                    XMEMSET(tmp, 0x0, AES_BLOCK_SIZE) ;
+                    XMEMCPY(out, out_block+aes->left, odd);
+                    aes->left = 0;
+                    XMEMSET(tmp, 0x0, AES_BLOCK_SIZE);
                     /* Increment IV */
                     for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) {
                         if (++((byte *)aes->iv_ce)[i])
-                            break ;
+                            break;
                     }
                 }
-                in += odd ;
-                out+= odd ;
-                sz -= odd ;
+                in += odd;
+                out+= odd;
+                sz -= odd;
             }
-            odd = sz % AES_BLOCK_SIZE ;  /* if there is tail fragment */
+            odd = sz % AES_BLOCK_SIZE;  /* if there is tail fragment */
             if(sz / AES_BLOCK_SIZE) {
-                even = (sz/AES_BLOCK_SIZE)*AES_BLOCK_SIZE ;
+                even = (sz/AES_BLOCK_SIZE)*AES_BLOCK_SIZE;
                 wc_AesCrypt(aes, out, in, even, PIC32_ENCRYPTION, PIC32_ALGO_AES,
                                                         PIC32_CRYPTOALGO_RCTR);
-                out += even ;
-                in  += even ;
+                out += even;
+                in  += even;
                 do {  /* Increment IV */
                     for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) {
                         if (++((byte *)aes->iv_ce)[i])
-                            break ;
+                            break;
                     }
-                    even -= AES_BLOCK_SIZE ;
-                } while((int)even > 0) ;
+                    even -= AES_BLOCK_SIZE;
+                } while((int)even > 0);
             }
             if(odd) {
-                XMEMSET(tmp+aes->left, 0x0, AES_BLOCK_SIZE - aes->left) ;
-                XMEMCPY(tmp+aes->left, in, odd) ;
+                XMEMSET(tmp+aes->left, 0x0, AES_BLOCK_SIZE - aes->left);
+                XMEMCPY(tmp+aes->left, in, odd);
                 wc_AesCrypt(aes, out_block, tmp, AES_BLOCK_SIZE,
                         PIC32_ENCRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_RCTR);
-                XMEMCPY(out, out_block+aes->left,odd) ;
-                aes->left += odd ;
+                XMEMCPY(out, out_block+aes->left,odd);
+                aes->left += odd;
             }
         }
 
@@ -3155,6 +3199,7 @@ int wc_AesEcbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
 
 #endif /* WOLFSSL_AES_COUNTER */
 
+
 #ifdef HAVE_AESGCM
 
 /*
@@ -3751,8 +3796,8 @@ static void GMULT(byte* X, byte* Y)
 }
 
 
-static void GHASH(Aes* aes, const byte* a, word32 aSz,
-                                const byte* c, word32 cSz, byte* s, word32 sSz)
+static void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c,
+    word32 cSz, byte* s, word32 sSz)
 {
     byte x[AES_BLOCK_SIZE];
     byte scratch[AES_BLOCK_SIZE];
@@ -3900,8 +3945,8 @@ static void GMULT(byte *x, byte m[256][AES_BLOCK_SIZE])
 }
 
 
-static void GHASH(Aes* aes, const byte* a, word32 aSz,
-                                const byte* c, word32 cSz, byte* s, word32 sSz)
+static void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c,
+    word32 cSz, byte* s, word32 sSz)
 {
     byte x[AES_BLOCK_SIZE];
     byte scratch[AES_BLOCK_SIZE];
@@ -3960,9 +4005,9 @@ static void GHASH(Aes* aes, const byte* a, word32 aSz,
 static void GMULT(word64* X, word64* Y)
 {
     word64 Z[2] = {0,0};
-    word64 V[2] ;
+    word64 V[2];
     int i, j;
-    V[0] = X[0] ;  V[1] = X[1] ;
+    V[0] = X[0];  V[1] = X[1];
 
     for (i = 0; i < 2; i++)
     {
@@ -3976,13 +4021,15 @@ static void GMULT(word64* X, word64* Y)
 
             if (V[1] & 0x0000000000000001) {
                 V[1] >>= 1;
-                V[1] |= ((V[0] & 0x0000000000000001) ? 0x8000000000000000ULL : 0);
+                V[1] |= ((V[0] & 0x0000000000000001) ?
+                    0x8000000000000000ULL : 0);
                 V[0] >>= 1;
                 V[0] ^= 0xE100000000000000ULL;
             }
             else {
                 V[1] >>= 1;
-                V[1] |= ((V[0] & 0x0000000000000001) ? 0x8000000000000000ULL : 0);
+                V[1] |= ((V[0] & 0x0000000000000001) ?
+                    0x8000000000000000ULL : 0);
                 V[0] >>= 1;
             }
             y <<= 1;
@@ -3992,8 +4039,9 @@ static void GMULT(word64* X, word64* Y)
     X[1] = Z[1];
 }
 
-static void GHASH(Aes* aes, const byte* a, word32 aSz,
-                                const byte* c, word32 cSz, byte* s, word32 sSz)
+
+static void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c,
+    word32 cSz, byte* s, word32 sSz)
 {
     word64 x[2] = {0,0};
     word32 blocks, partial;
@@ -4060,8 +4108,8 @@ static void GHASH(Aes* aes, const byte* a, word32 aSz,
 
     /* Hash in the lengths in bits of A and C */
     {
-        word64 len[2] ;
-        len[0] = aSz ; len[1] = cSz;
+        word64 len[2];
+        len[0] = aSz; len[1] = cSz;
 
         /* Lengths are in bytes. Convert to bits. */
         len[0] *= 8;
@@ -4084,7 +4132,7 @@ static void GHASH(Aes* aes, const byte* a, word32 aSz,
 static void GMULT(word32* X, word32* Y)
 {
     word32 Z[4] = {0,0,0,0};
-    word32 V[4] ;
+    word32 V[4];
     int i, j;
 
     V[0] = X[0];  V[1] = X[1]; V[2] =  X[2]; V[3] =  X[3];
@@ -4129,8 +4177,8 @@ static void GMULT(word32* X, word32* Y)
 }
 
 
-static void GHASH(Aes* aes, const byte* a, word32 aSz,
-                                const byte* c, word32 cSz, byte* s, word32 sSz)
+static void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c,
+    word32 cSz, byte* s, word32 sSz)
 {
     word32 x[4] = {0,0,0,0};
     word32 blocks, partial;
@@ -4263,7 +4311,7 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
     byte* c = out;
     byte counter[AES_BLOCK_SIZE];
     byte initialCounter[AES_BLOCK_SIZE];
-    byte *ctr ;
+    byte *ctr;
     byte scratch[AES_BLOCK_SIZE];
 
     /* Sanity check for XMEMCPY in GHASH function and local xorbuf call */
@@ -4275,6 +4323,38 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
         return BAD_FUNC_ARG;
     }
 
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
+    /* if async and byte count above threshold */
+    if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES &&
+                                                sz >= WC_ASYNC_THRESH_AES_GCM) {
+    #if defined(HAVE_CAVIUM)
+        /* Not yet supported, contact wolfSSL if interested in using */
+    #elif defined(HAVE_INTEL_QA)
+        return IntelQaSymAesGcmEncrypt(&aes->asyncDev, out, in, sz,
+            aes->asyncKey, aes->keylen, iv, ivSz,
+            authTag, authTagSz, authIn, authInSz);
+    #else /* WOLFSSL_ASYNC_CRYPT_TEST */
+        WC_ASYNC_TEST* testDev = &aes->asyncDev.test;
+        if (testDev->type == ASYNC_TEST_NONE) {
+            testDev->type = ASYNC_TEST_AES_GCM_ENCRYPT;
+            testDev->aes.aes = aes;
+            testDev->aes.out = out;
+            testDev->aes.in = in;
+            testDev->aes.sz = sz;
+            testDev->aes.iv = iv;
+            testDev->aes.ivSz = ivSz;
+            testDev->aes.authTag = authTag;
+            testDev->aes.authTagSz = authTagSz;
+            testDev->aes.authIn = authIn;
+            testDev->aes.authInSz = authInSz;
+            /* request is recorded on the test device, so report it as
+             * pending (same as the CBC and GCM decrypt test paths) */
+            return WC_PENDING_E;
+        }
+    #endif
+    }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
 #ifdef WOLFSSL_AESNI
     if (haveAESNI) {
         AES_GCM_encrypt(in, out, authIn, iv, authTag,
@@ -4284,9 +4361,9 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
 #endif
 
 #ifdef WOLFSSL_PIC32MZ_CRYPT
-    ctr = (char *)aes->iv_ce ;
+    ctr = (char *)aes->iv_ce;
 #else
-    ctr = counter ;
+    ctr = counter;
 #endif
 
     XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
@@ -4363,12 +4440,46 @@ int  wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
     byte* p = out;
     byte counter[AES_BLOCK_SIZE];
     byte initialCounter[AES_BLOCK_SIZE];
-    byte *ctr ;
+    byte *ctr;
     byte scratch[AES_BLOCK_SIZE];
 
-    /* Sanity check for local ConstantCompare call */
-    if (authTagSz > AES_BLOCK_SIZE)
+    /* argument checks; additional auth data is optional, so authIn may be
+     * NULL as long as authInSz is 0 */
+    if (aes == NULL || out == NULL || in == NULL || sz == 0 || iv == NULL ||
+        authTag == NULL || (authIn == NULL && authInSz != 0) ||
+        authTagSz > AES_BLOCK_SIZE) {
         return BAD_FUNC_ARG;
+    }
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
+    /* if async and byte count above threshold */
+    if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES &&
+                                                sz >= WC_ASYNC_THRESH_AES_GCM) {
+    #if defined(HAVE_CAVIUM)
+        /* Not yet supported, contact wolfSSL if interested in using */
+    #elif defined(HAVE_INTEL_QA)
+        return IntelQaSymAesGcmDecrypt(&aes->asyncDev, out, in, sz,
+            aes->asyncKey, aes->keylen, iv, ivSz,
+            authTag, authTagSz, authIn, authInSz);
+    #else /* WOLFSSL_ASYNC_CRYPT_TEST */
+        WC_ASYNC_TEST* testDev = &aes->asyncDev.test;
+        if (testDev->type == ASYNC_TEST_NONE) {
+            testDev->type = ASYNC_TEST_AES_GCM_DECRYPT;
+            testDev->aes.aes = aes;
+            testDev->aes.out = out;
+            testDev->aes.in = in;
+            testDev->aes.sz = sz;
+            testDev->aes.iv = iv;
+            testDev->aes.ivSz = ivSz;
+            testDev->aes.authTag = (byte*)authTag;
+            testDev->aes.authTagSz = authTagSz;
+            testDev->aes.authIn = authIn;
+            testDev->aes.authInSz = authInSz;
+            return WC_PENDING_E;
+        }
+    #endif
+    }
+#endif /* WOLFSSL_ASYNC_CRYPT */
 
 #ifdef WOLFSSL_AESNI
     if (haveAESNI) {
@@ -4380,9 +4489,9 @@ int  wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
 #endif
 
 #ifdef WOLFSSL_PIC32MZ_CRYPT
-    ctr = (char *)aes->iv_ce ;
+    ctr = (char *)aes->iv_ce;
 #else
-    ctr = counter ;
+    ctr = counter;
 #endif
 
     XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
@@ -4926,28 +5035,36 @@ int wc_AesKeyUnWrap(const byte* key, word32 keySz, const byte* in, word32 inSz,
 #endif /* HAVE_AES_KEYWRAP */
 
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-
-/* Initialize Aes for use with Nitrox device */
-int wc_AesAsyncInit(Aes* aes, int devId)
+/* Initialize Aes for use with async hardware */
+int wc_AesInit(Aes* aes, void* heap, int devId)
 {
+    int ret = 0;
+
     if (aes == NULL)
         return BAD_FUNC_ARG;
 
-    return wolfAsync_DevCtxInit(&aes->asyncDev, WOLFSSL_ASYNC_MARKER_AES, devId);
+    aes->heap = heap;
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
+    ret = wolfAsync_DevCtxInit(&aes->asyncDev, WOLFSSL_ASYNC_MARKER_AES,
+                                                        aes->heap, devId);
+#else
+    (void)devId;
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+    return ret;
 }
 
-
-/* Free Aes from use with Nitrox device */
-void wc_AesAsyncFree(Aes* aes)
+/* Free Aes from use with async hardware */
+void wc_AesFree(Aes* aes)
 {
     if (aes == NULL)
         return;
 
-    wolfAsync_DevCtxFree(&aes->asyncDev);
-}
-
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)
+    wolfAsync_DevCtxFree(&aes->asyncDev, WOLFSSL_ASYNC_MARKER_AES);
 #endif /* WOLFSSL_ASYNC_CRYPT */
+}
 
 
 int wc_AesGetKeySize(Aes* aes, word32* keySize)
diff --git a/wolfcrypt/src/arc4.c b/wolfcrypt/src/arc4.c
index 6922089de..160c36a91 100644
--- a/wolfcrypt/src/arc4.c
+++ b/wolfcrypt/src/arc4.c
@@ -32,12 +32,14 @@
 #include 
 
 
-void wc_Arc4SetKey(Arc4* arc4, const byte* key, word32 length)
+int wc_Arc4SetKey(Arc4* arc4, const byte* key, word32 length)
 {
+    int ret = 0;
     word32 i;
     word32 keyIndex = 0, stateIndex = 0;
 
-#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM) && !defined(HAVE_CAVIUM_V)
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ARC4) && \
+        defined(HAVE_CAVIUM) && !defined(HAVE_CAVIUM_V)
     if (arc4->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ARC4) {
         return NitroxArc4SetKey(arc4, key, length);
     }
@@ -59,6 +61,8 @@ void wc_Arc4SetKey(Arc4* arc4, const byte* key, word32 length)
         if (++keyIndex >= length)
             keyIndex = 0;
     }
+
+    return ret;
 }
 
 
@@ -76,12 +80,14 @@ static INLINE byte MakeByte(word32* x, word32* y, byte* s)
 }
 
 
-void wc_Arc4Process(Arc4* arc4, byte* out, const byte* in, word32 length)
+int wc_Arc4Process(Arc4* arc4, byte* out, const byte* in, word32 length)
 {
+    int ret = 0;
     word32 x;
     word32 y;
 
-#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM) && !defined(HAVE_CAVIUM_V)
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ARC4) && \
+        defined(HAVE_CAVIUM) && !defined(HAVE_CAVIUM_V)
     if (arc4->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ARC4) {
         return NitroxArc4Process(arc4, out, in, length);
     }
@@ -95,31 +101,41 @@ void wc_Arc4Process(Arc4* arc4, byte* out, const byte* in, word32 length)
 
     arc4->x = (byte)x;
     arc4->y = (byte)y;
+
+    return ret;
 }
 
-
-#ifdef WOLFSSL_ASYNC_CRYPT
-
-/* Initialize Arc4 for use with Nitrox device */
-int wc_Arc4AsyncInit(Arc4* arc4, int devId)
+/* Initialize Arc4 for use with async device */
+int wc_Arc4Init(Arc4* arc4, void* heap, int devId)
 {
+    int ret = 0;
+
     if (arc4 == NULL)
         return BAD_FUNC_ARG;
 
-    return wolfAsync_DevCtxInit(&arc4->asyncDev, WOLFSSL_ASYNC_MARKER_ARC4, devId);
+    arc4->heap = heap;
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ARC4)
+    ret = wolfAsync_DevCtxInit(&arc4->asyncDev, WOLFSSL_ASYNC_MARKER_ARC4,
+        arc4->heap, devId);
+#else
+    (void)devId;
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+    return ret;
 }
 
 
-/* Free Arc4 from use with Nitrox device */
-void wc_Arc4AsyncFree(Arc4* arc4)
+/* Free Arc4 from use with async device */
+void wc_Arc4Free(Arc4* arc4)
 {
     if (arc4 == NULL)
         return;
 
-    wolfAsync_DevCtxFree(&arc4->asyncDev);
-}
-
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ARC4)
+    wolfAsync_DevCtxFree(&arc4->asyncDev, WOLFSSL_ASYNC_MARKER_ARC4);
 #endif /* WOLFSSL_ASYNC_CRYPT */
+}
 
 #endif /* NO_RC4 */
 
diff --git a/wolfcrypt/src/asn.c b/wolfcrypt/src/asn.c
index 6466a7cc9..876d699f8 100644
--- a/wolfcrypt/src/asn.c
+++ b/wolfcrypt/src/asn.c
@@ -81,6 +81,10 @@ ASN Options:
     #include 
 #endif
 
+#ifndef NO_RSA
+    #include 
+#endif
+
 #ifdef WOLFSSL_DEBUG_ENCODING
     #if defined(FREESCALE_MQX) || defined(FREESCALE_KSDK_MQX)
         #if MQX_USE_IO_OLD
@@ -864,7 +868,7 @@ WOLFSSL_LOCAL int GetMyVersion(const byte* input, word32* inOutIdx,
 {
     word32 idx = *inOutIdx;
 
-    WOLFSSL_ENTER("GetMyVersion");
+    /* WOLFSSL_ENTER("GetMyVersion"); -- disabled to cut debug verbosity */
 
     if ((idx + MIN_VERSION_SZ) > maxIdx)
         return ASN_PARSE_E;
@@ -955,7 +959,15 @@ int GetInt(mp_int* mpi, const byte* input, word32* inOutIdx, word32 maxIdx)
         return ASN_GETINT_E;
     }
 
+#ifdef HAVE_WOLF_BIGINT
+    if (wc_bigint_from_unsigned_bin(&mpi->raw, input + idx, length) != 0) {
+        mp_clear(mpi);
+        return ASN_GETINT_E;
+    }
+#endif /* HAVE_WOLF_BIGINT */
+
     *inOutIdx = idx + length;
+
     return 0;
 }
 
@@ -1059,54 +1071,6 @@ static word32 SetBitString16Bit(word16 val, byte* output)
 #endif /* !NO_RSA && (WOLFSSL_CERT_GEN || (WOLFSSL_KEY_GEN &&
                                            !HAVE_USER_RSA)) */
 
-#if !defined(NO_RSA) && !defined(HAVE_USER_RSA)
-static int GetIntRsa(RsaKey* key, mp_int* mpi, const byte* input,
-                        word32* inOutIdx, word32 maxIdx)
-{
-    word32 idx = *inOutIdx;
-    int    ret;
-    int    length;
-
-    (void)key;
-
-    ret = GetASNInt(input, &idx, &length, maxIdx);
-    if (ret != 0)
-        return ret;
-
-#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
-    if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA) {
-        XMEMSET(mpi, 0, sizeof(mp_int));
-        mpi->used = length;
-    #ifdef USE_FAST_MATH
-        if (length > (FP_SIZE * (int)sizeof(fp_digit))) {
-            return MEMORY_E;
-        }
-        mpi->dpraw = (byte*)mpi->dp;
-    #else
-        mpi->dpraw = (byte*)XMALLOC(length, key->heap, DYNAMIC_TYPE_ASYNC_RSA);
-    #endif
-        if (mpi->dpraw == NULL) {
-            return MEMORY_E;
-        }
-
-        XMEMCPY(mpi->dpraw, input + idx, length);
-    }
-    else
-#endif /* WOLFSSL_ASYNC_CRYPT && HAVE_CAVIUM */
-    {
-        if (mp_init(mpi) != MP_OKAY)
-            return MP_INIT_E;
-
-        if (mp_read_unsigned_bin(mpi, (byte*)input + idx, length) != 0) {
-            mp_clear(mpi);
-            return ASN_GETINT_E;
-        }
-    }
-
-    *inOutIdx = idx + length;
-    return 0;
-}
-#endif /* !NO_RSA && !HAVE_USER_RSA */
 
 
 /* hashType */
@@ -1870,14 +1834,14 @@ int wc_RsaPrivateKeyDecode(const byte* input, word32* inOutIdx, RsaKey* key,
 
     key->type = RSA_PRIVATE;
 
-    if (GetIntRsa(key, &key->n,  input, inOutIdx, inSz) < 0 ||
-        GetIntRsa(key, &key->e,  input, inOutIdx, inSz) < 0 ||
-        GetIntRsa(key, &key->d,  input, inOutIdx, inSz) < 0 ||
-        GetIntRsa(key, &key->p,  input, inOutIdx, inSz) < 0 ||
-        GetIntRsa(key, &key->q,  input, inOutIdx, inSz) < 0 ||
-        GetIntRsa(key, &key->dP, input, inOutIdx, inSz) < 0 ||
-        GetIntRsa(key, &key->dQ, input, inOutIdx, inSz) < 0 ||
-        GetIntRsa(key, &key->u,  input, inOutIdx, inSz) < 0 )  return ASN_RSA_KEY_E;
+    if (GetInt(&key->n,  input, inOutIdx, inSz) < 0 ||
+        GetInt(&key->e,  input, inOutIdx, inSz) < 0 ||
+        GetInt(&key->d,  input, inOutIdx, inSz) < 0 ||
+        GetInt(&key->p,  input, inOutIdx, inSz) < 0 ||
+        GetInt(&key->q,  input, inOutIdx, inSz) < 0 ||
+        GetInt(&key->dP, input, inOutIdx, inSz) < 0 ||
+        GetInt(&key->dQ, input, inOutIdx, inSz) < 0 ||
+        GetInt(&key->u,  input, inOutIdx, inSz) < 0 )  return ASN_RSA_KEY_E;
 
     return 0;
 }
@@ -3128,6 +3092,9 @@ void InitDecodedCert(DecodedCert* cert, byte* source, word32 inSz, void* heap)
     XMEMSET(cert->extCertPolicies, 0, MAX_CERTPOL_NB*MAX_CERTPOL_SZ);
     cert->extCertPoliciesNb = 0;
 #endif
+
+    cert->ca = NULL;
+    InitSignatureCtx(&cert->sigCtx, heap, INVALID_DEVID);
 }
 
 
@@ -3188,6 +3155,7 @@ void FreeDecodedCert(DecodedCert* cert)
     if (cert->subjectName.fullName != NULL)
         XFREE(cert->subjectName.fullName, cert->heap, DYNAMIC_TYPE_X509);
 #endif /* OPENSSL_EXTRA */
+    FreeSignatureCtx(&cert->sigCtx);
 }
 
 static int GetCertHeader(DecodedCert* cert)
@@ -4412,280 +4380,348 @@ int wc_GetCTC_HashOID(int type)
     };
 }
 
-/* return 0=success, else failure */
-static int ConfirmSignature(const byte* buf, word32 bufSz,
-    const byte* key, word32 keySz, word32 keyOID,
-    const byte* sig, word32 sigSz, word32 sigOID,
-    void* heap)
+void InitSignatureCtx(SignatureCtx* sigCtx, void* heap, int devId)
 {
-    int  typeH = 0, digestSz = 0, ret = -1;
-#ifdef WOLFSSL_SMALL_STACK
-    byte* digest;
-#else
-    byte digest[WC_MAX_DIGEST_SIZE];
-#endif
+    if (sigCtx) {
+        XMEMSET(sigCtx, 0, sizeof(SignatureCtx));
+        sigCtx->devId = devId;
+        sigCtx->heap = heap;
+    }
+}
 
-#ifdef WOLFSSL_SMALL_STACK
-    digest = (byte*)XMALLOC(WC_MAX_DIGEST_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (digest == NULL)
-        return MEMORY_E;
+void FreeSignatureCtx(SignatureCtx* sigCtx)
+{
+    if (sigCtx == NULL)
+        return;
+
+    if (sigCtx->digest) {
+        XFREE(sigCtx->digest, sigCtx->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        sigCtx->digest = NULL;
+    }
+#ifndef NO_RSA
+    if (sigCtx->plain) {
+        XFREE(sigCtx->plain, sigCtx->heap, DYNAMIC_TYPE_TMP_BUFFER);
+        sigCtx->plain = NULL;
+    }
 #endif
+    if (sigCtx->key.ptr) {
+        switch (sigCtx->keyOID) {
+        #ifndef NO_RSA
+            case RSAk:
+                wc_FreeRsaKey(sigCtx->key.rsa);
+                XFREE(sigCtx->key.ptr, sigCtx->heap, DYNAMIC_TYPE_RSA);
+                break;
+        #endif /* !NO_RSA */
+        #ifdef HAVE_ECC
+            case ECDSAk:
+                wc_ecc_free(sigCtx->key.ecc);
+                XFREE(sigCtx->key.ecc, sigCtx->heap, DYNAMIC_TYPE_ECC);
+                break;
+        #endif /* HAVE_ECC */
+            default:
+                break;
+        } /* switch (keyOID) */
+        sigCtx->key.ptr = NULL;
+    }
+
+    /* reset state, we are done */
+    sigCtx->state = SIG_STATE_BEGIN;
+}
+
+/* Return codes: 0=Success, Negative (see error-crypt.h), ASN_SIG_CONFIRM_E */
+static int ConfirmSignature(SignatureCtx* sigCtx,
+    const byte* buf, word32 bufSz,
+    const byte* key, word32 keySz, word32 keyOID,
+    const byte* sig, word32 sigSz, word32 sigOID)
+{
+    int ret = 0;
+
+    if (sigCtx == NULL || buf == NULL || bufSz == 0 || key == NULL ||
+        keySz == 0 || sig == NULL || sigSz == 0) {
+        return BAD_FUNC_ARG;
+    }
 
     (void)key;
     (void)keySz;
     (void)sig;
     (void)sigSz;
-    (void)heap;
 
-    switch (sigOID) {
-    #ifndef NO_MD5
-        case CTC_MD5wRSA:
-        if (wc_Md5Hash(buf, bufSz, digest) == 0) {
-            typeH    = MD5h;
-            digestSz = MD5_DIGEST_SIZE;
-        }
-        break;
-    #endif
-    #if defined(WOLFSSL_MD2)
-        case CTC_MD2wRSA:
-        if (wc_Md2Hash(buf, bufSz, digest) == 0) {
-            typeH    = MD2h;
-            digestSz = MD2_DIGEST_SIZE;
-        }
-        break;
-    #endif
-    #ifndef NO_SHA
-        case CTC_SHAwRSA:
-        case CTC_SHAwDSA:
-        case CTC_SHAwECDSA:
-        if (wc_ShaHash(buf, bufSz, digest) == 0) {
-            typeH    = SHAh;
-            digestSz = SHA_DIGEST_SIZE;
-        }
-        break;
-    #endif
-    #ifdef WOLFSSL_SHA224
-        case CTC_SHA224wRSA:
-        case CTC_SHA224wECDSA:
-        if (wc_Sha224Hash(buf, bufSz, digest) == 0) {
-            typeH    = SHA224h;
-            digestSz = SHA224_DIGEST_SIZE;
-        }
-        break;
-    #endif
-    #ifndef NO_SHA256
-        case CTC_SHA256wRSA:
-        case CTC_SHA256wECDSA:
-        if (wc_Sha256Hash(buf, bufSz, digest) == 0) {
-            typeH    = SHA256h;
-            digestSz = SHA256_DIGEST_SIZE;
-        }
-        break;
-    #endif
-    #ifdef WOLFSSL_SHA512
-        case CTC_SHA512wRSA:
-        case CTC_SHA512wECDSA:
-        if (wc_Sha512Hash(buf, bufSz, digest) == 0) {
-            typeH    = SHA512h;
-            digestSz = SHA512_DIGEST_SIZE;
-        }
-        break;
-    #endif
-    #ifdef WOLFSSL_SHA384
-        case CTC_SHA384wRSA:
-        case CTC_SHA384wECDSA:
-        if (wc_Sha384Hash(buf, bufSz, digest) == 0) {
-            typeH    = SHA384h;
-            digestSz = SHA384_DIGEST_SIZE;
-        }
-        break;
-    #endif
-        default:
-            WOLFSSL_MSG("Verify Signature has unsupported type");
-    }
+    WOLFSSL_ENTER("ConfirmSignature");
 
-    if (typeH == 0) {
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(digest, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-        return ALGO_ID_E;
-    }
-
-    switch (keyOID) {
-    #ifndef NO_RSA
-        case RSAk:
+    switch (sigCtx->state) {
+        case SIG_STATE_BEGIN:
         {
-            word32 idx = 0;
-            int    encodedSigSz, verifySz;
-            byte*  out;
-#ifdef WOLFSSL_SMALL_STACK
-            RsaKey* pubKey;
-            byte* plain;
-            byte* encodedSig;
-#else
-            RsaKey pubKey[1];
-            byte plain[MAX_ENCODED_SIG_SZ];
-            byte encodedSig[MAX_ENCODED_SIG_SZ];
-#endif
-
-#ifdef WOLFSSL_SMALL_STACK
-            pubKey = (RsaKey*)XMALLOC(sizeof(RsaKey), NULL,
-                                                       DYNAMIC_TYPE_TMP_BUFFER);
-            plain = (byte*)XMALLOC(MAX_ENCODED_SIG_SZ, NULL,
-                                                       DYNAMIC_TYPE_TMP_BUFFER);
-            encodedSig = (byte*)XMALLOC(MAX_ENCODED_SIG_SZ, NULL,
-                                                       DYNAMIC_TYPE_TMP_BUFFER);
-
-            if (pubKey == NULL || plain == NULL || encodedSig == NULL) {
-                WOLFSSL_MSG("Failed to allocate memory at ConfirmSignature");
-
-                if (pubKey)
-                    XFREE(pubKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-                if (plain)
-                    XFREE(plain, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-                if (encodedSig)
-                    XFREE(encodedSig, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-
-                break; /* not confirmed */
+            sigCtx->digest = (byte*)XMALLOC(WC_MAX_DIGEST_SIZE, sigCtx->heap,
+                                                    DYNAMIC_TYPE_TMP_BUFFER);
+            if (sigCtx->digest == NULL) {
+                ERROR_OUT(MEMORY_E, exit_cs);
             }
-#endif
-            if (wc_InitRsaKey(pubKey, heap) != 0) {
-                WOLFSSL_MSG("InitRsaKey failed");
-            }
-            else if (sigSz > MAX_ENCODED_SIG_SZ) {
-                WOLFSSL_MSG("Verify Signature is too big");
-            }
-            else if (wc_RsaPublicKeyDecode(key, &idx, pubKey, keySz) < 0) {
-                WOLFSSL_MSG("ASN Key decode error RSA");
-            }
-            else {
-                XMEMCPY(plain, sig, sigSz);
 
-                ret = 0;
-                do {
-                #if defined(WOLFSSL_ASYNC_CRYPT)
-                    ret = wc_RsaAsyncWait(ret, pubKey);
-                #endif
-                    if (ret >= 0) {
-                        ret = wc_RsaSSL_VerifyInline(plain, sigSz, &out,
-                                                                    pubKey);
-                    }
-                } while (ret == WC_PENDING_E);
+            /* fall through */
+            sigCtx->state = SIG_STATE_HASH;
+        } /* SIG_STATE_BEGIN */
 
-                if (ret < 0) {
-                    WOLFSSL_MSG("Rsa SSL verify error");
+        case SIG_STATE_HASH:
+        {
+            switch (sigOID) {
+            #ifndef NO_MD5
+                case CTC_MD5wRSA:
+                if ((ret = wc_Md5Hash(buf, bufSz, sigCtx->digest)) == 0) {
+                    sigCtx->typeH    = MD5h;
+                    sigCtx->digestSz = MD5_DIGEST_SIZE;
                 }
-                else {
-                #ifdef WOLFSSL_DEBUG_ENCODING
-                    int x;
+                break;
+            #endif
+            #if defined(WOLFSSL_MD2)
+                case CTC_MD2wRSA:
+                if ((ret = wc_Md2Hash(buf, bufSz, sigCtx->digest)) == 0) {
+                    sigCtx->typeH    = MD2h;
+                    sigCtx->digestSz = MD2_DIGEST_SIZE;
+                }
+                break;
+            #endif
+            #ifndef NO_SHA
+                case CTC_SHAwRSA:
+                case CTC_SHAwDSA:
+                case CTC_SHAwECDSA:
+                if ((ret = wc_ShaHash(buf, bufSz, sigCtx->digest)) == 0) {
+                    sigCtx->typeH    = SHAh;
+                    sigCtx->digestSz = SHA_DIGEST_SIZE;
+                }
+                break;
+            #endif
+            #ifdef WOLFSSL_SHA224
+                case CTC_SHA224wRSA:
+                case CTC_SHA224wECDSA:
+                if ((ret = wc_Sha224Hash(buf, bufSz, sigCtx->digest)) == 0) {
+                    sigCtx->typeH    = SHA224h;
+                    sigCtx->digestSz = SHA224_DIGEST_SIZE;
+                }
+                break;
+            #endif
+            #ifndef NO_SHA256
+                case CTC_SHA256wRSA:
+                case CTC_SHA256wECDSA:
+                if ((ret = wc_Sha256Hash(buf, bufSz, sigCtx->digest)) == 0) {
+                    sigCtx->typeH    = SHA256h;
+                    sigCtx->digestSz = SHA256_DIGEST_SIZE;
+                }
+                break;
+            #endif
+            #ifdef WOLFSSL_SHA512
+                case CTC_SHA512wRSA:
+                case CTC_SHA512wECDSA:
+                if ((ret = wc_Sha512Hash(buf, bufSz, sigCtx->digest)) == 0) {
+                    sigCtx->typeH    = SHA512h;
+                    sigCtx->digestSz = SHA512_DIGEST_SIZE;
+                }
+                break;
+            #endif
+            #ifdef WOLFSSL_SHA384
+                case CTC_SHA384wRSA:
+                case CTC_SHA384wECDSA:
+                if ((ret = wc_Sha384Hash(buf, bufSz, sigCtx->digest)) == 0) {
+                    sigCtx->typeH    = SHA384h;
+                    sigCtx->digestSz = SHA384_DIGEST_SIZE;
+                }
+                break;
+            #endif
+                default:
+                    ret = HASH_TYPE_E;
+                    WOLFSSL_MSG("Verify Signature has unsupported type");
+            }
+
+            if (ret != 0) {
+                goto exit_cs;
+            }
+
+            /* fall through */
+            sigCtx->state = SIG_STATE_KEY;
+        } /* SIG_STATE_HASH */
+
+        case SIG_STATE_KEY:
+        {
+            sigCtx->keyOID = keyOID;
+
+            switch (keyOID) {
+            #ifndef NO_RSA
+                case RSAk:
+                {
+                    word32 idx = 0;
+
+                    sigCtx->key.rsa = (RsaKey*)XMALLOC(sizeof(RsaKey),
+                                                sigCtx->heap, DYNAMIC_TYPE_RSA);
+                    sigCtx->plain = (byte*)XMALLOC(MAX_ENCODED_SIG_SZ,
+                                         sigCtx->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                    if (sigCtx->key.rsa == NULL || sigCtx->plain == NULL) {
+                        ERROR_OUT(MEMORY_E, exit_cs);
+                    }
+
+                    if ((ret = wc_InitRsaKey_ex(sigCtx->key.rsa, sigCtx->heap,
+                                                        sigCtx->devId)) != 0) {
+                        goto exit_cs;
+                    }
+
+                    if (sigSz > MAX_ENCODED_SIG_SZ) {
+                        WOLFSSL_MSG("Verify Signature is too big");
+                        ERROR_OUT(BUFFER_E, exit_cs);
+                    }
+
+                    if ((ret = wc_RsaPublicKeyDecode(key, &idx, sigCtx->key.rsa,
+                                                                 keySz)) != 0) {
+                        WOLFSSL_MSG("ASN Key decode error RSA");
+                        goto exit_cs;
+                    }
+
+                    XMEMCPY(sigCtx->plain, sig, sigSz);
+                    sigCtx->out = NULL;
+                    break;
+                }
+            #endif /* !NO_RSA */
+            #ifdef HAVE_ECC
+                case ECDSAk:
+                {
+                    sigCtx->verify = 0;
+                    sigCtx->key.ecc = (ecc_key*)XMALLOC(sizeof(ecc_key),
+                                                sigCtx->heap, DYNAMIC_TYPE_ECC);
+                    if (sigCtx->key.ecc == NULL) {
+                        ERROR_OUT(MEMORY_E, exit_cs);
+                    }
+
+                    if ((ret = wc_ecc_init_ex(sigCtx->key.ecc, sigCtx->heap,
+                                                          sigCtx->devId)) < 0) {
+                        goto exit_cs;
+                    }
+                    if ((ret = wc_ecc_import_x963(key, keySz,
+                                                        sigCtx->key.ecc)) < 0) {
+                        WOLFSSL_MSG("ASN Key import error ECC");
+                        goto exit_cs;
+                    }
+                    break;
+                }
+            #endif /* HAVE_ECC */
+                default:
+                    WOLFSSL_MSG("Verify Key type unknown");
+                    ret = ASN_UNKNOWN_OID_E;
+                    break;
+            } /* switch (keyOID) */
+
+            if (ret != 0) {
+                goto exit_cs;
+            }
+
+            /* fall through */
+            sigCtx->state = SIG_STATE_DO;
+        } /* SIG_STATE_KEY */
+
+        case SIG_STATE_DO:
+        {
+            switch (keyOID) {
+            #ifndef NO_RSA
+                case RSAk:
+                {
+                    ret = wc_RsaSSL_VerifyInline(sigCtx->plain, sigSz,
+                                                &sigCtx->out, sigCtx->key.rsa);
+                #ifdef WOLFSSL_ASYNC_CRYPT
+                    if (ret == WC_PENDING_E)
+                        sigCtx->asyncDev = &sigCtx->key.rsa->asyncDev;
                 #endif
+                    break;
+                }
+            #endif /* !NO_RSA */
+            #ifdef HAVE_ECC
+                case ECDSAk:
+                {
+                    ret = wc_ecc_verify_hash(sig, sigSz, sigCtx->digest,
+                        sigCtx->digestSz, &sigCtx->verify, sigCtx->key.ecc);
+                #ifdef WOLFSSL_ASYNC_CRYPT
+                    if (ret == WC_PENDING_E)
+                        sigCtx->asyncDev = &sigCtx->key.ecc->asyncDev;
+                #endif
+                    break;
+                }
+            #endif /* HAVE_ECC */
+                default:
+                    break;
+            }  /* switch (keyOID) */
+
+            if (ret < 0) {
+                /* treat all non-async verify errors as ASN_SIG_CONFIRM_E */
+                if (ret != WC_PENDING_E)
+                    ret = ASN_SIG_CONFIRM_E;
+                goto exit_cs;
+            }
+
+            /* fall through */
+            sigCtx->state = SIG_STATE_CHECK;
+        } /* SIG_STATE_DO */
+
+        case SIG_STATE_CHECK:
+        {
+            switch (keyOID) {
+            #ifndef NO_RSA
+                case RSAk:
+                {
+                    int encodedSigSz, verifySz;
+                #ifdef WOLFSSL_SMALL_STACK
+                    byte* encodedSig = (byte*)XMALLOC(MAX_ENCODED_SIG_SZ,
+                                        sigCtx->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                    if (encodedSig == NULL) {
+                        ERROR_OUT(MEMORY_E, exit_cs);
+                    }
+                #else
+                    byte encodedSig[MAX_ENCODED_SIG_SZ];
+                #endif
+
                     verifySz = ret;
 
-                    ret = wc_EncodeSignature(encodedSig, digest, digestSz, typeH);
-                    if (ret > 0) {
-                        encodedSigSz = ret;
-
-                        /* check length to make sure we're right justified */
-                        if (encodedSigSz == verifySz &&
-                                XMEMCMP(out, encodedSig, encodedSigSz) == 0) {
-                            ret = 0; /* match */
-                        }
-                        else {
-                            WOLFSSL_MSG("Rsa SSL verify match encode error");
-                            ret = SIG_VERIFY_E;
-                        }
-
-                    #ifdef WOLFSSL_DEBUG_ENCODING
-                        printf("wolfssl encodedSig:\n");
-
-                        for (x = 0; x < encodedSigSz; x++) {
-                            printf("%02x ", encodedSig[x]);
-                            if ( (x % 16) == 15)
-                                printf("\n");
-                        }
-
-                        printf("\n");
-                        printf("actual digest:\n");
-
-                        for (x = 0; x < verifySz; x++) {
-                            printf("%02x ", out[x]);
-                            if ( (x % 16) == 15)
-                                printf("\n");
-                        }
-
-                        printf("\n");
-                    #endif /* WOLFSSL_DEBUG_ENCODING */
+                    /* make sure we're right justified */
+                    encodedSigSz = wc_EncodeSignature(encodedSig,
+                            sigCtx->digest, sigCtx->digestSz, sigCtx->typeH);
+                    if (encodedSigSz == verifySz &&
+                        XMEMCMP(sigCtx->out, encodedSig, encodedSigSz) == 0) {
+                        ret = 0;
                     }
+                    else {
+                        WOLFSSL_MSG("RSA SSL verify match encode error");
+                        ret = ASN_SIG_CONFIRM_E;
+                    }
+
+                #ifdef WOLFSSL_SMALL_STACK
+                    XFREE(encodedSig, sigCtx->heap, DYNAMIC_TYPE_TMP_BUFFER);
+                #endif /* WOLFSSL_SMALL_STACK */
+                    break;
                 }
-            }
+            #endif /* NO_RSA */
+            #ifdef HAVE_ECC
+                case ECDSAk:
+                {
+                    if (sigCtx->verify == 1) {
+                        ret = 0;
+                    }
+                    else {
+                        WOLFSSL_MSG("ECC Verify didn't match");
+                        ret = ASN_SIG_CONFIRM_E;
+                    }
+                    break;
+                }
+            #endif /* HAVE_ECC */
+                default:
+                    break;
+            }  /* switch (keyOID) */
 
-            wc_FreeRsaKey(pubKey);
-
-#ifdef WOLFSSL_SMALL_STACK
-            XFREE(pubKey,     NULL, DYNAMIC_TYPE_TMP_BUFFER);
-            XFREE(plain,      NULL, DYNAMIC_TYPE_TMP_BUFFER);
-            XFREE(encodedSig, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
             break;
-        }
+        } /* SIG_STATE_CHECK */
+    } /* switch (sigCtx->state) */
 
-    #endif /* NO_RSA */
-    #ifdef HAVE_ECC
-        case ECDSAk:
-        {
-            int verify = 0;
-#ifdef WOLFSSL_SMALL_STACK
-            ecc_key* pubKey;
-#else
-            ecc_key pubKey[1];
-#endif
+exit_cs:
 
-#ifdef WOLFSSL_SMALL_STACK
-            pubKey = (ecc_key*)XMALLOC(sizeof(ecc_key), NULL,
-                                                       DYNAMIC_TYPE_TMP_BUFFER);
-            if (pubKey == NULL) {
-                WOLFSSL_MSG("Failed to allocate pubKey");
-                break; /* not confirmed */
-            }
-#endif
+    WOLFSSL_LEAVE("ConfirmSignature", ret);
 
-            if (wc_ecc_init(pubKey) < 0) {
-                WOLFSSL_MSG("Failed to initialize key");
-                break; /* not confirmed */
-            }
-            if (wc_ecc_import_x963(key, keySz, pubKey) < 0) {
-                WOLFSSL_MSG("ASN Key import error ECC");
-            }
-            else {
-                ret = wc_ecc_verify_hash(sig, sigSz, digest, digestSz, &verify,
-                                                                        pubKey);
-                if (ret != 0) {
-                    WOLFSSL_MSG("ECC verify hash error");
-                }
-                else if (verify != 1) {
-                    WOLFSSL_MSG("ECC Verify didn't match");
-                    ret = SIG_VERIFY_E;
-                } else {
-                    ret = 0; /* match */
-                }
-            }
-            wc_ecc_free(pubKey);
-
-#ifdef WOLFSSL_SMALL_STACK
-            XFREE(pubKey, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-            break;
-        }
-    #endif /* HAVE_ECC */
-        default:
-            WOLFSSL_MSG("Verify Key type unknown");
+    if (ret != WC_PENDING_E) {
+        FreeSignatureCtx(sigCtx);
     }
 
-    (void)digestSz;
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(digest, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
     return ret;
 }
 
@@ -4914,7 +4950,7 @@ static int DecodeAltNames(byte* input, int sz, DecodedCert* cert)
             length -= strLen;
             idx    += strLen;
         }
-#ifndef IGNORE_NAME_CONSTRAINTS
+    #ifndef IGNORE_NAME_CONSTRAINTS
         else if (b == (ASN_CONTEXT_SPECIFIC | ASN_RFC822_TYPE)) {
             DNS_entry* emailEntry;
             int strLen;
@@ -4950,8 +4986,8 @@ static int DecodeAltNames(byte* input, int sz, DecodedCert* cert)
             length -= strLen;
             idx    += strLen;
         }
-#endif /* IGNORE_NAME_CONSTRAINTS */
-#ifdef WOLFSSL_SEP
+    #endif /* IGNORE_NAME_CONSTRAINTS */
+    #ifdef WOLFSSL_SEP
         else if (b == (ASN_CONTEXT_SPECIFIC | ASN_CONSTRUCTED | ASN_OTHER_TYPE))
         {
             int strLen;
@@ -5024,7 +5060,7 @@ static int DecodeAltNames(byte* input, int sz, DecodedCert* cert)
             cert->hwSerialNumSz = strLen;
             idx += strLen;
         }
-#endif /* WOLFSSL_SEP */
+    #endif /* WOLFSSL_SEP */
         else {
             int strLen;
             word32 lenStartIdx = idx;
@@ -5238,10 +5274,10 @@ static int DecodeAuthKeyId(byte* input, int sz, DecodedCert* cert)
         return ASN_PARSE_E;
     }
 
-    #ifdef OPENSSL_EXTRA
-        cert->extAuthKeyIdSrc = &input[idx];
-        cert->extAuthKeyIdSz = length;
-    #endif /* OPENSSL_EXTRA */
+#ifdef OPENSSL_EXTRA
+    cert->extAuthKeyIdSrc = &input[idx];
+    cert->extAuthKeyIdSz = length;
+#endif /* OPENSSL_EXTRA */
 
     if (length == KEYID_SIZE) {
         XMEMCPY(cert->extAuthKeyId, input + idx, length);
@@ -5323,10 +5359,10 @@ static int DecodeExtKeyUsage(byte* input, int sz, DecodedCert* cert)
         return ASN_PARSE_E;
     }
 
-    #ifdef OPENSSL_EXTRA
-        cert->extExtKeyUsageSrc = input + idx;
-        cert->extExtKeyUsageSz = length;
-    #endif
+#ifdef OPENSSL_EXTRA
+    cert->extExtKeyUsageSrc = input + idx;
+    cert->extExtKeyUsageSz = length;
+#endif
 
     while (idx < (word32)sz) {
         if (GetObjectId(input, &idx, &oid, oidCertKeyUseType, sz) < 0)
@@ -5347,9 +5383,9 @@ static int DecodeExtKeyUsage(byte* input, int sz, DecodedCert* cert)
                 break;
         }
 
-        #ifdef OPENSSL_EXTRA
-            cert->extExtKeyUsageCount++;
-        #endif
+    #ifdef OPENSSL_EXTRA
+        cert->extExtKeyUsageCount++;
+    #endif
     }
 
     return 0;
@@ -5453,6 +5489,7 @@ static int DecodeNameConstraints(byte* input, int sz, DecodedCert* cert)
 }
 #endif /* IGNORE_NAME_CONSTRAINTS */
 
+
 #if defined(WOLFSSL_CERT_EXT) && !defined(WOLFSSL_SEP)
 
 static int Word32ToString(char* d, word32 number)
@@ -5904,127 +5941,141 @@ Signer* GetCAByName(void* signers, byte* hash)
 
 int ParseCertRelative(DecodedCert* cert, int type, int verify, void* cm)
 {
-    word32 confirmOID;
-    int    ret;
-    int    badDate     = 0;
+    int    ret = 0;
+    int    badDate = 0; /* NOTE(review): lost on WC_PENDING_E re-entry? */
     int    criticalExt = 0;
+    word32 confirmOID;
 
-    if ((ret = DecodeToKey(cert, verify)) < 0) {
-        if (ret == ASN_BEFORE_DATE_E || ret == ASN_AFTER_DATE_E)
-            badDate = ret;
-        else
-            return ret;
+    if (cert == NULL) {
+        return BAD_FUNC_ARG;
     }
 
-    WOLFSSL_MSG("Parsed Past Key");
-
-    if (cert->srcIdx < cert->sigIndex) {
-        #ifndef ALLOW_V1_EXTENSIONS
-            if (cert->version < 2) {
-                WOLFSSL_MSG("    v1 and v2 certs not allowed extensions");
-                return ASN_VERSION_E;
-            }
-        #endif
-        /* save extensions */
-        cert->extensions    = &cert->source[cert->srcIdx];
-        cert->extensionsSz  =  cert->sigIndex - cert->srcIdx;
-        cert->extensionsIdx = cert->srcIdx;   /* for potential later use */
-
-        if ((ret = DecodeCertExtensions(cert)) < 0) {
-            if (ret == ASN_CRIT_EXT_E)
-                criticalExt = ret;
+    if (cert->sigCtx.state == SIG_STATE_BEGIN) {
+        if ((ret = DecodeToKey(cert, verify)) < 0) {
+            if (ret == ASN_BEFORE_DATE_E || ret == ASN_AFTER_DATE_E)
+                badDate = ret;
             else
                 return ret;
         }
 
-        /* advance past extensions */
-        cert->srcIdx =  cert->sigIndex;
-    }
+        WOLFSSL_MSG("Parsed Past Key");
 
-    if ((ret = GetAlgoId(cert->source, &cert->srcIdx, &confirmOID,
-                         oidSigType, cert->maxIdx)) < 0)
-        return ret;
+        if (cert->srcIdx < cert->sigIndex) {
+        #ifndef ALLOW_V1_EXTENSIONS
+            if (cert->version < 2) {
+                WOLFSSL_MSG("\tv1 and v2 certs not allowed extensions");
+                return ASN_VERSION_E;
+            }
+        #endif
 
-    if ((ret = GetSignature(cert)) < 0)
-        return ret;
+            /* save extensions */
+            cert->extensions    = &cert->source[cert->srcIdx];
+            cert->extensionsSz  =  cert->sigIndex - cert->srcIdx;
+            cert->extensionsIdx = cert->srcIdx;   /* for potential later use */
 
-    if (confirmOID != cert->signatureOID)
-        return ASN_SIG_OID_E;
+            if ((ret = DecodeCertExtensions(cert)) < 0) {
+                if (ret == ASN_CRIT_EXT_E)
+                    criticalExt = ret;
+                else
+                    return ret;
+            }
+
+            /* advance past extensions */
+            cert->srcIdx = cert->sigIndex;
+        }
+
+        if ((ret = GetAlgoId(cert->source, &cert->srcIdx, &confirmOID,
+                             oidSigType, cert->maxIdx)) < 0)
+            return ret;
+
+        if ((ret = GetSignature(cert)) < 0)
+            return ret;
+
+        if (confirmOID != cert->signatureOID)
+            return ASN_SIG_OID_E;
 
     #ifndef NO_SKID
-        if (cert->extSubjKeyIdSet == 0
-                          && cert->publicKey != NULL && cert->pubKeySize > 0) {
+        if (cert->extSubjKeyIdSet == 0 && cert->publicKey != NULL &&
+                                                        cert->pubKeySize > 0) {
         #ifdef NO_SHA
             ret = wc_Sha256Hash(cert->publicKey, cert->pubKeySize,
                                                             cert->extSubjKeyId);
         #else
             ret = wc_ShaHash(cert->publicKey, cert->pubKeySize,
                                                             cert->extSubjKeyId);
-        #endif
+        #endif /* NO_SHA */
             if (ret != 0)
                 return ret;
         }
-    #endif
+    #endif /* !NO_SKID */
 
-   if (verify != NO_VERIFY && type != CA_TYPE && type != TRUSTED_PEER_TYPE) {
-        Signer* ca = NULL;
+        if (verify != NO_VERIFY && type != CA_TYPE && type != TRUSTED_PEER_TYPE) {
+            cert->ca = NULL;
         #ifndef NO_SKID
             if (cert->extAuthKeyIdSet)
-                ca = GetCA(cm, cert->extAuthKeyId);
-            if (ca == NULL)
-                ca = GetCAByName(cm, cert->issuerHash);
-        #else /* NO_SKID */
-            ca = GetCA(cm, cert->issuerHash);
-        #endif /* NO SKID */
-        WOLFSSL_MSG("About to verify certificate signature");
+                cert->ca = GetCA(cm, cert->extAuthKeyId);
+            if (cert->ca == NULL)
+                cert->ca = GetCAByName(cm, cert->issuerHash);
+        #else
+            cert->ca = GetCA(cm, cert->issuerHash);
+        #endif /* !NO_SKID */
 
-        if (ca) {
-            if (cert->isCA) {
-                if (ca->pathLengthSet) {
-                    if (ca->pathLength == 0) {
-                        WOLFSSL_MSG("CA with path length 0 signing a CA");
-                        return ASN_PATHLEN_INV_E;
-                    }
-                    if (cert->pathLengthSet &&
-                        cert->pathLength >= ca->pathLength) {
+            WOLFSSL_MSG("About to verify certificate signature");
+            if (cert->ca) {
+                if (cert->isCA) {
+                    if (cert->ca->pathLengthSet) {
+                        if (cert->ca->pathLength == 0) {
+                            WOLFSSL_MSG("CA with path length 0 signing a CA");
+                            return ASN_PATHLEN_INV_E;
+                        }
+                        if (cert->pathLengthSet &&
+                            cert->pathLength >= cert->ca->pathLength) {
 
-                        WOLFSSL_MSG("CA signing CA with longer path length");
-                        return ASN_PATHLEN_INV_E;
+                            WOLFSSL_MSG("CA signing CA with longer path length");
+                            return ASN_PATHLEN_INV_E;
+                        }
                     }
                 }
+
+        #ifdef HAVE_OCSP
+                /* Need the CA's public key hash for OCSP */
+            #ifdef NO_SHA
+                ret = wc_Sha256Hash(cert->ca->publicKey, cert->ca->pubKeySize,
+                                                            cert->issuerKeyHash);
+            #else
+                ret = wc_ShaHash(cert->ca->publicKey, cert->ca->pubKeySize,
+                                                            cert->issuerKeyHash);
+            #endif /* NO_SHA */
+                if (ret != 0)
+                    return ret;
+        #endif /* HAVE_OCSP */
             }
+        }
+    }
 
-#ifdef HAVE_OCSP
-            /* Need the ca's public key hash for OCSP */
-    #ifdef NO_SHA
-            ret = wc_Sha256Hash(ca->publicKey, ca->pubKeySize,
-                                cert->issuerKeyHash);
-    #else /* NO_SHA */
-            ret = wc_ShaHash(ca->publicKey, ca->pubKeySize,
-                                cert->issuerKeyHash);
-    #endif /* NO_SHA */
-            if (ret != 0)
-                return ret;
-#endif /* HAVE_OCSP */
-
+    if (verify != NO_VERIFY && type != CA_TYPE && type != TRUSTED_PEER_TYPE) {
+        if (cert->ca) {
             if (verify == VERIFY) {
                 /* try to confirm/verify signature */
-                if (ConfirmSignature(cert->source + cert->certBegin,
-                            cert->sigIndex - cert->certBegin,
-                        ca->publicKey, ca->pubKeySize, ca->keyOID,
-                        cert->signature, cert->sigLength, cert->signatureOID,
-                        cert->heap) != 0) {
-                    WOLFSSL_MSG("Confirm signature failed");
-                    return ASN_SIG_CONFIRM_E;
+                if ((ret = ConfirmSignature(&cert->sigCtx,
+                        cert->source + cert->certBegin,
+                        cert->sigIndex - cert->certBegin,
+                        cert->ca->publicKey, cert->ca->pubKeySize,
+                        cert->ca->keyOID, cert->signature,
+                        cert->sigLength, cert->signatureOID)) != 0) {
+                    if (ret != WC_PENDING_E) {
+                        WOLFSSL_MSG("Confirm signature failed");
+                    }
+                    return ret;
                 }
-                #ifndef IGNORE_NAME_CONSTRAINTS
+            #ifndef IGNORE_NAME_CONSTRAINTS
                 /* check that this cert's name is permitted by the signer's
                  * name constraints */
-                if (!ConfirmNameConstraints(ca, cert)) {
+                if (!ConfirmNameConstraints(cert->ca, cert)) {
                     WOLFSSL_MSG("Confirm name constraint failed");
                     return ASN_NAME_INVALID_E;
                 }
-                #endif /* IGNORE_NAME_CONSTRAINTS */
+            #endif /* IGNORE_NAME_CONSTRAINTS */
             }
         }
         else {
@@ -6040,7 +6091,7 @@ int ParseCertRelative(DecodedCert* cert, int type, int verify, void* cm)
     if (criticalExt != 0)
         return criticalExt;
 
-    return 0;
+    return ret;
 }
 
 /* Create and init an new signer */
@@ -8046,28 +8097,14 @@ static int WriteCertBody(DerCert* der, byte* buffer)
 
 
 /* Make RSA signature from buffer (sz), write to sig (sigSz) */
-static int MakeSignature(const byte* buffer, int sz, byte* sig, int sigSz,
-                         RsaKey* rsaKey, ecc_key* eccKey, WC_RNG* rng,
-                         int sigAlgoType)
+static int MakeSignature(CertSignCtx* certSignCtx, const byte* buffer, int sz,
+    byte* sig, int sigSz, RsaKey* rsaKey, ecc_key* eccKey, WC_RNG* rng,
+    int sigAlgoType, void* heap)
 {
-    int encSigSz, digestSz, typeH = 0, ret = 0;
-#ifdef WOLFSSL_SMALL_STACK
-    byte* digest;
-#else
-    byte digest[WC_MAX_DIGEST_SIZE]; /* max size */
-#endif
-#ifdef WOLFSSL_SMALL_STACK
-    byte* encSig;
-#else
-    byte encSig[MAX_DER_DIGEST_SZ];
-#endif
+    int digestSz = 0, typeH = 0, ret = 0;
 
-    (void)digest;
     (void)digestSz;
-    (void)encSig;
-    (void)encSigSz;
     (void)typeH;
-
     (void)buffer;
     (void)sz;
     (void)sig;
@@ -8076,119 +8113,141 @@ static int MakeSignature(const byte* buffer, int sz, byte* sig, int sigSz,
     (void)eccKey;
     (void)rng;
 
-#ifdef WOLFSSL_SMALL_STACK
-    digest = (byte*)XMALLOC(WC_MAX_DIGEST_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (digest == NULL)
-        return 0; /* not confirmed */
-#endif
+    switch (certSignCtx->state) {
+    case CERTSIGN_STATE_BEGIN:
+    case CERTSIGN_STATE_DIGEST:
 
-    switch (sigAlgoType) {
-    #ifndef NO_MD5
-        case CTC_MD5wRSA:
-        if ((ret = wc_Md5Hash(buffer, sz, digest)) == 0) {
-            typeH    = MD5h;
-            digestSz = MD5_DIGEST_SIZE;
+        certSignCtx->state = CERTSIGN_STATE_DIGEST;
+        certSignCtx->digest = (byte*)XMALLOC(WC_MAX_DIGEST_SIZE, heap,
+            DYNAMIC_TYPE_TMP_BUFFER);
+        if (certSignCtx->digest == NULL) {
+            ret = MEMORY_E; goto exit_ms;
         }
-        break;
-    #endif
-    #ifndef NO_SHA
-        case CTC_SHAwRSA:
-        case CTC_SHAwECDSA:
-        if ((ret = wc_ShaHash(buffer, sz, digest)) == 0) {
-            typeH    = SHAh;
-            digestSz = SHA_DIGEST_SIZE;
+
+        switch (sigAlgoType) {
+        #ifndef NO_MD5
+            case CTC_MD5wRSA:
+            if ((ret = wc_Md5Hash(buffer, sz, certSignCtx->digest)) == 0) {
+                typeH    = MD5h;
+                digestSz = MD5_DIGEST_SIZE;
+            }
+            break;
+        #endif
+        #ifndef NO_SHA
+            case CTC_SHAwRSA:
+            case CTC_SHAwECDSA:
+            if ((ret = wc_ShaHash(buffer, sz, certSignCtx->digest)) == 0) {
+                typeH    = SHAh;
+                digestSz = SHA_DIGEST_SIZE;
+            }
+            break;
+        #endif
+        #ifdef WOLFSSL_SHA224
+            case CTC_SHA224wRSA:
+            case CTC_SHA224wECDSA:
+            if ((ret = wc_Sha224Hash(buffer, sz, certSignCtx->digest)) == 0) {
+                typeH    = SHA224h;
+                digestSz = SHA224_DIGEST_SIZE;
+            }
+            break;
+        #endif
+        #ifndef NO_SHA256
+            case CTC_SHA256wRSA:
+            case CTC_SHA256wECDSA:
+            if ((ret = wc_Sha256Hash(buffer, sz, certSignCtx->digest)) == 0) {
+                typeH    = SHA256h;
+                digestSz = SHA256_DIGEST_SIZE;
+            }
+            break;
+        #endif
+        #ifdef WOLFSSL_SHA384
+            case CTC_SHA384wRSA:
+            case CTC_SHA384wECDSA:
+            if ((ret = wc_Sha384Hash(buffer, sz, certSignCtx->digest)) == 0) {
+                typeH    = SHA384h;
+                digestSz = SHA384_DIGEST_SIZE;
+            }
+            break;
+        #endif
+        #ifdef WOLFSSL_SHA512
+            case CTC_SHA512wRSA:
+            case CTC_SHA512wECDSA:
+            if ((ret = wc_Sha512Hash(buffer, sz, certSignCtx->digest)) == 0) {
+                typeH    = SHA512h;
+                digestSz = SHA512_DIGEST_SIZE;
+            }
+            break;
+        #endif
+            default:
+                WOLFSSL_MSG("MakeSignautre called with unsupported type");
+                ret = ALGO_ID_E;
         }
-        break;
-    #endif
-    #ifdef WOLFSSL_SHA224
-        case CTC_SHA224wRSA:
-        case CTC_SHA224wECDSA:
-        if ((ret = wc_Sha224Hash(buffer, sz, digest)) == 0) {
-            typeH    = SHA224h;
-            digestSz = SHA224_DIGEST_SIZE;
+
+        /* set next state, since WC_PENDING re-entry for these is not "call again" */
+        certSignCtx->state = CERTSIGN_STATE_ENCODE;
+        if (ret != 0) {
+            goto exit_ms;
         }
-        break;
-    #endif
-    #ifndef NO_SHA256
-        case CTC_SHA256wRSA:
-        case CTC_SHA256wECDSA:
-        if ((ret = wc_Sha256Hash(buffer, sz, digest)) == 0) {
-            typeH    = SHA256h;
-            digestSz = SHA256_DIGEST_SIZE;
+
+        /* fall-through */
+    case CERTSIGN_STATE_ENCODE:
+    #ifndef NO_RSA
+        if (rsaKey) {
+            certSignCtx->encSig = (byte*)XMALLOC(MAX_DER_DIGEST_SZ, heap,
+                DYNAMIC_TYPE_TMP_BUFFER);
+            if (certSignCtx->encSig == NULL) {
+                ret = MEMORY_E; goto exit_ms;
+            }
+
+            /* signature */
+            certSignCtx->encSigSz = wc_EncodeSignature(certSignCtx->encSig,
+                                          certSignCtx->digest, digestSz, typeH);
         }
-        break;
-    #endif
-    #ifdef WOLFSSL_SHA384
-        case CTC_SHA384wRSA:
-        case CTC_SHA384wECDSA:
-        if ((ret = wc_Sha384Hash(buffer, sz, digest)) == 0) {
-            typeH    = SHA384h;
-            digestSz = SHA384_DIGEST_SIZE;
+    #endif /* !NO_RSA */
+
+        /* fall-through */
+    case CERTSIGN_STATE_DO:
+        certSignCtx->state = CERTSIGN_STATE_DO;
+        ret = ALGO_ID_E; /* default to error */
+
+    #ifndef NO_RSA
+        if (rsaKey) {
+            /* signature */
+            ret = wc_RsaSSL_Sign(certSignCtx->encSig, certSignCtx->encSigSz,
+                                 sig, sigSz, rsaKey, rng);
         }
-        break;
-    #endif
-    #ifdef WOLFSSL_SHA512
-        case CTC_SHA512wRSA:
-        case CTC_SHA512wECDSA:
-        if ((ret = wc_Sha512Hash(buffer, sz, digest)) == 0) {
-            typeH    = SHA512h;
-            digestSz = SHA512_DIGEST_SIZE;
+    #endif /* !NO_RSA */
+
+    #ifdef HAVE_ECC
+        if (!rsaKey && eccKey) {
+            word32 outSz = sigSz;
+
+            ret = wc_ecc_sign_hash(certSignCtx->digest, digestSz,
+                                   sig, &outSz, rng, eccKey);
+            if (ret == 0)
+                ret = outSz;
         }
+    #endif /* HAVE_ECC */
         break;
-    #endif
-        default:
-            WOLFSSL_MSG("MakeSignautre called with unsupported type");
-            ret = ALGO_ID_E;
     }
 
-    if (ret != 0) {
-    #ifdef WOLFSSL_SMALL_STACK
-        XFREE(digest, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    #endif
+exit_ms:
+
+    if (ret == WC_PENDING_E) {
         return ret;
     }
 
-#ifdef WOLFSSL_SMALL_STACK
-    encSig = (byte*)XMALLOC(MAX_DER_DIGEST_SZ,
-                                                 NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (encSig == NULL) {
-        XFREE(digest, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-        return MEMORY_E;
-    }
-#endif
-
-    ret = ALGO_ID_E;
-
 #ifndef NO_RSA
     if (rsaKey) {
-        /* signature */
-        encSigSz = wc_EncodeSignature(encSig, digest, digestSz, typeH);
-        ret = 0;
-        do {
-#if defined(WOLFSSL_ASYNC_CRYPT)
-            ret = wc_RsaAsyncWait(ret, rsaKey);
-#endif
-            if (ret >= 0) {
-                ret = wc_RsaSSL_Sign(encSig, encSigSz, sig, sigSz, rsaKey, rng);
-            }
-        } while (ret == WC_PENDING_E);
+        XFREE(certSignCtx->encSig, heap, DYNAMIC_TYPE_TMP_BUFFER);
     }
-#endif
+#endif /* !NO_RSA */
 
-#ifdef HAVE_ECC
-    if (!rsaKey && eccKey) {
-        word32 outSz = sigSz;
-        ret = wc_ecc_sign_hash(digest, digestSz, sig, &outSz, rng, eccKey);
+    XFREE(certSignCtx->digest, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    certSignCtx->digest = NULL;
 
-        if (ret == 0)
-            ret = outSz;
-    }
-#endif
-
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(digest, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    XFREE(encSig, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+    /* reset state */
+    certSignCtx->state = CERTSIGN_STATE_BEGIN;
 
     return ret;
 }
@@ -8562,35 +8621,57 @@ int wc_MakeCertReq(Cert* cert, byte* derBuffer, word32 derSz,
 int wc_SignCert(int requestSz, int sType, byte* buffer, word32 buffSz,
              RsaKey* rsaKey, ecc_key* eccKey, WC_RNG* rng)
 {
-    int sigSz;
-#ifdef WOLFSSL_SMALL_STACK
-    byte* sig;
-#else
-    byte sig[MAX_ENCODED_SIG_SZ];
+    int sigSz = 0;
+    void* heap = NULL;
+    CertSignCtx* certSignCtx = NULL;
+#ifndef WOLFSSL_ASYNC_CRYPT
+    CertSignCtx  certSignCtx_lcl;
+    certSignCtx = &certSignCtx_lcl;
+    XMEMSET(certSignCtx, 0, sizeof(CertSignCtx));
 #endif
 
     if (requestSz < 0)
         return requestSz;
 
-#ifdef WOLFSSL_SMALL_STACK
-    sig = (byte*)XMALLOC(MAX_ENCODED_SIG_SZ, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (sig == NULL)
-        return MEMORY_E;
-#endif
+    /* locate ctx */
+    if (rsaKey) {
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        certSignCtx = &rsaKey->certSignCtx;
+    #endif
+        heap = rsaKey->heap;
+    }
+    else if (eccKey) {
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        certSignCtx = &eccKey->certSignCtx;
+    #endif
+        heap = eccKey->heap;
+    }
 
-    sigSz = MakeSignature(buffer, requestSz, sig, MAX_ENCODED_SIG_SZ, rsaKey,
-                          eccKey, rng, sType);
+    if (certSignCtx == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    if (certSignCtx->sig == NULL) {
+        certSignCtx->sig = (byte*)XMALLOC(MAX_ENCODED_SIG_SZ, heap,
+            DYNAMIC_TYPE_TMP_BUFFER);
+        if (certSignCtx->sig == NULL)
+            return MEMORY_E;
+    }
+
+    sigSz = MakeSignature(certSignCtx, buffer, requestSz, certSignCtx->sig,
+        MAX_ENCODED_SIG_SZ, rsaKey, eccKey, rng, sType, heap);
+    if (sigSz == WC_PENDING_E)
+        return sigSz;
 
     if (sigSz >= 0) {
         if (requestSz + MAX_SEQ_SZ * 2 + sigSz > (int)buffSz)
             sigSz = BUFFER_E;
         else
-            sigSz = AddSignature(buffer, requestSz, sig, sigSz, sType);
+            sigSz = AddSignature(buffer, requestSz, certSignCtx->sig, sigSz, sType);
     }
 
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(sig, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+    XFREE(certSignCtx->sig, heap, DYNAMIC_TYPE_TMP_BUFFER);
+    certSignCtx->sig = NULL;
 
     return sigSz;
 }
@@ -8828,7 +8909,7 @@ int wc_SetAuthKeyIdFromCert(Cert *cert, const byte *der, int derSz)
 #endif
 
     /* decode certificate and get SKID that will be AKID of current cert */
-    InitDecodedCert(decoded, (byte*)der, derSz, 0);
+    InitDecodedCert(decoded, (byte*)der, derSz, NULL);
     ret = ParseCert(decoded, CERT_TYPE, NO_VERIFY, 0);
     if (ret != 0) {
         FreeDecodedCert(decoded);
@@ -8980,7 +9061,7 @@ static int SetAltNamesFromCert(Cert* cert, const byte* der, int derSz)
         return MEMORY_E;
 #endif
 
-    InitDecodedCert(decoded, (byte*)der, derSz, 0);
+    InitDecodedCert(decoded, (byte*)der, derSz, NULL);
     ret = ParseCertRelative(decoded, CA_TYPE, NO_VERIFY, 0);
 
     if (ret < 0) {
@@ -9076,7 +9157,7 @@ static int SetDatesFromCert(Cert* cert, const byte* der, int derSz)
         return MEMORY_E;
 #endif
 
-    InitDecodedCert(decoded, (byte*)der, derSz, 0);
+    InitDecodedCert(decoded, (byte*)der, derSz, NULL);
     ret = ParseCertRelative(decoded, CA_TYPE, NO_VERIFY, 0);
 
     if (ret < 0) {
@@ -9132,7 +9213,7 @@ static int SetNameFromCert(CertName* cn, const byte* der, int derSz)
         return MEMORY_E;
 #endif
 
-    InitDecodedCert(decoded, (byte*)der, derSz, 0);
+    InitDecodedCert(decoded, (byte*)der, derSz, NULL);
     ret = ParseCertRelative(decoded, CA_TYPE, NO_VERIFY, 0);
 
     if (ret < 0) {
@@ -9325,7 +9406,7 @@ int StoreECC_DSA_Sig(byte* out, word32* outLen, mp_int* r, mp_int* s)
                    headerSz + 2))  /* SEQ_TAG + LEN(ENUM) */
         return BUFFER_E;
 
-    idx = SetSequence(rLen+rLeadingZero+sLen+sLeadingZero+headerSz, out);
+    idx = SetSequence(rLen + rLeadingZero + sLen+sLeadingZero + headerSz, out);
 
     /* store r */
     rSz = SetASNIntMP(r, -1, &out[idx]);
@@ -9351,17 +9432,21 @@ int DecodeECC_DSA_Sig(const byte* sig, word32 sigLen, mp_int* r, mp_int* s)
     word32 idx = 0;
     int    len = 0;
 
-    if (GetSequence(sig, &idx, &len, sigLen) < 0)
+    if (GetSequence(sig, &idx, &len, sigLen) < 0) {
         return ASN_ECC_KEY_E;
+    }
 
-    if ((word32)len > (sigLen - idx))
+    if ((word32)len > (sigLen - idx)) {
         return ASN_ECC_KEY_E;
+    }
 
-    if (GetInt(r, sig, &idx, sigLen) < 0)
+    if (GetInt(r, sig, &idx, sigLen) < 0) {
         return ASN_ECC_KEY_E;
+    }
 
-    if (GetInt(s, sig, &idx, sigLen) < 0)
+    if (GetInt(s, sig, &idx, sigLen) < 0) {
         return ASN_ECC_KEY_E;
+    }
 
     return 0;
 }
@@ -10057,9 +10142,11 @@ static int DecodeBasicOcspResponse(byte* source, word32* ioIndex,
             return ret;
         }
 
-        ret = ConfirmSignature(resp->response, resp->responseSz,
-                            cert.publicKey, cert.pubKeySize, cert.keyOID,
-                            resp->sig, resp->sigSz, resp->sigOID, NULL);
+        /* ConfirmSignature is blocking here */
+        ret = ConfirmSignature(&cert.sigCtx,
+            resp->response, resp->responseSz,
+            cert.publicKey, cert.pubKeySize, cert.keyOID,
+            resp->sig, resp->sigSz, resp->sigOID);
         FreeDecodedCert(&cert);
 
         if (ret != 0) {
@@ -10071,6 +10158,7 @@ static int DecodeBasicOcspResponse(byte* source, word32* ioIndex,
 #endif /* WOLFSSL_NO_OCSP_OPTIONAL_CERTS */
     {
         Signer* ca = NULL;
+        int sigValid = -1;
 
         #ifndef NO_SKID
             ca = GetCA(cm, resp->issuerKeyHash);
@@ -10078,9 +10166,16 @@ static int DecodeBasicOcspResponse(byte* source, word32* ioIndex,
             ca = GetCA(cm, resp->issuerHash);
         #endif
 
-        if (!ca || ConfirmSignature(resp->response, resp->responseSz,
-                    ca->publicKey, ca->pubKeySize, ca->keyOID,
-                    resp->sig, resp->sigSz, resp->sigOID, NULL) != 0) {
+        if (ca) {
+            SignatureCtx sigCtx;
+            InitSignatureCtx(&sigCtx, heap, INVALID_DEVID);
+
+            /* ConfirmSignature is blocking here */
+            sigValid = ConfirmSignature(&sigCtx, resp->response,
+                resp->responseSz, ca->publicKey, ca->pubKeySize, ca->keyOID,
+                                resp->sig, resp->sigSz, resp->sigOID);
+        }
+        if (ca == NULL || sigValid != 0) {
             WOLFSSL_MSG("\tOCSP Confirm signature failed");
             return ASN_OCSP_CONFIRM_E;
         }
@@ -10282,6 +10377,8 @@ int EncodeOcspRequest(OcspRequest* req, byte* output, word32 size)
 int InitOcspRequest(OcspRequest* req, DecodedCert* cert, byte useNonce,
                                                                      void* heap)
 {
+    int ret;
+
     WOLFSSL_ENTER("InitOcspRequest");
 
     if (req == NULL)
@@ -10313,17 +10410,17 @@ int InitOcspRequest(OcspRequest* req, DecodedCert* cert, byte useNonce,
             XMEMCPY(req->url, cert->extAuthInfo, cert->extAuthInfoSz);
             req->urlSz = cert->extAuthInfoSz;
         }
-
     }
 
     if (useNonce) {
         WC_RNG rng;
 
-#ifdef WOLFSSL_STATIC_MEMORY
-        if (wc_InitRng_ex(&rng, req->heap) != 0) {
-#else
-        if (wc_InitRng(&rng) != 0) {
-#endif
+    #ifndef HAVE_FIPS
+        ret = wc_InitRng_ex(&rng, req->heap, INVALID_DEVID);
+    #else
+        ret = wc_InitRng(&rng);
+    #endif
+        if (ret != 0) {
             WOLFSSL_MSG("\tCannot initialize RNG. Skipping the OSCP Nonce.");
         } else {
             if (wc_RNG_GenerateBlock(&rng, req->nonce, MAX_OCSP_NONCE_SZ) != 0)
@@ -10577,7 +10674,7 @@ static int GetCRL_Signature(const byte* source, word32* idx, DecodedCRL* dcrl,
 /* prase crl buffer into decoded state, 0 on success */
 int ParseCRL(DecodedCRL* dcrl, const byte* buff, word32 sz, void* cm)
 {
-    int     version, len, doNextDate = 1;
+    int     ret = 0, version, len, doNextDate = 1;
     word32  oid, idx = 0, dateIdx;
     Signer* ca = NULL;
 
@@ -10660,29 +10757,33 @@ int ParseCRL(DecodedCRL* dcrl, const byte* buff, word32 sz, void* cm)
 
     /* openssl doesn't add skid by default for CRLs cause firefox chokes
        we're not assuming it's available yet */
-    #if !defined(NO_SKID) && defined(CRL_SKID_READY)
-        if (dcrl->extAuthKeyIdSet)
-            ca = GetCA(cm, dcrl->extAuthKeyId);
-        if (ca == NULL)
-            ca = GetCAByName(cm, dcrl->issuerHash);
-    #else /* NO_SKID */
-        ca = GetCA(cm, dcrl->issuerHash);
-    #endif /* NO_SKID */
+#if !defined(NO_SKID) && defined(CRL_SKID_READY)
+    if (dcrl->extAuthKeyIdSet)
+        ca = GetCA(cm, dcrl->extAuthKeyId);
+    if (ca == NULL)
+        ca = GetCAByName(cm, dcrl->issuerHash);
+#else
+    ca = GetCA(cm, dcrl->issuerHash);
+#endif /* !NO_SKID && CRL_SKID_READY */
     WOLFSSL_MSG("About to verify CRL signature");
 
     if (ca) {
+        SignatureCtx sigCtx;
+
         WOLFSSL_MSG("Found CRL issuer CA");
         /* try to confirm/verify signature */
-        #ifndef IGNORE_KEY_EXTENSIONS
-            if ((ca->keyUsage & KEYUSE_CRL_SIGN) == 0) {
-                WOLFSSL_MSG("CA cannot sign CRLs");
-                return ASN_CRL_NO_SIGNER_E;
-            }
-        #endif /* IGNORE_KEY_EXTENSIONS */
-        if (ConfirmSignature(buff + dcrl->certBegin,
-                dcrl->sigIndex - dcrl->certBegin, ca->publicKey,
-                ca->pubKeySize, ca->keyOID, dcrl->signature, dcrl->sigLength,
-                                            dcrl->signatureOID, NULL) != 0) {
+    #ifndef IGNORE_KEY_EXTENSIONS
+        if ((ca->keyUsage & KEYUSE_CRL_SIGN) == 0) {
+            WOLFSSL_MSG("CA cannot sign CRLs");
+            return ASN_CRL_NO_SIGNER_E;
+        }
+    #endif /* IGNORE_KEY_EXTENSIONS */
+
+        InitSignatureCtx(&sigCtx, dcrl->heap, INVALID_DEVID);
+        if (ConfirmSignature(&sigCtx, buff + dcrl->certBegin,
+                dcrl->sigIndex - dcrl->certBegin,
+                ca->publicKey, ca->pubKeySize, ca->keyOID,
+                dcrl->signature, dcrl->sigLength, dcrl->signatureOID) != 0) {
             WOLFSSL_MSG("CRL Confirm signature failed");
             return ASN_CRL_CONFIRM_E;
         }
@@ -10692,7 +10793,7 @@ int ParseCRL(DecodedCRL* dcrl, const byte* buff, word32 sz, void* cm)
         return ASN_CRL_NO_SIGNER_E;
     }
 
-    return 0;
+    return ret;
 }
 
 #endif /* HAVE_CRL */
diff --git a/wolfcrypt/src/des3.c b/wolfcrypt/src/des3.c
old mode 100644
new mode 100755
index 005b03f33..739fb62d6
--- a/wolfcrypt/src/des3.c
+++ b/wolfcrypt/src/des3.c
@@ -26,95 +26,78 @@
 
 #include 
 
+
 #ifndef NO_DES3
 
 #include 
 
+/* fips wrapper calls, user can call direct */
 #ifdef HAVE_FIPS
+    int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir)
+    {
+        return Des_SetKey(des, key, iv, dir);
+    }
+    int wc_Des3_SetKey(Des3* des, const byte* key, const byte* iv, int dir)
+    {
+        return Des3_SetKey_fips(des, key, iv, dir);
+    }
+    int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz)
+    {
+        return Des_CbcEncrypt(des, out, in, sz);
+    }
+    int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz)
+    {
+        return Des_CbcDecrypt(des, out, in, sz);
+    }
+    int wc_Des3_CbcEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
+    {
+        return Des3_CbcEncrypt_fips(des, out, in, sz);
+    }
+    int wc_Des3_CbcDecrypt(Des3* des, byte* out, const byte* in, word32 sz)
+    {
+        return Des3_CbcDecrypt_fips(des, out, in, sz);
+    }
 
-int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir)
-{
-    return Des_SetKey(des, key, iv, dir);
-}
+    #ifdef WOLFSSL_DES_ECB
+        /* One block, compatibility only */
+        int wc_Des_EcbEncrypt(Des* des, byte* out, const byte* in, word32 sz)
+        {
+            return Des_EcbEncrypt(des, out, in, sz);
+        }
+        int wc_Des3_EcbEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
+        {
+            return Des3_EcbEncrypt(des, out, in, sz);
+        }
+    #endif /* WOLFSSL_DES_ECB */
 
+    void wc_Des_SetIV(Des* des, const byte* iv)
+    {
+        Des_SetIV(des, iv);
+    }
+    int wc_Des3_SetIV(Des3* des, const byte* iv)
+    {
+        return Des3_SetIV_fips(des, iv);
+    }
 
-int wc_Des3_SetKey(Des3* des, const byte* key, const byte* iv, int dir)
-{
-    return Des3_SetKey_fips(des, key, iv, dir);
-}
-
-
-int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz)
-{
-    return Des_CbcEncrypt(des, out, in, sz);
-}
-
-
-int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz)
-{
-    return Des_CbcDecrypt(des, out, in, sz);
-}
-
-
-int wc_Des3_CbcEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
-{
-    return Des3_CbcEncrypt_fips(des, out, in, sz);
-}
-
-
-int wc_Des3_CbcDecrypt(Des3* des, byte* out, const byte* in, word32 sz)
-{
-    return Des3_CbcDecrypt_fips(des, out, in, sz);
-}
-
-
-#ifdef WOLFSSL_DES_ECB
-
-/* One block, compatibility only */
-int wc_Des_EcbEncrypt(Des* des, byte* out, const byte* in, word32 sz)
-{
-    return Des_EcbEncrypt(des, out, in, sz);
-}
-
-int wc_Des3_EcbEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
-{
-    return Des3_EcbEncrypt(des, out, in, sz);
-}
-#endif /* WOLFSSL_DES_ECB */
-
-
-void wc_Des_SetIV(Des* des, const byte* iv)
-{
-    Des_SetIV(des, iv);
-}
-
-
-int wc_Des3_SetIV(Des3* des, const byte* iv)
-{
-    return Des3_SetIV_fips(des, iv);
-}
-
-
-#ifdef WOLFSSL_ASYNC_CRYPT
-
-/* Initialize Des3 for use with Nitrox device */
-int wc_Des3AsyncInit(Des3* des3, int devId)
-{
-    return Des3AsyncInit(des3, devId);
-}
-
-
-/* Free Des3 from use with Nitrox device */
-void wc_Des3AsyncFree(Des3* des3)
-{
-    Des3AsyncFree(des3);
-}
-
-
-#endif /* WOLFSSL_ASYNC_CRYPT */
+    int wc_Des3Init(Des3* des3, void* heap, int devId)
+    {
+        (void)des3;
+        (void)heap;
+        (void)devId;
+        /* FIPS doesn't support:
+            return Des3Init(des3, heap, devId); */
+        return 0;
+    }
+    void wc_Des3Free(Des3* des3)
+    {
+        (void)des3;
+        /* FIPS doesn't support:
+            Des3Free(des3); */
+    }
 
 #else /* build without fips */
 
+
 #if defined(WOLFSSL_TI_CRYPT)
     #include 
 #else
@@ -130,6 +113,7 @@ void wc_Des3AsyncFree(Des3* des3)
 #endif
 
 
+/* Hardware Acceleration */
 #if defined(STM32F2_CRYPTO) || defined(STM32F4_CRYPTO)
 
     /*
@@ -446,227 +430,224 @@ void wc_Des3AsyncFree(Des3* des3)
 
 #elif defined(HAVE_COLDFIRE_SEC)
 
-#include 
+    #include 
 
-#include "sec.h"
-#include "mcf5475_sec.h"
-#include "mcf5475_siu.h"
+    #include "sec.h"
+    #include "mcf5475_sec.h"
+    #include "mcf5475_siu.h"
 
-#if defined (HAVE_THREADX)
-#include "memory_pools.h"
-extern TX_BYTE_POOL mp_ncached;  /* Non Cached memory pool */
-#endif
-
-#define DES_BUFFER_SIZE (DES_BLOCK_SIZE * 64)
-static unsigned char *desBuffIn = NULL ;
-static unsigned char *desBuffOut = NULL ;
-static byte *secIV ;
-static byte *secKey ;
-static volatile SECdescriptorType *secDesc ;
-
-static wolfSSL_Mutex Mutex_DesSEC ;
-
-#define SEC_DESC_DES_CBC_ENCRYPT  0x20500010
-#define SEC_DESC_DES_CBC_DECRYPT  0x20400010
-#define SEC_DESC_DES3_CBC_ENCRYPT 0x20700010
-#define SEC_DESC_DES3_CBC_DECRYPT 0x20600010
-
-#define DES_IVLEN 8
-#define DES_KEYLEN 8
-#define DES3_IVLEN 8
-#define DES3_KEYLEN 24
-
-extern volatile unsigned char __MBAR[];
-
-static void wc_Des_Cbc(byte* out, const byte* in, word32 sz,
-                    byte *key, byte *iv, word32 desc)
-{
-    #ifdef DEBUG_WOLFSSL
-    int ret ;  int stat1,stat2 ;
+    #if defined (HAVE_THREADX)
+    #include "memory_pools.h"
+    extern TX_BYTE_POOL mp_ncached;  /* Non Cached memory pool */
     #endif
-    int size ;
-    volatile int v ;
 
-    wc_LockMutex(&Mutex_DesSEC) ;
+    #define DES_BUFFER_SIZE (DES_BLOCK_SIZE * 64)
+    static unsigned char *desBuffIn = NULL;
+    static unsigned char *desBuffOut = NULL;
+    static byte *secIV;
+    static byte *secKey;
+    static volatile SECdescriptorType *secDesc;
 
-    secDesc->length1 = 0x0;
-    secDesc->pointer1 = NULL;
-    if((desc==SEC_DESC_DES_CBC_ENCRYPT)||(desc==SEC_DESC_DES_CBC_DECRYPT)){
-        secDesc->length2 = DES_IVLEN ;
-        secDesc->length3 = DES_KEYLEN ;
-    } else {
-        secDesc->length2 = DES3_IVLEN ;
-        secDesc->length3 = DES3_KEYLEN ;
-    }
-    secDesc->pointer2 = secIV ;
-    secDesc->pointer3 = secKey;
-    secDesc->pointer4 = desBuffIn ;
-    secDesc->pointer5 = desBuffOut ;
-    secDesc->length6 = 0;
-    secDesc->pointer6 = NULL;
-    secDesc->length7 = 0x0;
-    secDesc->pointer7 = NULL;
-    secDesc->nextDescriptorPtr = NULL ;
+    static wolfSSL_Mutex Mutex_DesSEC;
 
-    while(sz) {
-        XMEMCPY(secIV, iv, secDesc->length2) ;
-        if((sz%DES_BUFFER_SIZE) == sz) {
-            size = sz ;
-            sz = 0 ;
-        } else {
-            size = DES_BUFFER_SIZE ;
-            sz -= DES_BUFFER_SIZE ;
-        }
+    #define SEC_DESC_DES_CBC_ENCRYPT  0x20500010
+    #define SEC_DESC_DES_CBC_DECRYPT  0x20400010
+    #define SEC_DESC_DES3_CBC_ENCRYPT 0x20700010
+    #define SEC_DESC_DES3_CBC_DECRYPT 0x20600010
 
-        XMEMCPY(desBuffIn, in, size) ;
-        XMEMCPY(secKey, key, secDesc->length3) ;
+    #define DES_IVLEN 8
+    #define DES_KEYLEN 8
+    #define DES3_IVLEN 8
+    #define DES3_KEYLEN 24
 
-        secDesc->header = desc ;
-        secDesc->length4 = size;
-        secDesc->length5 = size;
-        /* Point SEC to the location of the descriptor */
-        MCF_SEC_FR0 = (uint32)secDesc;
-        /* Initialize SEC and wait for encryption to complete */
-        MCF_SEC_CCCR0 = 0x0000001a;
-        /* poll SISR to determine when channel is complete */
-        v=0 ;
-        while((secDesc->header>> 24) != 0xff) {
-            if(v++ > 1000)break ;
-        }
+    extern volatile unsigned char __MBAR[];
 
-#ifdef DEBUG_WOLFSSL
-        ret = MCF_SEC_SISRH;
-        stat1 = MCF_SEC_DSR ;
-        stat2 = MCF_SEC_DISR ;
-        if(ret & 0xe0000000) {
-            /* db_printf("Des_Cbc(%x):ISRH=%08x, DSR=%08x, DISR=%08x\n", desc, ret, stat1, stat2) ; */
-        }
-#endif
-
-        XMEMCPY(out, desBuffOut, size) ;
-
-        if((desc==SEC_DESC_DES3_CBC_ENCRYPT)||(desc==SEC_DESC_DES_CBC_ENCRYPT)) {
-            XMEMCPY((void*)iv, (void*)&(out[size-secDesc->length2]), secDesc->length2) ;
-        } else {
-            XMEMCPY((void*)iv, (void*)&(in[size-secDesc->length2]), secDesc->length2) ;
-        }
-
-        in  += size ;
-        out += size ;
-
-    }
-    wc_UnLockMutex(&Mutex_DesSEC) ;
-
-}
-
-
-int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz)
-{
-    wc_Des_Cbc(out, in, sz,  (byte *)des->key,  (byte *)des->reg, SEC_DESC_DES_CBC_ENCRYPT) ;
-    return 0;
-}
-
-int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz)
-{
-    wc_Des_Cbc(out, in, sz,   (byte *)des->key,  (byte *)des->reg, SEC_DESC_DES_CBC_DECRYPT) ;
-    return 0;
-}
-
-int wc_Des3_CbcEncrypt(Des3* des3, byte* out, const byte* in, word32 sz)
-{
-    wc_Des_Cbc(out, in, sz,  (byte *)des3->key,  (byte *)des3->reg, SEC_DESC_DES3_CBC_ENCRYPT) ;
-    return 0;
-}
-
-
-int wc_Des3_CbcDecrypt(Des3* des3, byte* out, const byte* in, word32 sz)
-{
-    wc_Des_Cbc(out, in, sz,   (byte *)des3->key,  (byte *)des3->reg, SEC_DESC_DES3_CBC_DECRYPT) ;
-    return 0;
-}
-
-static void setParity(byte *buf, int len)
-{
-    int i, j ;
-    byte v ;
-    int bits ;
-
-    for(i=0; i> 1 ;
-        buf[i] = v << 1 ;
-        bits = 0 ;
-        for(j=0; j<7; j++)
-        {
-            bits += (v&0x1) ;
-            v = v >> 1 ;
+        #ifdef DEBUG_WOLFSSL
+        int ret;  int stat1,stat2;
+    	  #endif
+        int size;
+        volatile int v;
+
+        wc_LockMutex(&Mutex_DesSEC) ;
+
+        secDesc->length1 = 0x0;
+        secDesc->pointer1 = NULL;
+        if((desc==SEC_DESC_DES_CBC_ENCRYPT)||(desc==SEC_DESC_DES_CBC_DECRYPT)){
+            secDesc->length2 = DES_IVLEN;
+            secDesc->length3 = DES_KEYLEN;
+        } else {
+            secDesc->length2 = DES3_IVLEN;
+            secDesc->length3 = DES3_KEYLEN;
         }
-        buf[i] |= (1 - (bits&0x1)) ;
-    }
+        secDesc->pointer2 = secIV;
+        secDesc->pointer3 = secKey;
+        secDesc->pointer4 = desBuffIn;
+        secDesc->pointer5 = desBuffOut;
+        secDesc->length6 = 0;
+        secDesc->pointer6 = NULL;
+        secDesc->length7 = 0x0;
+        secDesc->pointer7 = NULL;
+        secDesc->nextDescriptorPtr = NULL;
 
-}
+        while(sz) {
+            XMEMCPY(secIV, iv, secDesc->length2);
+            if((sz%DES_BUFFER_SIZE) == sz) {
+                size = sz;
+                sz = 0;
+            } else {
+                size = DES_BUFFER_SIZE;
+                sz -= DES_BUFFER_SIZE;
+            }
 
+            XMEMCPY(desBuffIn, in, size);
+            XMEMCPY(secKey, key, secDesc->length3);
 
-int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir)
-{
-    if(desBuffIn == NULL) {
-        #if defined (HAVE_THREADX)
-        int s1, s2, s3, s4, s5 ;
-        s5 = tx_byte_allocate(&mp_ncached,(void *)&secDesc,
-                                                     sizeof(SECdescriptorType), TX_NO_WAIT);
-        s1 = tx_byte_allocate(&mp_ncached,(void *)&desBuffIn,  DES_BUFFER_SIZE, TX_NO_WAIT);
-        s2 = tx_byte_allocate(&mp_ncached,(void *)&desBuffOut, DES_BUFFER_SIZE, TX_NO_WAIT);
-        /* Don't know des or des3 to be used. Allocate larger buffers */
-        s3 = tx_byte_allocate(&mp_ncached,(void *)&secKey,     DES3_KEYLEN,TX_NO_WAIT);
-        s4 = tx_byte_allocate(&mp_ncached,(void *)&secIV,      DES3_IVLEN,  TX_NO_WAIT);
-        #else
-        #warning "Allocate non-Cache buffers"
+            secDesc->header = desc;
+            secDesc->length4 = size;
+            secDesc->length5 = size;
+            /* Point SEC to the location of the descriptor */
+            MCF_SEC_FR0 = (uint32)secDesc;
+            /* Initialize SEC and wait for encryption to complete */
+            MCF_SEC_CCCR0 = 0x0000001a;
+            /* poll SISR to determine when channel is complete */
+            v=0;
+            while((secDesc->header>> 24) != 0xff) {
+                if(v++ > 1000)break;
+            }
+
+        #ifdef DEBUG_WOLFSSL
+            ret = MCF_SEC_SISRH;
+            stat1 = MCF_SEC_DSR;
+            stat2 = MCF_SEC_DISR;
+            if(ret & 0xe0000000) {
+                /* db_printf("Des_Cbc(%x):ISRH=%08x, DSR=%08x, DISR=%08x\n", desc, ret, stat1, stat2); */
+            }
         #endif
 
-        wc_InitMutex(&Mutex_DesSEC) ;
+            XMEMCPY(out, desBuffOut, size);
+
+            if ((desc==SEC_DESC_DES3_CBC_ENCRYPT)||(desc==SEC_DESC_DES_CBC_ENCRYPT)) {
+                XMEMCPY((void*)iv, (void*)&(out[size-secDesc->length2]), secDesc->length2);
+            } else {
+                XMEMCPY((void*)iv, (void*)&(in[size-secDesc->length2]), secDesc->length2);
+            }
+
+            in  += size;
+            out += size;
+
+        }
+        wc_UnLockMutex(&Mutex_DesSEC) ;
+
     }
 
-    XMEMCPY(des->key, key, DES_KEYLEN);
-    setParity((byte *)des->key, DES_KEYLEN) ;
 
-    if (iv) {
-        XMEMCPY(des->reg, iv, DES_IVLEN);
-    }   else {
-        XMEMSET(des->reg, 0x0, DES_IVLEN) ;
+    int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz)
+    {
+        wc_Des_Cbc(out, in, sz,  (byte *)des->key,  (byte *)des->reg, SEC_DESC_DES_CBC_ENCRYPT);
+        return 0;
     }
-    return 0;
-}
 
-int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir)
-{
+    int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz)
+    {
+        wc_Des_Cbc(out, in, sz,   (byte *)des->key,  (byte *)des->reg, SEC_DESC_DES_CBC_DECRYPT);
+        return 0;
+    }
 
-    if(desBuffIn == NULL) {
+    int wc_Des3_CbcEncrypt(Des3* des3, byte* out, const byte* in, word32 sz)
+    {
+        wc_Des_Cbc(out, in, sz,  (byte *)des3->key,  (byte *)des3->reg, SEC_DESC_DES3_CBC_ENCRYPT);
+    	  return 0;
+    }
+
+
+    int wc_Des3_CbcDecrypt(Des3* des3, byte* out, const byte* in, word32 sz)
+    {
+        wc_Des_Cbc(out, in, sz,   (byte *)des3->key,  (byte *)des3->reg, SEC_DESC_DES3_CBC_DECRYPT);
+    	  return 0;
+    }
+
+    static void setParity(byte *buf, int len)
+    {
+        int i, j;
+        byte v;
+        int bits;
+
+        for (i=0; i<len; i++) {
+            v = buf[i] >> 1;              /* keep the upper 7 bits of the byte */
+            buf[i] = v << 1;              /* write them back with the LSB cleared */
+            bits = 0;
+            for (j=0; j<7; j++) {         /* count the set bits among those 7 */
+                bits += (v&0x1);
+                v = v >> 1;
+            }
+            buf[i] |= (1 - (bits&0x1));   /* LSB = 1 when count is even: odd parity */
+        }
+
+    }
+
+    int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir)
+    {
+        if(desBuffIn == NULL) {
         #if defined (HAVE_THREADX)
-        int s1, s2, s3, s4, s5 ;
-        s5 = tx_byte_allocate(&mp_ncached,(void *)&secDesc,
-                                                     sizeof(SECdescriptorType), TX_NO_WAIT);
-        s1 = tx_byte_allocate(&mp_ncached,(void *)&desBuffIn,  DES_BUFFER_SIZE, TX_NO_WAIT);
-        s2 = tx_byte_allocate(&mp_ncached,(void *)&desBuffOut, DES_BUFFER_SIZE, TX_NO_WAIT);
-        s3 = tx_byte_allocate(&mp_ncached,(void *)&secKey,     DES3_KEYLEN,TX_NO_WAIT);
-        s4 = tx_byte_allocate(&mp_ncached,(void *)&secIV,      DES3_IVLEN,  TX_NO_WAIT);
+    			  int s1, s2, s3, s4, s5;
+            s5 = tx_byte_allocate(&mp_ncached,(void *)&secDesc,
+                                                         sizeof(SECdescriptorType), TX_NO_WAIT);
+            s1 = tx_byte_allocate(&mp_ncached,(void *)&desBuffIn,  DES_BUFFER_SIZE, TX_NO_WAIT);
+            s2 = tx_byte_allocate(&mp_ncached,(void *)&desBuffOut, DES_BUFFER_SIZE, TX_NO_WAIT);
+            /* Don't know des or des3 to be used. Allocate larger buffers */
+            s3 = tx_byte_allocate(&mp_ncached,(void *)&secKey,     DES3_KEYLEN,TX_NO_WAIT);
+            s4 = tx_byte_allocate(&mp_ncached,(void *)&secIV,      DES3_IVLEN,  TX_NO_WAIT);
         #else
-        #warning "Allocate non-Cache buffers"
+            #warning "Allocate non-Cache buffers"
         #endif
 
-        wc_InitMutex(&Mutex_DesSEC) ;
+            wc_InitMutex(&Mutex_DesSEC);
+        }
+
+        XMEMCPY(des->key, key, DES_KEYLEN);
+        setParity((byte *)des->key, DES_KEYLEN);
+
+        if (iv) {
+            XMEMCPY(des->reg, iv, DES_IVLEN);
+        }   else {
+            XMEMSET(des->reg, 0x0, DES_IVLEN);
+        }
+    		return 0;
     }
 
-    XMEMCPY(des3->key[0], key, DES3_KEYLEN);
-    setParity((byte *)des3->key[0], DES3_KEYLEN) ;
+    int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir)
+    {
+
+        if(desBuffIn == NULL) {
+        #if defined (HAVE_THREADX)
+    			  int s1, s2, s3, s4, s5;
+            s5 = tx_byte_allocate(&mp_ncached,(void *)&secDesc,
+                                                         sizeof(SECdescriptorType), TX_NO_WAIT);
+            s1 = tx_byte_allocate(&mp_ncached,(void *)&desBuffIn,  DES_BUFFER_SIZE, TX_NO_WAIT);
+            s2 = tx_byte_allocate(&mp_ncached,(void *)&desBuffOut, DES_BUFFER_SIZE, TX_NO_WAIT);
+            s3 = tx_byte_allocate(&mp_ncached,(void *)&secKey,     DES3_KEYLEN,TX_NO_WAIT);
+            s4 = tx_byte_allocate(&mp_ncached,(void *)&secIV,      DES3_IVLEN,  TX_NO_WAIT);
+        #else
+            #warning "Allocate non-Cache buffers"
+        #endif
+
+            wc_InitMutex(&Mutex_DesSEC);
+        }
+
+        XMEMCPY(des3->key[0], key, DES3_KEYLEN);
+        setParity((byte *)des3->key[0], DES3_KEYLEN);
+
+        if (iv) {
+            XMEMCPY(des3->reg, iv, DES3_IVLEN);
+        }   else {
+            XMEMSET(des3->reg, 0x0, DES3_IVLEN);
+        }
+        return 0;
 
-    if (iv) {
-        XMEMCPY(des3->reg, iv, DES3_IVLEN);
-    }   else {
-        XMEMSET(des3->reg, 0x0, DES3_IVLEN) ;
     }
-    return 0;
-
-}
 #elif (defined FREESCALE_LTC_DES)
 
     #include "fsl_ltc.h"
@@ -753,7 +734,7 @@ int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir)
             return -1;
 
     }
-#elif defined FREESCALE_MMCAU
+#elif defined(FREESCALE_MMCAU)
     /*
      * Freescale mmCAU hardware DES/3DES support through the CAU/mmCAU library.
      * Documentation located in ColdFire/ColdFire+ CAU and Kinetis mmCAU
@@ -761,8 +742,7 @@ int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir)
      */
     #include "fsl_mmcau.h"
 
-    const unsigned char parityLookup[128] =
-    {
+    const unsigned char parityLookup[128] = {
         1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
         0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
         0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
@@ -824,7 +804,7 @@ int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir)
         byte temp_block[DES_BLOCK_SIZE];
 
         iv = (byte*)des->reg;
-        
+
         while (len > 0)
         {
             XMEMCPY(temp_block, in + offset, DES_BLOCK_SIZE);
@@ -897,7 +877,7 @@ int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir)
         byte temp_block[DES_BLOCK_SIZE];
 
         iv = (byte*)des->reg;
-        
+
         while (len > 0)
         {
             XMEMCPY(temp_block, in + offset, DES_BLOCK_SIZE);
@@ -969,13 +949,10 @@ int wc_Des3_SetKey(Des3* des3, const byte* key, const byte* iv, int dir)
 
     #include "wolfssl/wolfcrypt/port/pic32/pic32mz-crypt.h"
 
-void wc_Des_SetIV(Des* des, const byte* iv);
-int  wc_Des3_SetIV(Des3* des, const byte* iv);
-
     int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir)
     {
-        word32 *dkey = des->key ;
-        word32 *dreg = des->reg ;
+        word32 *dkey = des->key;
+        word32 *dreg = des->reg;
 
         XMEMCPY((byte *)dkey, (byte *)key, 8);
         ByteReverseWords(dkey, dkey, 8);
@@ -988,12 +965,12 @@ int  wc_Des3_SetIV(Des3* des, const byte* iv);
     int wc_Des3_SetKey(Des3* des, const byte* key, const byte* iv, int dir)
     {
         word32 *dkey1 = des->key[0];
-        word32 *dreg = des->reg ;
+        word32 *dreg = des->reg;
 
         XMEMCPY(dkey1, key, 24);
         ByteReverseWords(dkey1, dkey1, 24);
         XMEMCPY(dreg, iv, 8);
-        ByteReverseWords(dreg, dreg, 8) ;
+        ByteReverseWords(dreg, dreg, 8);
 
         return 0;
     }
@@ -1001,21 +978,21 @@ int  wc_Des3_SetIV(Des3* des, const byte* iv);
     void DesCrypt(word32 *key, word32 *iv, byte* out, const byte* in, word32 sz,
                   int dir, int algo, int cryptoalgo)
     {
-        securityAssociation *sa_p ;
-        bufferDescriptor *bd_p ;
-        const byte *in_p, *in_l ;
-        byte *out_p, *out_l ;
+        securityAssociation *sa_p;
+        bufferDescriptor *bd_p;
+        const byte *in_p, *in_l;
+        byte *out_p, *out_l;
         volatile securityAssociation sa __attribute__((aligned (8)));
         volatile bufferDescriptor bd __attribute__((aligned (8)));
-        volatile int k ;
+        volatile int k;
 
         /* get uncached address */
 
         in_l = in;
-        out_l = out ;
-        sa_p = KVA0_TO_KVA1(&sa) ;
-        bd_p = KVA0_TO_KVA1(&bd) ;
-        in_p = KVA0_TO_KVA1(in_l) ;
+        out_l = out;
+        sa_p = KVA0_TO_KVA1(&sa);
+        bd_p = KVA0_TO_KVA1(&bd);
+        in_p = KVA0_TO_KVA1(in_l);
         out_p= KVA0_TO_KVA1(out_l);
 
         if(PIC32MZ_IF_RAM(in_p))
@@ -1024,13 +1001,13 @@ int  wc_Des3_SetIV(Des3* des, const byte* iv);
 
         /* Set up the Security Association */
         XMEMSET((byte *)KVA0_TO_KVA1(&sa), 0, sizeof(sa));
-        sa_p->SA_CTRL.ALGO = algo ;
+        sa_p->SA_CTRL.ALGO = algo;
         sa_p->SA_CTRL.LNC = 1;
         sa_p->SA_CTRL.LOADIV = 1;
         sa_p->SA_CTRL.FB = 1;
-        sa_p->SA_CTRL.ENCTYPE = dir ; /* Encryption/Decryption */
+        sa_p->SA_CTRL.ENCTYPE = dir; /* Encryption/Decryption */
         sa_p->SA_CTRL.CRYPTOALGO = cryptoalgo;
-        sa_p->SA_CTRL.KEYSIZE = 1 ; /* KEY is 192 bits */
+        sa_p->SA_CTRL.KEYSIZE = 1; /* KEY is 192 bits */
         XMEMCPY((byte *)KVA0_TO_KVA1(&sa.SA_ENCKEY[algo==PIC32_ALGO_TDES ? 2 : 6]),
                 (byte *)key, algo==PIC32_ALGO_TDES ? 24 : 8);
         XMEMCPY((byte *)KVA0_TO_KVA1(&sa.SA_ENCIV[2]), (byte *)iv, 8);
@@ -1043,30 +1020,30 @@ int  wc_Des3_SetIV(Des3* des, const byte* iv);
         bd_p->BD_CTRL.LAST_BD = 1;
         bd_p->BD_CTRL.DESC_EN = 1;
 
-        bd_p->SA_ADDR = (unsigned int)KVA_TO_PA(&sa) ; /* (unsigned int)sa_p; */
-        bd_p->SRCADDR = (unsigned int)KVA_TO_PA(in) ; /* (unsigned int)in_p; */
+        bd_p->SA_ADDR = (unsigned int)KVA_TO_PA(&sa); /* (unsigned int)sa_p; */
+        bd_p->SRCADDR = (unsigned int)KVA_TO_PA(in); /* (unsigned int)in_p; */
         bd_p->DSTADDR = (unsigned int)KVA_TO_PA(out); /* (unsigned int)out_p; */
         bd_p->NXTPTR = (unsigned int)KVA_TO_PA(&bd);
-        bd_p->MSGLEN = sz ;
+        bd_p->MSGLEN = sz;
 
         /* Fire in the hole! */
         CECON = 1 << 6;
         while (CECON);
 
         /* Run the engine */
-        CEBDPADDR = (unsigned int)KVA_TO_PA(&bd) ; /* (unsigned int)bd_p ; */
+        CEBDPADDR = (unsigned int)KVA_TO_PA(&bd); /* (unsigned int)bd_p; */
         CEINTEN = 0x07;
         CECON = 0x27;
 
-        WAIT_ENGINE ;
+        WAIT_ENGINE;
 
         if((cryptoalgo == PIC32_CRYPTOALGO_CBC) ||
            (cryptoalgo == PIC32_CRYPTOALGO_TCBC)||
            (cryptoalgo == PIC32_CRYPTOALGO_RCBC)) {
             /* set iv for the next call */
             if(dir == PIC32_ENCRYPTION) {
-                XMEMCPY((void *)iv, (void*)&(out_p[sz-DES_IVLEN]), DES_IVLEN) ;
-            } else {
+	            XMEMCPY((void *)iv, (void*)&(out_p[sz-DES_IVLEN]), DES_IVLEN);
+	        } else {
                 ByteReverseWords((word32*)iv, (word32 *)&(in_p[sz-DES_IVLEN]),
                                  DES_IVLEN);
             }
@@ -1103,559 +1080,575 @@ int  wc_Des3_SetIV(Des3* des, const byte* iv);
         return 0;
     }
 
-#else /* Begin wolfCrypt software implementation */
-
-/* permuted choice table (key) */
-static const byte pc1[] = {
-       57, 49, 41, 33, 25, 17,  9,
-        1, 58, 50, 42, 34, 26, 18,
-       10,  2, 59, 51, 43, 35, 27,
-       19, 11,  3, 60, 52, 44, 36,
-
-       63, 55, 47, 39, 31, 23, 15,
-        7, 62, 54, 46, 38, 30, 22,
-       14,  6, 61, 53, 45, 37, 29,
-       21, 13,  5, 28, 20, 12,  4
-};
-
-/* number left rotations of pc1 */
-static const byte totrot[] = {
-       1,2,4,6,8,10,12,14,15,17,19,21,23,25,27,28
-};
-
-/* permuted choice key (table) */
-static const byte pc2[] = {
-       14, 17, 11, 24,  1,  5,
-        3, 28, 15,  6, 21, 10,
-       23, 19, 12,  4, 26,  8,
-       16,  7, 27, 20, 13,  2,
-       41, 52, 31, 37, 47, 55,
-       30, 40, 51, 45, 33, 48,
-       44, 49, 39, 56, 34, 53,
-       46, 42, 50, 36, 29, 32
-};
-
-/* End of DES-defined tables */
-
-/* bit 0 is left-most in byte */
-static const int bytebit[] = {
-       0200,0100,040,020,010,04,02,01
-};
-
-static const word32 Spbox[8][64] = {
-{
-0x01010400,0x00000000,0x00010000,0x01010404,
-0x01010004,0x00010404,0x00000004,0x00010000,
-0x00000400,0x01010400,0x01010404,0x00000400,
-0x01000404,0x01010004,0x01000000,0x00000004,
-0x00000404,0x01000400,0x01000400,0x00010400,
-0x00010400,0x01010000,0x01010000,0x01000404,
-0x00010004,0x01000004,0x01000004,0x00010004,
-0x00000000,0x00000404,0x00010404,0x01000000,
-0x00010000,0x01010404,0x00000004,0x01010000,
-0x01010400,0x01000000,0x01000000,0x00000400,
-0x01010004,0x00010000,0x00010400,0x01000004,
-0x00000400,0x00000004,0x01000404,0x00010404,
-0x01010404,0x00010004,0x01010000,0x01000404,
-0x01000004,0x00000404,0x00010404,0x01010400,
-0x00000404,0x01000400,0x01000400,0x00000000,
-0x00010004,0x00010400,0x00000000,0x01010004},
-{
-0x80108020,0x80008000,0x00008000,0x00108020,
-0x00100000,0x00000020,0x80100020,0x80008020,
-0x80000020,0x80108020,0x80108000,0x80000000,
-0x80008000,0x00100000,0x00000020,0x80100020,
-0x00108000,0x00100020,0x80008020,0x00000000,
-0x80000000,0x00008000,0x00108020,0x80100000,
-0x00100020,0x80000020,0x00000000,0x00108000,
-0x00008020,0x80108000,0x80100000,0x00008020,
-0x00000000,0x00108020,0x80100020,0x00100000,
-0x80008020,0x80100000,0x80108000,0x00008000,
-0x80100000,0x80008000,0x00000020,0x80108020,
-0x00108020,0x00000020,0x00008000,0x80000000,
-0x00008020,0x80108000,0x00100000,0x80000020,
-0x00100020,0x80008020,0x80000020,0x00100020,
-0x00108000,0x00000000,0x80008000,0x00008020,
-0x80000000,0x80100020,0x80108020,0x00108000},
-{
-0x00000208,0x08020200,0x00000000,0x08020008,
-0x08000200,0x00000000,0x00020208,0x08000200,
-0x00020008,0x08000008,0x08000008,0x00020000,
-0x08020208,0x00020008,0x08020000,0x00000208,
-0x08000000,0x00000008,0x08020200,0x00000200,
-0x00020200,0x08020000,0x08020008,0x00020208,
-0x08000208,0x00020200,0x00020000,0x08000208,
-0x00000008,0x08020208,0x00000200,0x08000000,
-0x08020200,0x08000000,0x00020008,0x00000208,
-0x00020000,0x08020200,0x08000200,0x00000000,
-0x00000200,0x00020008,0x08020208,0x08000200,
-0x08000008,0x00000200,0x00000000,0x08020008,
-0x08000208,0x00020000,0x08000000,0x08020208,
-0x00000008,0x00020208,0x00020200,0x08000008,
-0x08020000,0x08000208,0x00000208,0x08020000,
-0x00020208,0x00000008,0x08020008,0x00020200},
-{
-0x00802001,0x00002081,0x00002081,0x00000080,
-0x00802080,0x00800081,0x00800001,0x00002001,
-0x00000000,0x00802000,0x00802000,0x00802081,
-0x00000081,0x00000000,0x00800080,0x00800001,
-0x00000001,0x00002000,0x00800000,0x00802001,
-0x00000080,0x00800000,0x00002001,0x00002080,
-0x00800081,0x00000001,0x00002080,0x00800080,
-0x00002000,0x00802080,0x00802081,0x00000081,
-0x00800080,0x00800001,0x00802000,0x00802081,
-0x00000081,0x00000000,0x00000000,0x00802000,
-0x00002080,0x00800080,0x00800081,0x00000001,
-0x00802001,0x00002081,0x00002081,0x00000080,
-0x00802081,0x00000081,0x00000001,0x00002000,
-0x00800001,0x00002001,0x00802080,0x00800081,
-0x00002001,0x00002080,0x00800000,0x00802001,
-0x00000080,0x00800000,0x00002000,0x00802080},
-{
-0x00000100,0x02080100,0x02080000,0x42000100,
-0x00080000,0x00000100,0x40000000,0x02080000,
-0x40080100,0x00080000,0x02000100,0x40080100,
-0x42000100,0x42080000,0x00080100,0x40000000,
-0x02000000,0x40080000,0x40080000,0x00000000,
-0x40000100,0x42080100,0x42080100,0x02000100,
-0x42080000,0x40000100,0x00000000,0x42000000,
-0x02080100,0x02000000,0x42000000,0x00080100,
-0x00080000,0x42000100,0x00000100,0x02000000,
-0x40000000,0x02080000,0x42000100,0x40080100,
-0x02000100,0x40000000,0x42080000,0x02080100,
-0x40080100,0x00000100,0x02000000,0x42080000,
-0x42080100,0x00080100,0x42000000,0x42080100,
-0x02080000,0x00000000,0x40080000,0x42000000,
-0x00080100,0x02000100,0x40000100,0x00080000,
-0x00000000,0x40080000,0x02080100,0x40000100},
-{
-0x20000010,0x20400000,0x00004000,0x20404010,
-0x20400000,0x00000010,0x20404010,0x00400000,
-0x20004000,0x00404010,0x00400000,0x20000010,
-0x00400010,0x20004000,0x20000000,0x00004010,
-0x00000000,0x00400010,0x20004010,0x00004000,
-0x00404000,0x20004010,0x00000010,0x20400010,
-0x20400010,0x00000000,0x00404010,0x20404000,
-0x00004010,0x00404000,0x20404000,0x20000000,
-0x20004000,0x00000010,0x20400010,0x00404000,
-0x20404010,0x00400000,0x00004010,0x20000010,
-0x00400000,0x20004000,0x20000000,0x00004010,
-0x20000010,0x20404010,0x00404000,0x20400000,
-0x00404010,0x20404000,0x00000000,0x20400010,
-0x00000010,0x00004000,0x20400000,0x00404010,
-0x00004000,0x00400010,0x20004010,0x00000000,
-0x20404000,0x20000000,0x00400010,0x20004010},
-{
-0x00200000,0x04200002,0x04000802,0x00000000,
-0x00000800,0x04000802,0x00200802,0x04200800,
-0x04200802,0x00200000,0x00000000,0x04000002,
-0x00000002,0x04000000,0x04200002,0x00000802,
-0x04000800,0x00200802,0x00200002,0x04000800,
-0x04000002,0x04200000,0x04200800,0x00200002,
-0x04200000,0x00000800,0x00000802,0x04200802,
-0x00200800,0x00000002,0x04000000,0x00200800,
-0x04000000,0x00200800,0x00200000,0x04000802,
-0x04000802,0x04200002,0x04200002,0x00000002,
-0x00200002,0x04000000,0x04000800,0x00200000,
-0x04200800,0x00000802,0x00200802,0x04200800,
-0x00000802,0x04000002,0x04200802,0x04200000,
-0x00200800,0x00000000,0x00000002,0x04200802,
-0x00000000,0x00200802,0x04200000,0x00000800,
-0x04000002,0x04000800,0x00000800,0x00200002},
-{
-0x10001040,0x00001000,0x00040000,0x10041040,
-0x10000000,0x10001040,0x00000040,0x10000000,
-0x00040040,0x10040000,0x10041040,0x00041000,
-0x10041000,0x00041040,0x00001000,0x00000040,
-0x10040000,0x10000040,0x10001000,0x00001040,
-0x00041000,0x00040040,0x10040040,0x10041000,
-0x00001040,0x00000000,0x00000000,0x10040040,
-0x10000040,0x10001000,0x00041040,0x00040000,
-0x00041040,0x00040000,0x10041000,0x00001000,
-0x00000040,0x10040040,0x00001000,0x00041040,
-0x10001000,0x00000040,0x10000040,0x10040000,
-0x10040040,0x10000000,0x00040000,0x10001040,
-0x00000000,0x10041040,0x00040040,0x10000040,
-0x10040000,0x10001000,0x10001040,0x00000000,
-0x10041040,0x00041000,0x00041000,0x00001040,
-0x00001040,0x00040040,0x10000000,0x10041000}
-};
-
-
-static INLINE void IPERM(word32* left, word32* right)
-{
-    word32 work;
-
-    *right = rotlFixed(*right, 4U);
-    work = (*left ^ *right) & 0xf0f0f0f0;
-    *left ^= work;
-
-    *right = rotrFixed(*right^work, 20U);
-    work = (*left ^ *right) & 0xffff0000;
-    *left ^= work;
-
-    *right = rotrFixed(*right^work, 18U);
-    work = (*left ^ *right) & 0x33333333;
-    *left ^= work;
-
-    *right = rotrFixed(*right^work, 6U);
-    work = (*left ^ *right) & 0x00ff00ff;
-    *left ^= work;
-
-    *right = rotlFixed(*right^work, 9U);
-    work = (*left ^ *right) & 0xaaaaaaaa;
-    *left = rotlFixed(*left^work, 1U);
-    *right ^= work;
-}
-
-
-static INLINE void FPERM(word32* left, word32* right)
-{
-    word32 work;
-
-    *right = rotrFixed(*right, 1U);
-    work = (*left ^ *right) & 0xaaaaaaaa;
-    *right ^= work;
-
-    *left = rotrFixed(*left^work, 9U);
-    work = (*left ^ *right) & 0x00ff00ff;
-    *right ^= work;
-
-    *left = rotlFixed(*left^work, 6U);
-    work = (*left ^ *right) & 0x33333333;
-    *right ^= work;
-
-    *left = rotlFixed(*left^work, 18U);
-    work = (*left ^ *right) & 0xffff0000;
-    *right ^= work;
-
-    *left = rotlFixed(*left^work, 20U);
-    work = (*left ^ *right) & 0xf0f0f0f0;
-    *right ^= work;
-
-    *left = rotrFixed(*left^work, 4U);
-}
-
-
-static int DesSetKey(const byte* key, int dir, word32* out)
-{
-#ifdef WOLFSSL_SMALL_STACK
-    byte* buffer = (byte*)XMALLOC(56+56+8, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-
-    if (buffer == NULL)
-        return MEMORY_E;
 #else
-    byte buffer[56+56+8];
+    #define NEED_SOFT_DES
+
 #endif
 
+
+#ifdef NEED_SOFT_DES
+
+    /* permuted choice table (key) */
+    static const byte pc1[] = {
+           57, 49, 41, 33, 25, 17,  9,
+            1, 58, 50, 42, 34, 26, 18,
+           10,  2, 59, 51, 43, 35, 27,
+           19, 11,  3, 60, 52, 44, 36,
+
+           63, 55, 47, 39, 31, 23, 15,
+            7, 62, 54, 46, 38, 30, 22,
+           14,  6, 61, 53, 45, 37, 29,
+           21, 13,  5, 28, 20, 12,  4
+    };
+
+    /* number left rotations of pc1 */
+    static const byte totrot[] = {
+           1,2,4,6,8,10,12,14,15,17,19,21,23,25,27,28
+    };
+
+    /* permuted choice key (table) */
+    static const byte pc2[] = {
+           14, 17, 11, 24,  1,  5,
+            3, 28, 15,  6, 21, 10,
+           23, 19, 12,  4, 26,  8,
+           16,  7, 27, 20, 13,  2,
+           41, 52, 31, 37, 47, 55,
+           30, 40, 51, 45, 33, 48,
+           44, 49, 39, 56, 34, 53,
+           46, 42, 50, 36, 29, 32
+    };
+
+    /* End of DES-defined tables */
+
+    /* bit 0 is left-most in byte */
+    static const int bytebit[] = {
+        0200,0100,040,020,010,04,02,01
+    };
+
+    static const word32 Spbox[8][64] = {
+    {   0x01010400,0x00000000,0x00010000,0x01010404,
+        0x01010004,0x00010404,0x00000004,0x00010000,
+        0x00000400,0x01010400,0x01010404,0x00000400,
+        0x01000404,0x01010004,0x01000000,0x00000004,
+        0x00000404,0x01000400,0x01000400,0x00010400,
+        0x00010400,0x01010000,0x01010000,0x01000404,
+        0x00010004,0x01000004,0x01000004,0x00010004,
+        0x00000000,0x00000404,0x00010404,0x01000000,
+        0x00010000,0x01010404,0x00000004,0x01010000,
+        0x01010400,0x01000000,0x01000000,0x00000400,
+        0x01010004,0x00010000,0x00010400,0x01000004,
+        0x00000400,0x00000004,0x01000404,0x00010404,
+        0x01010404,0x00010004,0x01010000,0x01000404,
+        0x01000004,0x00000404,0x00010404,0x01010400,
+        0x00000404,0x01000400,0x01000400,0x00000000,
+        0x00010004,0x00010400,0x00000000,0x01010004},
+    {   0x80108020,0x80008000,0x00008000,0x00108020,
+        0x00100000,0x00000020,0x80100020,0x80008020,
+        0x80000020,0x80108020,0x80108000,0x80000000,
+        0x80008000,0x00100000,0x00000020,0x80100020,
+        0x00108000,0x00100020,0x80008020,0x00000000,
+        0x80000000,0x00008000,0x00108020,0x80100000,
+        0x00100020,0x80000020,0x00000000,0x00108000,
+        0x00008020,0x80108000,0x80100000,0x00008020,
+        0x00000000,0x00108020,0x80100020,0x00100000,
+        0x80008020,0x80100000,0x80108000,0x00008000,
+        0x80100000,0x80008000,0x00000020,0x80108020,
+        0x00108020,0x00000020,0x00008000,0x80000000,
+        0x00008020,0x80108000,0x00100000,0x80000020,
+        0x00100020,0x80008020,0x80000020,0x00100020,
+        0x00108000,0x00000000,0x80008000,0x00008020,
+        0x80000000,0x80100020,0x80108020,0x00108000},
+    {   0x00000208,0x08020200,0x00000000,0x08020008,
+        0x08000200,0x00000000,0x00020208,0x08000200,
+        0x00020008,0x08000008,0x08000008,0x00020000,
+        0x08020208,0x00020008,0x08020000,0x00000208,
+        0x08000000,0x00000008,0x08020200,0x00000200,
+        0x00020200,0x08020000,0x08020008,0x00020208,
+        0x08000208,0x00020200,0x00020000,0x08000208,
+        0x00000008,0x08020208,0x00000200,0x08000000,
+        0x08020200,0x08000000,0x00020008,0x00000208,
+        0x00020000,0x08020200,0x08000200,0x00000000,
+        0x00000200,0x00020008,0x08020208,0x08000200,
+        0x08000008,0x00000200,0x00000000,0x08020008,
+        0x08000208,0x00020000,0x08000000,0x08020208,
+        0x00000008,0x00020208,0x00020200,0x08000008,
+        0x08020000,0x08000208,0x00000208,0x08020000,
+        0x00020208,0x00000008,0x08020008,0x00020200},
+    {   0x00802001,0x00002081,0x00002081,0x00000080,
+        0x00802080,0x00800081,0x00800001,0x00002001,
+        0x00000000,0x00802000,0x00802000,0x00802081,
+        0x00000081,0x00000000,0x00800080,0x00800001,
+        0x00000001,0x00002000,0x00800000,0x00802001,
+        0x00000080,0x00800000,0x00002001,0x00002080,
+        0x00800081,0x00000001,0x00002080,0x00800080,
+        0x00002000,0x00802080,0x00802081,0x00000081,
+        0x00800080,0x00800001,0x00802000,0x00802081,
+        0x00000081,0x00000000,0x00000000,0x00802000,
+        0x00002080,0x00800080,0x00800081,0x00000001,
+        0x00802001,0x00002081,0x00002081,0x00000080,
+        0x00802081,0x00000081,0x00000001,0x00002000,
+        0x00800001,0x00002001,0x00802080,0x00800081,
+        0x00002001,0x00002080,0x00800000,0x00802001,
+        0x00000080,0x00800000,0x00002000,0x00802080},
+    {   0x00000100,0x02080100,0x02080000,0x42000100,
+        0x00080000,0x00000100,0x40000000,0x02080000,
+        0x40080100,0x00080000,0x02000100,0x40080100,
+        0x42000100,0x42080000,0x00080100,0x40000000,
+        0x02000000,0x40080000,0x40080000,0x00000000,
+        0x40000100,0x42080100,0x42080100,0x02000100,
+        0x42080000,0x40000100,0x00000000,0x42000000,
+        0x02080100,0x02000000,0x42000000,0x00080100,
+        0x00080000,0x42000100,0x00000100,0x02000000,
+        0x40000000,0x02080000,0x42000100,0x40080100,
+        0x02000100,0x40000000,0x42080000,0x02080100,
+        0x40080100,0x00000100,0x02000000,0x42080000,
+        0x42080100,0x00080100,0x42000000,0x42080100,
+        0x02080000,0x00000000,0x40080000,0x42000000,
+        0x00080100,0x02000100,0x40000100,0x00080000,
+        0x00000000,0x40080000,0x02080100,0x40000100},
+    {   0x20000010,0x20400000,0x00004000,0x20404010,
+        0x20400000,0x00000010,0x20404010,0x00400000,
+        0x20004000,0x00404010,0x00400000,0x20000010,
+        0x00400010,0x20004000,0x20000000,0x00004010,
+        0x00000000,0x00400010,0x20004010,0x00004000,
+        0x00404000,0x20004010,0x00000010,0x20400010,
+        0x20400010,0x00000000,0x00404010,0x20404000,
+        0x00004010,0x00404000,0x20404000,0x20000000,
+        0x20004000,0x00000010,0x20400010,0x00404000,
+        0x20404010,0x00400000,0x00004010,0x20000010,
+        0x00400000,0x20004000,0x20000000,0x00004010,
+        0x20000010,0x20404010,0x00404000,0x20400000,
+        0x00404010,0x20404000,0x00000000,0x20400010,
+        0x00000010,0x00004000,0x20400000,0x00404010,
+        0x00004000,0x00400010,0x20004010,0x00000000,
+        0x20404000,0x20000000,0x00400010,0x20004010},
+    {   0x00200000,0x04200002,0x04000802,0x00000000,
+        0x00000800,0x04000802,0x00200802,0x04200800,
+        0x04200802,0x00200000,0x00000000,0x04000002,
+        0x00000002,0x04000000,0x04200002,0x00000802,
+        0x04000800,0x00200802,0x00200002,0x04000800,
+        0x04000002,0x04200000,0x04200800,0x00200002,
+        0x04200000,0x00000800,0x00000802,0x04200802,
+        0x00200800,0x00000002,0x04000000,0x00200800,
+        0x04000000,0x00200800,0x00200000,0x04000802,
+        0x04000802,0x04200002,0x04200002,0x00000002,
+        0x00200002,0x04000000,0x04000800,0x00200000,
+        0x04200800,0x00000802,0x00200802,0x04200800,
+        0x00000802,0x04000002,0x04200802,0x04200000,
+        0x00200800,0x00000000,0x00000002,0x04200802,
+        0x00000000,0x00200802,0x04200000,0x00000800,
+        0x04000002,0x04000800,0x00000800,0x00200002},
+    {   0x10001040,0x00001000,0x00040000,0x10041040,
+        0x10000000,0x10001040,0x00000040,0x10000000,
+        0x00040040,0x10040000,0x10041040,0x00041000,
+        0x10041000,0x00041040,0x00001000,0x00000040,
+        0x10040000,0x10000040,0x10001000,0x00001040,
+        0x00041000,0x00040040,0x10040040,0x10041000,
+        0x00001040,0x00000000,0x00000000,0x10040040,
+        0x10000040,0x10001000,0x00041040,0x00040000,
+        0x00041040,0x00040000,0x10041000,0x00001000,
+        0x00000040,0x10040040,0x00001000,0x00041040,
+        0x10001000,0x00000040,0x10000040,0x10040000,
+        0x10040040,0x10000000,0x00040000,0x10001040,
+        0x00000000,0x10041040,0x00040040,0x10000040,
+        0x10040000,0x10001000,0x10001040,0x00000000,
+        0x10041040,0x00041000,0x00041000,0x00001040,
+        0x00001040,0x00040040,0x10000000,0x10041000}
+    };
+
+    static INLINE void IPERM(word32* left, word32* right)
     {
-        byte* const  pc1m = buffer;               /* place to modify pc1 into */
-        byte* const  pcr  = pc1m + 56;            /* place to rotate pc1 into */
-        byte* const  ks   = pcr  + 56;
-        register int i, j, l;
-        int          m;
+        word32 work;
 
-        for (j = 0; j < 56; j++) {             /* convert pc1 to bits of key  */
-            l = pc1[j] - 1;                    /* integer bit location        */
-            m = l & 07;                        /* find bit                    */
-            pc1m[j] = (key[l >> 3] &           /* find which key byte l is in */
-                bytebit[m])                    /* and which bit of that byte  */
-                ? 1 : 0;                       /* and store 1-bit result      */
-        }
+        *right = rotlFixed(*right, 4U);
+        work = (*left ^ *right) & 0xf0f0f0f0;
+        *left ^= work;
 
-        for (i = 0; i < 16; i++) {            /* key chunk for each iteration */
-            XMEMSET(ks, 0, 8);                /* Clear key schedule */
+        *right = rotrFixed(*right^work, 20U);
+        work = (*left ^ *right) & 0xffff0000;
+        *left ^= work;
 
-            for (j = 0; j < 56; j++)          /* rotate pc1 the right amount  */
-                pcr[j] =
-                      pc1m[(l = j + totrot[i]) < (j < 28 ? 28 : 56) ? l : l-28];
+        *right = rotrFixed(*right^work, 18U);
+        work = (*left ^ *right) & 0x33333333;
+        *left ^= work;
 
-            /* rotate left and right halves independently */
-            for (j = 0; j < 48; j++) {        /* select bits individually     */
-                if (pcr[pc2[j] - 1]) {        /* check bit that goes to ks[j] */
-                    l= j % 6;                 /* mask it in if it's there     */
-                    ks[j/6] |= bytebit[l] >> 2;
+        *right = rotrFixed(*right^work, 6U);
+        work = (*left ^ *right) & 0x00ff00ff;
+        *left ^= work;
+
+        *right = rotlFixed(*right^work, 9U);
+        work = (*left ^ *right) & 0xaaaaaaaa;
+        *left = rotlFixed(*left^work, 1U);
+        *right ^= work;
+    }
+
+    static INLINE void FPERM(word32* left, word32* right)
+    {
+        word32 work;
+
+        *right = rotrFixed(*right, 1U);
+        work = (*left ^ *right) & 0xaaaaaaaa;
+        *right ^= work;
+
+        *left = rotrFixed(*left^work, 9U);
+        work = (*left ^ *right) & 0x00ff00ff;
+        *right ^= work;
+
+        *left = rotlFixed(*left^work, 6U);
+        work = (*left ^ *right) & 0x33333333;
+        *right ^= work;
+
+        *left = rotlFixed(*left^work, 18U);
+        work = (*left ^ *right) & 0xffff0000;
+        *right ^= work;
+
+        *left = rotlFixed(*left^work, 20U);
+        work = (*left ^ *right) & 0xf0f0f0f0;
+        *right ^= work;
+
+        *left = rotrFixed(*left^work, 4U);
+    }
+
+    static int DesSetKey(const byte* key, int dir, word32* out)
+    {
+        #define DES_KEY_BUFFER_SIZE (56+56+8)
+    #ifdef WOLFSSL_SMALL_STACK
+        byte* buffer = (byte*)XMALLOC(DES_KEY_BUFFER_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+
+        if (buffer == NULL)
+            return MEMORY_E;
+    #else
+        byte buffer[DES_KEY_BUFFER_SIZE];
+    #endif
+
+        {
+            byte* const  pc1m = buffer;            /* place to modify pc1 into */
+            byte* const  pcr  = pc1m + 56;         /* place to rotate pc1 into */
+            byte* const  ks   = pcr  + 56;
+            register int i, j, l;
+            int          m;
+
+            for (j = 0; j < 56; j++) {             /* convert pc1 to bits of key  */
+                l = pc1[j] - 1;                    /* integer bit location        */
+                m = l & 07;                        /* find bit                    */
+                pc1m[j] = (key[l >> 3] &           /* find which key byte l is in */
+                    bytebit[m])                    /* and which bit of that byte  */
+                    ? 1 : 0;                       /* and store 1-bit result      */
+            }
+
+            for (i = 0; i < 16; i++) {            /* key chunk for each iteration */
+                XMEMSET(ks, 0, 8);                /* Clear key schedule */
+
+                for (j = 0; j < 56; j++)          /* rotate pc1 the right amount  */
+                    pcr[j] =
+                          pc1m[(l = j + totrot[i]) < (j < 28 ? 28 : 56) ? l : l-28];
+
+                /* rotate left and right halves independently */
+                for (j = 0; j < 48; j++) {        /* select bits individually     */
+                    if (pcr[pc2[j] - 1]) {        /* check bit that goes to ks[j] */
+                        l= j % 6;                 /* mask it in if it's there     */
+                        ks[j/6] |= bytebit[l] >> 2;
+                    }
+                }
+
+                /* Now convert to odd/even interleaved form for use in F */
+                out[2*i] = ((word32) ks[0] << 24)
+                         | ((word32) ks[2] << 16)
+                         | ((word32) ks[4] << 8)
+                         | ((word32) ks[6]);
+
+                out[2*i + 1] = ((word32) ks[1] << 24)
+                             | ((word32) ks[3] << 16)
+                             | ((word32) ks[5] << 8)
+                             | ((word32) ks[7]);
+            }
+
+            /* reverse key schedule order */
+            if (dir == DES_DECRYPTION) {
+                for (i = 0; i < 16; i += 2) {
+                    word32 swap = out[i];
+                    out[i] = out[DES_KS_SIZE - 2 - i];
+                    out[DES_KS_SIZE - 2 - i] = swap;
+
+                    swap = out[i + 1];
+                    out[i + 1] = out[DES_KS_SIZE - 1 - i];
+                    out[DES_KS_SIZE - 1 - i] = swap;
                 }
             }
 
-            /* Now convert to odd/even interleaved form for use in F */
-            out[2*i] = ((word32) ks[0] << 24)
-                     | ((word32) ks[2] << 16)
-                     | ((word32) ks[4] << 8)
-                     | ((word32) ks[6]);
-
-            out[2*i + 1] = ((word32) ks[1] << 24)
-                         | ((word32) ks[3] << 16)
-                         | ((word32) ks[5] << 8)
-                         | ((word32) ks[7]);
+    #ifdef WOLFSSL_SMALL_STACK
+            XFREE(buffer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
         }
 
-        /* reverse key schedule order */
-        if (dir == DES_DECRYPTION) {
-            for (i = 0; i < 16; i += 2) {
-                word32 swap = out[i];
-                out[i] = out[DES_KS_SIZE - 2 - i];
-                out[DES_KS_SIZE - 2 - i] = swap;
-
-                swap = out[i + 1];
-                out[i + 1] = out[DES_KS_SIZE - 1 - i];
-                out[DES_KS_SIZE - 1 - i] = swap;
-            }
-        }
-
-#ifdef WOLFSSL_SMALL_STACK
-        XFREE(buffer, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+        return 0;
     }
 
-    return 0;
-}
-
-
-static INLINE int Reverse(int dir)
-{
-    return !dir;
-}
-
-
-int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir)
-{
-    wc_Des_SetIV(des, iv);
-
-    return DesSetKey(key, dir, des->key);
-}
-
-
-int wc_Des3_SetKey(Des3* des, const byte* key, const byte* iv, int dir)
-{
-    int ret;
-
-#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
-    if (des->asyncDev.marker == WOLFSSL_ASYNC_MARKER_3DES) {
-        return NitroxDes3SetKey(des, key, iv);
-    }
-#endif
-
-    ret = DesSetKey(key + (dir == DES_ENCRYPTION ? 0:16), dir, des->key[0]);
-    if (ret != 0)
-        return ret;
-
-    ret = DesSetKey(key + 8, Reverse(dir), des->key[1]);
-    if (ret != 0)
-        return ret;
-
-    ret = DesSetKey(key + (dir == DES_DECRYPTION ? 0:16), dir, des->key[2]);
-    if (ret != 0)
-        return ret;
-
-    return wc_Des3_SetIV(des, iv);
-}
-
-
-static void DesRawProcessBlock(word32* lIn, word32* rIn, const word32* kptr)
-{
-    word32 l = *lIn, r = *rIn, i;
-
-    for (i=0; i<8; i++)
+    int wc_Des_SetKey(Des* des, const byte* key, const byte* iv, int dir)
     {
-        word32 work = rotrFixed(r, 4U) ^ kptr[4*i+0];
-        l ^= Spbox[6][(work) & 0x3f]
-          ^  Spbox[4][(work >> 8) & 0x3f]
-          ^  Spbox[2][(work >> 16) & 0x3f]
-          ^  Spbox[0][(work >> 24) & 0x3f];
-        work = r ^ kptr[4*i+1];
-        l ^= Spbox[7][(work) & 0x3f]
-          ^  Spbox[5][(work >> 8) & 0x3f]
-          ^  Spbox[3][(work >> 16) & 0x3f]
-          ^  Spbox[1][(work >> 24) & 0x3f];
+        wc_Des_SetIV(des, iv);
 
-        work = rotrFixed(l, 4U) ^ kptr[4*i+2];
-        r ^= Spbox[6][(work) & 0x3f]
-          ^  Spbox[4][(work >> 8) & 0x3f]
-          ^  Spbox[2][(work >> 16) & 0x3f]
-          ^  Spbox[0][(work >> 24) & 0x3f];
-        work = l ^ kptr[4*i+3];
-        r ^= Spbox[7][(work) & 0x3f]
-          ^  Spbox[5][(work >> 8) & 0x3f]
-          ^  Spbox[3][(work >> 16) & 0x3f]
-          ^  Spbox[1][(work >> 24) & 0x3f];
+        return DesSetKey(key, dir, des->key);
     }
 
-    *lIn = l; *rIn = r;
-}
+    int wc_Des3_SetKey(Des3* des, const byte* key, const byte* iv, int dir)
+    {
+        int ret;
 
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES)
+        if (des->asyncDev.marker == WOLFSSL_ASYNC_MARKER_3DES) {
+            /* key_raw/iv_raw point to the caller's original key/IV */
+            des->key_raw = key;
+            des->iv_raw = iv;
 
-static void DesProcessBlock(Des* des, const byte* in, byte* out)
-{
-    word32 l, r;
+            /* continue on to set normal key for smaller DES operations */
+        }
+    #endif /* WOLFSSL_ASYNC_CRYPT */
 
-    XMEMCPY(&l, in, sizeof(l));
-    XMEMCPY(&r, in + sizeof(l), sizeof(r));
-    #ifdef LITTLE_ENDIAN_ORDER
-        l = ByteReverseWord32(l);
-        r = ByteReverseWord32(r);
-    #endif
-    IPERM(&l,&r);
+        ret = DesSetKey(key + (dir == DES_ENCRYPTION ? 0:16), dir, des->key[0]);
+        if (ret != 0)
+            return ret;
 
-    DesRawProcessBlock(&l, &r, des->key);
+        ret = DesSetKey(key + 8, !dir, des->key[1]);
+        if (ret != 0)
+            return ret;
 
-    FPERM(&l,&r);
-    #ifdef LITTLE_ENDIAN_ORDER
-        l = ByteReverseWord32(l);
-        r = ByteReverseWord32(r);
-    #endif
-    XMEMCPY(out, &r, sizeof(r));
-    XMEMCPY(out + sizeof(r), &l, sizeof(l));
-}
+        ret = DesSetKey(key + (dir == DES_DECRYPTION ? 0:16), dir, des->key[2]);
+        if (ret != 0)
+            return ret;
 
-
-static void Des3ProcessBlock(Des3* des, const byte* in, byte* out)
-{
-    word32 l, r;
-
-    XMEMCPY(&l, in, sizeof(l));
-    XMEMCPY(&r, in + sizeof(l), sizeof(r));
-    #ifdef LITTLE_ENDIAN_ORDER
-        l = ByteReverseWord32(l);
-        r = ByteReverseWord32(r);
-    #endif
-    IPERM(&l,&r);
-
-    DesRawProcessBlock(&l, &r, des->key[0]);
-    DesRawProcessBlock(&r, &l, des->key[1]);
-    DesRawProcessBlock(&l, &r, des->key[2]);
-
-    FPERM(&l,&r);
-    #ifdef LITTLE_ENDIAN_ORDER
-        l = ByteReverseWord32(l);
-        r = ByteReverseWord32(r);
-    #endif
-    XMEMCPY(out, &r, sizeof(r));
-    XMEMCPY(out + sizeof(r), &l, sizeof(l));
-}
-
-
-int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz)
-{
-    word32 blocks = sz / DES_BLOCK_SIZE;
-
-    while (blocks--) {
-        xorbuf((byte*)des->reg, in, DES_BLOCK_SIZE);
-        DesProcessBlock(des, (byte*)des->reg, (byte*)des->reg);
-        XMEMCPY(out, des->reg, DES_BLOCK_SIZE);
-
-        out += DES_BLOCK_SIZE;
-        in  += DES_BLOCK_SIZE;
-    }
-    return 0;
-}
-
-
-int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz)
-{
-    word32 blocks = sz / DES_BLOCK_SIZE;
-
-    while (blocks--) {
-        XMEMCPY(des->tmp, in, DES_BLOCK_SIZE);
-        DesProcessBlock(des, (byte*)des->tmp, out);
-        xorbuf(out, (byte*)des->reg, DES_BLOCK_SIZE);
-        XMEMCPY(des->reg, des->tmp, DES_BLOCK_SIZE);
-
-        out += DES_BLOCK_SIZE;
-        in  += DES_BLOCK_SIZE;
-    }
-    return 0;
-}
-
-
-int wc_Des3_CbcEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
-{
-    word32 blocks;
-
-#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
-    if (des->asyncDev.marker == WOLFSSL_ASYNC_MARKER_3DES) {
-        return NitroxDes3CbcEncrypt(des, out, in, sz);
-    }
-#endif
-
-    blocks = sz / DES_BLOCK_SIZE;
-    while (blocks--) {
-        xorbuf((byte*)des->reg, in, DES_BLOCK_SIZE);
-        Des3ProcessBlock(des, (byte*)des->reg, (byte*)des->reg);
-        XMEMCPY(out, des->reg, DES_BLOCK_SIZE);
-
-        out += DES_BLOCK_SIZE;
-        in  += DES_BLOCK_SIZE;
-    }
-    return 0;
-}
-
-
-int wc_Des3_CbcDecrypt(Des3* des, byte* out, const byte* in, word32 sz)
-{
-    word32 blocks;
-
-#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
-    if (des->asyncDev.marker == WOLFSSL_ASYNC_MARKER_3DES) {
-        return NitroxDes3CbcDecrypt(des, out, in, sz);
-    }
-#endif
-
-    blocks = sz / DES_BLOCK_SIZE;
-    while (blocks--) {
-        XMEMCPY(des->tmp, in, DES_BLOCK_SIZE);
-        Des3ProcessBlock(des, (byte*)des->tmp, out);
-        xorbuf(out, (byte*)des->reg, DES_BLOCK_SIZE);
-        XMEMCPY(des->reg, des->tmp, DES_BLOCK_SIZE);
-
-        out += DES_BLOCK_SIZE;
-        in  += DES_BLOCK_SIZE;
-    }
-    return 0;
-}
-
-#ifdef WOLFSSL_DES_ECB
-
-/* One block, compatibility only */
-int wc_Des_EcbEncrypt(Des* des, byte* out, const byte* in, word32 sz)
-{
-    word32 blocks = sz / DES_BLOCK_SIZE;
-
-    if (des == NULL || out == NULL || in == NULL) {
-        return BAD_FUNC_ARG;
+        return wc_Des3_SetIV(des, iv);
     }
 
-    while (blocks--) {
-        DesProcessBlock(des, in, out);
+    static void DesRawProcessBlock(word32* lIn, word32* rIn, const word32* kptr)
+    {
+        word32 l = *lIn, r = *rIn, i;
 
-        out += DES_BLOCK_SIZE;
-        in  += DES_BLOCK_SIZE;
-    }
-    return 0;
-}
+        for (i=0; i<8; i++)
+        {
+            word32 work = rotrFixed(r, 4U) ^ kptr[4*i+0];
+            l ^= Spbox[6][(work) & 0x3f]
+              ^  Spbox[4][(work >> 8) & 0x3f]
+              ^  Spbox[2][(work >> 16) & 0x3f]
+              ^  Spbox[0][(work >> 24) & 0x3f];
+            work = r ^ kptr[4*i+1];
+            l ^= Spbox[7][(work) & 0x3f]
+              ^  Spbox[5][(work >> 8) & 0x3f]
+              ^  Spbox[3][(work >> 16) & 0x3f]
+              ^  Spbox[1][(work >> 24) & 0x3f];
 
-int wc_Des3_EcbEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
-{
-    word32 blocks = sz / DES_BLOCK_SIZE;
-    /* printf("wc_Des3_EcbEncrypt(%016x, %016x, %d)\n",
-        *(unsigned long *)in, *(unsigned long *)out, sz) ; */
+            work = rotrFixed(l, 4U) ^ kptr[4*i+2];
+            r ^= Spbox[6][(work) & 0x3f]
+              ^  Spbox[4][(work >> 8) & 0x3f]
+              ^  Spbox[2][(work >> 16) & 0x3f]
+              ^  Spbox[0][(work >> 24) & 0x3f];
+            work = l ^ kptr[4*i+3];
+            r ^= Spbox[7][(work) & 0x3f]
+              ^  Spbox[5][(work >> 8) & 0x3f]
+              ^  Spbox[3][(work >> 16) & 0x3f]
+              ^  Spbox[1][(work >> 24) & 0x3f];
+        }
 
-    if (des == NULL || out == NULL || in == NULL) {
-        return BAD_FUNC_ARG;
+        *lIn = l; *rIn = r;
     }
 
-    while (blocks--) {
-        Des3ProcessBlock(des, in, out);
+    static void DesProcessBlock(Des* des, const byte* in, byte* out)
+    {
+        word32 l, r;
 
-        out += DES_BLOCK_SIZE;
-        in  += DES_BLOCK_SIZE;
+        XMEMCPY(&l, in, sizeof(l));
+        XMEMCPY(&r, in + sizeof(l), sizeof(r));
+        #ifdef LITTLE_ENDIAN_ORDER
+            l = ByteReverseWord32(l);
+            r = ByteReverseWord32(r);
+        #endif
+        IPERM(&l,&r);
+
+        DesRawProcessBlock(&l, &r, des->key);
+
+        FPERM(&l,&r);
+        #ifdef LITTLE_ENDIAN_ORDER
+            l = ByteReverseWord32(l);
+            r = ByteReverseWord32(r);
+        #endif
+        XMEMCPY(out, &r, sizeof(r));
+        XMEMCPY(out + sizeof(r), &l, sizeof(l));
     }
-    return 0;
-}
 
-#endif /* WOLFSSL_DES_ECB */
+    static void Des3ProcessBlock(Des3* des, const byte* in, byte* out)
+    {
+        word32 l, r;
 
-#endif /* End wolfCrypt software implementation */
+        XMEMCPY(&l, in, sizeof(l));
+        XMEMCPY(&r, in + sizeof(l), sizeof(r));
+        #ifdef LITTLE_ENDIAN_ORDER
+            l = ByteReverseWord32(l);
+            r = ByteReverseWord32(r);
+        #endif
+        IPERM(&l,&r);
+
+        DesRawProcessBlock(&l, &r, des->key[0]);
+        DesRawProcessBlock(&r, &l, des->key[1]);
+        DesRawProcessBlock(&l, &r, des->key[2]);
+
+        FPERM(&l,&r);
+        #ifdef LITTLE_ENDIAN_ORDER
+            l = ByteReverseWord32(l);
+            r = ByteReverseWord32(r);
+        #endif
+        XMEMCPY(out, &r, sizeof(r));
+        XMEMCPY(out + sizeof(r), &l, sizeof(l));
+    }
+
+    int wc_Des_CbcEncrypt(Des* des, byte* out, const byte* in, word32 sz)
+    {
+        byte*  chain = (byte*)des->reg;  /* CBC chaining block */
+        word32 left;
+        /* whole blocks only; a trailing partial block is left untouched */
+        for (left = sz / DES_BLOCK_SIZE; left > 0; left--) {
+            xorbuf(chain, in, DES_BLOCK_SIZE);
+            DesProcessBlock(des, chain, chain);
+            XMEMCPY(out, chain, DES_BLOCK_SIZE);
+            in  += DES_BLOCK_SIZE;
+            out += DES_BLOCK_SIZE;
+        }
+        return 0;
+    }
+
+    int wc_Des_CbcDecrypt(Des* des, byte* out, const byte* in, word32 sz)
+    {
+        byte*  saved = (byte*)des->tmp;  /* current input block */
+        byte*  chain = (byte*)des->reg;  /* previous input block or IV */
+        word32 left;
+        for (left = sz / DES_BLOCK_SIZE; left > 0; left--) {
+            XMEMCPY(saved, in, DES_BLOCK_SIZE);
+            DesProcessBlock(des, saved, out);
+            xorbuf(out, chain, DES_BLOCK_SIZE);
+            XMEMCPY(chain, saved, DES_BLOCK_SIZE);
+            in  += DES_BLOCK_SIZE;
+            out += DES_BLOCK_SIZE;
+        }
+        return 0;
+    }
+
+    int wc_Des3_CbcEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
+    {
+        word32 blocks;
+        /* 3DES-CBC encrypt via async device when selected, else in software */
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES)
+        if (des->asyncDev.marker == WOLFSSL_ASYNC_MARKER_3DES &&
+                                            sz >= WC_ASYNC_THRESH_DES3_CBC) {
+        #if defined(HAVE_CAVIUM)
+            return NitroxDes3CbcEncrypt(des, out, in, sz);
+        #elif defined(HAVE_INTEL_QA)
+            return IntelQaSymDes3CbcEncrypt(&des->asyncDev, out, in, sz,
+                des->key_raw, DES3_KEYLEN, (byte*)des->iv_raw, DES3_IVLEN);
+        #else /* WOLFSSL_ASYNC_CRYPT_TEST */
+            WC_ASYNC_TEST* testDev = &des->asyncDev.test;
+            if (testDev->type == ASYNC_TEST_NONE) {
+                testDev->type = ASYNC_TEST_DES3_CBC_ENCRYPT;
+                testDev->des.des = des;
+                testDev->des.out = out;
+                testDev->des.in = in;
+                testDev->des.sz = sz;
+                return WC_PENDING_E;
+            }
+        #endif
+        }
+    #endif /* WOLFSSL_ASYNC_CRYPT */
+        /* software path; sz / DES_BLOCK_SIZE drops any partial tail block */
+        blocks = sz / DES_BLOCK_SIZE;
+        while (blocks--) {
+            xorbuf((byte*)des->reg, in, DES_BLOCK_SIZE);
+            Des3ProcessBlock(des, (byte*)des->reg, (byte*)des->reg);
+            XMEMCPY(out, des->reg, DES_BLOCK_SIZE);
+
+            out += DES_BLOCK_SIZE;
+            in  += DES_BLOCK_SIZE;
+        }
+        return 0;
+    }
+
+
+    int wc_Des3_CbcDecrypt(Des3* des, byte* out, const byte* in, word32 sz)
+    {
+        word32 blocks;
+        /* 3DES-CBC decrypt via async device when selected, else in software */
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES)
+        if (des->asyncDev.marker == WOLFSSL_ASYNC_MARKER_3DES &&
+                                            sz >= WC_ASYNC_THRESH_DES3_CBC) {
+        #if defined(HAVE_CAVIUM)
+            return NitroxDes3CbcDecrypt(des, out, in, sz);
+        #elif defined(HAVE_INTEL_QA)
+            return IntelQaSymDes3CbcDecrypt(&des->asyncDev, out, in, sz,
+                des->key_raw, DES3_KEYLEN, (byte*)des->iv_raw, DES3_IVLEN);
+        #else /* WOLFSSL_ASYNC_CRYPT_TEST */
+            WC_ASYNC_TEST* testDev = &des->asyncDev.test;
+            if (testDev->type == ASYNC_TEST_NONE) {
+                testDev->type = ASYNC_TEST_DES3_CBC_DECRYPT;
+                testDev->des.des = des;
+                testDev->des.out = out;
+                testDev->des.in = in;
+                testDev->des.sz = sz;
+                return WC_PENDING_E;
+            }
+        #endif
+        }
+    #endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_3DES */
+
+        blocks = sz / DES_BLOCK_SIZE; /* any partial tail block is ignored */
+        while (blocks--) {
+            XMEMCPY(des->tmp, in, DES_BLOCK_SIZE);
+            Des3ProcessBlock(des, (byte*)des->tmp, out);
+            xorbuf(out, (byte*)des->reg, DES_BLOCK_SIZE);
+            XMEMCPY(des->reg, des->tmp, DES_BLOCK_SIZE);
+
+            out += DES_BLOCK_SIZE;
+            in  += DES_BLOCK_SIZE;
+        }
+        return 0;
+    }
+
+    #ifdef WOLFSSL_DES_ECB
+        /* DES-ECB over every whole block of 'in' (compatibility API) */
+        int wc_Des_EcbEncrypt(Des* des, byte* out, const byte* in, word32 sz)
+        {
+            word32 left;
+
+            if (des == NULL || out == NULL || in == NULL) {
+                return BAD_FUNC_ARG;
+            }
+
+            /* sz / DES_BLOCK_SIZE truncates: a partial tail block is skipped */
+            for (left = sz / DES_BLOCK_SIZE; left > 0; left--) {
+                DesProcessBlock(des, in, out);
+                in  += DES_BLOCK_SIZE;
+                out += DES_BLOCK_SIZE;
+            }
+            return 0;
+        }
+
+        int wc_Des3_EcbEncrypt(Des3* des, byte* out, const byte* in, word32 sz)
+        {
+            word32 left;
+
+            /* 3DES-ECB over every whole block; des/out/in must be non-NULL */
+            if (des == NULL || out == NULL || in == NULL) {
+                return BAD_FUNC_ARG;
+            }
+
+            /* sz / DES_BLOCK_SIZE truncates: a partial tail block is skipped */
+            for (left = sz / DES_BLOCK_SIZE; left > 0; left--) {
+                Des3ProcessBlock(des, in, out);
+
+                in  += DES_BLOCK_SIZE;
+                out += DES_BLOCK_SIZE;
+            }
+            return 0;
+        }
+    #endif /* WOLFSSL_DES_ECB */
+
+#endif /* NEED_SOFT_DES */
 
 
 void wc_Des_SetIV(Des* des, const byte* iv)
@@ -1666,7 +1659,6 @@ void wc_Des_SetIV(Des* des, const byte* iv)
         XMEMSET(des->reg,  0, DES_BLOCK_SIZE);
 }
 
-
 int wc_Des3_SetIV(Des3* des, const byte* iv)
 {
     if (des && iv)
@@ -1678,28 +1670,35 @@ int wc_Des3_SetIV(Des3* des, const byte* iv)
 }
 
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-
-/* Initialize Des3 for use with Nitrox device */
-int wc_Des3AsyncInit(Des3* des3, int devId)
+/* Initialize Des3 for use with async device */
+int wc_Des3Init(Des3* des3, void* heap, int devId)
 {
+    int ret = 0;
     if (des3 == NULL)
         return BAD_FUNC_ARG;
 
-    return wolfAsync_DevCtxInit(&des3->asyncDev, WOLFSSL_ASYNC_MARKER_3DES, devId);
+    des3->heap = heap;
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES)
+    ret = wolfAsync_DevCtxInit(&des3->asyncDev, WOLFSSL_ASYNC_MARKER_3DES,
+                                                        des3->heap, devId);
+#else
+    (void)devId;
+#endif
+
+    return ret;
 }
 
-
-/* Free Des3 from use with Nitrox device */
-void wc_Des3AsyncFree(Des3* des3)
+/* Free Des3 from use with async device */
+void wc_Des3Free(Des3* des3)
 {
     if (des3 == NULL)
         return;
 
-    wolfAsync_DevCtxFree(&des3->asyncDev);
-}
-
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_3DES)
+    wolfAsync_DevCtxFree(&des3->asyncDev, WOLFSSL_ASYNC_MARKER_3DES);
 #endif /* WOLFSSL_ASYNC_CRYPT */
+}
 
 #endif /* WOLFSSL_TI_CRYPT */
 #endif /* HAVE_FIPS */
diff --git a/wolfcrypt/src/dh.c b/wolfcrypt/src/dh.c
old mode 100644
new mode 100755
index a3e2e4619..836659a6b
--- a/wolfcrypt/src/dh.c
+++ b/wolfcrypt/src/dh.c
@@ -49,26 +49,43 @@
 #endif
 
 
-int wc_InitDhKey(DhKey* key)
+int wc_InitDhKey_ex(DhKey* key, void* heap, int devId)
 {
     int ret = 0;
 
     if (key == NULL)
         return BAD_FUNC_ARG;
 
+    key->heap = heap; /* for XMALLOC/XFREE in future */
+
     if (mp_init_multi(&key->p, &key->g, NULL, NULL, NULL, NULL) != MP_OKAY)
-        ret = MEMORY_E;
+        return MEMORY_E;
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH)
+    /* handle as async */
+    ret = wolfAsync_DevCtxInit(&key->asyncDev, WOLFSSL_ASYNC_MARKER_DH,
+        key->heap, devId);
+#else
+    (void)devId;
+#endif
 
     return ret;
 }
 
+int wc_InitDhKey(DhKey* key)
+{
+    /* convenience wrapper: NULL heap hint, INVALID_DEVID as the device id */
+    return wc_InitDhKey_ex(key, NULL, INVALID_DEVID);
+}
 
 void wc_FreeDhKey(DhKey* key)
 {
     if (key) {
-    #ifndef USE_FAST_MATH
         mp_clear(&key->p);
         mp_clear(&key->g);
+
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH)
+        wolfAsync_DevCtxFree(&key->asyncDev, WOLFSSL_ASYNC_MARKER_DH);
     #endif
     }
 }
@@ -76,15 +93,15 @@ void wc_FreeDhKey(DhKey* key)
 
 /* if defined to not use floating point values do not compile in */
 #ifndef WOLFSSL_DH_CONST
-static word32 DiscreteLogWorkFactor(word32 n)
-{
-    /* assuming discrete log takes about the same time as factoring */
-    if (n<5)
-        return 0;
-    else
-        return (word32)(2.4 * XPOW((double)n, 1.0/3.0) *
-                XPOW(XLOG((double)n), 2.0/3.0) - 5);
-}
+    static word32 DiscreteLogWorkFactor(word32 n)
+    {
+        /* assuming discrete log takes about the same time as factoring */
+        double bits = (double)n;
+        if (n < 5)
+            return 0;
+        return (word32)(2.4 * XPOW(bits, 1.0/3.0) *
+                XPOW(XLOG(bits), 2.0/3.0) - 5);
+    }
 #endif /* WOLFSSL_DH_CONST*/
 
 
@@ -104,9 +121,9 @@ static word32 DiscreteLogWorkFactor(word32 n)
 #endif
 
 
-static int GeneratePrivate(DhKey* key, WC_RNG* rng, byte* priv, word32* privSz)
+static int GeneratePrivateDh(DhKey* key, WC_RNG* rng, byte* priv, word32* privSz)
 {
-    int ret;
+    int ret = 0;
     word32 sz = mp_unsigned_bin_size(&key->p);
 
     /* Table of predetermined values from the operation
@@ -124,33 +141,31 @@ static int GeneratePrivate(DhKey* key, WC_RNG* rng, byte* priv, word32* privSz)
         case 896:  sz = 49; break;
         case 1024: sz = 52; break;
         default:
-            #ifndef WOLFSSL_DH_CONST
-                /* if using floating points and size of p is not in table */
-                sz = min(sz, 2 * DiscreteLogWorkFactor(sz * WOLFSSL_BIT_SIZE) /
-                                           WOLFSSL_BIT_SIZE + 1);
-                break;
-            #else
-                return BAD_FUNC_ARG;
-            #endif
+        #ifndef WOLFSSL_DH_CONST
+            /* if using floating points and size of p is not in table */
+            sz = min(sz, 2 * DiscreteLogWorkFactor(sz * WOLFSSL_BIT_SIZE) /
+                                       WOLFSSL_BIT_SIZE + 1);
+            break;
+        #else
+            return BAD_FUNC_ARG;
+        #endif
     }
 
     ret = wc_RNG_GenerateBlock(rng, priv, sz);
-    if (ret != 0)
-        return ret;
 
-    priv[0] |= 0x0C;
+    if (ret == 0) {
+        priv[0] |= 0x0C;
+        *privSz = sz;
+    }
 
-    *privSz = sz;
-
-    return 0;
+    return ret;
 }
 
 
-static int GeneratePublic(DhKey* key, const byte* priv, word32 privSz,
-                          byte* pub, word32* pubSz)
+static int GeneratePublicDh(DhKey* key, byte* priv, word32 privSz,
+    byte* pub, word32* pubSz)
 {
     int ret = 0;
-
     mp_int x;
     mp_int y;
 
@@ -175,22 +190,75 @@ static int GeneratePublic(DhKey* key, const byte* priv, word32 privSz,
     return ret;
 }
 
-
-int wc_DhGenerateKeyPair(DhKey* key, WC_RNG* rng, byte* priv, word32* privSz,
-                      byte* pub, word32* pubSz)
+static int wc_DhGenerateKeyPair_Sync(DhKey* key, WC_RNG* rng,
+    byte* priv, word32* privSz, byte* pub, word32* pubSz)
 {
     int ret;
 
     if (key == NULL || rng == NULL || priv == NULL || privSz == NULL ||
-            pub == NULL || pubSz == NULL) {
+        pub == NULL || pubSz == NULL) {
         return BAD_FUNC_ARG;
     }
 
-    ret = GeneratePrivate(key, rng, priv, privSz);
+    ret = GeneratePrivateDh(key, rng, priv, privSz);
 
-    return (ret != 0) ? ret : GeneratePublic(key, priv, *privSz, pub, pubSz);
+    return (ret != 0) ? ret : GeneratePublicDh(key, priv, *privSz, pub, pubSz);
 }
 
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH)
+static int wc_DhGenerateKeyPair_Async(DhKey* key, WC_RNG* rng,
+    byte* priv, word32* privSz, byte* pub, word32* pubSz)
+{
+    int ret;
+    /* async DH keygen: QAT offload, test-mode deferral, or software */
+    (void)rng;
+
+#ifdef HAVE_CAVIUM
+    /* TODO: Not implemented - use software for now */
+
+#elif defined(HAVE_INTEL_QA)
+    {
+        mp_int x;
+        ret = mp_init(&x);
+        if (ret != MP_OKAY)
+            return ret;
+
+        ret = GeneratePrivateDh(key, rng, priv, privSz);
+        if (ret == 0)
+            ret = mp_read_unsigned_bin(&x, priv, *privSz);
+        if (ret == MP_OKAY)
+            ret = wc_mp_to_bigint(&x, &x.raw);
+        if (ret == MP_OKAY)
+            ret = wc_mp_to_bigint(&key->p, &key->p.raw);
+        if (ret == MP_OKAY)
+            ret = wc_mp_to_bigint(&key->g, &key->g.raw);
+        if (ret == MP_OKAY)
+            ret = IntelQaDhKeyGen(&key->asyncDev, &key->p.raw, &key->g.raw,
+                &x.raw, pub, pubSz);
+        mp_clear(&x);
+
+        return ret;
+    }
+#else /* WOLFSSL_ASYNC_CRYPT_TEST */
+    WC_ASYNC_TEST* testDev = &key->asyncDev.test;
+    if (testDev->type == ASYNC_TEST_NONE) {
+        testDev->type = ASYNC_TEST_DH_GEN;
+        testDev->dhGen.key = key;
+        testDev->dhGen.rng = rng;
+        testDev->dhGen.priv = priv;
+        testDev->dhGen.privSz = privSz;
+        testDev->dhGen.pub = pub;
+        testDev->dhGen.pubSz = pubSz;
+        return WC_PENDING_E;
+    }
+#endif
+
+    ret = wc_DhGenerateKeyPair_Sync(key, rng, priv, privSz, pub, pubSz);
+
+    return ret;
+}
+#endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_DH */
+
 
 /* Check DH Public Key for invalid numbers
  *
@@ -242,11 +310,34 @@ int wc_DhCheckPubKey(DhKey* key, const byte* pub, word32 pubSz)
 }
 
 
-int wc_DhAgree(DhKey* key, byte* agree, word32* agreeSz, const byte* priv,
-            word32 privSz, const byte* otherPub, word32 pubSz)
+int wc_DhGenerateKeyPair(DhKey* key, WC_RNG* rng,
+    byte* priv, word32* privSz, byte* pub, word32* pubSz)
+{
+    /* Public DH key-pair entry point: validates every pointer argument,
+     * then routes to the async or the software implementation. */
+
+    if (key == NULL || rng == NULL || priv == NULL || privSz == NULL ||
+                                                pub == NULL || pubSz == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH)
+    /* keys whose async context carries the DH marker use the async path */
+    if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_DH) {
+        return wc_DhGenerateKeyPair_Async(key, rng, priv, privSz, pub, pubSz);
+    }
+#endif
+
+    /* software path: taken for unmarked keys and whenever async DH
+     * support is compiled out */
+    return wc_DhGenerateKeyPair_Sync(key, rng, priv, privSz, pub, pubSz);
+}
+
+
+static int wc_DhAgree_Sync(DhKey* key, byte* agree, word32* agreeSz,
+    const byte* priv, word32 privSz, const byte* otherPub, word32 pubSz)
 {
     int ret = 0;
-
     mp_int x;
     mp_int y;
     mp_int z;
@@ -276,7 +367,65 @@ int wc_DhAgree(DhKey* key, byte* agree, word32* agreeSz, const byte* priv,
 
     mp_clear(&z);
     mp_clear(&y);
-    mp_clear(&x);
+    mp_forcezero(&x);
+
+    return ret;
+}
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH)
+static int wc_DhAgree_Async(DhKey* key, byte* agree, word32* agreeSz,
+    const byte* priv, word32 privSz, const byte* otherPub, word32 pubSz)
+{
+    int ret;
+    /* async DH agree: QAT offload, test-mode deferral, or software */
+#ifdef HAVE_CAVIUM
+    /* TODO: Not implemented - use software for now */
+    ret = wc_DhAgree_Sync(key, agree, agreeSz, priv, privSz, otherPub, pubSz);
+
+#elif defined(HAVE_INTEL_QA)
+    ret = wc_mp_to_bigint(&key->p, &key->p.raw);
+    if (ret == MP_OKAY)
+        ret = IntelQaDhAgree(&key->asyncDev, &key->p.raw,
+            agree, agreeSz, priv, privSz, otherPub, pubSz);
+#else /* WOLFSSL_ASYNC_CRYPT_TEST */
+    WC_ASYNC_TEST* testDev = &key->asyncDev.test;
+    if (testDev->type == ASYNC_TEST_NONE) {
+        testDev->type = ASYNC_TEST_DH_AGREE;
+        testDev->dhAgree.key = key;
+        testDev->dhAgree.agree = agree;
+        testDev->dhAgree.agreeSz = agreeSz;
+        testDev->dhAgree.priv = priv;
+        testDev->dhAgree.privSz = privSz;
+        testDev->dhAgree.otherPub = otherPub;
+        testDev->dhAgree.pubSz = pubSz;
+        return WC_PENDING_E;
+    }
+    ret = wc_DhAgree_Sync(key, agree, agreeSz, priv, privSz, otherPub, pubSz);
+#endif
+
+    return ret;
+}
+#endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_DH */
+
+int wc_DhAgree(DhKey* key, byte* agree, word32* agreeSz, const byte* priv,
+            word32 privSz, const byte* otherPub, word32 pubSz)
+{
+    int ret = 0;
+
+    if (key == NULL || agree == NULL || agreeSz == NULL || priv == NULL ||
+                                                            otherPub == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_DH)
+    if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_DH) {
+        ret = wc_DhAgree_Async(key, agree, agreeSz, priv, privSz, otherPub, pubSz);
+    }
+    else
+#endif
+    {
+        ret = wc_DhAgree_Sync(key, agree, agreeSz, priv, privSz, otherPub, pubSz);
+    }
 
     return ret;
 }
@@ -286,8 +435,9 @@ int wc_DhAgree(DhKey* key, byte* agree, word32* agreeSz, const byte* priv,
 int wc_DhSetKey(DhKey* key, const byte* p, word32 pSz, const byte* g,
                 word32 gSz)
 {
-    if (key == NULL || p == NULL || g == NULL || pSz == 0 || gSz == 0)
+    if (key == NULL || p == NULL || g == NULL || pSz == 0 || gSz == 0) {
         return BAD_FUNC_ARG;
+    }
 
     /* may have leading 0 */
     if (p[0] == 0) {
@@ -318,6 +468,4 @@ int wc_DhSetKey(DhKey* key, const byte* p, word32 pSz, const byte* g,
     return 0;
 }
 
-
 #endif /* NO_DH */
-
diff --git a/wolfcrypt/src/ecc.c b/wolfcrypt/src/ecc.c
old mode 100644
new mode 100755
index 500088306..e62af0189
--- a/wolfcrypt/src/ecc.c
+++ b/wolfcrypt/src/ecc.c
@@ -1016,6 +1016,9 @@ enum ecc_curve_load_mask {
 #ifdef ECC_CACHE_CURVE
     /* cache (mp_int) of the curve parameters */
     static ecc_curve_spec* ecc_curve_spec_cache[ECC_SET_COUNT];
+    #ifndef SINGLE_THREADED
+        static wolfSSL_Mutex ecc_curve_cache_mutex;
+    #endif
 
     #define DECLARE_CURVE_SPECS(intcount) ecc_curve_spec* curve = NULL;
 #else
@@ -1034,8 +1037,6 @@ static void _wc_ecc_curve_free(ecc_curve_spec* curve)
         return;
     }
 
-    /* don't clear fast math (only normal math uses alloc's) */
-#if !defined(USE_FAST_MATH)
     if (curve->load_mask & ECC_CURVE_FIELD_PRIME)
         mp_clear(curve->prime);
     if (curve->load_mask & ECC_CURVE_FIELD_AF)
@@ -1050,7 +1051,7 @@ static void _wc_ecc_curve_free(ecc_curve_spec* curve)
         mp_clear(curve->Gx);
     if (curve->load_mask & ECC_CURVE_FIELD_GY)
         mp_clear(curve->Gy);
-#endif
+
     curve->load_mask = 0;
 }
 
@@ -1082,6 +1083,11 @@ static int wc_ecc_curve_load_item(const char* src, mp_int** dst,
         curve->load_mask |= mask;
 
         err = mp_read_radix(*dst, src, 16);
+
+    #ifdef HAVE_WOLF_BIGINT
+        if (err == MP_OKAY)
+            err = wc_mp_to_bigint(*dst, &(*dst)->raw);
+    #endif
     }
     return err;
 }
@@ -1091,7 +1097,7 @@ static int wc_ecc_curve_load(const ecc_set_type* dp, ecc_curve_spec** pCurve,
 {
     int ret = 0, x;
     ecc_curve_spec* curve;
-    byte load_items; /* mask of items to load */
+    byte load_items = 0; /* mask of items to load */
 
     if (dp == NULL || pCurve == NULL)
         return BAD_FUNC_ARG;
@@ -1133,8 +1139,16 @@ static int wc_ecc_curve_load(const ecc_set_type* dp, ecc_curve_spec** pCurve,
     }
     curve->dp = dp; /* set dp info */
 
+#if defined(ECC_CACHE_CURVE) && !defined(SINGLE_THREADED)
+    ret = wc_LockMutex(&ecc_curve_cache_mutex);
+    if (ret != 0) {
+        return MEMORY_E;
+    }
+#endif
+
     /* determine items to load */
     load_items = (~curve->load_mask & load_mask);
+    curve->load_mask |= load_items;
 
     /* load items */
     x = 0;
@@ -1165,10 +1179,23 @@ static int wc_ecc_curve_load(const ecc_set_type* dp, ecc_curve_spec** pCurve,
         ret = MP_READ_E;
     }
 
+#if defined(ECC_CACHE_CURVE) && !defined(SINGLE_THREADED)
+    wc_UnLockMutex(&ecc_curve_cache_mutex);
+#endif
+
     return ret;
 }
 
 #ifdef ECC_CACHE_CURVE
+int wc_ecc_curve_cache_init(void)
+{
+#ifndef SINGLE_THREADED  /* enclosing #ifdef implies ECC_CACHE_CURVE */
+    return wc_InitMutex(&ecc_curve_cache_mutex);
+#else
+    return 0; /* no cache mutex exists in single-threaded builds */
+#endif
+}
+
 void wc_ecc_curve_cache_free(void)
 {
     int x;
@@ -1181,6 +1208,10 @@ void wc_ecc_curve_cache_free(void)
             ecc_curve_spec_cache[x] = NULL;
         }
     }
+
+#if defined(ECC_CACHE_CURVE) && !defined(SINGLE_THREADED)
+    wc_FreeMutex(&ecc_curve_cache_mutex);
+#endif
 }
 #endif /* ECC_CACHE_CURVE */
 
@@ -1225,7 +1256,8 @@ int wc_ecc_set_curve(ecc_key* key, int keysize, int curve_id)
             }
         }
         if (ecc_sets[x].size == 0) {
-            return ECC_BAD_ARG_E;
+            WOLFSSL_MSG("ECC Curve not found");
+            return ECC_CURVE_OID_E;
         }
 
         key->idx = x;
@@ -1279,10 +1311,8 @@ int ecc_projective_add_point(ecc_point* P, ecc_point* Q, ecc_point* R,
        if ( (mp_cmp(P->x, Q->x) == MP_EQ) &&
             (get_digit_count(Q->z) && mp_cmp(P->z, Q->z) == MP_EQ) &&
             (mp_cmp(P->y, Q->y) == MP_EQ || mp_cmp(P->y, &t1) == MP_EQ)) {
-       #ifndef USE_FAST_MATH
            mp_clear(&t1);
            mp_clear(&t2);
-       #endif
           return ecc_projective_dbl_point(P, R, a, modulus, mp);
        }
    }
@@ -1508,11 +1538,10 @@ int ecc_projective_add_point(ecc_point* P, ecc_point* Q, ecc_point* R,
 #endif
 
 done:
-#ifndef USE_FAST_MATH
+
    /* clean up */
    mp_clear(&t1);
    mp_clear(&t2);
-#endif
 
    return err;
 }
@@ -1570,10 +1599,8 @@ int ecc_projective_dbl_point(ecc_point *P, ecc_point *R, mp_int* a,
    z = &rz;
 
    if ((err = mp_init_multi(x, y, z, NULL, NULL, NULL)) != MP_OKAY) {
-   #ifndef USE_FAST_MATH
        mp_clear(&t1);
        mp_clear(&t2);
-   #endif
        return err;
    }
 #else
@@ -1780,11 +1807,9 @@ int ecc_projective_dbl_point(ecc_point *P, ecc_point *R, mp_int* a,
        err = mp_copy(z, R->z);
 #endif
 
-#ifndef USE_FAST_MATH
    /* clean up */
    mp_clear(&t1);
    mp_clear(&t2);
-#endif
 
    return err;
 }
@@ -1892,10 +1917,8 @@ int ecc_map(ecc_point* P, mp_int* modulus, mp_digit mp)
 
 done:
   /* clean up */
-#ifndef USE_FAST_MATH
    mp_clear(&t1);
    mp_clear(&t2);
-#endif
 
    return err;
 }
@@ -1907,7 +1930,7 @@ done:
     !defined(__cplusplus)
     /* let's use the one we already have */
     extern const wolfssl_word wc_off_on_addr[2];
-#elif defined(ECC_TIMING_RESISTANT)
+#else
     static const wolfssl_word wc_off_on_addr[2] =
     {
     #if defined(WC_64BIT_CPU)
@@ -1979,9 +2002,7 @@ int wc_ecc_mulmod_ex(mp_int* k, ecc_point *G, ecc_point *R,
        return err;
    }
    if ((err = mp_montgomery_calc_normalization(&mu, modulus)) != MP_OKAY) {
-   #ifndef USE_FAST_MATH
        mp_clear(&mu);
-   #endif
        return err;
    }
 
@@ -1989,9 +2010,7 @@ int wc_ecc_mulmod_ex(mp_int* k, ecc_point *G, ecc_point *R,
   for (i = 0; i < M_POINTS; i++) {
       M[i] = wc_ecc_new_point_h(heap);
       if (M[i] == NULL) {
-      #ifndef USE_FAST_MATH
          mp_clear(&mu);
-      #endif
          err = MEMORY_E; goto exit;
       }
   }
@@ -2018,10 +2037,8 @@ int wc_ecc_mulmod_ex(mp_int* k, ecc_point *G, ecc_point *R,
        }
    }
 
-#ifndef USE_FAST_MATH
    /* done with mu */
    mp_clear(&mu);
-#endif
 
 #ifndef ECC_TIMING_RESISTANT
 
@@ -2689,31 +2706,12 @@ int wc_ecc_shared_secret(ecc_key* private_key, ecc_key* public_key, byte* out,
       return ECC_BAD_ARG_E;
    }
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    if (private_key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
-    #ifdef HAVE_CAVIUM
-        /* TODO: Not implemented */
-    #else
-        AsyncCryptTestDev* testDev = &private_key->asyncDev.dev;
-        if (testDev->type == ASYNC_TEST_NONE) {
-            testDev->type = ASYNC_TEST_ECC_SHARED_SEC;
-            testDev->eccSharedSec.private_key = private_key;
-            testDev->eccSharedSec.public_key = public_key;
-            testDev->eccSharedSec.out = out;
-            testDev->eccSharedSec.outLen = outlen;
-            return WC_PENDING_E;
-        }
-    #endif
-    }
-#endif
-
 #ifdef WOLFSSL_ATECC508A
    err = atcatls_ecdh(private_key->slot, public_key->pubkey, out);
    if (err != ATCA_SUCCESS) {
       err = BAD_COND_E;
    }
    *outlen = private_key->dp->size;
-
 #else
    err = wc_ecc_shared_secret_ex(private_key, &public_key->pubkey, out, outlen);
 #endif /* WOLFSSL_ATECC508A */
@@ -2787,6 +2785,48 @@ static int wc_ecc_shared_secret_gen_sync(ecc_key* private_key, ecc_point* point,
     return err;
 }
 
+#ifdef WOLFSSL_ASYNC_CRYPT
+static int wc_ecc_shared_secret_gen_async(ecc_key* private_key,
+            ecc_point* point, byte* out, word32 *outlen,
+            ecc_curve_spec* curve)
+{
+    int err;
+    /* Run ECDH on the async backend selected at build time (see #if below) */
+#ifdef HAVE_CAVIUM
+    /* TODO: Not implemented - use software for now */
+    err = wc_ecc_shared_secret_gen_sync(private_key, point, out, outlen, curve);
+
+#elif defined(HAVE_INTEL_QA)
+    /* load curve field Bf if needed, then export k and x/y as raw bigints */
+    err = wc_ecc_curve_load(private_key->dp, &curve, ECC_CURVE_FIELD_BF);
+    if (err == MP_OKAY)
+        err = wc_mp_to_bigint(&private_key->k, &private_key->k.raw);
+    if (err == MP_OKAY)
+        err = wc_mp_to_bigint(point->x, &point->x->raw);
+    if (err == MP_OKAY)
+        err = wc_mp_to_bigint(point->y, &point->y->raw);
+    if (err == MP_OKAY) /* hand the raw operands and curve params to QAT */
+        err = IntelQaEcdh(&private_key->asyncDev,
+            &private_key->k.raw, &point->x->raw, &point->y->raw,
+            out, outlen,
+            &curve->Af->raw, &curve->Bf->raw, &curve->prime->raw,
+            private_key->dp->cofactor);
+#else /* WOLFSSL_ASYNC_CRYPT_TEST */
+    WC_ASYNC_TEST* testDev = &private_key->asyncDev.test;
+    if (testDev->type == ASYNC_TEST_NONE) { /* record args, report pending */
+        testDev->type = ASYNC_TEST_ECC_SHARED_SEC;
+        testDev->eccSharedSec.private_key = private_key;
+        testDev->eccSharedSec.public_point = point;
+        testDev->eccSharedSec.out = out;
+        testDev->eccSharedSec.outLen = outlen;
+        return WC_PENDING_E;
+    } /* type already set: fall through and compute in software */
+    err = wc_ecc_shared_secret_gen_sync(private_key, point, out, outlen, curve);
+#endif
+
+    return err;
+}
+#endif /* WOLFSSL_ASYNC_CRYPT */
 
 int wc_ecc_shared_secret_gen(ecc_key* private_key, ecc_point* point,
                                                     byte* out, word32 *outlen)
@@ -2805,8 +2845,17 @@ int wc_ecc_shared_secret_gen(ecc_key* private_key, ecc_point* point,
     if (err != MP_OKAY)
         return err;
 
-    err = wc_ecc_shared_secret_gen_sync(private_key, point,
-        out, outlen, curve);
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+    if (private_key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
+        err = wc_ecc_shared_secret_gen_async(private_key, point,
+            out, outlen, curve);
+    }
+    else
+#endif
+    {
+        err = wc_ecc_shared_secret_gen_sync(private_key, point,
+            out, outlen, curve);
+    }
 
     wc_ecc_curve_free(curve);
 
@@ -2855,6 +2904,13 @@ int wc_ecc_shared_secret_ex(ecc_key* private_key, ecc_point* point,
         case ECC_STATE_SHARED_SEC_RES:
             private_key->state = ECC_STATE_SHARED_SEC_RES;
             err = 0;
+        #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+            if (private_key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
+            #if defined(HAVE_CAVIUM) || defined(HAVE_INTEL_QA)
+                err = private_key->asyncDev.event.ret;
+            #endif
+            }
+        #endif
             break;
 
         default:
@@ -2928,11 +2984,6 @@ static int wc_ecc_gen_k(WC_RNG* rng, int size, mp_int* k, mp_int* order)
         }
     }
 
-#ifdef HAVE_WOLF_BIGINT
-    if (err == MP_OKAY)
-        err = wc_mp_to_bigint(k, &k->raw);
-#endif /* HAVE_WOLF_BIGINT */
-
     ForceZero(buf, ECC_MAXSIZE);
 #ifdef WOLFSSL_SMALL_STACK
     XFREE(buf, NULL, DYNAMIC_TYPE_TMP_BUFFER);
@@ -2959,12 +3010,14 @@ int wc_ecc_make_key_ex(WC_RNG* rng, int keysize, ecc_key* key, int curve_id)
         return err;
     }
 
-#ifdef WOLFSSL_ASYNC_CRYPT
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
     if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
     #ifdef HAVE_CAVIUM
         /* TODO: Not implemented */
+    #elif defined(HAVE_INTEL_QA)
+        /* TODO: Not implemented */
     #else
-        AsyncCryptTestDev* testDev = &key->asyncDev.dev;
+        WC_ASYNC_TEST* testDev = &key->asyncDev.test;
         if (testDev->type == ASYNC_TEST_NONE) {
             testDev->type = ASYNC_TEST_ECC_MAKE;
             testDev->eccMake.rng = rng;
@@ -3041,7 +3094,7 @@ int wc_ecc_make_key_ex(WC_RNG* rng, int keysize, ecc_key* key, int curve_id)
     /* cleanup these on failure case only */
     if (err != MP_OKAY) {
         /* clean up */
-    #if !defined(USE_FAST_MATH) && !defined(ALT_ECC_SIZE)
+    #ifndef ALT_ECC_SIZE
         mp_clear(key->pubkey.x);
         mp_clear(key->pubkey.y);
         mp_clear(key->pubkey.z);
@@ -3128,12 +3181,62 @@ int wc_ecc_make_key(WC_RNG* rng, int keysize, ecc_key* key)
     return wc_ecc_make_key_ex(rng, keysize, key, ECC_CURVE_DEF);
 }
 
+/* Ensure the ECDSA r and s integers exist and are zeroed for a new operation.
+ *
+ * With WOLFSSL_ASYNC_CRYPT a NULL *r or *s is allocated from key->heap and
+ * recorded in key->r / key->s. Without it nothing is allocated here, so *r
+ * and *s must already be non-NULL. A pointer that is non-NULL on entry is
+ * left untouched (it is not zeroed again).
+ *
+ * key  supplies the heap hint and holds the r/s pointers in async builds
+ * r,s  in/out pointers to the integers
+ * returns 0 on success or MEMORY_E when an allocation fails; in that case
+ *         neither *r nor key->r is left pointing at freed memory
+ */
+static INLINE int wc_ecc_alloc_rs(ecc_key* key, mp_int** r, mp_int** s)
+{
+    int err = 0;
+
+    if (*r == NULL) {
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        *r = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_BIGINT);
+        if (*r == NULL) {
+            return MEMORY_E;
+        }
+        key->r = *r;
+    #endif
+
+        XMEMSET(*r, 0, sizeof(mp_int));
+    }
+    if (*s == NULL) {
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        *s = (mp_int*)XMALLOC(sizeof(mp_int), key->heap, DYNAMIC_TYPE_BIGINT);
+        if (*s == NULL) {
+            /* Drop every reference to r before releasing it: callers run
+             * wc_ecc_free_rs() afterwards and wc_ecc_free() frees key->r,
+             * so a stale pointer would be cleared and freed a second time. */
+            mp_int* rOld = *r;
+            if (key->r == rOld)
+                key->r = NULL;
+            *r = NULL;
+            XFREE(rOld, key->heap, DYNAMIC_TYPE_BIGINT);
+            return MEMORY_E;
+        }
+        key->s = *s;
+    #endif
+
+        XMEMSET(*s, 0, sizeof(mp_int));
+    }
+    (void)key;
+
+    return err;
+}
+
 static INLINE void wc_ecc_free_rs(ecc_key* key, mp_int** r, mp_int** s)
 {
     if (*r) {
-    #ifndef USE_FAST_MATH
         mp_clear(*r);
-    #endif
+
     #ifdef WOLFSSL_ASYNC_CRYPT
         XFREE(*r, key->heap, DYNAMIC_TYPE_BIGINT);
         key->r = NULL;
@@ -3141,9 +3225,8 @@ static INLINE void wc_ecc_free_rs(ecc_key* key, mp_int** r, mp_int** s)
         *r = NULL;
     }
     if (*s) {
-    #ifndef USE_FAST_MATH
         mp_clear(*s);
-    #endif
+
     #ifdef WOLFSSL_ASYNC_CRYPT
         XFREE(*s, key->heap, DYNAMIC_TYPE_BIGINT);
         key->s = NULL;
@@ -3197,12 +3280,10 @@ int wc_ecc_init_ex(ecc_key* key, void* heap, int devId)
     key->heap = heap;
 #endif
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    if (devId != INVALID_DEVID) {
-        /* handle as async */
-        ret = wolfAsync_DevCtxInit(&key->asyncDev, WOLFSSL_ASYNC_MARKER_ECC,
-                                                                        devId);
-    }
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+    /* handle as async */
+    ret = wolfAsync_DevCtxInit(&key->asyncDev, WOLFSSL_ASYNC_MARKER_ECC,
+                                                            key->heap, devId);
 #else
     (void)devId;
 #endif
@@ -3252,49 +3333,16 @@ int wc_ecc_sign_hash(const byte* in, word32 inlen, byte* out, word32 *outlen,
         return ECC_BAD_ARG_E;
     }
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
-    #ifdef HAVE_CAVIUM
-        /* TODO: Not implemented */
-    #else
-        AsyncCryptTestDev* testDev = &key->asyncDev.dev;
-        if (testDev->type == ASYNC_TEST_NONE) {
-            testDev->type = ASYNC_TEST_ECC_SIGN;
-            testDev->eccSign.in = in;
-            testDev->eccSign.inSz = inlen;
-            testDev->eccSign.out = out;
-            testDev->eccSign.outSz = outlen;
-            testDev->eccSign.rng = rng;
-            testDev->eccSign.key = key;
-            return WC_PENDING_E;
-        }
-    #endif
-    }
-#endif
-
     switch(key->state) {
         case ECC_STATE_NONE:
         case ECC_STATE_SIGN_DO:
             key->state = ECC_STATE_SIGN_DO;
 
-        #ifdef WOLFSSL_ASYNC_CRYPT
-            if (r == NULL)
-                r = (mp_int*)XMALLOC(sizeof(mp_int), key->heap,
-                                                           DYNAMIC_TYPE_BIGINT);
-            if (s == NULL)
-                s = (mp_int*)XMALLOC(sizeof(mp_int), key->heap,
-                                                           DYNAMIC_TYPE_BIGINT);
-            if (r == NULL || s == NULL) {
-                err = MEMORY_E; break;
-            }
-            key->r = r;
-            key->s = s;
-        #endif
-            XMEMSET(r, 0, sizeof(mp_int));
-            XMEMSET(s, 0, sizeof(mp_int));
+            err = wc_ecc_alloc_rs(key, &r, &s);
+            if (err != 0)
+                break;
 
-            if ((err = mp_init_multi(r, s, NULL, NULL, NULL, NULL))
-                                                                   != MP_OKAY) {
+            if ((err = mp_init_multi(r, s, NULL, NULL, NULL, NULL)) != MP_OKAY){
                 break;
             }
 
@@ -3338,13 +3386,26 @@ int wc_ecc_sign_hash(const byte* in, word32 inlen, byte* out, word32 *outlen,
         case ECC_STATE_SIGN_ENCODE:
             key->state = ECC_STATE_SIGN_ENCODE;
 
-        #ifdef WOLFSSL_ASYNC_CRYPT
+        #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+            /* restore r/s */
             r = key->r;
             s = key->s;
-        #endif
+
+            if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
+                /* only do this if not simulator, since it overwrites result */
+                #ifndef WOLFSSL_ASYNC_CRYPT_TEST
+                    wc_bigint_to_mp(&r->raw, r);
+                    wc_bigint_to_mp(&s->raw, s);
+                #endif
+            }
+        #endif /* WOLFSSL_ASYNC_CRYPT */
 
             /* encoded with DSA header */
             err = StoreECC_DSA_Sig(out, outlen, r, s);
+
+            /* always free r/s */
+            mp_clear(r);
+            mp_clear(s);
             break;
 
         default:
@@ -3357,8 +3418,8 @@ int wc_ecc_sign_hash(const byte* in, word32 inlen, byte* out, word32 *outlen,
         return err;
     }
 
+    /* cleanup */
     wc_ecc_free_rs(key, &r, &s);
-
     key->state = ECC_STATE_NONE;
 
     return err;
@@ -3395,6 +3456,23 @@ int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng,
       return ECC_BAD_ARG_E;
    }
 
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) && \
+       defined(WOLFSSL_ASYNC_CRYPT_TEST)
+    if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
+        WC_ASYNC_TEST* testDev = &key->asyncDev.test;
+        if (testDev->type == ASYNC_TEST_NONE) {
+            testDev->type = ASYNC_TEST_ECC_SIGN;
+            testDev->eccSign.in = in;
+            testDev->eccSign.inSz = inlen;
+            testDev->eccSign.rng = rng;
+            testDev->eccSign.key = key;
+            testDev->eccSign.r = r;
+            testDev->eccSign.s = s;
+            return WC_PENDING_E;
+        }
+    }
+#endif
+
    /* get the hash and load it as a bignum into 'e' */
    /* init the bignums */
    if ((err = mp_init(&e)) != MP_OKAY) {
@@ -3423,6 +3501,47 @@ int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng,
    if (err == MP_OKAY) {
        int loop_check = 0;
        ecc_key pubkey;
+
+   #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+        if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
+        #ifdef HAVE_CAVIUM
+            /* TODO: Not implemented */
+        #elif defined(HAVE_INTEL_QA)
+           mp_int k;
+
+           err = mp_init(&k);
+           /* make sure r and s are allocated */
+           if (err == MP_OKAY)
+               err = wc_bigint_alloc(&key->r->raw, key->dp->size);
+           if (err == MP_OKAY)
+               err = wc_bigint_alloc(&key->s->raw, key->dp->size);
+           /* load e and k */
+           if (err == MP_OKAY)
+               err = wc_mp_to_bigint(&e, &e.raw);
+           if (err == MP_OKAY)
+               err = wc_mp_to_bigint(&key->k, &key->k.raw);
+           if (err == MP_OKAY)
+               err = wc_ecc_curve_load(key->dp, &curve, ECC_CURVE_FIELD_ALL);
+           if (err == MP_OKAY)
+               err = wc_ecc_gen_k(rng, key->dp->size, &k, curve->order);
+           if (err == MP_OKAY)
+               err = wc_mp_to_bigint(&k, &k.raw);
+           if (err == MP_OKAY)
+               err = IntelQaEcdsaSign(&key->asyncDev, &e.raw, &key->k.raw,
+                  &k.raw, &r->raw, &s->raw, &curve->Af->raw, &curve->Bf->raw,
+                  &curve->prime->raw, &curve->order->raw, &curve->Gx->raw,
+                  &curve->Gy->raw);
+
+           mp_clear(&e);
+           mp_clear(&k);
+           wc_ecc_curve_free(curve);
+
+           return err;
+       #endif
+       }
+   #endif /* WOLFSSL_ASYNC_CRYPT */
+
+       /* don't use async for key, since we don't support async return here */
        if (wc_ecc_init_ex(&pubkey, key->heap, INVALID_DEVID) == MP_OKAY) {
            for (;;) {
                if (++loop_check > 64) {
@@ -3438,12 +3557,12 @@ int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng,
                if (err != MP_OKAY) break;
 
                if (mp_iszero(r) == MP_YES) {
-               #ifndef USE_FAST_MATH
+                #ifndef ALT_ECC_SIZE
                    mp_clear(pubkey.pubkey.x);
                    mp_clear(pubkey.pubkey.y);
                    mp_clear(pubkey.pubkey.z);
-                   mp_clear(&pubkey.k);
-               #endif
+                #endif
+                   mp_forcezero(&pubkey.k);
                }
                else {
                    /* find s = (e + xr)/k */
@@ -3473,9 +3592,7 @@ int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng,
        }
    }
 
-#ifndef USE_FAST_MATH
    mp_clear(&e);
-#endif
    wc_ecc_curve_free(curve);
 
    return err;
@@ -3493,10 +3610,8 @@ void wc_ecc_free(ecc_key* key)
         return;
     }
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA) {
-        wolfAsync_DevCtxFree(&key->asyncDev);
-    }
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+    wolfAsync_DevCtxFree(&key->asyncDev, WOLFSSL_ASYNC_MARKER_ECC);
     wc_ecc_free_rs(key, &key->r, &key->s);
 #endif
 
@@ -3505,11 +3620,10 @@ void wc_ecc_free(ecc_key* key)
    key->slot = -1;
 #else
 
-#ifndef USE_FAST_MATH
     mp_clear(key->pubkey.x);
     mp_clear(key->pubkey.y);
     mp_clear(key->pubkey.z);
-#endif
+
     mp_forcezero(&key->k);
 #endif /* WOLFSSL_ATECC508A */
 }
@@ -3623,10 +3737,8 @@ static int ecc_mul2add(ecc_point* A, mp_int* kA,
       if (err == MP_OKAY)
         err = mp_mulmod(B->z, &mu, modulus, precomp[1<<2]->z);
 
-    #ifndef USE_FAST_MATH
       /* done with mu */
       mp_clear(&mu);
-    #endif
     }
   }
 
@@ -3779,26 +3891,6 @@ int wc_ecc_verify_hash(const byte* sig, word32 siglen, const byte* hash,
         return ECC_BAD_ARG_E;
     }
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
-    #ifdef HAVE_CAVIUM
-        /* TODO: Not implemented */
-    #else
-        AsyncCryptTestDev* testDev = &key->asyncDev.dev;
-        if (testDev->type == ASYNC_TEST_NONE) {
-            testDev->type = ASYNC_TEST_ECC_VERIFY;
-            testDev->eccVerify.in = sig;
-            testDev->eccVerify.inSz = siglen;
-            testDev->eccVerify.out = hash;
-            testDev->eccVerify.outSz = hashlen;
-            testDev->eccVerify.stat = stat;
-            testDev->eccVerify.key = key;
-            return WC_PENDING_E;
-        }
-    #endif
-    }
-#endif
-
     switch(key->state) {
         case ECC_STATE_NONE:
         case ECC_STATE_VERIFY_DECODE:
@@ -3811,21 +3903,9 @@ int wc_ecc_verify_hash(const byte* sig, word32 siglen, const byte* hash,
              * If either of those don't allocate correctly, none of
              * the rest of this function will execute, and everything
              * gets cleaned up at the end. */
-        #ifdef WOLFSSL_ASYNC_CRYPT
-            if (r == NULL)
-                r = (mp_int*)XMALLOC(sizeof(mp_int), key->heap,
-                                                           DYNAMIC_TYPE_BIGINT);
-            if (s == NULL)
-                s = (mp_int*)XMALLOC(sizeof(mp_int), key->heap,
-                                                           DYNAMIC_TYPE_BIGINT);
-            if (r == NULL || s == NULL) {
-                err = MEMORY_E; break;
-            }
-            key->r = r;
-            key->s = s;
-        #endif
-            XMEMSET(r, 0, sizeof(mp_int));
-            XMEMSET(s, 0, sizeof(mp_int));
+            err = wc_ecc_alloc_rs(key, &r, &s);
+            if (err != 0)
+                break;
 
             /* decode DSA header */
             err = DecodeECC_DSA_Sig(sig, siglen, r, s);
@@ -3837,13 +3917,7 @@ int wc_ecc_verify_hash(const byte* sig, word32 siglen, const byte* hash,
         case ECC_STATE_VERIFY_DO:
             key->state = ECC_STATE_VERIFY_DO;
 
-        #ifdef WOLFSSL_ASYNC_CRYPT
-            r = key->r;
-            s = key->s;
-        #endif
-
-            err = wc_ecc_verify_hash_ex(r, s, hash, hashlen, stat,
-                                                                           key);
+            err = wc_ecc_verify_hash_ex(r, s, hash, hashlen, stat, key);
             if (err < 0) {
                 break;
             }
@@ -3852,6 +3926,16 @@ int wc_ecc_verify_hash(const byte* sig, word32 siglen, const byte* hash,
         case ECC_STATE_VERIFY_RES:
             key->state = ECC_STATE_VERIFY_RES;
             err = 0;
+
+        #ifdef WOLFSSL_ASYNC_CRYPT
+            /* restore r/s */
+            r = key->r;
+            s = key->s;
+        #endif
+
+            /* done with R/S */
+            mp_clear(r);
+            mp_clear(s);
             break;
 
         default:
@@ -3864,8 +3948,8 @@ int wc_ecc_verify_hash(const byte* sig, word32 siglen, const byte* hash,
         return err;
     }
 
+    /* cleanup */
     wc_ecc_free_rs(key, &r, &s);
-
     key->state = ECC_STATE_NONE;
 
     return err;
@@ -3888,6 +3972,7 @@ int wc_ecc_verify_hash_ex(mp_int *r, mp_int *s, const byte* hash,
 {
    int           err;
 #ifndef WOLFSSL_ATECC508A
+   int          did_init = 0;
    ecc_point    *mG = NULL, *mQ = NULL;
    mp_int        v;
    mp_int        w;
@@ -3910,6 +3995,23 @@ int wc_ecc_verify_hash_ex(mp_int *r, mp_int *s, const byte* hash,
       return ECC_BAD_ARG_E;
    }
 
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC) && \
+       defined(WOLFSSL_ASYNC_CRYPT_TEST)
+    if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
+        WC_ASYNC_TEST* testDev = &key->asyncDev.test;
+        if (testDev->type == ASYNC_TEST_NONE) {
+            testDev->type = ASYNC_TEST_ECC_VERIFY;
+            testDev->eccVerify.r = r;
+            testDev->eccVerify.s = s;
+            testDev->eccVerify.hash = hash;
+            testDev->eccVerify.hashlen = hashlen;
+            testDev->eccVerify.stat = stat;
+            testDev->eccVerify.key = key;
+            return WC_PENDING_E;
+        }
+    }
+#endif
+
 #ifdef WOLFSSL_ATECC508A
     /* Extract R and S */
     err = mp_to_unsigned_bin(r, &sigRS[0]);
@@ -3959,9 +4061,38 @@ int wc_ecc_verify_hash_ex(mp_int *r, mp_int *s, const byte* hash,
            mp_rshb(&e, WOLFSSL_BIT_SIZE - (orderBits & 0x7));
    }
 
+   /* check for async hardware acceleration */
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
+   if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_ECC) {
+   #ifdef HAVE_CAVIUM
+      /* TODO: Not implemented */
+   #elif defined(HAVE_INTEL_QA)
+      err = wc_mp_to_bigint(&e, &e.raw);
+      if (err == MP_OKAY)
+          err = wc_mp_to_bigint(key->pubkey.x, &key->pubkey.x->raw);
+      if (err == MP_OKAY)
+          err = wc_mp_to_bigint(key->pubkey.y, &key->pubkey.y->raw);
+      if (err == MP_OKAY)
+          err = IntelQaEcdsaVerify(&key->asyncDev, &e.raw, &key->pubkey.x->raw,
+                &key->pubkey.y->raw, &r->raw, &s->raw, &curve->Af->raw,
+                &curve->Bf->raw, &curve->prime->raw, &curve->order->raw,
+                &curve->Gx->raw, &curve->Gy->raw, stat);
+
+      mp_clear(&e);
+
+      wc_ecc_curve_free(curve);
+
+      return err;
+   #endif
+   }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
    /* allocate ints */
-   if ((err = mp_init_multi(&v, &w, &u1, &u2, NULL, NULL)) != MP_OKAY) {
-      err = MEMORY_E;
+   if (err == MP_OKAY) {
+       if ((err = mp_init_multi(&v, &w, &u1, &u2, NULL, NULL)) != MP_OKAY) {
+          err = MEMORY_E;
+       }
+       did_init = 1;
    }
 
    /* allocate points */
@@ -4052,13 +4183,13 @@ int wc_ecc_verify_hash_ex(mp_int *r, mp_int *s, const byte* hash,
    wc_ecc_del_point_h(mG, key->heap);
    wc_ecc_del_point_h(mQ, key->heap);
 
-#ifndef USE_FAST_MATH
    mp_clear(&e);
-   mp_clear(&v);
-   mp_clear(&w);
-   mp_clear(&u1);
-   mp_clear(&u2);
-#endif
+   if (did_init) {
+       mp_clear(&v);
+       mp_clear(&w);
+       mp_clear(&u1);
+       mp_clear(&u2);
+   }
 
    wc_ecc_curve_free(curve);
 
@@ -4119,9 +4250,8 @@ int wc_ecc_import_point_der(byte* in, word32 inLen, const int curve_idx,
 
 #ifdef HAVE_COMP_KEY
     if (err == MP_OKAY && compressed == 1) {   /* build y */
-        mp_int t1, t2;
         int did_init = 0;
-
+        mp_int t1, t2;
         DECLARE_CURVE_SPECS(3)
 
         if (mp_init_multi(&t1, &t2, NULL, NULL, NULL, NULL) != MP_OKAY)
@@ -4132,7 +4262,8 @@ int wc_ecc_import_point_der(byte* in, word32 inLen, const int curve_idx,
         /* load curve info */
         if (err == MP_OKAY)
             err = wc_ecc_curve_load(&ecc_sets[curve_idx], &curve,
-                (ECC_CURVE_FIELD_PRIME | ECC_CURVE_FIELD_AF | ECC_CURVE_FIELD_BF));
+                (ECC_CURVE_FIELD_PRIME | ECC_CURVE_FIELD_AF |
+                    ECC_CURVE_FIELD_BF));
 
         /* compute x^3 */
         if (err == MP_OKAY)
@@ -4166,10 +4297,8 @@ int wc_ecc_import_point_der(byte* in, word32 inLen, const int curve_idx,
         }
 
         if (did_init) {
-        #ifndef USE_FAST_MATH
             mp_clear(&t2);
             mp_clear(&t1);
-        #endif
         }
 
         wc_ecc_curve_free(curve);
@@ -4449,10 +4578,8 @@ int wc_ecc_is_point(ecc_point* ecp, mp_int* a, mp_int* b, mp_int* prime)
        }
    }
 
-#ifndef USE_FAST_MATH
    mp_clear(&t1);
    mp_clear(&t2);
-#endif
 
    return err;
 }
@@ -4690,8 +4817,8 @@ int wc_ecc_import_x963_ex(const byte* in, word32 inLen, ecc_key* key,
         alt_fp_init(key->pubkey.z);
         err = mp_init(&key->k);
     #else
-        err = mp_init_multi(key->pubkey.x, key->pubkey.y, key->pubkey.z, &key->k,
-            NULL, NULL);
+        err = mp_init_multi(&key->k,
+                    key->pubkey.x, key->pubkey.y, key->pubkey.z, NULL, NULL);
     #endif
     if (err != MP_OKAY)
         return MEMORY_E;
@@ -4777,10 +4904,8 @@ int wc_ecc_import_x963_ex(const byte* in, word32 inLen, ecc_key* key,
         }
 
         if (did_init) {
-    #ifndef USE_FAST_MATH
             mp_clear(&t2);
             mp_clear(&t1);
-    #endif
         }
 
         wc_ecc_curve_free(curve);
@@ -5046,10 +5171,8 @@ int wc_ecc_rs_to_sig(const char* r, const char* s, byte* out, word32* outlen)
             err = MP_ZERO_E;
     }
 
-#ifndef USE_FAST_MATH
     mp_clear(&rtmp);
     mp_clear(&stmp);
-#endif
 
     return err;
 }
@@ -5102,10 +5225,8 @@ int wc_ecc_sig_to_rs(const byte* sig, word32 sigLen, byte* r, word32* rLen,
         }
     }
 
-#ifndef USE_FAST_MATH
     mp_clear(&rtmp);
     mp_clear(&stmp);
-#endif
 
     return err;
 }
@@ -5149,8 +5270,8 @@ static int wc_ecc_import_raw_private(ecc_key* key, const char* qx,
     alt_fp_init(key->pubkey.z);
     err = mp_init(&key->k);
 #else
-    err = mp_init_multi(key->pubkey.x, key->pubkey.y, key->pubkey.z, &key->k,
-                      NULL, NULL);
+    err = mp_init_multi(&key->k, key->pubkey.x, key->pubkey.y, key->pubkey.z,
+                                                                  NULL, NULL);
 #endif
     if (err != MP_OKAY)
         return MEMORY_E;
@@ -6056,9 +6177,7 @@ static int build_lut(int idx, mp_int* a, mp_int* modulus, mp_digit mp,
          mp_clear(fp_cache[idx].LUT[x]->z);
    }
 
-#ifndef USE_FAST_MATH
    mp_clear(&tmp);
-#endif
 
    if (err == MP_OKAY)
      return MP_OKAY;
@@ -6212,10 +6331,8 @@ static int accel_fp_mul(int idx, mp_int* k, ecc_point *R, mp_int* a,
 
 done:
    /* cleanup */
-#ifndef USE_FAST_MATH
    mp_clear(&order);
    mp_clear(&tk);
-#endif
 
 #ifdef WOLFSSL_SMALL_STACK
    XFREE(kb, NULL, DYNAMIC_TYPE_TMP_BUFFER);
@@ -6436,11 +6553,9 @@ static int accel_fp_mul2add(int idx1, int idx2,
 
 done:
    /* cleanup */
-#ifndef USE_FAST_MATH
    mp_clear(&tkb);
    mp_clear(&tka);
    mp_clear(&order);
-#endif
 
    if (kb[0])
       ForceZero(kb[0], KB_SIZE);
@@ -6579,9 +6694,7 @@ int ecc_mul2add(ecc_point* A, mp_int* kA,
 #ifndef HAVE_THREAD_LS
     wc_UnLockMutex(&ecc_fp_lock);
 #endif /* HAVE_THREAD_LS */
-#ifndef USE_FAST_MATH
     mp_clear(&mu);
-#endif
 
     return err;
 }
@@ -6669,9 +6782,7 @@ int wc_ecc_mulmod_ex(mp_int* k, ecc_point *G, ecc_point *R, mp_int* a,
 #ifndef HAVE_THREAD_LS
     wc_UnLockMutex(&ecc_fp_lock);
 #endif /* HAVE_THREAD_LS */
-#ifndef USE_FAST_MATH
     mp_clear(&mu);
-#endif
 
     return err;
 }
@@ -7392,10 +7503,8 @@ int mp_jacobi(mp_int* a, mp_int* n, int* c)
 
 done:
   /* cleanup */
-#ifndef USE_FAST_MATH
   mp_clear(&n1);
   mp_clear(&a1);
-#endif
 
   return res;
 }
@@ -7574,7 +7683,6 @@ int mp_sqrtmod_prime(mp_int* n, mp_int* prime, mp_int* ret)
     }
   }
 
-#ifndef USE_FAST_MATH
   /* done */
   mp_clear(&t1);
   mp_clear(&C);
@@ -7585,7 +7693,6 @@ int mp_sqrtmod_prime(mp_int* n, mp_int* prime, mp_int* ret)
   mp_clear(&T);
   mp_clear(&R);
   mp_clear(&two);
-#endif
 
   return res;
 }
@@ -7807,51 +7914,4 @@ int wc_X963_KDF(enum wc_HashType type, const byte* secret, word32 secretSz,
 }
 #endif /* HAVE_X963_KDF */
 
-
-#ifdef WOLFSSL_ASYNC_CRYPT
-
-int wc_ecc_async_handle(ecc_key* key, WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event)
-{
-    int ret;
-
-    if (key == NULL || queue == NULL || event == NULL) {
-        return BAD_FUNC_ARG;
-    }
-
-    /* make sure this ECC context had "wc_EccAsyncInit" called on it */
-    if (key->asyncDev.marker != WOLFSSL_ASYNC_MARKER_ECC) {
-        return ASYNC_INIT_E;
-    }
-
-    /* setup the event and push to queue */
-    ret = wolfAsync_EventInit(event, WOLF_EVENT_TYPE_ASYNC_WOLFSSL, &key->asyncDev);
-    if (ret == 0) {
-        ret = wolfEventQueue_Push(queue, event);
-    }
-
-    /* check for error (helps with debugging) */
-    if (ret != 0) {
-        WOLFSSL_MSG("wc_EccAsyncHandle failed");
-    }
-    return ret;
-}
-
-int wc_ecc_async_wait(int ret, ecc_key* key)
-{
-    if (ret == WC_PENDING_E) {
-        WOLF_EVENT event;
-        XMEMSET(&event, 0, sizeof(event));
-        ret = wolfAsync_EventInit(&event, WOLF_EVENT_TYPE_ASYNC_WOLFSSL, &key->asyncDev);
-        if (ret == 0) {
-            ret = wolfAsync_EventWait(&event);
-            if (ret == 0 && event.ret >= 0) {
-                ret = event.ret;
-            }
-        }
-    }
-    return ret;
-}
-
-#endif /* WOLFSSL_ASYNC_CRYPT */
-
 #endif /* HAVE_ECC */
diff --git a/wolfcrypt/src/error.c b/wolfcrypt/src/error.c
index 796d9e553..b5b578d5a 100644
--- a/wolfcrypt/src/error.c
+++ b/wolfcrypt/src/error.c
@@ -422,6 +422,9 @@ const char* wc_GetErrorString(int error)
     case BAD_PATH_ERROR:
         return "Bad path for opendir error";
 
+    case ASYNC_OP_E:
+        return "Async operation error";
+
     default:
         return "unknown error number";
 
diff --git a/wolfcrypt/src/hash.c b/wolfcrypt/src/hash.c
index bb03cde14..58a9c2251 100644
--- a/wolfcrypt/src/hash.c
+++ b/wolfcrypt/src/hash.c
@@ -388,82 +388,70 @@ int wc_HashFinal(wc_HashAlg* hash, enum wc_HashType type, byte* out)
 #if !defined(WOLFSSL_TI_HASH)
 
 #if !defined(NO_MD5)
-void wc_Md5GetHash(Md5* md5, byte* hash)
-{
-    Md5 save = *md5 ;
-    wc_Md5Final(md5, hash) ;
-    *md5 = save ;
-}
+    int wc_Md5Hash(const byte* data, word32 len, byte* hash)
+    {
+        int ret;
+    #ifdef WOLFSSL_SMALL_STACK
+        Md5* md5;
+    #else
+        Md5  md5[1];
+    #endif
 
-WOLFSSL_API void wc_Md5RestorePos(Md5* m1, Md5* m2) {
-    *m1 = *m2 ;
-}
+    #ifdef WOLFSSL_SMALL_STACK
+        md5 = (Md5*)XMALLOC(sizeof(Md5), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        if (md5 == NULL)
+            return MEMORY_E;
+    #endif
 
-#endif
+        ret = wc_InitMd5(md5);
+        if (ret == 0) {
+            ret = wc_Md5Update(md5, data, len);
+            if (ret == 0) {
+                ret = wc_Md5Final(md5, hash);
+            }
+        }
+
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(md5, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
+
+        return ret;
+    }
+#endif /* !NO_MD5 */
 
 #if !defined(NO_SHA)
-int wc_ShaGetHash(Sha* sha, byte* hash)
-{
-    int ret ;
-    Sha save = *sha ;
-    ret = wc_ShaFinal(sha, hash) ;
-    *sha = save ;
-    return ret ;
-}
+    int wc_ShaHash(const byte* data, word32 len, byte* hash)
+    {
+        int ret = 0;
+    #ifdef WOLFSSL_SMALL_STACK
+        Sha* sha;
+    #else
+        Sha sha[1];
+    #endif
 
-void wc_ShaRestorePos(Sha* s1, Sha* s2) {
-    *s1 = *s2 ;
-}
+    #ifdef WOLFSSL_SMALL_STACK
+        sha = (Sha*)XMALLOC(sizeof(Sha), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        if (sha == NULL)
+            return MEMORY_E;
+    #endif
 
-int wc_ShaHash(const byte* data, word32 len, byte* hash)
-{
-    int ret = 0;
-#ifdef WOLFSSL_SMALL_STACK
-    Sha* sha;
-#else
-    Sha sha[1];
-#endif
+        /* one-shot hash: stop at, and return, the first init/update/final error */
+        if ((ret = wc_InitSha(sha)) != 0) {
+            WOLFSSL_MSG("wc_InitSha failed");
+        }
+        else if ((ret = wc_ShaUpdate(sha, data, len)) == 0) {
+            ret = wc_ShaFinal(sha, hash);
+        }
 
-#ifdef WOLFSSL_SMALL_STACK
-    sha = (Sha*)XMALLOC(sizeof(Sha), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (sha == NULL)
-        return MEMORY_E;
-#endif
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(sha, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
 
-    if ((ret = wc_InitSha(sha)) != 0) {
-        WOLFSSL_MSG("wc_InitSha failed");
+        return ret;
     }
-    else {
-        wc_ShaUpdate(sha, data, len);
-        wc_ShaFinal(sha, hash);
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(sha, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return ret;
-
-}
-
-#endif /* !defined(NO_SHA) */
+#endif /* !NO_SHA */
 
 #if defined(WOLFSSL_SHA224)
-int wc_Sha224GetHash(Sha224* sha224, byte* hash)
-{
-    int ret;
-    Sha224 save;
-
-    if (sha224 == NULL || hash == NULL)
-        return BAD_FUNC_ARG;
-
-    save= *sha224;
-    ret = wc_Sha224Final(sha224, hash);
-    *sha224 = save;
-
-    return ret;
-}
-
 int wc_Sha224Hash(const byte* data, word32 len, byte* hash)
 {
     int ret = 0;
@@ -495,154 +483,109 @@ int wc_Sha224Hash(const byte* data, word32 len, byte* hash)
 
     return ret;
 }
-
-#endif /* defined(WOLFSSL_SHA224) */
+#endif /* WOLFSSL_SHA224 */
 
 #if !defined(NO_SHA256)
-int wc_Sha256GetHash(Sha256* sha256, byte* hash)
-{
-    int ret ;
-    Sha256 save = *sha256 ;
-    ret = wc_Sha256Final(sha256, hash) ;
-    *sha256 = save ;
-    return ret ;
-}
+    int wc_Sha256Hash(const byte* data, word32 len, byte* hash)
+    {
+        int ret = 0;
+    #ifdef WOLFSSL_SMALL_STACK
+        Sha256* sha256;
+    #else
+        Sha256 sha256[1];
+    #endif
 
-void wc_Sha256RestorePos(Sha256* s1, Sha256* s2) {
-    *s1 = *s2 ;
-}
+    #ifdef WOLFSSL_SMALL_STACK
+        sha256 = (Sha256*)XMALLOC(sizeof(Sha256), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        if (sha256 == NULL)
+            return MEMORY_E;
+    #endif
 
-int wc_Sha256Hash(const byte* data, word32 len, byte* hash)
-{
-    int ret = 0;
-#ifdef WOLFSSL_SMALL_STACK
-    Sha256* sha256;
-#else
-    Sha256 sha256[1];
-#endif
+        if ((ret = wc_InitSha256(sha256)) != 0) {
+            WOLFSSL_MSG("InitSha256 failed");
+        }
+        else if ((ret = wc_Sha256Update(sha256, data, len)) != 0) {
+            WOLFSSL_MSG("Sha256Update failed");
+        }
+        else if ((ret = wc_Sha256Final(sha256, hash)) != 0) {
+            WOLFSSL_MSG("Sha256Final failed");
+        }
 
-#ifdef WOLFSSL_SMALL_STACK
-    sha256 = (Sha256*)XMALLOC(sizeof(Sha256), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (sha256 == NULL)
-        return MEMORY_E;
-#endif
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(sha256, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
 
-    if ((ret = wc_InitSha256(sha256)) != 0) {
-        WOLFSSL_MSG("InitSha256 failed");
+        return ret;
     }
-    else if ((ret = wc_Sha256Update(sha256, data, len)) != 0) {
-        WOLFSSL_MSG("Sha256Update failed");
-    }
-    else if ((ret = wc_Sha256Final(sha256, hash)) != 0) {
-        WOLFSSL_MSG("Sha256Final failed");
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(sha256, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return ret;
-}
-
-#endif /* !defined(NO_SHA256) */
+#endif /* !NO_SHA256 */
 
 #endif /* !defined(WOLFSSL_TI_HASH) */
 
+
 #if defined(WOLFSSL_SHA512)
-int wc_Sha512GetHash(Sha512* sha512, byte* hash)
-{
-    int ret;
-    Sha512 save;
+    int wc_Sha512Hash(const byte* data, word32 len, byte* hash)
+    {
+        int ret = 0;
+    #ifdef WOLFSSL_SMALL_STACK
+        Sha512* sha512;
+    #else
+        Sha512 sha512[1];
+    #endif
 
-    if (sha512 == NULL || hash == NULL)
-        return BAD_FUNC_ARG;
+    #ifdef WOLFSSL_SMALL_STACK
+        sha512 = (Sha512*)XMALLOC(sizeof(Sha512), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        if (sha512 == NULL)
+            return MEMORY_E;
+    #endif
 
-    save= *sha512;
-    ret = wc_Sha512Final(sha512, hash);
-    *sha512 = save;
+        if ((ret = wc_InitSha512(sha512)) != 0) {
+            WOLFSSL_MSG("InitSha512 failed");
+        }
+        else if ((ret = wc_Sha512Update(sha512, data, len)) != 0) {
+            WOLFSSL_MSG("Sha512Update failed");
+        }
+        else if ((ret = wc_Sha512Final(sha512, hash)) != 0) {
+            WOLFSSL_MSG("Sha512Final failed");
+        }
 
-    return ret;
-}
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(sha512, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
 
-int wc_Sha512Hash(const byte* data, word32 len, byte* hash)
-{
-    int ret = 0;
-#ifdef WOLFSSL_SMALL_STACK
-    Sha512* sha512;
-#else
-    Sha512 sha512[1];
-#endif
-
-#ifdef WOLFSSL_SMALL_STACK
-    sha512 = (Sha512*)XMALLOC(sizeof(Sha512), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (sha512 == NULL)
-        return MEMORY_E;
-#endif
-
-    if ((ret = wc_InitSha512(sha512)) != 0) {
-        WOLFSSL_MSG("InitSha512 failed");
-    }
-    else if ((ret = wc_Sha512Update(sha512, data, len)) != 0) {
-        WOLFSSL_MSG("Sha512Update failed");
-    }
-    else if ((ret = wc_Sha512Final(sha512, hash)) != 0) {
-        WOLFSSL_MSG("Sha512Final failed");
+        return ret;
     }
 
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(sha512, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
+    #if defined(WOLFSSL_SHA384)
+        int wc_Sha384Hash(const byte* data, word32 len, byte* hash)
+        {
+            int ret = 0;
+        #ifdef WOLFSSL_SMALL_STACK
+            Sha384* sha384;
+        #else
+            Sha384 sha384[1];
+        #endif
 
-    return ret;
-}
+        #ifdef WOLFSSL_SMALL_STACK
+            sha384 = (Sha384*)XMALLOC(sizeof(Sha384), NULL, DYNAMIC_TYPE_TMP_BUFFER);
+            if (sha384 == NULL)
+                return MEMORY_E;
+        #endif
 
-#if defined(WOLFSSL_SHA384)
-int wc_Sha384GetHash(Sha384* sha384, byte* hash)
-{
-    int ret;
-    Sha384 save;
+            if ((ret = wc_InitSha384(sha384)) != 0) {
+                WOLFSSL_MSG("InitSha384 failed");
+            }
+            else if ((ret = wc_Sha384Update(sha384, data, len)) != 0) {
+                WOLFSSL_MSG("Sha384Update failed");
+            }
+            else if ((ret = wc_Sha384Final(sha384, hash)) != 0) {
+                WOLFSSL_MSG("Sha384Final failed");
+            }
 
-    if (sha384 == NULL || hash == NULL)
-        return BAD_FUNC_ARG;
+        #ifdef WOLFSSL_SMALL_STACK
+            XFREE(sha384, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        #endif
 
-    save= *sha384;
-    ret = wc_Sha384Final(sha384, hash);
-    *sha384 = save;
-
-    return ret;
-}
-
-int wc_Sha384Hash(const byte* data, word32 len, byte* hash)
-{
-    int ret = 0;
-#ifdef WOLFSSL_SMALL_STACK
-    Sha384* sha384;
-#else
-    Sha384 sha384[1];
-#endif
-
-#ifdef WOLFSSL_SMALL_STACK
-    sha384 = (Sha384*)XMALLOC(sizeof(Sha384), NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (sha384 == NULL)
-        return MEMORY_E;
-#endif
-
-    if ((ret = wc_InitSha384(sha384)) != 0) {
-        WOLFSSL_MSG("InitSha384 failed");
-    }
-    else if ((ret = wc_Sha384Update(sha384, data, len)) != 0) {
-        WOLFSSL_MSG("Sha384Update failed");
-    }
-    else if ((ret = wc_Sha384Final(sha384, hash)) != 0) {
-        WOLFSSL_MSG("Sha384Final failed");
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(sha384, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return ret;
-}
-
-#endif /* defined(WOLFSSL_SHA384) */
-#endif /* defined(WOLFSSL_SHA512) */
+            return ret;
+        }
+    #endif /* WOLFSSL_SHA384 */
+#endif /* WOLFSSL_SHA512 */
diff --git a/wolfcrypt/src/hmac.c b/wolfcrypt/src/hmac.c
old mode 100644
new mode 100755
index 271ccd43b..90ad53965
--- a/wolfcrypt/src/hmac.c
+++ b/wolfcrypt/src/hmac.c
@@ -38,83 +38,79 @@
 #endif
 
 
+/* fips wrapper calls, user can call direct */
 #ifdef HAVE_FIPS
-/* does init */
-int wc_HmacSetKey(Hmac* hmac, int type, const byte* key, word32 keySz)
-{
-    return HmacSetKey_fips(hmac, type, key, keySz);
-}
-
-
-int wc_HmacUpdate(Hmac* hmac, const byte* in, word32 sz)
-{
-    return HmacUpdate_fips(hmac, in, sz);
-}
-
-
-int wc_HmacFinal(Hmac* hmac, byte* out)
-{
-    return HmacFinal_fips(hmac, out);
-}
-
-
-#ifdef WOLFSSL_ASYNC_CRYPT
-    int  wc_HmacAsyncInit(Hmac* hmac, int i)
+    /* does init */
+    int wc_HmacSetKey(Hmac* hmac, int type, const byte* key, word32 keySz)
     {
-        return HmacAsyncInit(hmac, i);
+        return HmacSetKey_fips(hmac, type, key, keySz);
+    }
+    int wc_HmacUpdate(Hmac* hmac, const byte* in, word32 sz)
+    {
+        return HmacUpdate_fips(hmac, in, sz);
+    }
+    int wc_HmacFinal(Hmac* hmac, byte* out)
+    {
+        return HmacFinal_fips(hmac, out);
+    }
+    int wolfSSL_GetHmacMaxSize(void)
+    {
+        return CyaSSL_GetHmacMaxSize();
     }
 
-
-    void wc_HmacAsyncFree(Hmac* hmac)
+    int wc_HmacInit(Hmac* hmac, void* heap, int devId)
     {
-        HmacAsyncFree(hmac);
+        (void)hmac;
+        (void)heap;
+        (void)devId;
+        /* FIPS doesn't support:
+            return HmacInit(hmac, heap, devId); */
+        return 0;
+    }
+    void wc_HmacFree(Hmac* hmac)
+    {
+        (void)hmac;
+        /* FIPS doesn't support:
+            HmacFree(hmac); */
     }
-#endif
 
-int wolfSSL_GetHmacMaxSize(void)
-{
-    return CyaSSL_GetHmacMaxSize();
-}
-
-#ifdef HAVE_HKDF
-
-int wc_HKDF(int type, const byte* inKey, word32 inKeySz,
+    #ifdef HAVE_HKDF
+        int wc_HKDF(int type, const byte* inKey, word32 inKeySz,
                     const byte* salt, word32 saltSz,
                     const byte* info, word32 infoSz,
                     byte* out, word32 outSz)
-{
-    return HKDF(type, inKey, inKeySz, salt, saltSz, info, infoSz, out, outSz);
-}
+        {
+            return HKDF(type, inKey, inKeySz, salt, saltSz,
+                info, infoSz, out, outSz);
+        }
+    #endif /* HAVE_HKDF */
 
-
-#endif /* HAVE_HKDF */
 #else /* else build without fips */
-#ifdef WOLFSSL_PIC32MZ_HASH
 
-#define wc_InitMd5   wc_InitMd5_sw
-#define wc_Md5Update wc_Md5Update_sw
-#define wc_Md5Final  wc_Md5Final_sw
-
-#define wc_InitSha   wc_InitSha_sw
-#define wc_ShaUpdate wc_ShaUpdate_sw
-#define wc_ShaFinal  wc_ShaFinal_sw
-
-#define wc_InitSha256   wc_InitSha256_sw
-#define wc_Sha256Update wc_Sha256Update_sw
-#define wc_Sha256Final  wc_Sha256Final_sw
-
-#endif
-
-#ifdef HAVE_FIPS
-    /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
-    #define FIPS_NO_WRAPPERS
-#endif
 
 #include 
 
 
+#ifdef WOLFSSL_PIC32MZ_HASH
+    #define wc_InitMd5   wc_InitMd5_sw
+    #define wc_Md5Update wc_Md5Update_sw
+    #define wc_Md5Final  wc_Md5Final_sw
+
+    #define wc_InitSha   wc_InitSha_sw
+    #define wc_ShaUpdate wc_ShaUpdate_sw
+    #define wc_ShaFinal  wc_ShaFinal_sw
+
+    #define wc_InitSha256   wc_InitSha256_sw
+    #define wc_Sha256Update wc_Sha256Update_sw
+    #define wc_Sha256Final  wc_Sha256Final_sw
+#endif /* WOLFSSL_PIC32MZ_HASH */
+
+
+
 int wc_HmacSizeByType(int type)
 {
+    int ret;
+
     if (!(type == MD5 || type == SHA    || type == SHA256 || type == SHA384
                       || type == SHA512 || type == BLAKE2B_ID
                       || type == SHA224)) {
@@ -124,121 +120,149 @@ int wc_HmacSizeByType(int type)
     switch (type) {
     #ifndef NO_MD5
         case MD5:
-            return MD5_DIGEST_SIZE;
-    #endif
+            ret = MD5_DIGEST_SIZE;
+            break;
+    #endif /* !NO_MD5 */
 
     #ifndef NO_SHA
         case SHA:
-            return SHA_DIGEST_SIZE;
-    #endif
+            ret = SHA_DIGEST_SIZE;
+            break;
+    #endif /* !NO_SHA */
 
     #ifdef WOLFSSL_SHA224
         case SHA224:
-            return SHA224_DIGEST_SIZE;
-    #endif
+            ret = SHA224_DIGEST_SIZE;
+            break;
+    #endif /* WOLFSSL_SHA224 */
 
     #ifndef NO_SHA256
         case SHA256:
-            return SHA256_DIGEST_SIZE;
-    #endif
-
-    #ifdef WOLFSSL_SHA384
-        case SHA384:
-            return SHA384_DIGEST_SIZE;
-    #endif
+            ret = SHA256_DIGEST_SIZE;
+            break;
+    #endif /* !NO_SHA256 */
 
     #ifdef WOLFSSL_SHA512
+    #ifdef WOLFSSL_SHA384
+        case SHA384:
+            ret = SHA384_DIGEST_SIZE;
+            break;
+    #endif /* WOLFSSL_SHA384 */
         case SHA512:
-            return SHA512_DIGEST_SIZE;
-    #endif
+            ret = SHA512_DIGEST_SIZE;
+            break;
+    #endif /* WOLFSSL_SHA512 */
 
     #ifdef HAVE_BLAKE2
         case BLAKE2B_ID:
-            return BLAKE2B_OUTBYTES;
-    #endif
+            ret = BLAKE2B_OUTBYTES;
+            break;
+    #endif /* HAVE_BLAKE2 */
 
         default:
-            return BAD_FUNC_ARG;
+            ret = BAD_FUNC_ARG;
+            break;
     }
+
+    return ret;
 }
 
-static int InitHmac(Hmac* hmac, int type)
+static int _InitHmac(Hmac* hmac, int type, void* heap)
 {
     int ret = 0;
 
-    hmac->innerHashKeyed = 0;
-    hmac->macType = (byte)type;
-
-    if (!(type == MD5 || type == SHA    || type == SHA256 || type == SHA384
-                      || type == SHA512 || type == BLAKE2B_ID
-                      || type == SHA224))
-        return BAD_FUNC_ARG;
-
     switch (type) {
-        #ifndef NO_MD5
+    #ifndef NO_MD5
         case MD5:
-            wc_InitMd5(&hmac->hash.md5);
-        break;
-        #endif
+            ret = wc_InitMd5(&hmac->hash.md5);
+            break;
+    #endif /* !NO_MD5 */
 
-        #ifndef NO_SHA
+    #ifndef NO_SHA
         case SHA:
             ret = wc_InitSha(&hmac->hash.sha);
-        break;
-        #endif
+            break;
+    #endif /* !NO_SHA */
 
-        #ifdef WOLFSSL_SHA224
+    #ifdef WOLFSSL_SHA224
         case SHA224:
             ret = wc_InitSha224(&hmac->hash.sha224);
-        break;
-        #endif
+            break;
+    #endif /* WOLFSSL_SHA224 */
 
-        #ifndef NO_SHA256
+    #ifndef NO_SHA256
         case SHA256:
             ret = wc_InitSha256(&hmac->hash.sha256);
-        break;
-        #endif
+            break;
+    #endif /* !NO_SHA256 */
 
-        #ifdef WOLFSSL_SHA384
+    #ifdef WOLFSSL_SHA512
+    #ifdef WOLFSSL_SHA384
         case SHA384:
             ret = wc_InitSha384(&hmac->hash.sha384);
-        break;
-        #endif
-
-        #ifdef WOLFSSL_SHA512
+            break;
+    #endif /* WOLFSSL_SHA384 */
         case SHA512:
             ret = wc_InitSha512(&hmac->hash.sha512);
-        break;
-        #endif
+            break;
+    #endif /* WOLFSSL_SHA512 */
 
-        #ifdef HAVE_BLAKE2
+    #ifdef HAVE_BLAKE2
         case BLAKE2B_ID:
             ret = wc_InitBlake2b(&hmac->hash.blake2b, BLAKE2B_256);
-        break;
-        #endif
+            break;
+    #endif /* HAVE_BLAKE2 */
 
         default:
-            return BAD_FUNC_ARG;
+            ret = BAD_FUNC_ARG;
+            break;
     }
 
+    /* store heap hint: test value if WOLFSSL_HEAP_TEST, else caller's */
+#ifdef WOLFSSL_HEAP_TEST
+    hmac->heap = (void*)WOLFSSL_HEAP_TEST;
+#else
+    hmac->heap = heap;
+#endif /* WOLFSSL_HEAP_TEST */
+
     return ret;
 }
 
 
 
 int wc_HmacSetKey(Hmac* hmac, int type, const byte* key, word32 length)
 {
-    byte*  ip = (byte*) hmac->ipad;
-    byte*  op = (byte*) hmac->opad;
+    byte*  ip;
+    byte*  op;
     word32 i, hmac_block_size = 0;
-    int    ret;
+    int    ret = 0;
+    void*  heap = NULL;
 
-#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
-    if (hmac->asyncDev.marker == WOLFSSL_ASYNC_MARKER_HMAC) {
-        return NitroxHmacSetKey(hmac, type, key, length);
+    if (hmac == NULL || key == NULL ||
+        !(type == MD5 || type == SHA    || type == SHA256 || type == SHA384
+                      || type == SHA512 || type == BLAKE2B_ID
+                      || type == SHA224)) {
+        return BAD_FUNC_ARG;
     }
-#endif
 
-    ret = InitHmac(hmac, type);
+    hmac->innerHashKeyed = 0;
+    hmac->macType = (byte)type;
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC)
+    if (hmac->asyncDev.marker == WOLFSSL_ASYNC_MARKER_HMAC) {
+    #if defined(HAVE_CAVIUM) || defined(HAVE_INTEL_QA)
+        if (length > HMAC_BLOCK_SIZE) {
+            return WC_KEY_SIZE_E;
+        }
+
+        XMEMCPY(hmac->keyRaw, key, length);
+        hmac->keyLen = (word16)length;
+
+        return 0; /* nothing to do here */
+    #endif /* HAVE_CAVIUM || HAVE_INTEL_QA */
+    }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+    ret = _InitHmac(hmac, type, heap);
     if (ret != 0)
         return ret;
 
@@ -247,40 +271,48 @@ int wc_HmacSetKey(Hmac* hmac, int type, const byte* key, word32 length)
         return HMAC_MIN_KEYLEN_E;
 #endif
 
+    ip = (byte*)hmac->ipad;
+    op = (byte*)hmac->opad;
+
     switch (hmac->macType) {
-        #ifndef NO_MD5
+    #ifndef NO_MD5
         case MD5:
-        {
             hmac_block_size = MD5_BLOCK_SIZE;
             if (length <= MD5_BLOCK_SIZE) {
                 XMEMCPY(ip, key, length);
             }
             else {
-                wc_Md5Update(&hmac->hash.md5, key, length);
-                wc_Md5Final(&hmac->hash.md5, ip);
+                ret = wc_Md5Update(&hmac->hash.md5, key, length);
+                if (ret != 0)
+                    break;
+                ret = wc_Md5Final(&hmac->hash.md5, ip);
+                if (ret != 0)
+                    break;
                 length = MD5_DIGEST_SIZE;
             }
-        }
-        break;
-        #endif
+            break;
+    #endif /* !NO_MD5 */
 
-        #ifndef NO_SHA
+    #ifndef NO_SHA
         case SHA:
-        {
             hmac_block_size = SHA_BLOCK_SIZE;
             if (length <= SHA_BLOCK_SIZE) {
                 XMEMCPY(ip, key, length);
             }
             else {
-                wc_ShaUpdate(&hmac->hash.sha, key, length);
-                wc_ShaFinal(&hmac->hash.sha, ip);
+                ret = wc_ShaUpdate(&hmac->hash.sha, key, length);
+                if (ret != 0)
+                    break;
+                ret = wc_ShaFinal(&hmac->hash.sha, ip);
+                if (ret != 0)
+                    break;
+
                 length = SHA_DIGEST_SIZE;
             }
-        }
-        break;
-        #endif
+            break;
+    #endif /* !NO_SHA */
 
-        #ifdef WOLFSSL_SHA224
+    #ifdef WOLFSSL_SHA224
         case SHA224:
         {
             hmac_block_size = SHA224_BLOCK_SIZE;
@@ -291,7 +323,6 @@ int wc_HmacSetKey(Hmac* hmac, int type, const byte* key, word32 length)
                 ret = wc_Sha224Update(&hmac->hash.sha224, key, length);
                 if (ret != 0)
                     return ret;
-
                 ret = wc_Sha224Final(&hmac->hash.sha224, ip);
                 if (ret != 0)
                     return ret;
@@ -300,11 +331,10 @@ int wc_HmacSetKey(Hmac* hmac, int type, const byte* key, word32 length)
             }
         }
         break;
-        #endif
+    #endif /* WOLFSSL_SHA224 */
 
-        #ifndef NO_SHA256
+    #ifndef NO_SHA256
         case SHA256:
-        {
     		hmac_block_size = SHA256_BLOCK_SIZE;
             if (length <= SHA256_BLOCK_SIZE) {
                 XMEMCPY(ip, key, length);
@@ -312,21 +342,19 @@ int wc_HmacSetKey(Hmac* hmac, int type, const byte* key, word32 length)
             else {
                 ret = wc_Sha256Update(&hmac->hash.sha256, key, length);
                 if (ret != 0)
-                    return ret;
-
+                    break;
                 ret = wc_Sha256Final(&hmac->hash.sha256, ip);
                 if (ret != 0)
-                    return ret;
+                    break;
 
                 length = SHA256_DIGEST_SIZE;
             }
-        }
-        break;
-        #endif
+            break;
+    #endif /* !NO_SHA256 */
 
-        #ifdef WOLFSSL_SHA384
+    #ifdef WOLFSSL_SHA512
+    #ifdef WOLFSSL_SHA384
         case SHA384:
-        {
             hmac_block_size = SHA384_BLOCK_SIZE;
             if (length <= SHA384_BLOCK_SIZE) {
                 XMEMCPY(ip, key, length);
@@ -334,21 +362,16 @@ int wc_HmacSetKey(Hmac* hmac, int type, const byte* key, word32 length)
             else {
                 ret = wc_Sha384Update(&hmac->hash.sha384, key, length);
                 if (ret != 0)
-                    return ret;
-
+                    break;
                 ret = wc_Sha384Final(&hmac->hash.sha384, ip);
                 if (ret != 0)
-                    return ret;
+                    break;
 
                 length = SHA384_DIGEST_SIZE;
             }
-        }
-        break;
-        #endif
-
-        #ifdef WOLFSSL_SHA512
+            break;
+    #endif /* WOLFSSL_SHA384 */
         case SHA512:
-        {
             hmac_block_size = SHA512_BLOCK_SIZE;
             if (length <= SHA512_BLOCK_SIZE) {
                 XMEMCPY(ip, key, length);
@@ -356,21 +379,18 @@ int wc_HmacSetKey(Hmac* hmac, int type, const byte* key, word32 length)
             else {
                 ret = wc_Sha512Update(&hmac->hash.sha512, key, length);
                 if (ret != 0)
-                    return ret;
-
+                    break;
                 ret = wc_Sha512Final(&hmac->hash.sha512, ip);
                 if (ret != 0)
-                    return ret;
+                    break;
 
                 length = SHA512_DIGEST_SIZE;
             }
-        }
-        break;
-        #endif
+            break;
+    #endif /* WOLFSSL_SHA512 */
 
-        #ifdef HAVE_BLAKE2
+    #ifdef HAVE_BLAKE2
         case BLAKE2B_ID:
-        {
             hmac_block_size = BLAKE2B_BLOCKBYTES;
             if (length <= BLAKE2B_BLOCKBYTES) {
                 XMEMCPY(ip, key, length);
@@ -378,29 +398,31 @@ int wc_HmacSetKey(Hmac* hmac, int type, const byte* key, word32 length)
             else {
                 ret = wc_Blake2bUpdate(&hmac->hash.blake2b, key, length);
                 if (ret != 0)
-                    return ret;
-
+                    break;
                 ret = wc_Blake2bFinal(&hmac->hash.blake2b, ip, BLAKE2B_256);
                 if (ret != 0)
-                    return ret;
+                    break;
 
                 length = BLAKE2B_256;
             }
-        }
-        break;
-        #endif
+            break;
+    #endif /* HAVE_BLAKE2 */
 
         default:
             return BAD_FUNC_ARG;
     }
-    if (length < hmac_block_size)
-        XMEMSET(ip + length, 0, hmac_block_size - length);
 
-    for(i = 0; i < hmac_block_size; i++) {
-        op[i] = ip[i] ^ OPAD;
-        ip[i] ^= IPAD;
+    if (ret == 0) {
+        if (length < hmac_block_size)
+            XMEMSET(ip + length, 0, hmac_block_size - length);
+
+        for(i = 0; i < hmac_block_size; i++) {
+            op[i] = ip[i] ^ OPAD;
+            ip[i] ^= IPAD;
+        }
     }
-    return 0;
+
+    return ret;
 }
 
 
@@ -409,68 +431,60 @@ static int HmacKeyInnerHash(Hmac* hmac)
     int ret = 0;
 
     switch (hmac->macType) {
-        #ifndef NO_MD5
+    #ifndef NO_MD5
         case MD5:
-            wc_Md5Update(&hmac->hash.md5, (byte*) hmac->ipad, MD5_BLOCK_SIZE);
-        break;
-        #endif
+            ret = wc_Md5Update(&hmac->hash.md5, (byte*)hmac->ipad,
+                                                                MD5_BLOCK_SIZE);
+            break;
+    #endif /* !NO_MD5 */
 
-        #ifndef NO_SHA
+    #ifndef NO_SHA
         case SHA:
-            wc_ShaUpdate(&hmac->hash.sha, (byte*) hmac->ipad, SHA_BLOCK_SIZE);
-        break;
-        #endif
+            ret = wc_ShaUpdate(&hmac->hash.sha, (byte*)hmac->ipad,
+                                                                SHA_BLOCK_SIZE);
+            break;
+    #endif /* !NO_SHA */
 
-        #ifdef WOLFSSL_SHA224
+    #ifdef WOLFSSL_SHA224
         case SHA224:
-            ret = wc_Sha224Update(&hmac->hash.sha224,
-                                         (byte*) hmac->ipad, SHA224_BLOCK_SIZE);
-            if (ret != 0)
-                return ret;
-        break;
-        #endif
+            ret = wc_Sha224Update(&hmac->hash.sha224, (byte*)hmac->ipad,
+                                                             SHA224_BLOCK_SIZE);
+            break;
+    #endif /* WOLFSSL_SHA224 */
 
-        #ifndef NO_SHA256
+    #ifndef NO_SHA256
         case SHA256:
-            ret = wc_Sha256Update(&hmac->hash.sha256,
-                                         (byte*) hmac->ipad, SHA256_BLOCK_SIZE);
-            if (ret != 0)
-                return ret;
-        break;
-        #endif
+            ret = wc_Sha256Update(&hmac->hash.sha256, (byte*)hmac->ipad,
+                                                             SHA256_BLOCK_SIZE);
+            break;
+    #endif /* !NO_SHA256 */
 
-        #ifdef WOLFSSL_SHA384
+    #ifdef WOLFSSL_SHA512
+    #ifdef WOLFSSL_SHA384
         case SHA384:
-            ret = wc_Sha384Update(&hmac->hash.sha384,
-                                         (byte*) hmac->ipad, SHA384_BLOCK_SIZE);
-            if (ret != 0)
-                return ret;
-        break;
-        #endif
-
-        #ifdef WOLFSSL_SHA512
+            ret = wc_Sha384Update(&hmac->hash.sha384, (byte*)hmac->ipad,
+                                                             SHA384_BLOCK_SIZE);
+            break;
+    #endif /* WOLFSSL_SHA384 */
         case SHA512:
-            ret = wc_Sha512Update(&hmac->hash.sha512,
-                                         (byte*) hmac->ipad, SHA512_BLOCK_SIZE);
-            if (ret != 0)
-                return ret;
-        break;
-        #endif
+            ret = wc_Sha512Update(&hmac->hash.sha512, (byte*)hmac->ipad,
+                                                             SHA512_BLOCK_SIZE);
+            break;
+    #endif /* WOLFSSL_SHA512 */
 
-        #ifdef HAVE_BLAKE2
+    #ifdef HAVE_BLAKE2
         case BLAKE2B_ID:
-            ret = wc_Blake2bUpdate(&hmac->hash.blake2b,
-                                         (byte*) hmac->ipad,BLAKE2B_BLOCKBYTES);
-            if (ret != 0)
-                return ret;
-        break;
-        #endif
+            ret = wc_Blake2bUpdate(&hmac->hash.blake2b, (byte*)hmac->ipad,
+                                                            BLAKE2B_BLOCKBYTES);
+            break;
+    #endif /* HAVE_BLAKE2 */
 
         default:
-        break;
+            break;
     }
 
-    hmac->innerHashKeyed = 1;
+    if (ret == 0)
+        hmac->innerHashKeyed = 1;
 
     return ret;
 }
@@ -478,13 +492,18 @@ static int HmacKeyInnerHash(Hmac* hmac)
 
 int wc_HmacUpdate(Hmac* hmac, const byte* msg, word32 length)
 {
-    int ret;
+    int ret = 0;
 
-#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC)
     if (hmac->asyncDev.marker == WOLFSSL_ASYNC_MARKER_HMAC) {
+    #if defined(HAVE_CAVIUM)
         return NitroxHmacUpdate(hmac, msg, length);
+    #elif defined(HAVE_INTEL_QA)
+        return IntelQaHmac(&hmac->asyncDev, hmac->macType,
+            hmac->keyRaw, hmac->keyLen, NULL, msg, length);
+    #endif
     }
-#endif
+#endif /* WOLFSSL_ASYNC_CRYPT */
 
     if (!hmac->innerHashKeyed) {
         ret = HmacKeyInnerHash(hmac);
@@ -493,63 +512,52 @@ int wc_HmacUpdate(Hmac* hmac, const byte* msg, word32 length)
     }
 
     switch (hmac->macType) {
-        #ifndef NO_MD5
+    #ifndef NO_MD5
         case MD5:
-            wc_Md5Update(&hmac->hash.md5, msg, length);
-        break;
-        #endif
+            ret = wc_Md5Update(&hmac->hash.md5, msg, length);
+            break;
+    #endif /* !NO_MD5 */
 
-        #ifndef NO_SHA
+    #ifndef NO_SHA
         case SHA:
-            wc_ShaUpdate(&hmac->hash.sha, msg, length);
-        break;
-        #endif
+            ret = wc_ShaUpdate(&hmac->hash.sha, msg, length);
+            break;
+    #endif /* !NO_SHA */
 
-        #ifdef WOLFSSL_SHA224
+    #ifdef WOLFSSL_SHA224
         case SHA224:
             ret = wc_Sha224Update(&hmac->hash.sha224, msg, length);
-            if (ret != 0)
-                return ret;
-        break;
-        #endif
+            break;
+    #endif /* WOLFSSL_SHA224 */
 
-        #ifndef NO_SHA256
+    #ifndef NO_SHA256
         case SHA256:
             ret = wc_Sha256Update(&hmac->hash.sha256, msg, length);
-            if (ret != 0)
-                return ret;
-        break;
-        #endif
+            break;
+    #endif /* !NO_SHA256 */
 
-        #ifdef WOLFSSL_SHA384
+    #ifdef WOLFSSL_SHA512
+    #ifdef WOLFSSL_SHA384
         case SHA384:
             ret = wc_Sha384Update(&hmac->hash.sha384, msg, length);
-            if (ret != 0)
-                return ret;
-        break;
-        #endif
-
-        #ifdef WOLFSSL_SHA512
+            break;
+    #endif /* WOLFSSL_SHA384 */
         case SHA512:
             ret = wc_Sha512Update(&hmac->hash.sha512, msg, length);
-            if (ret != 0)
-                return ret;
-        break;
-        #endif
+            break;
+    #endif /* WOLFSSL_SHA512 */
 
-        #ifdef HAVE_BLAKE2
+    #ifdef HAVE_BLAKE2
         case BLAKE2B_ID:
             ret = wc_Blake2bUpdate(&hmac->hash.blake2b, msg, length);
-            if (ret != 0)
-                return ret;
-        break;
-        #endif
+            break;
+    #endif /* HAVE_BLAKE2 */
 
         default:
-        break;
+            break;
     }
 
-    return 0;
+    return ret;
 }
 
 
@@ -557,11 +565,21 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
 {
     int ret;
 
-#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC)
     if (hmac->asyncDev.marker == WOLFSSL_ASYNC_MARKER_HMAC) {
-        return NitroxHmacFinal(hmac, hash);
+        int hashLen = wc_HmacSizeByType(hmac->macType);
+        if (hashLen <= 0)
+            return hashLen;
+
+    #if defined(HAVE_CAVIUM)
+        return NitroxHmacFinal(hmac, hmac->macType, hash, hashLen);
+    #elif defined(HAVE_INTEL_QA)
+        return IntelQaHmac(&hmac->asyncDev, hmac->macType,
+            hmac->keyRaw, hmac->keyLen, hash, NULL, hashLen);
+    #endif
+        (void)hashLen;
     }
-#endif
+#endif /* WOLFSSL_ASYNC_CRYPT */
 
     if (!hmac->innerHashKeyed) {
         ret = HmacKeyInnerHash(hmac);
@@ -570,216 +588,183 @@ int wc_HmacFinal(Hmac* hmac, byte* hash)
     }
 
     switch (hmac->macType) {
-        #ifndef NO_MD5
+    #ifndef NO_MD5
         case MD5:
-        {
-            wc_Md5Final(&hmac->hash.md5, (byte*) hmac->innerHash);
+            ret = wc_Md5Final(&hmac->hash.md5, (byte*)hmac->innerHash);
+            if (ret != 0)
+                break;
+            ret = wc_Md5Update(&hmac->hash.md5, (byte*)hmac->opad,
+                                                                MD5_BLOCK_SIZE);
+            if (ret != 0)
+                break;
+            ret = wc_Md5Update(&hmac->hash.md5, (byte*)hmac->innerHash,
+                                                               MD5_DIGEST_SIZE);
+            if (ret != 0)
+                break;
+            ret = wc_Md5Final(&hmac->hash.md5, hash);
+            break;
+    #endif /* !NO_MD5 */
 
-            wc_Md5Update(&hmac->hash.md5, (byte*) hmac->opad, MD5_BLOCK_SIZE);
-            wc_Md5Update(&hmac->hash.md5,
-                                     (byte*) hmac->innerHash, MD5_DIGEST_SIZE);
-
-            wc_Md5Final(&hmac->hash.md5, hash);
-        }
-        break;
-        #endif
-
-        #ifndef NO_SHA
+    #ifndef NO_SHA
         case SHA:
-        {
-            wc_ShaFinal(&hmac->hash.sha, (byte*) hmac->innerHash);
+            ret = wc_ShaFinal(&hmac->hash.sha, (byte*)hmac->innerHash);
+            if (ret != 0)
+                break;
+            ret = wc_ShaUpdate(&hmac->hash.sha, (byte*)hmac->opad,
+                                                                SHA_BLOCK_SIZE);
+            if (ret != 0)
+                break;
+            ret = wc_ShaUpdate(&hmac->hash.sha, (byte*)hmac->innerHash,
+                                                               SHA_DIGEST_SIZE);
+            if (ret != 0)
+                break;
+            ret = wc_ShaFinal(&hmac->hash.sha, hash);
+            break;
+    #endif /* !NO_SHA */
 
-            wc_ShaUpdate(&hmac->hash.sha, (byte*) hmac->opad, SHA_BLOCK_SIZE);
-            wc_ShaUpdate(&hmac->hash.sha,
-                                     (byte*) hmac->innerHash, SHA_DIGEST_SIZE);
-
-            wc_ShaFinal(&hmac->hash.sha, hash);
-        }
-        break;
-        #endif
-
-        #ifdef WOLFSSL_SHA224
+    #ifdef WOLFSSL_SHA224
         case SHA224:
         {
-            ret = wc_Sha224Final(&hmac->hash.sha224, (byte*) hmac->innerHash);
+            ret = wc_Sha224Final(&hmac->hash.sha224, (byte*)hmac->innerHash);
             if (ret != 0)
                 return ret;
-
-            ret = wc_Sha224Update(&hmac->hash.sha224,
-                                 (byte*) hmac->opad, SHA224_BLOCK_SIZE);
+            ret = wc_Sha224Update(&hmac->hash.sha224, (byte*)hmac->opad,
+                                                             SHA224_BLOCK_SIZE);
             if (ret != 0)
                 return ret;
-
-            ret = wc_Sha224Update(&hmac->hash.sha224,
-                                 (byte*) hmac->innerHash, SHA224_DIGEST_SIZE);
+            ret = wc_Sha224Update(&hmac->hash.sha224, (byte*)hmac->innerHash,
+                                                            SHA224_DIGEST_SIZE);
             if (ret != 0)
                 return ret;
-
             ret = wc_Sha224Final(&hmac->hash.sha224, hash);
             if (ret != 0)
                 return ret;
         }
         break;
-        #endif
+    #endif /* WOLFSSL_SHA224 */
 
-        #ifndef NO_SHA256
+    #ifndef NO_SHA256
         case SHA256:
-        {
-            ret = wc_Sha256Final(&hmac->hash.sha256, (byte*) hmac->innerHash);
+            ret = wc_Sha256Final(&hmac->hash.sha256, (byte*)hmac->innerHash);
             if (ret != 0)
-                return ret;
-
-            ret = wc_Sha256Update(&hmac->hash.sha256,
-                                (byte*) hmac->opad, SHA256_BLOCK_SIZE);
+                break;
+            ret = wc_Sha256Update(&hmac->hash.sha256, (byte*)hmac->opad,
+                                                             SHA256_BLOCK_SIZE);
             if (ret != 0)
-                return ret;
-
-            ret = wc_Sha256Update(&hmac->hash.sha256,
-                                (byte*) hmac->innerHash, SHA256_DIGEST_SIZE);
+                break;
+            ret = wc_Sha256Update(&hmac->hash.sha256, (byte*)hmac->innerHash,
+                                                            SHA256_DIGEST_SIZE);
             if (ret != 0)
-                return ret;
-
+                break;
             ret = wc_Sha256Final(&hmac->hash.sha256, hash);
-            if (ret != 0)
-                return ret;
-        }
-        break;
-        #endif
+            break;
+    #endif /* !NO_SHA256 */
 
-        #ifdef WOLFSSL_SHA384
+    #ifdef WOLFSSL_SHA512
+    #ifdef WOLFSSL_SHA384
         case SHA384:
-        {
-            ret = wc_Sha384Final(&hmac->hash.sha384, (byte*) hmac->innerHash);
+            ret = wc_Sha384Final(&hmac->hash.sha384, (byte*)hmac->innerHash);
             if (ret != 0)
-                return ret;
-
-            ret = wc_Sha384Update(&hmac->hash.sha384,
-                                 (byte*) hmac->opad, SHA384_BLOCK_SIZE);
+                break;
+            ret = wc_Sha384Update(&hmac->hash.sha384, (byte*)hmac->opad,
+                                                             SHA384_BLOCK_SIZE);
             if (ret != 0)
-                return ret;
-
-            ret = wc_Sha384Update(&hmac->hash.sha384,
-                                 (byte*) hmac->innerHash, SHA384_DIGEST_SIZE);
+                break;
+            ret = wc_Sha384Update(&hmac->hash.sha384, (byte*)hmac->innerHash,
+                                                            SHA384_DIGEST_SIZE);
             if (ret != 0)
-                return ret;
-
+                break;
             ret = wc_Sha384Final(&hmac->hash.sha384, hash);
-            if (ret != 0)
-                return ret;
-        }
-        break;
-        #endif
-
-        #ifdef WOLFSSL_SHA512
+            break;
+    #endif /* WOLFSSL_SHA384 */
         case SHA512:
-        {
-            ret = wc_Sha512Final(&hmac->hash.sha512, (byte*) hmac->innerHash);
+            ret = wc_Sha512Final(&hmac->hash.sha512, (byte*)hmac->innerHash);
             if (ret != 0)
-                return ret;
-
-            ret = wc_Sha512Update(&hmac->hash.sha512,
-                                 (byte*) hmac->opad, SHA512_BLOCK_SIZE);
+                break;
+            ret = wc_Sha512Update(&hmac->hash.sha512, (byte*)hmac->opad,
+                                                             SHA512_BLOCK_SIZE);
             if (ret != 0)
-                return ret;
-
-            ret = wc_Sha512Update(&hmac->hash.sha512,
-                                 (byte*) hmac->innerHash, SHA512_DIGEST_SIZE);
+                break;
+            ret = wc_Sha512Update(&hmac->hash.sha512, (byte*)hmac->innerHash,
+                                                            SHA512_DIGEST_SIZE);
             if (ret != 0)
-                return ret;
-
+                break;
             ret = wc_Sha512Final(&hmac->hash.sha512, hash);
-            if (ret != 0)
-                return ret;
-        }
-        break;
-        #endif
+            break;
+    #endif /* WOLFSSL_SHA512 */
 
-        #ifdef HAVE_BLAKE2
+    #ifdef HAVE_BLAKE2
         case BLAKE2B_ID:
-        {
-            ret = wc_Blake2bFinal(&hmac->hash.blake2b, (byte*) hmac->innerHash,
-                         BLAKE2B_256);
+            ret = wc_Blake2bFinal(&hmac->hash.blake2b, (byte*)hmac->innerHash,
+                                                                   BLAKE2B_256);
             if (ret != 0)
-                return ret;
-
-            ret = wc_Blake2bUpdate(&hmac->hash.blake2b,
-                                 (byte*) hmac->opad, BLAKE2B_BLOCKBYTES);
+                break;
+            ret = wc_Blake2bUpdate(&hmac->hash.blake2b, (byte*)hmac->opad,
+                                                            BLAKE2B_BLOCKBYTES);
             if (ret != 0)
-                return ret;
-
-            ret = wc_Blake2bUpdate(&hmac->hash.blake2b,
-                                 (byte*) hmac->innerHash, BLAKE2B_256);
+                break;
+            ret = wc_Blake2bUpdate(&hmac->hash.blake2b, (byte*)hmac->innerHash,
+                                                                   BLAKE2B_256);
             if (ret != 0)
-                return ret;
-
+                break;
             ret = wc_Blake2bFinal(&hmac->hash.blake2b, hash, BLAKE2B_256);
-            if (ret != 0)
-                return ret;
-        }
-        break;
-        #endif
+            break;
+    #endif /* HAVE_BLAKE2 */
 
         default:
-        break;
+            ret = BAD_FUNC_ARG;
+            break;
     }
 
-    hmac->innerHashKeyed = 0;
+    if (ret == 0) {
+        hmac->innerHashKeyed = 0;
+    }
 
-    return 0;
+    return ret;
 }
 
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-
-/* Initialize Hmac for use with Nitrox device */
-int wc_HmacAsyncInit(Hmac* hmac, int devId)
+/* Set heap hint and, if async HMAC is built in, init its device context */
+int wc_HmacInit(Hmac* hmac, void* heap, int devId)
 {
     int ret = 0;
 
     if (hmac == NULL)
-        return -1;
+        return BAD_FUNC_ARG;
 
-    ret = wolfAsync_DevCtxInit(&hmac->asyncDev, WOLFSSL_ASYNC_MARKER_HMAC, devId);
-    if (ret != 0) {
-        return ret;
-    }
+    hmac->heap = heap;  /* also handed to wolfAsync_DevCtxInit() below */
 
-#ifdef HAVE_CAVIUM
-    hmac->keyLen  = 0;
-    hmac->dataLen = 0;
-    hmac->type    = 0;
-    hmac->data    = NULL;        /* buffered input data */
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC)
+    hmac->keyLen = 0;
+    #ifdef HAVE_CAVIUM
+        hmac->dataLen = 0;
+        hmac->data    = NULL;        /* buffered input data */
+    #endif /* HAVE_CAVIUM */
 
-    hmac->innerHashKeyed = 0;
-#endif /* HAVE_CAVIUM */
-
-    /* default to NULL heap hint or test value */
-#ifdef WOLFSSL_HEAP_TEST
-    hmac->heap = (void)WOLFSSL_HEAP_TEST;
+    ret = wolfAsync_DevCtxInit(&hmac->asyncDev, WOLFSSL_ASYNC_MARKER_HMAC,
+                                                         hmac->heap, devId);
 #else
-    hmac->heap = NULL;
-#endif /* WOLFSSL_HEAP_TEST */
+    (void)devId;  /* only used by the async build */
+#endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_HMAC */
 
-    return 0;
+    return ret;  /* 0, or the wolfAsync_DevCtxInit() result */
 }
 
-
-/* Free Hmac from use with Nitrox device */
-void wc_HmacAsyncFree(Hmac* hmac)
+/* Free async dev context and Cavium data buffer (async builds); NULL-safe */
+void wc_HmacFree(Hmac* hmac)
 {
     if (hmac == NULL)
         return;
 
-    wolfAsync_DevCtxFree(&hmac->asyncDev);
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_HMAC)
+    wolfAsync_DevCtxFree(&hmac->asyncDev, WOLFSSL_ASYNC_MARKER_HMAC);
 
 #ifdef HAVE_CAVIUM
-    XFREE(hmac->data, hmac->heap, DYNAMIC_TYPE_ASYNC_TMP);
+    XFREE(hmac->data, hmac->heap, DYNAMIC_TYPE_HMAC);  /* buffered input */
     hmac->data = NULL;
-#endif
-}
-
+#endif /* HAVE_CAVIUM */
 #endif /* WOLFSSL_ASYNC_CRYPT */
-
+}
 
 int wolfSSL_GetHmacMaxSize(void)
 {
@@ -787,92 +772,91 @@ int wolfSSL_GetHmacMaxSize(void)
 }
 
 #ifdef HAVE_HKDF
+    /* HKDF per RFC 5869; salt/info optional; 0 on success, else error code */
+    int wc_HKDF(int type, const byte* inKey, word32 inKeySz,
+                       const byte* salt,  word32 saltSz,
+                       const byte* info,  word32 infoSz,
+                       byte* out,         word32 outSz)
+    {
+        Hmac   myHmac;
+    #ifdef WOLFSSL_SMALL_STACK
+        byte* tmp;
+        byte* prk;
+    #else
+        byte   tmp[MAX_DIGEST_SIZE]; /* localSalt helper and T */
+        byte   prk[MAX_DIGEST_SIZE];
+    #endif
+        const  byte* localSalt;  /* either points to user input or tmp */
+        int    hashSz = wc_HmacSizeByType(type);
+        word32 outIdx = 0;
+        byte   n = 0x1;
+        int    ret;
 
-/* HMAC-KDF with hash type, optional salt and info, return 0 on success */
-int wc_HKDF(int type, const byte* inKey, word32 inKeySz,
-                   const byte* salt,  word32 saltSz,
-                   const byte* info,  word32 infoSz,
-                   byte* out,         word32 outSz)
-{
-    Hmac   myHmac;
-#ifdef WOLFSSL_SMALL_STACK
-    byte* tmp;
-    byte* prk;
-#else
-    byte   tmp[MAX_DIGEST_SIZE]; /* localSalt helper and T */
-    byte   prk[MAX_DIGEST_SIZE];
-#endif
-    const  byte* localSalt;  /* either points to user input or tmp */
-    int    hashSz = wc_HmacSizeByType(type);
-    word32 outIdx = 0;
-    byte   n = 0x1;
-    int    ret;
+        if (hashSz <= 0 || outSz > 255u * (word32)hashSz)
+            return BAD_FUNC_ARG; /* bad type, or so long that n would wrap */
 
-    if (hashSz < 0)
-        return BAD_FUNC_ARG;
+    #ifdef WOLFSSL_SMALL_STACK
+        tmp = (byte*)XMALLOC(MAX_DIGEST_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        if (tmp == NULL)
+            return MEMORY_E;
 
-#ifdef WOLFSSL_SMALL_STACK
-    tmp = (byte*)XMALLOC(MAX_DIGEST_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (tmp == NULL)
-        return MEMORY_E;
-
-    prk = (byte*)XMALLOC(MAX_DIGEST_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (prk == NULL) {
-        XFREE(tmp, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-        return MEMORY_E;
-    }
-#endif
-
-    localSalt = salt;
-    if (localSalt == NULL) {
-        XMEMSET(tmp, 0, hashSz);
-        localSalt = tmp;
-        saltSz    = hashSz;
-    }
-
-    ret = wc_HmacSetKey(&myHmac, type, localSalt, saltSz);
-    if (ret == 0)
-        ret = wc_HmacUpdate(&myHmac, inKey, inKeySz);
-    if (ret == 0)
-        ret = wc_HmacFinal(&myHmac,  prk);
-
-    if (ret == 0) {
-        while (outIdx < outSz) {
-            int    tmpSz = (n == 1) ? 0 : hashSz;
-            word32 left = outSz - outIdx;
-
-            ret = wc_HmacSetKey(&myHmac, type, prk, hashSz);
-            if (ret != 0)
-                break;
-            ret = wc_HmacUpdate(&myHmac, tmp, tmpSz);
-            if (ret != 0)
-                break;
-            ret = wc_HmacUpdate(&myHmac, info, infoSz);
-            if (ret != 0)
-                break;
-            ret = wc_HmacUpdate(&myHmac, &n, 1);
-            if (ret != 0)
-                break;
-            ret = wc_HmacFinal(&myHmac, tmp);
-            if (ret != 0)
-                break;
-
-            left = min(left, (word32)hashSz);
-            XMEMCPY(out+outIdx, tmp, left);
-
-            outIdx += hashSz;
-            n++;
+        prk = (byte*)XMALLOC(MAX_DIGEST_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        if (prk == NULL) {
+            XFREE(tmp, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+            return MEMORY_E;
         }
+    #endif
+        /* a NULL salt is replaced by hashSz zero bytes, staged in tmp */
+        localSalt = salt;
+        if (localSalt == NULL) {
+            XMEMSET(tmp, 0, hashSz);
+            localSalt = tmp;
+            saltSz    = hashSz;
+        }
+        /* extract step: prk = HMAC(salt, inKey) */
+        ret = wc_HmacSetKey(&myHmac, type, localSalt, saltSz);
+        if (ret == 0)
+            ret = wc_HmacUpdate(&myHmac, inKey, inKeySz);
+        if (ret == 0)
+            ret = wc_HmacFinal(&myHmac,  prk);
+        /* expand step: T(n) = HMAC(prk, T(n-1) | info | n), T(0) empty */
+        if (ret == 0) {
+            while (outIdx < outSz) {
+                int    tmpSz = (n == 1) ? 0 : hashSz;
+                word32 left = outSz - outIdx;
+
+                ret = wc_HmacSetKey(&myHmac, type, prk, hashSz);
+                if (ret != 0)
+                    break;
+                ret = wc_HmacUpdate(&myHmac, tmp, tmpSz);
+                if (ret != 0)
+                    break;
+                ret = wc_HmacUpdate(&myHmac, info, infoSz);
+                if (ret != 0)
+                    break;
+                ret = wc_HmacUpdate(&myHmac, &n, 1);
+                if (ret != 0)
+                    break;
+                ret = wc_HmacFinal(&myHmac, tmp);
+                if (ret != 0)
+                    break;
+
+                left = min(left, (word32)hashSz);
+                XMEMCPY(out+outIdx, tmp, left);
+
+                outIdx += hashSz;
+                n++;
+            }
+        }
+
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(tmp, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(prk, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
+
+        return ret;
     }
 
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(tmp, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    XFREE(prk, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return ret;
-}
-
 #endif /* HAVE_HKDF */
 
 #endif /* HAVE_FIPS */
diff --git a/wolfcrypt/src/include.am b/wolfcrypt/src/include.am
index 81aa797db..7b5bd941a 100644
--- a/wolfcrypt/src/include.am
+++ b/wolfcrypt/src/include.am
@@ -58,3 +58,10 @@ src_libwolfssl_la_SOURCES += wolfcrypt/src/port/cavium/cavium_nitrox.c
 
 EXTRA_DIST += wolfcrypt/src/port/cavium/README.md
 endif
+
+if BUILD_INTEL_QA
+src_libwolfssl_la_SOURCES += wolfcrypt/src/port/intel/quickassist.c
+src_libwolfssl_la_SOURCES += wolfcrypt/src/port/intel/quickassist_mem.c
+
+EXTRA_DIST += wolfcrypt/src/port/intel/README.md
+endif
diff --git a/wolfcrypt/src/integer.c b/wolfcrypt/src/integer.c
index 63d5c0293..624deea29 100644
--- a/wolfcrypt/src/integer.c
+++ b/wolfcrypt/src/integer.c
@@ -157,6 +157,9 @@ int mp_init (mp_int * a)
   a->used  = 0;
   a->alloc = 0;
   a->sign  = MP_ZPOS;
+#ifdef HAVE_WOLF_BIGINT
+  wc_bigint_init(&a->raw);
+#endif
 
   return MP_OKAY;
 }
@@ -178,15 +181,28 @@ void mp_clear (mp_int * a)
     }
 
     /* free ram */
-    XFREE(a->dp, NULL, DYNAMIC_TYPE_BIGINT);
+    mp_free(a);
 
     /* reset members to make debugging easier */
-    a->dp    = NULL;
     a->alloc = a->used = 0;
     a->sign  = MP_ZPOS;
   }
 }
 
+/* Free a's digit buffer (and raw bigint if enabled); keeps used/alloc/sign */
+void mp_free (mp_int * a)
+{
+  if (a == NULL)  /* match mp_forcezero(): a NULL mp_int is a no-op */
+    return;
+  if (a->dp != NULL) {  /* skip if a was freed previously */
+    XFREE(a->dp, NULL, DYNAMIC_TYPE_BIGINT);  /* NULL heap hint, not 0 */
+    a->dp    = NULL;
+  }
+#ifdef HAVE_WOLF_BIGINT
+  wc_bigint_free(&a->raw);
+#endif
+}
+
 void mp_forcezero(mp_int * a)
 {
     if (a == NULL)
@@ -198,10 +214,9 @@ void mp_forcezero(mp_int * a)
       ForceZero(a->dp, a->used * sizeof(mp_digit));
 
       /* free ram */
-      XFREE(a->dp, NULL, DYNAMIC_TYPE_BIGINT);
+      mp_free(a);
 
       /* reset members to make debugging easier */
-      a->dp    = NULL;
       a->alloc = a->used = 0;
       a->sign  = MP_ZPOS;
     }
@@ -330,7 +345,7 @@ int mp_copy (mp_int * a, mp_int * b)
   }
 
   /* grow dest */
-  if (b->alloc < a->used) {
+  if (b->alloc < a->used || b->alloc == 0) {
      if ((res = mp_grow (b, a->used)) != MP_OKAY) {
         return res;
      }
@@ -373,7 +388,7 @@ int mp_grow (mp_int * a, int size)
   mp_digit *tmp;
 
   /* if the alloc size is smaller alloc more ram */
-  if (a->alloc < size) {
+  if (a->alloc < size || size == 0) {
     /* ensure there are always at least MP_PREC digits extra on top */
     size += (MP_PREC * 2) - (size % MP_PREC);
 
@@ -469,6 +484,9 @@ void mp_zero (mp_int * a)
 
   a->sign = MP_ZPOS;
   a->used = 0;
+#ifdef HAVE_WOLF_BIGINT
+  wc_bigint_zero(&a->raw);
+#endif
 
   tmp = a->dp;
   for (n = 0; n < a->alloc; n++) {
@@ -2949,6 +2967,9 @@ int mp_init_size (mp_int * a, int size)
   a->used  = 0;
   a->alloc = size;
   a->sign  = MP_ZPOS;
+#ifdef HAVE_WOLF_BIGINT
+  wc_bigint_init(&a->raw);
+#endif
 
   /* zero the digits */
   for (x = 0; x < size; x++) {
diff --git a/wolfcrypt/src/logging.c b/wolfcrypt/src/logging.c
index 8aecf5f0b..7253001ea 100644
--- a/wolfcrypt/src/logging.c
+++ b/wolfcrypt/src/logging.c
@@ -38,7 +38,7 @@
     WOLFSSL_API int  wolfSSL_Debugging_ON(void);
     WOLFSSL_API void wolfSSL_Debugging_OFF(void);
 #ifdef __cplusplus
-    } 
+    }
 #endif
 
 #if defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE)
@@ -233,7 +233,7 @@ void WOLFSSL_ERROR(int error)
     #endif
 {
     #if defined(DEBUG_WOLFSSL) && !defined(WOLFSSL_NGINX)
-    if (loggingEnabled)
+    if (loggingEnabled && error != WC_PENDING_E)
     #endif
     {
         char buffer[80];
diff --git a/wolfcrypt/src/md5.c b/wolfcrypt/src/md5.c
old mode 100644
new mode 100755
index d142b13e9..f65ea3353
--- a/wolfcrypt/src/md5.c
+++ b/wolfcrypt/src/md5.c
@@ -31,13 +31,8 @@
 
 #if defined(WOLFSSL_TI_HASH)
     /* #include  included by wc_port.c */
-#else
 
-#ifdef WOLFSSL_PIC32MZ_HASH
-#define wc_InitMd5   wc_InitMd5_sw
-#define wc_Md5Update wc_Md5Update_sw
-#define wc_Md5Final  wc_Md5Final_sw
-#endif
+#else
 
 #include 
 #include 
@@ -49,22 +44,21 @@
     #include 
 #endif
 
-#ifdef FREESCALE_MMCAU_SHA
-    #include "fsl_mmcau.h"
-    #define XTRANSFORM(S,B)  Transform((S), (B))
-#else
-    #define XTRANSFORM(S,B)  Transform((S))
-#endif
-
 
+/* Hardware Acceleration */
 #if defined(STM32F2_HASH) || defined(STM32F4_HASH)
     /*
      * STM32F2/F4 hardware MD5 support through the standard peripheral
      * library. (See note in README).
      */
 
-    void wc_InitMd5(Md5* md5)
+    #define HAVE_MD5_CUST_API
+
+    int wc_InitMd5_ex(Md5* md5, void* heap, int devId)
     {
+        (void)heap;
+        (void)devId;
+
         /* STM32 struct notes:
          * md5->buffer  = first 4 bytes used to hold partial block if needed
          * md5->buffLen = num bytes currently stored in md5->buffer
@@ -85,9 +79,11 @@
 
         /* reset HASH processor */
         HASH->CR |= HASH_CR_INIT;
+
+        return 0;
     }
 
-    void wc_Md5Update(Md5* md5, const byte* data, word32 len)
+    int wc_Md5Update(Md5* md5, const byte* data, word32 len)
     {
         word32 i = 0;
         word32 fill = 0;
@@ -110,7 +106,7 @@
                 /* append partial to existing stored block */
                 XMEMCPY((byte*)md5->buffer + md5->buffLen, data, len);
                 md5->buffLen += len;
-                return;
+                return 0;
             }
         }
 
@@ -131,9 +127,11 @@
 
         /* keep track of total data length thus far */
         md5->loLen += (len - md5->buffLen);
+
+        return 0;
     }
 
-    void wc_Md5Final(Md5* md5, byte* hash)
+    int wc_Md5Final(Md5* md5, byte* hash)
     {
         __IO uint16_t nbvalidbitsdata = 0;
 
@@ -165,13 +163,148 @@
 
         XMEMCPY(hash, md5->digest, MD5_DIGEST_SIZE);
 
-        wc_InitMd5(md5);  /* reset state */
+        return wc_InitMd5(md5);  /* reset state */
     }
 
-#else /* Begin wolfCrypt software implementation */
+#elif defined(FREESCALE_MMCAU_SHA)
+    #include "cau_api.h"
+    #define XTRANSFORM(S,B)  Transform((S), (B))
 
-void wc_InitMd5(Md5* md5)
+    static int Transform(Md5* md5, byte* data)
+    {   /* call MMCAU_MD5_HashN (count 1) while holding the crypto HW mutex */
+        int lockRet = wolfSSL_CryptHwMutexLock();
+        if (lockRet != 0)
+            return lockRet;  /* lock not taken: digest left untouched */
+        MMCAU_MD5_HashN(data, 1, (uint32_t*)md5->digest);
+        wolfSSL_CryptHwMutexUnLock();
+        return 0;
+    }
+
+#elif defined(WOLFSSL_PIC32MZ_HASH)
+    #define wc_InitMd5   wc_InitMd5_sw
+    #define wc_Md5Update wc_Md5Update_sw
+    #define wc_Md5Final  wc_Md5Final_sw
+
+    #define NEED_SOFT_MD5
+
+#else
+    #define NEED_SOFT_MD5
+
+#endif /* End Hardware Acceleration */
+
+
+#ifdef NEED_SOFT_MD5
+
+    #define XTRANSFORM(S,B)  Transform((S))
+
+    #define F1(x, y, z) (z ^ (x & (y ^ z)))
+    #define F2(x, y, z) F1(z, x, y)
+    #define F3(x, y, z) (x ^ y ^ z)
+    #define F4(x, y, z) (y ^ (x | ~z))
+
+    #define MD5STEP(f, w, x, y, z, data, s) \
+        w = rotlFixed(w + f(x, y, z) + data, s) + x
+    /* Mix md5->buffer[0..15] into md5->digest[0..3]; always returns 0 */
+    static int Transform(Md5* md5)
+    {
+        /* Copy md5->digest[] to working vars */
+        word32 a = md5->digest[0];
+        word32 b = md5->digest[1];
+        word32 c = md5->digest[2];
+        word32 d = md5->digest[3];
+        /* 16 steps with F1 */
+        MD5STEP(F1, a, b, c, d, md5->buffer[0]  + 0xd76aa478,  7);
+        MD5STEP(F1, d, a, b, c, md5->buffer[1]  + 0xe8c7b756, 12);
+        MD5STEP(F1, c, d, a, b, md5->buffer[2]  + 0x242070db, 17);
+        MD5STEP(F1, b, c, d, a, md5->buffer[3]  + 0xc1bdceee, 22);
+        MD5STEP(F1, a, b, c, d, md5->buffer[4]  + 0xf57c0faf,  7);
+        MD5STEP(F1, d, a, b, c, md5->buffer[5]  + 0x4787c62a, 12);
+        MD5STEP(F1, c, d, a, b, md5->buffer[6]  + 0xa8304613, 17);
+        MD5STEP(F1, b, c, d, a, md5->buffer[7]  + 0xfd469501, 22);
+        MD5STEP(F1, a, b, c, d, md5->buffer[8]  + 0x698098d8,  7);
+        MD5STEP(F1, d, a, b, c, md5->buffer[9]  + 0x8b44f7af, 12);
+        MD5STEP(F1, c, d, a, b, md5->buffer[10] + 0xffff5bb1, 17);
+        MD5STEP(F1, b, c, d, a, md5->buffer[11] + 0x895cd7be, 22);
+        MD5STEP(F1, a, b, c, d, md5->buffer[12] + 0x6b901122,  7);
+        MD5STEP(F1, d, a, b, c, md5->buffer[13] + 0xfd987193, 12);
+        MD5STEP(F1, c, d, a, b, md5->buffer[14] + 0xa679438e, 17);
+        MD5STEP(F1, b, c, d, a, md5->buffer[15] + 0x49b40821, 22);
+        /* 16 steps with F2 */
+        MD5STEP(F2, a, b, c, d, md5->buffer[1]  + 0xf61e2562,  5);
+        MD5STEP(F2, d, a, b, c, md5->buffer[6]  + 0xc040b340,  9);
+        MD5STEP(F2, c, d, a, b, md5->buffer[11] + 0x265e5a51, 14);
+        MD5STEP(F2, b, c, d, a, md5->buffer[0]  + 0xe9b6c7aa, 20);
+        MD5STEP(F2, a, b, c, d, md5->buffer[5]  + 0xd62f105d,  5);
+        MD5STEP(F2, d, a, b, c, md5->buffer[10] + 0x02441453,  9);
+        MD5STEP(F2, c, d, a, b, md5->buffer[15] + 0xd8a1e681, 14);
+        MD5STEP(F2, b, c, d, a, md5->buffer[4]  + 0xe7d3fbc8, 20);
+        MD5STEP(F2, a, b, c, d, md5->buffer[9]  + 0x21e1cde6,  5);
+        MD5STEP(F2, d, a, b, c, md5->buffer[14] + 0xc33707d6,  9);
+        MD5STEP(F2, c, d, a, b, md5->buffer[3]  + 0xf4d50d87, 14);
+        MD5STEP(F2, b, c, d, a, md5->buffer[8]  + 0x455a14ed, 20);
+        MD5STEP(F2, a, b, c, d, md5->buffer[13] + 0xa9e3e905,  5);
+        MD5STEP(F2, d, a, b, c, md5->buffer[2]  + 0xfcefa3f8,  9);
+        MD5STEP(F2, c, d, a, b, md5->buffer[7]  + 0x676f02d9, 14);
+        MD5STEP(F2, b, c, d, a, md5->buffer[12] + 0x8d2a4c8a, 20);
+        /* 16 steps with F3 */
+        MD5STEP(F3, a, b, c, d, md5->buffer[5]  + 0xfffa3942,  4);
+        MD5STEP(F3, d, a, b, c, md5->buffer[8]  + 0x8771f681, 11);
+        MD5STEP(F3, c, d, a, b, md5->buffer[11] + 0x6d9d6122, 16);
+        MD5STEP(F3, b, c, d, a, md5->buffer[14] + 0xfde5380c, 23);
+        MD5STEP(F3, a, b, c, d, md5->buffer[1]  + 0xa4beea44,  4);
+        MD5STEP(F3, d, a, b, c, md5->buffer[4]  + 0x4bdecfa9, 11);
+        MD5STEP(F3, c, d, a, b, md5->buffer[7]  + 0xf6bb4b60, 16);
+        MD5STEP(F3, b, c, d, a, md5->buffer[10] + 0xbebfbc70, 23);
+        MD5STEP(F3, a, b, c, d, md5->buffer[13] + 0x289b7ec6,  4);
+        MD5STEP(F3, d, a, b, c, md5->buffer[0]  + 0xeaa127fa, 11);
+        MD5STEP(F3, c, d, a, b, md5->buffer[3]  + 0xd4ef3085, 16);
+        MD5STEP(F3, b, c, d, a, md5->buffer[6]  + 0x04881d05, 23);
+        MD5STEP(F3, a, b, c, d, md5->buffer[9]  + 0xd9d4d039,  4);
+        MD5STEP(F3, d, a, b, c, md5->buffer[12] + 0xe6db99e5, 11);
+        MD5STEP(F3, c, d, a, b, md5->buffer[15] + 0x1fa27cf8, 16);
+        MD5STEP(F3, b, c, d, a, md5->buffer[2]  + 0xc4ac5665, 23);
+        /* 16 steps with F4 */
+        MD5STEP(F4, a, b, c, d, md5->buffer[0]  + 0xf4292244,  6);
+        MD5STEP(F4, d, a, b, c, md5->buffer[7]  + 0x432aff97, 10);
+        MD5STEP(F4, c, d, a, b, md5->buffer[14] + 0xab9423a7, 15);
+        MD5STEP(F4, b, c, d, a, md5->buffer[5]  + 0xfc93a039, 21);
+        MD5STEP(F4, a, b, c, d, md5->buffer[12] + 0x655b59c3,  6);
+        MD5STEP(F4, d, a, b, c, md5->buffer[3]  + 0x8f0ccc92, 10);
+        MD5STEP(F4, c, d, a, b, md5->buffer[10] + 0xffeff47d, 15);
+        MD5STEP(F4, b, c, d, a, md5->buffer[1]  + 0x85845dd1, 21);
+        MD5STEP(F4, a, b, c, d, md5->buffer[8]  + 0x6fa87e4f,  6);
+        MD5STEP(F4, d, a, b, c, md5->buffer[15] + 0xfe2ce6e0, 10);
+        MD5STEP(F4, c, d, a, b, md5->buffer[6]  + 0xa3014314, 15);
+        MD5STEP(F4, b, c, d, a, md5->buffer[13] + 0x4e0811a1, 21);
+        MD5STEP(F4, a, b, c, d, md5->buffer[4]  + 0xf7537e82,  6);
+        MD5STEP(F4, d, a, b, c, md5->buffer[11] + 0xbd3af235, 10);
+        MD5STEP(F4, c, d, a, b, md5->buffer[2]  + 0x2ad7d2bb, 15);
+        MD5STEP(F4, b, c, d, a, md5->buffer[9]  + 0xeb86d391, 21);
+
+        /* Add the working vars back into md5->digest[] */
+        md5->digest[0] += a;
+        md5->digest[1] += b;
+        md5->digest[2] += c;
+        md5->digest[3] += d;
+
+        return 0;
+    }
+#endif /* NEED_SOFT_MD5 */
+
+
+#ifndef HAVE_MD5_CUST_API
+static INLINE void AddMd5Length(Md5* md5, word32 len)
 {
+    word32 prevLo = md5->loLen;  /* hiLen:loLen form one length counter */
+    md5->loLen += len;
+    if (md5->loLen < prevLo)
+        md5->hiLen++;  /* low word wrapped around: carry into high word */
+}
+
+static int _InitMd5(Md5* md5)
+{
+    int ret = 0;
+
     md5->digest[0] = 0x67452301L;
     md5->digest[1] = 0xefcdab89L;
     md5->digest[2] = 0x98badcfeL;
@@ -180,128 +313,55 @@ void wc_InitMd5(Md5* md5)
     md5->buffLen = 0;
     md5->loLen   = 0;
     md5->hiLen   = 0;
-}
 
-#ifdef FREESCALE_MMCAU_SHA
-static int Transform(Md5* md5, byte* data)
-{
-    int ret = wolfSSL_CryptHwMutexLock();
-    if(ret == 0) {
-        MMCAU_MD5_HashN(data, 1, (uint32_t*)(md5->digest));
-        wolfSSL_CryptHwMutexUnLock();
-    }
     return ret;
 }
-#endif /* FREESCALE_MMCAU_SHA */
 
-#ifndef FREESCALE_MMCAU_SHA
-
-static void Transform(Md5* md5)
+int wc_InitMd5_ex(Md5* md5, void* heap, int devId)
 {
-#define F1(x, y, z) (z ^ (x & (y ^ z)))
-#define F2(x, y, z) F1(z, x, y)
-#define F3(x, y, z) (x ^ y ^ z)
-#define F4(x, y, z) (y ^ (x | ~z))
+    int ret = 0;
 
-#define MD5STEP(f, w, x, y, z, data, s) \
-    w = rotlFixed(w + f(x, y, z) + data, s) + x
+    if (md5 == NULL)
+        return BAD_FUNC_ARG;
 
-    /* Copy context->state[] to working vars  */
-    word32 a = md5->digest[0];
-    word32 b = md5->digest[1];
-    word32 c = md5->digest[2];
-    word32 d = md5->digest[3];
+    md5->heap = heap;
 
-    MD5STEP(F1, a, b, c, d, md5->buffer[0]  + 0xd76aa478,  7);
-    MD5STEP(F1, d, a, b, c, md5->buffer[1]  + 0xe8c7b756, 12);
-    MD5STEP(F1, c, d, a, b, md5->buffer[2]  + 0x242070db, 17);
-    MD5STEP(F1, b, c, d, a, md5->buffer[3]  + 0xc1bdceee, 22);
-    MD5STEP(F1, a, b, c, d, md5->buffer[4]  + 0xf57c0faf,  7);
-    MD5STEP(F1, d, a, b, c, md5->buffer[5]  + 0x4787c62a, 12);
-    MD5STEP(F1, c, d, a, b, md5->buffer[6]  + 0xa8304613, 17);
-    MD5STEP(F1, b, c, d, a, md5->buffer[7]  + 0xfd469501, 22);
-    MD5STEP(F1, a, b, c, d, md5->buffer[8]  + 0x698098d8,  7);
-    MD5STEP(F1, d, a, b, c, md5->buffer[9]  + 0x8b44f7af, 12);
-    MD5STEP(F1, c, d, a, b, md5->buffer[10] + 0xffff5bb1, 17);
-    MD5STEP(F1, b, c, d, a, md5->buffer[11] + 0x895cd7be, 22);
-    MD5STEP(F1, a, b, c, d, md5->buffer[12] + 0x6b901122,  7);
-    MD5STEP(F1, d, a, b, c, md5->buffer[13] + 0xfd987193, 12);
-    MD5STEP(F1, c, d, a, b, md5->buffer[14] + 0xa679438e, 17);
-    MD5STEP(F1, b, c, d, a, md5->buffer[15] + 0x49b40821, 22);
+    ret = _InitMd5(md5);
+    if (ret != 0)
+        return ret;
 
-    MD5STEP(F2, a, b, c, d, md5->buffer[1]  + 0xf61e2562,  5);
-    MD5STEP(F2, d, a, b, c, md5->buffer[6]  + 0xc040b340,  9);
-    MD5STEP(F2, c, d, a, b, md5->buffer[11] + 0x265e5a51, 14);
-    MD5STEP(F2, b, c, d, a, md5->buffer[0]  + 0xe9b6c7aa, 20);
-    MD5STEP(F2, a, b, c, d, md5->buffer[5]  + 0xd62f105d,  5);
-    MD5STEP(F2, d, a, b, c, md5->buffer[10] + 0x02441453,  9);
-    MD5STEP(F2, c, d, a, b, md5->buffer[15] + 0xd8a1e681, 14);
-    MD5STEP(F2, b, c, d, a, md5->buffer[4]  + 0xe7d3fbc8, 20);
-    MD5STEP(F2, a, b, c, d, md5->buffer[9]  + 0x21e1cde6,  5);
-    MD5STEP(F2, d, a, b, c, md5->buffer[14] + 0xc33707d6,  9);
-    MD5STEP(F2, c, d, a, b, md5->buffer[3]  + 0xf4d50d87, 14);
-    MD5STEP(F2, b, c, d, a, md5->buffer[8]  + 0x455a14ed, 20);
-    MD5STEP(F2, a, b, c, d, md5->buffer[13] + 0xa9e3e905,  5);
-    MD5STEP(F2, d, a, b, c, md5->buffer[2]  + 0xfcefa3f8,  9);
-    MD5STEP(F2, c, d, a, b, md5->buffer[7]  + 0x676f02d9, 14);
-    MD5STEP(F2, b, c, d, a, md5->buffer[12] + 0x8d2a4c8a, 20);
-
-    MD5STEP(F3, a, b, c, d, md5->buffer[5]  + 0xfffa3942,  4);
-    MD5STEP(F3, d, a, b, c, md5->buffer[8]  + 0x8771f681, 11);
-    MD5STEP(F3, c, d, a, b, md5->buffer[11] + 0x6d9d6122, 16);
-    MD5STEP(F3, b, c, d, a, md5->buffer[14] + 0xfde5380c, 23);
-    MD5STEP(F3, a, b, c, d, md5->buffer[1]  + 0xa4beea44,  4);
-    MD5STEP(F3, d, a, b, c, md5->buffer[4]  + 0x4bdecfa9, 11);
-    MD5STEP(F3, c, d, a, b, md5->buffer[7]  + 0xf6bb4b60, 16);
-    MD5STEP(F3, b, c, d, a, md5->buffer[10] + 0xbebfbc70, 23);
-    MD5STEP(F3, a, b, c, d, md5->buffer[13] + 0x289b7ec6,  4);
-    MD5STEP(F3, d, a, b, c, md5->buffer[0]  + 0xeaa127fa, 11);
-    MD5STEP(F3, c, d, a, b, md5->buffer[3]  + 0xd4ef3085, 16);
-    MD5STEP(F3, b, c, d, a, md5->buffer[6]  + 0x04881d05, 23);
-    MD5STEP(F3, a, b, c, d, md5->buffer[9]  + 0xd9d4d039,  4);
-    MD5STEP(F3, d, a, b, c, md5->buffer[12] + 0xe6db99e5, 11);
-    MD5STEP(F3, c, d, a, b, md5->buffer[15] + 0x1fa27cf8, 16);
-    MD5STEP(F3, b, c, d, a, md5->buffer[2]  + 0xc4ac5665, 23);
-
-    MD5STEP(F4, a, b, c, d, md5->buffer[0]  + 0xf4292244,  6);
-    MD5STEP(F4, d, a, b, c, md5->buffer[7]  + 0x432aff97, 10);
-    MD5STEP(F4, c, d, a, b, md5->buffer[14] + 0xab9423a7, 15);
-    MD5STEP(F4, b, c, d, a, md5->buffer[5]  + 0xfc93a039, 21);
-    MD5STEP(F4, a, b, c, d, md5->buffer[12] + 0x655b59c3,  6);
-    MD5STEP(F4, d, a, b, c, md5->buffer[3]  + 0x8f0ccc92, 10);
-    MD5STEP(F4, c, d, a, b, md5->buffer[10] + 0xffeff47d, 15);
-    MD5STEP(F4, b, c, d, a, md5->buffer[1]  + 0x85845dd1, 21);
-    MD5STEP(F4, a, b, c, d, md5->buffer[8]  + 0x6fa87e4f,  6);
-    MD5STEP(F4, d, a, b, c, md5->buffer[15] + 0xfe2ce6e0, 10);
-    MD5STEP(F4, c, d, a, b, md5->buffer[6]  + 0xa3014314, 15);
-    MD5STEP(F4, b, c, d, a, md5->buffer[13] + 0x4e0811a1, 21);
-    MD5STEP(F4, a, b, c, d, md5->buffer[4]  + 0xf7537e82,  6);
-    MD5STEP(F4, d, a, b, c, md5->buffer[11] + 0xbd3af235, 10);
-    MD5STEP(F4, c, d, a, b, md5->buffer[2]  + 0x2ad7d2bb, 15);
-    MD5STEP(F4, b, c, d, a, md5->buffer[9]  + 0xeb86d391, 21);
-
-    /* Add the working vars back into digest state[]  */
-    md5->digest[0] += a;
-    md5->digest[1] += b;
-    md5->digest[2] += c;
-    md5->digest[3] += d;
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5)
+    ret = wolfAsync_DevCtxInit(&md5->asyncDev, WOLFSSL_ASYNC_MARKER_MD5,
+                                                            md5->heap, devId);
+#else
+    (void)devId;
+#endif
+    return ret;
 }
 
-#endif /* End Software implementation */
-
-
-static INLINE void AddLength(Md5* md5, word32 len)
+int wc_Md5Update(Md5* md5, const byte* data, word32 len)
 {
-    word32 tmp = md5->loLen;
-    if ( (md5->loLen += len) < tmp)
-        md5->hiLen++;                       /* carry low to high */
-}
+    int ret = 0;
+    byte* local;
 
+    if (md5 == NULL || (data == NULL && len > 0)) {
+        return BAD_FUNC_ARG;
+    }
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5)
+    if (md5->asyncDev.marker == WOLFSSL_ASYNC_MARKER_MD5) {
+    #if defined(HAVE_INTEL_QA)
+        return IntelQaSymMd5(&md5->asyncDev, NULL, data, len);
+    #endif
+    }
+#endif /* WOLFSSL_ASYNC_CRYPT */
 
-void wc_Md5Update(Md5* md5, const byte* data, word32 len)
-{
     /* do block size increments */
-    byte* local = (byte*)md5->buffer;
+    local = (byte*)md5->buffer;
+
+    /* check that internal buffLen is valid */
+    if (md5->buffLen > MD5_BLOCK_SIZE)
+        return BUFFER_E;
 
     while (len) {
         word32 add = min(len, MD5_BLOCK_SIZE - md5->buffLen);
@@ -312,23 +372,36 @@ void wc_Md5Update(Md5* md5, const byte* data, word32 len)
         len          -= add;
 
         if (md5->buffLen == MD5_BLOCK_SIZE) {
-            #if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
-                ByteReverseWords(md5->buffer, md5->buffer, MD5_BLOCK_SIZE);
-            #endif
+        #if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+            ByteReverseWords(md5->buffer, md5->buffer, MD5_BLOCK_SIZE);
+        #endif
             XTRANSFORM(md5, local);
-            AddLength(md5, MD5_BLOCK_SIZE);
+            AddMd5Length(md5, MD5_BLOCK_SIZE);
             md5->buffLen = 0;
         }
     }
+    return ret;
 }
 
-
-void wc_Md5Final(Md5* md5, byte* hash)
+int wc_Md5Final(Md5* md5, byte* hash)
 {
-    byte* local = (byte*)md5->buffer;
+    byte* local;
 
-    AddLength(md5, md5->buffLen);  /* before adding pads */
+    if (md5 == NULL || hash == NULL) {
+        return BAD_FUNC_ARG;
+    }
 
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5)
+    if (md5->asyncDev.marker == WOLFSSL_ASYNC_MARKER_MD5) {
+    #if defined(HAVE_INTEL_QA)
+        return IntelQaSymMd5(&md5->asyncDev, hash, NULL, MD5_DIGEST_SIZE);
+    #endif
+    }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+    local = (byte*)md5->buffer;
+
+    AddMd5Length(md5, md5->buffLen);  /* before adding pads */
     local[md5->buffLen++] = 0x80;  /* add 1 */
 
     /* pad with zeros */
@@ -336,9 +409,9 @@ void wc_Md5Final(Md5* md5, byte* hash)
         XMEMSET(&local[md5->buffLen], 0, MD5_BLOCK_SIZE - md5->buffLen);
         md5->buffLen += MD5_BLOCK_SIZE - md5->buffLen;
 
-        #if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
-            ByteReverseWords(md5->buffer, md5->buffer, MD5_BLOCK_SIZE);
-        #endif
+    #if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+        ByteReverseWords(md5->buffer, md5->buffer, MD5_BLOCK_SIZE);
+    #endif
         XTRANSFORM(md5, local);
         md5->buffLen = 0;
     }
@@ -350,50 +423,69 @@ void wc_Md5Final(Md5* md5, byte* hash)
     md5->loLen = md5->loLen << 3;
 
     /* store lengths */
-    #if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
-        ByteReverseWords(md5->buffer, md5->buffer, MD5_BLOCK_SIZE);
-    #endif
+#if defined(BIG_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+    ByteReverseWords(md5->buffer, md5->buffer, MD5_BLOCK_SIZE);
+#endif
     /* ! length ordering dependent on digest endian type ! */
     XMEMCPY(&local[MD5_PAD_SIZE], &md5->loLen, sizeof(word32));
     XMEMCPY(&local[MD5_PAD_SIZE + sizeof(word32)], &md5->hiLen, sizeof(word32));
 
     XTRANSFORM(md5, local);
-    #ifdef BIG_ENDIAN_ORDER
-        ByteReverseWords(md5->digest, md5->digest, MD5_DIGEST_SIZE);
-    #endif
+#ifdef BIG_ENDIAN_ORDER
+    ByteReverseWords(md5->digest, md5->digest, MD5_DIGEST_SIZE);
+#endif
     XMEMCPY(hash, md5->digest, MD5_DIGEST_SIZE);
 
-    wc_InitMd5(md5);  /* reset state */
+    return _InitMd5(md5); /* reset state */
+}
+#endif /* !HAVE_MD5_CUST_API */
+
+
+int wc_InitMd5(Md5* md5)
+{
+    return wc_InitMd5_ex(md5, NULL, INVALID_DEVID);
 }
 
-#endif /* End wolfCrypt software implementation */
-
-
-int wc_Md5Hash(const byte* data, word32 len, byte* hash)
+void wc_Md5Free(Md5* md5)
 {
-#ifdef WOLFSSL_SMALL_STACK
-    Md5* md5;
-#else
-    Md5 md5[1];
-#endif
-
-#ifdef WOLFSSL_SMALL_STACK
-    md5 = (Md5*)XMALLOC(sizeof(Md5), NULL, DYNAMIC_TYPE_TMP_BUFFER);
     if (md5 == NULL)
-        return MEMORY_E;
+        return;
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_MD5)
+    wolfAsync_DevCtxFree(&md5->asyncDev, WOLFSSL_ASYNC_MARKER_MD5);
+#endif /* WOLFSSL_ASYNC_CRYPT */
+}
+
+int wc_Md5GetHash(Md5* md5, byte* hash)
+{
+    int ret;
+    Md5 tmpMd5;
+
+    if (md5 == NULL || hash == NULL)
+        return BAD_FUNC_ARG;
+
+    ret = wc_Md5Copy(md5, &tmpMd5);
+    if (ret == 0) {
+        ret = wc_Md5Final(&tmpMd5, hash);
+    }
+
+    return ret;
+}
+
+int wc_Md5Copy(Md5* src, Md5* dst)
+{
+    int ret = 0;
+
+    if (src == NULL || dst == NULL)
+        return BAD_FUNC_ARG;
+
+    XMEMCPY(dst, src, sizeof(Md5));
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
 #endif
 
-    wc_InitMd5(md5);
-    wc_Md5Update(md5, data, len);
-    wc_Md5Final(md5, hash);
-
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(md5, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-#endif
-
-    return 0;
+    return ret;
 }
 
 #endif /* WOLFSSL_TI_HASH */
-
 #endif /* NO_MD5 */
diff --git a/wolfcrypt/src/memory.c b/wolfcrypt/src/memory.c
index c5f0e47b9..927b0c4ad 100644
--- a/wolfcrypt/src/memory.c
+++ b/wolfcrypt/src/memory.c
@@ -74,6 +74,16 @@ int wolfSSL_SetAllocators(wolfSSL_Malloc_cb  mf,
     return res;
 }
 
+int wolfSSL_GetAllocators(wolfSSL_Malloc_cb*  mf,
+                          wolfSSL_Free_cb*    ff,
+                          wolfSSL_Realloc_cb* rf)
+{
+    if (mf) *mf = malloc_function;
+    if (ff) *ff = free_function;
+    if (rf) *rf = realloc_function;
+    return 0;
+}
+
 #ifndef WOLFSSL_STATIC_MEMORY
 #ifdef WOLFSSL_DEBUG_MEMORY
 void* wolfSSL_Malloc(size_t size, const char* func, unsigned int line)
diff --git a/wolfcrypt/src/misc.c b/wolfcrypt/src/misc.c
index 363db46a8..08ba55e86 100644
--- a/wolfcrypt/src/misc.c
+++ b/wolfcrypt/src/misc.c
@@ -31,7 +31,7 @@
 
 #include 
 
-/* inlining these functions is a huge speed increase and a small size decrease, 
+/* inlining these functions is a huge speed increase and a small size decrease,
    because the functions are smaller than function call setup/cleanup, e.g.,
    md5 benchmark is twice as fast with inline.  If you don't want it, then
    define NO_INLINE and compile this file into wolfssl, otherwise it's used as
@@ -79,7 +79,7 @@
     STATIC INLINE word32 rotlFixed(word32 x, word32 y)
     {
         return (x << y) | (x >> (sizeof(y) * 8 - y));
-    }   
+    }
 
 
     STATIC INLINE word32 rotrFixed(word32 x, word32 y)
@@ -128,7 +128,7 @@ STATIC INLINE void ByteReverseWords(word32* out, const word32* in,
 STATIC INLINE word64 rotlFixed64(word64 x, word64 y)
 {
     return (x << y) | (x >> (sizeof(y) * 8 - y));
-}  
+}
 
 
 STATIC INLINE word64 rotrFixed64(word64 x, word64 y)
@@ -139,8 +139,8 @@ STATIC INLINE word64 rotrFixed64(word64 x, word64 y)
 
 STATIC INLINE word64 ByteReverseWord64(word64 value)
 {
-#ifdef WOLFCRYPT_SLOW_WORD64
-	return (word64)(ByteReverseWord32((word32)value)) << 32 | 
+#if defined(WOLFCRYPT_SLOW_WORD64)
+	return (word64)(ByteReverseWord32((word32)value)) << 32 |
                     ByteReverseWord32((word32)(value>>32));
 #else
 	value = ((value & W64LIT(0xFF00FF00FF00FF00)) >> 8) |
diff --git a/wolfcrypt/src/pkcs12.c b/wolfcrypt/src/pkcs12.c
index 24877b60d..fc99e2ec7 100644
--- a/wolfcrypt/src/pkcs12.c
+++ b/wolfcrypt/src/pkcs12.c
@@ -527,6 +527,9 @@ static int wc_PKCS12_verify(WC_PKCS12* pkcs12, byte* data, word32 dataSz,
     }
 
     /* now that key has been created use it to get HMAC hash on data */
+    if ((ret = wc_HmacInit(&hmac, NULL, INVALID_DEVID)) != 0) {
+        return ret;
+    }
     if ((ret = wc_HmacSetKey(&hmac, typeH, key, kLen)) != 0) {
         return ret;
     }
@@ -536,6 +539,7 @@ static int wc_PKCS12_verify(WC_PKCS12* pkcs12, byte* data, word32 dataSz,
     if ((ret = wc_HmacFinal(&hmac, digest)) != 0) {
         return ret;
     }
+    wc_HmacFree(&hmac);
 #ifdef WOLFSSL_DEBUG_PKCS12
     {
         byte* p;
diff --git a/wolfcrypt/src/pkcs7.c b/wolfcrypt/src/pkcs7.c
index 4f7962f34..b65cfdb10 100644
--- a/wolfcrypt/src/pkcs7.c
+++ b/wolfcrypt/src/pkcs7.c
@@ -31,6 +31,12 @@
 #include 
 #include 
 #include 
+#ifndef NO_RSA
+    #include 
+#endif
+#ifdef HAVE_ECC
+    #include 
+#endif
 #ifdef NO_INLINE
     #include 
 #else
@@ -2259,7 +2265,7 @@ int wc_PKCS7_EncodeEnvelopedData(PKCS7* pkcs7, byte* output, word32 outputSz)
     }
 
     /* generate random content encryption key */
-    ret = wc_InitRng_ex(&rng, pkcs7->heap);
+    ret = wc_InitRng_ex(&rng, pkcs7->heap, INVALID_DEVID);
     if (ret != 0)
         return ret;
 
diff --git a/wolfcrypt/src/port/arm/armv8-aes.c b/wolfcrypt/src/port/arm/armv8-aes.c
index 250411924..518c8fcda 100644
--- a/wolfcrypt/src/port/arm/armv8-aes.c
+++ b/wolfcrypt/src/port/arm/armv8-aes.c
@@ -301,16 +301,22 @@ int wc_AesSetIV(Aes* aes, const byte* iv)
 
 
 /* set the heap hint for aes struct */
-int wc_InitAes_h(Aes* aes, void* h)
+int wc_AesInit(Aes* aes, void* heap, int devId)
 {
     if (aes == NULL)
         return BAD_FUNC_ARG;
 
     aes->heap = h;
+    (void)devId;
 
     return 0;
 }
 
+void wc_AesFree(Aes* aes)
+{
+    (void)aes;
+}
+
 
 #ifdef __aarch64__
 /* AES CCM/GCM use encrypt direct but not decrypt */
@@ -4552,26 +4558,7 @@ int  wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
 #endif /* HAVE_AES_DECRYPT */
 #endif /* HAVE_AESCCM */
 
-#ifdef WOLFSSL_ASYNC_CRYPT
 
-/* Initialize Aes for use with Nitrox device */
-int wc_AesAsyncInit(Aes* aes, int devId)
-{
-    WOLFSSL_STUB("wc_AesAsyncInit");
-    (void)aes;
-    (void)devId;
-    return 0;
-}
-
-
-/* Free Aes from use with Nitrox device */
-void wc_AesAsyncFree(Aes* aes)
-{
-    WOLFSSL_STUB("wc_AesAsyncFree");
-    (void)aes;
-}
-
-#endif /* WOLFSSL_ASYNC_CRYPT */
 
 #ifdef HAVE_AESGCM /* common GCM functions 32 and 64 bit */
 WOLFSSL_API int wc_GmacSetKey(Gmac* gmac, const byte* key, word32 len)
diff --git a/wolfcrypt/src/port/arm/armv8-sha256.c b/wolfcrypt/src/port/arm/armv8-sha256.c
index 80f3a901a..48d7230ef 100644
--- a/wolfcrypt/src/port/arm/armv8-sha256.c
+++ b/wolfcrypt/src/port/arm/armv8-sha256.c
@@ -55,7 +55,8 @@ static const ALIGN32 word32 K[64] = {
     0x90BEFFFAL, 0xA4506CEBL, 0xBEF9A3F7L, 0xC67178F2L
 };
 
-int wc_InitSha256(Sha256* sha256)
+
+int wc_InitSha256_ex(Sha256* sha256, void* heap, int devId)
 {
     int ret = 0;
 
@@ -76,9 +77,21 @@ int wc_InitSha256(Sha256* sha256)
     sha256->loLen   = 0;
     sha256->hiLen   = 0;
 
+    (void)heap;
+    (void)devId;
+
     return ret;
 }
 
+int wc_InitSha256(Sha256* sha256)
+{
+    return wc_InitSha256_ex(sha256, NULL, INVALID_DEVID);
+}
+
+void wc_Sha256Free(Sha256* sha256)
+{
+    (void)sha256;
+}
 
 static INLINE void AddLength(Sha256* sha256, word32 len)
 {
@@ -1287,7 +1300,35 @@ int wc_Sha256Final(Sha256* sha256, byte* hash)
 
     return wc_InitSha256(sha256);  /* reset state */
 }
+
 #endif /* __aarch64__ */
 
-#endif /* NO_SHA256 and WOLFSSL_ARMASM */
 
+int wc_Sha256GetHash(Sha256* sha256, byte* hash)
+{
+    int ret;
+    Sha256 tmpSha256;
+
+    if (sha256 == NULL || hash == NULL)
+        return BAD_FUNC_ARG;
+
+    ret = wc_Sha256Copy(sha256, &tmpSha256);
+    if (ret == 0) {
+        ret = wc_Sha256Final(&tmpSha256, hash);
+    }
+    return ret;
+}
+
+int wc_Sha256Copy(Sha256* src, Sha256* dst)
+{
+    int ret = 0;
+
+    if (src == NULL || dst == NULL)
+        return BAD_FUNC_ARG;
+
+    XMEMCPY(dst, src, sizeof(Sha256));
+
+    return ret;
+}
+
+#endif /* NO_SHA256 and WOLFSSL_ARMASM */
diff --git a/wolfcrypt/src/port/cavium/README.md b/wolfcrypt/src/port/cavium/README.md
index 982a938b9..b98d866dd 100644
--- a/wolfcrypt/src/port/cavium/README.md
+++ b/wolfcrypt/src/port/cavium/README.md
@@ -1,32 +1,3 @@
-# Cavium Nitrox V Support
+# Cavium Nitrox III/V Support
 
-## Directory Structure:
-`/`
-    `/CNN55XX-SDK`
-    `/wolfssl`
-
-## Cavium Driver
-
-Tested again `CNN55XX-Driver-Linux-KVM-XEN-PF-SDK-0.2-04.tar`
-From inside `CNN55XX-SDK`:
-1. `make`
-    Note: To resolve warnings in `CNN55XX-SDK/include/vf_defs.h`:
-    a. Changed `vf_config_mode_str` to return `const char*` and modify `vf_mode_str` to be `const char*`.
-    b. In `vf_config_mode_to_num_vfs` above `default:` add `case PF:`.
-
-2. `sudo make load`
-
-## wolfSSL
-
-Currently the AES and DES3 benchmark tests causes the kernel to crash, so they are disabled for now, even though the wolfCrypt tests pass for those.
-
-From inside `wolfssl`:
-1. `./configure --with-cavium-v=../CNN55XX-SDK --enable-asynccrypt --enable-aesni --enable-intelasm --disable-aes --disable-aesgcm --disable-des3`
-2. `make`
-
-## Usage
-
-Note: Must run applications with sudo to access device.
-
-`sudo ./wolfcrypt/benchmark/benchmark`
-`sudo ./wolfcrypt/test/testwolfcrypt`
+Please contact wolfSSL at info@wolfssl.com to request an evaluation.
diff --git a/wolfcrypt/src/port/cavium/cavium_nitrox.c b/wolfcrypt/src/port/cavium/cavium_nitrox.c
deleted file mode 100644
index 1acc49644..000000000
--- a/wolfcrypt/src/port/cavium/cavium_nitrox.c
+++ /dev/null
@@ -1,778 +0,0 @@
-/* cavium-nitrox.c
- *
- * Copyright (C) 2006-2016 wolfSSL Inc.
- *
- * This file is part of wolfSSL. (formerly known as CyaSSL)
- *
- * wolfSSL is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * wolfSSL is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
- */
-
-#ifdef HAVE_CONFIG_H
-    #include 
-#endif
-
-#include 
-
-#ifdef HAVE_CAVIUM
-
-#include 
-#include 
-#include 
-#include 
-#ifndef NO_RSA
-    #include 
-#endif
-#ifndef NO_AES
-    #include 
-#endif
-
-#include 
-#include  /* For ntohs */
-
-static CspHandle mLastDevHandle = INVALID_DEVID;
-
-int NitroxTranslateResponseCode(int ret)
-{
-    switch (ret) {
-        case EAGAIN:
-        case ERR_REQ_PENDING:
-            ret = WC_PENDING_E;
-            break;
-        case ERR_REQ_TIMEOUT:
-            ret = WC_TIMEOUT_E;
-            break;
-        case 0:
-            /* leave as-is */
-            break;
-        default:
-            printf("NitroxTranslateResponseCode Unknown ret=%x\n", ret);
-            ret = ASYNC_INIT_E;
-    }
-    return ret;
-}
-
-
-CspHandle NitroxGetDeviceHandle(void)
-{
-    return mLastDevHandle;
-}
-    
-CspHandle NitroxOpenDevice(int dma_mode, int dev_id)
-{
-    mLastDevHandle = INVALID_DEVID;
-
-#ifdef HAVE_CAVIUM_V
-    (void)dma_mode;
-
-    if (CspInitialize(dev_id, &mLastDevHandle)) {
-        return -1;
-    }
-
-#else
-    Csp1CoreAssignment core_assign;
-    Uint32             device;
-
-    if (CspInitialize(CAVIUM_DIRECT, CAVIUM_DEV_ID)) {
-        return -1;
-    }
-    if (Csp1GetDevType(&device)) {
-        return -1;
-    }
-    if (device != NPX_DEVICE) {
-        if (ioctl(gpkpdev_hdlr[CAVIUM_DEV_ID], IOCTL_CSP1_GET_CORE_ASSIGNMENT,
-        (Uint32 *)&core_assign)!= 0) {
-            return -1;
-        }
-    }
-    CspShutdown(CAVIUM_DEV_ID);
-
-    mLastDevHandle = CspInitialize(dma_mode, dev_id);
-    if (mLastDevHandle == 0) {
-        mLastDevHandle = dev_id;
-    }
-
-#endif /* HAVE_CAVIUM_V */
-
-    return mLastDevHandle;
-}
-
-
-int NitroxAllocContext(CaviumNitroxDev* nitrox, CspHandle devId,
-    ContextType type)
-{
-    int ret;
-
-    if (nitrox == NULL) {
-        return -1;
-    }
-
-    /* If invalid handle provided, use last open one */
-    if (devId == INVALID_DEVID) {
-        devId = NitroxGetDeviceHandle();
-    }
-
-#ifdef HAVE_CAVIUM_V
-    ret = CspAllocContext(devId, type, &nitrox->contextHandle);
-#else
-    ret = CspAllocContext(type, &nitrox->contextHandle, devId);
-#endif
-    if (ret != 0) {
-        return -1;
-    }
-
-    nitrox->type = type;
-    nitrox->devId = devId;
-
-    return 0;
-}
-
-void NitroxFreeContext(CaviumNitroxDev* nitrox)
-{
-    if (nitrox == NULL) {
-        return;
-    }
-
-#ifdef HAVE_CAVIUM_V
-    CspFreeContext(nitrox->devId, nitrox->type, nitrox->contextHandle);
-#else
-    CspFreeContext(nitrox->type, nitrox->contextHandle, nitrox->devId);
-#endif
-}
-
-void NitroxCloseDevice(CspHandle devId)
-{
-    if (devId >= 0) {
-        CspShutdown(devId);
-    }
-}
-
-#if defined(WOLFSSL_ASYNC_CRYPT)
-
-int NitroxCheckRequest(CspHandle devId, CavReqId reqId)
-{
-    int ret = CspCheckForCompletion(devId, reqId);
-    return NitroxTranslateResponseCode(ret);
-}
-
-int NitroxCheckRequests(CspHandle devId, CspMultiRequestStatusBuffer* req_stat_buf)
-{
-    int ret = CspGetAllResults(req_stat_buf, devId);
-    return NitroxTranslateResponseCode(ret);   
-}
-
-
-#ifndef NO_RSA
-
-int NitroxRsaExptMod(const byte* in, word32 inLen,
-                     byte* exponent, word32 expLen,
-                     byte* modulus, word32 modLen,
-                     byte* out, word32* outLen, RsaKey* key)
-{
-    int ret;
-
-    if (key == NULL || in == NULL || inLen == 0 || exponent == NULL ||
-                                            modulus == NULL || out == NULL) {
-        return BAD_FUNC_ARG;
-    }
-
-    (void)outLen;
-
-#ifdef HAVE_CAVIUM_V
-    ret = CspMe(key->asyncDev.dev.devId, CAVIUM_REQ_MODE, CAVIUM_SSL_GRP,
-            CAVIUM_DPORT, modLen, expLen, inLen,
-            modulus, exponent, (Uint8*)in, out,
-            &key->asyncDev.dev.reqId);
-    #if 0
-    /* TODO: Try MeCRT */
-    ret = CspMeCRT();
-    #endif
-#else
-    /* Not implemented/supported */
-    ret = NOT_COMPILED_IN;
-#endif
-    ret = NitroxTranslateResponseCode(ret);
-    if (ret != 0) {
-        return ret;
-    }
-
-    return ret;
-}
-
-int NitroxRsaPublicEncrypt(const byte* in, word32 inLen, byte* out,
-                           word32 outLen, RsaKey* key)
-{
-    word32 ret;
-
-    if (key == NULL || in == NULL || out == NULL || outLen < (word32)key->n.used) {
-        return BAD_FUNC_ARG;
-    }
-
-#ifdef HAVE_CAVIUM_V
-    ret = CspPkcs1v15Enc(key->asyncDev.dev.devId, CAVIUM_REQ_MODE, CAVIUM_SSL_GRP, CAVIUM_DPORT,
-                         BT2, key->n.used, key->e.used,
-                         (word16)inLen, key->n.dpraw, key->e.dpraw, (byte*)in, out,
-                         &key->asyncDev.dev.reqId);
-#else
-    ret = CspPkcs1v15Enc(CAVIUM_REQ_MODE, BT2, key->n.used, key->e.used,
-                         (word16)inLen, key->n.dpraw, key->e.dpraw, (byte*)in, out,
-                         &key->asyncDev.dev.reqId, key->asyncDev.dev.devId);
-#endif
-    ret = NitroxTranslateResponseCode(ret);
-    if (ret != 0) {
-        return ret;
-    }
-
-    return key->n.used;
-}
-
-
-static INLINE void ato16(const byte* c, word16* u16)
-{
-    *u16 = (c[0] << 8) | (c[1]);
-}
-
-int NitroxRsaPrivateDecrypt(const byte* in, word32 inLen, byte* out,
-                            word32 outLen, RsaKey* key)
-{
-    word32 ret;
-    word16 outSz = (word16)outLen;
-
-    if (key == NULL || in == NULL || out == NULL ||
-                                                inLen != (word32)key->n.used) {
-        return BAD_FUNC_ARG;
-    }
-
-#ifdef HAVE_CAVIUM_V
-    ret = CspPkcs1v15CrtDec(key->asyncDev.dev.devId, CAVIUM_REQ_MODE, CAVIUM_SSL_GRP, CAVIUM_DPORT,
-                            BT2, key->n.used, key->q.dpraw,
-                            key->dQ.dpraw, key->p.dpraw, key->dP.dpraw, key->u.dpraw,
-                            (byte*)in, &outSz, out, &key->asyncDev.dev.reqId);
-#else
-    ret = CspPkcs1v15CrtDec(CAVIUM_REQ_MODE, BT2, key->n.used, key->q.dpraw,
-                            key->dQ.dpraw, key->p.dpraw, key->dP.dpraw, key->u.dpraw,
-                            (byte*)in, &outSz, out, &key->asyncDev.dev.reqId,
-                            key->asyncDev.dev.devId);
-#endif
-    ret = NitroxTranslateResponseCode(ret);
-    if (ret != 0) {
-        return ret;
-    }
-
-    ato16((const byte*)&outSz, &outSz); 
-
-    return outSz;
-}
-
-
-int NitroxRsaSSL_Sign(const byte* in, word32 inLen, byte* out,
-                      word32 outLen, RsaKey* key)
-{
-    word32 ret;
-
-    if (key == NULL || in == NULL || out == NULL || inLen == 0 || outLen <
-                                                         (word32)key->n.used) {
-        return BAD_FUNC_ARG;
-    }
-
-#ifdef HAVE_CAVIUM_V
-    ret = CspPkcs1v15CrtEnc(key->asyncDev.dev.devId, CAVIUM_REQ_MODE, CAVIUM_SSL_GRP, CAVIUM_DPORT,
-                            BT1, key->n.used, (word16)inLen,
-                            key->q.dpraw, key->dQ.dpraw, key->p.dpraw, key->dP.dpraw, key->u.dpraw,
-                            (byte*)in, out, &key->asyncDev.dev.reqId);
-#else
-    ret = CspPkcs1v15CrtEnc(CAVIUM_REQ_MODE, BT1, key->n.used, (word16)inLen,
-                            key->q.dpraw, key->dQ.dpraw, key->p.dpraw, key->dP.dpraw, key->u.dpraw,
-                            (byte*)in, out, &key->asyncDev.dev.reqId, key->asyncDev.dev.devId);
-#endif
-    ret = NitroxTranslateResponseCode(ret);
-    if (ret != 0) {
-        return ret;
-    }
-
-    return key->n.used;
-}
-
-
-int NitroxRsaSSL_Verify(const byte* in, word32 inLen, byte* out,
-                        word32 outLen, RsaKey* key)
-{
-    word32 ret;
-    word16 outSz = (word16)outLen;
-
-    if (key == NULL || in == NULL || out == NULL || inLen != (word32)key->n.used) {
-        return BAD_FUNC_ARG;
-    }
-
-#ifdef HAVE_CAVIUM_V
-    ret = CspPkcs1v15Dec(key->asyncDev.dev.devId, CAVIUM_REQ_MODE, CAVIUM_SSL_GRP, CAVIUM_DPORT,
-                         BT1, key->n.used, key->e.used,
-                         key->n.dpraw, key->e.dpraw, (byte*)in, &outSz, out,
-                         &key->asyncDev.dev.reqId);
-#else
-    ret = CspPkcs1v15Dec(CAVIUM_REQ_MODE, BT1, key->n.used, key->e.used,
-                         key->n.dpraw, key->e.dpraw, (byte*)in, &outSz, out,
-                         &key->asyncDev.dev.reqId, key->asyncDev.dev.devId);
-#endif
-    ret = NitroxTranslateResponseCode(ret);
-    if (ret != 0) {
-        return ret;
-    }
-
-    outSz = ntohs(outSz);
-
-    return outSz;
-}
-#endif /* !NO_RSA */
-
-
-#ifndef NO_AES
-int NitroxAesSetKey(Aes* aes, const byte* key, word32 length, const byte* iv)
-{
-    if (aes == NULL)
-        return BAD_FUNC_ARG;
-
-    XMEMCPY(aes->key, key, length);   /* key still holds key, iv still in reg */
-    if (length == 16)
-        aes->type = AES_128_BIT;
-    else if (length == 24)
-        aes->type = AES_192_BIT;
-    else if (length == 32)
-        aes->type = AES_256_BIT;
-
-    return wc_AesSetIV(aes, iv);
-}
-
-#ifdef HAVE_AES_CBC
-int NitroxAesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 length)
-{
-    int ret;
-    wolfssl_word offset = 0;
-
-    while (length > WOLFSSL_MAX_16BIT) {
-        word16 slen = (word16)WOLFSSL_MAX_16BIT;
-    #ifdef HAVE_CAVIUM_V
-        ret = CspEncryptAes(aes->asyncDev.dev.devId, CAVIUM_BLOCKING, DMA_DIRECT_DIRECT, 
-                          CAVIUM_SSL_GRP, CAVIUM_DPORT, aes->asyncDev.dev.contextHandle,
-                          FROM_DPTR, FROM_CTX, AES_CBC, aes->type, (byte*)aes->key,
-                          (byte*)aes->reg, 0, NULL, slen, (byte*)in + offset,
-                          out + offset, &aes->asyncDev.dev.reqId);
-    #else
-        ret = CspEncryptAes(CAVIUM_BLOCKING, aes->asyncDev.dev.contextHandle, CAVIUM_NO_UPDATE,
-                          aes->type, slen, (byte*)in + offset, out + offset,
-                          (byte*)aes->reg, (byte*)aes->key, &aes->asyncDev.dev.reqId,
-                          aes->asyncDev.dev.devId);
-    #endif
-        ret = NitroxTranslateResponseCode(ret);
-        if (ret != 0) {
-            return ret;
-        }
-        length -= WOLFSSL_MAX_16BIT;
-        offset += WOLFSSL_MAX_16BIT;
-        XMEMCPY(aes->reg, out + offset - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
-    }
-    if (length) {
-        word16 slen = (word16)length;
-    #ifdef HAVE_CAVIUM_V
-        ret = CspEncryptAes(aes->asyncDev.dev.devId, CAVIUM_BLOCKING, DMA_DIRECT_DIRECT, 
-                          CAVIUM_SSL_GRP, CAVIUM_DPORT, aes->asyncDev.dev.contextHandle,
-                          FROM_DPTR, FROM_CTX, AES_CBC, aes->type, (byte*)aes->key,
-                          (byte*)aes->reg,  0, NULL, slen, (byte*)in + offset,
-                          out + offset, &aes->asyncDev.dev.reqId);
-    #else
-        ret = CspEncryptAes(CAVIUM_BLOCKING, aes->asyncDev.dev.contextHandle, CAVIUM_NO_UPDATE,
-                          aes->type, slen, (byte*)in + offset, out + offset,
-                          (byte*)aes->reg, (byte*)aes->key, &aes->asyncDev.dev.reqId,
-                          aes->asyncDev.dev.devId);
-    #endif
-        ret = NitroxTranslateResponseCode(ret);
-        if (ret != 0) {
-            return ret;
-        }
-        XMEMCPY(aes->reg, out + offset+length - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
-    }
-    return 0;
-}
-
-#ifdef HAVE_AES_DECRYPT
-int NitroxAesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 length)
-{
-    wolfssl_word offset = 0;
-    int ret;
-
-    while (length > WOLFSSL_MAX_16BIT) {
-        word16 slen = (word16)WOLFSSL_MAX_16BIT;
-        XMEMCPY(aes->tmp, in + offset + slen - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
-    #ifdef HAVE_CAVIUM_V
-        ret = CspDecryptAes(aes->asyncDev.dev.devId, CAVIUM_BLOCKING, DMA_DIRECT_DIRECT, 
-                          CAVIUM_SSL_GRP, CAVIUM_DPORT, aes->asyncDev.dev.contextHandle,
-                          FROM_DPTR, FROM_CTX, AES_CBC, aes->type, (byte*)aes->key, (byte*)aes->reg,
-                          0, NULL, slen, (byte*)in + offset, out + offset, &aes->asyncDev.dev.reqId);
-    #else
-        ret = CspDecryptAes(CAVIUM_BLOCKING, aes->asyncDev.dev.contextHandle, CAVIUM_NO_UPDATE,
-                          aes->type, slen, (byte*)in + offset, out + offset,
-                          (byte*)aes->reg, (byte*)aes->key, &aes->asyncDev.dev.reqId,
-                          aes->asyncDev.dev.devId);
-    #endif
-        ret = NitroxTranslateResponseCode(ret);
-        if (ret != 0) {
-            return ret;
-        }
-        length -= WOLFSSL_MAX_16BIT;
-        offset += WOLFSSL_MAX_16BIT;
-        XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
-    }
-    if (length) {
-        word16 slen = (word16)length;
-        XMEMCPY(aes->tmp, in + offset + slen - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
-    #ifdef HAVE_CAVIUM_V
-        ret = CspDecryptAes(aes->asyncDev.dev.devId, CAVIUM_BLOCKING, DMA_DIRECT_DIRECT, 
-                          CAVIUM_SSL_GRP, CAVIUM_DPORT, aes->asyncDev.dev.contextHandle,
-                          FROM_DPTR, FROM_CTX, AES_CBC, aes->type, (byte*)aes->key, (byte*)aes->reg,
-                          0, NULL, slen, (byte*)in + offset, out + offset, &aes->asyncDev.dev.reqId);
-    #else
-        ret = CspDecryptAes(CAVIUM_BLOCKING, aes->asyncDev.dev.contextHandle, CAVIUM_NO_UPDATE,
-                          aes->type, slen, (byte*)in + offset, out + offset,
-                          (byte*)aes->reg, (byte*)aes->key, &aes->asyncDev.dev.reqId,
-                          aes->asyncDev.dev.devId);
-    #endif
-        ret = NitroxTranslateResponseCode(ret);
-        if (ret != 0) {
-            return ret;
-        }
-        XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
-    }
-    return 0;
-}
-#endif /* HAVE_AES_DECRYPT */
-#endif /* HAVE_AES_CBC */
-#endif /* !NO_AES */
-
-
-#if !defined(NO_ARC4) && !defined(HAVE_CAVIUM_V)
-void NitroxArc4SetKey(Arc4* arc4, const byte* key, word32 length)
-{
-    if (CspInitializeRc4(CAVIUM_BLOCKING, arc4->asyncDev.dev.contextHandle, length,
-                         (byte*)key, &arc4->asyncDev.dev.reqId, arc4->devId) != 0) {
-        WOLFSSL_MSG("Bad Cavium Arc4 Init");
-    }
-}
-
-void NitroxArc4Process(Arc4* arc4, byte* out, const byte* in, word32 length)
-{
-    int ret;
-    wolfssl_word offset = 0;
-
-    while (length > WOLFSSL_MAX_16BIT) {
-        word16 slen = (word16)WOLFSSL_MAX_16BIT;
-        ret = CspEncryptRc4(CAVIUM_BLOCKING, arc4->asyncDev.dev.contextHandle,
-            CAVIUM_UPDATE, slen, (byte*)in + offset, out + offset,
-            &arc4->asyncDev.dev.reqId, arc4->devId);
-        ret = NitroxTranslateResponseCode(ret);
-        if (ret != 0) {
-            return ret;
-        }
-        length -= WOLFSSL_MAX_16BIT;
-        offset += WOLFSSL_MAX_16BIT;
-    }
-    if (length) {
-        word16 slen = (word16)length;
-        ret = CspEncryptRc4(CAVIUM_BLOCKING, arc4->asyncDev.dev.contextHandle,
-            CAVIUM_UPDATE, slen, (byte*)in + offset, out + offset,
-            &arc4->asyncDev.dev.reqId, arc4->devId);
-        ret = NitroxTranslateResponseCode(ret);
-        if (ret != 0) {
-            return ret;
-        }
-    }
-}
-#endif /* !NO_ARC4 && !HAVE_CAVIUM_V */
-
-
-#ifndef NO_DES3
-int NitroxDes3SetKey(Des3* des3, const byte* key, const byte* iv)
-{
-    if (des3 == NULL)
-        return BAD_FUNC_ARG;
-
-    /* key[0] holds key, iv in reg */
-    XMEMCPY(des3->key[0], key, DES_BLOCK_SIZE*3);
-
-    return wc_Des3_SetIV(des3, iv);
-}
-
-int NitroxDes3CbcEncrypt(Des3* des3, byte* out, const byte* in, word32 length)
-{
-    wolfssl_word offset = 0;
-    int ret;
-
-    while (length > WOLFSSL_MAX_16BIT) {
-        word16 slen = (word16)WOLFSSL_MAX_16BIT;
-    #ifdef HAVE_CAVIUM_V
-        ret = CspEncrypt3Des(des3->asyncDev.dev.devId, CAVIUM_BLOCKING, DMA_DIRECT_DIRECT,
-                            CAVIUM_SSL_GRP, CAVIUM_DPORT, des3->asyncDev.dev.contextHandle,
-                            FROM_DPTR, FROM_CTX, DES3_CBC, (byte*)des3->key[0],
-                            (byte*)des3->reg, slen, (byte*)in + offset,
-                            out + offset, &des3->asyncDev.dev.reqId);
-    #else
-        ret = CspEncrypt3Des(CAVIUM_BLOCKING, des3->asyncDev.dev.contextHandle,
-                            CAVIUM_NO_UPDATE, slen, (byte*)in + offset,
-                            out + offset, (byte*)des3->reg, (byte*)des3->key[0],
-                            &des3->asyncDev.dev.reqId, des3->asyncDev.dev.devId);
-    #endif
-        ret = NitroxTranslateResponseCode(ret);
-        if (ret != 0) {
-            return ret;
-        }
-        length -= WOLFSSL_MAX_16BIT;
-        offset += WOLFSSL_MAX_16BIT;
-        XMEMCPY(des3->reg, out + offset - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
-    }
-    if (length) {
-        word16 slen = (word16)length;
-    #ifdef HAVE_CAVIUM_V
-        ret = CspEncrypt3Des(des3->asyncDev.dev.devId, CAVIUM_BLOCKING, DMA_DIRECT_DIRECT,
-                            CAVIUM_SSL_GRP, CAVIUM_DPORT, des3->asyncDev.dev.contextHandle,
-                            FROM_DPTR, FROM_CTX, DES3_CBC, (byte*)des3->key[0], (byte*)des3->reg,
-                            slen, (byte*)in + offset, out + offset,
-                            &des3->asyncDev.dev.reqId);
-    #else
-        ret = CspEncrypt3Des(CAVIUM_BLOCKING, des3->asyncDev.dev.contextHandle,
-                            CAVIUM_NO_UPDATE, slen, (byte*)in + offset,
-                            out + offset, (byte*)des3->reg, (byte*)des3->key[0],
-                            &des3->asyncDev.dev.reqId, des3->asyncDev.dev.devId);
-    #endif
-        ret = NitroxTranslateResponseCode(ret);
-        if (ret != 0) {
-            return ret;
-        }
-        XMEMCPY(des3->reg, out+offset+length - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
-    }
-    return 0;
-}
-
-int NitroxDes3CbcDecrypt(Des3* des3, byte* out, const byte* in, word32 length)
-{
-    wolfssl_word offset = 0;
-    int ret;
-
-    while (length > WOLFSSL_MAX_16BIT) {
-        word16 slen = (word16)WOLFSSL_MAX_16BIT;
-        XMEMCPY(des3->tmp, in + offset + slen - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
-    #ifdef HAVE_CAVIUM_V
-        ret = CspDecrypt3Des(des3->asyncDev.dev.devId, CAVIUM_BLOCKING, DMA_DIRECT_DIRECT,
-                            CAVIUM_SSL_GRP, CAVIUM_DPORT, des3->asyncDev.dev.contextHandle,
-                            FROM_DPTR, FROM_CTX, DES3_CBC, (byte*)des3->key[0], (byte*)des3->reg,
-                            slen, (byte*)in + offset, out + offset,
-                            &des3->asyncDev.dev.reqId);
-    #else
-        ret = CspDecrypt3Des(CAVIUM_BLOCKING, des3->asyncDev.dev.contextHandle,
-                           CAVIUM_NO_UPDATE, slen, (byte*)in + offset, out + offset,
-                           (byte*)des3->reg, (byte*)des3->key[0], &des3->asyncDev.dev.reqId,
-                           des3->asyncDev.dev.devId);
-    #endif
-        ret = NitroxTranslateResponseCode(ret);
-        if (ret != 0) {
-            return ret;
-        }
-        length -= WOLFSSL_MAX_16BIT;
-        offset += WOLFSSL_MAX_16BIT;
-        XMEMCPY(des3->reg, des3->tmp, DES_BLOCK_SIZE);
-    }
-    if (length) {
-        word16 slen = (word16)length;
-        XMEMCPY(des3->tmp, in + offset + slen - DES_BLOCK_SIZE,DES_BLOCK_SIZE);
-    #ifdef HAVE_CAVIUM_V
-        ret = CspDecrypt3Des(des3->asyncDev.dev.devId, CAVIUM_BLOCKING, DMA_DIRECT_DIRECT,
-                            CAVIUM_SSL_GRP, CAVIUM_DPORT, des3->asyncDev.dev.contextHandle,
-                            FROM_DPTR, FROM_CTX, DES3_CBC, (byte*)des3->key[0], (byte*)des3->reg,
-                            slen, (byte*)in + offset, out + offset,
-                            &des3->asyncDev.dev.reqId);
-    #else
-        ret = CspDecrypt3Des(CAVIUM_BLOCKING, des3->asyncDev.dev.contextHandle,
-                           CAVIUM_NO_UPDATE, slen, (byte*)in + offset, out + offset,
-                           (byte*)des3->reg, (byte*)des3->key[0], &des3->asyncDev.dev.reqId,
-                           des3->asyncDev.dev.devId);
-    #endif
-        ret = NitroxTranslateResponseCode(ret);
-        if (ret != 0) {
-            return ret;
-        }
-        XMEMCPY(des3->reg, des3->tmp, DES_BLOCK_SIZE);
-    }
-    return 0;
-}
-#endif /* !NO_DES3 */
-
-
-#ifndef NO_HMAC
-int NitroxHmacFinal(Hmac* hmac, byte* hash)
-{
-    int ret = -1;
-
-#ifdef HAVE_CAVIUM_V
-    word16 hashLen = wc_HmacSizeByType(hmac->macType);
-    ret = CspHmac(hmac->asyncDev.dev.devId, CAVIUM_BLOCKING, DMA_DIRECT_DIRECT,
-                  CAVIUM_SSL_GRP, CAVIUM_DPORT, hmac->type, hmac->keyLen,
-                  (byte*)hmac->ipad, hmac->dataLen, hmac->data, hashLen,
-                  hash, &hmac->asyncDev.dev.reqId);
-#else
-    ret = CspHmac(CAVIUM_BLOCKING, hmac->type, NULL, hmac->keyLen,
-                  (byte*)hmac->ipad, hmac->dataLen, hmac->data, hash,
-                  &hmac->asyncDev.dev.reqId, hmac->asyncDev.dev.devId);
-#endif
-    ret = NitroxTranslateResponseCode(ret);
-    if (ret != 0) {
-        return ret;
-    }
-
-    hmac->innerHashKeyed = 0;  /* tell update to start over if used again */
-
-    return 0;
-}
-
-int NitroxHmacUpdate(Hmac* hmac, const byte* msg, word32 length)
-{
-    word16 add = (word16)length;
-    word32 total;
-    byte*  tmp;
-
-    if (length > WOLFSSL_MAX_16BIT) {
-        WOLFSSL_MSG("Too big msg for cavium hmac");
-        return -1;
-    }
-
-    if (hmac->innerHashKeyed == 0) {  /* starting new */
-        hmac->dataLen        = 0;
-        hmac->innerHashKeyed = 1;
-    }
-
-    total = add + hmac->dataLen;
-    if (total > WOLFSSL_MAX_16BIT) {
-        WOLFSSL_MSG("Too big msg for cavium hmac");
-        return -1;
-    }
-
-    tmp = XMALLOC(hmac->dataLen + add, NULL, DYNAMIC_TYPE_ASYNC_TMP);
-    if (tmp == NULL) {
-        WOLFSSL_MSG("Out of memory for cavium update");
-        return -1;
-    }
-    if (hmac->dataLen)
-        XMEMCPY(tmp, hmac->data,  hmac->dataLen);
-    XMEMCPY(tmp + hmac->dataLen, msg, add);
-
-    hmac->dataLen += add;
-    XFREE(hmac->data, NULL, DYNAMIC_TYPE_ASYNC_TMP);
-    hmac->data = tmp;
-
-    return 0;
-}
-
-int NitroxHmacSetKey(Hmac* hmac, int type, const byte* key, word32 length)
-{
-    hmac->macType = (byte)type;
-    
-    /* Determine Cavium HashType */
-    switch(type) {
-    #ifndef NO_MD5
-        case MD5:
-            hmac->type = MD5_TYPE;
-            break;
-    #endif
-    #ifndef NO_SHA
-        case SHA:
-            hmac->type = SHA1_TYPE;
-            break;
-    #endif
-    #ifndef NO_SHA256
-        case SHA256:
-        #ifdef HAVE_CAVIUM_V
-            hmac->type = SHA2_SHA256;
-        #else
-            hmac->type = SHA256_TYPE;
-        #endif
-            break;
-    #endif
-    #ifdef HAVE_CAVIUM_V
-        #ifndef WOLFSSL_SHA512
-            case SHA512:
-                hmac->type = SHA2_SHA512;
-                break;
-        #endif
-        #ifndef WOLFSSL_SHA384
-            case SHA384:
-                hmac->type = SHA2_SHA384;
-                break;
-        #endif
-    #endif /* HAVE_CAVIUM_V */
-        default:
-            WOLFSSL_MSG("unsupported cavium hmac type");
-            break;
-    }
-
-    hmac->innerHashKeyed = 0;  /* should we key Startup flag */
-
-    hmac->keyLen = (word16)length;
-    /* store key in ipad */
-    XMEMCPY(hmac->ipad, key, length);
-
-    return 0;
-}
-#endif /* !NO_HMAC */
-
-
-#if !defined(HAVE_HASHDRBG) && !defined(NO_RC4)
-void NitroxRngGenerateBlock(WC_RNG* rng, byte* output, word32 sz)
-{
-    wolfssl_word offset = 0;
-    word32      requestId;
-
-    while (sz > WOLFSSL_MAX_16BIT) {
-        word16 slen = (word16)WOLFSSL_MAX_16BIT;
-    #ifdef HAVE_CAVIUM_V
-        ret = CspTrueRandom(rng->asyncDev.dev.devId, CAVIUM_BLOCKING, DMA_DIRECT_DIRECT, 
-                            CAVIUM_SSL_GRP, CAVIUM_DPORT, slen, output + offset, &requestId);
-    #else
-        ret = CspRandom(CAVIUM_BLOCKING, slen, output + offset, &requestId,
-                        rng->asyncDev.dev.devId);
-    #endif
-        ret = NitroxTranslateResponseCode(ret);
-        if (ret != 0) {
-            return ret;
-        }
-        sz     -= WOLFSSL_MAX_16BIT;
-        offset += WOLFSSL_MAX_16BIT;
-    }
-    if (sz) {
-        word16 slen = (word16)sz;
-    #ifdef HAVE_CAVIUM_V
-        ret = CspTrueRandom(rng->asyncDev.dev.devId, CAVIUM_BLOCKING, DMA_DIRECT_DIRECT, 
-                            CAVIUM_SSL_GRP, CAVIUM_DPORT, slen, output + offset, &requestId);
-    #else
-        ret = CspRandom(CAVIUM_BLOCKING, slen, output + offset, &requestId,
-                        rng->asyncDev.dev.devId);
-    #endif
-        ret = NitroxTranslateResponseCode(ret);
-        if (ret != 0) {
-            return ret;
-        }
-    }
-}
-#endif /* !defined(HAVE_HASHDRBG) && !defined(NO_RC4) */
-
-
-#endif /* WOLFSSL_ASYNC_CRYPT */
-
-#endif /* HAVE_CAVIUM */
diff --git a/wolfcrypt/src/port/intel/README.md b/wolfcrypt/src/port/intel/README.md
new file mode 100644
index 000000000..4b5d971ba
--- /dev/null
+++ b/wolfcrypt/src/port/intel/README.md
@@ -0,0 +1,3 @@
+# Intel QuickAssist Adapter Asynchronous Support
+
+Please contact wolfSSL at info@wolfssl.com to request an evaluation.
diff --git a/wolfcrypt/src/port/nxp/ksdk_port.c b/wolfcrypt/src/port/nxp/ksdk_port.c
index 259a1fb5c..4c5853d7b 100644
--- a/wolfcrypt/src/port/nxp/ksdk_port.c
+++ b/wolfcrypt/src/port/nxp/ksdk_port.c
@@ -681,6 +681,7 @@ int wc_ecc_mulmod_ex(mp_int *k, ecc_point *G, ecc_point *R, mp_int* a,
     int res;
 
     (void)a;
+    (void)heap;
 
     uint8_t Gxbin[LTC_MAX_ECC_BITS / 8];
     uint8_t Gybin[LTC_MAX_ECC_BITS / 8];
diff --git a/wolfcrypt/src/port/ti/ti-ccm.c b/wolfcrypt/src/port/ti/ti-ccm.c
index abf4d602d..f4a4c6595 100644
--- a/wolfcrypt/src/port/ti/ti-ccm.c
+++ b/wolfcrypt/src/port/ti/ti-ccm.c
@@ -32,52 +32,63 @@
 #include 
 #include 
 
+#ifndef TI_DUMMY_BUILD
 #include "driverlib/sysctl.h"
 #include "driverlib/rom_map.h"
 #include "driverlib/rom.h"
 
 #ifndef SINGLE_THREADED
 #include 
-    static wolfSSL_Mutex TI_CCM_Mutex ;
+    static wolfSSL_Mutex TI_CCM_Mutex;
 #endif
+#endif /* TI_DUMMY_BUILD */
 
 #define TIMEOUT  500000
-#define WAIT(stat) { volatile int i ; for(i=0; i
 
 #include 
-#include       
-#include       
-#include       
+#include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -59,67 +59,68 @@
 #define SHAMD5_ALGO_MD5 1
 #define SHAMD5_ALGO_SHA1 2
 #define SHAMD5_ALGO_SHA256 3
-bool wolfSSL_TI_CCMInit(void) { return true ; }
+#define SHAMD5_ALGO_SHA224 4
 #endif
 
 static int hashInit(wolfssl_TI_Hash *hash) {
-    if(!wolfSSL_TI_CCMInit())return 1 ;
-    hash->used = 0 ;
-    hash->msg  = 0 ;
-    hash->len  = 0 ;
-    return 0 ;
+    if (!wolfSSL_TI_CCMInit())return 1;
+    hash->used = 0;
+    hash->msg  = 0;
+    hash->len  = 0;
+    return 0;
 }
 
 static int hashUpdate(wolfssl_TI_Hash *hash, const byte* data, word32 len)
 {
-    void *p ;
+    void *p;
 
-    if((hash== NULL) || (data == NULL))return BAD_FUNC_ARG;
+    if ((hash== NULL) || (data == NULL))return BAD_FUNC_ARG;
 
-    if(hash->len < hash->used+len) {
-        if(hash->msg == NULL) {
+    if (hash->len < hash->used+len) {
+        if (hash->msg == NULL) {
             p = XMALLOC(hash->used+len, NULL, DYNAMIC_TYPE_TMP_BUFFER);
         } else {
             p = XREALLOC(hash->msg, hash->used+len, NULL, DYNAMIC_TYPE_TMP_BUFFER);
         }
-        if(p == 0)return 1 ;
-        hash->msg = p ;     
-        hash->len = hash->used+len ;
-    } 
-    XMEMCPY(hash->msg+hash->used, data, len) ;
-    hash->used += len ;
-    return 0 ;
+        if (p == 0)return 1;
+        hash->msg = p;
+        hash->len = hash->used+len;
+    }
+    XMEMCPY(hash->msg+hash->used, data, len);
+    hash->used += len;
+    return 0;
 }
 
 static int hashGetHash(wolfssl_TI_Hash *hash, byte* result, word32 algo, word32 hsize)
-{   
-    uint32_t h[16] ;
+{
+    uint32_t h[16];
 #ifndef TI_DUMMY_BUILD
-    wolfSSL_TI_lockCCM() ;
+    wolfSSL_TI_lockCCM();
     ROM_SHAMD5Reset(SHAMD5_BASE);
     ROM_SHAMD5ConfigSet(SHAMD5_BASE, algo);
-    ROM_SHAMD5DataProcess(SHAMD5_BASE, 
+    ROM_SHAMD5DataProcess(SHAMD5_BASE,
                    (uint32_t *)hash->msg, hash->used, h);
-    wolfSSL_TI_unlockCCM() ;
+    wolfSSL_TI_unlockCCM();
 #else
-    (void) hash ;
-    (void) algo ;
+    (void) hash;
+    (void) algo;
 #endif
-    XMEMCPY(result, h, hsize) ;
+    XMEMCPY(result, h, hsize);
 
-    return 0 ;
+    return 0;
 }
 
-static void hashRestorePos(wolfssl_TI_Hash *h1, wolfssl_TI_Hash *h2) {
-	h1->used = h2->used ;
+static int hashCopy(wolfssl_TI_Hash *src, wolfssl_TI_Hash *dst) {
+    XMEMCPY(dst, src, sizeof(wolfssl_TI_Hash));
+    return 0;
 }
 
 static int hashFinal(wolfssl_TI_Hash *hash, byte* result, word32 algo, word32 hsize)
-{   
-    hashGetHash(hash, result, algo, hsize) ;
+{
+    hashGetHash(hash, result, algo, hsize);
     XFREE(hash->msg, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    hashInit(hash) ;
-    return 0 ;
+    hashInit(hash);
+    return 0;
 }
 
 static int hashHash(const byte* data, word32 len, byte* hash, word32 algo, word32 hsize)
@@ -153,166 +154,183 @@ static int hashHash(const byte* data, word32 len, byte* hash, word32 algo, word3
 }
 
 static int hashFree(wolfssl_TI_Hash *hash)
-{   
+{
     XFREE(hash->msg, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    hashInit(hash) ;
-    return 0 ;
+    hashInit(hash);
+    return 0;
 }
 
 #if !defined(NO_MD5)
-WOLFSSL_API void wc_InitMd5(Md5* md5)
+WOLFSSL_API int wc_InitMd5_ex(Md5* md5, void* heap, int devId)
 {
     if (md5 == NULL)
-        return ;
-    hashInit((wolfssl_TI_Hash *)md5) ;
+        return 1;
+    (void)heap;
+    (void)devId;
+    return hashInit((wolfssl_TI_Hash *)md5);
 }
-
-WOLFSSL_API void wc_Md5Update(Md5* md5, const byte* data, word32 len)
+WOLFSSL_API int wc_InitMd5(Md5* md5)
 {
-    hashUpdate((wolfssl_TI_Hash *)md5, data, len) ;
+    return wc_InitMd5_ex(md5, NULL, INVALID_DEVID);
 }
 
-WOLFSSL_API void wc_Md5Final(Md5* md5, byte* hash)
+WOLFSSL_API int wc_Md5Update(Md5* md5, const byte* data, word32 len)
 {
-    hashFinal((wolfssl_TI_Hash *)md5, hash, SHAMD5_ALGO_MD5, MD5_DIGEST_SIZE) ;
+    return hashUpdate((wolfssl_TI_Hash *)md5, data, len);
 }
 
-WOLFSSL_API void wc_Md5GetHash(Md5* md5, byte* hash)
+WOLFSSL_API int wc_Md5Final(Md5* md5, byte* hash)
 {
-    hashGetHash((wolfssl_TI_Hash *)md5, hash, SHAMD5_ALGO_MD5, MD5_DIGEST_SIZE) ;
+    return hashFinal((wolfssl_TI_Hash *)md5, hash, SHAMD5_ALGO_MD5, MD5_DIGEST_SIZE);
 }
 
-WOLFSSL_API void wc_Md5RestorePos(Md5* m1, Md5* m2) {
-	hashRestorePos((wolfssl_TI_Hash *)m1, (wolfssl_TI_Hash *)m2) ;
+WOLFSSL_API int wc_Md5GetHash(Md5* md5, byte* hash)
+{
+    return hashGetHash((wolfssl_TI_Hash *)md5, hash, SHAMD5_ALGO_MD5, MD5_DIGEST_SIZE);
+}
+
+WOLFSSL_API int wc_Md5Copy(Md5* src, Md5* dst) {
+    return hashCopy((wolfssl_TI_Hash *)src, (wolfssl_TI_Hash *)dst);
 }
 
 WOLFSSL_API int wc_Md5Hash(const byte*data, word32 len, byte*hash)
-{ 
-    return hashHash(data, len, hash, SHAMD5_ALGO_MD5, MD5_DIGEST_SIZE) ;
+{
+    return hashHash(data, len, hash, SHAMD5_ALGO_MD5, MD5_DIGEST_SIZE);
 }
 
 WOLFSSL_API void wc_Md5Free(Md5* md5)
 {
-    hashFree((wolfssl_TI_Hash *)md5) ;
+    hashFree((wolfssl_TI_Hash *)md5);
 }
 
-#endif /* NO_MD5 */
+#endif /* !NO_MD5 */
 
 #if !defined(NO_SHA)
-WOLFSSL_API int wc_InitSha(Sha* sha)
+WOLFSSL_API int wc_InitSha_ex(Sha* sha, void* heap, int devId)
 {
     if (sha == NULL)
-        return 1 ;
-    return hashInit((wolfssl_TI_Hash *)sha) ;
+        return 1;
+    (void)heap;
+    (void)devId;
+    return hashInit((wolfssl_TI_Hash *)sha);
+}
+WOLFSSL_API int wc_InitSha(Sha* sha)
+{
+    return wc_InitSha_ex(sha, NULL, INVALID_DEVID);
 }
 
 WOLFSSL_API int wc_ShaUpdate(Sha* sha, const byte* data, word32 len)
 {
-    return hashUpdate((wolfssl_TI_Hash *)sha, data, len) ;
+    return hashUpdate((wolfssl_TI_Hash *)sha, data, len);
 }
 
 WOLFSSL_API int wc_ShaFinal(Sha* sha, byte* hash)
 {
-    return hashFinal((wolfssl_TI_Hash *)sha, hash, SHAMD5_ALGO_SHA1, SHA_DIGEST_SIZE) ;
+    return hashFinal((wolfssl_TI_Hash *)sha, hash, SHAMD5_ALGO_SHA1, SHA_DIGEST_SIZE);
 }
 
 WOLFSSL_API int wc_ShaGetHash(Sha* sha, byte* hash)
 {
-    return hashGetHash(sha, hash, SHAMD5_ALGO_SHA1, SHA_DIGEST_SIZE) ;
+    return hashGetHash(sha, hash, SHAMD5_ALGO_SHA1, SHA_DIGEST_SIZE);
 }
 
-WOLFSSL_API void wc_ShaRestorePos(Sha* s1, Sha* s2) {
-	hashRestorePos((wolfssl_TI_Hash *)s1, (wolfssl_TI_Hash *)s2) ;
+WOLFSSL_API int wc_ShaCopy(Sha* src, Sha* dst) {
+    return hashCopy((wolfssl_TI_Hash *)src, (wolfssl_TI_Hash *)dst);
 }
 
 WOLFSSL_API int wc_ShaHash(const byte*data, word32 len, byte*hash)
-{ 
-    return hashHash(data, len, hash, SHAMD5_ALGO_SHA1, SHA_DIGEST_SIZE) ;
+{
+    return hashHash(data, len, hash, SHAMD5_ALGO_SHA1, SHA_DIGEST_SIZE);
 }
 
 WOLFSSL_API void wc_ShaFree(Sha* sha)
 {
-    hashFree((wolfssl_TI_Hash *)sha) ;
+    hashFree((wolfssl_TI_Hash *)sha);
 }
 
-#endif /* NO_SHA */
+#endif /* !NO_SHA */
 
-#if defined(HAVE_SHA224)
-WOLFSSL_API int wc_InitSha224(Sha224* sha224)
+#if defined(WOLFSSL_SHA224)
+WOLFSSL_API int wc_InitSha224_ex(Sha224* sha224, void* heap, int devId)
 {
     if (sha224 == NULL)
-        return 1 ;
-    return hashInit((wolfssl_TI_Hash *)sha224) ;
+        return 1;
+    (void)heap;
+    (void)devId;
+    return hashInit((wolfssl_TI_Hash *)sha224);
+}
+WOLFSSL_API int wc_InitSha224(Sha224* sha224)
+{
+    return wc_InitSha224_ex(sha224, NULL, INVALID_DEVID);
 }
 
 WOLFSSL_API int wc_Sha224Update(Sha224* sha224, const byte* data, word32 len)
 {
-    return hashUpdate((wolfssl_TI_Hash *)sha224, data, len) ;
+    return hashUpdate((wolfssl_TI_Hash *)sha224, data, len);
 }
 
 WOLFSSL_API int wc_Sha224Final(Sha224* sha224, byte* hash)
 {
-    return hashFinal((wolfssl_TI_Hash *)sha224, hash, SHAMD5_ALGO_SHA224, SHA224_DIGEST_SIZE) ;
+    return hashFinal((wolfssl_TI_Hash *)sha224, hash, SHAMD5_ALGO_SHA224, SHA224_DIGEST_SIZE);
 }
 
 WOLFSSL_API int wc_Sha224GetHash(Sha224* sha224, byte* hash)
 {
-    return hashGetHash(sha224, hash, SHAMD5_ALGO_SHA224, SHA224_DIGEST_SIZE) ;
-}
-
-WOLFSSL_API void wc_Sha224RestorePos(Sha224* s1, Sha224* s2) {
-	hashRestorePos((wolfssl_TI_Hash *)s1, (wolfssl_TI_Hash *)s2) ;
+    return hashGetHash(sha224, hash, SHAMD5_ALGO_SHA224, SHA224_DIGEST_SIZE);
 }
 
 WOLFSSL_API int wc_Sha224Hash(const byte* data, word32 len, byte*hash)
-{ 
-    return hashHash(data, len, hash, SHAMD5_ALGO_SHA224, SHA224_DIGEST_SIZE) ;
+{
+    return hashHash(data, len, hash, SHAMD5_ALGO_SHA224, SHA224_DIGEST_SIZE);
 }
 
 WOLFSSL_API void wc_Sha224Free(Sha224* sha224)
 {
-    hashFree((wolfssl_TI_Hash *)sha224) ;
+    hashFree((wolfssl_TI_Hash *)sha224);
 }
 
-#endif /* HAVE_SHA224 */
+#endif /* WOLFSSL_SHA224 */
 
 #if !defined(NO_SHA256)
-WOLFSSL_API int wc_InitSha256(Sha256* sha256)
+WOLFSSL_API int wc_InitSha256_ex(Sha256* sha256, void* heap, int devId)
 {
     if (sha256 == NULL)
-        return 1 ;
-    return hashInit((wolfssl_TI_Hash *)sha256) ;
+        return 1;
+    (void)heap;
+    (void)devId;
+    return hashInit((wolfssl_TI_Hash *)sha256);
+}
+
+WOLFSSL_API int wc_InitSha256(Sha256* sha256)
+{
+    return wc_InitSha256_ex(sha256, NULL, INVALID_DEVID);
 }
 
 WOLFSSL_API int wc_Sha256Update(Sha256* sha256, const byte* data, word32 len)
 {
-    return hashUpdate((wolfssl_TI_Hash *)sha256, data, len) ;
+    return hashUpdate((wolfssl_TI_Hash *)sha256, data, len);
 }
 
 WOLFSSL_API int wc_Sha256Final(Sha256* sha256, byte* hash)
 {
-    return hashFinal((wolfssl_TI_Hash *)sha256, hash, SHAMD5_ALGO_SHA256, SHA256_DIGEST_SIZE) ;
+    return hashFinal((wolfssl_TI_Hash *)sha256, hash, SHAMD5_ALGO_SHA256, SHA256_DIGEST_SIZE);
 }
 
 WOLFSSL_API int wc_Sha256GetHash(Sha256* sha256, byte* hash)
 {
-    return hashGetHash(sha256, hash, SHAMD5_ALGO_SHA256, SHA256_DIGEST_SIZE) ;
-}
-
-WOLFSSL_API void wc_Sha256RestorePos(Sha256* s1, Sha256* s2) {
-	hashRestorePos((wolfssl_TI_Hash *)s1, (wolfssl_TI_Hash *)s2) ;
+    return hashGetHash(sha256, hash, SHAMD5_ALGO_SHA256, SHA256_DIGEST_SIZE);
 }
 
 WOLFSSL_API int wc_Sha256Hash(const byte* data, word32 len, byte*hash)
 {
-    return hashHash(data, len, hash, SHAMD5_ALGO_SHA256, SHA256_DIGEST_SIZE) ;
+    return hashHash(data, len, hash, SHAMD5_ALGO_SHA256, SHA256_DIGEST_SIZE);
 }
 
 WOLFSSL_API void wc_Sha256Free(Sha256* sha256)
 {
-    hashFree((wolfssl_TI_Hash *)sha256) ;
+    hashFree((wolfssl_TI_Hash *)sha256);
 }
 
-#endif
+#endif /* !NO_SHA256 */
 
 #endif
diff --git a/wolfcrypt/src/random.c b/wolfcrypt/src/random.c
index adc0add17..9b0871f52 100644
--- a/wolfcrypt/src/random.c
+++ b/wolfcrypt/src/random.c
@@ -40,19 +40,26 @@ int wc_GenerateSeed(OS_Seed* os, byte* seed, word32 sz)
     return GenerateSeed(os, seed, sz);
 }
 
-int  wc_InitRng(WC_RNG* rng)
+int wc_InitRng_ex(WC_RNG* rng, void* heap, int devId)
+{
+    (void)heap;
+    (void)devId;
+    return InitRng_fips(rng);
+}
+
+int wc_InitRng(WC_RNG* rng)
 {
     return InitRng_fips(rng);
 }
 
 
-int  wc_RNG_GenerateBlock(WC_RNG* rng, byte* b, word32 sz)
+int wc_RNG_GenerateBlock(WC_RNG* rng, byte* b, word32 sz)
 {
     return RNG_GenerateBlock_fips(rng, b, sz);
 }
 
 
-int  wc_RNG_GenerateByte(WC_RNG* rng, byte* b)
+int wc_RNG_GenerateByte(WC_RNG* rng, byte* b)
 {
     return RNG_GenerateByte(rng, b);
 }
@@ -186,6 +193,10 @@ typedef struct DRBG {
     word32 lastBlock;
     byte V[DRBG_SEED_LEN];
     byte C[DRBG_SEED_LEN];
+#ifdef WOLFSSL_ASYNC_CRYPT
+    void* heap;
+    int devId;
+#endif
     byte   matchCount;
 } DRBG;
 
@@ -198,58 +209,73 @@ static int Hash_df(DRBG* drbg, byte* out, word32 outSz, byte type,
                                                   const byte* inA, word32 inASz,
                                                   const byte* inB, word32 inBSz)
 {
+    int ret;
     byte ctr;
     int i;
     int len;
     word32 bits = (outSz * 8); /* reverse byte order */
     Sha256 sha;
-    byte digest[SHA256_DIGEST_SIZE];
+    DECLARE_VAR(digest, byte, SHA256_DIGEST_SIZE, drbg->heap);
 
     (void)drbg;
-    #ifdef LITTLE_ENDIAN_ORDER
-        bits = ByteReverseWord32(bits);
-    #endif
+#ifdef WOLFSSL_ASYNC_CRYPT
+    if (digest == NULL)
+        return DRBG_FAILURE;
+#endif
+
+#ifdef LITTLE_ENDIAN_ORDER
+    bits = ByteReverseWord32(bits);
+#endif
     len = (outSz / OUTPUT_BLOCK_LEN)
         + ((outSz % OUTPUT_BLOCK_LEN) ? 1 : 0);
 
-    for (i = 0, ctr = 1; i < len; i++, ctr++)
-    {
-        if (wc_InitSha256(&sha) != 0)
-            return DRBG_FAILURE;
+    for (i = 0, ctr = 1, ret = 0; ret == 0 && i < len; i++, ctr++) {
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        ret = wc_InitSha256_ex(&sha, drbg->heap, drbg->devId);
+    #else
+        ret = wc_InitSha256(&sha);
+    #endif
+        if (ret != 0)
+            break;
 
-        if (wc_Sha256Update(&sha, &ctr, sizeof(ctr)) != 0)
-            return DRBG_FAILURE;
+        if (ret == 0)
+            ret = wc_Sha256Update(&sha, &ctr, sizeof(ctr));
+        if (ret == 0)
+            ret = wc_Sha256Update(&sha, (byte*)&bits, sizeof(bits));
 
-        if (wc_Sha256Update(&sha, (byte*)&bits, sizeof(bits)) != 0)
-            return DRBG_FAILURE;
-
-        /* churning V is the only string that doesn't have the type added */
-        if (type != drbgInitV)
-            if (wc_Sha256Update(&sha, &type, sizeof(type)) != 0)
-                return DRBG_FAILURE;
-
-        if (wc_Sha256Update(&sha, inA, inASz) != 0)
-            return DRBG_FAILURE;
-
-        if (inB != NULL && inBSz > 0)
-            if (wc_Sha256Update(&sha, inB, inBSz) != 0)
-                return DRBG_FAILURE;
-
-        if (wc_Sha256Final(&sha, digest) != 0)
-            return DRBG_FAILURE;
-
-        if (outSz > OUTPUT_BLOCK_LEN) {
-            XMEMCPY(out, digest, OUTPUT_BLOCK_LEN);
-            outSz -= OUTPUT_BLOCK_LEN;
-            out += OUTPUT_BLOCK_LEN;
+        if (ret == 0) {
+            /* churning V is the only string that doesn't have the type added */
+            if (type != drbgInitV)
+                ret = wc_Sha256Update(&sha, &type, sizeof(type));
         }
-        else {
-            XMEMCPY(out, digest, outSz);
+        if (ret == 0)
+            ret = wc_Sha256Update(&sha, inA, inASz);
+        if (ret == 0) {
+            if (inB != NULL && inBSz > 0)
+                ret = wc_Sha256Update(&sha, inB, inBSz);
+        }
+        if (ret == 0)
+            ret = wc_Sha256Final(&sha, digest);
+
+        if (ret == 0) {
+            wc_Sha256Free(&sha);
+
+            if (outSz > OUTPUT_BLOCK_LEN) {
+                XMEMCPY(out, digest, OUTPUT_BLOCK_LEN);
+                outSz -= OUTPUT_BLOCK_LEN;
+                out += OUTPUT_BLOCK_LEN;
+            }
+            else {
+                XMEMCPY(out, digest, outSz);
+            }
         }
     }
-    ForceZero(digest, sizeof(digest));
 
-    return DRBG_SUCCESS;
+    ForceZero(digest, SHA256_DIGEST_SIZE);
+
+    FREE_VAR(digest, drbg->heap);
+
+    return (ret == 0) ? DRBG_SUCCESS : DRBG_FAILURE;
 }
 
 /* Returns: DRBG_SUCCESS or DRBG_FAILURE */
@@ -290,12 +316,13 @@ static INLINE void array_add_one(byte* data, word32 dataSz)
 /* Returns: DRBG_SUCCESS or DRBG_FAILURE */
 static int Hash_gen(DRBG* drbg, byte* out, word32 outSz, const byte* V)
 {
+    int ret = DRBG_FAILURE;
     byte data[DRBG_SEED_LEN];
     int i;
     int len;
     word32 checkBlock;
     Sha256 sha;
-    byte digest[SHA256_DIGEST_SIZE];
+    DECLARE_VAR(digest, byte, SHA256_DIGEST_SIZE, drbg->heap);
 
     /* Special case: outSz is 0 and out is NULL. wc_Generate a block to save for
      * the continuous test. */
@@ -306,46 +333,55 @@ static int Hash_gen(DRBG* drbg, byte* out, word32 outSz, const byte* V)
 
     XMEMCPY(data, V, sizeof(data));
     for (i = 0; i < len; i++) {
-        if (wc_InitSha256(&sha) != 0 ||
-            wc_Sha256Update(&sha, data, sizeof(data)) != 0 ||
-            wc_Sha256Final(&sha, digest) != 0) {
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        ret = wc_InitSha256_ex(&sha, drbg->heap, drbg->devId);
+    #else
+        ret = wc_InitSha256(&sha);
+    #endif
+        if (ret == 0)
+            ret = wc_Sha256Update(&sha, data, sizeof(data));
+        if (ret == 0)
+            ret = wc_Sha256Final(&sha, digest);
+        if (ret == 0)
+            wc_Sha256Free(&sha);
 
-            return DRBG_FAILURE;
-        }
-
-        XMEMCPY(&checkBlock, digest, sizeof(word32));
-        if (drbg->reseedCtr > 1 && checkBlock == drbg->lastBlock) {
-            if (drbg->matchCount == 1) {
-                return DRBG_CONT_FAILURE;
+        if (ret == 0) {
+            XMEMCPY(&checkBlock, digest, sizeof(word32));
+            if (drbg->reseedCtr > 1 && checkBlock == drbg->lastBlock) {
+                if (drbg->matchCount == 1) {
+                    /* early exit must still scrub and release locals */
+                    ForceZero(data, sizeof(data));
+                    FREE_VAR(digest, drbg->heap);
+                    return DRBG_CONT_FAILURE;
+                }
+                if (i == len)
+                    len++;
+                drbg->matchCount = 1;
             }
             else {
-                if (i == len) {
-                    len++;
-                }
-                drbg->matchCount = 1;
+                drbg->matchCount = 0;
+                drbg->lastBlock = checkBlock;
             }
-        }
-        else {
-            drbg->matchCount = 0;
-            drbg->lastBlock = checkBlock;
-        }
 
-        if (out != NULL) {
-            if (outSz >= OUTPUT_BLOCK_LEN) {
-                XMEMCPY(out, digest, OUTPUT_BLOCK_LEN);
-                outSz -= OUTPUT_BLOCK_LEN;
-                out += OUTPUT_BLOCK_LEN;
-                array_add_one(data, DRBG_SEED_LEN);
-            }
-            else if (out != NULL && outSz != 0) {
-                XMEMCPY(out, digest, outSz);
-                outSz = 0;
+            if (out != NULL) {
+                if (outSz >= OUTPUT_BLOCK_LEN) {
+                    XMEMCPY(out, digest, OUTPUT_BLOCK_LEN);
+                    outSz -= OUTPUT_BLOCK_LEN;
+                    out += OUTPUT_BLOCK_LEN;
+                    array_add_one(data, DRBG_SEED_LEN);
+                }
+                else if (out != NULL && outSz != 0) {
+                    XMEMCPY(out, digest, outSz);
+                    outSz = 0;
+                }
             }
         }
     }
     ForceZero(data, sizeof(data));
 
-    return DRBG_SUCCESS;
+    FREE_VAR(digest, drbg->heap);
+
+    return (ret == 0) ? DRBG_SUCCESS : DRBG_FAILURE;
 }
 
 static INLINE void array_add(byte* d, word32 dLen, const byte* s, word32 sLen)
@@ -375,7 +411,7 @@ static int Hash_DRBG_Generate(DRBG* drbg, byte* out, word32 outSz)
 {
     int ret = DRBG_NEED_RESEED;
     Sha256 sha;
-    byte digest[SHA256_DIGEST_SIZE];
+    DECLARE_VAR(digest, byte, SHA256_DIGEST_SIZE, drbg->heap);
 
     if (drbg->reseedCtr != RESEED_INTERVAL) {
         byte type = drbgGenerateH;
@@ -383,19 +419,26 @@ static int Hash_DRBG_Generate(DRBG* drbg, byte* out, word32 outSz)
 
         ret = Hash_gen(drbg, out, outSz, drbg->V);
         if (ret == DRBG_SUCCESS) {
-            if (wc_InitSha256(&sha) != 0 ||
-                wc_Sha256Update(&sha, &type, sizeof(type)) != 0 ||
-                wc_Sha256Update(&sha, drbg->V, sizeof(drbg->V)) != 0 ||
-                wc_Sha256Final(&sha, digest) != 0) {
+        #ifdef WOLFSSL_ASYNC_CRYPT
+            ret = wc_InitSha256_ex(&sha, drbg->heap, drbg->devId);
+        #else
+            ret = wc_InitSha256(&sha);
+        #endif
+            if (ret == 0)
+                ret = wc_Sha256Update(&sha, &type, sizeof(type));
+            if (ret == 0)
+                ret = wc_Sha256Update(&sha, drbg->V, sizeof(drbg->V));
+            if (ret == 0)
+                ret = wc_Sha256Final(&sha, digest);
+            if (ret == 0)
+                wc_Sha256Free(&sha);
 
-                ret = DRBG_FAILURE;
-            }
-            else {
-                array_add(drbg->V, sizeof(drbg->V), digest, sizeof(digest));
+            if (ret == 0) {
+                array_add(drbg->V, sizeof(drbg->V), digest, SHA256_DIGEST_SIZE);
                 array_add(drbg->V, sizeof(drbg->V), drbg->C, sizeof(drbg->C));
-                #ifdef LITTLE_ENDIAN_ORDER
-                    reseedCtr = ByteReverseWord32(reseedCtr);
-                #endif
+            #ifdef LITTLE_ENDIAN_ORDER
+                reseedCtr = ByteReverseWord32(reseedCtr);
+            #endif
                 array_add(drbg->V, sizeof(drbg->V),
                                           (byte*)&reseedCtr, sizeof(reseedCtr));
                 ret = DRBG_SUCCESS;
@@ -403,18 +446,28 @@ static int Hash_DRBG_Generate(DRBG* drbg, byte* out, word32 outSz)
             drbg->reseedCtr++;
         }
     }
-    ForceZero(digest, sizeof(digest));
+    ForceZero(digest, SHA256_DIGEST_SIZE);
 
-    return ret;
+    FREE_VAR(digest, drbg->heap);
+    if (ret == DRBG_NEED_RESEED || ret == DRBG_CONT_FAILURE) return ret;
+    return (ret == 0) ? DRBG_SUCCESS : DRBG_FAILURE;
 }
 
 /* Returns: DRBG_SUCCESS or DRBG_FAILURE */
 static int Hash_DRBG_Instantiate(DRBG* drbg, const byte* seed, word32 seedSz,
-                                             const byte* nonce, word32 nonceSz)
+                                             const byte* nonce, word32 nonceSz,
+                                             void* heap, int devId)
 {
     int ret = DRBG_FAILURE;
 
     XMEMSET(drbg, 0, sizeof(DRBG));
+#ifdef WOLFSSL_ASYNC_CRYPT
+    drbg->heap = heap;
+    drbg->devId = devId;
+#else
+    (void)heap;
+    (void)devId;
+#endif
 
     if (Hash_df(drbg, drbg->V, sizeof(drbg->V), drbgInitV, seed, seedSz,
                                               nonce, nonceSz) == DRBG_SUCCESS &&
@@ -448,7 +501,7 @@ static int Hash_DRBG_Uninstantiate(DRBG* drbg)
 /* End NIST DRBG Code */
 
 
-int wc_InitRng_ex(WC_RNG* rng, void* heap)
+int wc_InitRng_ex(WC_RNG* rng, void* heap, int devId)
 {
     int ret = RNG_FAILURE_E;
 
@@ -461,6 +514,11 @@ int wc_InitRng_ex(WC_RNG* rng, void* heap)
 #else
     rng->heap = heap;
 #endif
+#ifdef WOLFSSL_ASYNC_CRYPT
+    rng->devId = devId;
+#else
+    (void)devId;
+#endif
 
 #ifdef HAVE_HASHDRBG
     /* init the DBRG to known values */
@@ -475,7 +533,7 @@ int wc_InitRng_ex(WC_RNG* rng, void* heap)
 
     /* configure async RNG source if available */
 #if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
-    ret = wolfAsync_DevCtxInit(&rng->asyncDev, WOLFSSL_ASYNC_MARKER_RNG, INVALID_DEVID);
+    ret = wolfAsync_DevCtxInit(&rng->asyncDev, WOLFSSL_ASYNC_MARKER_RNG, devId);
     if (ret != 0)
         return ret;
 #endif
@@ -489,7 +547,7 @@ int wc_InitRng_ex(WC_RNG* rng, void* heap)
 
 #ifdef HAVE_HASHDRBG
     if (wc_RNG_HealthTestLocal(0) == 0) {
-        byte entropy[ENTROPY_NONCE_SZ];
+        DECLARE_VAR(entropy, byte, ENTROPY_NONCE_SZ, rng->heap);
 
         rng->drbg =
                 (struct DRBG*)XMALLOC(sizeof(DRBG), rng->heap,
@@ -500,17 +558,16 @@ int wc_InitRng_ex(WC_RNG* rng, void* heap)
         /* This doesn't use a separate nonce. The entropy input will be
          * the default size plus the size of the nonce making the seed
          * size. */
-        else if (wc_GenerateSeed(&rng->seed,
-                                          entropy, ENTROPY_NONCE_SZ) == 0 &&
-                 Hash_DRBG_Instantiate(rng->drbg,
-                      entropy, ENTROPY_NONCE_SZ, NULL, 0) == DRBG_SUCCESS) {
-
+        else if (wc_GenerateSeed(&rng->seed, entropy, ENTROPY_NONCE_SZ) == 0 &&
+                 Hash_DRBG_Instantiate(rng->drbg, entropy, ENTROPY_NONCE_SZ,
+                                   NULL, 0, rng->heap, devId) == DRBG_SUCCESS) {
             ret = Hash_DRBG_Generate(rng->drbg, NULL, 0);
         }
         else
             ret = DRBG_FAILURE;
 
         ForceZero(entropy, ENTROPY_NONCE_SZ);
+        FREE_VAR(entropy, rng->heap);
     }
     else
         ret = DRBG_CONT_FAILURE;
@@ -537,7 +594,7 @@ int wc_InitRng_ex(WC_RNG* rng, void* heap)
 
 int wc_InitRng(WC_RNG* rng)
 {
-    return wc_InitRng_ex(rng, NULL);
+    return wc_InitRng_ex(rng, NULL, INVALID_DEVID);
 }
 
 
@@ -680,7 +737,8 @@ int wc_RNG_HealthTest(int reseed, const byte* entropyA, word32 entropyASz,
     drbg = &drbg_var;
 #endif
 
-    if (Hash_DRBG_Instantiate(drbg, entropyA, entropyASz, NULL, 0) != 0) {
+    if (Hash_DRBG_Instantiate(drbg, entropyA, entropyASz, NULL, 0, NULL,
+                                                    INVALID_DEVID) != 0) {
         goto exit_rng_ht;
     }
 
@@ -961,7 +1019,12 @@ static void wc_InitRng_IntelRD(void) {
     }
 }
 
-#define INTELRD_RETRY 32
+#ifdef WOLFSSL_ASYNC_CRYPT
+    /* need more retries if multiple cores */
+    #define INTELRD_RETRY (32 * 8)
+#else
+    #define INTELRD_RETRY 32
+#endif
 
 #ifdef HAVE_INTEL_RDSEED
 
diff --git a/wolfcrypt/src/rsa.c b/wolfcrypt/src/rsa.c
old mode 100644
new mode 100755
index 274bcc4be..ddf230484
--- a/wolfcrypt/src/rsa.c
+++ b/wolfcrypt/src/rsa.c
@@ -148,11 +148,6 @@ int wc_RsaFlattenPublicKey(RsaKey* key, byte* a, word32* aSz, byte* b,
 #define ERROR_OUT(x) { ret = (x); goto done;}
 
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    static int InitAsyncRsaKey(RsaKey* key);
-    static int FreeAsyncRsaKey(RsaKey* key);
-#endif /* WOLFSSL_ASYNC_CRYPT */
-
 enum {
     RSA_STATE_NONE = 0,
 
@@ -167,18 +162,18 @@ enum {
 
 static void wc_RsaCleanup(RsaKey* key)
 {
-    if (key && key->tmp) {
+    if (key && key->data) {
         /* make sure any allocated memory is free'd */
-        if (key->tmpIsAlloc) {
+        if (key->dataIsAlloc) { /* set when data was XMALLOC'd for the key */
             if (key->type == RSA_PRIVATE_DECRYPT ||
                 key->type == RSA_PRIVATE_ENCRYPT) {
-                ForceZero(key->tmp, key->tmpLen);
+                ForceZero(key->data, key->dataLen); /* wipe private-op data */
             }
-            XFREE(key->tmp, key->heap, DYNAMIC_TYPE_RSA);
-            key->tmpIsAlloc = 0;
+            XFREE(key->data, key->heap, DYNAMIC_TYPE_WOLF_BIGINT);
+            key->dataIsAlloc = 0;
         }
-        key->tmp = NULL;
-        key->tmpLen = 0;
+        key->data = NULL; /* owned or borrowed, the reference is dropped */
+        key->dataLen = 0;
     }
 }
 
@@ -190,39 +185,35 @@ int wc_InitRsaKey_ex(RsaKey* key, void* heap, int devId)
         return BAD_FUNC_ARG;
     }
 
-    (void)devId;
-
     key->type = RSA_TYPE_UNKNOWN;
     key->state = RSA_STATE_NONE;
     key->heap = heap;
-    key->tmp = NULL;
-    key->tmpLen = 0;
-    key->tmpIsAlloc = 0;
+    key->data = NULL;
+    key->dataLen = 0;
+    key->dataIsAlloc = 0;
 #ifdef WC_RSA_BLINDING
     key->rng = NULL;
 #endif
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    if (devId != INVALID_DEVID) {
-        /* handle as async */
-        ret = wolfAsync_DevCtxInit(&key->asyncDev, WOLFSSL_ASYNC_MARKER_RSA,
-                                                                        devId);
-        if (ret == 0) {
-            ret = InitAsyncRsaKey(key);
-        }
-    }
-    else
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA)
+    /* handle as async */
+    ret = wolfAsync_DevCtxInit(&key->asyncDev, WOLFSSL_ASYNC_MARKER_RSA,
+                                                            key->heap, devId);
+    #ifdef WOLFSSL_CERT_GEN
+        XMEMSET(&key->certSignCtx, 0, sizeof(CertSignCtx));
+    #endif
+#else
+    (void)devId;
 #endif
-    {
-        mp_init(&key->n);
-        mp_init(&key->e);
-        mp_init(&key->d);
-        mp_init(&key->p);
-        mp_init(&key->q);
-        mp_init(&key->dP);
-        mp_init(&key->dQ);
-        mp_init(&key->u);
-    }
+
+    mp_init(&key->n);
+    mp_init(&key->e);
+    mp_init(&key->d);
+    mp_init(&key->p);
+    mp_init(&key->q);
+    mp_init(&key->dP);
+    mp_init(&key->dQ);
+    mp_init(&key->u);
 
     return ret;
 }
@@ -242,36 +233,29 @@ int wc_FreeRsaKey(RsaKey* key)
 
     wc_RsaCleanup(key);
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA) {
-        ret = FreeAsyncRsaKey(key);
-        wolfAsync_DevCtxFree(&key->asyncDev);
-    }
-    else
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA)
+    wolfAsync_DevCtxFree(&key->asyncDev, WOLFSSL_ASYNC_MARKER_RSA);
 #endif
-    {
-        if (key->type == RSA_PRIVATE) {
-            mp_forcezero(&key->u);
-            mp_forcezero(&key->dQ);
-            mp_forcezero(&key->dP);
-            mp_forcezero(&key->q);
-            mp_forcezero(&key->p);
-            mp_forcezero(&key->d);
-        }
-    #ifndef USE_FAST_MATH
-        /* private part */
-        mp_clear(&key->u);
-        mp_clear(&key->dQ);
-        mp_clear(&key->dP);
-        mp_clear(&key->q);
-        mp_clear(&key->p);
-        mp_clear(&key->d);
 
-        /* public part */
-        mp_clear(&key->e);
-        mp_clear(&key->n);
-    #endif
+    if (key->type == RSA_PRIVATE) {
+        mp_forcezero(&key->u);
+        mp_forcezero(&key->dQ);
+        mp_forcezero(&key->dP);
+        mp_forcezero(&key->q);
+        mp_forcezero(&key->p);
+        mp_forcezero(&key->d);
     }
+    /* private part */
+    mp_clear(&key->u);
+    mp_clear(&key->dQ);
+    mp_clear(&key->dP);
+    mp_clear(&key->q);
+    mp_clear(&key->p);
+    mp_clear(&key->d);
+
+    /* public part */
+    mp_clear(&key->e);
+    mp_clear(&key->n);
 
     return ret;
 }
@@ -656,7 +640,7 @@ static int wc_RsaPad_ex(const byte* input, word32 inputLen, byte* pkcsBlock,
 
     #ifndef WC_NO_RSA_OAEP
         case WC_RSA_OAEP_PAD:
-            //WOLFSSL_MSG("wolfSSL Using RSA OAEP padding");
+            WOLFSSL_MSG("wolfSSL Using RSA OAEP padding");
             ret = RsaPad_OAEP(input, inputLen, pkcsBlock, pkcsBlockLen,
                          padValue, rng, hType, mgf, optLabel, labelLen, heap);
             break;
@@ -795,7 +779,7 @@ static int RsaUnPad(const byte *pkcsBlock, unsigned int pkcsBlockLen,
         while (i maxOutputLen) || invalid;
 
     if (invalid) {
-        WOLFSSL_MSG("RsaUnPad error, bad formatting");
+        WOLFSSL_MSG("RsaUnPad error, invalid formatting");
         return RSA_PAD_E;
     }
 
@@ -819,16 +803,15 @@ static int wc_RsaUnPad_ex(byte* pkcsBlock, word32 pkcsBlockLen, byte** out,
 {
     int ret;
 
-    switch (padType)
-    {
+    switch (padType) {
         case WC_RSA_PKCSV15_PAD:
-            WOLFSSL_MSG("wolfSSL Using RSA PKCSV15 padding");
+            /* WOLFSSL_MSG("wolfSSL Using RSA PKCSV15 un-padding"); */
             ret = RsaUnPad(pkcsBlock, pkcsBlockLen, out, padValue);
             break;
 
     #ifndef WC_NO_RSA_OAEP
         case WC_RSA_OAEP_PAD:
-            WOLFSSL_MSG("wolfSSL Using RSA OAEP padding");
+            WOLFSSL_MSG("wolfSSL Using RSA OAEP un-padding");
             ret = RsaUnPad_OAEP((byte*)pkcsBlock, pkcsBlockLen, out,
                                         hType, mgf, optLabel, labelLen, heap);
             break;
@@ -884,7 +867,7 @@ static int wc_RsaFunctionSync(const byte* in, word32 inLen, byte* out,
         /* blind */
         ret = mp_rand(&rnd, get_digit_count(&key->n), rng);
         if (ret != MP_OKAY)
-            ERROR_OUT(ret);
+            goto done;
 
         /* rndi = 1/rnd mod n */
         if (mp_invmod(&rnd, &key->n, &rndi) != MP_OKAY)
@@ -1000,9 +983,6 @@ done:
         mp_clear(&rnd);
     }
 #endif
-    if (ret == MP_EXPTMOD_E) {
-        WOLFSSL_MSG("RSA_FUNCTION MP_EXPTMOD_E: memory/config problem");
-    }
     return ret;
 }
 
@@ -1012,8 +992,10 @@ static int wc_RsaFunctionAsync(const byte* in, word32 inLen, byte* out,
 {
     int ret = 0;
 
+    (void)rng;
+
 #ifdef WOLFSSL_ASYNC_CRYPT_TEST
-    AsyncCryptTestDev* testDev = &key->asyncDev.dev;
+    WC_ASYNC_TEST* testDev = &key->asyncDev.test;
     if (testDev->type == ASYNC_TEST_NONE) {
         testDev->type = ASYNC_TEST_RSA_FUNC;
         testDev->rsaFunc.in = in;
@@ -1031,11 +1013,22 @@ static int wc_RsaFunctionAsync(const byte* in, word32 inLen, byte* out,
     case RSA_PRIVATE_DECRYPT:
     case RSA_PRIVATE_ENCRYPT:
     #ifdef HAVE_CAVIUM
-        ret = NitroxRsaExptMod(in, inLen, key->d.dpraw, key->d.used,
-                               key->n.dpraw, key->n.used, out, outLen, key);
+        ret = NitroxRsaExptMod(in, inLen,
+                               key->d.raw.buf, key->d.raw.len,
+                               key->n.raw.buf, key->n.raw.len,
+                               out, outLen, key);
     #elif defined(HAVE_INTEL_QA)
-        /* TODO: Add support for Intel Quick Assist */
-        ret = -1;
+        #ifdef RSA_LOW_MEM
+            ret = IntelQaRsaPrivate(&key->asyncDev, in, inLen,
+                                    &key->d.raw, &key->n.raw,
+                                    out, outLen);
+        #else
+            ret = IntelQaRsaCrtPrivate(&key->asyncDev, in, inLen,
+                                &key->p.raw, &key->q.raw,
+                                &key->dP.raw, &key->dQ.raw,
+                                &key->u.raw,
+                                out, outLen);
+        #endif
     #else /* WOLFSSL_ASYNC_CRYPT_TEST */
         ret = wc_RsaFunctionSync(in, inLen, out, outLen, type, key, rng);
     #endif
@@ -1044,11 +1037,14 @@ static int wc_RsaFunctionAsync(const byte* in, word32 inLen, byte* out,
     case RSA_PUBLIC_ENCRYPT:
     case RSA_PUBLIC_DECRYPT:
     #ifdef HAVE_CAVIUM
-        ret = NitroxRsaExptMod(in, inLen, key->e.dpraw, key->e.used,
-                               key->n.dpraw, key->n.used, out, outLen, key);
+        ret = NitroxRsaExptMod(in, inLen,
+                               key->e.raw.buf, key->e.raw.len,
+                               key->n.raw.buf, key->n.raw.len,
+                               out, outLen, key);
     #elif defined(HAVE_INTEL_QA)
-        /* TODO: Add support for Intel Quick Assist */
-        ret = -1;
+        ret = IntelQaRsaPublic(&key->asyncDev, in, inLen,
+                               &key->e.raw, &key->n.raw,
+                               out, outLen);
     #else /* WOLFSSL_ASYNC_CRYPT_TEST */
         ret = wc_RsaFunctionSync(in, inLen, out, outLen, type, key, rng);
     #endif
@@ -1072,8 +1068,9 @@ int wc_RsaFunction(const byte* in, word32 inLen, byte* out,
         return BAD_FUNC_ARG;
     }
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA) {
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA)
+    if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA &&
+                                                        key->n.raw.len > 0) {
         ret = wc_RsaFunctionAsync(in, inLen, out, outLen, type, key, rng);
     }
     else
@@ -1082,10 +1079,17 @@ int wc_RsaFunction(const byte* in, word32 inLen, byte* out,
         ret = wc_RsaFunctionSync(in, inLen, out, outLen, type, key, rng);
     }
 
-    if (ret == MP_EXPTMOD_E) {
-        /* This can happen due to incorrectly set FP_MAX_BITS or missing XREALLOC */
-        WOLFSSL_MSG("RSA_FUNCTION MP_EXPTMOD_E: memory/config problem");
+    /* handle error */
+    if (ret < 0 && ret != WC_PENDING_E) {
+        if (ret == MP_EXPTMOD_E) {
+            /* This can happen due to incorrectly set FP_MAX_BITS or missing XREALLOC */
+            WOLFSSL_MSG("RSA_FUNCTION MP_EXPTMOD_E: memory/config problem");
+        }
+
+        key->state = RSA_STATE_NONE;
+        wc_RsaCleanup(key);
     }
+
     return ret;
 }
 
@@ -1112,10 +1116,10 @@ static int RsaPublicEncryptEx(const byte* in, word32 inLen, byte* out,
                             enum wc_HashType hash, int mgf,
                             byte* label, word32 labelSz, WC_RNG* rng)
 {
-    int ret = BAD_FUNC_ARG, sz;
+    int ret = RSA_WRONG_TYPE_E, sz;
 
     if (in == NULL || inLen == 0 || out == NULL || key == NULL) {
-        return ret;
+        return BAD_FUNC_ARG;
     }
 
     sz = wc_RsaEncryptSize(key);
@@ -1134,42 +1138,47 @@ static int RsaPublicEncryptEx(const byte* in, word32 inLen, byte* out,
     switch (key->state) {
     case RSA_STATE_NONE:
     case RSA_STATE_ENCRYPT_PAD:
+        key->state = RSA_STATE_ENCRYPT_PAD;
 
-    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
-        if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA) {
-            if (rsa_type == RSA_PUBLIC_ENCRYPT && pad_value == RSA_BLOCK_TYPE_2) {
-                key->state = RSA_STATE_ENCRYPT_RES;
-                key->tmpLen = key->n.used;
-                return NitroxRsaPublicEncrypt(in, inLen, out, outLen, key);
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA) && \
+            defined(HAVE_CAVIUM)
+        if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA && key->n.raw.buf) {
+            /* Async operations that include padding */
+            if (rsa_type == RSA_PUBLIC_ENCRYPT &&
+                                                pad_value == RSA_BLOCK_TYPE_2) {
+                key->state = RSA_STATE_ENCRYPT_EXPTMOD;
+                key->dataLen = key->n.raw.len;
+                ret = NitroxRsaPublicEncrypt(in, inLen, out, outLen, key);
+                break;
             }
-            else if (rsa_type == RSA_PRIVATE_ENCRYPT && pad_value == RSA_BLOCK_TYPE_1) {
-                key->state = RSA_STATE_ENCRYPT_RES;
-                key->tmpLen = key->n.used;
-                return NitroxRsaSSL_Sign(in, inLen, out, outLen, key);
+            else if (rsa_type == RSA_PRIVATE_ENCRYPT &&
+                                                pad_value == RSA_BLOCK_TYPE_1) {
+                key->state = RSA_STATE_ENCRYPT_EXPTMOD;
+                key->dataLen = key->n.raw.len;
+                ret = NitroxRsaSSL_Sign(in, inLen, out, outLen, key);
+                break;
             }
         }
     #endif
 
-        key->state = RSA_STATE_ENCRYPT_EXPTMOD;
-
-        ret = wc_RsaPad_ex(in, inLen, out, sz, pad_value, rng,
-                               pad_type, hash, mgf, label, labelSz, key->heap);
+        ret = wc_RsaPad_ex(in, inLen, out, sz, pad_value, rng, pad_type, hash,
+                                                mgf, label, labelSz, key->heap);
         if (ret < 0) {
             break;
         }
         /* fall through */
     case RSA_STATE_ENCRYPT_EXPTMOD:
-        key->state = RSA_STATE_ENCRYPT_RES;
+        key->state = RSA_STATE_ENCRYPT_EXPTMOD;
 
-        key->tmpLen = outLen;
-        ret = wc_RsaFunction(out, sz, out, &key->tmpLen, rsa_type, key, rng);
+        key->dataLen = outLen;
+        ret = wc_RsaFunction(out, sz, out, &key->dataLen, rsa_type, key, rng);
         if (ret < 0) {
             break;
         }
         /* fall through */
     case RSA_STATE_ENCRYPT_RES:
-        key->state = RSA_STATE_NONE;
-        ret = key->tmpLen;
+        key->state = RSA_STATE_ENCRYPT_RES;
+        ret = key->dataLen;
         break;
 
     default:
@@ -1178,10 +1187,12 @@ static int RsaPublicEncryptEx(const byte* in, word32 inLen, byte* out,
 
     /* if async pending then return and skip done cleanup below */
     if (ret == WC_PENDING_E) {
+        key->state++;
         return ret;
     }
 
     key->state = RSA_STATE_NONE;
+    wc_RsaCleanup(key);
 
     return ret;
 }
@@ -1207,58 +1218,63 @@ static int RsaPrivateDecryptEx(byte* in, word32 inLen, byte* out,
                             enum wc_HashType hash, int mgf,
                             byte* label, word32 labelSz, WC_RNG* rng)
 {
-    int ret = BAD_FUNC_ARG;
+    int ret = RSA_WRONG_TYPE_E;
 
     if (in == NULL || inLen == 0 || out == NULL || key == NULL) {
-        return ret;
+        return BAD_FUNC_ARG;
     }
 
     switch (key->state) {
     case RSA_STATE_NONE:
     case RSA_STATE_DECRYPT_EXPTMOD:
-    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
+        key->state = RSA_STATE_DECRYPT_EXPTMOD;
+
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA) && \
+            defined(HAVE_CAVIUM)
+        /* Async operations that include padding */
         if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA) {
-            key->tmpLen = 0;
-            if (rsa_type == RSA_PRIVATE_DECRYPT && pad_value == RSA_BLOCK_TYPE_2) {
-                key->state = RSA_STATE_DECRYPT_RES;
-                key->tmp = NULL;
+            key->dataLen = 0;
+            if (rsa_type == RSA_PRIVATE_DECRYPT &&
+                                                pad_value == RSA_BLOCK_TYPE_2) {
+                key->state = RSA_STATE_DECRYPT_UNPAD;
+                key->data = NULL;
                 ret = NitroxRsaPrivateDecrypt(in, inLen, out, outLen, key);
                 if (ret > 0) {
                     if (outPtr)
                         *outPtr = in;
                 }
-                return ret;
+                break;
             }
-            else if (rsa_type == RSA_PUBLIC_DECRYPT && pad_value == RSA_BLOCK_TYPE_1) {
-                key->state = RSA_STATE_DECRYPT_RES;
-                key->tmp = NULL;
-                return NitroxRsaSSL_Verify(in, inLen, out, outLen, key);
+            else if (rsa_type == RSA_PUBLIC_DECRYPT &&
+                                                pad_value == RSA_BLOCK_TYPE_1) {
+                key->state = RSA_STATE_DECRYPT_UNPAD;
+                key->data = NULL;
+                ret = NitroxRsaSSL_Verify(in, inLen, out, outLen, key);
+                break;
             }
         }
     #endif
 
-        key->state = RSA_STATE_DECRYPT_UNPAD;
-
         /* verify the tmp ptr is NULL, otherwise indicates bad state */
-        if (key->tmp != NULL) {
+        if (key->data != NULL) {
             ERROR_OUT(BAD_STATE_E);
         }
 
         /* if not doing this inline then allocate a buffer for it */
-        key->tmpLen = inLen;
+        key->dataLen = inLen;
         if (outPtr == NULL) {
-            key->tmp = (byte*)XMALLOC(inLen, key->heap, DYNAMIC_TYPE_RSA);
-            key->tmpIsAlloc = 1;
-            if (key->tmp == NULL) {
+            key->data = (byte*)XMALLOC(inLen, key->heap, DYNAMIC_TYPE_WOLF_BIGINT);
+            if (key->data == NULL) {
                 ERROR_OUT(MEMORY_E);
             }
-            XMEMCPY(key->tmp, in, inLen);
+            key->dataIsAlloc = 1; /* flag ownership only once the buffer exists */
+            XMEMCPY(key->data, in, inLen);
         }
         else {
-            key->tmp = out;
+            key->data = out;
         }
-        ret = wc_RsaFunction(key->tmp, inLen, key->tmp, &key->tmpLen,
-                                                        rsa_type, key, rng);
+        ret = wc_RsaFunction(key->data, inLen, key->data, &key->dataLen, rsa_type,
+                                                                      key, rng);
         if (ret < 0) {
             break;
         }
@@ -1266,9 +1282,9 @@ static int RsaPrivateDecryptEx(byte* in, word32 inLen, byte* out,
     case RSA_STATE_DECRYPT_UNPAD:
     {
         byte* pad = NULL;
-        key->state = RSA_STATE_DECRYPT_RES;
-        ret = wc_RsaUnPad_ex(key->tmp, key->tmpLen, &pad, pad_value, pad_type,
-                                        hash, mgf, label, labelSz, key->heap);
+        key->state = RSA_STATE_DECRYPT_UNPAD;
+        ret = wc_RsaUnPad_ex(key->data, key->dataLen, &pad, pad_value, pad_type,
+                                          hash, mgf, label, labelSz, key->heap);
         if (ret > 0 && ret <= (int)outLen && pad != NULL) {
             /* only copy output if not inline */
             if (outPtr == NULL) {
@@ -1287,10 +1303,12 @@ static int RsaPrivateDecryptEx(byte* in, word32 inLen, byte* out,
         /* fall through */
     }
     case RSA_STATE_DECRYPT_RES:
-        key->state = RSA_STATE_NONE;
-    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
+        key->state = RSA_STATE_DECRYPT_RES;
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA) && \
+            defined(HAVE_CAVIUM)
         if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA) {
-            ret = key->tmpLen;
+            /* return event ret */
+            ret = key->asyncDev.event.ret;
         }
     #endif
         break;
@@ -1298,13 +1316,14 @@ static int RsaPrivateDecryptEx(byte* in, word32 inLen, byte* out,
         ret = BAD_STATE_E;
     }
 
+done:
+
     /* if async pending then return and skip done cleanup below */
     if (ret == WC_PENDING_E) {
+        key->state++;
         return ret;
     }
 
-done:
-
     key->state = RSA_STATE_NONE;
     wc_RsaCleanup(key);
 
@@ -1427,11 +1446,6 @@ int wc_RsaSSL_Sign(const byte* in, word32 inLen, byte* out, word32 outLen,
 
 int wc_RsaEncryptSize(RsaKey* key)
 {
-#if defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_CAVIUM)
-    if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA) {
-        return key->n.used;
-    }
-#endif
     return mp_unsigned_bin_size(&key->n);
 }
 
@@ -1481,6 +1495,26 @@ int wc_MakeRsaKey(RsaKey* key, int size, long e, WC_RNG* rng)
     if (e < 3 || (e & 1) == 0)
         return BAD_FUNC_ARG;
 
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_RSA)
+    if (key->asyncDev.marker == WOLFSSL_ASYNC_MARKER_RSA) {
+    #ifdef HAVE_CAVIUM
+        /* TODO: Not implemented */
+    #elif defined(HAVE_INTEL_QA)
+        /* TODO: Not implemented */
+    #else
+        WC_ASYNC_TEST* testDev = &key->asyncDev.test;
+        if (testDev->type == ASYNC_TEST_NONE) {
+            testDev->type = ASYNC_TEST_RSA_MAKE;
+            testDev->rsaMake.rng = rng;
+            testDev->rsaMake.key = key;
+            testDev->rsaMake.size = size;
+            testDev->rsaMake.e = e;
+            return WC_PENDING_E;
+        }
+    #endif
+    }
+#endif
+
     if ((err = mp_init_multi(&p, &q, &tmp1, &tmp2, &tmp3, NULL)) != MP_OKAY)
         return err;
 
@@ -1589,116 +1623,6 @@ int wc_RsaSetRNG(RsaKey* key, WC_RNG* rng)
 #endif /* WC_RSA_BLINDING */
 
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-int wc_RsaAsyncHandle(RsaKey* key, WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event)
-{
-    int ret;
-
-    if (key == NULL || queue == NULL || event == NULL) {
-        return BAD_FUNC_ARG;
-    }
-
-    /* make sure this rsa context had "wc_RsaAsyncInit" called on it */
-    if (key->asyncDev.marker != WOLFSSL_ASYNC_MARKER_RSA) {
-        return ASYNC_INIT_E;
-    }
-
-    /* setup the event and push to queue */
-    ret = wolfAsync_EventInit(event, WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT, &key->asyncDev);
-    if (ret == 0) {
-        ret = wolfEventQueue_Push(queue, event);
-    }
-
-    /* check for error (helps with debugging) */
-    if (ret != 0) {
-        WOLFSSL_MSG("wc_RsaAsyncHandle failed");
-    }
-    return ret;
-}
-
-int wc_RsaAsyncWait(int ret, RsaKey* key)
-{
-    if (ret == WC_PENDING_E) {
-        WOLF_EVENT event;
-        XMEMSET(&event, 0, sizeof(event));
-        ret = wolfAsync_EventInit(&event, WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT, &key->asyncDev);
-        if (ret == 0) {
-            ret = wolfAsync_EventWait(&event);
-            if (ret == 0 && event.ret >= 0) {
-                ret = event.ret;
-            }
-        }
-    }
-    return ret;
-}
-
-/* Initialize async RSA key */
-static int InitAsyncRsaKey(RsaKey* key)
-{
-    XMEMSET(&key->n,  0, sizeof(key->n));
-    XMEMSET(&key->e,  0, sizeof(key->e));
-    XMEMSET(&key->d,  0, sizeof(key->d));
-    XMEMSET(&key->p,  0, sizeof(key->p));
-    XMEMSET(&key->q,  0, sizeof(key->q));
-    XMEMSET(&key->dP, 0, sizeof(key->dP));
-    XMEMSET(&key->dQ, 0, sizeof(key->dQ));
-    XMEMSET(&key->u,  0, sizeof(key->u));
-
-    return 0;
-}
-
-/* Free async RSA key */
-static int FreeAsyncRsaKey(RsaKey* key)
-{
-    if (key->type == RSA_PRIVATE) {
-        if (key->d.dpraw) {
-            ForceZero(key->d.dpraw, key->d.used);
-        #ifndef USE_FAST_MATH
-            XFREE(key->d.dpraw,  key->heap, DYNAMIC_TYPE_ASYNC_RSA);
-        #endif
-        }
-        if (key->p.dpraw) {
-            ForceZero(key->p.dpraw, key->p.used);
-        #ifndef USE_FAST_MATH
-            XFREE(key->p.dpraw,  key->heap, DYNAMIC_TYPE_ASYNC_RSA);
-        #endif
-        }
-        if (key->q.dpraw) {
-            ForceZero(key->q.dpraw, key->q.used);
-        #ifndef USE_FAST_MATH
-            XFREE(key->q.dpraw,  key->heap, DYNAMIC_TYPE_ASYNC_RSA);
-        #endif
-        }
-        if (key->dP.dpraw) {
-            ForceZero(key->dP.dpraw, key->dP.used);
-        #ifndef USE_FAST_MATH
-            XFREE(key->dP.dpraw, key->heap, DYNAMIC_TYPE_ASYNC_RSA);
-        #endif
-        }
-        if (key->dQ.dpraw) {
-            ForceZero(key->dQ.dpraw, key->dQ.used);
-        #ifndef USE_FAST_MATH
-            XFREE(key->dQ.dpraw, key->heap, DYNAMIC_TYPE_ASYNC_RSA);
-        #endif
-        }
-        if (key->u.dpraw) {
-            ForceZero(key->u.dpraw, key->u.used);
-        #ifndef USE_FAST_MATH
-            XFREE(key->u.dpraw,  key->heap, DYNAMIC_TYPE_ASYNC_RSA);
-        #endif
-        }
-    }
-
-#ifndef USE_FAST_MATH
-    XFREE(key->n.dpraw,  key->heap, DYNAMIC_TYPE_ASYNC_RSA);
-    XFREE(key->e.dpraw,  key->heap, DYNAMIC_TYPE_ASYNC_RSA);
-#endif
-
-    return InitAsyncRsaKey(key);  /* reset pointers */
-}
-
-#endif /* WOLFSSL_ASYNC_CRYPT */
-
 #undef ERROR_OUT
 
 #endif /* HAVE_FIPS */
diff --git a/wolfcrypt/src/sha.c b/wolfcrypt/src/sha.c
old mode 100644
new mode 100755
index ac1a259e9..582695c34
--- a/wolfcrypt/src/sha.c
+++ b/wolfcrypt/src/sha.c
@@ -26,11 +26,10 @@
 
 #include 
 
-
 #if !defined(NO_SHA)
 
 #include 
-
+#include 
 
 /* fips wrapper calls, user can call direct */
 #ifdef HAVE_FIPS
@@ -38,6 +37,12 @@
 	{
 	    return InitSha_fips(sha);
 	}
+    int wc_InitSha_ex(Sha* sha, void* heap, int devId)
+    {
+        (void)heap;  /* ignored by the FIPS wrapper */
+        (void)devId; /* ignored by the FIPS wrapper */
+        return InitSha_fips(sha);
+    }
 
 	int wc_ShaUpdate(Sha* sha, const byte* data, word32 len)
 	{
@@ -48,11 +53,21 @@
 	{
 	    return ShaFinal_fips(sha,out);
     }
+    void wc_ShaFree(Sha* sha)
+    {
+        (void)sha;
+        /* Intentionally empty: free is not supported in FIPS */
+    }
 
 #else /* else build without fips */
 
+
+#if defined(WOLFSSL_TI_HASH)
+    /* #include  included by wc_port.c */
+
+#else
+
 #include 
-#include 
 #ifdef NO_INLINE
     #include 
 #else
@@ -61,13 +76,8 @@
 #endif
 
 
-/****************************************/
-/* SHA Hardware Variations */
-/****************************************/
-#if defined(WOLFSSL_TI_HASH)
-    /* #include  included by wc_port.c */
-
-#elif defined(WOLFSSL_PIC32MZ_HASH)
+/* Hardware Acceleration */
+#if defined(WOLFSSL_PIC32MZ_HASH)
     #define USE_SHA_SOFTWARE_IMPL
     #define wc_InitSha   wc_InitSha_sw
     #define wc_ShaUpdate wc_ShaUpdate_sw
@@ -80,7 +90,7 @@
      * library. (See note in README).
      */
 
-    int wc_InitSha(Sha* sha)
+    static int InitSha(Sha* sha)
     {
         /* STM32 struct notes:
          * sha->buffer  = first 4 bytes used to hold partial block if needed
@@ -193,7 +203,7 @@
 #elif defined(FREESCALE_LTC_SHA)
 
     #include "fsl_ltc.h"
-    int wc_InitSha(Sha* sha)
+    static int InitSha(Sha* sha)
     {
         LTC_HASH_Init(LTC_BASE, &sha->ctx, kLTC_Sha1, NULL, 0);
         return 0;
@@ -219,7 +229,7 @@
     #define USE_SHA_SOFTWARE_IMPL /* Only for API's, actual transform is here */
     #define XSHATRANSFORM   ShaTransform
 
-    int wc_InitSha(Sha* sha)
+    static int InitSha(Sha* sha)
     {
         int ret = 0;
         ret = wolfSSL_CryptHwMutexLock();
@@ -251,7 +261,7 @@
     /* Software implementation */
     #define USE_SHA_SOFTWARE_IMPL
 
-    int wc_InitSha(Sha* sha)
+    static int InitSha(Sha* sha)
     {
         int ret = 0;
 
@@ -268,7 +278,7 @@
         return ret;
     }
 
-#endif
+#endif /* End Hardware Acceleration */
 
 
 /* Software implementation */
@@ -385,12 +395,46 @@ static INLINE void AddLength(Sha* sha, word32 len)
         sha->hiLen++;                       /* carry low to high */
 }
 
+/* Set up a SHA-1 context: store the heap hint, call InitSha and, in
+ * SHA-async builds, initialize sha->asyncDev for devId. Returns
+ * BAD_FUNC_ARG for a NULL sha, else the InitSha / wolfAsync_DevCtxInit
+ * result. */
+int wc_InitSha_ex(Sha* sha, void* heap, int devId)
+{
+    int rc;
+
+    if (sha == NULL)
+        return BAD_FUNC_ARG;
+
+    sha->heap = heap;
+    rc = InitSha(sha);
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA)
+    if (rc == 0)
+        rc = wolfAsync_DevCtxInit(&sha->asyncDev, WOLFSSL_ASYNC_MARKER_SHA,
+                                                            sha->heap, devId);
+#else
+    (void)devId;
+#endif /* WOLFSSL_ASYNC_CRYPT */
+    return rc;
+}
 
 int wc_ShaUpdate(Sha* sha, const byte* data, word32 len)
 {
     /* do block size increments */
     byte* local = (byte*)sha->buffer;
 
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA)
+    if (sha->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA) {
+    #if defined(HAVE_INTEL_QA)
+        return IntelQaSymSha(&sha->asyncDev, NULL, data, len);
+    #endif
+    }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+    /* check that internal buffLen is valid */
+    if (sha->buffLen > SHA_BLOCK_SIZE)
+        return BUFFER_E;
+
     while (len) {
         word32 add = min(len, SHA_BLOCK_SIZE - sha->buffLen);
         XMEMCPY(&local[sha->buffLen], data, add);
@@ -416,6 +460,14 @@ int wc_ShaFinal(Sha* sha, byte* hash)
 {
     byte* local = (byte*)sha->buffer;
 
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA)
+    if (sha->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA) {
+    #if defined(HAVE_INTEL_QA)
+        return IntelQaSymSha(&sha->asyncDev, hash, NULL, SHA_DIGEST_SIZE);
+    #endif
+    }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
     AddLength(sha, sha->buffLen);  /* before adding pads */
 
     local[sha->buffLen++] = 0x80;  /* add 1 */
@@ -459,10 +511,61 @@ int wc_ShaFinal(Sha* sha, byte* hash)
 #endif
     XMEMCPY(hash, sha->digest, SHA_DIGEST_SIZE);
 
-    return wc_InitSha(sha);  /* reset state */
+    return InitSha(sha); /* reset state */
 }
 
 #endif /* USE_SHA_SOFTWARE_IMPL */
 
+
+int wc_InitSha(Sha* sha)
+{
+    return wc_InitSha_ex(sha, NULL, INVALID_DEVID); /* NULL heap hint */
+}
+
+/* NULL-safe; SHA-async builds pass sha->asyncDev to wolfAsync_DevCtxFree */
+void wc_ShaFree(Sha* sha)
+{
+    if (sha != NULL) {
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA)
+        wolfAsync_DevCtxFree(&sha->asyncDev, WOLFSSL_ASYNC_MARKER_SHA);
+#endif /* WOLFSSL_ASYNC_CRYPT */
+    }
+}
+
+#endif /* !WOLFSSL_TI_HASH */
 #endif /* HAVE_FIPS */
+
+#ifndef WOLFSSL_TI_HASH
+/* Write the digest of the data hashed so far into hash. A stack copy made
+ * with wc_ShaCopy is finalized, so wc_ShaFinal is never run on sha itself. */
+int wc_ShaGetHash(Sha* sha, byte* hash)
+{
+    Sha scratch;
+    int rc;
+
+    if (hash == NULL || sha == NULL)
+        return BAD_FUNC_ARG;
+
+    rc = wc_ShaCopy(sha, &scratch);
+
+    return (rc != 0) ? rc : wc_ShaFinal(&scratch, hash);
+}
+
+/* Copy SHA-1 context src into dst. asyncDev is only copied in SHA-async
+ * builds, the same guard wc_InitSha_ex/wc_ShaFree use to manage it. */
+int wc_ShaCopy(Sha* src, Sha* dst)
+{
+    int ret = 0;
+
+    if (src == NULL || dst == NULL)
+        return BAD_FUNC_ARG;
+
+    XMEMCPY(dst, src, sizeof(Sha));
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA)
+    ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
+#endif
+    return ret;
+}
+#endif /* !WOLFSSL_TI_HASH */
+
 #endif /* !NO_SHA */
diff --git a/wolfcrypt/src/sha256.c b/wolfcrypt/src/sha256.c
old mode 100644
new mode 100755
index bf4a3d2b1..948d18514
--- a/wolfcrypt/src/sha256.c
+++ b/wolfcrypt/src/sha256.c
@@ -27,251 +27,47 @@
 #endif
 
 #include 
-#include 
 
 #if !defined(NO_SHA256)
+
+#include 
+#include 
+
+/* fips wrapper calls, user can call direct */
 #ifdef HAVE_FIPS
 
-int wc_InitSha256(Sha256* sha)
-{
-    return InitSha256_fips(sha);
-}
-
-
-int wc_Sha256Update(Sha256* sha, const byte* data, word32 len)
-{
-    return Sha256Update_fips(sha, data, len);
-}
-
-
-int wc_Sha256Final(Sha256* sha, byte* out)
-{
-    return Sha256Final_fips(sha, out);
-}
+    int wc_InitSha256(Sha256* sha)
+    {
+        return InitSha256_fips(sha); /* thin wrapper over the _fips function */
+    }
+    int wc_InitSha256_ex(Sha256* sha, void* heap, int devId)
+    {
+        (void)heap;  /* not forwarded to InitSha256_fips() */
+        (void)devId; /* not forwarded to InitSha256_fips() */
+        return InitSha256_fips(sha);
+    }
+    int wc_Sha256Update(Sha256* sha, const byte* data, word32 len)
+    {
+        return Sha256Update_fips(sha, data, len); /* thin wrapper */
+    }
+    int wc_Sha256Final(Sha256* sha, byte* out)
+    {
+        return Sha256Final_fips(sha, out); /* thin wrapper */
+    }
+    void wc_Sha256Free(Sha256* sha)
+    {
+        (void)sha;
+        /* Not supported in FIPS: intentionally a no-op */
+    }
 
 #else /* else build without fips */
 
-#if !defined(NO_SHA256) && defined(WOLFSSL_TI_HASH)
+
+#if defined(WOLFSSL_TI_HASH)
     /* #include  included by wc_port.c */
 #else
 
-#if !defined (ALIGN32)
-    #if defined (__GNUC__)
-        #define ALIGN32 __attribute__ ( (aligned (32)))
-    #elif defined(_MSC_VER)
-        /* disable align warning, we want alignment ! */
-        #pragma warning(disable: 4324)
-        #define ALIGN32 __declspec (align (32))
-    #else
-        #define ALIGN32
-    #endif
-#endif
-
-#ifdef WOLFSSL_PIC32MZ_HASH
-#define wc_InitSha256   wc_InitSha256_sw
-#define wc_Sha256Update wc_Sha256Update_sw
-#define wc_Sha256Final  wc_Sha256Final_sw
-#endif
-
-#ifdef HAVE_FIPS
-    /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
-    #define FIPS_NO_WRAPPERS
-#endif
-
-#if defined(USE_INTEL_SPEEDUP)
-#define HAVE_INTEL_AVX1
-#define HAVE_INTEL_AVX2
-#endif
-
-#if defined(HAVE_INTEL_AVX2)
-#define HAVE_INTEL_RORX
-#endif
- 
-
-/*****
-Intel AVX1/AVX2 Macro Control Structure
-
-#define HAVE_INTEL_AVX1
-#define HAVE_INTEL_AVX2
-
-#define HAVE_INTEL_RORX
-
-
-int InitSha256(Sha256* sha256) { 
-     Save/Recover XMM, YMM
-     ...
-}
-
-#if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
-  Transform() ; Function prototype 
-#else
-  Transform() {   }
-  int Sha256Final() { 
-     Save/Recover XMM, YMM
-     ...
-  }
-#endif
-
-#if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
-    #if defined(HAVE_INTEL_RORX
-         #define RND with rorx instuction
-    #else
-        #define RND
-    #endif
-#endif
-
-#if defined(HAVE_INTEL_AVX1)
-   
-   #define XMM Instructions/inline asm
-   
-   int Transform() {
-       Stitched Message Sched/Round
-    } 
-   
-#elif defined(HAVE_INTEL_AVX2)
-  
-  #define YMM Instructions/inline asm
-  
-  int Transform() {
-      More granural Stitched Message Sched/Round
-  }
-  
-*/
-
-
-#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-
-/* Each platform needs to query info type 1 from cpuid to see if aesni is
- * supported. Also, let's setup a macro for proper linkage w/o ABI conflicts
- */
-
-#ifndef _MSC_VER
-    #define cpuid(reg, leaf, sub)\
-            __asm__ __volatile__ ("cpuid":\
-             "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\
-             "a" (leaf), "c"(sub));
-
-    #define XASM_LINK(f) asm(f)
-#else
-
-    #include 
-    #define cpuid(a,b) __cpuid((int*)a,b)
-
-    #define XASM_LINK(f)
-
-#endif /* _MSC_VER */
-
-#define EAX 0
-#define EBX 1
-#define ECX 2 
-#define EDX 3
-    
-#define CPUID_AVX1   0x1
-#define CPUID_AVX2   0x2
-#define CPUID_RDRAND 0x4
-#define CPUID_RDSEED 0x8
-#define CPUID_BMI2   0x10   /* MULX, RORX */
-
-#define IS_INTEL_AVX1       (cpuid_flags&CPUID_AVX1)
-#define IS_INTEL_AVX2       (cpuid_flags&CPUID_AVX2)
-#define IS_INTEL_BMI2       (cpuid_flags&CPUID_BMI2)
-#define IS_INTEL_RDRAND     (cpuid_flags&CPUID_RDRAND)
-#define IS_INTEL_RDSEED     (cpuid_flags&CPUID_RDSEED)
-
-static word32 cpuid_check = 0 ;
-static word32 cpuid_flags = 0 ;
-
-static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) {
-    int got_intel_cpu=0;
-    unsigned int reg[5]; 
-    
-    reg[4] = '\0' ;
-    cpuid(reg, 0, 0);  
-    if(XMEMCMP((char *)&(reg[EBX]), "Genu", 4) == 0 &&  
-                XMEMCMP((char *)&(reg[EDX]), "ineI", 4) == 0 &&  
-                XMEMCMP((char *)&(reg[ECX]), "ntel", 4) == 0) {  
-        got_intel_cpu = 1;  
-    }    
-    if (got_intel_cpu) {
-        cpuid(reg, leaf, sub);
-        return((reg[num]>>bit)&0x1) ;
-    }
-    return 0 ;
-}
-
-static int set_cpuid_flags(void) {  
-    if(cpuid_check==0) {
-        if(cpuid_flag(1, 0, ECX, 28)){ cpuid_flags |= CPUID_AVX1 ;}
-        if(cpuid_flag(7, 0, EBX, 5)){  cpuid_flags |= CPUID_AVX2 ; }
-        if(cpuid_flag(7, 0, EBX, 8)) { cpuid_flags |= CPUID_BMI2 ; }
-        if(cpuid_flag(1, 0, ECX, 30)){ cpuid_flags |= CPUID_RDRAND ;  } 
-        if(cpuid_flag(7, 0, EBX, 18)){ cpuid_flags |= CPUID_RDSEED ;  }
-        cpuid_check = 1 ;
-        return 0 ;
-    }
-    return 1 ;
-}
-
-
-/* #if defined(HAVE_INTEL_AVX1/2) at the tail of sha256 */
-static int Transform(Sha256* sha256);
-
-#if defined(HAVE_INTEL_AVX1)
-static int Transform_AVX1(Sha256 *sha256) ;
-#endif
-#if defined(HAVE_INTEL_AVX2)
-static int Transform_AVX2(Sha256 *sha256) ; 
-static int Transform_AVX1_RORX(Sha256 *sha256) ; 
-#endif
-
-static int (*Transform_p)(Sha256* sha256) /* = _Transform */;
-
-#define XTRANSFORM(sha256, B)  (*Transform_p)(sha256)
-
-static void set_Transform(void) {
-     if(set_cpuid_flags())return ;
-
-#if defined(HAVE_INTEL_AVX2)
-     if(IS_INTEL_AVX2 && IS_INTEL_BMI2){ 
-         Transform_p = Transform_AVX1_RORX; return ; 
-         Transform_p = Transform_AVX2      ; 
-                  /* for avoiding warning,"not used" */
-     }
-#endif
-#if defined(HAVE_INTEL_AVX1)
-     Transform_p = ((IS_INTEL_AVX1) ? Transform_AVX1 : Transform) ; return ;
-#endif
-     Transform_p = Transform ; return ;
-}
-
-#else
-   #if defined(FREESCALE_MMCAU_SHA)
-      #define XTRANSFORM(sha256, B) Transform(sha256, B)
-   #else
-      #define XTRANSFORM(sha256, B) Transform(sha256)
-   #endif
-#endif
-
-/* Dummy for saving MM_REGs on behalf of Transform */
-#if defined(HAVE_INTEL_AVX2)&& !defined(HAVE_INTEL_AVX1)
-#define  SAVE_XMM_YMM   __asm__ volatile("or %%r8d, %%r8d":::\
-  "%ymm4","%ymm5","%ymm6","%ymm7","%ymm8","%ymm9","%ymm10","%ymm11","%ymm12","%ymm13","%ymm14","%ymm15")
-#elif defined(HAVE_INTEL_AVX1)
-#define  SAVE_XMM_YMM   __asm__ volatile("or %%r8d, %%r8d":::\
-    "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10",\
-    "xmm11","xmm12","xmm13","xmm14","xmm15")
-#else
-#define  SAVE_XMM_YMM
-#endif
-
-#ifdef WOLFSSL_PIC32MZ_HASH
-#define InitSha256   InitSha256_sw
-#define Sha256Update Sha256Update_sw
-#define Sha256Final  Sha256Final_sw
-#endif
-
 #include 
-#include 
 
 #ifdef NO_INLINE
     #include 
@@ -280,324 +76,608 @@ static void set_Transform(void) {
     #include 
 #endif
 
-#ifdef FREESCALE_MMCAU_SHA
-    #include "fsl_mmcau.h"
+
+#if defined(USE_INTEL_SPEEDUP)
+    #define HAVE_INTEL_AVX1
+    #define HAVE_INTEL_AVX2
+#endif /* USE_INTEL_SPEEDUP */
+
+#if defined(HAVE_INTEL_AVX2)
+    #define HAVE_INTEL_RORX
 #endif
 
 
-#ifdef FREESCALE_LTC_SHA
-int wc_InitSha256(Sha256* sha256)
-{
-    LTC_HASH_Init(LTC_BASE, &sha256->ctx, kLTC_Sha256, NULL, 0);
-    return 0;
-}
-#else
-int wc_InitSha256(Sha256* sha256)
+static int InitSha256(Sha256* sha256)
 {
     int ret = 0;
-    #ifdef FREESCALE_MMCAU_SHA
-        ret = wolfSSL_CryptHwMutexLock();
-        if(ret != 0) {
-            return ret;
-        }
-        MMCAU_SHA256_InitializeOutput((uint32_t*)sha256->digest);
-        wolfSSL_CryptHwMutexUnLock();
-    #else
-        sha256->digest[0] = 0x6A09E667L;
-        sha256->digest[1] = 0xBB67AE85L;
-        sha256->digest[2] = 0x3C6EF372L;
-        sha256->digest[3] = 0xA54FF53AL;
-        sha256->digest[4] = 0x510E527FL;
-        sha256->digest[5] = 0x9B05688CL;
-        sha256->digest[6] = 0x1F83D9ABL;
-        sha256->digest[7] = 0x5BE0CD19L;
-    #endif
+    /* resets digest and length counters; 0, or BAD_FUNC_ARG if sha256 is NULL */
+    if (sha256 == NULL)
+        return BAD_FUNC_ARG;
+    /* SHA-256 initial hash values */
+    sha256->digest[0] = 0x6A09E667L;
+    sha256->digest[1] = 0xBB67AE85L;
+    sha256->digest[2] = 0x3C6EF372L;
+    sha256->digest[3] = 0xA54FF53AL;
+    sha256->digest[4] = 0x510E527FL;
+    sha256->digest[5] = 0x9B05688CL;
+    sha256->digest[6] = 0x1F83D9ABL;
+    sha256->digest[7] = 0x5BE0CD19L;
 
     sha256->buffLen = 0;
     sha256->loLen   = 0;
     sha256->hiLen   = 0;
-    
-#if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
-    set_Transform() ; /* choose best Transform function under this runtime environment */
-#endif
 
     return ret;
 }
-#endif /* FREESCALE_LTC_SHA */
 
-#if !defined(FREESCALE_LTC_SHA)
-#if !defined(FREESCALE_MMCAU_SHA)
-static const ALIGN32 word32 K[64] = {
-    0x428A2F98L, 0x71374491L, 0xB5C0FBCFL, 0xE9B5DBA5L, 0x3956C25BL,
-    0x59F111F1L, 0x923F82A4L, 0xAB1C5ED5L, 0xD807AA98L, 0x12835B01L,
-    0x243185BEL, 0x550C7DC3L, 0x72BE5D74L, 0x80DEB1FEL, 0x9BDC06A7L,
-    0xC19BF174L, 0xE49B69C1L, 0xEFBE4786L, 0x0FC19DC6L, 0x240CA1CCL,
-    0x2DE92C6FL, 0x4A7484AAL, 0x5CB0A9DCL, 0x76F988DAL, 0x983E5152L,
-    0xA831C66DL, 0xB00327C8L, 0xBF597FC7L, 0xC6E00BF3L, 0xD5A79147L,
-    0x06CA6351L, 0x14292967L, 0x27B70A85L, 0x2E1B2138L, 0x4D2C6DFCL,
-    0x53380D13L, 0x650A7354L, 0x766A0ABBL, 0x81C2C92EL, 0x92722C85L,
-    0xA2BFE8A1L, 0xA81A664BL, 0xC24B8B70L, 0xC76C51A3L, 0xD192E819L,
-    0xD6990624L, 0xF40E3585L, 0x106AA070L, 0x19A4C116L, 0x1E376C08L,
-    0x2748774CL, 0x34B0BCB5L, 0x391C0CB3L, 0x4ED8AA4AL, 0x5B9CCA4FL,
-    0x682E6FF3L, 0x748F82EEL, 0x78A5636FL, 0x84C87814L, 0x8CC70208L,
-    0x90BEFFFAL, 0xA4506CEBL, 0xBEF9A3F7L, 0xC67178F2L
-};
 
-#endif
+/* Hardware Acceleration */
+#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
 
-#if defined(FREESCALE_MMCAU_SHA)
+    /* in case intel instructions aren't available, plus we need the K[] global */
+    #define NEED_SOFT_SHA256
 
-static int Transform(Sha256* sha256, byte* buf)
-{
-    int ret = wolfSSL_CryptHwMutexLock();
-    if(ret == 0) {
-        MMCAU_SHA256_HashN(buf, 1, (uint32_t*)sha256->digest);
+    /*****
+    Intel AVX1/AVX2 Macro Control Structure
+
+    #define HAVE_INTEL_AVX1
+    #define HAVE_INTEL_AVX2
+
+    #define HAVE_INTEL_RORX
+
+
+    int InitSha256(Sha256* sha256) {
+         Save/Recover XMM, YMM
+         ...
+    }
+
+    #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
+      Transform(); Function prototype
+    #else
+      Transform() {   }
+      int Sha256Final() {
+         Save/Recover XMM, YMM
+         ...
+      }
+    #endif
+
+    #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
+        #if defined(HAVE_INTEL_RORX)
+             #define RND with rorx instruction
+        #else
+            #define RND
+        #endif
+    #endif
+
+    #if defined(HAVE_INTEL_AVX1)
+
+       #define XMM Instructions/inline asm
+
+       int Transform() {
+           Stitched Message Sched/Round
+        }
+
+    #elif defined(HAVE_INTEL_AVX2)
+
+      #define YMM Instructions/inline asm
+
+      int Transform() {
+          More granular Stitched Message Sched/Round
+      }
+
+    */
+
+    /* Each platform needs to query cpuid (leaves 1 and 7) to see if AVX1, AVX2
+     * and BMI2 are supported. Also, let's set up a macro for proper linkage w/o ABI conflicts
+     */
+
+    #ifndef _MSC_VER
+        #define cpuid(reg, leaf, sub)\
+                __asm__ __volatile__ ("cpuid":\
+                 "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\
+                 "a" (leaf), "c"(sub));
+
+        #define XASM_LINK(f) asm(f)
+    #else
+        #include <intrin.h>
+        #define cpuid(a,b,c) __cpuidex((int*)a,b,c)
+        /* ^ same (reg, leaf, sub) arity as the GCC form; callers pass 3 args */
+        #define XASM_LINK(f)
+    #endif /* _MSC_VER */
+
+    #define EAX 0
+    #define EBX 1
+    #define ECX 2
+    #define EDX 3
+
+    #define CPUID_AVX1   0x1
+    #define CPUID_AVX2   0x2
+    #define CPUID_RDRAND 0x4
+    #define CPUID_RDSEED 0x8
+    #define CPUID_BMI2   0x10   /* MULX, RORX */
+
+    #define IS_INTEL_AVX1       (cpuid_flags & CPUID_AVX1)
+    #define IS_INTEL_AVX2       (cpuid_flags & CPUID_AVX2)
+    #define IS_INTEL_BMI2       (cpuid_flags & CPUID_BMI2)
+    #define IS_INTEL_RDRAND     (cpuid_flags & CPUID_RDRAND)
+    #define IS_INTEL_RDSEED     (cpuid_flags & CPUID_RDSEED)
+
+    static word32 cpuid_check = 0;
+    static word32 cpuid_flags = 0;
+
+    static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) {
+        int got_intel_cpu=0;
+        unsigned int reg[5];
+        /* returns bit 'bit' of register 'num' from cpuid(leaf, sub); 0 if not Intel */
+        reg[4] = '\0';
+        cpuid(reg, 0, 0); /* leaf 0: vendor string is compared below */
+        if (XMEMCMP((char *)&(reg[EBX]), "Genu", 4) == 0 &&
+            XMEMCMP((char *)&(reg[EDX]), "ineI", 4) == 0 &&
+            XMEMCMP((char *)&(reg[ECX]), "ntel", 4) == 0) {
+            got_intel_cpu = 1; /* EBX, EDX, ECX spell "GenuineIntel" */
+        }
+        if (got_intel_cpu) {
+            cpuid(reg, leaf, sub);
+            return ((reg[num] >> bit) & 0x1);
+        }
+        return 0; /* vendor string did not match */
+    }
+
+    static int set_cpuid_flags(void) {
+        if (cpuid_check==0) { /* not probed yet: fill cpuid_flags once */
+            if (cpuid_flag(1, 0, ECX, 28)){ cpuid_flags |= CPUID_AVX1; }
+            if (cpuid_flag(7, 0, EBX, 5)) { cpuid_flags |= CPUID_AVX2; }
+            if (cpuid_flag(7, 0, EBX, 8)) { cpuid_flags |= CPUID_BMI2; }
+            if (cpuid_flag(1, 0, ECX, 30)){ cpuid_flags |= CPUID_RDRAND; }
+            if (cpuid_flag(7, 0, EBX, 18)){ cpuid_flags |= CPUID_RDSEED; }
+            cpuid_check = 1;
+            return 0; /* flags were probed by this call */
+        }
+        return 1; /* flags were already probed by an earlier call */
+    }
+
+    /* #if defined(HAVE_INTEL_AVX1/2) at the tail of sha256 */
+    static int Transform(Sha256* sha256);
+    #if defined(HAVE_INTEL_AVX1)
+        static int Transform_AVX1(Sha256 *sha256);
+    #endif
+    #if defined(HAVE_INTEL_AVX2)
+        static int Transform_AVX2(Sha256 *sha256);
+        static int Transform_AVX1_RORX(Sha256 *sha256);
+    #endif
+    static int (*Transform_p)(Sha256* sha256) /* = _Transform */;
+    #define XTRANSFORM(sha256, B)  (*Transform_p)(sha256)
+
+    static void set_Transform(void) {
+         if (set_cpuid_flags()) return; /* already probed: keep Transform_p */
+    /* choose Transform_p: AVX1_RORX if AVX2+BMI2, else AVX1, else Transform() */
+    #if defined(HAVE_INTEL_AVX2)
+         if (IS_INTEL_AVX2 && IS_INTEL_BMI2) {
+             Transform_p = Transform_AVX1_RORX; return;
+             Transform_p = Transform_AVX2;
+                      /* unreachable; only avoids a "not used" warning */
+         }
+    #endif
+    #if defined(HAVE_INTEL_AVX1)
+         Transform_p = ((IS_INTEL_AVX1) ? Transform_AVX1 : Transform); return;
+    #endif
+         Transform_p = Transform; return;
+    }
+
+    /* Dummy for saving MM_REGs on behalf of Transform */
+    #if defined(HAVE_INTEL_AVX2) && !defined(HAVE_INTEL_AVX1)
+        #define SAVE_XMM_YMM   __asm__ volatile("or %%r8d, %%r8d":::\
+          "%ymm4","%ymm5","%ymm6","%ymm7","%ymm8","%ymm9","%ymm10","%ymm11","%ymm12","%ymm13","%ymm14","%ymm15")
+    #elif defined(HAVE_INTEL_AVX1)
+        #define SAVE_XMM_YMM   __asm__ volatile("or %%r8d, %%r8d":::\
+            "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10",\
+            "xmm11","xmm12","xmm13","xmm14","xmm15")
+    #endif
+
+    int wc_InitSha256_ex(Sha256* sha256, void* heap, int devId)
+    {
+        int ret = 0;
+        if (sha256 == NULL)
+            return BAD_FUNC_ARG;
+        /* AVX build: reset state, pick Transform_p, then optional async setup */
+        sha256->heap = heap;
+
+        ret = InitSha256(sha256);
+        if (ret != 0)
+            return ret;
+
+        /* choose best Transform function under this runtime environment */
+        set_Transform();
+
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
+        ret = wolfAsync_DevCtxInit(&sha256->asyncDev,
+                            WOLFSSL_ASYNC_MARKER_SHA256, sha256->heap, devId);
+    #else
+        (void)devId; /* only consumed by the async build */
+    #endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_SHA256 */
+
+        return ret;
+    }
+
+#elif defined(FREESCALE_LTC_SHA)
+    int wc_InitSha256_ex(Sha256* sha256, void* heap, int devId)
+    {
+        (void)heap;  /* unused in the LTC variant */
+        (void)devId; /* unused in the LTC variant */
+        if (sha256 == NULL) return BAD_FUNC_ARG; /* as in the other variants */
+        LTC_HASH_Init(LTC_BASE, &sha256->ctx, kLTC_Sha256, NULL, 0);
+
+        return 0;
+    }
+
+#elif defined(FREESCALE_MMCAU_SHA)
+    #include "fsl_mmcau.h"
+    #define XTRANSFORM(sha256, B) Transform(sha256, B)
+
+    int wc_InitSha256_ex(Sha256* sha256, void* heap, int devId)
+    {
+        int ret = 0;
+        /* MMCAU variant: digest is set by MMCAU_SHA256_InitializeOutput() */
+        (void)heap;
+        (void)devId;
+        if (sha256 == NULL) return BAD_FUNC_ARG; /* checked before locking */
+        ret = wolfSSL_CryptHwMutexLock();
+        if (ret != 0) {
+            return ret;
+        }
+        MMCAU_SHA256_InitializeOutput((uint32_t*)sha256->digest);
         wolfSSL_CryptHwMutexUnLock();
+
+        sha256->buffLen = 0;
+        sha256->loLen   = 0;
+        sha256->hiLen   = 0;
+
+        return ret;
     }
-    return ret;
-}
 
-#endif /* FREESCALE_MMCAU_SHA */
+    static int Transform(Sha256* sha256, byte* buf)
+    {
+        int ret = wolfSSL_CryptHwMutexLock(); /* held around the MMCAU call */
+        if (ret == 0) {
+            MMCAU_SHA256_HashN(buf, 1, sha256->digest); /* presumably 1 block */
+            wolfSSL_CryptHwMutexUnLock();
+        }
+        return ret; /* 0, or the lock error (nothing was hashed) */
+    }
 
-#define Ch(x,y,z)       ((z) ^ ((x) & ((y) ^ (z))))
-#define Maj(x,y,z)      ((((x) | (y)) & (z)) | ((x) & (y)))
-#define R(x, n)         (((x)&0xFFFFFFFFU)>>(n))
+#elif defined(WOLFSSL_PIC32MZ_HASH)
+    #define NEED_SOFT_SHA256
 
-#define S(x, n)         rotrFixed(x, n)
-#define Sigma0(x)       (S(x, 2) ^ S(x, 13) ^ S(x, 22))
-#define Sigma1(x)       (S(x, 6) ^ S(x, 11) ^ S(x, 25))
-#define Gamma0(x)       (S(x, 7) ^ S(x, 18) ^ R(x, 3))
-#define Gamma1(x)       (S(x, 17) ^ S(x, 19) ^ R(x, 10))
+    #define wc_InitSha256   wc_InitSha256_sw
+    #define wc_Sha256Update wc_Sha256Update_sw
+    #define wc_Sha256Final  wc_Sha256Final_sw
 
-#define RND(a,b,c,d,e,f,g,h,i) \
-     t0 = (h) + Sigma1((e)) + Ch((e), (f), (g)) + K[(i)] + W[(i)]; \
-     t1 = Sigma0((a)) + Maj((a), (b), (c)); \
-     (d) += t0; \
-     (h)  = t0 + t1;
+    int wc_InitSha256_ex(Sha256* sha256, void* heap, int devId)
+    {
+        if (sha256 == NULL)
+            return BAD_FUNC_ARG;
 
-#if !defined(FREESCALE_MMCAU_SHA)
-static int Transform(Sha256* sha256)
-{
-    word32 S[8], t0, t1;
-    int i;
+        sha256->heap = heap; (void)devId; /* devId is unused in this variant */
 
-#ifdef WOLFSSL_SMALL_STACK
-    word32* W;
+        return InitSha256(sha256);
+    }
 
-    W = (word32*) XMALLOC(sizeof(word32) * 64, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-    if (W == NULL)
-        return MEMORY_E;
 #else
-    word32 W[64];
+    #define NEED_SOFT_SHA256
+
+    #define XTRANSFORM(sha256, B) Transform(sha256)
+
+    int wc_InitSha256_ex(Sha256* sha256, void* heap, int devId)
+    {
+        int ret = 0;
+        if (sha256 == NULL)
+            return BAD_FUNC_ARG;
+        /* software build: reset state, then optional async device setup */
+        sha256->heap = heap;
+
+        ret = InitSha256(sha256);
+        if (ret != 0)
+            return ret;
+
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
+        ret = wolfAsync_DevCtxInit(&sha256->asyncDev,
+                            WOLFSSL_ASYNC_MARKER_SHA256, sha256->heap, devId);
+    #else
+        (void)devId; /* only consumed by the async build */
+    #endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_SHA256 */
+
+        return ret;
+    }
+#endif /* End Hardware Acceleration */
+
+#ifndef SAVE_XMM_YMM
+    #define SAVE_XMM_YMM
 #endif
 
-    /* Copy context->state[] to working vars */
-    for (i = 0; i < 8; i++)
-        S[i] = sha256->digest[i];
+#ifdef NEED_SOFT_SHA256
 
-    for (i = 0; i < 16; i++)
-        W[i] = sha256->buffer[i];
+    static const ALIGN32 word32 K[64] = { /* SHA-256 round constants */
+        0x428A2F98L, 0x71374491L, 0xB5C0FBCFL, 0xE9B5DBA5L, 0x3956C25BL,
+        0x59F111F1L, 0x923F82A4L, 0xAB1C5ED5L, 0xD807AA98L, 0x12835B01L,
+        0x243185BEL, 0x550C7DC3L, 0x72BE5D74L, 0x80DEB1FEL, 0x9BDC06A7L,
+        0xC19BF174L, 0xE49B69C1L, 0xEFBE4786L, 0x0FC19DC6L, 0x240CA1CCL,
+        0x2DE92C6FL, 0x4A7484AAL, 0x5CB0A9DCL, 0x76F988DAL, 0x983E5152L,
+        0xA831C66DL, 0xB00327C8L, 0xBF597FC7L, 0xC6E00BF3L, 0xD5A79147L,
+        0x06CA6351L, 0x14292967L, 0x27B70A85L, 0x2E1B2138L, 0x4D2C6DFCL,
+        0x53380D13L, 0x650A7354L, 0x766A0ABBL, 0x81C2C92EL, 0x92722C85L,
+        0xA2BFE8A1L, 0xA81A664BL, 0xC24B8B70L, 0xC76C51A3L, 0xD192E819L,
+        0xD6990624L, 0xF40E3585L, 0x106AA070L, 0x19A4C116L, 0x1E376C08L,
+        0x2748774CL, 0x34B0BCB5L, 0x391C0CB3L, 0x4ED8AA4AL, 0x5B9CCA4FL,
+        0x682E6FF3L, 0x748F82EEL, 0x78A5636FL, 0x84C87814L, 0x8CC70208L,
+        0x90BEFFFAL, 0xA4506CEBL, 0xBEF9A3F7L, 0xC67178F2L
+    };
 
-    for (i = 16; i < 64; i++)
-        W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16];
+    #define Ch(x,y,z)       ((z) ^ ((x) & ((y) ^ (z))))
+    #define Maj(x,y,z)      ((((x) | (y)) & (z)) | ((x) & (y)))
+    #define R(x, n)         (((x) & 0xFFFFFFFFU) >> (n))
 
-    for (i = 0; i < 64; i += 8) {
-        RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0);
-        RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1);
-        RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2);
-        RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],i+3);
-        RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],i+4);
-        RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],i+5);
-        RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6);
-        RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7);
+    #define S(x, n)         rotrFixed(x, n)
+    #define Sigma0(x)       (S(x, 2) ^ S(x, 13) ^ S(x, 22))
+    #define Sigma1(x)       (S(x, 6) ^ S(x, 11) ^ S(x, 25))
+    #define Gamma0(x)       (S(x, 7) ^ S(x, 18) ^ R(x, 3))
+    #define Gamma1(x)       (S(x, 17) ^ S(x, 19) ^ R(x, 10))
+
+    #define RND(a,b,c,d,e,f,g,h,i) \
+         t0 = (h) + Sigma1((e)) + Ch((e), (f), (g)) + K[(i)] + W[(i)]; \
+         t1 = Sigma0((a)) + Maj((a), (b), (c)); \
+         (d) += t0; \
+         (h)  = t0 + t1;
+
+    static int Transform(Sha256* sha256)
+    {
+        word32 S[8], t0, t1;
+        int i;
+    /* software compression: mixes the block in sha256->buffer into digest */
+    #ifdef WOLFSSL_SMALL_STACK
+        word32* W;
+
+        W = (word32*)XMALLOC(sizeof(word32) * SHA256_BLOCK_SIZE, NULL,
+            DYNAMIC_TYPE_TMP_BUFFER);
+        if (W == NULL)
+            return MEMORY_E;
+    #else
+        word32 W[SHA256_BLOCK_SIZE];
+    #endif
+    /* NOTE(review): SHA256_BLOCK_SIZE sizes W and bounds the rounds; K is [64] */
+        /* Copy context->state[] to working vars */
+        for (i = 0; i < 8; i++)
+            S[i] = sha256->digest[i];
+        /* first 16 schedule words come straight from the block buffer */
+        for (i = 0; i < 16; i++)
+            W[i] = sha256->buffer[i];
+        /* expand the remaining schedule words */
+        for (i = 16; i < SHA256_BLOCK_SIZE; i++)
+            W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16];
+        /* rounds, unrolled 8 at a time; the S[] arguments rotate each round */
+        for (i = 0; i < SHA256_BLOCK_SIZE; i += 8) {
+            RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0);
+            RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1);
+            RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2);
+            RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],i+3);
+            RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],i+4);
+            RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],i+5);
+            RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6);
+            RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7);
+        }
+
+        /* Add the working vars back into digest state[] */
+        for (i = 0; i < 8; i++) {
+            sha256->digest[i] += S[i];
+        }
+
+    #ifdef WOLFSSL_SMALL_STACK
+        XFREE(W, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    #endif
+
+        return 0; /* MEMORY_E above is the only failure path */
     }
-
-    /* Add the working vars back into digest state[] */
-    for (i = 0; i < 8; i++) {
-        sha256->digest[i] += S[i];
-    }
-
-#ifdef WOLFSSL_SMALL_STACK
-    XFREE(W, NULL, DYNAMIC_TYPE_TMP_BUFFER);
 #endif
+/* End wc_ software implementation */
 
-    return 0;
-}
 
-#endif /* #if !defined(FREESCALE_MMCAU_SHA) */
+#ifdef XTRANSFORM
 
-static INLINE void AddLength(Sha256* sha256, word32 len)
-{
-    word32 tmp = sha256->loLen;
-    if ( (sha256->loLen += len) < tmp)
-        sha256->hiLen++;                       /* carry low to high */
-}
-#endif /* FREESCALE_LTC_SHA */
+    static INLINE void AddLength(Sha256* sha256, word32 len)
+    {
+        word32 tmp = sha256->loLen; /* old value, used to detect wrap-around */
+        if ( (sha256->loLen += len) < tmp)
+            sha256->hiLen++;                       /* carry low to high */
+    }
 
-#ifdef FREESCALE_LTC_SHA
-int wc_Sha256Update(Sha256* sha256, const byte* data, word32 len)
-{
-    LTC_HASH_Update(&sha256->ctx, data, len);
-    return 0;
-}
-#else
-static INLINE int Sha256Update(Sha256* sha256, const byte* data, word32 len)
-{
+    static INLINE int Sha256Update(Sha256* sha256, const byte* data, word32 len)
+    {
+        int ret = 0; /* 0 unless an argument, state or Transform error occurs */
+        byte* local;
 
-    /* do block size increments */
-    byte* local = (byte*)sha256->buffer;
+        if (sha256 == NULL || (data == NULL && len > 0)) {
+            return BAD_FUNC_ARG; /* NULL data is allowed only with len == 0 */
+        }
 
-    SAVE_XMM_YMM ; /* for Intel AVX */
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
+        if (sha256->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA256) {
+        #if defined(HAVE_INTEL_QA)
+            return IntelQaSymSha256(&sha256->asyncDev, NULL, data, len);
+        #endif
+        }
+    #endif /* WOLFSSL_ASYNC_CRYPT && WC_ASYNC_ENABLE_SHA256 */
 
-    while (len) {
-        word32 add = min(len, SHA256_BLOCK_SIZE - sha256->buffLen);
-        XMEMCPY(&local[sha256->buffLen], data, add);
+        /* do block size increments; local is a byte view of the block buffer */
+        local = (byte*)sha256->buffer;
 
-        sha256->buffLen += add;
-        data            += add;
-        len             -= add;
+        /* check that internal buffLen is valid */
+        if (sha256->buffLen > SHA256_BLOCK_SIZE)
+            return BUFFER_E;
 
-        if (sha256->buffLen == SHA256_BLOCK_SIZE) {
-            int ret;
+        SAVE_XMM_YMM; /* for Intel AVX */
 
-            #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
-                #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-                if(!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
-                #endif
-                ByteReverseWords(sha256->buffer, sha256->buffer,
-                                 SHA256_BLOCK_SIZE);
+        while (len) {
+            word32 add = min(len, SHA256_BLOCK_SIZE - sha256->buffLen);
+            XMEMCPY(&local[sha256->buffLen], data, add);
+
+            sha256->buffLen += add;
+            data            += add;
+            len             -= add;
+
+            if (sha256->buffLen == SHA256_BLOCK_SIZE) { /* full block: hash it */
+        #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+            #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+                if (!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
             #endif
+                {
+                    ByteReverseWords(sha256->buffer, sha256->buffer,
+                                                             SHA256_BLOCK_SIZE);
+                }
+        #endif
+                ret = XTRANSFORM(sha256, local);
+                if (ret != 0) {
+                    break; /* stop on Transform error; ret is returned below */
+                }
+
+                AddLength(sha256, SHA256_BLOCK_SIZE);
+                sha256->buffLen = 0;
+            }
+        }
+
+        return ret;
+    }
+
+    int wc_Sha256Update(Sha256* sha256, const byte* data, word32 len)
+    {
+        return Sha256Update(sha256, data, len); /* public entry point */
+    }
+
+    static INLINE int Sha256Final(Sha256* sha256)
+    {
+        int ret;
+        byte* local = (byte*)sha256->buffer;
+        /* pads the buffered data, stores the bit length, runs the last Transform */
+        SAVE_XMM_YMM; /* for Intel AVX */
+
+        AddLength(sha256, sha256->buffLen);  /* before adding pads */
+        local[sha256->buffLen++] = 0x80;     /* add 1 bit (0x80 byte) */
+
+        /* no room left for the length field: zero-fill and hash this block */
+        if (sha256->buffLen > SHA256_PAD_SIZE) {
+            XMEMSET(&local[sha256->buffLen], 0,
+                SHA256_BLOCK_SIZE - sha256->buffLen);
+            sha256->buffLen += SHA256_BLOCK_SIZE - sha256->buffLen;
+
+    #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
+        #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+            if (!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
+        #endif
+            {
+                ByteReverseWords(sha256->buffer, sha256->buffer,
+                    SHA256_BLOCK_SIZE);
+            }
+    #endif
+
             ret = XTRANSFORM(sha256, local);
             if (ret != 0)
                 return ret;
 
-            AddLength(sha256, SHA256_BLOCK_SIZE);
             sha256->buffLen = 0;
         }
-    }
+        XMEMSET(&local[sha256->buffLen], 0, SHA256_PAD_SIZE - sha256->buffLen);
 
-    return 0;
-}
+        /* put lengths in bits: multiply the 64-bit hiLen:loLen byte count by 8 */
+        sha256->hiLen = (sha256->loLen >> (8 * sizeof(sha256->loLen) - 3)) +
+                                                         (sha256->hiLen << 3);
+        sha256->loLen = sha256->loLen << 3;
 
-int wc_Sha256Update(Sha256* sha256, const byte* data, word32 len)
-{
-    return Sha256Update(sha256, data, len);
-}
-
-#endif /* FREESCALE_LTC_SHA */
-
-#ifdef FREESCALE_LTC_SHA
-int wc_Sha256Final(Sha256* sha256, byte* hash)
-{
-    uint32_t hashlen = SHA256_DIGEST_SIZE;
-    LTC_HASH_Finish(&sha256->ctx, hash, &hashlen);
-    return wc_InitSha256(sha256);  /* reset state */
-}
-#else
-static INLINE int Sha256Final(Sha256* sha256)
-{
-    byte* local = (byte*)sha256->buffer;
-    int ret;
-    
-    SAVE_XMM_YMM ; /* for Intel AVX */
-
-    AddLength(sha256, sha256->buffLen);  /* before adding pads */
-
-    local[sha256->buffLen++] = 0x80;     /* add 1 */
-
-    /* pad with zeros */
-    if (sha256->buffLen > SHA256_PAD_SIZE) {
-        XMEMSET(&local[sha256->buffLen], 0, SHA256_BLOCK_SIZE - sha256->buffLen);
-        sha256->buffLen += SHA256_BLOCK_SIZE - sha256->buffLen;
-
-        #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
-            #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-            if(!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
-            #endif
-            ByteReverseWords(sha256->buffer, sha256->buffer, SHA256_BLOCK_SIZE);
-        #endif
-
-        ret = XTRANSFORM(sha256, local);
-        if (ret != 0)
-            return ret;
-
-        sha256->buffLen = 0;
-    }
-    XMEMSET(&local[sha256->buffLen], 0, SHA256_PAD_SIZE - sha256->buffLen);
-
-    /* put lengths in bits */
-    sha256->hiLen = (sha256->loLen >> (8*sizeof(sha256->loLen) - 3)) +
-                 (sha256->hiLen << 3);
-    sha256->loLen = sha256->loLen << 3;
-
-    /* store lengths */
+        /* store lengths in the last two words of the block (hiLen, then loLen) */
     #if defined(LITTLE_ENDIAN_ORDER) && !defined(FREESCALE_MMCAU_SHA)
         #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-        if(!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
+            if (!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
         #endif
-            ByteReverseWords(sha256->buffer, sha256->buffer, SHA256_BLOCK_SIZE);
+            {
+                ByteReverseWords(sha256->buffer, sha256->buffer,
+                    SHA256_BLOCK_SIZE);
+            }
     #endif
-    /* ! length ordering dependent on digest endian type ! */
-    XMEMCPY(&local[SHA256_PAD_SIZE], &sha256->hiLen, sizeof(word32));
-    XMEMCPY(&local[SHA256_PAD_SIZE + sizeof(word32)], &sha256->loLen,
-            sizeof(word32));
+        /* ! length ordering dependent on digest endian type ! */
+        XMEMCPY(&local[SHA256_PAD_SIZE], &sha256->hiLen, sizeof(word32));
+        XMEMCPY(&local[SHA256_PAD_SIZE + sizeof(word32)], &sha256->loLen,
+                sizeof(word32));
 
-    #if defined(FREESCALE_MMCAU_SHA) || defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+    #if defined(FREESCALE_MMCAU_SHA) || defined(HAVE_INTEL_AVX1) || \
+            defined(HAVE_INTEL_AVX2)
         /* Kinetis requires only these bytes reversed */
         #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-        if(IS_INTEL_AVX1 || IS_INTEL_AVX2)
+            if (IS_INTEL_AVX1 || IS_INTEL_AVX2)
         #endif
-        ByteReverseWords(&sha256->buffer[SHA256_PAD_SIZE/sizeof(word32)],
-                         &sha256->buffer[SHA256_PAD_SIZE/sizeof(word32)],
-                         2 * sizeof(word32));
+            {
+                ByteReverseWords(
+                    &sha256->buffer[SHA256_PAD_SIZE / sizeof(word32)],
+                    &sha256->buffer[SHA256_PAD_SIZE / sizeof(word32)],
+                    2 * sizeof(word32));
+            }
     #endif
 
-    return XTRANSFORM(sha256, local);
-}
+        return XTRANSFORM(sha256, local); /* hash the final padded block */
+    }
 
-int wc_Sha256Final(Sha256* sha256, byte* hash)
-{
-    int ret;
+    int wc_Sha256Final(Sha256* sha256, byte* hash)
+    {
+        int ret;
 
-    ret = Sha256Final(sha256);
-    if (ret != 0)
-        return ret;
+        if (sha256 == NULL || hash == NULL) {
+            return BAD_FUNC_ARG;
+        }
+
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
+        if (sha256->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA256) {
+        #if defined(HAVE_INTEL_QA)
+            return IntelQaSymSha256(&sha256->asyncDev, hash, NULL,
+                                            SHA256_DIGEST_SIZE);
+        #endif
+        }
+    #endif /* WOLFSSL_ASYNC_CRYPT */
+
+        ret = Sha256Final(sha256);
+        if (ret != 0)
+            return ret;
 
     #if defined(LITTLE_ENDIAN_ORDER)
         ByteReverseWords(sha256->digest, sha256->digest, SHA256_DIGEST_SIZE);
     #endif
-    XMEMCPY(hash, sha256->digest, SHA256_DIGEST_SIZE);
+        XMEMCPY(hash, sha256->digest, SHA256_DIGEST_SIZE);
 
-    return wc_InitSha256(sha256);  /* reset state */
-}
-#endif /* FREESCALE_LTC_SHA */
+        return InitSha256(sha256);  /* reset state */
+    }
 
+#endif /* XTRANSFORM */
 
 
 #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
 
 #define _DigestToReg(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
-    { word32 d ;\
-    d = sha256->digest[0]; __asm__ volatile("movl %0, %"#S_0::"r"(d):SSE_REGs) ;\
-    d = sha256->digest[1]; __asm__ volatile("movl %0, %"#S_1::"r"(d):SSE_REGs) ;\
-    d = sha256->digest[2]; __asm__ volatile("movl %0, %"#S_2::"r"(d):SSE_REGs) ;\
-    d = sha256->digest[3]; __asm__ volatile("movl %0, %"#S_3::"r"(d):SSE_REGs) ;\
-    d = sha256->digest[4]; __asm__ volatile("movl %0, %"#S_4::"r"(d):SSE_REGs) ;\
-    d = sha256->digest[5]; __asm__ volatile("movl %0, %"#S_5::"r"(d):SSE_REGs) ;\
-    d = sha256->digest[6]; __asm__ volatile("movl %0, %"#S_6::"r"(d):SSE_REGs) ;\
-    d = sha256->digest[7]; __asm__ volatile("movl %0, %"#S_7::"r"(d):SSE_REGs) ;\
+{ word32 d;\
+    d = sha256->digest[0]; __asm__ volatile("movl %0, %"#S_0::"r"(d):SSE_REGs);\
+    d = sha256->digest[1]; __asm__ volatile("movl %0, %"#S_1::"r"(d):SSE_REGs);\
+    d = sha256->digest[2]; __asm__ volatile("movl %0, %"#S_2::"r"(d):SSE_REGs);\
+    d = sha256->digest[3]; __asm__ volatile("movl %0, %"#S_3::"r"(d):SSE_REGs);\
+    d = sha256->digest[4]; __asm__ volatile("movl %0, %"#S_4::"r"(d):SSE_REGs);\
+    d = sha256->digest[5]; __asm__ volatile("movl %0, %"#S_5::"r"(d):SSE_REGs);\
+    d = sha256->digest[6]; __asm__ volatile("movl %0, %"#S_6::"r"(d):SSE_REGs);\
+    d = sha256->digest[7]; __asm__ volatile("movl %0, %"#S_7::"r"(d):SSE_REGs);\
 }
 
 #define _RegToDigest(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
-    { word32 d ; \
-    __asm__ volatile("movl %"#S_0", %0":"=r"(d)::SSE_REGs) ; sha256->digest[0] += d;\
-    __asm__ volatile("movl %"#S_1", %0":"=r"(d)::SSE_REGs) ; sha256->digest[1] += d;\
-    __asm__ volatile("movl %"#S_2", %0":"=r"(d)::SSE_REGs) ; sha256->digest[2] += d;\
-    __asm__ volatile("movl %"#S_3", %0":"=r"(d)::SSE_REGs) ; sha256->digest[3] += d;\
-    __asm__ volatile("movl %"#S_4", %0":"=r"(d)::SSE_REGs) ; sha256->digest[4] += d;\
-    __asm__ volatile("movl %"#S_5", %0":"=r"(d)::SSE_REGs) ; sha256->digest[5] += d;\
-    __asm__ volatile("movl %"#S_6", %0":"=r"(d)::SSE_REGs) ; sha256->digest[6] += d;\
-    __asm__ volatile("movl %"#S_7", %0":"=r"(d)::SSE_REGs) ; sha256->digest[7] += d;\
+{ word32 d; \
+    __asm__ volatile("movl %"#S_0", %0":"=r"(d)::SSE_REGs); sha256->digest[0] += d;\
+    __asm__ volatile("movl %"#S_1", %0":"=r"(d)::SSE_REGs); sha256->digest[1] += d;\
+    __asm__ volatile("movl %"#S_2", %0":"=r"(d)::SSE_REGs); sha256->digest[2] += d;\
+    __asm__ volatile("movl %"#S_3", %0":"=r"(d)::SSE_REGs); sha256->digest[3] += d;\
+    __asm__ volatile("movl %"#S_4", %0":"=r"(d)::SSE_REGs); sha256->digest[4] += d;\
+    __asm__ volatile("movl %"#S_5", %0":"=r"(d)::SSE_REGs); sha256->digest[5] += d;\
+    __asm__ volatile("movl %"#S_6", %0":"=r"(d)::SSE_REGs); sha256->digest[6] += d;\
+    __asm__ volatile("movl %"#S_7", %0":"=r"(d)::SSE_REGs); sha256->digest[7] += d;\
 }
 
 
@@ -608,11 +688,9 @@ int wc_Sha256Final(Sha256* sha256, byte* hash)
     _RegToDigest(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )
 
 
-
-
-#define S_0 %r15d 
+#define S_0 %r15d
 #define S_1 %r10d
-#define S_2 %r11d       
+#define S_2 %r11d
 #define S_3 %r12d
 #define S_4 %r13d
 #define S_5 %r14d
@@ -648,7 +726,7 @@ __asm__ volatile("rorx  $13, %"#a", %%edi\n\t":::"%edi",SSE_REGs);/* edi = a>>13
 __asm__ volatile("rorx  $22, %"#a", %%edx\n\t":::"%edx",SSE_REGs); /* edx = a>>22 */\
 __asm__ volatile("xorl  %%r8d, %%edi\n\t":::"%edi","%r8",SSE_REGs);/* edi = (a>>2) ^ (a>>13)  */\
 __asm__ volatile("xorl  %%edi, %%edx\n\t":::"%edi","%edx",SSE_REGs);  /* edx = Sigma0(a)      */\
- 
+
 #define RND_STEP_RORX_6(a,b,c,d,e,f,g,h,i)\
 __asm__ volatile("movl  %"#b", %%edi\n\t":::"%edi",SSE_REGs);  /* edi = b          */\
 __asm__ volatile("orl   %"#a", %%edi\n\t":::"%edi",SSE_REGs);  /* edi = a | b      */\
@@ -664,9 +742,8 @@ __asm__ volatile("orl   %%edi, %%r8d\n\t":::"%edi","%r8",SSE_REGs); /* r8d = Maj
 __asm__ volatile("addl  "#h", "#d"\n\t");  /* d += h + w_k + Sigma1(e) + Ch(e,f,g) */\
 __asm__ volatile("addl  %"#h", %%r8d\n\t":::"%r8",SSE_REGs); \
 __asm__ volatile("addl  %%edx, %%r8d\n\t":::"%edx","%r8",SSE_REGs); \
-__asm__ volatile("movl  %r8d, "#h"\n\t");   
-
-#endif
+__asm__ volatile("movl  %r8d, "#h"\n\t");
+#endif /* HAVE_INTEL_RORX */
 
 #define RND_STEP_1(a,b,c,d,e,f,g,h,i)\
 __asm__ volatile("movl  %"#e", %%edx\n\t":::"%edx",SSE_REGs);\
@@ -728,7 +805,7 @@ __asm__ volatile("movl  %%r8d, %"#h"\n\t":::"%r8", SSE_REGs); \
        RND_STEP_5(a,b,c,d,e,f,g,h,i); \
        RND_STEP_6(a,b,c,d,e,f,g,h,i); \
        RND_STEP_7(a,b,c,d,e,f,g,h,i); \
-       RND_STEP_8(a,b,c,d,e,f,g,h,i); 
+       RND_STEP_8(a,b,c,d,e,f,g,h,i);
 
 #define RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i);
 #define RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_X(S_7,S_0,S_1,S_2,S_3,S_4,S_5,S_6,_i);
@@ -795,15 +872,15 @@ __asm__ volatile("movl  %%r8d, %"#h"\n\t":::"%r8", SSE_REGs); \
 #define RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,_i) RND_7_8(S_1,S_2,S_3,S_4,S_5,S_6,S_7,S_0,_i);
 
 #define FOR(cnt, init, max, inc, loop)  \
-    __asm__ volatile("movl $"#init", %0\n\t"#loop":"::"m"(cnt):) 
+    __asm__ volatile("movl $"#init", %0\n\t"#loop":"::"m"(cnt):)
 #define END(cnt, init, max, inc, loop)  \
-    __asm__ volatile("addl $"#inc", %0\n\tcmpl $"#max", %0\n\tjle "#loop"\n\t":"=m"(cnt)::) ;
+    __asm__ volatile("addl $"#inc", %0\n\tcmpl $"#max", %0\n\tjle "#loop"\n\t":"=m"(cnt)::);
 
 #endif  /* defined(HAVE_INTEL_AVX1) ||  defined(HAVE_INTEL_AVX2) */
 
 #if defined(HAVE_INTEL_AVX1) /* inline Assember for Intel AVX1 instructions */
 
-#define VPALIGNR(op1,op2,op3,op4) __asm__ volatile("vpalignr $"#op4", %"#op3", %"#op2", %"#op1:::XMM_REGs) 
+#define VPALIGNR(op1,op2,op3,op4) __asm__ volatile("vpalignr $"#op4", %"#op3", %"#op2", %"#op1:::XMM_REGs)
 #define VPADDD(op1,op2,op3)       __asm__ volatile("vpaddd %"#op3", %"#op2", %"#op1:::XMM_REGs)
 #define VPSRLD(op1,op2,op3)       __asm__ volatile("vpsrld $"#op3", %"#op2", %"#op1:::XMM_REGs)
 #define VPSRLQ(op1,op2,op3)       __asm__ volatile("vpsrlq $"#op3", %"#op2", %"#op1:::XMM_REGs)
@@ -816,171 +893,171 @@ __asm__ volatile("movl  %%r8d, %"#h"\n\t":::"%r8", SSE_REGs); \
 #define MessageSched(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER, SHUF_00BA, SHUF_DC00,\
      a,b,c,d,e,f,g,h,_i)\
             RND_STEP_1(a,b,c,d,e,f,g,h,_i);\
-    VPALIGNR (XTMP0, X3, X2, 4) ;\
+    VPALIGNR (XTMP0, X3, X2, 4);\
             RND_STEP_2(a,b,c,d,e,f,g,h,_i);\
-    VPADDD   (XTMP0, XTMP0, X0) ;\
+    VPADDD   (XTMP0, XTMP0, X0);\
             RND_STEP_3(a,b,c,d,e,f,g,h,_i);\
-    VPALIGNR (XTMP1, X1, X0, 4) ;   /* XTMP1 = W[-15] */\
+    VPALIGNR (XTMP1, X1, X0, 4);   /* XTMP1 = W[-15] */\
             RND_STEP_4(a,b,c,d,e,f,g,h,_i);\
-    VPSRLD   (XTMP2, XTMP1, 7) ;\
+    VPSRLD   (XTMP2, XTMP1, 7);\
             RND_STEP_5(a,b,c,d,e,f,g,h,_i);\
-    VPSLLD   (XTMP3, XTMP1, 25) ; /* VPSLLD   (XTMP3, XTMP1, (32-7)) */\
+    VPSLLD   (XTMP3, XTMP1, 25); /* VPSLLD   (XTMP3, XTMP1, (32-7)) */\
             RND_STEP_6(a,b,c,d,e,f,g,h,_i);\
-    VPOR     (XTMP3, XTMP3, XTMP2)  ;  /* XTMP1 = W[-15] MY_ROR 7 */\
+    VPOR     (XTMP3, XTMP3, XTMP2);  /* XTMP3 = W[-15] MY_ROR 7 */\
             RND_STEP_7(a,b,c,d,e,f,g,h,_i);\
-    VPSRLD   (XTMP2, XTMP1,18) ;\
+    VPSRLD   (XTMP2, XTMP1,18);\
             RND_STEP_8(a,b,c,d,e,f,g,h,_i);\
 \
             RND_STEP_1(h,a,b,c,d,e,f,g,_i+1);\
-    VPSRLD   (XTMP4, XTMP1, 3)      ;  /* XTMP4 = W[-15] >> 3 */\
+    VPSRLD   (XTMP4, XTMP1, 3);  /* XTMP4 = W[-15] >> 3 */\
             RND_STEP_2(h,a,b,c,d,e,f,g,_i+1);\
-    VPSLLD   (XTMP1, XTMP1, 14) ; /* VPSLLD   (XTMP1, XTMP1, (32-18)) */\
+    VPSLLD   (XTMP1, XTMP1, 14); /* VPSLLD   (XTMP1, XTMP1, (32-18)) */\
             RND_STEP_3(h,a,b,c,d,e,f,g,_i+1);\
-    VPXOR    (XTMP3, XTMP3, XTMP1)  ;\
+    VPXOR    (XTMP3, XTMP3, XTMP1);\
             RND_STEP_4(h,a,b,c,d,e,f,g,_i+1);\
-    VPXOR    (XTMP3, XTMP3, XTMP2)  ;  /* XTMP1 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18 */\
+    VPXOR    (XTMP3, XTMP3, XTMP2);  /* XTMP3 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18 */\
             RND_STEP_5(h,a,b,c,d,e,f,g,_i+1);\
-    VPXOR    (XTMP1, XTMP3, XTMP4)  ;  /* XTMP1 = s0 */\
+    VPXOR    (XTMP1, XTMP3, XTMP4);  /* XTMP1 = s0 */\
             RND_STEP_6(h,a,b,c,d,e,f,g,_i+1);\
-    VPSHUFD(XTMP2, X3, 0b11111010)  ;  /* XTMP2 = W[-2] {BBAA}*/\
+    VPSHUFD(XTMP2, X3, 0b11111010);  /* XTMP2 = W[-2] {BBAA}*/\
             RND_STEP_7(h,a,b,c,d,e,f,g,_i+1);\
-    VPADDD   (XTMP0, XTMP0, XTMP1)  ;  /* XTMP0 = W[-16] + W[-7] + s0 */\
+    VPADDD   (XTMP0, XTMP0, XTMP1);  /* XTMP0 = W[-16] + W[-7] + s0 */\
             RND_STEP_8(h,a,b,c,d,e,f,g,_i+1);\
 \
             RND_STEP_1(g,h,a,b,c,d,e,f,_i+2);\
-    VPSRLD   (XTMP4, XTMP2, 10) ;      /* XTMP4 = W[-2] >> 10 {BBAA} */\
+    VPSRLD   (XTMP4, XTMP2, 10);      /* XTMP4 = W[-2] >> 10 {BBAA} */\
             RND_STEP_2(g,h,a,b,c,d,e,f,_i+2);\
-    VPSRLQ   (XTMP3, XTMP2, 19) ;      /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */\
+    VPSRLQ   (XTMP3, XTMP2, 19);      /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */\
             RND_STEP_3(g,h,a,b,c,d,e,f,_i+2);\
-    VPSRLQ   (XTMP2, XTMP2, 17) ;      /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */\
+    VPSRLQ   (XTMP2, XTMP2, 17);      /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */\
             RND_STEP_4(g,h,a,b,c,d,e,f,_i+2);\
-    VPXOR    (XTMP2, XTMP2, XTMP3) ;\
+    VPXOR    (XTMP2, XTMP2, XTMP3);\
             RND_STEP_5(g,h,a,b,c,d,e,f,_i+2);\
-    VPXOR    (XTMP4, XTMP4, XTMP2) ;   /* XTMP4 = s1 {xBxA} */\
+    VPXOR    (XTMP4, XTMP4, XTMP2);   /* XTMP4 = s1 {xBxA} */\
             RND_STEP_6(g,h,a,b,c,d,e,f,_i+2);\
-    VPSHUFB  (XTMP4, XTMP4, SHUF_00BA)  ;  /* XTMP4 = s1 {00BA} */\
+    VPSHUFB  (XTMP4, XTMP4, SHUF_00BA);  /* XTMP4 = s1 {00BA} */\
             RND_STEP_7(g,h,a,b,c,d,e,f,_i+2);\
-    VPADDD   (XTMP0, XTMP0, XTMP4)  ;  /* XTMP0 = {..., ..., W[1], W[0]} */\
+    VPADDD   (XTMP0, XTMP0, XTMP4);  /* XTMP0 = {..., ..., W[1], W[0]} */\
             RND_STEP_8(g,h,a,b,c,d,e,f,_i+2);\
 \
             RND_STEP_1(f,g,h,a,b,c,d,e,_i+3);\
-    VPSHUFD  (XTMP2, XTMP0, 0b01010000) ; /* XTMP2 = W[-2] {DDCC} */\
+    VPSHUFD  (XTMP2, XTMP0, 0b01010000); /* XTMP2 = W[-2] {DDCC} */\
             RND_STEP_2(f,g,h,a,b,c,d,e,_i+3);\
     VPSRLD   (XTMP5, XTMP2, 10);       /* XTMP5 = W[-2] >> 10 {DDCC} */\
             RND_STEP_3(f,g,h,a,b,c,d,e,_i+3);\
     VPSRLQ   (XTMP3, XTMP2, 19);       /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */\
             RND_STEP_4(f,g,h,a,b,c,d,e,_i+3);\
-    VPSRLQ   (XTMP2, XTMP2, 17) ;      /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */\
+    VPSRLQ   (XTMP2, XTMP2, 17);      /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */\
             RND_STEP_5(f,g,h,a,b,c,d,e,_i+3);\
-    VPXOR    (XTMP2, XTMP2, XTMP3) ;\
+    VPXOR    (XTMP2, XTMP2, XTMP3);\
             RND_STEP_6(f,g,h,a,b,c,d,e,_i+3);\
-    VPXOR    (XTMP5, XTMP5, XTMP2) ;   /* XTMP5 = s1 {xDxC} */\
+    VPXOR    (XTMP5, XTMP5, XTMP2);   /* XTMP5 = s1 {xDxC} */\
             RND_STEP_7(f,g,h,a,b,c,d,e,_i+3);\
-    VPSHUFB  (XTMP5, XTMP5, SHUF_DC00) ; /* XTMP5 = s1 {DC00} */\
+    VPSHUFB  (XTMP5, XTMP5, SHUF_DC00); /* XTMP5 = s1 {DC00} */\
             RND_STEP_8(f,g,h,a,b,c,d,e,_i+3);\
-    VPADDD   (X0, XTMP5, XTMP0) ;      /* X0 = {W[3], W[2], W[1], W[0]} */\
+    VPADDD   (X0, XTMP5, XTMP0);      /* X0 = {W[3], W[2], W[1], W[0]} */\
 
 #if defined(HAVE_INTEL_RORX)
 
 #define MessageSched_RORX(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, \
                           XFER, SHUF_00BA, SHUF_DC00,a,b,c,d,e,f,g,h,_i)\
             RND_STEP_RORX_1(a,b,c,d,e,f,g,h,_i);\
-    VPALIGNR (XTMP0, X3, X2, 4) ;\
+    VPALIGNR (XTMP0, X3, X2, 4);\
             RND_STEP_RORX_2(a,b,c,d,e,f,g,h,_i);\
-    VPADDD   (XTMP0, XTMP0, X0) ;\
+    VPADDD   (XTMP0, XTMP0, X0);\
             RND_STEP_RORX_3(a,b,c,d,e,f,g,h,_i);\
-    VPALIGNR (XTMP1, X1, X0, 4) ;   /* XTMP1 = W[-15] */\
+    VPALIGNR (XTMP1, X1, X0, 4);   /* XTMP1 = W[-15] */\
             RND_STEP_RORX_4(a,b,c,d,e,f,g,h,_i);\
-    VPSRLD   (XTMP2, XTMP1, 7) ;\
+    VPSRLD   (XTMP2, XTMP1, 7);\
             RND_STEP_RORX_5(a,b,c,d,e,f,g,h,_i);\
-    VPSLLD   (XTMP3, XTMP1, 25) ; /* VPSLLD   (XTMP3, XTMP1, (32-7)) */\
+    VPSLLD   (XTMP3, XTMP1, 25); /* VPSLLD   (XTMP3, XTMP1, (32-7)) */\
             RND_STEP_RORX_6(a,b,c,d,e,f,g,h,_i);\
-    VPOR     (XTMP3, XTMP3, XTMP2)  ;  /* XTMP1 = W[-15] MY_ROR 7 */\
+    VPOR     (XTMP3, XTMP3, XTMP2);  /* XTMP3 = W[-15] MY_ROR 7 */\
             RND_STEP_RORX_7(a,b,c,d,e,f,g,h,_i);\
-    VPSRLD   (XTMP2, XTMP1,18) ;\
+    VPSRLD   (XTMP2, XTMP1,18);\
             RND_STEP_RORX_8(a,b,c,d,e,f,g,h,_i);\
 \
             RND_STEP_RORX_1(h,a,b,c,d,e,f,g,_i+1);\
-    VPSRLD   (XTMP4, XTMP1, 3)      ;  /* XTMP4 = W[-15] >> 3 */\
+    VPSRLD   (XTMP4, XTMP1, 3);  /* XTMP4 = W[-15] >> 3 */\
             RND_STEP_RORX_2(h,a,b,c,d,e,f,g,_i+1);\
-    VPSLLD   (XTMP1, XTMP1, 14) ; /* VPSLLD   (XTMP1, XTMP1, (32-18)) */\
+    VPSLLD   (XTMP1, XTMP1, 14); /* VPSLLD   (XTMP1, XTMP1, (32-18)) */\
             RND_STEP_RORX_3(h,a,b,c,d,e,f,g,_i+1);\
-    VPXOR    (XTMP3, XTMP3, XTMP1)  ;\
+    VPXOR    (XTMP3, XTMP3, XTMP1);\
             RND_STEP_RORX_4(h,a,b,c,d,e,f,g,_i+1);\
-    VPXOR    (XTMP3, XTMP3, XTMP2)  ;  /* XTMP1 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18 */\
+    VPXOR    (XTMP3, XTMP3, XTMP2);  /* XTMP3 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18 */\
             RND_STEP_RORX_5(h,a,b,c,d,e,f,g,_i+1);\
-    VPXOR    (XTMP1, XTMP3, XTMP4)  ;  /* XTMP1 = s0 */\
+    VPXOR    (XTMP1, XTMP3, XTMP4);  /* XTMP1 = s0 */\
             RND_STEP_RORX_6(h,a,b,c,d,e,f,g,_i+1);\
-    VPSHUFD(XTMP2, X3, 0b11111010)  ;  /* XTMP2 = W[-2] {BBAA}*/\
+    VPSHUFD(XTMP2, X3, 0b11111010);  /* XTMP2 = W[-2] {BBAA}*/\
             RND_STEP_RORX_7(h,a,b,c,d,e,f,g,_i+1);\
-    VPADDD   (XTMP0, XTMP0, XTMP1)  ;  /* XTMP0 = W[-16] + W[-7] + s0 */\
+    VPADDD   (XTMP0, XTMP0, XTMP1);  /* XTMP0 = W[-16] + W[-7] + s0 */\
             RND_STEP_RORX_8(h,a,b,c,d,e,f,g,_i+1);\
 \
             RND_STEP_RORX_1(g,h,a,b,c,d,e,f,_i+2);\
-    VPSRLD   (XTMP4, XTMP2, 10) ;      /* XTMP4 = W[-2] >> 10 {BBAA} */\
+    VPSRLD   (XTMP4, XTMP2, 10);      /* XTMP4 = W[-2] >> 10 {BBAA} */\
             RND_STEP_RORX_2(g,h,a,b,c,d,e,f,_i+2);\
-    VPSRLQ   (XTMP3, XTMP2, 19) ;      /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */\
+    VPSRLQ   (XTMP3, XTMP2, 19);      /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */\
             RND_STEP_RORX_3(g,h,a,b,c,d,e,f,_i+2);\
-    VPSRLQ   (XTMP2, XTMP2, 17) ;      /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */\
+    VPSRLQ   (XTMP2, XTMP2, 17);      /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */\
             RND_STEP_RORX_4(g,h,a,b,c,d,e,f,_i+2);\
-    VPXOR    (XTMP2, XTMP2, XTMP3) ;\
+    VPXOR    (XTMP2, XTMP2, XTMP3);\
             RND_STEP_RORX_5(g,h,a,b,c,d,e,f,_i+2);\
-    VPXOR    (XTMP4, XTMP4, XTMP2) ;   /* XTMP4 = s1 {xBxA} */\
+    VPXOR    (XTMP4, XTMP4, XTMP2);   /* XTMP4 = s1 {xBxA} */\
             RND_STEP_RORX_6(g,h,a,b,c,d,e,f,_i+2);\
-    VPSHUFB  (XTMP4, XTMP4, SHUF_00BA)  ;  /* XTMP4 = s1 {00BA} */\
+    VPSHUFB  (XTMP4, XTMP4, SHUF_00BA);  /* XTMP4 = s1 {00BA} */\
             RND_STEP_RORX_7(g,h,a,b,c,d,e,f,_i+2);\
-    VPADDD   (XTMP0, XTMP0, XTMP4)  ;  /* XTMP0 = {..., ..., W[1], W[0]} */\
+    VPADDD   (XTMP0, XTMP0, XTMP4);  /* XTMP0 = {..., ..., W[1], W[0]} */\
             RND_STEP_RORX_8(g,h,a,b,c,d,e,f,_i+2);\
 \
             RND_STEP_RORX_1(f,g,h,a,b,c,d,e,_i+3);\
-    VPSHUFD  (XTMP2, XTMP0, 0b01010000) ; /* XTMP2 = W[-2] {DDCC} */\
+    VPSHUFD  (XTMP2, XTMP0, 0b01010000); /* XTMP2 = W[-2] {DDCC} */\
             RND_STEP_RORX_2(f,g,h,a,b,c,d,e,_i+3);\
     VPSRLD   (XTMP5, XTMP2, 10);       /* XTMP5 = W[-2] >> 10 {DDCC} */\
             RND_STEP_RORX_3(f,g,h,a,b,c,d,e,_i+3);\
     VPSRLQ   (XTMP3, XTMP2, 19);       /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */\
             RND_STEP_RORX_4(f,g,h,a,b,c,d,e,_i+3);\
-    VPSRLQ   (XTMP2, XTMP2, 17) ;      /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */\
+    VPSRLQ   (XTMP2, XTMP2, 17);      /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */\
             RND_STEP_RORX_5(f,g,h,a,b,c,d,e,_i+3);\
-    VPXOR    (XTMP2, XTMP2, XTMP3) ;\
+    VPXOR    (XTMP2, XTMP2, XTMP3);\
             RND_STEP_RORX_6(f,g,h,a,b,c,d,e,_i+3);\
-    VPXOR    (XTMP5, XTMP5, XTMP2) ;   /* XTMP5 = s1 {xDxC} */\
+    VPXOR    (XTMP5, XTMP5, XTMP2);   /* XTMP5 = s1 {xDxC} */\
             RND_STEP_RORX_7(f,g,h,a,b,c,d,e,_i+3);\
-    VPSHUFB  (XTMP5, XTMP5, SHUF_DC00) ; /* XTMP5 = s1 {DC00} */\
+    VPSHUFB  (XTMP5, XTMP5, SHUF_DC00); /* XTMP5 = s1 {DC00} */\
             RND_STEP_RORX_8(f,g,h,a,b,c,d,e,_i+3);\
-    VPADDD   (X0, XTMP5, XTMP0) ;      /* X0 = {W[3], W[2], W[1], W[0]} */\
+    VPADDD   (X0, XTMP5, XTMP0);      /* X0 = {W[3], W[2], W[1], W[0]} */\
 
-#endif
+#endif /* HAVE_INTEL_RORX */
 
 
 #define W_K_from_buff\
          __asm__ volatile("vmovdqu %0, %%xmm4\n\t"\
                           "vpshufb %%xmm13, %%xmm4, %%xmm4\n\t"\
-                          :: "m"(sha256->buffer[0]):"%xmm4") ;\
+                          :: "m"(sha256->buffer[0]):"%xmm4");\
          __asm__ volatile("vmovdqu %0, %%xmm5\n\t"\
                           "vpshufb %%xmm13, %%xmm5, %%xmm5\n\t"\
-                          ::"m"(sha256->buffer[4]):"%xmm5") ;\
+                          ::"m"(sha256->buffer[4]):"%xmm5");\
          __asm__ volatile("vmovdqu %0, %%xmm6\n\t"\
                           "vpshufb %%xmm13, %%xmm6, %%xmm6\n\t"\
-                          ::"m"(sha256->buffer[8]):"%xmm6") ;\
+                          ::"m"(sha256->buffer[8]):"%xmm6");\
          __asm__ volatile("vmovdqu %0, %%xmm7\n\t"\
                           "vpshufb %%xmm13, %%xmm7, %%xmm7\n\t"\
-                          ::"m"(sha256->buffer[12]):"%xmm7") ;\
+                          ::"m"(sha256->buffer[12]):"%xmm7");\
 
 #define _SET_W_K_XFER(reg, i)\
-    __asm__ volatile("vpaddd %0, %"#reg", %%xmm9"::"m"(K[i]):XMM_REGs) ;\
-    __asm__ volatile("vmovdqa %%xmm9, %0":"=m"(W_K[i])::XMM_REGs) ;
+    __asm__ volatile("vpaddd %0, %"#reg", %%xmm9"::"m"(K[i]):XMM_REGs);\
+    __asm__ volatile("vmovdqa %%xmm9, %0":"=m"(W_K[i])::XMM_REGs);
 
 #define SET_W_K_XFER(reg, i) _SET_W_K_XFER(reg, i)
 
-static const ALIGN32 word64 mSHUF_00BA[] = { 0x0b0a090803020100, 0xFFFFFFFFFFFFFFFF } ; /* shuffle xBxA -> 00BA */
-static const ALIGN32 word64 mSHUF_DC00[] = { 0xFFFFFFFFFFFFFFFF, 0x0b0a090803020100 } ; /* shuffle xDxC -> DC00 */
-static const ALIGN32 word64 mBYTE_FLIP_MASK[] =  { 0x0405060700010203, 0x0c0d0e0f08090a0b } ;
+static const ALIGN32 word64 mSHUF_00BA[] = { 0x0b0a090803020100, 0xFFFFFFFFFFFFFFFF }; /* shuffle xBxA -> 00BA */
+static const ALIGN32 word64 mSHUF_DC00[] = { 0xFFFFFFFFFFFFFFFF, 0x0b0a090803020100 }; /* shuffle xDxC -> DC00 */
+static const ALIGN32 word64 mBYTE_FLIP_MASK[] =  { 0x0405060700010203, 0x0c0d0e0f08090a0b };
 
 
 #define _Init_Masks(mask1, mask2, mask3)\
-__asm__ volatile("vmovdqu %0, %"#mask1 ::"m"(mBYTE_FLIP_MASK[0])) ;\
-__asm__ volatile("vmovdqu %0, %"#mask2 ::"m"(mSHUF_00BA[0])) ;\
-__asm__ volatile("vmovdqu %0, %"#mask3 ::"m"(mSHUF_DC00[0])) ;
+__asm__ volatile("vmovdqu %0, %"#mask1 ::"m"(mBYTE_FLIP_MASK[0]));\
+__asm__ volatile("vmovdqu %0, %"#mask2 ::"m"(mSHUF_00BA[0]));\
+__asm__ volatile("vmovdqu %0, %"#mask3 ::"m"(mSHUF_DC00[0]));
 
 #define Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00)\
     _Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00)
@@ -1008,77 +1085,77 @@ __asm__ volatile("vmovdqu %0, %"#mask3 ::"m"(mSHUF_DC00[0])) ;
 
 static int Transform_AVX1(Sha256* sha256)
 {
-    ALIGN32 word32 W_K[64] ;  /* temp for W+K */
+    ALIGN32 word32 W_K[64];  /* temp for W+K */
 
-    Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00) ;
-    W_K_from_buff ; /* X0, X1, X2, X3 = W[0..15] ; */
+    Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00);
+    W_K_from_buff; /* X0, X1, X2, X3 = W[0..15]; */
 
-    DigestToReg(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7) ;
-  
-    SET_W_K_XFER(X0, 0) ;
-    MessageSched(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER, 
-            SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,0) ;
-    SET_W_K_XFER(X1, 4) ;
+    DigestToReg(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7);
+
+    SET_W_K_XFER(X0, 0);
+
+    MessageSched(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
+            SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,0);
+    SET_W_K_XFER(X1, 4);
     MessageSched(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
-            SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,4) ;
-    SET_W_K_XFER(X2, 8) ;
-    MessageSched(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER, 
-            SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8) ;
-    SET_W_K_XFER(X3, 12) ;
-    MessageSched(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER, 
-            SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,12) ;
-    SET_W_K_XFER(X0, 16) ;
-    MessageSched(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER, 
-            SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16) ;
-    SET_W_K_XFER(X1, 20) ;
-    MessageSched(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER, 
-            SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,20) ;
-    SET_W_K_XFER(X2, 24) ;
-    MessageSched(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER, 
-            SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24) ;
-    SET_W_K_XFER(X3, 28) ;
-    MessageSched(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER, 
-            SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,28) ;
-    SET_W_K_XFER(X0, 32) ;
-    MessageSched(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER, 
-            SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32) ;
-    SET_W_K_XFER(X1, 36) ;
-    MessageSched(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER, 
-            SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,36) ;
-    SET_W_K_XFER(X2, 40) ;
-    MessageSched(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER, 
-            SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40) ;
-    SET_W_K_XFER(X3, 44) ;
-    MessageSched(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER, 
-            SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,44) ;
+            SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,4);
+    SET_W_K_XFER(X2, 8);
+    MessageSched(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
+            SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8);
+    SET_W_K_XFER(X3, 12);
+    MessageSched(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
+            SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,12);
+    SET_W_K_XFER(X0, 16);
+    MessageSched(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
+            SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16);
+    SET_W_K_XFER(X1, 20);
+    MessageSched(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
+            SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,20);
+    SET_W_K_XFER(X2, 24);
+    MessageSched(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
+            SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24);
+    SET_W_K_XFER(X3, 28);
+    MessageSched(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
+            SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,28);
+    SET_W_K_XFER(X0, 32);
+    MessageSched(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
+            SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32);
+    SET_W_K_XFER(X1, 36);
+    MessageSched(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
+            SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,36);
+    SET_W_K_XFER(X2, 40);
+    MessageSched(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
+            SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40);
+    SET_W_K_XFER(X3, 44);
+    MessageSched(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, XFER,
+            SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,44);
 
-    SET_W_K_XFER(X0, 48) ;
-    SET_W_K_XFER(X1, 52) ;
-    SET_W_K_XFER(X2, 56) ;
-    SET_W_K_XFER(X3, 60) ;
-    
-    RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48) ;
-    RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49) ;
-    RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50) ;
-    RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51) ;
+    SET_W_K_XFER(X0, 48);
+    SET_W_K_XFER(X1, 52);
+    SET_W_K_XFER(X2, 56);
+    SET_W_K_XFER(X3, 60);
 
-    RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52) ;
-    RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53) ;
-    RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54) ;
-    RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55) ;
+    RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48);
+    RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49);
+    RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50);
+    RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51);
 
-    RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,56) ;     
-    RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,57) ;
-    RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,58) ;
-    RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,59) ;
+    RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52);
+    RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53);
+    RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54);
+    RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55);
 
-    RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,60) ;
-    RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,61) ;
-    RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,62) ;
-    RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,63) ;
-        
-    RegToDigest(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7) ;  
-        
+    RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,56);
+    RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,57);
+    RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,58);
+    RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,59);
+
+    RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,60);
+    RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,61);
+    RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,62);
+    RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,63);
+
+    RegToDigest(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7);
 
     return 0;
 }
@@ -1086,128 +1163,126 @@ static int Transform_AVX1(Sha256* sha256)
 #if defined(HAVE_INTEL_RORX)
 static int Transform_AVX1_RORX(Sha256* sha256)
 {
-    ALIGN32 word32 W_K[64] ;  /* temp for W+K */
+    ALIGN32 word32 W_K[64];  /* temp for W+K */
 
-    Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00) ;
-    W_K_from_buff ; /* X0, X1, X2, X3 = W[0..15] ; */
+    Init_Masks(BYTE_FLIP_MASK, SHUF_00BA, SHUF_DC00);
+    W_K_from_buff; /* X0, X1, X2, X3 = W[0..15]; */
 
-    DigestToReg(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7) ;
-    SET_W_K_XFER(X0, 0) ;
-    MessageSched_RORX(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, 
-            XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,0) ;
-    SET_W_K_XFER(X1, 4) ;
-    MessageSched_RORX(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, 
-            XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,4) ;
-    SET_W_K_XFER(X2, 8) ;
-    MessageSched_RORX(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, 
-            XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8) ;
-    SET_W_K_XFER(X3, 12) ;
-    MessageSched_RORX(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, 
-            XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,12) ;
-    SET_W_K_XFER(X0, 16) ;
-    MessageSched_RORX(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, 
-            XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16) ;
-    SET_W_K_XFER(X1, 20) ;
-    MessageSched_RORX(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, 
-            XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,20) ;
-    SET_W_K_XFER(X2, 24) ;
-    MessageSched_RORX(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, 
-            XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24) ;
-    SET_W_K_XFER(X3, 28) ;
-    MessageSched_RORX(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, 
-            XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,28) ;
-    SET_W_K_XFER(X0, 32) ;
-    MessageSched_RORX(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, 
-            XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32) ;
-    SET_W_K_XFER(X1, 36) ;
-    MessageSched_RORX(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5, 
-            XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,36) ;
-    SET_W_K_XFER(X2, 40) ;
+    DigestToReg(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7);
+    SET_W_K_XFER(X0, 0);
+    MessageSched_RORX(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
+            XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,0);
+    SET_W_K_XFER(X1, 4);
+    MessageSched_RORX(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
+            XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,4);
+    SET_W_K_XFER(X2, 8);
     MessageSched_RORX(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
-            XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40) ;
-    SET_W_K_XFER(X3, 44) ;
+            XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8);
+    SET_W_K_XFER(X3, 12);
     MessageSched_RORX(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
-            XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,44) ;
+            XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,12);
+    SET_W_K_XFER(X0, 16);
+    MessageSched_RORX(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
+            XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16);
+    SET_W_K_XFER(X1, 20);
+    MessageSched_RORX(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
+            XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,20);
+    SET_W_K_XFER(X2, 24);
+    MessageSched_RORX(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
+            XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24);
+    SET_W_K_XFER(X3, 28);
+    MessageSched_RORX(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
+            XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,28);
+    SET_W_K_XFER(X0, 32);
+    MessageSched_RORX(X0, X1, X2, X3, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
+            XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32);
+    SET_W_K_XFER(X1, 36);
+    MessageSched_RORX(X1, X2, X3, X0, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
+            XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,36);
+    SET_W_K_XFER(X2, 40);
+    MessageSched_RORX(X2, X3, X0, X1, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
+            XFER, SHUF_00BA, SHUF_DC00, S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40);
+    SET_W_K_XFER(X3, 44);
+    MessageSched_RORX(X3, X0, X1, X2, XTMP0, XTMP1, XTMP2, XTMP3, XTMP4, XTMP5,
+            XFER, SHUF_00BA, SHUF_DC00, S_4,S_5,S_6,S_7,S_0,S_1,S_2,S_3,44);
 
-    SET_W_K_XFER(X0, 48) ;
-    SET_W_K_XFER(X1, 52) ;
-    SET_W_K_XFER(X2, 56) ;
-    SET_W_K_XFER(X3, 60) ;
-    
-    RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48) ;
-    RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49) ;
-    RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50) ;
-    RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51) ;
+    SET_W_K_XFER(X0, 48);
+    SET_W_K_XFER(X1, 52);
+    SET_W_K_XFER(X2, 56);
+    SET_W_K_XFER(X3, 60);
 
-    RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52) ;
-    RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53) ;
-    RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54) ;
-    RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55) ;
+    RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48);
+    RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49);
+    RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50);
+    RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51);
 
-    RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,56) ;     
-    RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,57) ;
-    RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,58) ;
-    RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,59) ;
+    RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52);
+    RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53);
+    RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54);
+    RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55);
 
-    RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,60) ;
-    RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,61) ;
-    RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,62) ;
-    RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,63) ;
-        
-    RegToDigest(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7) ;  
-        
+    RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,56);
+    RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,57);
+    RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,58);
+    RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,59);
+
+    RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,60);
+    RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,61);
+    RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,62);
+    RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,63);
+
+    RegToDigest(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7);
 
     return 0;
 }
 #endif  /* HAVE_INTEL_RORX */
-
 #endif  /* HAVE_INTEL_AVX1 */
 
 
 #if defined(HAVE_INTEL_AVX2)
 
-#define _MOVE_to_REG(ymm, mem)       __asm__ volatile("vmovdqu %0, %%"#ymm" ":: "m"(mem):YMM_REGs) ;
-#define _MOVE_to_MEM(mem, ymm)       __asm__ volatile("vmovdqu %%"#ymm", %0" : "=m"(mem)::YMM_REGs) ;
+#define _MOVE_to_REG(ymm, mem)       __asm__ volatile("vmovdqu %0, %%"#ymm" ":: "m"(mem):YMM_REGs);
+#define _MOVE_to_MEM(mem, ymm)       __asm__ volatile("vmovdqu %%"#ymm", %0" : "=m"(mem)::YMM_REGs);
 #define _BYTE_SWAP(ymm, map)              __asm__ volatile("vpshufb %0, %%"#ymm", %%"#ymm"\n\t"\
-                                                       :: "m"(map):YMM_REGs) ;
+                                                       :: "m"(map):YMM_REGs);
 #define _MOVE_128(ymm0, ymm1, ymm2, map)   __asm__ volatile("vperm2i128  $"#map", %%"\
-                                  #ymm2", %%"#ymm1", %%"#ymm0" ":::YMM_REGs) ;
+                                  #ymm2", %%"#ymm1", %%"#ymm0" ":::YMM_REGs);
 #define _MOVE_BYTE(ymm0, ymm1, map)  __asm__ volatile("vpshufb %0, %%"#ymm1", %%"\
-                                  #ymm0"\n\t":: "m"(map):YMM_REGs) ;
+                                  #ymm0"\n\t":: "m"(map):YMM_REGs);
 #define _S_TEMP(dest, src, bits, temp)    __asm__ volatile("vpsrld  $"#bits", %%"\
          #src", %%"#dest"\n\tvpslld  $32-"#bits", %%"#src", %%"#temp"\n\tvpor %%"\
-         #temp",%%"#dest", %%"#dest" ":::YMM_REGs) ;
+         #temp",%%"#dest", %%"#dest" ":::YMM_REGs);
 #define _AVX2_R(dest, src, bits)          __asm__ volatile("vpsrld  $"#bits", %%"\
-                                  #src", %%"#dest" ":::YMM_REGs) ;
+                                  #src", %%"#dest" ":::YMM_REGs);
 #define _XOR(dest, src1, src2)       __asm__ volatile("vpxor   %%"#src1", %%"\
-         #src2", %%"#dest" ":::YMM_REGs) ;
+         #src2", %%"#dest" ":::YMM_REGs);
 #define _OR(dest, src1, src2)       __asm__ volatile("vpor    %%"#src1", %%"\
-         #src2", %%"#dest" ":::YMM_REGs) ;
+         #src2", %%"#dest" ":::YMM_REGs);
 #define _ADD(dest, src1, src2)       __asm__ volatile("vpaddd   %%"#src1", %%"\
-         #src2", %%"#dest" ":::YMM_REGs) ;
+         #src2", %%"#dest" ":::YMM_REGs);
 #define _ADD_MEM(dest, src1, mem)    __asm__ volatile("vpaddd   %0, %%"#src1", %%"\
-         #dest" "::"m"(mem):YMM_REGs) ;
+         #dest" "::"m"(mem):YMM_REGs);
 #define _BLEND(map, dest, src1, src2)    __asm__ volatile("vpblendd    $"#map", %%"\
-         #src1",   %%"#src2", %%"#dest" ":::YMM_REGs) ;
+         #src1",   %%"#src2", %%"#dest" ":::YMM_REGs);
 
-#define    _EXTRACT_XMM_0(xmm, mem)  __asm__ volatile("vpextrd $0, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
-#define    _EXTRACT_XMM_1(xmm, mem)  __asm__ volatile("vpextrd $1, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
-#define    _EXTRACT_XMM_2(xmm, mem)  __asm__ volatile("vpextrd $2, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
-#define    _EXTRACT_XMM_3(xmm, mem)  __asm__ volatile("vpextrd $3, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
+#define    _EXTRACT_XMM_0(xmm, mem)  __asm__ volatile("vpextrd $0, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs);
+#define    _EXTRACT_XMM_1(xmm, mem)  __asm__ volatile("vpextrd $1, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs);
+#define    _EXTRACT_XMM_2(xmm, mem)  __asm__ volatile("vpextrd $2, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs);
+#define    _EXTRACT_XMM_3(xmm, mem)  __asm__ volatile("vpextrd $3, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs);
 #define    _EXTRACT_XMM_4(ymm, xmm, mem)\
-      __asm__ volatile("vperm2i128 $0x1, %%"#ymm", %%"#ymm", %%"#ymm" ":::YMM_REGs) ;\
-      __asm__ volatile("vpextrd $0, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
-#define    _EXTRACT_XMM_5(xmm, mem)  __asm__ volatile("vpextrd $1, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
-#define    _EXTRACT_XMM_6(xmm, mem)  __asm__ volatile("vpextrd $2, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
-#define    _EXTRACT_XMM_7(xmm, mem)  __asm__ volatile("vpextrd $3, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs) ;
+      __asm__ volatile("vperm2i128 $0x1, %%"#ymm", %%"#ymm", %%"#ymm" ":::YMM_REGs);\
+      __asm__ volatile("vpextrd $0, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs);
+#define    _EXTRACT_XMM_5(xmm, mem)  __asm__ volatile("vpextrd $1, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs);
+#define    _EXTRACT_XMM_6(xmm, mem)  __asm__ volatile("vpextrd $2, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs);
+#define    _EXTRACT_XMM_7(xmm, mem)  __asm__ volatile("vpextrd $3, %%"#xmm", %0 ":"=r"(mem)::YMM_REGs);
 
-#define    _SWAP_YMM_HL(ymm)   __asm__ volatile("vperm2i128 $0x1, %%"#ymm", %%"#ymm", %%"#ymm" ":::YMM_REGs) ;
-#define     SWAP_YMM_HL(ymm)   _SWAP_YMM_HL(ymm) 
+#define    _SWAP_YMM_HL(ymm)   __asm__ volatile("vperm2i128 $0x1, %%"#ymm", %%"#ymm", %%"#ymm" ":::YMM_REGs);
+#define     SWAP_YMM_HL(ymm)   _SWAP_YMM_HL(ymm)
 
 #define MOVE_to_REG(ymm, mem)      _MOVE_to_REG(ymm, mem)
 #define MOVE_to_MEM(mem, ymm)      _MOVE_to_MEM(mem, ymm)
 #define BYTE_SWAP(ymm, map)        _BYTE_SWAP(ymm, map)
-#define MOVE_128(ymm0, ymm1, ymm2, map) _MOVE_128(ymm0, ymm1, ymm2, map) 
+#define MOVE_128(ymm0, ymm1, ymm2, map) _MOVE_128(ymm0, ymm1, ymm2, map)
 #define MOVE_BYTE(ymm0, ymm1, map) _MOVE_BYTE(ymm0, ymm1, map)
 #define XOR(dest, src1, src2)      _XOR(dest, src1, src2)
 #define OR(dest, src1, src2)       _OR(dest, src1, src2)
@@ -1215,31 +1290,31 @@ static int Transform_AVX1_RORX(Sha256* sha256)
 #define ADD_MEM(dest, src1, mem)  _ADD_MEM(dest, src1, mem)
 #define BLEND(map, dest, src1, src2) _BLEND(map, dest, src1, src2)
 
-#define S_TMP(dest, src, bits, temp) _S_TEMP(dest, src, bits, temp); 
+#define S_TMP(dest, src, bits, temp) _S_TEMP(dest, src, bits, temp);
 #define AVX2_S(dest, src, bits)      S_TMP(dest, src, bits, S_TEMP)
 #define AVX2_R(dest, src, bits)      _AVX2_R(dest, src, bits)
 
 #define GAMMA0(dest, src)      AVX2_S(dest, src, 7);  AVX2_S(G_TEMP, src, 18); \
-    XOR(dest, G_TEMP, dest) ; AVX2_R(G_TEMP, src, 3);  XOR(dest, G_TEMP, dest) ;
-#define GAMMA0_1(dest, src)    AVX2_S(dest, src, 7);  AVX2_S(G_TEMP, src, 18); 
-#define GAMMA0_2(dest, src)    XOR(dest, G_TEMP, dest) ; AVX2_R(G_TEMP, src, 3);  \
-    XOR(dest, G_TEMP, dest) ;
+    XOR(dest, G_TEMP, dest); AVX2_R(G_TEMP, src, 3);  XOR(dest, G_TEMP, dest);
+#define GAMMA0_1(dest, src)    AVX2_S(dest, src, 7);  AVX2_S(G_TEMP, src, 18);
+#define GAMMA0_2(dest, src)    XOR(dest, G_TEMP, dest); AVX2_R(G_TEMP, src, 3);  \
+    XOR(dest, G_TEMP, dest);
 
 #define GAMMA1(dest, src)      AVX2_S(dest, src, 17); AVX2_S(G_TEMP, src, 19); \
-    XOR(dest, G_TEMP, dest) ; AVX2_R(G_TEMP, src, 10); XOR(dest, G_TEMP, dest) ;
-#define GAMMA1_1(dest, src)    AVX2_S(dest, src, 17); AVX2_S(G_TEMP, src, 19); 
-#define GAMMA1_2(dest, src)    XOR(dest, G_TEMP, dest) ; AVX2_R(G_TEMP, src, 10); \
-    XOR(dest, G_TEMP, dest) ;
+    XOR(dest, G_TEMP, dest); AVX2_R(G_TEMP, src, 10); XOR(dest, G_TEMP, dest);
+#define GAMMA1_1(dest, src)    AVX2_S(dest, src, 17); AVX2_S(G_TEMP, src, 19);
+#define GAMMA1_2(dest, src)    XOR(dest, G_TEMP, dest); AVX2_R(G_TEMP, src, 10); \
+    XOR(dest, G_TEMP, dest);
 
-#define    FEEDBACK1_to_W_I_2    MOVE_BYTE(YMM_TEMP0, W_I, mMAP1toW_I_2[0]) ; \
-    BLEND(0x0c, W_I_2, YMM_TEMP0, W_I_2) ;
-#define    FEEDBACK2_to_W_I_2    MOVE_128(YMM_TEMP0, W_I, W_I, 0x08) ;  \
-    MOVE_BYTE(YMM_TEMP0, YMM_TEMP0, mMAP2toW_I_2[0]) ; BLEND(0x30, W_I_2, YMM_TEMP0, W_I_2) ; 
-#define    FEEDBACK3_to_W_I_2    MOVE_BYTE(YMM_TEMP0, W_I, mMAP3toW_I_2[0]) ; \
-    BLEND(0xc0, W_I_2, YMM_TEMP0, W_I_2) ; 
+#define    FEEDBACK1_to_W_I_2    MOVE_BYTE(YMM_TEMP0, W_I, mMAP1toW_I_2[0]); \
+    BLEND(0x0c, W_I_2, YMM_TEMP0, W_I_2);
+#define    FEEDBACK2_to_W_I_2    MOVE_128(YMM_TEMP0, W_I, W_I, 0x08);  \
+    MOVE_BYTE(YMM_TEMP0, YMM_TEMP0, mMAP2toW_I_2[0]); BLEND(0x30, W_I_2, YMM_TEMP0, W_I_2);
+#define    FEEDBACK3_to_W_I_2    MOVE_BYTE(YMM_TEMP0, W_I, mMAP3toW_I_2[0]); \
+    BLEND(0xc0, W_I_2, YMM_TEMP0, W_I_2);
 
-#define    FEEDBACK_to_W_I_7     MOVE_128(YMM_TEMP0, W_I, W_I, 0x08) ;\
-    MOVE_BYTE(YMM_TEMP0, YMM_TEMP0, mMAPtoW_I_7[0]) ; BLEND(0x80, W_I_7, YMM_TEMP0, W_I_7) ;
+#define    FEEDBACK_to_W_I_7     MOVE_128(YMM_TEMP0, W_I, W_I, 0x08);\
+    MOVE_BYTE(YMM_TEMP0, YMM_TEMP0, mMAPtoW_I_7[0]); BLEND(0x80, W_I_7, YMM_TEMP0, W_I_7);
 
 #undef voitle
 
@@ -1261,69 +1336,69 @@ static int Transform_AVX1_RORX(Sha256* sha256)
 
 
 #define MOVE_15_to_16(w_i_16, w_i_15, w_i_7)\
-    __asm__ volatile("vperm2i128  $0x01, %%"#w_i_15", %%"#w_i_15", %%"#w_i_15" ":::YMM_REGs) ;\
-    __asm__ volatile("vpblendd    $0x08, %%"#w_i_15", %%"#w_i_7", %%"#w_i_16" ":::YMM_REGs) ;\
-    __asm__ volatile("vperm2i128 $0x01,  %%"#w_i_7",  %%"#w_i_7", %%"#w_i_15" ":::YMM_REGs) ;\
-    __asm__ volatile("vpblendd    $0x80, %%"#w_i_15", %%"#w_i_16", %%"#w_i_16" ":::YMM_REGs) ;\
-    __asm__ volatile("vpshufd    $0x93,  %%"#w_i_16", %%"#w_i_16" ":::YMM_REGs) ;\
+    __asm__ volatile("vperm2i128  $0x01, %%"#w_i_15", %%"#w_i_15", %%"#w_i_15" ":::YMM_REGs);\
+    __asm__ volatile("vpblendd    $0x08, %%"#w_i_15", %%"#w_i_7", %%"#w_i_16" ":::YMM_REGs);\
+    __asm__ volatile("vperm2i128 $0x01,  %%"#w_i_7",  %%"#w_i_7", %%"#w_i_15" ":::YMM_REGs);\
+    __asm__ volatile("vpblendd    $0x80, %%"#w_i_15", %%"#w_i_16", %%"#w_i_16" ":::YMM_REGs);\
+    __asm__ volatile("vpshufd    $0x93,  %%"#w_i_16", %%"#w_i_16" ":::YMM_REGs);\
 
 #define MOVE_7_to_15(w_i_15, w_i_7)\
-    __asm__ volatile("vmovdqu                 %%"#w_i_7",  %%"#w_i_15" ":::YMM_REGs) ;\
+    __asm__ volatile("vmovdqu                 %%"#w_i_7",  %%"#w_i_15" ":::YMM_REGs);\
 
 #define MOVE_I_to_7(w_i_7, w_i)\
-    __asm__ volatile("vperm2i128 $0x01,       %%"#w_i",   %%"#w_i",   %%"#w_i_7" ":::YMM_REGs) ;\
-    __asm__ volatile("vpblendd    $0x01,       %%"#w_i_7",   %%"#w_i", %%"#w_i_7" ":::YMM_REGs) ;\
-    __asm__ volatile("vpshufd    $0x39, %%"#w_i_7", %%"#w_i_7" ":::YMM_REGs) ;\
+    __asm__ volatile("vperm2i128 $0x01,       %%"#w_i",   %%"#w_i",   %%"#w_i_7" ":::YMM_REGs);\
+    __asm__ volatile("vpblendd    $0x01,       %%"#w_i_7",   %%"#w_i", %%"#w_i_7" ":::YMM_REGs);\
+    __asm__ volatile("vpshufd    $0x39, %%"#w_i_7", %%"#w_i_7" ":::YMM_REGs);\
 
 #define MOVE_I_to_2(w_i_2, w_i)\
-    __asm__ volatile("vperm2i128 $0x01,       %%"#w_i", %%"#w_i", %%"#w_i_2" ":::YMM_REGs) ;\
-    __asm__ volatile("vpshufd    $0x0e, %%"#w_i_2", %%"#w_i_2" ":::YMM_REGs) ;\
+    __asm__ volatile("vperm2i128 $0x01,       %%"#w_i", %%"#w_i", %%"#w_i_2" ":::YMM_REGs);\
+    __asm__ volatile("vpshufd    $0x0e, %%"#w_i_2", %%"#w_i_2" ":::YMM_REGs);\
 
 #define ROTATE_W(w_i_16, w_i_15, w_i_7, w_i_2, w_i)\
-    MOVE_15_to_16(w_i_16, w_i_15, w_i_7) ; \
-    MOVE_7_to_15(w_i_15, w_i_7) ; \
-    MOVE_I_to_7(w_i_7, w_i) ; \
-    MOVE_I_to_2(w_i_2, w_i) ;\
+    MOVE_15_to_16(w_i_16, w_i_15, w_i_7); \
+    MOVE_7_to_15(w_i_15, w_i_7); \
+    MOVE_I_to_7(w_i_7, w_i); \
+    MOVE_I_to_2(w_i_2, w_i);\
 
 #define _RegToDigest(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
-    { word32 d ;\
-    __asm__ volatile("movl %"#S_0", %0":"=r"(d)::SSE_REGs) ;\
+    { word32 d;\
+    __asm__ volatile("movl %"#S_0", %0":"=r"(d)::SSE_REGs);\
     sha256->digest[0] += d;\
-    __asm__ volatile("movl %"#S_1", %0":"=r"(d)::SSE_REGs) ;\
+    __asm__ volatile("movl %"#S_1", %0":"=r"(d)::SSE_REGs);\
     sha256->digest[1] += d;\
-    __asm__ volatile("movl %"#S_2", %0":"=r"(d)::SSE_REGs) ;\
+    __asm__ volatile("movl %"#S_2", %0":"=r"(d)::SSE_REGs);\
     sha256->digest[2] += d;\
-    __asm__ volatile("movl %"#S_3", %0":"=r"(d)::SSE_REGs) ;\
+    __asm__ volatile("movl %"#S_3", %0":"=r"(d)::SSE_REGs);\
     sha256->digest[3] += d;\
-    __asm__ volatile("movl %"#S_4", %0":"=r"(d)::SSE_REGs) ;\
+    __asm__ volatile("movl %"#S_4", %0":"=r"(d)::SSE_REGs);\
     sha256->digest[4] += d;\
-    __asm__ volatile("movl %"#S_5", %0":"=r"(d)::SSE_REGs) ;\
+    __asm__ volatile("movl %"#S_5", %0":"=r"(d)::SSE_REGs);\
     sha256->digest[5] += d;\
-    __asm__ volatile("movl %"#S_6", %0":"=r"(d)::SSE_REGs) ;\
+    __asm__ volatile("movl %"#S_6", %0":"=r"(d)::SSE_REGs);\
     sha256->digest[6] += d;\
-    __asm__ volatile("movl %"#S_7", %0":"=r"(d)::SSE_REGs) ;\
+    __asm__ volatile("movl %"#S_7", %0":"=r"(d)::SSE_REGs);\
     sha256->digest[7] += d;\
 }
 
 #define _DumpS(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
-  { word32 d[8] ;\
-    __asm__ volatile("movl %"#S_0", %0":"=r"(d[0])::SSE_REGs) ;\
-    __asm__ volatile("movl %"#S_1", %0":"=r"(d[1])::SSE_REGs) ;\
-    __asm__ volatile("movl %"#S_2", %0":"=r"(d[2])::SSE_REGs) ;\
-    __asm__ volatile("movl %"#S_3", %0":"=r"(d[3])::SSE_REGs) ;\
-    __asm__ volatile("movl %"#S_4", %0":"=r"(d[4])::SSE_REGs) ;\
-    __asm__ volatile("movl %"#S_5", %0":"=r"(d[5])::SSE_REGs) ;\
-    __asm__ volatile("movl %"#S_6", %0":"=r"(d[6])::SSE_REGs) ;\
-    __asm__ volatile("movl %"#S_7", %0":"=r"(d[7])::SSE_REGs) ;\
+  { word32 d[8];\
+    __asm__ volatile("movl %"#S_0", %0":"=r"(d[0])::SSE_REGs);\
+    __asm__ volatile("movl %"#S_1", %0":"=r"(d[1])::SSE_REGs);\
+    __asm__ volatile("movl %"#S_2", %0":"=r"(d[2])::SSE_REGs);\
+    __asm__ volatile("movl %"#S_3", %0":"=r"(d[3])::SSE_REGs);\
+    __asm__ volatile("movl %"#S_4", %0":"=r"(d[4])::SSE_REGs);\
+    __asm__ volatile("movl %"#S_5", %0":"=r"(d[5])::SSE_REGs);\
+    __asm__ volatile("movl %"#S_6", %0":"=r"(d[6])::SSE_REGs);\
+    __asm__ volatile("movl %"#S_7", %0":"=r"(d[7])::SSE_REGs);\
         printf("S[0..7]=%08x,%08x,%08x,%08x,%08x,%08x,%08x,%08x\n", d[0],d[1],d[2],d[3],d[4],d[5],d[6],d[7]);\
-    __asm__ volatile("movl %0, %"#S_0::"r"(d[0]):SSE_REGs) ;\
-    __asm__ volatile("movl %0, %"#S_1::"r"(d[1]):SSE_REGs) ;\
-    __asm__ volatile("movl %0, %"#S_2::"r"(d[2]):SSE_REGs) ;\
-    __asm__ volatile("movl %0, %"#S_3::"r"(d[3]):SSE_REGs) ;\
-    __asm__ volatile("movl %0, %"#S_4::"r"(d[4]):SSE_REGs) ;\
-    __asm__ volatile("movl %0, %"#S_5::"r"(d[5]):SSE_REGs) ;\
-    __asm__ volatile("movl %0, %"#S_6::"r"(d[6]):SSE_REGs) ;\
-    __asm__ volatile("movl %0, %"#S_7::"r"(d[7]):SSE_REGs) ;\
+    __asm__ volatile("movl %0, %"#S_0::"r"(d[0]):SSE_REGs);\
+    __asm__ volatile("movl %0, %"#S_1::"r"(d[1]):SSE_REGs);\
+    __asm__ volatile("movl %0, %"#S_2::"r"(d[2]):SSE_REGs);\
+    __asm__ volatile("movl %0, %"#S_3::"r"(d[3]):SSE_REGs);\
+    __asm__ volatile("movl %0, %"#S_4::"r"(d[4]):SSE_REGs);\
+    __asm__ volatile("movl %0, %"#S_5::"r"(d[5]):SSE_REGs);\
+    __asm__ volatile("movl %0, %"#S_6::"r"(d[6]):SSE_REGs);\
+    __asm__ volatile("movl %0, %"#S_7::"r"(d[7]):SSE_REGs);\
 }
 
 
@@ -1336,383 +1411,382 @@ static int Transform_AVX1_RORX(Sha256* sha256)
 #define DumS(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )\
     _DumpS(S_0, S_1, S_2, S_3, S_4, S_5, S_6, S_7 )
 
-        
-    /* Byte swap Masks to ensure that rest of the words are filled with zero's. */
-    static const unsigned long mBYTE_FLIP_MASK_16[] =  
-        { 0x0405060700010203, 0x0c0d0e0f08090a0b, 0x0405060700010203, 0x0c0d0e0f08090a0b } ;
-    static const unsigned long mBYTE_FLIP_MASK_15[] =  
-        { 0x0405060700010203, 0x0c0d0e0f08090a0b, 0x0405060700010203, 0x0c0d0e0f08090a0b } ;
-    static const unsigned long mBYTE_FLIP_MASK_7 [] =  
-        { 0x0405060700010203, 0x0c0d0e0f08090a0b, 0x0405060700010203, 0x8080808008090a0b } ;
-    static const unsigned long mBYTE_FLIP_MASK_2 [] =  
-        { 0x0405060700010203, 0x8080808080808080, 0x8080808080808080, 0x8080808080808080 } ;
 
-    static const unsigned long mMAPtoW_I_7[] =  
-        { 0x8080808080808080, 0x8080808080808080, 0x8080808080808080, 0x0302010080808080 } ;
-    static const unsigned long mMAP1toW_I_2[] = 
-        { 0x8080808080808080, 0x0706050403020100, 0x8080808080808080, 0x8080808080808080 } ;
-    static const unsigned long mMAP2toW_I_2[] = 
-        { 0x8080808080808080, 0x8080808080808080, 0x0f0e0d0c0b0a0908, 0x8080808080808080 } ;
-    static const unsigned long mMAP3toW_I_2[] = 
-        { 0x8080808080808080, 0x8080808080808080, 0x8080808080808080, 0x0706050403020100 } ;
- 
+    /* Byte swap masks to ensure that the rest of the words are filled with zeros. */
+    static const unsigned long mBYTE_FLIP_MASK_16[] =
+        { 0x0405060700010203, 0x0c0d0e0f08090a0b, 0x0405060700010203, 0x0c0d0e0f08090a0b };
+    static const unsigned long mBYTE_FLIP_MASK_15[] =
+        { 0x0405060700010203, 0x0c0d0e0f08090a0b, 0x0405060700010203, 0x0c0d0e0f08090a0b };
+    static const unsigned long mBYTE_FLIP_MASK_7 [] =
+        { 0x0405060700010203, 0x0c0d0e0f08090a0b, 0x0405060700010203, 0x8080808008090a0b };
+    static const unsigned long mBYTE_FLIP_MASK_2 [] =
+        { 0x0405060700010203, 0x8080808080808080, 0x8080808080808080, 0x8080808080808080 };
+
+    static const unsigned long mMAPtoW_I_7[] =
+        { 0x8080808080808080, 0x8080808080808080, 0x8080808080808080, 0x0302010080808080 };
+    static const unsigned long mMAP1toW_I_2[] =
+        { 0x8080808080808080, 0x0706050403020100, 0x8080808080808080, 0x8080808080808080 };
+    static const unsigned long mMAP2toW_I_2[] =
+        { 0x8080808080808080, 0x8080808080808080, 0x0f0e0d0c0b0a0908, 0x8080808080808080 };
+    static const unsigned long mMAP3toW_I_2[] =
+        { 0x8080808080808080, 0x8080808080808080, 0x8080808080808080, 0x0706050403020100 };
+
 static int Transform_AVX2(Sha256* sha256)
 {
+#ifdef WOLFSSL_SMALL_STACK
+    word32* W_K;
+    W_K = (word32*) XMALLOC(sizeof(word32) * 64, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    if (W_K == NULL)
+        return MEMORY_E;
+#else
+    word32 W_K[64];
+#endif
 
-    #ifdef WOLFSSL_SMALL_STACK
-        word32* W_K;
-        W_K = (word32*) XMALLOC(sizeof(word32) * 64, NULL, DYNAMIC_TYPE_TMP_BUFFER);
-        if (W_K == NULL)
-            return MEMORY_E;
-    #else
-        word32 W_K[64]  ;
-    #endif
+    MOVE_to_REG(W_I_16, sha256->buffer[0]);     BYTE_SWAP(W_I_16, mBYTE_FLIP_MASK_16[0]);
+    MOVE_to_REG(W_I_15, sha256->buffer[1]);     BYTE_SWAP(W_I_15, mBYTE_FLIP_MASK_15[0]);
+    MOVE_to_REG(W_I,    sha256->buffer[8]);    BYTE_SWAP(W_I,    mBYTE_FLIP_MASK_16[0]);
+    MOVE_to_REG(W_I_7,  sha256->buffer[16-7]); BYTE_SWAP(W_I_7,  mBYTE_FLIP_MASK_7[0]);
+    MOVE_to_REG(W_I_2,  sha256->buffer[16-2]); BYTE_SWAP(W_I_2,  mBYTE_FLIP_MASK_2[0]);
 
-    MOVE_to_REG(W_I_16, sha256->buffer[0]);     BYTE_SWAP(W_I_16, mBYTE_FLIP_MASK_16[0]) ;
-    MOVE_to_REG(W_I_15, sha256->buffer[1]);     BYTE_SWAP(W_I_15, mBYTE_FLIP_MASK_15[0]) ;
-    MOVE_to_REG(W_I,    sha256->buffer[8]) ;    BYTE_SWAP(W_I,    mBYTE_FLIP_MASK_16[0]) ;
-    MOVE_to_REG(W_I_7,  sha256->buffer[16-7]) ; BYTE_SWAP(W_I_7,  mBYTE_FLIP_MASK_7[0])  ;
-    MOVE_to_REG(W_I_2,  sha256->buffer[16-2]) ; BYTE_SWAP(W_I_2,  mBYTE_FLIP_MASK_2[0])  ;
+    DigestToReg(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7);
 
-    DigestToReg(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7) ;
+    ADD_MEM(W_K_TEMP, W_I_16, K[0]);
+    MOVE_to_MEM(W_K[0], W_K_TEMP);
 
-    ADD_MEM(W_K_TEMP, W_I_16, K[0]) ;
-    MOVE_to_MEM(W_K[0], W_K_TEMP) ; 
+    RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,0);
+    RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,1);
+    RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,2);
+    RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,3);
+    RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,4);
+    RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,5);
+    RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,6);
+    RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,7);
 
-    RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,0) ;
-    RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,1) ;
-    RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,2) ;
-    RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,3) ;  
-    RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,4) ;
-    RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,5) ;
-    RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,6) ;
-    RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,7) ;
+    ADD_MEM(YMM_TEMP0, W_I, K[8]);
+    MOVE_to_MEM(W_K[8], YMM_TEMP0);
 
-    ADD_MEM(YMM_TEMP0, W_I, K[8]) ;
-    MOVE_to_MEM(W_K[8], YMM_TEMP0) ; 
+    /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16] */
+            RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8);
+    GAMMA0_1(W_I_TEMP, W_I_15);
+            RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8);
+    GAMMA0_2(W_I_TEMP, W_I_15);
+            RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8);
+    ADD(W_I_TEMP, W_I_16, W_I_TEMP);/* for saving W_I before adding incomplete W_I_7 */
+            RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,9);
+    ADD(W_I, W_I_7, W_I_TEMP);
+            RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,9);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,9);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,10);
+    ADD(W_I, W_I, YMM_TEMP0);/* now W[16..17] are completed */
+            RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,10);
+    FEEDBACK1_to_W_I_2;
+            RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,10);
+    FEEDBACK_to_W_I_7;
+            RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,11);
+    ADD(W_I_TEMP, W_I_7, W_I_TEMP);
+            RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,11);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,11);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,12);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0);/* now W[16..19] are completed */
+            RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,12);
+    FEEDBACK2_to_W_I_2;
+            RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,12);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,13);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,13);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..21] are completed */
+            RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,13);
+    FEEDBACK3_to_W_I_2;
+            RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,14);
+    GAMMA1(YMM_TEMP0, W_I_2);
+            RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,14);
+            RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,14);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..23] are completed */
+            RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,15);
 
-        /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15] + W[i-16]) */
-                RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8) ;
-        GAMMA0_1(W_I_TEMP, W_I_15) ;
-                RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8) ;
-        GAMMA0_2(W_I_TEMP, W_I_15) ;
-                RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,8) ;
-        ADD(W_I_TEMP, W_I_16, W_I_TEMP) ;/* for saving W_I before adding incomplete W_I_7 */
-                RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,9) ;
-        ADD(W_I, W_I_7, W_I_TEMP);
-                RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,9) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ; 
-                RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,9) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ; 
-                RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,10) ;
-        ADD(W_I, W_I, YMM_TEMP0) ;/* now W[16..17] are completed */
-                RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,10) ;
-        FEEDBACK1_to_W_I_2 ;
-                RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,10) ;
-        FEEDBACK_to_W_I_7 ; 
-                RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,11) ;
-        ADD(W_I_TEMP, W_I_7, W_I_TEMP);
-                RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,11) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ; 
-                RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,11) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ; 
-                RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,12) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ;/* now W[16..19] are completed */
-                RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,12) ;
-        FEEDBACK2_to_W_I_2 ;
-                RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,12) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,13) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ; 
-                RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,13) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..21] are completed */
-                RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,13) ;
-        FEEDBACK3_to_W_I_2 ;
-                RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,14) ;
-        GAMMA1(YMM_TEMP0, W_I_2) ;
-                RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,14) ;
-                RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,14) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..23] are completed */
-                RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,15) ;
+    MOVE_to_REG(YMM_TEMP0, K[16]);
+            RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,15);
+    ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I);
+            RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,15);
+    ADD(YMM_TEMP0, YMM_TEMP0, W_I);
+    MOVE_to_MEM(W_K[16], YMM_TEMP0);
 
-        MOVE_to_REG(YMM_TEMP0, K[16]) ;    
-                RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,15) ;
-        ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I) ;
-                RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,15) ;
-        ADD(YMM_TEMP0, YMM_TEMP0, W_I) ;
-        MOVE_to_MEM(W_K[16], YMM_TEMP0) ;
+    /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16] */
+            RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16);
+    GAMMA0_1(W_I_TEMP, W_I_15);
+            RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16);
+    GAMMA0_2(W_I_TEMP, W_I_15);
+            RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16);
+    ADD(W_I_TEMP, W_I_16, W_I_TEMP);/* for saving W_I before adding incomplete W_I_7 */
+            RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,17);
+    ADD(W_I, W_I_7, W_I_TEMP);
+            RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,17);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,17);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,18);
+    ADD(W_I, W_I, YMM_TEMP0);/* now W[16..17] are completed */
+            RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,18);
+    FEEDBACK1_to_W_I_2;
+            RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,18);
+    FEEDBACK_to_W_I_7;
+            RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,19);
+    ADD(W_I_TEMP, W_I_7, W_I_TEMP);
+            RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,19);
+    GAMMA1(YMM_TEMP0, W_I_2);
+            RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,19);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,20);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0);/* now W[16..19] are completed */
+            RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,20);
+    FEEDBACK2_to_W_I_2;
+            RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,20);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,21);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,21);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..21] are completed */
+            RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,21);
+    FEEDBACK3_to_W_I_2;
+            RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,22);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,22);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,22);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..23] are completed */
+            RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,23);
 
-        /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15] + W[i-16]) */
-                RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16) ;
-        GAMMA0_1(W_I_TEMP, W_I_15) ;
-                RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16) ;
-        GAMMA0_2(W_I_TEMP, W_I_15) ;
-                RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,16) ;
-        ADD(W_I_TEMP, W_I_16, W_I_TEMP) ;/* for saving W_I before adding incomplete W_I_7 */
-                RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,17) ;
-        ADD(W_I, W_I_7, W_I_TEMP);
-                RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,17) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ; 
-                RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,17) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ; 
-                RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,18) ;
-        ADD(W_I, W_I, YMM_TEMP0) ;/* now W[16..17] are completed */
-                RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,18) ;
-        FEEDBACK1_to_W_I_2 ;
-                RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,18) ;
-        FEEDBACK_to_W_I_7 ; 
-                RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,19) ;
-        ADD(W_I_TEMP, W_I_7, W_I_TEMP);
-                RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,19) ;
-        GAMMA1(YMM_TEMP0, W_I_2) ; 
-                RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,19) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ; 
-                RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,20) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ;/* now W[16..19] are completed */
-                RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,20) ;
-        FEEDBACK2_to_W_I_2 ;
-                RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,20) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,21) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ; 
-                RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,21) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..21] are completed */
-                RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,21) ;
-        FEEDBACK3_to_W_I_2 ;
-                RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,22) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,22) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ; 
-                RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,22) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..23] are completed */
-                RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,23) ;
+    MOVE_to_REG(YMM_TEMP0, K[24]);
+            RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,23);
+    ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I);
+            RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,23);
+    ADD(YMM_TEMP0, YMM_TEMP0, W_I);
+    MOVE_to_MEM(W_K[24], YMM_TEMP0);
 
-        MOVE_to_REG(YMM_TEMP0, K[24]) ;    
-                RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,23) ;
-        ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I) ;
-                RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,23) ;
-        ADD(YMM_TEMP0, YMM_TEMP0, W_I) ;
-        MOVE_to_MEM(W_K[24], YMM_TEMP0) ;
+            /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16] */
+            RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24);
+    GAMMA0_1(W_I_TEMP, W_I_15);
+            RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24);
+    GAMMA0_2(W_I_TEMP, W_I_15);
+            RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24);
+    ADD(W_I_TEMP, W_I_16, W_I_TEMP);/* for saving W_I before adding incomplete W_I_7 */
+            RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,25);
+    ADD(W_I, W_I_7, W_I_TEMP);
+            RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,25);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,25);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,26);
+    ADD(W_I, W_I, YMM_TEMP0);/* now W[16..17] are completed */
+            RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,26);
+    FEEDBACK1_to_W_I_2;
+            RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,26);
+    FEEDBACK_to_W_I_7;
+            RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,27);
+    ADD(W_I_TEMP, W_I_7, W_I_TEMP);
+            RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,27);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,27);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,28);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0);/* now W[16..19] are completed */
+            RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,28);
+    FEEDBACK2_to_W_I_2;
+            RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,28);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,29);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,29);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..21] are completed */
+            RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,29);
+    FEEDBACK3_to_W_I_2;
+            RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,30);
+    GAMMA1(YMM_TEMP0, W_I_2);
+            RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,30);
+            RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,30);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..23] are completed */
+            RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,31);
 
-                /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15] + W[i-16]) */
-                RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24) ;
-        GAMMA0_1(W_I_TEMP, W_I_15) ;
-                RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24) ;
-        GAMMA0_2(W_I_TEMP, W_I_15) ;
-                RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,24) ;
-        ADD(W_I_TEMP, W_I_16, W_I_TEMP) ;/* for saving W_I before adding incomplete W_I_7 */
-                RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,25) ;
-        ADD(W_I, W_I_7, W_I_TEMP);
-                RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,25) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ; 
-                RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,25) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ; 
-                RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,26) ;
-        ADD(W_I, W_I, YMM_TEMP0) ;/* now W[16..17] are completed */
-                RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,26) ;
-        FEEDBACK1_to_W_I_2 ;
-                RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,26) ;
-        FEEDBACK_to_W_I_7 ; 
-                RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,27) ;
-        ADD(W_I_TEMP, W_I_7, W_I_TEMP);
-                RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,27) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ; 
-                RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,27) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ; 
-                RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,28) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ;/* now W[16..19] are completed */
-                RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,28) ;
-        FEEDBACK2_to_W_I_2 ;
-                RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,28) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,29) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ; 
-                RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,29) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..21] are completed */
-                RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,29) ;
-        FEEDBACK3_to_W_I_2 ;
-                RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,30) ;
-        GAMMA1(YMM_TEMP0, W_I_2) ;
-                RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,30) ;
-                RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,30) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..23] are completed */
-                RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,31) ;
+    MOVE_to_REG(YMM_TEMP0, K[32]);
+            RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,31);
+    ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I);
+            RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,31);
+    ADD(YMM_TEMP0, YMM_TEMP0, W_I);
+    MOVE_to_MEM(W_K[32], YMM_TEMP0);
 
-        MOVE_to_REG(YMM_TEMP0, K[32]) ;    
-                RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,31) ;
-        ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I) ;
-                RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,31) ;
-        ADD(YMM_TEMP0, YMM_TEMP0, W_I) ;
-        MOVE_to_MEM(W_K[32], YMM_TEMP0) ;
 
-        
-                /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15] + W[i-16]) */
-                RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32) ;
-        GAMMA0_1(W_I_TEMP, W_I_15) ;
-                RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32) ;
-        GAMMA0_2(W_I_TEMP, W_I_15) ;
-                RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32) ;
-        ADD(W_I_TEMP, W_I_16, W_I_TEMP) ;/* for saving W_I before adding incomplete W_I_7 */
-                RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,33) ;
-        ADD(W_I, W_I_7, W_I_TEMP);
-                RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,33) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,33) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ; 
-                RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,34) ;
-        ADD(W_I, W_I, YMM_TEMP0) ;/* now W[16..17] are completed */
-                RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,34) ;
-        FEEDBACK1_to_W_I_2 ;
-                RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,34) ;
-        FEEDBACK_to_W_I_7 ; 
-                RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,35) ;
-        ADD(W_I_TEMP, W_I_7, W_I_TEMP);
-                RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,35) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,35) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,36) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ;/* now W[16..19] are completed */
-                RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,36) ;
-        FEEDBACK2_to_W_I_2 ;
-                RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,36) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,37) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,37) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..21] are completed */
-                RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,37) ;
-        FEEDBACK3_to_W_I_2 ;
-                RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,38) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,38) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,38) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..23] are completed */
-                RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,39) ;
+            /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16] */
+            RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32);
+    GAMMA0_1(W_I_TEMP, W_I_15);
+            RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32);
+    GAMMA0_2(W_I_TEMP, W_I_15);
+            RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,32);
+    ADD(W_I_TEMP, W_I_16, W_I_TEMP);/* for saving W_I before adding incomplete W_I_7 */
+            RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,33);
+    ADD(W_I, W_I_7, W_I_TEMP);
+            RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,33);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,33);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,34);
+    ADD(W_I, W_I, YMM_TEMP0);/* now W[16..17] are completed */
+            RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,34);
+    FEEDBACK1_to_W_I_2;
+            RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,34);
+    FEEDBACK_to_W_I_7;
+            RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,35);
+    ADD(W_I_TEMP, W_I_7, W_I_TEMP);
+            RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,35);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,35);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,36);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0);/* now W[16..19] are completed */
+            RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,36);
+    FEEDBACK2_to_W_I_2;
+            RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,36);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,37);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,37);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..21] are completed */
+            RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,37);
+    FEEDBACK3_to_W_I_2;
+            RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,38);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,38);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,38);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..23] are completed */
+            RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,39);
 
-        MOVE_to_REG(YMM_TEMP0, K[40]) ;    
-                RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,39) ;
-        ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I) ;
-                RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,39) ;
-        ADD(YMM_TEMP0, YMM_TEMP0, W_I) ;
-        MOVE_to_MEM(W_K[40], YMM_TEMP0) ;
+    MOVE_to_REG(YMM_TEMP0, K[40]);
+            RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,39);
+    ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I);
+            RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,39);
+    ADD(YMM_TEMP0, YMM_TEMP0, W_I);
+    MOVE_to_MEM(W_K[40], YMM_TEMP0);
 
-                /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15] + W[i-16]) */
-                RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40) ;
-        GAMMA0_1(W_I_TEMP, W_I_15) ;
-                RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40) ;
-        GAMMA0_2(W_I_TEMP, W_I_15) ;
-                RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40) ;
-        ADD(W_I_TEMP, W_I_16, W_I_TEMP) ;/* for saving W_I before adding incomplete W_I_7 */
-                RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,41) ;
-        ADD(W_I, W_I_7, W_I_TEMP);
-                RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,41) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,41) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,42) ;
-        ADD(W_I, W_I, YMM_TEMP0) ;/* now W[16..17] are completed */
-                RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,42) ;
-        FEEDBACK1_to_W_I_2 ;
-                RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,42) ;
-        FEEDBACK_to_W_I_7 ; 
-                RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,43) ;
-        ADD(W_I_TEMP, W_I_7, W_I_TEMP);
-                RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,43) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ; 
-                RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,43) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,44) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ;/* now W[16..19] are completed */
-                RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,44) ;
-        FEEDBACK2_to_W_I_2 ;
-                RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,44) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,45) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,45) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..21] are completed */
-                RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,45) ;
-        FEEDBACK3_to_W_I_2 ;
-                RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,46) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,46) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,46) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..23] are completed */
-                RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,47) ;
+            /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16] */
+            RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40);
+    GAMMA0_1(W_I_TEMP, W_I_15);
+            RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40);
+    GAMMA0_2(W_I_TEMP, W_I_15);
+            RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,40);
+    ADD(W_I_TEMP, W_I_16, W_I_TEMP);/* for saving W_I before adding incomplete W_I_7 */
+            RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,41);
+    ADD(W_I, W_I_7, W_I_TEMP);
+            RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,41);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,41);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,42);
+    ADD(W_I, W_I, YMM_TEMP0);/* now W[16..17] are completed */
+            RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,42);
+    FEEDBACK1_to_W_I_2;
+            RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,42);
+    FEEDBACK_to_W_I_7;
+            RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,43);
+    ADD(W_I_TEMP, W_I_7, W_I_TEMP);
+            RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,43);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,43);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,44);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0);/* now W[16..19] are completed */
+            RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,44);
+    FEEDBACK2_to_W_I_2;
+            RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,44);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,45);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,45);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..21] are completed */
+            RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,45);
+    FEEDBACK3_to_W_I_2;
+            RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,46);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,46);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,46);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..23] are completed */
+            RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,47);
 
-        MOVE_to_REG(YMM_TEMP0, K[48]) ;    
-                RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,47) ;
-        ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I) ;
-                RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,47) ;
-        ADD(YMM_TEMP0, YMM_TEMP0, W_I) ;
-        MOVE_to_MEM(W_K[48], YMM_TEMP0) ;
-        
-                /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15] + W[i-16]) */
-                RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48) ;
-        GAMMA0_1(W_I_TEMP, W_I_15) ;
-                RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48) ;
-        GAMMA0_2(W_I_TEMP, W_I_15) ;
-                RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48) ;
-        ADD(W_I_TEMP, W_I_16, W_I_TEMP) ;/* for saving W_I before adding incomplete W_I_7 */
-                RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49) ;
-        ADD(W_I, W_I_7, W_I_TEMP);
-                RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ; 
-                RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50) ;
-        ADD(W_I, W_I, YMM_TEMP0) ;/* now W[16..17] are completed */
-                RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50) ;
-        FEEDBACK1_to_W_I_2 ;
-                RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50) ;
-        FEEDBACK_to_W_I_7 ; 
-                RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51) ;
-        ADD(W_I_TEMP, W_I_7, W_I_TEMP);
-                RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ;/* now W[16..19] are completed */
-                RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52) ;
-        FEEDBACK2_to_W_I_2 ;
-                RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..21] are completed */
-                RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53) ;
-        FEEDBACK3_to_W_I_2 ;
-                RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54) ;
-        GAMMA1_1(YMM_TEMP0, W_I_2) ;
-                RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54) ;
-        GAMMA1_2(YMM_TEMP0, W_I_2) ;
-                RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54) ;
-        ADD(W_I, W_I_TEMP, YMM_TEMP0) ; /* now W[16..23] are completed */
-                RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55) ;
+    MOVE_to_REG(YMM_TEMP0, K[48]);
+            RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,47);
+    ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I);
+            RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,47);
+    ADD(YMM_TEMP0, YMM_TEMP0, W_I);
+    MOVE_to_MEM(W_K[48], YMM_TEMP0);
 
-        MOVE_to_REG(YMM_TEMP0, K[56]) ;    
-                RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55) ;
-        ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I) ;
-                RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55) ;
-        ADD(YMM_TEMP0, YMM_TEMP0, W_I) ;
-        MOVE_to_MEM(W_K[56], YMM_TEMP0) ;        
-        
-        RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,56) ;
-        RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,57) ;
-        RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,58) ;
-        RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,59) ;
+            /* W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16] */
+            RND_0_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48);
+    GAMMA0_1(W_I_TEMP, W_I_15);
+            RND_0_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48);
+    GAMMA0_2(W_I_TEMP, W_I_15);
+            RND_0_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,48);
+    ADD(W_I_TEMP, W_I_16, W_I_TEMP);/* for saving W_I before adding incomplete W_I_7 */
+            RND_7_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49);
+    ADD(W_I, W_I_7, W_I_TEMP);
+            RND_7_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_7_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,49);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_6_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50);
+    ADD(W_I, W_I, YMM_TEMP0);/* now W[16..17] are completed */
+            RND_6_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50);
+    FEEDBACK1_to_W_I_2;
+            RND_6_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,50);
+    FEEDBACK_to_W_I_7;
+            RND_5_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51);
+    ADD(W_I_TEMP, W_I_7, W_I_TEMP);
+            RND_5_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_5_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,51);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_4_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0);/* now W[16..19] are completed */
+            RND_4_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52);
+    FEEDBACK2_to_W_I_2;
+            RND_4_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,52);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_3_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_3_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..21] are completed */
+            RND_3_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,53);
+    FEEDBACK3_to_W_I_2;
+            RND_2_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54);
+    GAMMA1_1(YMM_TEMP0, W_I_2);
+            RND_2_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54);
+    GAMMA1_2(YMM_TEMP0, W_I_2);
+            RND_2_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,54);
+    ADD(W_I, W_I_TEMP, YMM_TEMP0); /* now W[16..23] are completed */
+            RND_1_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55);
 
-        RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,60) ;
-        RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,61) ;
-        RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,62) ;
-        RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,63) ;
+    MOVE_to_REG(YMM_TEMP0, K[56]);
+            RND_1_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55);
+    ROTATE_W(W_I_16, W_I_15, W_I_7, W_I_2, W_I);
+            RND_1_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,55);
+    ADD(YMM_TEMP0, YMM_TEMP0, W_I);
+    MOVE_to_MEM(W_K[56], YMM_TEMP0);
 
-    RegToDigest(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7) ;  
+    RND_0(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,56);
+    RND_7(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,57);
+    RND_6(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,58);
+    RND_5(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,59);
+
+    RND_4(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,60);
+    RND_3(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,61);
+    RND_2(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,62);
+    RND_1(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7,63);
+
+    RegToDigest(S_0,S_1,S_2,S_3,S_4,S_5,S_6,S_7);
 
 #ifdef WOLFSSL_SMALL_STACK
     XFREE(W_K, NULL, DYNAMIC_TYPE_TMP_BUFFER);
@@ -1723,53 +1797,197 @@ static int Transform_AVX2(Sha256* sha256)
 
 #endif   /* HAVE_INTEL_AVX2 */
 
+
 #ifdef WOLFSSL_SHA224
-int wc_InitSha224(Sha224* sha224)
-{
-    sha224->digest[0] = 0xc1059ed8;
-    sha224->digest[1] = 0x367cd507;
-    sha224->digest[2] = 0x3070dd17;
-    sha224->digest[3] = 0xf70e5939;
-    sha224->digest[4] = 0xffc00b31;
-    sha224->digest[5] = 0x68581511;
-    sha224->digest[6] = 0x64f98fa7;
-    sha224->digest[7] = 0xbefa4fa4;
+    static int InitSha224(Sha224* sha224)
+    {
+        int ret = 0;  /* never reassigned below, so this always returns 0 */
 
-    sha224->buffLen = 0;
-    sha224->loLen   = 0;
-    sha224->hiLen   = 0;
+        sha224->digest[0] = 0xc1059ed8;  /* digest[0..7]: SHA-224 start state */
+        sha224->digest[1] = 0x367cd507;
+        sha224->digest[2] = 0x3070dd17;
+        sha224->digest[3] = 0xf70e5939;
+        sha224->digest[4] = 0xffc00b31;
+        sha224->digest[5] = 0x68581511;
+        sha224->digest[6] = 0x64f98fa7;
+        sha224->digest[7] = 0xbefa4fa4;
 
-#if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
-    set_Transform() ;
-#endif
+        sha224->buffLen = 0;  /* clear the buffered-length and total counters */
+        sha224->loLen   = 0;
+        sha224->hiLen   = 0;
 
-    return 0;
-}
+    #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
+        /* choose best Transform function under this runtime environment */
+        set_Transform();
+    #endif
 
-int wc_Sha224Update(Sha224* sha224, const byte* data, word32 len)
-{
-    return Sha256Update((Sha256 *)sha224, data, len);
-}
-
-
-int wc_Sha224Final(Sha224* sha224, byte* hash)
-{
-    int ret = Sha256Final((Sha256 *)sha224);
-    if (ret != 0)
         return ret;
+    }
+
+    int wc_InitSha224_ex(Sha224* sha224, void* heap, int devId)
+    {
+        int rc;
+
+        /* reject a missing context before any field is written */
+        if (sha224 == NULL) {
+            return BAD_FUNC_ARG;
+        }
+        sha224->heap = heap;
+
+        rc = InitSha224(sha224);
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224)
+        if (rc == 0) {  /* attach the async device only after a clean init */
+            rc = wolfAsync_DevCtxInit(&sha224->asyncDev,
+                            WOLFSSL_ASYNC_MARKER_SHA224, sha224->heap, devId);
+        }
+    #else
+        (void)devId;
+    #endif /* WOLFSSL_ASYNC_CRYPT */
+
+        return rc;
+    }
+
+    int wc_InitSha224(Sha224* sha224)
+    {   /* shorthand for wc_InitSha224_ex with NULL heap, INVALID_DEVID */
+        return wc_InitSha224_ex(sha224, NULL, INVALID_DEVID);
+    }
+
+    int wc_Sha224Update(Sha224* sha224, const byte* data, word32 len)
+    {
+        /* validate first: the async path below reads sha224->asyncDev */
+        if (sha224 == NULL || (data == NULL && len > 0))
+            return BAD_FUNC_ARG;
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224)
+        if (sha224->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA224) {
+        #if defined(HAVE_INTEL_QA)
+            return IntelQaSymSha224(&sha224->asyncDev, NULL, data, len);
+        #endif
+        }
+    #endif /* WOLFSSL_ASYNC_CRYPT */
+
+        /* software path: the context is handed to the SHA-256 update code */
+        return Sha256Update((Sha256 *)sha224, data, len);
+    }
+
+    int wc_Sha224Final(Sha224* sha224, byte* hash)
+    {
+        int ret;
+        /* software path copies SHA224_DIGEST_SIZE bytes, then re-inits */
+        if (sha224 == NULL || hash == NULL)
+            return BAD_FUNC_ARG;  /* checked before asyncDev/hash are used */
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224)
+        if (sha224->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA224) {
+        #if defined(HAVE_INTEL_QA)
+            return IntelQaSymSha224(&sha224->asyncDev, hash, NULL,
+                                            SHA224_DIGEST_SIZE);
+        #endif
+        }
+    #endif /* WOLFSSL_ASYNC_CRYPT */
+        ret = Sha256Final((Sha256*)sha224);
+        if (ret != 0) return ret;
 
     #if defined(LITTLE_ENDIAN_ORDER)
         ByteReverseWords(sha224->digest, sha224->digest, SHA224_DIGEST_SIZE);
     #endif
-    XMEMCPY(hash, sha224->digest, SHA224_DIGEST_SIZE);
+        XMEMCPY(hash, sha224->digest, SHA224_DIGEST_SIZE);
+
+        return InitSha224(sha224);  /* reset state */
+    }
+
+    void wc_Sha224Free(Sha224* sha224)
+    {
+        if (sha224 != NULL) {  /* NULL context: nothing to release */
+        #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA224)
+            wolfAsync_DevCtxFree(&sha224->asyncDev,
+                                            WOLFSSL_ASYNC_MARKER_SHA224);
+        #endif /* WOLFSSL_ASYNC_CRYPT */
+        }
+    }
 
-    return wc_InitSha224(sha224);  /* reset state */
-}
 #endif /* WOLFSSL_SHA224 */
 
-#endif   /* HAVE_FIPS */
 
-#endif   /* WOLFSSL_TI_HAHS */
+int wc_InitSha256(Sha256* sha256)
+{   /* shorthand for wc_InitSha256_ex with NULL heap, INVALID_DEVID */
+    return wc_InitSha256_ex(sha256, NULL, INVALID_DEVID);
+}
+
+void wc_Sha256Free(Sha256* sha256)
+{
+    /* a NULL context is ignored; only async builds hold anything to free */
+    if (sha256 != NULL) {
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA256)
+        wolfAsync_DevCtxFree(&sha256->asyncDev, WOLFSSL_ASYNC_MARKER_SHA256);
+    #endif /* WOLFSSL_ASYNC_CRYPT */
+    }
+}
+
+#endif /* !WOLFSSL_TI_HASH */
+#endif /* HAVE_FIPS */
+
+
+#ifndef WOLFSSL_TI_HASH
+#ifdef WOLFSSL_SHA224
+    int wc_Sha224GetHash(Sha224* sha224, byte* hash)
+    {
+        Sha224 scratch;
+        int    status;
+
+        /* finalize a scratch copy so the caller's context is not consumed */
+        if (hash == NULL || sha224 == NULL)
+            return BAD_FUNC_ARG;
+
+        status = wc_Sha224Copy(sha224, &scratch);
+        if (status != 0)
+            return status;
+        return wc_Sha224Final(&scratch, hash);
+    }
+    int wc_Sha224Copy(Sha224* src, Sha224* dst)
+    {
+        if (dst == NULL || src == NULL)
+            return BAD_FUNC_ARG;
+
+        /* byte-for-byte clone of the whole context structure */
+        XMEMCPY(dst, src, sizeof(*dst));
+
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        /* async builds also hand the device member to wolfAsync_DevCopy */
+        return wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
+    #else
+        return 0;
+    #endif
+    }
+#endif /* WOLFSSL_SHA224 */
+
+int wc_Sha256GetHash(Sha256* sha256, byte* hash)
+{
+    Sha256 scratch;
+    int    status;
+
+    /* finalize a scratch copy so the caller's context is not consumed */
+    if (hash == NULL || sha256 == NULL)
+        return BAD_FUNC_ARG;
+
+    status = wc_Sha256Copy(sha256, &scratch);
+    if (status != 0)
+        return status;
+    return wc_Sha256Final(&scratch, hash);
+}
+int wc_Sha256Copy(Sha256* src, Sha256* dst)
+{
+    if (dst == NULL || src == NULL)
+        return BAD_FUNC_ARG;
+
+    /* byte-for-byte clone of the whole context structure */
+    XMEMCPY(dst, src, sizeof(*dst));
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    /* async builds also hand the device member to wolfAsync_DevCopy */
+    return wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
+#else
+    return 0;
+#endif
+}
+#endif /* !WOLFSSL_TI_HASH */
 
 #endif /* NO_SHA256 */
-
diff --git a/wolfcrypt/src/sha512.c b/wolfcrypt/src/sha512.c
old mode 100644
new mode 100755
index dbf2cec2e..55b6d4587
--- a/wolfcrypt/src/sha512.c
+++ b/wolfcrypt/src/sha512.c
@@ -25,54 +25,68 @@
 #endif
 
 #include 
-#include 
 
 #ifdef WOLFSSL_SHA512
 
-#ifdef HAVE_FIPS
-int wc_InitSha512(Sha512* sha)
-{
-    return InitSha512_fips(sha);
-}
-
-
-int wc_Sha512Update(Sha512* sha, const byte* data, word32 len)
-{
-    return Sha512Update_fips(sha, data, len);
-}
-
-
-int wc_Sha512Final(Sha512* sha, byte* out)
-{
-    return Sha512Final_fips(sha, out);
-}
-
-
-#if defined(WOLFSSL_SHA384) || defined(HAVE_AESGCM)
-
-int wc_InitSha384(Sha384* sha)
-{
-    return InitSha384_fips(sha);
-}
-
-
-int wc_Sha384Update(Sha384* sha, const byte* data, word32 len)
-{
-    return Sha384Update_fips(sha, data, len);
-}
-
-
-int wc_Sha384Final(Sha384* sha, byte* out)
-{
-    return Sha384Final_fips(sha, out);
-}
-
-
-#endif /* WOLFSSL_SHA384 */
-#else /* else build without using fips */
-#include 
+#include 
 #include 
 
+/* fips wrapper calls, user can call direct */
+#ifdef HAVE_FIPS
+    int wc_InitSha512(Sha512* sha)
+    {
+        return InitSha512_fips(sha);
+    }
+    int wc_InitSha512_ex(Sha512* sha, void* heap, int devId)
+    { /* FIPS build: same as wc_InitSha512 */
+        (void)heap;  /* ignored: InitSha512_fips takes only the context */
+        (void)devId; /* ignored: InitSha512_fips takes only the context */
+        return InitSha512_fips(sha);
+    }
+    int wc_Sha512Update(Sha512* sha, const byte* data, word32 len)
+    {
+        return Sha512Update_fips(sha, data, len);
+    }
+    int wc_Sha512Final(Sha512* sha, byte* out)
+    {
+        return Sha512Final_fips(sha, out);
+    }
+    void wc_Sha512Free(Sha512* sha)
+    {
+        (void)sha;
+        /* Not supported in FIPS */
+    }
+
+    #if defined(WOLFSSL_SHA384) || defined(HAVE_AESGCM)
+        int wc_InitSha384(Sha384* sha)
+        {
+            return InitSha384_fips(sha);
+        }
+        int wc_InitSha384_ex(Sha384* sha, void* heap, int devId)
+        { /* FIPS build: same as wc_InitSha384 */
+            (void)heap;  /* ignored: InitSha384_fips takes only the context */
+            (void)devId; /* ignored: InitSha384_fips takes only the context */
+            return InitSha384_fips(sha);
+        }
+        int wc_Sha384Update(Sha384* sha, const byte* data, word32 len)
+        {
+            return Sha384Update_fips(sha, data, len);
+        }
+        int wc_Sha384Final(Sha384* sha, byte* out)
+        {
+            return Sha384Final_fips(sha, out);
+        }
+        void wc_Sha384Free(Sha384* sha)
+        {
+            (void)sha;
+            /* Not supported in FIPS */
+        }
+    #endif /* WOLFSSL_SHA384 || HAVE_AESGCM */
+
+#else /* else build without using fips */
+
+#include 
+
 #ifdef NO_INLINE
     #include 
 #else
@@ -82,252 +96,54 @@ int wc_Sha384Final(Sha384* sha, byte* out)
 
 
 #if defined(USE_INTEL_SPEEDUP)
-  #define HAVE_INTEL_AVX1
-  #define HAVE_INTEL_AVX2
-#endif
-
-#if defined(HAVE_INTEL_AVX1)
-/* #define DEBUG_XMM  */
-#endif
-
-#if defined(HAVE_INTEL_AVX2)
-#define HAVE_INTEL_RORX
-/* #define DEBUG_YMM  */
-#endif
-
-/*****
-Intel AVX1/AVX2 Macro Control Structure
-
-#if defined(HAVE_INteL_SPEEDUP)
     #define HAVE_INTEL_AVX1
     #define HAVE_INTEL_AVX2
 #endif
 
-int InitSha512(Sha512* sha512) { 
-     Save/Recover XMM, YMM
-     ...
-
-     Check Intel AVX cpuid flags
-}
-
-#if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
-  Transform_AVX1() ; # Function prototype 
-  Transform_AVX2() ; #
-#endif
-
-  _Transform() {     # Native Transform Function body
-  
-  }
-  
-  int Sha512Update() { 
-     Save/Recover XMM, YMM
-     ...
-  }
-  
-  int Sha512Final() { 
-     Save/Recover XMM, YMM
-     ...
-  }
-
-
 #if defined(HAVE_INTEL_AVX1)
-   
-   XMM Instructions/INLINE asm Definitions
-
+    /* #define DEBUG_XMM  */
 #endif
 
 #if defined(HAVE_INTEL_AVX2)
-
-   YMM Instructions/INLINE asm Definitions
-
+    #define HAVE_INTEL_RORX
+    /* #define DEBUG_YMM  */
 #endif
 
-#if defnied(HAVE_INTEL_AVX1)
-  
-  int Transform_AVX1() {
-      Stitched Message Sched/Round
-  }
-
-#endif
-
-#if defnied(HAVE_INTEL_AVX2)
-  
-  int Transform_AVX2() {
-      Stitched Message Sched/Round
-  }
-#endif
-
-
-*/
-
-#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-
-
-/* Each platform needs to query info type 1 from cpuid to see if aesni is
- * supported. Also, let's setup a macro for proper linkage w/o ABI conflicts
- */
-
-#ifndef _MSC_VER
-    #define cpuid(reg, leaf, sub)\
-            __asm__ __volatile__ ("cpuid":\
-             "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\
-             "a" (leaf), "c"(sub));
-
-    #define XASM_LINK(f) asm(f)
-#else
-
-    #include 
-    #define cpuid(a,b) __cpuid((int*)a,b)
-
-    #define XASM_LINK(f)
-
-#endif /* _MSC_VER */
-
-#define EAX 0
-#define EBX 1
-#define ECX 2 
-#define EDX 3
-    
-#define CPUID_AVX1   0x1
-#define CPUID_AVX2   0x2
-#define CPUID_RDRAND 0x4
-#define CPUID_RDSEED 0x8
-#define CPUID_BMI2   0x10   /* MULX, RORX */
-
-#define IS_INTEL_AVX1       (cpuid_flags&CPUID_AVX1)
-#define IS_INTEL_AVX2       (cpuid_flags&CPUID_AVX2)
-#define IS_INTEL_BMI2       (cpuid_flags&CPUID_BMI2)
-#define IS_INTEL_RDRAND     (cpuid_flags&CPUID_RDRAND)
-#define IS_INTEL_RDSEED     (cpuid_flags&CPUID_RDSEED)
-
-static word32 cpuid_check = 0 ;
-static word32 cpuid_flags = 0 ;
-
-static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) {
-    int got_intel_cpu=0;
-    unsigned int reg[5]; 
-    
-    reg[4] = '\0' ;
-    cpuid(reg, 0, 0);  
-    if(XMEMCMP((char *)&(reg[EBX]), "Genu", 4) == 0 &&  
-                XMEMCMP((char *)&(reg[EDX]), "ineI", 4) == 0 &&  
-                XMEMCMP((char *)&(reg[ECX]), "ntel", 4) == 0) {  
-        got_intel_cpu = 1;  
-    }    
-    if (got_intel_cpu) {
-        cpuid(reg, leaf, sub);
-        return((reg[num]>>bit)&0x1) ;
-    }
-    return 0 ;
-}
-
-
-static int set_cpuid_flags() {
-    if(cpuid_check ==0) {
-        if(cpuid_flag(1, 0, ECX, 28)){ cpuid_flags |= CPUID_AVX1 ;}
-        if(cpuid_flag(7, 0, EBX, 5)){  cpuid_flags |= CPUID_AVX2 ; }
-        if(cpuid_flag(7, 0, EBX, 8)) { cpuid_flags |= CPUID_BMI2 ; }
-        if(cpuid_flag(1, 0, ECX, 30)){ cpuid_flags |= CPUID_RDRAND ;  } 
-        if(cpuid_flag(7, 0, EBX, 18)){ cpuid_flags |= CPUID_RDSEED ;  }
-		cpuid_check = 1 ;
-		return 0 ;
-    }
-    return 1 ;
-}
-
-
-/* #if defined(HAVE_INTEL_AVX1/2) at the tail of sha512 */
-
-#if defined(HAVE_INTEL_AVX1)
-static int Transform_AVX1(Sha512 *sha512) ;
-#endif
-
-#if defined(HAVE_INTEL_AVX2)
-static int Transform_AVX2(Sha512 *sha512) ; 
-
-#if defined(HAVE_INTEL_AVX1) && defined(HAVE_INTEL_AVX2) && defined(HAVE_INTEL_RORX)
-static int Transform_AVX1_RORX(Sha512 *sha512) ;
-#endif
-
-#endif
-
-static int _Transform(Sha512 *sha512) ; 
-    
-static int (*Transform_p)(Sha512* sha512) = _Transform ;
-
-#define Transform(sha512) (*Transform_p)(sha512)
-
-static void set_Transform(void) {
-     if(set_cpuid_flags()) return ;
-
-#if defined(HAVE_INTEL_AVX2)
-     if(IS_INTEL_AVX2 && IS_INTEL_BMI2){ 
-         Transform_p = Transform_AVX1_RORX; return ; 
-         Transform_p = Transform_AVX2      ; 
-                  /* for avoiding warning,"not used" */
-     }
-#endif
-#if defined(HAVE_INTEL_AVX1)
-     Transform_p = ((IS_INTEL_AVX1) ? Transform_AVX1 : _Transform) ; return ;
-#endif
-     Transform_p = _Transform ; return ;
-}
-
-#else
-   #define Transform(sha512) _Transform(sha512)
-#endif
-
-/* Dummy for saving MM_REGs on behalf of Transform */
-/* #if defined(HAVE_INTEL_AVX2)
- #define  SAVE_XMM_YMM   __asm__ volatile("orq %%r8, %%r8":::\
-   "%ymm0","%ymm1","%ymm2","%ymm3","%ymm4","%ymm5","%ymm6","%ymm7","%ymm8","%ymm9","%ymm10","%ymm11",\
-   "%ymm12","%ymm13","%ymm14","%ymm15")
-*/
-#if defined(HAVE_INTEL_AVX1)
-   #define  SAVE_XMM_YMM   __asm__ volatile("orq %%r8, %%r8":::\
-    "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15")
-#else
-#define  SAVE_XMM_YMM
-#endif
-
-#if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
-
-#include 
-
-#endif /* defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2) */
-
 
 #if defined(HAVE_INTEL_RORX)
-#define ROTR(func, bits, x) \
-word64 func(word64 x) {  word64 ret ;\
-    __asm__ ("rorx $"#bits", %1, %0\n\t":"=r"(ret):"r"(x):) ;\
-    return ret ;\
-}
+    #define ROTR(func, bits, x) \
+    word64 func(word64 x) {  word64 ret ;\
+        __asm__ ("rorx $"#bits", %1, %0\n\t":"=r"(ret):"r"(x):) ;\
+        return ret ;\
+    }
 
-static INLINE ROTR(rotrFixed64_28, 28, x)
-static INLINE ROTR(rotrFixed64_34, 34, x)
-static INLINE ROTR(rotrFixed64_39, 39, x)
-static INLINE ROTR(rotrFixed64_14, 14, x)
-static INLINE ROTR(rotrFixed64_18, 18, x)
-static INLINE ROTR(rotrFixed64_41, 41, x)
+    static INLINE ROTR(rotrFixed64_28, 28, x) /* no ';': ROTR is a full body */
+    static INLINE ROTR(rotrFixed64_34, 34, x)
+    static INLINE ROTR(rotrFixed64_39, 39, x) /* 28/34/39 feed S0_RORX */
+    static INLINE ROTR(rotrFixed64_14, 14, x)
+    static INLINE ROTR(rotrFixed64_18, 18, x)
+    static INLINE ROTR(rotrFixed64_41, 41, x) /* 14/18/41 feed S1_RORX */
 
-#define S0_RORX(x) (rotrFixed64_28(x)^rotrFixed64_34(x)^rotrFixed64_39(x))
-#define S1_RORX(x) (rotrFixed64_14(x)^rotrFixed64_18(x)^rotrFixed64_41(x))
+    #define S0_RORX(x) (rotrFixed64_28(x)^rotrFixed64_34(x)^rotrFixed64_39(x))
+    #define S1_RORX(x) (rotrFixed64_14(x)^rotrFixed64_18(x)^rotrFixed64_41(x))
+#endif /* HAVE_INTEL_RORX */
+
+#if defined(HAVE_BYTEREVERSE64) && \
+        !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2)
+    /* In-place byte swap of every word64 in `out`; `in` is ignored. `size` is
+     * in bytes and parenthesized so expression arguments divide as a whole. */
+    #define ByteReverseWords64(out, in, size) ByteReverseWords64_1(out, size)
+    #define ByteReverseWords64_1(buf, size) \
+        { unsigned int i; for (i = 0; i < (size)/sizeof(word64); i++) { \
+            __asm__ volatile("bswapq %0":"+r"((buf)[i])::); \
+        } }
+#endif
 
-#if defined(HAVE_BYTEREVERSE64) && !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2)
-#define ByteReverseWords64(out, in, size) ByteReverseWords64_1(out, size)
-#define ByteReverseWords64_1(buf, size)\
- { unsigned int i ;\
-   for(i=0; i< size/sizeof(word64); i++){\
-       __asm__ volatile("bswapq %0":"+r"(buf[i])::) ;\
-   }\
-}
-#endif
-
-
-int wc_InitSha512(Sha512* sha512)
+static int InitSha512(Sha512* sha512)
 {
+    if (sha512 == NULL)
+        return BAD_FUNC_ARG;
+
     sha512->digest[0] = W64LIT(0x6a09e667f3bcc908);
     sha512->digest[1] = W64LIT(0xbb67ae8584caa73b);
     sha512->digest[2] = W64LIT(0x3c6ef372fe94f82b);
@@ -340,15 +156,233 @@ int wc_InitSha512(Sha512* sha512)
     sha512->buffLen = 0;
     sha512->loLen   = 0;
     sha512->hiLen   = 0;
-    
-#if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
-    set_Transform() ; /* choose best Transform function under this runtime environment */
-#endif
-    
-    return 0 ;
+
+    return 0;
 }
 
 
+/* Hardware Acceleration */
+#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+
+    /*****
+    Intel AVX1/AVX2 Macro Control Structure
+
+    #if defined(USE_INTEL_SPEEDUP)
+        #define HAVE_INTEL_AVX1
+        #define HAVE_INTEL_AVX2
+    #endif
+
+    int InitSha512(Sha512* sha512) {
+         Save/Recover XMM, YMM
+         ...
+
+         Check Intel AVX cpuid flags
+    }
+
+    #if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
+      Transform_AVX1(); # Function prototype
+      Transform_AVX2(); #
+    #endif
+
+      _Transform() {     # Native Transform Function body
+
+      }
+
+      int Sha512Update() {
+         Save/Recover XMM, YMM
+         ...
+      }
+
+      int Sha512Final() {
+         Save/Recover XMM, YMM
+         ...
+      }
+
+
+    #if defined(HAVE_INTEL_AVX1)
+
+       XMM Instructions/INLINE asm Definitions
+
+    #endif
+
+    #if defined(HAVE_INTEL_AVX2)
+
+       YMM Instructions/INLINE asm Definitions
+
+    #endif
+
+    #if defined(HAVE_INTEL_AVX1)
+
+      int Transform_AVX1() {
+          Stitched Message Sched/Round
+      }
+
+    #endif
+
+    #if defined(HAVE_INTEL_AVX2)
+
+      int Transform_AVX2() {
+          Stitched Message Sched/Round
+      }
+    #endif
+
+    */
+
+
+    /* Each platform needs to query cpuid (leaves 1 and 7 below) to see if
+     * AVX1, AVX2 and BMI2 are supported. Also, let's set up a macro for proper
+     * linkage w/o ABI conflicts */
+
+    #ifndef _MSC_VER
+        #define cpuid(reg, leaf, sub)\
+            __asm__ __volatile__ ("cpuid":\
+                "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\
+                "a" (leaf), "c"(sub));
+
+        #define XASM_LINK(f) asm(f)
+    #else
+        /* MSVC: callers pass (reg, leaf, sub), so the macro must take three
+         * arguments; __cpuidex is the intrinsic that accepts the sub-leaf */
+        #include <intrin.h>
+        #define cpuid(a,b,c) __cpuidex((int*)a,b,c)
+        #define XASM_LINK(f)
+    #endif /* _MSC_VER */
+
+    #define EAX 0
+    #define EBX 1
+    #define ECX 2
+    #define EDX 3
+
+    #define CPUID_AVX1   0x1
+    #define CPUID_AVX2   0x2
+    #define CPUID_RDRAND 0x4
+    #define CPUID_RDSEED 0x8
+    #define CPUID_BMI2   0x10   /* MULX, RORX */
+
+    #define IS_INTEL_AVX1       (cpuid_flags & CPUID_AVX1)
+    #define IS_INTEL_AVX2       (cpuid_flags & CPUID_AVX2)
+    #define IS_INTEL_BMI2       (cpuid_flags & CPUID_BMI2)
+    #define IS_INTEL_RDRAND     (cpuid_flags & CPUID_RDRAND)
+    #define IS_INTEL_RDSEED     (cpuid_flags & CPUID_RDSEED)
+
+    static word32 cpuid_check = 0;
+    static word32 cpuid_flags = 0;
+
+    static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) {
+        int got_intel_cpu = 0;
+        unsigned int reg[5];
+        /* Returns bit `bit` of reg[num] from cpuid(leaf, sub); 0 if not Intel */
+        reg[4] = '\0';
+        cpuid(reg, 0, 0); /* leaf 0: vendor string, compared piecewise below */
+        if (XMEMCMP((char *)&(reg[EBX]), "Genu", 4) == 0 &&
+            XMEMCMP((char *)&(reg[EDX]), "ineI", 4) == 0 &&
+            XMEMCMP((char *)&(reg[ECX]), "ntel", 4) == 0) {
+            got_intel_cpu = 1;
+        }
+        if (got_intel_cpu) {
+            cpuid(reg, leaf, sub);
+            return ((reg[num] >> bit) & 0x1);
+        }
+        return 0;
+    }
+
+
+    /* Probes cpuid on the first call only. Returns 0 when the flags were just
+     * computed and 1 when an earlier call already did the work. */
+    static int set_cpuid_flags(void) {
+        if (cpuid_check != 0) return 1;
+        if (cpuid_flag(1, 0, ECX, 28)) { cpuid_flags |= CPUID_AVX1; }
+        if (cpuid_flag(7, 0, EBX, 5))  { cpuid_flags |= CPUID_AVX2; }
+        if (cpuid_flag(7, 0, EBX, 8))  { cpuid_flags |= CPUID_BMI2; }
+        if (cpuid_flag(1, 0, ECX, 30)) { cpuid_flags |= CPUID_RDRAND; }
+        if (cpuid_flag(7, 0, EBX, 18)) { cpuid_flags |= CPUID_RDSEED; }
+        cpuid_check = 1;
+        return 0;
+    }
+
+
+    #if defined(HAVE_INTEL_AVX1)
+        static int Transform_AVX1(Sha512 *sha512);
+    #endif
+    #if defined(HAVE_INTEL_AVX2)
+        static int Transform_AVX2(Sha512 *sha512);
+        #if defined(HAVE_INTEL_AVX1) && defined(HAVE_INTEL_AVX2) && defined(HAVE_INTEL_RORX)
+            static int Transform_AVX1_RORX(Sha512 *sha512);
+        #endif
+    #endif
+    static int _Transform(Sha512 *sha512);
+    static int (*Transform_p)(Sha512* sha512) = _Transform;
+    #define Transform(sha512) (*Transform_p)(sha512)
+
+    /* Dummy for saving MM_REGs on behalf of Transform */
+    /* #if defined(HAVE_INTEL_AVX2)
+     #define SAVE_XMM_YMM   __asm__ volatile("orq %%r8, %%r8":::\
+       "%ymm0","%ymm1","%ymm2","%ymm3","%ymm4","%ymm5","%ymm6","%ymm7","%ymm8","%ymm9","%ymm10","%ymm11",\
+       "%ymm12","%ymm13","%ymm14","%ymm15")
+    */
+    #if defined(HAVE_INTEL_AVX1)
+        #define SAVE_XMM_YMM   __asm__ volatile("orq %%r8, %%r8":::\
+            "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15")
+    #endif
+
+
+    int wc_InitSha512_ex(Sha512* sha512, void* heap, int devId)
+    {
+        int ret = InitSha512(sha512); /* BAD_FUNC_ARG when sha512 is NULL */
+        if (ret != 0)
+            return ret;
+        sha512->heap = heap; /* keep in step with the non-AVX build */
+        (void)devId; /* NOTE(review): asyncDev is not set up in this build */
+        if (set_cpuid_flags())
+            return ret; /* cpuid already probed by an earlier call */
+
+    #if defined(HAVE_INTEL_AVX2)
+        if (IS_INTEL_AVX2 && IS_INTEL_BMI2) {
+            Transform_p = Transform_AVX1_RORX; return ret;
+            Transform_p = Transform_AVX2;
+                /* unreachable on purpose: avoids a "not used" warning */
+        }
+    #endif
+    #if defined(HAVE_INTEL_AVX1)
+        Transform_p = ((IS_INTEL_AVX1) ? Transform_AVX1 : _Transform); return ret;
+    #endif
+        Transform_p = _Transform;
+
+        return ret;
+    }
+
+#else
+    #define Transform(sha512) _Transform(sha512)
+
+    int wc_InitSha512_ex(Sha512* sha512, void* heap, int devId)
+    {
+        int ret = 0;
+        /* Resets sha512 via InitSha512 and records heap; BAD_FUNC_ARG on NULL */
+        if (sha512 == NULL)
+            return BAD_FUNC_ARG;
+
+        sha512->heap = heap;
+
+        ret = InitSha512(sha512);
+        if (ret != 0)
+            return ret;
+        /* async SHA-512 builds: also init the device context with devId */
+    #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512)
+        ret = wolfAsync_DevCtxInit(&sha512->asyncDev,
+                            WOLFSSL_ASYNC_MARKER_SHA512, sha512->heap, devId);
+    #else
+        (void)devId;
+    #endif /* WOLFSSL_ASYNC_CRYPT */
+
+        return ret;
+    }
+
+#endif /* Hardware Acceleration */
+
+#ifndef SAVE_XMM_YMM
+    #define SAVE_XMM_YMM
+#endif
+
 static const word64 K512[80] = {
 	W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd),
 	W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc),
@@ -442,7 +476,7 @@ static int _Transform(Sha512* sha512)
     /* over twice as small, but 50% slower */
     /* 80 operations, not unrolled */
     for (j = 0; j < 80; j += 16) {
-        int m; 
+        int m;
         for (m = 0; m < 16; m++) { /* braces needed here for macros {} */
             R(m);
         }
@@ -489,9 +523,16 @@ static INLINE void AddLength(Sha512* sha512, word32 len)
 
 static INLINE int Sha512Update(Sha512* sha512, const byte* data, word32 len)
 {
+    int ret = 0;
+
     /* do block size increments */
     byte* local = (byte*)sha512->buffer;
-    SAVE_XMM_YMM ; /* for Intel AVX */
+
+    /* check that internal buffLen is valid */
+    if (sha512->buffLen > SHA512_BLOCK_SIZE)
+        return BUFFER_E;
+
+    SAVE_XMM_YMM; /* for Intel AVX */
 
     while (len) {
         word32 add = min(len, SHA512_BLOCK_SIZE - sha512->buffLen);
@@ -502,27 +543,35 @@ static INLINE int Sha512Update(Sha512* sha512, const byte* data, word32 len)
         len          -= add;
 
         if (sha512->buffLen == SHA512_BLOCK_SIZE) {
-            int ret;
-            #if defined(LITTLE_ENDIAN_ORDER)
-                #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-                if(!IS_INTEL_AVX1 && !IS_INTEL_AVX2) 
-                #endif
-                    ByteReverseWords64(sha512->buffer, sha512->buffer,
-                                   SHA512_BLOCK_SIZE);
-            #endif
+    #if defined(LITTLE_ENDIAN_ORDER)
+        #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+            if(!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
+        #endif
+                ByteReverseWords64(sha512->buffer, sha512->buffer,
+                               SHA512_BLOCK_SIZE);
+    #endif
             ret = Transform(sha512);
             if (ret != 0)
-                return ret;
+                break;
 
             AddLength(sha512, SHA512_BLOCK_SIZE);
             sha512->buffLen = 0;
         }
     }
-    return 0;
+
+    return ret;
 }
 
 int wc_Sha512Update(Sha512* sha512, const byte* data, word32 len)
 {
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512)
+    if (sha512->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA512) {
+    #if defined(HAVE_INTEL_QA)
+        return IntelQaSymSha512(&sha512->asyncDev, NULL, data, len);
+    #endif
+    }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
     return Sha512Update(sha512, data, len);
 }
 
@@ -539,14 +588,15 @@ static INLINE int Sha512Final(Sha512* sha512)
 
     /* pad with zeros */
     if (sha512->buffLen > SHA512_PAD_SIZE) {
-        XMEMSET(&local[sha512->buffLen], 0, SHA512_BLOCK_SIZE -sha512->buffLen);
+        XMEMSET(&local[sha512->buffLen], 0, SHA512_BLOCK_SIZE - sha512->buffLen);
         sha512->buffLen += SHA512_BLOCK_SIZE - sha512->buffLen;
-        #if defined(LITTLE_ENDIAN_ORDER) 
-            #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-            if(!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
-            #endif
+#if defined(LITTLE_ENDIAN_ORDER)
+    #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+        if (!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
+    #endif
             ByteReverseWords64(sha512->buffer,sha512->buffer,SHA512_BLOCK_SIZE);
-        #endif
+
+#endif /* LITTLE_ENDIAN_ORDER */
         ret = Transform(sha512);
         if (ret != 0)
             return ret;
@@ -554,29 +604,29 @@ static INLINE int Sha512Final(Sha512* sha512)
         sha512->buffLen = 0;
     }
     XMEMSET(&local[sha512->buffLen], 0, SHA512_PAD_SIZE - sha512->buffLen);
-   
+
     /* put lengths in bits */
-    sha512->hiLen = (sha512->loLen >> (8*sizeof(sha512->loLen) - 3)) + 
-                 (sha512->hiLen << 3);
+    sha512->hiLen = (sha512->loLen >> (8 * sizeof(sha512->loLen) - 3)) +
+                                                         (sha512->hiLen << 3);
     sha512->loLen = sha512->loLen << 3;
 
     /* store lengths */
-    #if defined(LITTLE_ENDIAN_ORDER)
-        #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-        if(!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
-        #endif
+#if defined(LITTLE_ENDIAN_ORDER)
+#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+    if (!IS_INTEL_AVX1 && !IS_INTEL_AVX2)
+#endif
         ByteReverseWords64(sha512->buffer, sha512->buffer, SHA512_PAD_SIZE);
-    #endif
+#endif
     /* ! length ordering dependent on digest endian type ! */
 
     sha512->buffer[SHA512_BLOCK_SIZE / sizeof(word64) - 2] = sha512->hiLen;
     sha512->buffer[SHA512_BLOCK_SIZE / sizeof(word64) - 1] = sha512->loLen;
-    #if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
-    if(IS_INTEL_AVX1 || IS_INTEL_AVX2)
+#if defined(HAVE_INTEL_AVX1) || defined(HAVE_INTEL_AVX2)
+    if (IS_INTEL_AVX1 || IS_INTEL_AVX2)
         ByteReverseWords64(&(sha512->buffer[SHA512_BLOCK_SIZE / sizeof(word64) - 2]),
                            &(sha512->buffer[SHA512_BLOCK_SIZE / sizeof(word64) - 2]),
                            SHA512_BLOCK_SIZE - SHA512_PAD_SIZE);
-    #endif
+#endif
     ret = Transform(sha512);
     if (ret != 0)
         return ret;
@@ -590,88 +640,113 @@ static INLINE int Sha512Final(Sha512* sha512)
 
 int wc_Sha512Final(Sha512* sha512, byte* hash)
 {
-    int ret = Sha512Final(sha512);
+    int ret;
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512)
+    if (sha512->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA512) {
+    #if defined(HAVE_INTEL_QA)
+        return IntelQaSymSha512(&sha512->asyncDev, hash, NULL,
+                                            SHA512_DIGEST_SIZE);
+    #endif
+    }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+    ret = Sha512Final(sha512);
     if (ret != 0)
         return ret;
 
     XMEMCPY(hash, sha512->digest, SHA512_DIGEST_SIZE);
 
-    return wc_InitSha512(sha512);  /* reset state */
+    return InitSha512(sha512);  /* reset state */
+}
+
+
+int wc_InitSha512(Sha512* sha512)
+{ /* default init: NULL heap hint and INVALID_DEVID */
+    return wc_InitSha512_ex(sha512, NULL, INVALID_DEVID);
+}
+
+void wc_Sha512Free(Sha512* sha512)
+{
+    if (sha512 == NULL)
+        return;
+    /* async SHA-512 builds hand asyncDev to wolfAsync_DevCtxFree; else no-op */
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA512)
+    wolfAsync_DevCtxFree(&sha512->asyncDev, WOLFSSL_ASYNC_MARKER_SHA512);
+#endif /* WOLFSSL_ASYNC_CRYPT */
 }
 
 
 #if defined(HAVE_INTEL_AVX1)
 
-#define Rx_1(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j] + W_X[i] ;
+#define Rx_1(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j] + W_X[i];
 #define Rx_2(i) d(i)+=h(i);
 #define Rx_3(i) h(i)+=S0(a(i))+Maj(a(i),b(i),c(i));
 
 #if defined(HAVE_INTEL_RORX)
-#define Rx_RORX_1(i) h(i)+=S1_RORX(e(i))+Ch(e(i),f(i),g(i))+K[i+j] + W_X[i] ;
-#define Rx_RORX_2(i) d(i)+=h(i);
-#define Rx_RORX_3(i) h(i)+=S0_RORX(a(i))+Maj(a(i),b(i),c(i));
-#endif
 
-#endif
+    #define Rx_RORX_1(i) h(i)+=S1_RORX(e(i))+Ch(e(i),f(i),g(i))+K[i+j] + W_X[i];
+    #define Rx_RORX_2(i) d(i)+=h(i);
+    #define Rx_RORX_3(i) h(i)+=S0_RORX(a(i))+Maj(a(i),b(i),c(i));
+#endif /* HAVE_INTEL_RORX */
 
-#if defined(HAVE_INTEL_AVX2) 
-#define Ry_1(i, w) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j] + w ; 
+#endif /* HAVE_INTEL_AVX1 */
+
+#if defined(HAVE_INTEL_AVX2)
+#define Ry_1(i, w) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j] + w;
 #define Ry_2(i, w) d(i)+=h(i);
 #define Ry_3(i, w) h(i)+=S0(a(i))+Maj(a(i),b(i),c(i));
-#endif
+#endif /* HAVE_INTEL_AVX2 */
 
-#if defined(HAVE_INTEL_AVX1) /* INLINE Assember for Intel AVX1 instructions */
+/* INLINE Assembler for Intel AVX1 instructions */
+#if defined(HAVE_INTEL_AVX1)
 #if defined(DEBUG_XMM)
+    #define SAVE_REG(i)     __asm__ volatile("vmovdqu %%xmm"#i", %0 \n\t":"=m"(reg[i][0])::XMM_REGs);
+    #define RECV_REG(i)     __asm__ volatile("vmovdqu %0, %%xmm"#i" \n\t"::"m"(reg[i][0]):XMM_REGs);
 
-#define SAVE_REG(i)     __asm__ volatile("vmovdqu %%xmm"#i", %0 \n\t":"=m"(reg[i][0])::XMM_REGs);
-#define RECV_REG(i)     __asm__ volatile("vmovdqu %0, %%xmm"#i" \n\t"::"m"(reg[i][0]):XMM_REGs);
-
-#define _DUMP_REG(REG, name)\
-    { word64 buf[16] ;word64 reg[16][2];int k ;\
-      SAVE_REG(0); SAVE_REG(1); SAVE_REG(2);  SAVE_REG(3);  SAVE_REG(4);  \
-      SAVE_REG(5);   SAVE_REG(6); SAVE_REG(7);SAVE_REG(8); SAVE_REG(9); SAVE_REG(10);\
-       SAVE_REG(11); SAVE_REG(12); SAVE_REG(13); SAVE_REG(14); SAVE_REG(15); \
-      __asm__ volatile("vmovdqu %%"#REG", %0 \n\t":"=m"(buf[0])::XMM_REGs);\
-      printf(" "#name":\t") ; for(k=0; k<2; k++) printf("%016lx.", (word64)(buf[k])); printf("\n") ; \
-      RECV_REG(0); RECV_REG(1); RECV_REG(2);  RECV_REG(3);  RECV_REG(4);\
-      RECV_REG(5);   RECV_REG(6); RECV_REG(7); RECV_REG(8); RECV_REG(9);\
-      RECV_REG(10); RECV_REG(11); RECV_REG(12); RECV_REG(13); RECV_REG(14); RECV_REG(15);\
-    }
-
-#define DUMP_REG(REG) _DUMP_REG(REG, #REG) 
-#define PRINTF(fmt, ...) 
+    #define _DUMP_REG(REG, name)\
+        { word64 buf[16];word64 reg[16][2];int k;\
+          SAVE_REG(0); SAVE_REG(1); SAVE_REG(2);  SAVE_REG(3);  SAVE_REG(4);  \
+          SAVE_REG(5);   SAVE_REG(6); SAVE_REG(7);SAVE_REG(8); SAVE_REG(9); SAVE_REG(10);\
+           SAVE_REG(11); SAVE_REG(12); SAVE_REG(13); SAVE_REG(14); SAVE_REG(15); \
+          __asm__ volatile("vmovdqu %%"#REG", %0 \n\t":"=m"(buf[0])::XMM_REGs);\
+          printf(" "#name":\t"); for(k=0; k<2; k++) printf("%016lx.", (word64)(buf[k])); printf("\n"); \
+          RECV_REG(0); RECV_REG(1); RECV_REG(2);  RECV_REG(3);  RECV_REG(4);\
+          RECV_REG(5);   RECV_REG(6); RECV_REG(7); RECV_REG(8); RECV_REG(9);\
+          RECV_REG(10); RECV_REG(11); RECV_REG(12); RECV_REG(13); RECV_REG(14); RECV_REG(15);\
+        }
 
+    #define DUMP_REG(REG) _DUMP_REG(REG, #REG)
+    #define PRINTF(fmt, ...)
 #else
-
-#define DUMP_REG(REG) 
-#define PRINTF(fmt, ...) 
-
-#endif
+    #define DUMP_REG(REG)
+    #define PRINTF(fmt, ...)
+#endif /* DEBUG_XMM */
 
 #define _MOVE_to_REG(xymm, mem)       __asm__ volatile("vmovdqu %0, %%"#xymm" "\
-        :: "m"(mem):XMM_REGs) ;
+        :: "m"(mem):XMM_REGs);
 #define _MOVE_to_MEM(mem,i, xymm)     __asm__ volatile("vmovdqu %%"#xymm", %0" :\
-         "=m"(mem[i]),"=m"(mem[i+1]),"=m"(mem[i+2]),"=m"(mem[i+3])::XMM_REGs) ;
+         "=m"(mem[i]),"=m"(mem[i+1]),"=m"(mem[i+2]),"=m"(mem[i+3])::XMM_REGs);
 #define _MOVE(dest, src)              __asm__ volatile("vmovdqu %%"#src",  %%"\
-        #dest" ":::XMM_REGs) ;
+        #dest" ":::XMM_REGs);
 
 #define _S_TEMP(dest, src, bits, temp)  __asm__ volatile("vpsrlq  $"#bits", %%"\
         #src", %%"#dest"\n\tvpsllq  $64-"#bits", %%"#src", %%"#temp"\n\tvpor %%"\
-        #temp",%%"#dest", %%"#dest" ":::XMM_REGs) ;
+        #temp",%%"#dest", %%"#dest" ":::XMM_REGs);
 #define _AVX1_R(dest, src, bits)      __asm__ volatile("vpsrlq  $"#bits", %%"\
-        #src", %%"#dest" ":::XMM_REGs) ;
+        #src", %%"#dest" ":::XMM_REGs);
 #define _XOR(dest, src1, src2)        __asm__ volatile("vpxor   %%"#src1", %%"\
-        #src2", %%"#dest" ":::XMM_REGs) ;
+        #src2", %%"#dest" ":::XMM_REGs);
 #define _OR(dest, src1, src2)         __asm__ volatile("vpor    %%"#src1", %%"\
-        #src2", %%"#dest" ":::XMM_REGs) ;
+        #src2", %%"#dest" ":::XMM_REGs);
 #define _ADD(dest, src1, src2)        __asm__ volatile("vpaddq   %%"#src1", %%"\
-        #src2", %%"#dest" ":::XMM_REGs) ;
+        #src2", %%"#dest" ":::XMM_REGs);
 #define _ADD_MEM(dest, src1, mem)     __asm__ volatile("vpaddq   %0, %%"#src1", %%"\
-        #dest" "::"m"(mem):XMM_REGs) ;
+        #dest" "::"m"(mem):XMM_REGs);
 
 #define MOVE_to_REG(xymm, mem)      _MOVE_to_REG(xymm, mem)
 #define MOVE_to_MEM(mem, i, xymm)   _MOVE_to_MEM(mem, i, xymm)
-#define MOVE(dest, src)             _MOVE(dest, src)  
+#define MOVE(dest, src)             _MOVE(dest, src)
 
 #define XOR(dest, src1, src2)      _XOR(dest, src1, src2)
 #define OR(dest, src1, src2)       _OR(dest, src1, src2)
@@ -682,19 +757,19 @@ int wc_Sha512Final(Sha512* sha512, byte* hash)
 #define AVX1_R(dest, src, bits)      _AVX1_R(dest, src, bits)
 
 #define Init_Mask(mask) \
-     __asm__ volatile("vmovdqu %0, %%xmm1\n\t"::"m"(mask):"%xmm1") ;
-     
+     __asm__ volatile("vmovdqu %0, %%xmm1\n\t"::"m"(mask):"%xmm1");
+
 #define _W_from_buff1(w, buff, xmm) \
     /* X0..3(xmm4..7), W[0..15] = sha512->buffer[0.15];  */\
      __asm__ volatile("vmovdqu %1, %%"#xmm"\n\t"\
                       "vpshufb %%xmm1, %%"#xmm", %%"#xmm"\n\t"\
                       "vmovdqu %%"#xmm", %0"\
-                      :"=m"(w): "m"(buff):"%xmm0") ;
+                      :"=m"(w): "m"(buff):"%xmm0");
 
-#define W_from_buff1(w, buff, xmm) _W_from_buff1(w, buff, xmm) 
+#define W_from_buff1(w, buff, xmm) _W_from_buff1(w, buff, xmm)
 
 #define W_from_buff(w, buff)\
-     Init_Mask(mBYTE_FLIP_MASK[0]) ;\
+     Init_Mask(mBYTE_FLIP_MASK[0]);\
      W_from_buff1(w[0], buff[0], W_0);\
      W_from_buff1(w[2], buff[2], W_2);\
      W_from_buff1(w[4], buff[4], W_4);\
@@ -703,8 +778,8 @@ int wc_Sha512Final(Sha512* sha512, byte* hash)
      W_from_buff1(w[10],buff[10],W_10);\
      W_from_buff1(w[12],buff[12],W_12);\
      W_from_buff1(w[14],buff[14],W_14);
-                          
-static word64 mBYTE_FLIP_MASK[] =  { 0x0001020304050607, 0x08090a0b0c0d0e0f } ;
+
+static word64 mBYTE_FLIP_MASK[] =  { 0x0001020304050607, 0x08090a0b0c0d0e0f };
 
 #define W_I_15  xmm14
 #define W_I_7   xmm11
@@ -725,193 +800,189 @@ static word64 mBYTE_FLIP_MASK[] =  { 0x0001020304050607, 0x08090a0b0c0d0e0f } ;
 
 #define XMM_REGs
 
-#define s0_1(dest, src)      AVX1_S(dest, src, 1); 
-#define s0_2(dest, src)      AVX1_S(G_TEMP, src, 8); XOR(dest, G_TEMP, dest) ; 
-#define s0_3(dest, src)      AVX1_R(G_TEMP, src, 7);  XOR(dest, G_TEMP, dest) ;
+#define s0_1(dest, src)      AVX1_S(dest, src, 1);
+#define s0_2(dest, src)      AVX1_S(G_TEMP, src, 8); XOR(dest, G_TEMP, dest);
+#define s0_3(dest, src)      AVX1_R(G_TEMP, src, 7);  XOR(dest, G_TEMP, dest);
 
 #define s1_1(dest, src)      AVX1_S(dest, src, 19);
-#define s1_2(dest, src)      AVX1_S(G_TEMP, src, 61); XOR(dest, G_TEMP, dest) ; 
-#define s1_3(dest, src)      AVX1_R(G_TEMP, src, 6); XOR(dest, G_TEMP, dest) ;
+#define s1_2(dest, src)      AVX1_S(G_TEMP, src, 61); XOR(dest, G_TEMP, dest);
+#define s1_3(dest, src)      AVX1_R(G_TEMP, src, 6); XOR(dest, G_TEMP, dest);
+
+#define s0_(dest, src)       s0_1(dest, src); s0_2(dest, src); s0_3(dest, src)
+#define s1_(dest, src)       s1_1(dest, src); s1_2(dest, src); s1_3(dest, src)
 
-#define s0_(dest, src)       s0_1(dest, src) ; s0_2(dest, src) ; s0_3(dest, src)
-#define s1_(dest, src)       s1_1(dest, src) ; s1_2(dest, src) ; s1_3(dest, src)
-        
 #define Block_xx_1(i) \
-    MOVE_to_REG(W_I_15, W_X[(i-15)&15]) ;\
-    MOVE_to_REG(W_I_7,  W_X[(i- 7)&15]) ;\
-        
+    MOVE_to_REG(W_I_15, W_X[(i-15)&15]);\
+    MOVE_to_REG(W_I_7,  W_X[(i- 7)&15]);\
+
 #define Block_xx_2(i) \
-    MOVE_to_REG(W_I_2,  W_X[(i- 2)&15]) ;\
-    MOVE_to_REG(W_I,    W_X[(i)]) ;\
-        
+    MOVE_to_REG(W_I_2,  W_X[(i- 2)&15]);\
+    MOVE_to_REG(W_I,    W_X[(i)]);\
+
 #define Block_xx_3(i) \
-    s0_ (XMM_TEMP0, W_I_15) ;\
-        
+    s0_ (XMM_TEMP0, W_I_15);\
+
 #define Block_xx_4(i) \
-    ADD(W_I, W_I, XMM_TEMP0) ;\
-    ADD(W_I, W_I, W_I_7) ;\
-        
+    ADD(W_I, W_I, XMM_TEMP0);\
+    ADD(W_I, W_I, W_I_7);\
+
 #define Block_xx_5(i) \
-    s1_ (XMM_TEMP0, W_I_2) ;\
-    
+    s1_ (XMM_TEMP0, W_I_2);\
+
 #define Block_xx_6(i) \
-    ADD(W_I, W_I, XMM_TEMP0) ;\
-    MOVE_to_MEM(W_X,i, W_I) ;\
-    if(i==0)\
-        MOVE_to_MEM(W_X,16, W_I) ;\
+    ADD(W_I, W_I, XMM_TEMP0);\
+    MOVE_to_MEM(W_X,i, W_I);\
+    if (i==0)\
+        MOVE_to_MEM(W_X,16, W_I);\
 
 #define Block_xx_7(i) \
-    MOVE_to_REG(W_I_15, W_X[(i-15)&15]) ;\
-    MOVE_to_REG(W_I_7,  W_X[(i- 7)&15]) ;\
-            
+    MOVE_to_REG(W_I_15, W_X[(i-15)&15]);\
+    MOVE_to_REG(W_I_7,  W_X[(i- 7)&15]);\
+
 #define Block_xx_8(i) \
-    MOVE_to_REG(W_I_2,  W_X[(i- 2)&15]) ;\
-    MOVE_to_REG(W_I,    W_X[(i)]) ;\
+    MOVE_to_REG(W_I_2,  W_X[(i- 2)&15]);\
+    MOVE_to_REG(W_I,    W_X[(i)]);\
 
 #define Block_xx_9(i) \
-    s0_ (XMM_TEMP0, W_I_15) ;\
+    s0_ (XMM_TEMP0, W_I_15);\
 
 #define Block_xx_10(i) \
-    ADD(W_I, W_I, XMM_TEMP0) ;\
-    ADD(W_I, W_I, W_I_7) ;\
+    ADD(W_I, W_I, XMM_TEMP0);\
+    ADD(W_I, W_I, W_I_7);\
 
 #define Block_xx_11(i) \
-    s1_ (XMM_TEMP0, W_I_2) ;\
+    s1_ (XMM_TEMP0, W_I_2);\
 
 #define Block_xx_12(i) \
-    ADD(W_I, W_I, XMM_TEMP0) ;\
-    MOVE_to_MEM(W_X,i, W_I) ;\
-    if((i)==0)\
-        MOVE_to_MEM(W_X,16, W_I) ;\
+    ADD(W_I, W_I, XMM_TEMP0);\
+    MOVE_to_MEM(W_X,i, W_I);\
+    if ((i)==0)\
+        MOVE_to_MEM(W_X,16, W_I);\
 
-static INLINE void Block_0_1(word64 *W_X) { Block_xx_1(0) ; }
-static INLINE void Block_0_2(word64 *W_X) { Block_xx_2(0) ; }
-static INLINE void Block_0_3(void) { Block_xx_3(0) ; }
-static INLINE void Block_0_4(void) { Block_xx_4(0) ; }
-static INLINE void Block_0_5(void) { Block_xx_5(0) ; }
-static INLINE void Block_0_6(word64 *W_X) { Block_xx_6(0) ; }
-static INLINE void Block_0_7(word64 *W_X) { Block_xx_7(2) ; }
-static INLINE void Block_0_8(word64 *W_X) { Block_xx_8(2) ; }
-static INLINE void Block_0_9(void) { Block_xx_9(2) ; }
-static INLINE void Block_0_10(void){ Block_xx_10(2) ; }
-static INLINE void Block_0_11(void){ Block_xx_11(2) ; }
-static INLINE void Block_0_12(word64 *W_X){ Block_xx_12(2) ; }
+static INLINE void Block_0_1(word64 *W_X) { Block_xx_1(0); }
+static INLINE void Block_0_2(word64 *W_X) { Block_xx_2(0); }
+static INLINE void Block_0_3(void) { Block_xx_3(0); }
+static INLINE void Block_0_4(void) { Block_xx_4(0); }
+static INLINE void Block_0_5(void) { Block_xx_5(0); }
+static INLINE void Block_0_6(word64 *W_X) { Block_xx_6(0); }
+static INLINE void Block_0_7(word64 *W_X) { Block_xx_7(2); }
+static INLINE void Block_0_8(word64 *W_X) { Block_xx_8(2); }
+static INLINE void Block_0_9(void) { Block_xx_9(2); }
+static INLINE void Block_0_10(void){ Block_xx_10(2); }
+static INLINE void Block_0_11(void){ Block_xx_11(2); }
+static INLINE void Block_0_12(word64 *W_X){ Block_xx_12(2); }
 
-static INLINE void Block_4_1(word64 *W_X) { Block_xx_1(4) ; }
-static INLINE void Block_4_2(word64 *W_X) { Block_xx_2(4) ; }
-static INLINE void Block_4_3(void) { Block_xx_3(4) ; }
-static INLINE void Block_4_4(void) { Block_xx_4(4) ; }
-static INLINE void Block_4_5(void) { Block_xx_5(4) ; }
-static INLINE void Block_4_6(word64 *W_X) { Block_xx_6(4) ; }
-static INLINE void Block_4_7(word64 *W_X) { Block_xx_7(6) ; }
-static INLINE void Block_4_8(word64 *W_X) { Block_xx_8(6) ; }
-static INLINE void Block_4_9(void) { Block_xx_9(6) ; }
-static INLINE void Block_4_10(void){ Block_xx_10(6) ; }
-static INLINE void Block_4_11(void){ Block_xx_11(6) ; }
-static INLINE void Block_4_12(word64 *W_X){ Block_xx_12(6) ; }
+static INLINE void Block_4_1(word64 *W_X) { Block_xx_1(4); }
+static INLINE void Block_4_2(word64 *W_X) { Block_xx_2(4); }
+static INLINE void Block_4_3(void) { Block_xx_3(4); }
+static INLINE void Block_4_4(void) { Block_xx_4(4); }
+static INLINE void Block_4_5(void) { Block_xx_5(4); }
+static INLINE void Block_4_6(word64 *W_X) { Block_xx_6(4); }
+static INLINE void Block_4_7(word64 *W_X) { Block_xx_7(6); }
+static INLINE void Block_4_8(word64 *W_X) { Block_xx_8(6); }
+static INLINE void Block_4_9(void) { Block_xx_9(6); }
+static INLINE void Block_4_10(void){ Block_xx_10(6); }
+static INLINE void Block_4_11(void){ Block_xx_11(6); }
+static INLINE void Block_4_12(word64 *W_X){ Block_xx_12(6); }
 
-static INLINE void Block_8_1(word64 *W_X) { Block_xx_1(8) ; }
-static INLINE void Block_8_2(word64 *W_X) { Block_xx_2(8) ; }
-static INLINE void Block_8_3(void) { Block_xx_3(8) ; }
-static INLINE void Block_8_4(void) { Block_xx_4(8) ; }
-static INLINE void Block_8_5(void) { Block_xx_5(8) ; }
-static INLINE void Block_8_6(word64 *W_X) { Block_xx_6(8) ; }
-static INLINE void Block_8_7(word64 *W_X) { Block_xx_7(10) ; }
-static INLINE void Block_8_8(word64 *W_X) { Block_xx_8(10) ; }
-static INLINE void Block_8_9(void) { Block_xx_9(10) ; }
-static INLINE void Block_8_10(void){ Block_xx_10(10) ; }
-static INLINE void Block_8_11(void){ Block_xx_11(10) ; }
-static INLINE void Block_8_12(word64 *W_X){ Block_xx_12(10) ; }
+static INLINE void Block_8_1(word64 *W_X) { Block_xx_1(8); }
+static INLINE void Block_8_2(word64 *W_X) { Block_xx_2(8); }
+static INLINE void Block_8_3(void) { Block_xx_3(8); }
+static INLINE void Block_8_4(void) { Block_xx_4(8); }
+static INLINE void Block_8_5(void) { Block_xx_5(8); }
+static INLINE void Block_8_6(word64 *W_X) { Block_xx_6(8); }
+static INLINE void Block_8_7(word64 *W_X) { Block_xx_7(10); }
+static INLINE void Block_8_8(word64 *W_X) { Block_xx_8(10); }
+static INLINE void Block_8_9(void) { Block_xx_9(10); }
+static INLINE void Block_8_10(void){ Block_xx_10(10); }
+static INLINE void Block_8_11(void){ Block_xx_11(10); }
+static INLINE void Block_8_12(word64 *W_X){ Block_xx_12(10); }
 
-static INLINE void Block_12_1(word64 *W_X) { Block_xx_1(12) ; }
-static INLINE void Block_12_2(word64 *W_X) { Block_xx_2(12) ; }
-static INLINE void Block_12_3(void) { Block_xx_3(12) ; }
-static INLINE void Block_12_4(void) { Block_xx_4(12) ; }
-static INLINE void Block_12_5(void) { Block_xx_5(12) ; }
-static INLINE void Block_12_6(word64 *W_X) { Block_xx_6(12) ; }
-static INLINE void Block_12_7(word64 *W_X) { Block_xx_7(14) ; }
-static INLINE void Block_12_8(word64 *W_X) { Block_xx_8(14) ; }
-static INLINE void Block_12_9(void) { Block_xx_9(14) ; }
-static INLINE void Block_12_10(void){ Block_xx_10(14) ; }
-static INLINE void Block_12_11(void){ Block_xx_11(14) ; }
-static INLINE void Block_12_12(word64 *W_X){ Block_xx_12(14) ; }
+static INLINE void Block_12_1(word64 *W_X) { Block_xx_1(12); }
+static INLINE void Block_12_2(word64 *W_X) { Block_xx_2(12); }
+static INLINE void Block_12_3(void) { Block_xx_3(12); }
+static INLINE void Block_12_4(void) { Block_xx_4(12); }
+static INLINE void Block_12_5(void) { Block_xx_5(12); }
+static INLINE void Block_12_6(word64 *W_X) { Block_xx_6(12); }
+static INLINE void Block_12_7(word64 *W_X) { Block_xx_7(14); }
+static INLINE void Block_12_8(word64 *W_X) { Block_xx_8(14); }
+static INLINE void Block_12_9(void) { Block_xx_9(14); }
+static INLINE void Block_12_10(void){ Block_xx_10(14); }
+static INLINE void Block_12_11(void){ Block_xx_11(14); }
+static INLINE void Block_12_12(word64 *W_X){ Block_xx_12(14); }
 
-#endif
+#endif /* HAVE_INTEL_AVX1 */
 
 #if defined(HAVE_INTEL_AVX2)
 static const unsigned long mBYTE_FLIP_MASK_Y[] =
-   { 0x0001020304050607, 0x08090a0b0c0d0e0f, 0x0001020304050607, 0x08090a0b0c0d0e0f } ;
+   { 0x0001020304050607, 0x08090a0b0c0d0e0f, 0x0001020304050607, 0x08090a0b0c0d0e0f };
 
 #define W_from_buff_Y(buff)\
     { /* X0..3(ymm9..12), W_X[0..15] = sha512->buffer[0.15];  */\
-     __asm__ volatile("vmovdqu %0, %%ymm8\n\t"::"m"(mBYTE_FLIP_MASK_Y[0]):YMM_REGs) ;\
+     __asm__ volatile("vmovdqu %0, %%ymm8\n\t"::"m"(mBYTE_FLIP_MASK_Y[0]):YMM_REGs);\
      __asm__ volatile("vmovdqu %0, %%ymm12\n\t"\
                       "vmovdqu %1, %%ymm4\n\t"\
                       "vpshufb %%ymm8, %%ymm12, %%ymm12\n\t"\
                       "vpshufb %%ymm8, %%ymm4, %%ymm4\n\t"\
-                      :: "m"(buff[0]),  "m"(buff[4]):YMM_REGs) ;\
+                      :: "m"(buff[0]),  "m"(buff[4]):YMM_REGs);\
      __asm__ volatile("vmovdqu %0, %%ymm5\n\t"\
                       "vmovdqu %1, %%ymm6\n\t"\
                       "vpshufb %%ymm8, %%ymm5, %%ymm5\n\t"\
                       "vpshufb %%ymm8, %%ymm6, %%ymm6\n\t"\
-                      :: "m"(buff[8]),  "m"(buff[12]):YMM_REGs) ;\
+                      :: "m"(buff[8]),  "m"(buff[12]):YMM_REGs);\
     }
 
 #if defined(DEBUG_YMM)
+    #define SAVE_REG_Y(i) __asm__ volatile("vmovdqu %%ymm"#i", %0 \n\t":"=m"(reg[i-4][0])::YMM_REGs);
+    #define RECV_REG_Y(i) __asm__ volatile("vmovdqu %0, %%ymm"#i" \n\t"::"m"(reg[i-4][0]):YMM_REGs);
 
-#define SAVE_REG_Y(i) __asm__ volatile("vmovdqu %%ymm"#i", %0 \n\t":"=m"(reg[i-4][0])::YMM_REGs);
-#define RECV_REG_Y(i) __asm__ volatile("vmovdqu %0, %%ymm"#i" \n\t"::"m"(reg[i-4][0]):YMM_REGs);
-
-#define _DUMP_REG_Y(REG, name)\
-    { word64 buf[16] ;word64 reg[16][2];int k ;\
-      SAVE_REG_Y(4);  SAVE_REG_Y(5);   SAVE_REG_Y(6); SAVE_REG_Y(7); \
-      SAVE_REG_Y(8); SAVE_REG_Y(9); SAVE_REG_Y(10); SAVE_REG_Y(11); SAVE_REG_Y(12);\
-      SAVE_REG_Y(13); SAVE_REG_Y(14); SAVE_REG_Y(15); \
-      __asm__ volatile("vmovdqu %%"#REG", %0 \n\t":"=m"(buf[0])::YMM_REGs);\
-      printf(" "#name":\t") ; for(k=0; k<4; k++) printf("%016lx.", (word64)buf[k]) ; printf("\n") ; \
-      RECV_REG_Y(4);  RECV_REG_Y(5);   RECV_REG_Y(6); RECV_REG_Y(7); \
-      RECV_REG_Y(8); RECV_REG_Y(9); RECV_REG_Y(10); RECV_REG_Y(11); RECV_REG_Y(12); \
-      RECV_REG_Y(13); RECV_REG_Y(14); RECV_REG_Y(15);\
-    }
-
-#define DUMP_REG_Y(REG) _DUMP_REG_Y(REG, #REG) 
-#define DUMP_REG2_Y(REG) _DUMP_REG_Y(REG, #REG) 
-#define PRINTF_Y(fmt, ...) 
+    #define _DUMP_REG_Y(REG, name) /* debug: print REG, keeping ymm4..ymm15 */\
+        { word64 buf[16];word64 reg[16][4];int k; /* 4 words = one 256-bit ymm */\
+          SAVE_REG_Y(4);  SAVE_REG_Y(5);   SAVE_REG_Y(6); SAVE_REG_Y(7); \
+          SAVE_REG_Y(8); SAVE_REG_Y(9); SAVE_REG_Y(10); SAVE_REG_Y(11); SAVE_REG_Y(12);\
+          SAVE_REG_Y(13); SAVE_REG_Y(14); SAVE_REG_Y(15); \
+          __asm__ volatile("vmovdqu %%"#REG", %0 \n\t":"=m"(buf[0])::YMM_REGs);\
+          printf(" "#name":\t"); for(k=0; k<4; k++) printf("%016llx.", (unsigned long long)buf[k]); printf("\n"); \
+          RECV_REG_Y(4);  RECV_REG_Y(5);   RECV_REG_Y(6); RECV_REG_Y(7); \
+          RECV_REG_Y(8); RECV_REG_Y(9); RECV_REG_Y(10); RECV_REG_Y(11); RECV_REG_Y(12); \
+          RECV_REG_Y(13); RECV_REG_Y(14); RECV_REG_Y(15);\
+        }
 
+    #define DUMP_REG_Y(REG) _DUMP_REG_Y(REG, #REG)
+    #define DUMP_REG2_Y(REG) _DUMP_REG_Y(REG, #REG)
+    #define PRINTF_Y(fmt, ...)
 #else
-
-#define DUMP_REG_Y(REG) 
-#define DUMP_REG2_Y(REG)
-#define PRINTF_Y(fmt, ...) 
-
-#endif
+    #define DUMP_REG_Y(REG)
+    #define DUMP_REG2_Y(REG)
+    #define PRINTF_Y(fmt, ...)
+#endif /* DEBUG_YMM */
 
 #define _MOVE_to_REGy(ymm, mem)         __asm__ volatile("vmovdqu %0, %%"#ymm" "\
-                                        :: "m"(mem):YMM_REGs) ;
+                                        :: "m"(mem):YMM_REGs);
 #define _MOVE_to_MEMy(mem,i, ymm)       __asm__ volatile("vmovdqu %%"#ymm", %0" \
-        : "=m"(mem[i]),"=m"(mem[i+1]),"=m"(mem[i+2]),"=m"(mem[i+3])::YMM_REGs) ;
+        : "=m"(mem[i]),"=m"(mem[i+1]),"=m"(mem[i+2]),"=m"(mem[i+3])::YMM_REGs);
 #define _MOVE_128y(ymm0, ymm1, ymm2, map)  __asm__ volatile("vperm2i128  $"\
-        #map", %%"#ymm2", %%"#ymm1", %%"#ymm0" ":::YMM_REGs) ;
+        #map", %%"#ymm2", %%"#ymm1", %%"#ymm0" ":::YMM_REGs);
 #define _S_TEMPy(dest, src, bits, temp) \
          __asm__ volatile("vpsrlq  $"#bits", %%"#src", %%"#dest"\n\tvpsllq  $64-"#bits\
-        ", %%"#src", %%"#temp"\n\tvpor %%"#temp",%%"#dest", %%"#dest" ":::YMM_REGs) ;
+        ", %%"#src", %%"#temp"\n\tvpor %%"#temp",%%"#dest", %%"#dest" ":::YMM_REGs);
 #define _AVX2_R(dest, src, bits)        __asm__ volatile("vpsrlq  $"#bits", %%"\
-         #src", %%"#dest" ":::YMM_REGs) ;
+         #src", %%"#dest" ":::YMM_REGs);
 #define _XORy(dest, src1, src2)         __asm__ volatile("vpxor   %%"#src1", %%"\
-         #src2", %%"#dest" ":::YMM_REGs) ;
+         #src2", %%"#dest" ":::YMM_REGs);
 #define _ADDy(dest, src1, src2)         __asm__ volatile("vpaddq   %%"#src1", %%"\
-         #src2", %%"#dest" ":::YMM_REGs) ;
+         #src2", %%"#dest" ":::YMM_REGs);
 #define _BLENDy(map, dest, src1, src2)  __asm__ volatile("vpblendd    $"#map", %%"\
-         #src1",   %%"#src2", %%"#dest" ":::YMM_REGs) ;
+         #src1",   %%"#src2", %%"#dest" ":::YMM_REGs);
 #define _BLENDQy(map, dest, src1, src2) __asm__ volatile("vblendpd   $"#map", %%"\
-         #src1",   %%"#src2", %%"#dest" ":::YMM_REGs) ;
+         #src1",   %%"#src2", %%"#dest" ":::YMM_REGs);
 #define _PERMQy(map, dest, src)         __asm__ volatile("vpermq  $"#map", %%"\
-         #src", %%"#dest" ":::YMM_REGs) ;
+         #src", %%"#dest" ":::YMM_REGs);
 
 #define MOVE_to_REGy(ymm, mem)      _MOVE_to_REGy(ymm, mem)
 #define MOVE_to_MEMy(mem, i, ymm)   _MOVE_to_MEMy(mem, i, ymm)
 
-#define MOVE_128y(ymm0, ymm1, ymm2, map) _MOVE_128y(ymm0, ymm1, ymm2, map) 
+#define MOVE_128y(ymm0, ymm1, ymm2, map) _MOVE_128y(ymm0, ymm1, ymm2, map)
 #define XORy(dest, src1, src2)      _XORy(dest, src1, src2)
 #define ADDy(dest, src1, src2)      _ADDy(dest, src1, src2)
 #define BLENDy(map, dest, src1, src2) _BLENDy(map, dest, src1, src2)
@@ -924,15 +995,15 @@ static const unsigned long mBYTE_FLIP_MASK_Y[] =
 #define AVX2_R(dest, src, bits)      _AVX2_R(dest, src, bits)
 
 
-#define    FEEDBACK1_to_W_I_2(w_i_2, w_i)    MOVE_128y(YMM_TEMP0, w_i, w_i, 0x08) ;\
-                                       BLENDy(0xf0, w_i_2, YMM_TEMP0, w_i_2) ; 
+#define    FEEDBACK1_to_W_I_2(w_i_2, w_i)    MOVE_128y(YMM_TEMP0, w_i, w_i, 0x08);\
+                                       BLENDy(0xf0, w_i_2, YMM_TEMP0, w_i_2);
 
-#define    MOVE_W_to_W_I_15(w_i_15, w_0, w_4)  BLENDQy(0x1, w_i_15, w_4, w_0) ;\
-                                       PERMQy(0x39, w_i_15, w_i_15) ;
-#define    MOVE_W_to_W_I_7(w_i_7,  w_8, w_12)  BLENDQy(0x1, w_i_7, w_12, w_8) ;\
-                                       PERMQy(0x39, w_i_7, w_i_7) ; 
-#define    MOVE_W_to_W_I_2(w_i_2,  w_12)       BLENDQy(0xc, w_i_2, w_12, w_i_2) ;\
-                                       PERMQy(0x0e, w_i_2, w_i_2) ;
+#define    MOVE_W_to_W_I_15(w_i_15, w_0, w_4)  BLENDQy(0x1, w_i_15, w_4, w_0);\
+                                       PERMQy(0x39, w_i_15, w_i_15);
+#define    MOVE_W_to_W_I_7(w_i_7,  w_8, w_12)  BLENDQy(0x1, w_i_7, w_12, w_8);\
+                                       PERMQy(0x39, w_i_7, w_i_7);
+#define    MOVE_W_to_W_I_2(w_i_2,  w_12)       BLENDQy(0xc, w_i_2, w_12, w_i_2);\
+                                       PERMQy(0x0e, w_i_2, w_i_2);
 
 
 #define W_I_16y  ymm8
@@ -957,64 +1028,63 @@ static const unsigned long mBYTE_FLIP_MASK_Y[] =
                  /* "%ymm7","%ymm8","%ymm9","%ymm10","%ymm11","%ymm12","%ymm13","%ymm14","%ymm15"*/
 
 #define MOVE_15_to_16(w_i_16, w_i_15, w_i_7)\
-    __asm__ volatile("vperm2i128  $0x01, %%"#w_i_15", %%"#w_i_15", %%"#w_i_15" ":::YMM_REGs) ;\
-    __asm__ volatile("vpblendd    $0x08, %%"#w_i_15", %%"#w_i_7", %%"#w_i_16" ":::YMM_REGs) ;\
-    __asm__ volatile("vperm2i128 $0x01,  %%"#w_i_7",  %%"#w_i_7", %%"#w_i_15" ":::YMM_REGs) ;\
-    __asm__ volatile("vpblendd    $0x80, %%"#w_i_15", %%"#w_i_16", %%"#w_i_16" ":::YMM_REGs) ;\
-    __asm__ volatile("vpshufd    $0x93,  %%"#w_i_16", %%"#w_i_16" ":::YMM_REGs) ;\
+    __asm__ volatile("vperm2i128  $0x01, %%"#w_i_15", %%"#w_i_15", %%"#w_i_15" ":::YMM_REGs);\
+    __asm__ volatile("vpblendd    $0x08, %%"#w_i_15", %%"#w_i_7", %%"#w_i_16" ":::YMM_REGs);\
+    __asm__ volatile("vperm2i128 $0x01,  %%"#w_i_7",  %%"#w_i_7", %%"#w_i_15" ":::YMM_REGs);\
+    __asm__ volatile("vpblendd    $0x80, %%"#w_i_15", %%"#w_i_16", %%"#w_i_16" ":::YMM_REGs);\
+    __asm__ volatile("vpshufd    $0x93,  %%"#w_i_16", %%"#w_i_16" ":::YMM_REGs);\
 
 #define MOVE_7_to_15(w_i_15, w_i_7)\
-    __asm__ volatile("vmovdqu                 %%"#w_i_7",  %%"#w_i_15" ":::YMM_REGs) ;\
+    __asm__ volatile("vmovdqu                 %%"#w_i_7",  %%"#w_i_15" ":::YMM_REGs);\
 
 #define MOVE_I_to_7(w_i_7, w_i)\
-    __asm__ volatile("vperm2i128 $0x01,       %%"#w_i",   %%"#w_i",   %%"#w_i_7" ":::YMM_REGs) ;\
-    __asm__ volatile("vpblendd    $0x01,       %%"#w_i_7",   %%"#w_i", %%"#w_i_7" ":::YMM_REGs) ;\
-    __asm__ volatile("vpshufd    $0x39, %%"#w_i_7", %%"#w_i_7" ":::YMM_REGs) ;\
+    __asm__ volatile("vperm2i128 $0x01,       %%"#w_i",   %%"#w_i",   %%"#w_i_7" ":::YMM_REGs);\
+    __asm__ volatile("vpblendd    $0x01,       %%"#w_i_7",   %%"#w_i", %%"#w_i_7" ":::YMM_REGs);\
+    __asm__ volatile("vpshufd    $0x39, %%"#w_i_7", %%"#w_i_7" ":::YMM_REGs);\
 
 #define MOVE_I_to_2(w_i_2, w_i)\
-    __asm__ volatile("vperm2i128 $0x01,       %%"#w_i", %%"#w_i", %%"#w_i_2" ":::YMM_REGs) ;\
-    __asm__ volatile("vpshufd    $0x0e, %%"#w_i_2", %%"#w_i_2" ":::YMM_REGs) ;\
+    __asm__ volatile("vperm2i128 $0x01,       %%"#w_i", %%"#w_i", %%"#w_i_2" ":::YMM_REGs);\
+    __asm__ volatile("vpshufd    $0x0e, %%"#w_i_2", %%"#w_i_2" ":::YMM_REGs);\
 
-#endif
+#endif /* HAVE_INTEL_AVX2 */
 
 
 /***  Transform Body ***/
 #if defined(HAVE_INTEL_AVX1)
-
 static int Transform_AVX1(Sha512* sha512)
 {
     const word64* K = K512;
-    word64 W_X[16+4];
+    word64 W_X[16+4] = {0};
     word32 j;
     word64 T[8];
+
     /* Copy digest to working vars */
     XMEMCPY(T, sha512->digest, sizeof(T));
 
-    W_from_buff(W_X, sha512->buffer) ;
+    W_from_buff(W_X, sha512->buffer);
     for (j = 0; j < 80; j += 16) {
-        Rx_1( 0); Block_0_1(W_X); Rx_2( 0); Block_0_2(W_X); Rx_3( 0); Block_0_3(); 
-        Rx_1( 1); Block_0_4(); Rx_2( 1); Block_0_5(); Rx_3( 1); Block_0_6(W_X); 
+        Rx_1( 0); Block_0_1(W_X); Rx_2( 0); Block_0_2(W_X); Rx_3( 0); Block_0_3();
+        Rx_1( 1); Block_0_4(); Rx_2( 1); Block_0_5(); Rx_3( 1); Block_0_6(W_X);
         Rx_1( 2); Block_0_7(W_X); Rx_2( 2); Block_0_8(W_X); Rx_3( 2); Block_0_9();
-        Rx_1( 3); Block_0_10();Rx_2( 3); Block_0_11();Rx_3( 3); Block_0_12(W_X);   
-        
-        Rx_1( 4); Block_4_1(W_X); Rx_2( 4); Block_4_2(W_X); Rx_3( 4); Block_4_3(); 
-        Rx_1( 5); Block_4_4(); Rx_2( 5); Block_4_5(); Rx_3( 5); Block_4_6(W_X); 
+        Rx_1( 3); Block_0_10();Rx_2( 3); Block_0_11();Rx_3( 3); Block_0_12(W_X);
+
+        Rx_1( 4); Block_4_1(W_X); Rx_2( 4); Block_4_2(W_X); Rx_3( 4); Block_4_3();
+        Rx_1( 5); Block_4_4(); Rx_2( 5); Block_4_5(); Rx_3( 5); Block_4_6(W_X);
         Rx_1( 6); Block_4_7(W_X); Rx_2( 6); Block_4_8(W_X); Rx_3( 6); Block_4_9();
-        Rx_1( 7); Block_4_10();Rx_2( 7); Block_4_11();Rx_3( 7); Block_4_12(W_X);   
-        
-        Rx_1( 8); Block_8_1(W_X); Rx_2( 8); Block_8_2(W_X); Rx_3( 8); Block_8_3(); 
-        Rx_1( 9); Block_8_4(); Rx_2( 9); Block_8_5(); Rx_3( 9); Block_8_6(W_X); 
+        Rx_1( 7); Block_4_10();Rx_2( 7); Block_4_11();Rx_3( 7); Block_4_12(W_X);
+
+        Rx_1( 8); Block_8_1(W_X); Rx_2( 8); Block_8_2(W_X); Rx_3( 8); Block_8_3();
+        Rx_1( 9); Block_8_4(); Rx_2( 9); Block_8_5(); Rx_3( 9); Block_8_6(W_X);
         Rx_1(10); Block_8_7(W_X); Rx_2(10); Block_8_8(W_X); Rx_3(10); Block_8_9();
-        Rx_1(11); Block_8_10();Rx_2(11); Block_8_11();Rx_3(11); Block_8_12(W_X);   
-        
-        Rx_1(12); Block_12_1(W_X); Rx_2(12); Block_12_2(W_X); Rx_3(12); Block_12_3(); 
-        Rx_1(13); Block_12_4(); Rx_2(13); Block_12_5(); Rx_3(13); Block_12_6(W_X); 
+        Rx_1(11); Block_8_10();Rx_2(11); Block_8_11();Rx_3(11); Block_8_12(W_X);
+
+        Rx_1(12); Block_12_1(W_X); Rx_2(12); Block_12_2(W_X); Rx_3(12); Block_12_3();
+        Rx_1(13); Block_12_4(); Rx_2(13); Block_12_5(); Rx_3(13); Block_12_6(W_X);
         Rx_1(14); Block_12_7(W_X); Rx_2(14); Block_12_8(W_X); Rx_3(14); Block_12_9();
-        Rx_1(15); Block_12_10();Rx_2(15); Block_12_11();Rx_3(15); Block_12_12(W_X);     
+        Rx_1(15); Block_12_10();Rx_2(15); Block_12_11();Rx_3(15); Block_12_12(W_X);
     }
 
     /* Add the working vars back into digest */
-
     sha512->digest[0] += a(0);
     sha512->digest[1] += b(0);
     sha512->digest[2] += c(0);
@@ -1025,67 +1095,66 @@ static int Transform_AVX1(Sha512* sha512)
     sha512->digest[7] += h(0);
 
     /* Wipe variables */
-    #if !defined(HAVE_INTEL_AVX1)&&!defined(HAVE_INTEL_AVX2)
+    /* always wipe W_X: the old !AVX1 && !AVX2 guard was never true in here */
     XMEMSET(W_X, 0, sizeof(word64) * 16);
-    #endif
+    XMEMSET(&W_X[16], 0, sizeof(W_X) - sizeof(word64) * 16); /* tail words */
     XMEMSET(T, 0, sizeof(T));
 
     return 0;
 }
-
-#endif
+#endif /* HAVE_INTEL_AVX1 */
 
 #if defined(HAVE_INTEL_AVX2) && defined(HAVE_INTEL_AVX1) && defined(HAVE_INTEL_RORX)
-
 static int Transform_AVX1_RORX(Sha512* sha512)
 {
     const word64* K = K512;
-    word64 W_X[16+4];
+    word64 W_X[16+4] = {0};
     word32 j;
     word64 T[8];
+
     /* Copy digest to working vars */
     XMEMCPY(T, sha512->digest, sizeof(T));
 
-    W_from_buff(W_X, sha512->buffer) ;
+    W_from_buff(W_X, sha512->buffer);
     for (j = 0; j < 80; j += 16) {
-        Rx_RORX_1( 0); Block_0_1(W_X); Rx_RORX_2( 0); Block_0_2(W_X); 
-                                    Rx_RORX_3( 0); Block_0_3(); 
-        Rx_RORX_1( 1); Block_0_4(); Rx_RORX_2( 1); Block_0_5(); 
-                                    Rx_RORX_3( 1); Block_0_6(W_X); 
-        Rx_RORX_1( 2); Block_0_7(W_X); Rx_RORX_2( 2); Block_0_8(W_X); 
+        Rx_RORX_1( 0); Block_0_1(W_X); Rx_RORX_2( 0); Block_0_2(W_X);
+                                    Rx_RORX_3( 0); Block_0_3();
+        Rx_RORX_1( 1); Block_0_4(); Rx_RORX_2( 1); Block_0_5();
+                                    Rx_RORX_3( 1); Block_0_6(W_X);
+        Rx_RORX_1( 2); Block_0_7(W_X); Rx_RORX_2( 2); Block_0_8(W_X);
                                     Rx_RORX_3( 2); Block_0_9();
         Rx_RORX_1( 3); Block_0_10();Rx_RORX_2( 3); Block_0_11();
-                                    Rx_RORX_3( 3); Block_0_12(W_X);   
-        
-        Rx_RORX_1( 4); Block_4_1(W_X); Rx_RORX_2( 4); Block_4_2(W_X); 
-                                    Rx_RORX_3( 4); Block_4_3(); 
-        Rx_RORX_1( 5); Block_4_4(); Rx_RORX_2( 5); Block_4_5(); 
-                                    Rx_RORX_3( 5); Block_4_6(W_X); 
-        Rx_RORX_1( 6); Block_4_7(W_X); Rx_RORX_2( 6); Block_4_8(W_X); 
+                                    Rx_RORX_3( 3); Block_0_12(W_X);
+
+        Rx_RORX_1( 4); Block_4_1(W_X); Rx_RORX_2( 4); Block_4_2(W_X);
+                                    Rx_RORX_3( 4); Block_4_3();
+        Rx_RORX_1( 5); Block_4_4(); Rx_RORX_2( 5); Block_4_5();
+                                    Rx_RORX_3( 5); Block_4_6(W_X);
+        Rx_RORX_1( 6); Block_4_7(W_X); Rx_RORX_2( 6); Block_4_8(W_X);
                                     Rx_RORX_3( 6); Block_4_9();
         Rx_RORX_1( 7); Block_4_10();Rx_RORX_2( 7); Block_4_11();
-                                    Rx_RORX_3( 7); Block_4_12(W_X);   
-        
-        Rx_RORX_1( 8); Block_8_1(W_X); Rx_RORX_2( 8); Block_8_2(W_X); 
-                                    Rx_RORX_3( 8); Block_8_3(); 
-        Rx_RORX_1( 9); Block_8_4(); Rx_RORX_2( 9); Block_8_5(); 
-                                    Rx_RORX_3( 9); Block_8_6(W_X); 
-        Rx_RORX_1(10); Block_8_7(W_X); Rx_RORX_2(10); Block_8_8(W_X); 
+                                    Rx_RORX_3( 7); Block_4_12(W_X);
+
+        Rx_RORX_1( 8); Block_8_1(W_X); Rx_RORX_2( 8); Block_8_2(W_X);
+                                    Rx_RORX_3( 8); Block_8_3();
+        Rx_RORX_1( 9); Block_8_4(); Rx_RORX_2( 9); Block_8_5();
+                                    Rx_RORX_3( 9); Block_8_6(W_X);
+        Rx_RORX_1(10); Block_8_7(W_X); Rx_RORX_2(10); Block_8_8(W_X);
                                     Rx_RORX_3(10); Block_8_9();
         Rx_RORX_1(11); Block_8_10();Rx_RORX_2(11); Block_8_11();
-                                    Rx_RORX_3(11); Block_8_12(W_X);   
-        
-        Rx_RORX_1(12); Block_12_1(W_X); Rx_RORX_2(12); Block_12_2(W_X); 
-                                     Rx_RORX_3(12); Block_12_3(); 
-        Rx_RORX_1(13); Block_12_4(); Rx_RORX_2(13); Block_12_5(); 
-                                     Rx_RORX_3(13); Block_12_6(W_X); 
-        Rx_RORX_1(14); Block_12_7(W_X); Rx_RORX_2(14); Block_12_8(W_X); 
+                                    Rx_RORX_3(11); Block_8_12(W_X);
+
+        Rx_RORX_1(12); Block_12_1(W_X); Rx_RORX_2(12); Block_12_2(W_X);
+                                     Rx_RORX_3(12); Block_12_3();
+        Rx_RORX_1(13); Block_12_4(); Rx_RORX_2(13); Block_12_5();
+                                     Rx_RORX_3(13); Block_12_6(W_X);
+        Rx_RORX_1(14); Block_12_7(W_X); Rx_RORX_2(14); Block_12_8(W_X);
                                      Rx_RORX_3(14); Block_12_9();
         Rx_RORX_1(15); Block_12_10();Rx_RORX_2(15); Block_12_11();
-                                     Rx_RORX_3(15); Block_12_12(W_X);     
+                                     Rx_RORX_3(15); Block_12_12(W_X);
     }
-    /* Add the working vars back into digest */
 
+    /* Add the working vars back into digest */
     sha512->digest[0] += a(0);
     sha512->digest[1] += b(0);
     sha512->digest[2] += c(0);
@@ -1096,176 +1165,176 @@ static int Transform_AVX1_RORX(Sha512* sha512)
     sha512->digest[7] += h(0);
 
     /* Wipe variables */
-    #if !defined(HAVE_INTEL_AVX1)&&!defined(HAVE_INTEL_AVX2)
+#if !defined(HAVE_INTEL_AVX1)&&!defined(HAVE_INTEL_AVX2)
     XMEMSET(W_X, 0, sizeof(word64) * 16);
-    #endif
+#endif
     XMEMSET(T, 0, sizeof(T));
 
     return 0;
 }
-#endif
+#endif /* HAVE_INTEL_AVX2 && HAVE_INTEL_AVX1 && HAVE_INTEL_RORX */
 
 #if defined(HAVE_INTEL_AVX2)
 
-#define s0_1y(dest, src)      AVX2_S(dest, src, 1); 
-#define s0_2y(dest, src)      AVX2_S(G_TEMPy, src, 8); XORy(dest, G_TEMPy, dest) ; 
-#define s0_3y(dest, src)      AVX2_R(G_TEMPy, src, 7);  XORy(dest, G_TEMPy, dest) ;
+#define s0_1y(dest, src)      AVX2_S(dest, src, 1);
+#define s0_2y(dest, src)      AVX2_S(G_TEMPy, src, 8); XORy(dest, G_TEMPy, dest);
+#define s0_3y(dest, src)      AVX2_R(G_TEMPy, src, 7);  XORy(dest, G_TEMPy, dest);
 
 #define s1_1y(dest, src)      AVX2_S(dest, src, 19);
-#define s1_2y(dest, src)      AVX2_S(G_TEMPy, src, 61); XORy(dest, G_TEMPy, dest) ; 
-#define s1_3y(dest, src)      AVX2_R(G_TEMPy, src, 6); XORy(dest, G_TEMPy, dest) ;
+#define s1_2y(dest, src)      AVX2_S(G_TEMPy, src, 61); XORy(dest, G_TEMPy, dest);
+#define s1_3y(dest, src)      AVX2_R(G_TEMPy, src, 6); XORy(dest, G_TEMPy, dest);
 
-#define s0_y(dest, src)       s0_1y(dest, src) ; s0_2y(dest, src) ; s0_3y(dest, src)
-#define s1_y(dest, src)       s1_1y(dest, src) ; s1_2y(dest, src) ; s1_3y(dest, src)
+#define s0_y(dest, src)       s0_1y(dest, src); s0_2y(dest, src); s0_3y(dest, src)
+#define s1_y(dest, src)       s1_1y(dest, src); s1_2y(dest, src); s1_3y(dest, src)
 
 
 #define Block_Y_xx_1(i, w_0, w_4, w_8, w_12)\
-    MOVE_W_to_W_I_15(W_I_15y, w_0, w_4) ;\
-    MOVE_W_to_W_I_7 (W_I_7y,  w_8, w_12) ;\
-    MOVE_W_to_W_I_2 (W_I_2y,  w_12) ;\
+    MOVE_W_to_W_I_15(W_I_15y, w_0, w_4);\
+    MOVE_W_to_W_I_7 (W_I_7y,  w_8, w_12);\
+    MOVE_W_to_W_I_2 (W_I_2y,  w_12);\
 
 #define Block_Y_xx_2(i, w_0, w_4, w_8, w_12)\
-    s0_1y (YMM_TEMP0, W_I_15y) ;\
+    s0_1y (YMM_TEMP0, W_I_15y);\
 
 #define Block_Y_xx_3(i, w_0, w_4, w_8, w_12)\
-    s0_2y (YMM_TEMP0, W_I_15y) ;\
+    s0_2y (YMM_TEMP0, W_I_15y);\
 
 #define Block_Y_xx_4(i, w_0, w_4, w_8, w_12)\
-    s0_3y (YMM_TEMP0, W_I_15y) ;\
+    s0_3y (YMM_TEMP0, W_I_15y);\
 
 #define Block_Y_xx_5(i, w_0, w_4, w_8, w_12)\
-    ADDy(W_I_TEMPy, w_0, YMM_TEMP0) ;\
+    ADDy(W_I_TEMPy, w_0, YMM_TEMP0);\
 
 #define Block_Y_xx_6(i, w_0, w_4, w_8, w_12)\
-    ADDy(W_I_TEMPy, W_I_TEMPy, W_I_7y) ;\
-    s1_1y (YMM_TEMP0, W_I_2y) ;\
+    ADDy(W_I_TEMPy, W_I_TEMPy, W_I_7y);\
+    s1_1y (YMM_TEMP0, W_I_2y);\
 
 #define Block_Y_xx_7(i, w_0, w_4, w_8, w_12)\
-    s1_2y (YMM_TEMP0, W_I_2y) ;\
+    s1_2y (YMM_TEMP0, W_I_2y);\
 
 #define Block_Y_xx_8(i, w_0, w_4, w_8, w_12)\
-    s1_3y (YMM_TEMP0, W_I_2y) ;\
-    ADDy(w_0, W_I_TEMPy, YMM_TEMP0) ;\
+    s1_3y (YMM_TEMP0, W_I_2y);\
+    ADDy(w_0, W_I_TEMPy, YMM_TEMP0);\
 
 #define Block_Y_xx_9(i, w_0, w_4, w_8, w_12)\
-    FEEDBACK1_to_W_I_2(W_I_2y, w_0) ;\
+    FEEDBACK1_to_W_I_2(W_I_2y, w_0);\
 
 #define Block_Y_xx_10(i, w_0, w_4, w_8, w_12) \
-    s1_1y (YMM_TEMP0, W_I_2y) ;\
+    s1_1y (YMM_TEMP0, W_I_2y);\
 
 #define Block_Y_xx_11(i, w_0, w_4, w_8, w_12) \
-    s1_2y (YMM_TEMP0, W_I_2y) ;\
+    s1_2y (YMM_TEMP0, W_I_2y);\
 
 #define Block_Y_xx_12(i, w_0, w_4, w_8, w_12)\
-    s1_3y (YMM_TEMP0, W_I_2y) ;\
-    ADDy(w_0, W_I_TEMPy, YMM_TEMP0) ;\
-    MOVE_to_MEMy(w,0, w_4) ;\
+    s1_3y (YMM_TEMP0, W_I_2y);\
+    ADDy(w_0, W_I_TEMPy, YMM_TEMP0);\
+    MOVE_to_MEMy(w,0, w_4);\
 
 
-static INLINE void Block_Y_0_1(void) { Block_Y_xx_1(0, W_0y, W_4y, W_8y, W_12y) ; }
-static INLINE void Block_Y_0_2(void) { Block_Y_xx_2(0, W_0y, W_4y, W_8y, W_12y) ; }
-static INLINE void Block_Y_0_3(void) { Block_Y_xx_3(0, W_0y, W_4y, W_8y, W_12y) ; }
-static INLINE void Block_Y_0_4(void) { Block_Y_xx_4(0, W_0y, W_4y, W_8y, W_12y) ; }
-static INLINE void Block_Y_0_5(void) { Block_Y_xx_5(0, W_0y, W_4y, W_8y, W_12y) ; }
-static INLINE void Block_Y_0_6(void) { Block_Y_xx_6(0, W_0y, W_4y, W_8y, W_12y) ; }
-static INLINE void Block_Y_0_7(void) { Block_Y_xx_7(0, W_0y, W_4y, W_8y, W_12y) ; }
-static INLINE void Block_Y_0_8(void) { Block_Y_xx_8(0, W_0y, W_4y, W_8y, W_12y) ; }
-static INLINE void Block_Y_0_9(void) { Block_Y_xx_9(0, W_0y, W_4y, W_8y, W_12y) ; }
-static INLINE void Block_Y_0_10(void){ Block_Y_xx_10(0, W_0y, W_4y, W_8y, W_12y) ; }
-static INLINE void Block_Y_0_11(void){ Block_Y_xx_11(0, W_0y, W_4y, W_8y, W_12y) ; }
-static INLINE void Block_Y_0_12(word64 *w){ Block_Y_xx_12(0, W_0y, W_4y, W_8y, W_12y) ; }
+static INLINE void Block_Y_0_1(void) { Block_Y_xx_1(0, W_0y, W_4y, W_8y, W_12y); }
+static INLINE void Block_Y_0_2(void) { Block_Y_xx_2(0, W_0y, W_4y, W_8y, W_12y); }
+static INLINE void Block_Y_0_3(void) { Block_Y_xx_3(0, W_0y, W_4y, W_8y, W_12y); }
+static INLINE void Block_Y_0_4(void) { Block_Y_xx_4(0, W_0y, W_4y, W_8y, W_12y); }
+static INLINE void Block_Y_0_5(void) { Block_Y_xx_5(0, W_0y, W_4y, W_8y, W_12y); }
+static INLINE void Block_Y_0_6(void) { Block_Y_xx_6(0, W_0y, W_4y, W_8y, W_12y); }
+static INLINE void Block_Y_0_7(void) { Block_Y_xx_7(0, W_0y, W_4y, W_8y, W_12y); }
+static INLINE void Block_Y_0_8(void) { Block_Y_xx_8(0, W_0y, W_4y, W_8y, W_12y); }
+static INLINE void Block_Y_0_9(void) { Block_Y_xx_9(0, W_0y, W_4y, W_8y, W_12y); }
+static INLINE void Block_Y_0_10(void){ Block_Y_xx_10(0, W_0y, W_4y, W_8y, W_12y); }
+static INLINE void Block_Y_0_11(void){ Block_Y_xx_11(0, W_0y, W_4y, W_8y, W_12y); }
+static INLINE void Block_Y_0_12(word64 *w){ Block_Y_xx_12(0, W_0y, W_4y, W_8y, W_12y); }
 
-static INLINE void Block_Y_4_1(void) { Block_Y_xx_1(4, W_4y, W_8y, W_12y, W_0y) ; }
-static INLINE void Block_Y_4_2(void) { Block_Y_xx_2(4, W_4y, W_8y, W_12y, W_0y) ; }
-static INLINE void Block_Y_4_3(void) { Block_Y_xx_3(4, W_4y, W_8y, W_12y, W_0y) ; }
-static INLINE void Block_Y_4_4(void) { Block_Y_xx_4(4, W_4y, W_8y, W_12y, W_0y) ; }
-static INLINE void Block_Y_4_5(void) { Block_Y_xx_5(4, W_4y, W_8y, W_12y, W_0y) ; }
-static INLINE void Block_Y_4_6(void) { Block_Y_xx_6(4, W_4y, W_8y, W_12y, W_0y) ; }
-static INLINE void Block_Y_4_7(void) { Block_Y_xx_7(4, W_4y, W_8y, W_12y, W_0y) ; }
-static INLINE void Block_Y_4_8(void) { Block_Y_xx_8(4, W_4y, W_8y, W_12y, W_0y) ; }
-static INLINE void Block_Y_4_9(void) { Block_Y_xx_9(4, W_4y, W_8y, W_12y, W_0y) ; }
-static INLINE void Block_Y_4_10(void) { Block_Y_xx_10(4, W_4y, W_8y, W_12y, W_0y) ; }
-static INLINE void Block_Y_4_11(void) { Block_Y_xx_11(4, W_4y, W_8y, W_12y, W_0y) ; }
-static INLINE void Block_Y_4_12(word64 *w) { Block_Y_xx_12(4, W_4y, W_8y, W_12y, W_0y) ; }
+static INLINE void Block_Y_4_1(void) { Block_Y_xx_1(4, W_4y, W_8y, W_12y, W_0y); }
+static INLINE void Block_Y_4_2(void) { Block_Y_xx_2(4, W_4y, W_8y, W_12y, W_0y); }
+static INLINE void Block_Y_4_3(void) { Block_Y_xx_3(4, W_4y, W_8y, W_12y, W_0y); }
+static INLINE void Block_Y_4_4(void) { Block_Y_xx_4(4, W_4y, W_8y, W_12y, W_0y); }
+static INLINE void Block_Y_4_5(void) { Block_Y_xx_5(4, W_4y, W_8y, W_12y, W_0y); }
+static INLINE void Block_Y_4_6(void) { Block_Y_xx_6(4, W_4y, W_8y, W_12y, W_0y); }
+static INLINE void Block_Y_4_7(void) { Block_Y_xx_7(4, W_4y, W_8y, W_12y, W_0y); }
+static INLINE void Block_Y_4_8(void) { Block_Y_xx_8(4, W_4y, W_8y, W_12y, W_0y); }
+static INLINE void Block_Y_4_9(void) { Block_Y_xx_9(4, W_4y, W_8y, W_12y, W_0y); }
+static INLINE void Block_Y_4_10(void) { Block_Y_xx_10(4, W_4y, W_8y, W_12y, W_0y); }
+static INLINE void Block_Y_4_11(void) { Block_Y_xx_11(4, W_4y, W_8y, W_12y, W_0y); }
+static INLINE void Block_Y_4_12(word64 *w) { Block_Y_xx_12(4, W_4y, W_8y, W_12y, W_0y); }
 
-static INLINE void Block_Y_8_1(void) { Block_Y_xx_1(8, W_8y, W_12y, W_0y, W_4y) ; }
-static INLINE void Block_Y_8_2(void) { Block_Y_xx_2(8, W_8y, W_12y, W_0y, W_4y) ; }
-static INLINE void Block_Y_8_3(void) { Block_Y_xx_3(8, W_8y, W_12y, W_0y, W_4y) ; }
-static INLINE void Block_Y_8_4(void) { Block_Y_xx_4(8, W_8y, W_12y, W_0y, W_4y) ; }
-static INLINE void Block_Y_8_5(void) { Block_Y_xx_5(8, W_8y, W_12y, W_0y, W_4y) ; }
-static INLINE void Block_Y_8_6(void) { Block_Y_xx_6(8, W_8y, W_12y, W_0y, W_4y) ; }
-static INLINE void Block_Y_8_7(void) { Block_Y_xx_7(8, W_8y, W_12y, W_0y, W_4y) ; }
-static INLINE void Block_Y_8_8(void) { Block_Y_xx_8(8, W_8y, W_12y, W_0y, W_4y) ; }
-static INLINE void Block_Y_8_9(void) { Block_Y_xx_9(8, W_8y, W_12y, W_0y, W_4y) ; }
-static INLINE void Block_Y_8_10(void) { Block_Y_xx_10(8, W_8y, W_12y, W_0y, W_4y) ; }
-static INLINE void Block_Y_8_11(void) { Block_Y_xx_11(8, W_8y, W_12y, W_0y, W_4y) ; }
-static INLINE void Block_Y_8_12(word64 *w) { Block_Y_xx_12(8, W_8y, W_12y, W_0y, W_4y) ; }
+static INLINE void Block_Y_8_1(void) { Block_Y_xx_1(8, W_8y, W_12y, W_0y, W_4y); }
+static INLINE void Block_Y_8_2(void) { Block_Y_xx_2(8, W_8y, W_12y, W_0y, W_4y); }
+static INLINE void Block_Y_8_3(void) { Block_Y_xx_3(8, W_8y, W_12y, W_0y, W_4y); }
+static INLINE void Block_Y_8_4(void) { Block_Y_xx_4(8, W_8y, W_12y, W_0y, W_4y); }
+static INLINE void Block_Y_8_5(void) { Block_Y_xx_5(8, W_8y, W_12y, W_0y, W_4y); }
+static INLINE void Block_Y_8_6(void) { Block_Y_xx_6(8, W_8y, W_12y, W_0y, W_4y); }
+static INLINE void Block_Y_8_7(void) { Block_Y_xx_7(8, W_8y, W_12y, W_0y, W_4y); }
+static INLINE void Block_Y_8_8(void) { Block_Y_xx_8(8, W_8y, W_12y, W_0y, W_4y); }
+static INLINE void Block_Y_8_9(void) { Block_Y_xx_9(8, W_8y, W_12y, W_0y, W_4y); }
+static INLINE void Block_Y_8_10(void) { Block_Y_xx_10(8, W_8y, W_12y, W_0y, W_4y); }
+static INLINE void Block_Y_8_11(void) { Block_Y_xx_11(8, W_8y, W_12y, W_0y, W_4y); }
+static INLINE void Block_Y_8_12(word64 *w) { Block_Y_xx_12(8, W_8y, W_12y, W_0y, W_4y); }
 
-static INLINE void Block_Y_12_1(void) { Block_Y_xx_1(12, W_12y, W_0y, W_4y, W_8y) ; }
-static INLINE void Block_Y_12_2(void) { Block_Y_xx_2(12, W_12y, W_0y, W_4y, W_8y) ; }
-static INLINE void Block_Y_12_3(void) { Block_Y_xx_3(12, W_12y, W_0y, W_4y, W_8y) ; }
-static INLINE void Block_Y_12_4(void) { Block_Y_xx_4(12, W_12y, W_0y, W_4y, W_8y) ; }
-static INLINE void Block_Y_12_5(void) { Block_Y_xx_5(12, W_12y, W_0y, W_4y, W_8y) ; }
-static INLINE void Block_Y_12_6(void) { Block_Y_xx_6(12, W_12y, W_0y, W_4y, W_8y) ; }
-static INLINE void Block_Y_12_7(void) { Block_Y_xx_7(12, W_12y, W_0y, W_4y, W_8y) ; }
-static INLINE void Block_Y_12_8(void) { Block_Y_xx_8(12, W_12y, W_0y, W_4y, W_8y) ; }
-static INLINE void Block_Y_12_9(void) { Block_Y_xx_9(12, W_12y, W_0y, W_4y, W_8y) ; }
-static INLINE void Block_Y_12_10(void) { Block_Y_xx_10(12, W_12y, W_0y, W_4y, W_8y) ; }
-static INLINE void Block_Y_12_11(void) { Block_Y_xx_11(12, W_12y, W_0y, W_4y, W_8y) ; }
-static INLINE void Block_Y_12_12(word64 *w) { Block_Y_xx_12(12, W_12y, W_0y, W_4y, W_8y) ; }
+static INLINE void Block_Y_12_1(void) { Block_Y_xx_1(12, W_12y, W_0y, W_4y, W_8y); }
+static INLINE void Block_Y_12_2(void) { Block_Y_xx_2(12, W_12y, W_0y, W_4y, W_8y); }
+static INLINE void Block_Y_12_3(void) { Block_Y_xx_3(12, W_12y, W_0y, W_4y, W_8y); }
+static INLINE void Block_Y_12_4(void) { Block_Y_xx_4(12, W_12y, W_0y, W_4y, W_8y); }
+static INLINE void Block_Y_12_5(void) { Block_Y_xx_5(12, W_12y, W_0y, W_4y, W_8y); }
+static INLINE void Block_Y_12_6(void) { Block_Y_xx_6(12, W_12y, W_0y, W_4y, W_8y); }
+static INLINE void Block_Y_12_7(void) { Block_Y_xx_7(12, W_12y, W_0y, W_4y, W_8y); }
+static INLINE void Block_Y_12_8(void) { Block_Y_xx_8(12, W_12y, W_0y, W_4y, W_8y); }
+static INLINE void Block_Y_12_9(void) { Block_Y_xx_9(12, W_12y, W_0y, W_4y, W_8y); }
+static INLINE void Block_Y_12_10(void) { Block_Y_xx_10(12, W_12y, W_0y, W_4y, W_8y); }
+static INLINE void Block_Y_12_11(void) { Block_Y_xx_11(12, W_12y, W_0y, W_4y, W_8y); }
+static INLINE void Block_Y_12_12(word64 *w) { Block_Y_xx_12(12, W_12y, W_0y, W_4y, W_8y); }
 
 
 static int Transform_AVX2(Sha512* sha512)
 {
     const word64* K = K512;
-    word64 w[4] ;
-    word32 j /*, k*/;
+    word64 w[4];
+    word32 j;
     word64 T[8];
+
     /* Copy digest to working vars */
     XMEMCPY(T, sha512->digest, sizeof(T));
 
-    W_from_buff_Y(sha512->buffer) ;
-    MOVE_to_MEMy(w,0, W_0y) ; 
+    W_from_buff_Y(sha512->buffer);
+    MOVE_to_MEMy(w,0, W_0y);
     for (j = 0; j < 80; j += 16) {
-        Ry_1( 0, w[0]); Block_Y_0_1(); Ry_2( 0, w[0]); Block_Y_0_2(); 
-                                       Ry_3( 0, w[0]); Block_Y_0_3(); 
-        Ry_1( 1, w[1]); Block_Y_0_4(); Ry_2( 1, w[1]); Block_Y_0_5(); 
-                                       Ry_3( 1, w[1]); Block_Y_0_6();  
-        Ry_1( 2, w[2]); Block_Y_0_7(); Ry_2( 2, w[2]); Block_Y_0_8(); 
+        Ry_1( 0, w[0]); Block_Y_0_1(); Ry_2( 0, w[0]); Block_Y_0_2();
+                                       Ry_3( 0, w[0]); Block_Y_0_3();
+        Ry_1( 1, w[1]); Block_Y_0_4(); Ry_2( 1, w[1]); Block_Y_0_5();
+                                       Ry_3( 1, w[1]); Block_Y_0_6();
+        Ry_1( 2, w[2]); Block_Y_0_7(); Ry_2( 2, w[2]); Block_Y_0_8();
                                        Ry_3( 2, w[2]); Block_Y_0_9();
         Ry_1( 3, w[3]); Block_Y_0_10();Ry_2( 3, w[3]); Block_Y_0_11();
                                        Ry_3( 3, w[3]); Block_Y_0_12(w);
-        
-        Ry_1( 4, w[0]); Block_Y_4_1(); Ry_2( 4, w[0]); Block_Y_4_2(); 
-                                       Ry_3( 4, w[0]); Block_Y_4_3(); 
-        Ry_1( 5, w[1]); Block_Y_4_4(); Ry_2( 5, w[1]); Block_Y_4_5(); 
+
+        Ry_1( 4, w[0]); Block_Y_4_1(); Ry_2( 4, w[0]); Block_Y_4_2();
+                                       Ry_3( 4, w[0]); Block_Y_4_3();
+        Ry_1( 5, w[1]); Block_Y_4_4(); Ry_2( 5, w[1]); Block_Y_4_5();
                                        Ry_3( 5, w[1]); Block_Y_4_6();
-        Ry_1( 6, w[2]); Block_Y_4_7(); Ry_2( 6, w[2]); Block_Y_4_8(); 
+        Ry_1( 6, w[2]); Block_Y_4_7(); Ry_2( 6, w[2]); Block_Y_4_8();
                                        Ry_3( 6, w[2]); Block_Y_4_9();
-        Ry_1( 7, w[3]); Block_Y_4_10(); Ry_2( 7, w[3]);Block_Y_4_11(); 
-                                        Ry_3( 7, w[3]);Block_Y_4_12(w);  
-        
-        Ry_1( 8, w[0]); Block_Y_8_1(); Ry_2( 8, w[0]); Block_Y_8_2(); 
+        Ry_1( 7, w[3]); Block_Y_4_10(); Ry_2( 7, w[3]);Block_Y_4_11();
+                                        Ry_3( 7, w[3]);Block_Y_4_12(w);
+
+        Ry_1( 8, w[0]); Block_Y_8_1(); Ry_2( 8, w[0]); Block_Y_8_2();
                                        Ry_3( 8, w[0]); Block_Y_8_3();
-        Ry_1( 9, w[1]); Block_Y_8_4(); Ry_2( 9, w[1]); Block_Y_8_5(); 
+        Ry_1( 9, w[1]); Block_Y_8_4(); Ry_2( 9, w[1]); Block_Y_8_5();
                                        Ry_3( 9, w[1]); Block_Y_8_6();
-        Ry_1(10, w[2]); Block_Y_8_7(); Ry_2(10, w[2]); Block_Y_8_8(); 
-                                       Ry_3(10, w[2]); Block_Y_8_9(); 
+        Ry_1(10, w[2]); Block_Y_8_7(); Ry_2(10, w[2]); Block_Y_8_8();
+                                       Ry_3(10, w[2]); Block_Y_8_9();
         Ry_1(11, w[3]); Block_Y_8_10();Ry_2(11, w[3]); Block_Y_8_11();
                                        Ry_3(11, w[3]); Block_Y_8_12(w);
-                 
-        Ry_1(12, w[0]); Block_Y_12_1(); Ry_2(12, w[0]); Block_Y_12_2(); 
+
+        Ry_1(12, w[0]); Block_Y_12_1(); Ry_2(12, w[0]); Block_Y_12_2();
                                         Ry_3(12, w[0]); Block_Y_12_3();
-        Ry_1(13, w[1]); Block_Y_12_4(); Ry_2(13, w[1]); Block_Y_12_5(); 
-                                        Ry_3(13, w[1]); Block_Y_12_6(); 
-        Ry_1(14, w[2]); Block_Y_12_7(); Ry_2(14, w[2]); Block_Y_12_8(); 
+        Ry_1(13, w[1]); Block_Y_12_4(); Ry_2(13, w[1]); Block_Y_12_5();
+                                        Ry_3(13, w[1]); Block_Y_12_6();
+        Ry_1(14, w[2]); Block_Y_12_7(); Ry_2(14, w[2]); Block_Y_12_8();
                                         Ry_3(14, w[2]); Block_Y_12_9();
         Ry_1(15, w[3]); Block_Y_12_10();Ry_2(15, w[3]); Block_Y_12_11();
                                         Ry_3(15, w[3]);Block_Y_12_12(w);
     }
- 
-    /* Add the working vars back into digest */
 
+    /* Add the working vars back into digest */
     sha512->digest[0] += a(0);
     sha512->digest[1] += b(0);
     sha512->digest[2] += c(0);
@@ -1276,19 +1345,22 @@ static int Transform_AVX2(Sha512* sha512)
     sha512->digest[7] += h(0);
 
     /* Wipe variables */
-    #if !defined(HAVE_INTEL_AVX1)&&!defined(HAVE_INTEL_AVX2)
+#if !defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2)
     XMEMSET(W, 0, sizeof(word64) * 16);
-    #endif
+#endif
     XMEMSET(T, 0, sizeof(T));
 
     return 0;
 }
-
-#endif
+#endif /* HAVE_INTEL_AVX2 */
 
 
+
+/* -------------------------------------------------------------------------- */
+/* SHA384 */
+/* -------------------------------------------------------------------------- */
 #ifdef WOLFSSL_SHA384
-int wc_InitSha384(Sha384* sha384)
+static int InitSha384(Sha384* sha384)
 {
     sha384->digest[0] = W64LIT(0xcbbb9d5dc1059ed8);
     sha384->digest[1] = W64LIT(0x629a292a367cd507);
@@ -1303,32 +1375,151 @@ int wc_InitSha384(Sha384* sha384)
     sha384->loLen   = 0;
     sha384->hiLen   = 0;
 
-#if defined(HAVE_INTEL_AVX1)|| defined(HAVE_INTEL_AVX2)
-    set_Transform() ;
-#endif
-    
     return 0;
 }
 
 int wc_Sha384Update(Sha384* sha384, const byte* data, word32 len)
 {
-    return Sha512Update((Sha512 *)sha384, data, len);
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384)
+    if (sha384->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA384) {
+    #if defined(HAVE_INTEL_QA)
+        return IntelQaSymSha384(&sha384->asyncDev, NULL, data, len);
+    #endif
+    }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+    return Sha512Update((Sha512*)sha384, data, len);
 }
 
 
 int wc_Sha384Final(Sha384* sha384, byte* hash)
 {
-    int ret = Sha512Final((Sha512 *)sha384);
+    int ret;
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384)
+    if (sha384->asyncDev.marker == WOLFSSL_ASYNC_MARKER_SHA384) {
+    #if defined(HAVE_INTEL_QA)
+        return IntelQaSymSha384(&sha384->asyncDev, hash, NULL,
+                                            SHA384_DIGEST_SIZE);
+    #endif
+    }
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+    ret = Sha512Final((Sha512*)sha384);
     if (ret != 0)
         return ret;
 
     XMEMCPY(hash, sha384->digest, SHA384_DIGEST_SIZE);
 
-    return wc_InitSha384(sha384);  /* reset state */
+    return InitSha384(sha384);  /* reset state */
 }
+
+
+/* Initialize a SHA-384 context.
+ * heap is stored as the context's heap hint; devId is handed to the async
+ * device layer when SHA-384 async support is compiled in, else ignored. */
+int wc_InitSha384_ex(Sha384* sha384, void* heap, int devId)
+{
+    int ret;
+
+    if (sha384 == NULL)
+        return BAD_FUNC_ARG;
+
+    sha384->heap = heap;
+    ret = InitSha384(sha384);
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384)
+    if (ret == 0)
+        ret = wolfAsync_DevCtxInit(&sha384->asyncDev,
+                        WOLFSSL_ASYNC_MARKER_SHA384, sha384->heap, devId);
+#else
+    (void)devId;
+#endif /* WOLFSSL_ASYNC_CRYPT */
+
+    return ret;
+}
+
+int wc_InitSha384(Sha384* sha384)
+{
+    return wc_InitSha384_ex(sha384, NULL, INVALID_DEVID);
+}
+
+void wc_Sha384Free(Sha384* sha384)
+{
+    if (sha384 == NULL)
+        return;
+
+#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA384)
+    wolfAsync_DevCtxFree(&sha384->asyncDev, WOLFSSL_ASYNC_MARKER_SHA384);
+#endif /* WOLFSSL_ASYNC_CRYPT */
+}
+
 #endif /* WOLFSSL_SHA384 */
 
 #endif /* HAVE_FIPS */
 
-#endif /* WOLFSSL_SHA512 */
 
+/* Write the digest of the data hashed so far; a copy gets finalized. */
+int wc_Sha512GetHash(Sha512* sha512, byte* hash)
+{
+    Sha512 tmpSha512;
+    int    ret;
+
+    if (sha512 == NULL || hash == NULL)
+        return BAD_FUNC_ARG;
+
+    ret = wc_Sha512Copy(sha512, &tmpSha512);
+    if (ret != 0)
+        return ret;
+    return wc_Sha512Final(&tmpSha512, hash);
+}
+
+/* Copy src into dst (struct copy, plus async device copy when enabled). */
+int wc_Sha512Copy(Sha512* src, Sha512* dst)
+{
+    int ret = 0;
+
+    if (src == NULL || dst == NULL)
+        return BAD_FUNC_ARG;
+
+    XMEMCPY(dst, src, sizeof(Sha512));
+#ifdef WOLFSSL_ASYNC_CRYPT
+    ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
+#endif
+
+    return ret;
+}
+
+#ifdef WOLFSSL_SHA384
+/* Write the digest of the data hashed so far; a copy gets finalized. */
+int wc_Sha384GetHash(Sha384* sha384, byte* hash)
+{
+    Sha384 tmpSha384;
+    int    ret;
+
+    if (sha384 == NULL || hash == NULL)
+        return BAD_FUNC_ARG;
+
+    ret = wc_Sha384Copy(sha384, &tmpSha384);
+    if (ret != 0)
+        return ret;
+    return wc_Sha384Final(&tmpSha384, hash);
+}
+/* Copy src into dst (struct copy, plus async device copy when enabled). */
+int wc_Sha384Copy(Sha384* src, Sha384* dst)
+{
+    int ret = 0;
+
+    if (src == NULL || dst == NULL)
+        return BAD_FUNC_ARG;
+
+    XMEMCPY(dst, src, sizeof(Sha384));
+#ifdef WOLFSSL_ASYNC_CRYPT
+    ret = wolfAsync_DevCopy(&src->asyncDev, &dst->asyncDev);
+#endif
+
+    return ret;
+}
+#endif /* WOLFSSL_SHA384 */
+
+#endif /* WOLFSSL_SHA512 */
diff --git a/wolfcrypt/src/signature.c b/wolfcrypt/src/signature.c
index 388aafde0..247d5d931 100644
--- a/wolfcrypt/src/signature.c
+++ b/wolfcrypt/src/signature.c
@@ -175,7 +175,15 @@ int wc_SignatureVerify(
                 int is_valid_sig = 0;
 
                 /* Perform verification of signature using provided ECC key */
-                ret = wc_ecc_verify_hash(sig, sig_len, hash_data, hash_len, &is_valid_sig, (ecc_key*)key);
+                do {
+                #ifdef WOLFSSL_ASYNC_CRYPT
+                    ret = wc_AsyncWait(ret, &((ecc_key*)key)->asyncDev,
+                        WC_ASYNC_FLAG_CALL_AGAIN);
+                #endif
+                if (ret >= 0)
+                    ret = wc_ecc_verify_hash(sig, sig_len, hash_data, hash_len,
+                        &is_valid_sig, (ecc_key*)key);
+                } while (ret == WC_PENDING_E);
                 if (ret != 0 || is_valid_sig != 1) {
                     ret = SIG_VERIFY_E;
                 }
@@ -212,8 +220,15 @@ int wc_SignatureVerify(
                 plain_data = (byte*)XMALLOC(plain_len, NULL, DYNAMIC_TYPE_TMP_BUFFER);
                 if (plain_data) {
                     /* Perform verification of signature using provided RSA key */
-                    ret = wc_RsaSSL_Verify(sig, sig_len, plain_data, plain_len,
-                        (RsaKey*)key);
+                    do {
+                    #ifdef WOLFSSL_ASYNC_CRYPT
+                        ret = wc_AsyncWait(ret, &((RsaKey*)key)->asyncDev,
+                            WC_ASYNC_FLAG_CALL_AGAIN);
+                    #endif
+                    if (ret >= 0)
+                        ret = wc_RsaSSL_Verify(sig, sig_len, plain_data,
+                            plain_len, (RsaKey*)key);
+                    } while (ret == WC_PENDING_E);
                     if (ret >= 0) {
                         if ((word32)ret == hash_len &&
                                 XMEMCMP(plain_data, hash_data, hash_len) == 0) {
@@ -296,7 +311,15 @@ int wc_SignatureGenerate(
             case WC_SIGNATURE_TYPE_ECC:
 #if defined(HAVE_ECC) && defined(HAVE_ECC_SIGN)
                 /* Create signature using provided ECC key */
-                ret = wc_ecc_sign_hash(hash_data, hash_len, sig, sig_len, rng, (ecc_key*)key);
+                do {
+                #ifdef WOLFSSL_ASYNC_CRYPT
+                    ret = wc_AsyncWait(ret, &((ecc_key*)key)->asyncDev,
+                        WC_ASYNC_FLAG_CALL_AGAIN);
+                #endif
+                if (ret >= 0)
+                    ret = wc_ecc_sign_hash(hash_data, hash_len, sig, sig_len,
+                        rng, (ecc_key*)key);
+                } while (ret == WC_PENDING_E);
 #else
                 ret = SIG_TYPE_E;
 #endif
@@ -319,7 +342,15 @@ int wc_SignatureGenerate(
             case WC_SIGNATURE_TYPE_RSA:
 #ifndef NO_RSA
                 /* Create signature using provided RSA key */
-                ret = wc_RsaSSL_Sign(hash_data, hash_len, sig, *sig_len, (RsaKey*)key, rng);
+                do {
+                #ifdef WOLFSSL_ASYNC_CRYPT
+                    ret = wc_AsyncWait(ret, &((RsaKey*)key)->asyncDev,
+                        WC_ASYNC_FLAG_CALL_AGAIN);
+                #endif
+                    if (ret >= 0)
+                        ret = wc_RsaSSL_Sign(hash_data, hash_len, sig, *sig_len,
+                            (RsaKey*)key, rng);
+                } while (ret == WC_PENDING_E);
                 if (ret >= 0) {
                     *sig_len = ret;
                     ret = 0; /* Success */
diff --git a/wolfcrypt/src/tfm.c b/wolfcrypt/src/tfm.c
index cc74abd06..54f0dce54 100644
--- a/wolfcrypt/src/tfm.c
+++ b/wolfcrypt/src/tfm.c
@@ -988,7 +988,7 @@ int fp_mulmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
 
   fp_init(&t);
   fp_mul(a, b, &t);
-#if defined(ALT_ECC_SIZE) || defined(WOLFSSL_ASYNC_CRYPT)
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
   if (d->size < FP_SIZE) {
     err = fp_mod(&t, c, &t);
     fp_copy(&t, d);
@@ -1009,7 +1009,7 @@ int fp_submod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
 
   fp_init(&t);
   fp_sub(a, b, &t);
-#if defined(ALT_ECC_SIZE) || defined(WOLFSSL_ASYNC_CRYPT)
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
   if (d->size < FP_SIZE) {
     err = fp_mod(&t, c, &t);
     fp_copy(&t, d);
@@ -1030,7 +1030,7 @@ int fp_addmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
 
   fp_init(&t);
   fp_add(a, b, &t);
-#if defined(ALT_ECC_SIZE) || defined(WOLFSSL_ASYNC_CRYPT)
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
   if (d->size < FP_SIZE) {
     err = fp_mod(&t, c, &t);
     fp_copy(&t, d);
@@ -2194,7 +2194,7 @@ void fp_sub_d(fp_int *a, fp_digit b, fp_int *c)
    fp_int tmp;
    fp_init(&tmp);
    fp_set(&tmp, b);
-#if defined(ALT_ECC_SIZE) || defined(WOLFSSL_ASYNC_CRYPT)
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
    if (c->size < FP_SIZE) {
      fp_sub(a, &tmp, &tmp);
      fp_copy(&tmp, c);
@@ -2218,8 +2218,11 @@ int mp_init (mp_int * a)
 
 void fp_init(fp_int *a)
 {
-#if defined(ALT_ECC_SIZE) || defined(WOLFSSL_ASYNC_CRYPT)
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
     a->size = FP_SIZE;
+#endif
+#ifdef HAVE_WOLF_BIGINT
+    wc_bigint_init(&a->raw);
 #endif
     fp_zero(a);
 }
@@ -2229,7 +2232,7 @@ void fp_zero(fp_int *a)
     int size = FP_SIZE;
     a->used = 0;
     a->sign = FP_ZPOS;
-#if defined(ALT_ECC_SIZE) || defined(WOLFSSL_ASYNC_CRYPT)
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
     size = a->size;
 #endif
     XMEMSET(a->dp, 0, size * sizeof(fp_digit));
@@ -2240,17 +2243,52 @@ void fp_clear(fp_int *a)
     int size = FP_SIZE;
     a->used = 0;
     a->sign = FP_ZPOS;
-#if defined(ALT_ECC_SIZE) || defined(WOLFSSL_ASYNC_CRYPT)
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
+    size = a->size;
+#endif
+    XMEMSET(a->dp, 0, size * sizeof(fp_digit));
+    fp_free(a);
+}
+
+void fp_forcezero (mp_int * a)
+{
+    int size = FP_SIZE;
+    a->used = 0;
+    a->sign = FP_ZPOS;
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
     size = a->size;
 #endif
     ForceZero(a->dp, size * sizeof(fp_digit));
+#ifdef HAVE_WOLF_BIGINT
+    wc_bigint_zero(&a->raw);
+#endif
+    fp_free(a);
+}
+
+void mp_forcezero (mp_int * a)
+{
+    fp_forcezero(a);
+}
+
+void fp_free(fp_int* a)
+{
+#ifdef HAVE_WOLF_BIGINT
+    wc_bigint_free(&a->raw);
+#else
+    (void)a;
+#endif
 }
 
 
 /* clear one (frees)  */
 void mp_clear (mp_int * a)
 {
-    fp_zero(a);
+    fp_clear(a);
+}
+
+void mp_free(mp_int* a)
+{
+    fp_free(a);
 }
 
 /* handle up to 6 inits */
@@ -2425,7 +2463,7 @@ void fp_copy(fp_int *a, fp_int *b)
 {
     /* if source and destination are different */
     if (a != b) {
-#if defined(ALT_ECC_SIZE) || defined(WOLFSSL_ASYNC_CRYPT)
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
         /* verify a will fit in b */
         if (b->size >= a->used) {
             int x, oldused;
@@ -2524,7 +2562,7 @@ int fp_sqrmod(fp_int *a, fp_int *b, fp_int *c)
   fp_init(&t);
   fp_sqr(a, &t);
 
-#if defined(ALT_ECC_SIZE) || defined(WOLFSSL_ASYNC_CRYPT)
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
   if (c->size < FP_SIZE) {
     err = fp_mod(&t, b, &t);
     fp_copy(&t, c);
@@ -3271,7 +3309,7 @@ void mp_dump(const char* desc, mp_int* a, byte verbose)
   char buffer[FP_SIZE * sizeof(fp_digit) * 2];
   int size = FP_SIZE;
 
-#if defined(ALT_ECC_SIZE) || defined(WOLFSSL_ASYNC_CRYPT)
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
   size = a->size;
 #endif
 
diff --git a/wolfcrypt/src/wc_port.c b/wolfcrypt/src/wc_port.c
index 4d75a2253..05feaac96 100755
--- a/wolfcrypt/src/wc_port.c
+++ b/wolfcrypt/src/wc_port.c
@@ -29,6 +29,12 @@
 #include 
 #include 
 #include 
+#ifdef HAVE_ECC
+    #include 
+#endif
+#ifdef WOLFSSL_ASYNC_CRYPT
+    #include 
+#endif
 
 /* IPP header files for library initialization */
 #ifdef HAVE_FAST_RSA
@@ -48,6 +54,10 @@
     #include 
 #endif
 
+#if defined(USE_WOLFSSL_MEMORY) && defined(WOLFSSL_TRACK_MEMORY)
+    #include 
+#endif
+
 #ifdef _MSC_VER
     /* 4996 warning to use MS extensions e.g., strcpy_s instead of strncpy */
     #pragma warning(disable: 4996)
@@ -64,6 +74,14 @@ int wolfCrypt_Init(void)
     int ret = 0;
 
     if (initRefCount == 0) {
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        wolfAsync_HardwareStart();
+    #endif
+
+    #if defined(WOLFSSL_TRACK_MEMORY) && !defined(WOLFSSL_STATIC_MEMORY)
+        InitMemoryTracker();
+    #endif
+
     #if WOLFSSL_CRYPT_HW_MUTEX
         /* If crypto hardware mutex protection is enabled, then initialize it */
         wolfSSL_CryptHwMutexInit();
@@ -96,7 +114,7 @@ int wolfCrypt_Init(void)
     #endif
 
     #if defined(OPENSSL_EXTRA) || defined(HAVE_WEBSERVER)
-            wolfSSL_EVP_init();
+        wolfSSL_EVP_init();
     #endif
 
     #if defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE)
@@ -106,6 +124,15 @@ int wolfCrypt_Init(void)
         }
     #endif
 
+    #ifdef HAVE_ECC
+        #ifdef ECC_CACHE_CURVE
+            if ((ret = wc_ecc_curve_cache_init()) != 0) {
+                WOLFSSL_MSG("Error creating curve cache");
+                return ret;
+            }
+        #endif
+    #endif
+
         initRefCount = 1;
     }
 
@@ -120,9 +147,28 @@ int wolfCrypt_Cleanup(void)
 
     WOLFSSL_ENTER("wolfCrypt_Cleanup");
 
-    #if defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE)
-        ret = wc_LoggingCleanup();
+#ifdef HAVE_ECC
+    #ifdef FP_ECC
+        wc_ecc_fp_free();
     #endif
+    #ifdef ECC_CACHE_CURVE
+        wc_ecc_curve_cache_free();
+    #endif
+#endif
+
+#if defined(OPENSSL_EXTRA) || defined(DEBUG_WOLFSSL_VERBOSE)
+    ret = wc_LoggingCleanup();
+#endif
+
+#if defined(WOLFSSL_TRACK_MEMORY) && !defined(WOLFSSL_STATIC_MEMORY)
+    ShowMemoryTracker();
+#endif
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    wolfAsync_HardwareStop();
+#endif
+
+    initRefCount = 0; /* allow re-init */
 
     return ret;
 }
diff --git a/wolfcrypt/src/wolfevent.c b/wolfcrypt/src/wolfevent.c
index 6a8379bfe..6e3eae2b3 100644
--- a/wolfcrypt/src/wolfevent.c
+++ b/wolfcrypt/src/wolfevent.c
@@ -103,16 +103,7 @@ int wolfEventQueue_Push(WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event)
     event->next = NULL;
     event->pending = 1;
 
-    if (queue->tail == NULL)  {
-        queue->head = event;
-    }
-    else {
-        queue->tail->next = event;
-        event->prev = queue->tail;
-    }
-    queue->tail = event;      /* add to the end either way */
-    queue->count++;
-    ret = 0;
+    ret = wolfEventQueue_Add(queue, event);
 
 #ifndef SINGLE_THREADED
     wc_UnLockMutex(&queue->lock);
@@ -147,6 +138,26 @@ int wolfEventQueue_Pop(WOLF_EVENT_QUEUE* queue, WOLF_EVENT** event)
     return ret;
 }
 
+/* Append event to the tail of queue; assumes queue is locked by caller */
+int wolfEventQueue_Add(WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event)
+{
+    if (queue == NULL || event == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    event->next = NULL;        /* new tail: drop links from any prior use */
+    event->prev = queue->tail; /* NULL when the queue is empty */
+    if (queue->tail == NULL)  {
+        queue->head = event;
+    }
+    else {
+        queue->tail->next = event;
+    }
+    queue->tail = event;      /* add to the end either way */
+    queue->count++;
+    return 0;
+}
+
 /* assumes queue is locked by caller */
 int wolfEventQueue_Remove(WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event)
 {
diff --git a/wolfcrypt/src/wolfmath.c b/wolfcrypt/src/wolfmath.c
index df80a8406..ae569e12e 100644
--- a/wolfcrypt/src/wolfmath.c
+++ b/wolfcrypt/src/wolfmath.c
@@ -40,6 +40,18 @@
 
 #if defined(USE_FAST_MATH) || !defined(NO_BIG_INT)
 
+#ifdef WOLFSSL_ASYNC_CRYPT
+    #include <wolfssl/wolfcrypt/async.h>
+#endif
+
+#ifdef NO_INLINE /* misc helpers: header only, or the source pulled in */
+    #include <wolfssl/wolfcrypt/misc.h>
+#else
+    #define WOLFSSL_MISC_INCLUDED
+    #include <wolfcrypt/src/misc.c>
+#endif
+
+
 int get_digit_count(mp_int* a)
 {
     if (a == NULL)
@@ -106,4 +118,132 @@ int mp_rand(mp_int* a, int digits, WC_RNG* rng)
 }
 #endif /* WC_RSA_BLINDING */
 
+
+#ifdef HAVE_WOLF_BIGINT
+void wc_bigint_init(WC_BIGINT* a)
+{
+    if (a == NULL)
+        return; /* nothing to initialize */
+    a->heap = NULL;
+    a->len  = 0;    /* empty: zero length and no buffer */
+    a->buf  = NULL;
+}
+
+int wc_bigint_alloc(WC_BIGINT* a, word32 sz)
+{
+    /* Make a hold sz zeroed bytes. An existing buffer is kept when
+     * sz <= a->len, otherwise it is freed and a new one is allocated. */
+    if (a == NULL)
+        return BAD_FUNC_ARG;
+
+    if (sz > 0) {
+        if (a->buf && sz > a->len) {
+            wc_bigint_free(a); /* too small, replaced below */
+        }
+        if (a->buf == NULL) {
+            a->buf = (byte*)XMALLOC(sz, a->heap, DYNAMIC_TYPE_WOLF_BIGINT);
+            if (a->buf == NULL) {
+                a->len = 0; /* no buffer, so do not advertise a length */
+                return MP_MEM;
+            }
+        }
+        /* zero reused buffers too, not only freshly allocated ones */
+        XMEMSET(a->buf, 0, sz);
+    }
+    a->len = sz;
+
+    return MP_OKAY;
+}
+
+/* copies inlen bytes from in into a; input is assumed big endian */
+int wc_bigint_from_unsigned_bin(WC_BIGINT* a, const byte* in, word32 inlen)
+{
+    int ret;
+
+    if (!a || !in || inlen == 0)
+        return BAD_FUNC_ARG;
+
+    ret = wc_bigint_alloc(a, inlen);
+    if (ret != 0)
+        return ret; /* sizing the buffer failed, nothing was copied */
+
+    XMEMCPY(a->buf, in, inlen);
+    return ret;
+}
+
+int wc_bigint_to_unsigned_bin(WC_BIGINT* a, byte* out, word32* outlen)
+{
+    /* copy up to *outlen bytes of a into out; *outlen returns the count */
+    word32 sz = 0;
+
+    if (a == NULL || out == NULL || outlen == NULL || *outlen == 0)
+        return BAD_FUNC_ARG;
+
+    /* no buffer means nothing to export, whatever a->len says */
+    if (a->buf) {
+        sz = a->len;
+        if (sz > *outlen) { /* trim to fit into output buffer */
+            WOLFSSL_MSG("wc_bigint_to_unsigned_bin: Truncating output");
+            sz = *outlen;
+        }
+        XMEMCPY(out, a->buf, sz);
+    }
+
+    *outlen = sz;
+
+    return MP_OKAY;
+}
+
+void wc_bigint_zero(WC_BIGINT* a)
+{
+    if (a == NULL || a->buf == NULL)
+        return; /* no buffer, nothing to wipe */
+    ForceZero(a->buf, a->len);
+}
+
+void wc_bigint_free(WC_BIGINT* a)
+{
+    if (a == NULL)
+        return;
+    if (a->buf != NULL) { /* release the buffer, if there is one */
+        XFREE(a->buf, a->heap, DYNAMIC_TYPE_WOLF_BIGINT);
+    }
+    a->len = 0; /* back to the empty state; the heap hint is left as is */
+    a->buf = NULL;
+}
+
+int wc_mp_to_bigint(mp_int* src, WC_BIGINT* dst)
+{
+    int ret;
+    word32 len;
+
+    if (!src || !dst)
+        return BAD_FUNC_ARG;
+
+    /* size dst for the unsigned binary form of src, then export into it */
+    len = mp_unsigned_bin_size(src);
+    ret = wc_bigint_alloc(dst, len);
+    if (ret != MP_OKAY)
+        return ret;
+    return mp_to_unsigned_bin(src, dst->buf);
+}
+
+int wc_bigint_to_mp(WC_BIGINT* src, mp_int* dst)
+{
+    int ret;
+
+    /* both arguments are required, and src must own a buffer */
+    if (src == NULL || dst == NULL || src->buf == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
+    ret = mp_read_unsigned_bin(dst, src->buf, src->len);
+    /* src is consumed: its buffer is freed whether or not the read worked */
+    wc_bigint_free(src);
+
+    return ret;
+}
+
+#endif /* HAVE_WOLF_BIGINT */
+
 #endif /* USE_FAST_MATH || !NO_BIG_INT */
diff --git a/wolfcrypt/test/test.c b/wolfcrypt/test/test.c
index 15c5a6783..603656901 100644
--- a/wolfcrypt/test/test.c
+++ b/wolfcrypt/test/test.c
@@ -177,9 +177,6 @@
 
 #include "wolfcrypt/test/test.h"
 
-#ifdef USE_WOLFSSL_MEMORY
-    #include "wolfssl/wolfcrypt/mem_track.h"
-#endif
 
 /* for async devices */
 static int devId = INVALID_DEVID;
@@ -344,7 +341,11 @@ static void myFipsCb(int ok, int err, const char* hash)
     #ifdef BENCH_EMBEDDED
         static byte gTestMemory[10000];
     #else
-        static byte gTestMemory[100000];
+        #if defined(USE_FAST_MATH) && !defined(ALT_ECC_SIZE)
+            static byte gTestMemory[130000];
+        #else
+            static byte gTestMemory[80000];
+        #endif
     #endif
 #endif
 
@@ -366,10 +367,6 @@ int wolfcrypt_test(void* args)
     }
 #endif
 
-#if defined(USE_WOLFSSL_MEMORY) && defined(WOLFSSL_TRACK_MEMORY)
-    InitMemoryTracker();
-#endif
-
 #if defined(DEBUG_WOLFSSL) && !defined(HAVE_VALGRIND)
     wolfSSL_Debugging_ON();
 #endif
@@ -395,9 +392,8 @@ int wolfcrypt_test(void* args)
 
 #ifdef WOLFSSL_ASYNC_CRYPT
     ret = wolfAsync_DevOpen(&devId);
-    if (ret != 0) {
-        err_sys("Async device open failed", -1236);
-        return -1236;
+    if (ret < 0) {
+        printf("Async device open failed\nRunning without async\n");
     }
 #else
     (void)devId;
@@ -753,12 +749,6 @@ int wolfcrypt_test(void* args)
         else
             printf( "ECC buffer test passed!\n");
     #endif
-    #if defined(FP_ECC)
-        wc_ecc_fp_free();
-    #endif
-    #ifdef ECC_CACHE_CURVE
-        wc_ecc_curve_cache_free();
-    #endif
 #endif
 
 #ifdef HAVE_CURVE25519
@@ -832,8 +822,8 @@ int wolfcrypt_test(void* args)
         printf( "memcb    test passed!\n");
 #endif
 
-#if defined(USE_WOLFSSL_MEMORY) && defined(WOLFSSL_TRACK_MEMORY)
-    ShowMemoryTracker();
+#ifdef WOLFSSL_ASYNC_CRYPT
+    wolfAsync_DevClose(&devId);
 #endif
 
     ((func_args*)args)->return_code = ret;
@@ -912,7 +902,7 @@ int error_test()
      * APIs. Check that the values that are not errors map to the unknown
      * string.
      */
-    for (i = OPEN_RAN_E; i >= BAD_PATH_ERROR; i--) {
+    for (i = MAX_CODE_E-1; i >= WC_LAST_E; i--) {
         errStr = wc_GetErrorString(i);
         wc_ErrorString(i, out);
 
@@ -1144,9 +1134,10 @@ int md2_test()
 #ifndef NO_MD5
 int md5_test(void)
 {
+    int ret;
     Md5  md5;
-    Md5  partialMd5;
     byte hash[MD5_DIGEST_SIZE];
+    byte hashcopy[MD5_DIGEST_SIZE];
 
     testVector a, b, c, d, e;
     testVector test_md5[5];
@@ -1190,35 +1181,31 @@ int md5_test(void)
     test_md5[3] = d;
     test_md5[4] = e;
 
-    wc_InitMd5(&md5);
+    ret = wc_InitMd5_ex(&md5, HEAP_HINT, devId);
+    if (ret != 0)
+        return -4009;
 
     for (i = 0; i < times; ++i) {
-        wc_Md5Update(&md5, (byte*)test_md5[i].input, (word32)test_md5[i].inLen);
-        wc_Md5Final(&md5, hash);
+        ret = wc_Md5Update(&md5, (byte*)test_md5[i].input, (word32)test_md5[i].inLen);
+        if (ret != 0)
+            return -4010;
+
+        ret = wc_Md5GetHash(&md5, hashcopy);
+        if (ret != 0)
+            return -4011;
+
+        ret = wc_Md5Final(&md5, hash);
+        if (ret != 0)
+            return -4012;
 
         if (XMEMCMP(hash, test_md5[i].output, MD5_DIGEST_SIZE) != 0)
-            return -5 - i;
+            return -10 - i;
+
+        if (XMEMCMP(hash, hashcopy, MD5_DIGEST_SIZE) != 0)
+            return -20 - i;
     }
 
-    /* Position restoration and getting the hash doesn't invalidate state. */
-    wc_InitMd5(&md5);
-    wc_InitMd5(&partialMd5);
-    wc_Md5Update(&partialMd5, (byte*)a.input, 1);
-    wc_Md5RestorePos(&md5, &partialMd5);
-    wc_Md5GetHash(&partialMd5, hash);
-    wc_Md5Update(&partialMd5, (byte*)a.input + 1, (word32)a.inLen - 1);
-    wc_Md5Update(&md5, (byte*)a.input + 1, (word32)a.inLen - 1);
-    wc_Md5Final(&partialMd5, hash);
-    if (XMEMCMP(hash, a.output, a.outLen) != 0)
-        return -10;
-    XMEMSET(hash, 0, a.outLen);
-    wc_Md5Final(&md5, hash);
-    if (XMEMCMP(hash, a.output, a.outLen) != 0)
-        return -11;
-    if (wc_Md5Hash((byte*)a.input, (word32)a.inLen, hash) != 0)
-        return -12;
-    if (XMEMCMP(hash, a.output, a.outLen) != 0)
-        return -13;
+    wc_Md5Free(&md5);
 
     return 0;
 }
@@ -1308,8 +1295,8 @@ int md4_test(void)
 int sha_test(void)
 {
     Sha  sha;
-    Sha  partialSha;
     byte hash[SHA_DIGEST_SIZE];
+    byte hashcopy[SHA_DIGEST_SIZE];
 
     testVector a, b, c, d;
     testVector test_sha[4];
@@ -1348,54 +1335,31 @@ int sha_test(void)
     test_sha[2] = c;
     test_sha[3] = d;
 
-    ret = wc_InitSha(&sha);
+    ret = wc_InitSha_ex(&sha, HEAP_HINT, devId);
     if (ret != 0)
         return -4001;
 
     for (i = 0; i < times; ++i) {
-        wc_ShaUpdate(&sha, (byte*)test_sha[i].input, (word32)test_sha[i].inLen);
-        wc_ShaFinal(&sha, hash);
+        ret = wc_ShaUpdate(&sha, (byte*)test_sha[i].input, (word32)test_sha[i].inLen);
+        if (ret != 0)
+            return -4002;
+
+        ret = wc_ShaGetHash(&sha, hashcopy);
+        if (ret != 0)
+            return -4003;
+
+        ret = wc_ShaFinal(&sha, hash);
+        if (ret != 0)
+            return -4004;
 
         if (XMEMCMP(hash, test_sha[i].output, SHA_DIGEST_SIZE) != 0)
             return -10 - i;
+
+        if (XMEMCMP(hash, hashcopy, SHA_DIGEST_SIZE) != 0)
+            return -20 - i;
     }
 
-    /* Position restoration and getting the hash doesn't invalidate state. */
-    ret = wc_InitSha(&sha);
-    if (ret != 0)
-        return -20;
-    ret = wc_InitSha(&partialSha);
-    if (ret != 0)
-        return -21;
-    ret = wc_ShaUpdate(&partialSha, (byte*)a.input, 1);
-    if (ret != 0)
-        return -22;
-    wc_ShaRestorePos(&sha, &partialSha);
-    ret = wc_ShaGetHash(&partialSha, hash);
-    if (ret != 0)
-        return -23;
-    ret = wc_ShaUpdate(&partialSha, (byte*)a.input + 1, (word32)a.inLen - 1);
-    if (ret != 0)
-        return -24;
-    ret = wc_ShaUpdate(&sha, (byte*)a.input + 1, (word32)a.inLen - 1);
-    if (ret != 0)
-        return -25;
-    ret = wc_ShaFinal(&partialSha, hash);
-    if (ret != 0)
-        return -26;
-    if (XMEMCMP(hash, a.output, a.outLen) != 0)
-        return -27;
-    XMEMSET(hash, 0, a.outLen);
-    ret = wc_ShaFinal(&sha, hash);
-    if (ret != 0)
-        return -28;
-    if (XMEMCMP(hash, a.output, a.outLen) != 0)
-        return -29;
-    ret = wc_ShaHash((byte*)a.input, (word32)a.inLen, hash);
-    if (ret != 0)
-        return -30;
-    if (XMEMCMP(hash, a.output, a.outLen) != 0)
-        return -31;
+    wc_ShaFree(&sha);
 
     return 0;
 }
@@ -1537,6 +1501,7 @@ int sha224_test(void)
 {
     Sha224 sha;
     byte   hash[SHA224_DIGEST_SIZE];
+    byte   hashcopy[SHA224_DIGEST_SIZE];
 
     testVector a, b;
     testVector test_sha[2];
@@ -1558,7 +1523,7 @@ int sha224_test(void)
     test_sha[0] = a;
     test_sha[1] = b;
 
-    ret = wc_InitSha224(&sha);
+    ret = wc_InitSha224_ex(&sha, HEAP_HINT, devId);
     if (ret != 0)
         return -4005;
 
@@ -1566,13 +1531,20 @@ int sha224_test(void)
         ret = wc_Sha224Update(&sha, (byte*)test_sha[i].input,(word32)test_sha[i].inLen);
         if (ret != 0)
             return -4006;
-        ret = wc_Sha224Final(&sha, hash);
+        ret = wc_Sha224GetHash(&sha, hashcopy);
         if (ret != 0)
             return -4007;
+        ret = wc_Sha224Final(&sha, hash);
+        if (ret != 0)
+            return -4008;
 
         if (XMEMCMP(hash, test_sha[i].output, SHA224_DIGEST_SIZE) != 0)
             return -10 - i;
+
+        if (XMEMCMP(hash, hashcopy, SHA224_DIGEST_SIZE) != 0)
+            return -20 - i;
     }
+    wc_Sha224Free(&sha);
 
     /* Getting the hash doesn't invalidate state. */
     ret = wc_InitSha224(&sha);
@@ -1607,8 +1579,8 @@ int sha224_test(void)
 int sha256_test(void)
 {
     Sha256 sha;
-    Sha256 partialSha;
     byte   hash[SHA256_DIGEST_SIZE];
+    byte   hashcopy[SHA256_DIGEST_SIZE];
 
     testVector a, b;
     testVector test_sha[2];
@@ -1632,7 +1604,7 @@ int sha256_test(void)
     test_sha[0] = a;
     test_sha[1] = b;
 
-    ret = wc_InitSha256(&sha);
+    ret = wc_InitSha256_ex(&sha, HEAP_HINT, devId);
     if (ret != 0)
         return -4005;
 
@@ -1640,51 +1612,20 @@ int sha256_test(void)
         ret = wc_Sha256Update(&sha, (byte*)test_sha[i].input,(word32)test_sha[i].inLen);
         if (ret != 0)
             return -4006;
-        ret = wc_Sha256Final(&sha, hash);
+        ret = wc_Sha256GetHash(&sha, hashcopy);
         if (ret != 0)
             return -4007;
+        ret = wc_Sha256Final(&sha, hash);
+        if (ret != 0)
+            return -4008;
 
         if (XMEMCMP(hash, test_sha[i].output, SHA256_DIGEST_SIZE) != 0)
             return -10 - i;
+        if (XMEMCMP(hash, hashcopy, SHA256_DIGEST_SIZE) != 0)
+            return -20 - i;
     }
 
-    /* Position restoration and getting the hash doesn't invalidate state. */
-    ret = wc_InitSha256(&sha);
-    if (ret != 0)
-        return -20;
-    ret = wc_InitSha256(&partialSha);
-    if (ret != 0)
-        return -21;
-    ret = wc_Sha256Update(&partialSha, (byte*)a.input, 1);
-    if (ret != 0)
-        return -22;
-    wc_Sha256RestorePos(&sha, &partialSha);
-    ret = wc_Sha256GetHash(&partialSha, hash);
-    if (ret != 0)
-        return -23;
-    ret = wc_Sha256Update(&partialSha, (byte*)a.input + 1, (word32)a.inLen - 1);
-    if (ret != 0)
-        return -24;
-    ret = wc_Sha256Update(&sha, (byte*)a.input + 1, (word32)a.inLen - 1);
-    if (ret != 0)
-        return -25;
-    ret = wc_Sha256Final(&partialSha, hash);
-    if (ret != 0)
-        return -26;
-    if (XMEMCMP(hash, a.output, a.outLen) != 0)
-        return -27;
-    XMEMSET(hash, 0, a.outLen);
-    ret = wc_Sha256Final(&sha, hash);
-    if (ret != 0)
-        return -28;
-    if (XMEMCMP(hash, a.output, a.outLen) != 0)
-        return -29;
-    XMEMSET(hash, 0, a.outLen);
-    ret = wc_Sha256Hash((byte*)a.input, (word32)a.inLen, hash);
-    if (ret != 0)
-        return -30;
-    if (XMEMCMP(hash, a.output, a.outLen) != 0)
-        return -31;
+    wc_Sha256Free(&sha);
 
     return 0;
 }
@@ -1696,6 +1637,7 @@ int sha512_test(void)
 {
     Sha512 sha;
     byte   hash[SHA512_DIGEST_SIZE];
+    byte   hashcopy[SHA512_DIGEST_SIZE];
     int    ret;
 
     testVector a, b;
@@ -1724,7 +1666,7 @@ int sha512_test(void)
     test_sha[0] = a;
     test_sha[1] = b;
 
-    ret = wc_InitSha512(&sha);
+    ret = wc_InitSha512_ex(&sha, HEAP_HINT, devId);
     if (ret != 0)
         return -4009;
 
@@ -1732,38 +1674,20 @@ int sha512_test(void)
         ret = wc_Sha512Update(&sha, (byte*)test_sha[i].input,(word32)test_sha[i].inLen);
         if (ret != 0)
             return -4010;
-
-        ret = wc_Sha512Final(&sha, hash);
+        ret = wc_Sha512GetHash(&sha, hashcopy);
         if (ret != 0)
             return -4011;
+        ret = wc_Sha512Final(&sha, hash);
+        if (ret != 0)
+            return -4012;
 
         if (XMEMCMP(hash, test_sha[i].output, SHA512_DIGEST_SIZE) != 0)
             return -10 - i;
+        if (XMEMCMP(hash, hashcopy, SHA512_DIGEST_SIZE) != 0)
+            return -20 - i;
     }
 
-    /* Getting the hash doesn't invalidate state. */
-    ret = wc_InitSha512(&sha);
-    if (ret != 0)
-        return -20;
-    ret = wc_Sha512Update(&sha, (byte*)a.input, 1);
-    if (ret != 0)
-        return -21;
-    ret = wc_Sha512GetHash(&sha, hash);
-    if (ret != 0)
-        return -22;
-    ret = wc_Sha512Update(&sha, (byte*)a.input + 1, (word32)a.inLen - 1);
-    if (ret != 0)
-        return -23;
-    ret = wc_Sha512Final(&sha, hash);
-    if (ret != 0)
-        return -24;
-    if (XMEMCMP(hash, a.output, a.outLen) != 0)
-        return -15;
-    ret = wc_Sha512Hash((byte*)a.input, (word32)a.inLen, hash);
-    if (ret != 0)
-        return -26;
-    if (XMEMCMP(hash, a.output, a.outLen) != 0)
-        return -27;
+    wc_Sha512Free(&sha);
 
     return 0;
 }
@@ -1775,6 +1699,7 @@ int sha384_test(void)
 {
     Sha384 sha;
     byte   hash[SHA384_DIGEST_SIZE];
+    byte   hashcopy[SHA384_DIGEST_SIZE];
     int    ret;
 
     testVector a, b;
@@ -1801,7 +1726,7 @@ int sha384_test(void)
     test_sha[0] = a;
     test_sha[1] = b;
 
-    ret = wc_InitSha384(&sha);
+    ret = wc_InitSha384_ex(&sha, HEAP_HINT, devId);
     if (ret != 0)
         return -4012;
 
@@ -1809,39 +1734,20 @@ int sha384_test(void)
         ret = wc_Sha384Update(&sha, (byte*)test_sha[i].input,(word32)test_sha[i].inLen);
         if (ret != 0)
             return -4013;
-
-        ret = wc_Sha384Final(&sha, hash);
+        ret = wc_Sha384GetHash(&sha, hashcopy);
         if (ret != 0)
             return -4014;
+        ret = wc_Sha384Final(&sha, hash);
+        if (ret != 0)
+            return -4015;
 
         if (XMEMCMP(hash, test_sha[i].output, SHA384_DIGEST_SIZE) != 0)
             return -10 - i;
+        if (XMEMCMP(hash, hashcopy, SHA384_DIGEST_SIZE) != 0)
+            return -20 - i;
     }
 
-    /* Getting the hash doesn't invalidate state. */
-    ret = wc_InitSha384(&sha);
-    if (ret != 0)
-        return -20;
-    ret = wc_Sha384Update(&sha, (byte*)a.input, 1);
-    if (ret != 0)
-        return -21;
-    ret = wc_Sha384GetHash(&sha, hash);
-    if (ret != 0)
-        return -22;
-    ret = wc_Sha384Update(&sha, (byte*)a.input + 1, (word32)(a.inLen - 1));
-    if (ret != 0)
-        return -23;
-    ret = wc_Sha384Final(&sha, hash);
-    if (ret != 0)
-        return -24;
-    if (XMEMCMP(hash, a.output, a.outLen) != 0)
-        return -25;
-    XMEMSET(hash, 0, a.outLen);
-    ret = wc_Sha384Hash((byte*)a.input, (word32)a.inLen, hash);
-    if (ret != 0)
-        return -26;
-    if (XMEMCMP(hash, a.output, a.outLen) != 0)
-        return -27;
+    wc_Sha384Free(&sha);
 
     return 0;
 }
@@ -2060,11 +1966,9 @@ int hmac_md5_test(void)
         }
     #endif
 
-    #ifdef WOLFSSL_ASYNC_CRYPT
-        if (wc_HmacAsyncInit(&hmac, devId) != 0) {
+        if (wc_HmacInit(&hmac, HEAP_HINT, devId) != 0) {
             return -20009;
         }
-    #endif
 
         ret = wc_HmacSetKey(&hmac, MD5, (byte*)keys[i], (word32)XSTRLEN(keys[i]));
         if (ret != 0)
@@ -2080,9 +1984,8 @@ int hmac_md5_test(void)
         if (XMEMCMP(hash, test_hmac[i].output, MD5_DIGEST_SIZE) != 0)
             return -20 - i;
 
-    #ifdef WOLFSSL_ASYNC_CRYPT
-        wc_HmacAsyncFree(&hmac);
-    #endif
+
+        wc_HmacFree(&hmac);
     }
 
 #ifndef HAVE_FIPS
@@ -2145,10 +2048,10 @@ int hmac_sha_test(void)
         if (i == 1)
             continue; /* cavium can't handle short keys, fips not allowed */
 #endif
-#ifdef WOLFSSL_ASYNC_CRYPT
-        if (wc_HmacAsyncInit(&hmac, devId) != 0)
+
+        if (wc_HmacInit(&hmac, HEAP_HINT, devId) != 0)
             return -20010;
-#endif
+
         ret = wc_HmacSetKey(&hmac, SHA, (byte*)keys[i], (word32)XSTRLEN(keys[i]));
         if (ret != 0)
             return -4018;
@@ -2162,9 +2065,8 @@ int hmac_sha_test(void)
 
         if (XMEMCMP(hash, test_hmac[i].output, SHA_DIGEST_SIZE) != 0)
             return -20 - i;
-#ifdef WOLFSSL_ASYNC_CRYPT
-        wc_HmacAsyncFree(&hmac);
-#endif
+
+        wc_HmacFree(&hmac);
     }
 
 #ifndef HAVE_FIPS
@@ -2228,10 +2130,10 @@ int hmac_sha224_test(void)
         if (i == 1)
             continue; /* cavium can't handle short keys, fips not allowed */
 #endif
-#ifdef WOLFSSL_ASYNC_CRYPT
-        if (wc_HmacAsyncInit(&hmac, devId) != 0)
+
+        if (wc_HmacInit(&hmac, HEAP_HINT, devId) != 0)
             return -20011;
-#endif
+
         ret = wc_HmacSetKey(&hmac, SHA224, (byte*)keys[i],(word32)XSTRLEN(keys[i]));
         if (ret != 0)
             return -4021;
@@ -2245,9 +2147,8 @@ int hmac_sha224_test(void)
 
         if (XMEMCMP(hash, test_hmac[i].output, SHA224_DIGEST_SIZE) != 0)
             return -20 - i;
-#ifdef WOLFSSL_ASYNC_CRYPT
-        wc_HmacAsyncFree(&hmac);
-#endif
+
+        wc_HmacFree(&hmac);
     }
 
 #ifndef HAVE_FIPS
@@ -2314,10 +2215,10 @@ int hmac_sha256_test(void)
         if (i == 1)
             continue; /* cavium can't handle short keys, fips not allowed */
 #endif
-#ifdef WOLFSSL_ASYNC_CRYPT
-        if (wc_HmacAsyncInit(&hmac, devId) != 0)
+
+        if (wc_HmacInit(&hmac, HEAP_HINT, devId) != 0)
             return -20011;
-#endif
+
         ret = wc_HmacSetKey(&hmac, SHA256, (byte*)keys[i],(word32)XSTRLEN(keys[i]));
         if (ret != 0)
             return -4021;
@@ -2331,9 +2232,8 @@ int hmac_sha256_test(void)
 
         if (XMEMCMP(hash, test_hmac[i].output, SHA256_DIGEST_SIZE) != 0)
             return -20 - i;
-#ifdef WOLFSSL_ASYNC_CRYPT
-        wc_HmacAsyncFree(&hmac);
-#endif
+
+        wc_HmacFree(&hmac);
     }
 
 #ifndef HAVE_FIPS
@@ -2404,14 +2304,13 @@ int hmac_blake2b_test(void)
         if (i == 1)
             continue; /* cavium can't handle short keys, fips not allowed */
 #endif
-#ifdef WOLFSSL_ASYNC_CRYPT
-    #ifdef HAVE_CAVIUM_V
-        /* Blake2 not supported on Cavium V, but SHA3 is */
-        return 0;
+
+    #if defined(HAVE_CAVIUM) && !defined(HAVE_CAVIUM_V)
+        /* Blake2 only supported on Cavium Nitrox III */
+        if (wc_HmacInit(&hmac, HEAP_HINT, devId) != 0)
+            return -20012;
     #endif
-        if (wc_HmacAsyncInit(&hmac, devId) != 0)
-            return -20011;
-#endif
+
         ret = wc_HmacSetKey(&hmac, BLAKE2B_ID, (byte*)keys[i],
                          (word32)XSTRLEN(keys[i]));
         if (ret != 0)
@@ -2426,9 +2325,8 @@ int hmac_blake2b_test(void)
 
         if (XMEMCMP(hash, test_hmac[i].output, BLAKE2B_256) != 0)
             return -20 - i;
-#ifdef WOLFSSL_ASYNC_CRYPT
-        wc_HmacAsyncFree(&hmac);
-#endif
+
+        wc_HmacFree(&hmac);
     }
 
 #ifndef HAVE_FIPS
@@ -2498,6 +2396,10 @@ int hmac_sha384_test(void)
         if (i == 1)
             continue; /* fips not allowed */
 #endif
+
+        if (wc_HmacInit(&hmac, HEAP_HINT, devId) != 0)
+            return -20013;
+
         ret = wc_HmacSetKey(&hmac, SHA384, (byte*)keys[i],(word32)XSTRLEN(keys[i]));
         if (ret != 0)
             return -4027;
@@ -2511,6 +2413,8 @@ int hmac_sha384_test(void)
 
         if (XMEMCMP(hash, test_hmac[i].output, SHA384_DIGEST_SIZE) != 0)
             return -20 - i;
+
+        wc_HmacFree(&hmac);
     }
 
 #ifndef HAVE_FIPS
@@ -2583,6 +2487,10 @@ int hmac_sha512_test(void)
         if (i == 1)
             continue; /* fips not allowed */
 #endif
+
+        if (wc_HmacInit(&hmac, HEAP_HINT, devId) != 0)
+            return -20014;
+
         ret = wc_HmacSetKey(&hmac, SHA512, (byte*)keys[i],(word32)XSTRLEN(keys[i]));
         if (ret != 0)
             return -4030;
@@ -2596,6 +2504,8 @@ int hmac_sha512_test(void)
 
         if (XMEMCMP(hash, test_hmac[i].output, SHA512_DIGEST_SIZE) != 0)
             return -20 - i;
+
+        wc_HmacFree(&hmac);
     }
 
 #ifndef HAVE_FIPS
@@ -2659,12 +2569,10 @@ int arc4_test(void)
         if (i == 3)
             keylen = 4;
 
-    #ifdef WOLFSSL_ASYNC_CRYPT
-        if (wc_Arc4AsyncInit(&enc, devId) != 0)
+        if (wc_Arc4Init(&enc, HEAP_HINT, devId) != 0)
             return -20001;
-        if (wc_Arc4AsyncInit(&dec, devId) != 0)
+        if (wc_Arc4Init(&dec, HEAP_HINT, devId) != 0)
             return -20002;
-    #endif
 
         wc_Arc4SetKey(&enc, (byte*)keys[i], keylen);
         wc_Arc4SetKey(&dec, (byte*)keys[i], keylen);
@@ -2679,10 +2587,8 @@ int arc4_test(void)
         if (XMEMCMP(cipher, test_arc4[i].output, test_arc4[i].outLen))
             return -20 - 5 - i;
 
-    #ifdef WOLFSSL_ASYNC_CRYPT
-        wc_Arc4AsyncFree(&enc);
-        wc_Arc4AsyncFree(&dec);
-    #endif
+        wc_Arc4Free(&enc);
+        wc_Arc4Free(&dec);
     }
 
     return 0;
@@ -3554,12 +3460,11 @@ int des3_test(void)
     int ret;
 
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    if (wc_Des3AsyncInit(&enc, devId) != 0)
+    if (wc_Des3Init(&enc, HEAP_HINT, devId) != 0)
         return -20005;
-    if (wc_Des3AsyncInit(&dec, devId) != 0)
+    if (wc_Des3Init(&dec, HEAP_HINT, devId) != 0)
         return -20006;
-#endif
+
     ret = wc_Des3_SetKey(&enc, key3, iv3, DES_ENCRYPTION);
     if (ret != 0)
         return -31;
@@ -3567,9 +3472,15 @@ int des3_test(void)
     if (ret != 0)
         return -32;
     ret = wc_Des3_CbcEncrypt(&enc, cipher, vector, sizeof(vector));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
     if (ret != 0)
         return -33;
     ret = wc_Des3_CbcDecrypt(&dec, plain, cipher, sizeof(cipher));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &dec.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
     if (ret != 0)
         return -34;
 
@@ -3579,10 +3490,9 @@ int des3_test(void)
     if (XMEMCMP(cipher, verify3, sizeof(cipher)))
         return -36;
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    wc_Des3AsyncFree(&enc);
-    wc_Des3AsyncFree(&dec);
-#endif
+    wc_Des3Free(&enc);
+    wc_Des3Free(&dec);
+
     return 0;
 }
 #endif /* NO_DES */
@@ -3743,12 +3653,13 @@ int aes_test(void)
     byte key[] = "0123456789abcdef   ";  /* align */
     byte iv[]  = "1234567890abcdef   ";  /* align */
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    if (wc_AesAsyncInit(&enc, devId) != 0)
+    if (wc_AesInit(&enc, HEAP_HINT, devId) != 0)
         return -20003;
-    if (wc_AesAsyncInit(&dec, devId) != 0)
+#ifdef HAVE_AES_DECRYPT
+    if (wc_AesInit(&dec, HEAP_HINT, devId) != 0)
         return -20004;
 #endif
+
     ret = wc_AesSetKey(&enc, key, AES_BLOCK_SIZE, iv, AES_ENCRYPTION);
     if (ret != 0)
         return -1001;
@@ -3758,11 +3669,17 @@ int aes_test(void)
         return -1002;
 #endif
 
-    ret = wc_AesCbcEncrypt(&enc, cipher, msg,   AES_BLOCK_SIZE);
+    ret = wc_AesCbcEncrypt(&enc, cipher, msg, AES_BLOCK_SIZE);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
     if (ret != 0)
         return -1005;
 #ifdef HAVE_AES_DECRYPT
     ret = wc_AesCbcDecrypt(&dec, plain, cipher, AES_BLOCK_SIZE);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &dec.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
     if (ret != 0)
         return -1006;
 
@@ -3847,10 +3764,16 @@ int aes_test(void)
                     return -1031;
 
                 ret = wc_AesCbcEncrypt(&enc, bigCipher, bigMsg, msgSz);
+            #if defined(WOLFSSL_ASYNC_CRYPT)
+                ret = wc_AsyncWait(ret, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+            #endif
                 if (ret != 0)
                     return -1032;
 
                 ret = wc_AesCbcDecrypt(&dec, bigPlain, bigCipher, msgSz);
+            #if defined(WOLFSSL_ASYNC_CRYPT)
+                ret = wc_AsyncWait(ret, &dec.asyncDev, WC_ASYNC_FLAG_NONE);
+            #endif
                 if (ret != 0)
                     return -1033;
 
@@ -3861,11 +3784,6 @@ int aes_test(void)
     }
 #endif /* WOLFSSL_AESNI HAVE_AES_DECRYPT */
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    wc_AesAsyncFree(&enc);
-    wc_AesAsyncFree(&dec);
-#endif /* WOLFSSL_ASYNC_CRYPT */
-
 #endif /* HAVE_AES_CBC */
 
 #ifdef WOLFSSL_AES_COUNTER
@@ -4081,6 +3999,11 @@ int aes_test(void)
         return ret;
 #endif
 
+    wc_AesFree(&enc);
+#ifdef HAVE_AES_DECRYPT
+    wc_AesFree(&dec);
+#endif
+
     return ret;
 }
 
@@ -4146,7 +4069,7 @@ int aesgcm_test(void)
         0xcd, 0xdf, 0x88, 0x53, 0xbb, 0x2d, 0x55, 0x1b
     };
 
-#ifndef HAVE_FIPS
+#if !defined(HAVE_FIPS) && !defined(HAVE_INTEL_QA)
     /* Test Case 12, uses same plaintext and AAD data. */
     const byte k2[] =
     {
@@ -4184,7 +4107,7 @@ int aesgcm_test(void)
         0xdc, 0xf5, 0x66, 0xff, 0x29, 0x1c, 0x25, 0xbb,
         0xb8, 0x56, 0x8f, 0xc3, 0xd3, 0x76, 0xa6, 0xd9
     };
-#endif /* HAVE_FIPS */
+#endif /* !HAVE_FIPS && !HAVE_INTEL_QA */
 
     byte resultT[sizeof(t1)];
     byte resultP[sizeof(p)];
@@ -4195,10 +4118,22 @@ int aesgcm_test(void)
     XMEMSET(resultC, 0, sizeof(resultC));
     XMEMSET(resultP, 0, sizeof(resultP));
 
-    wc_AesGcmSetKey(&enc, k1, sizeof(k1));
+    if (wc_AesInit(&enc, HEAP_HINT, devId) != 0) {
+        return -20003;
+    }
+
+    result = wc_AesGcmSetKey(&enc, k1, sizeof(k1));
+    if (result != 0)
+        return -66;
+
     /* AES-GCM encrypt and decrypt both use AES encrypt internally */
-    wc_AesGcmEncrypt(&enc, resultC, p, sizeof(p), iv1, sizeof(iv1),
+    result = wc_AesGcmEncrypt(&enc, resultC, p, sizeof(p), iv1, sizeof(iv1),
                                         resultT, sizeof(resultT), a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (result != 0)
+        return -67;
     if (XMEMCMP(c1, resultC, sizeof(resultC)))
         return -68;
     if (XMEMCMP(t1, resultT, sizeof(resultT)))
@@ -4206,20 +4141,29 @@ int aesgcm_test(void)
 
     result = wc_AesGcmDecrypt(&enc, resultP, resultC, sizeof(resultC),
                       iv1, sizeof(iv1), resultT, sizeof(resultT), a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
     if (result != 0)
         return -70;
     if (XMEMCMP(p, resultP, sizeof(resultP)))
         return -71;
 
-#ifndef HAVE_FIPS
+    /* QAT only supports 12-byte IV */
+#if !defined(HAVE_FIPS) && !defined(HAVE_INTEL_QA)
     XMEMSET(resultT, 0, sizeof(resultT));
     XMEMSET(resultC, 0, sizeof(resultC));
     XMEMSET(resultP, 0, sizeof(resultP));
 
     wc_AesGcmSetKey(&enc, k2, sizeof(k2));
     /* AES-GCM encrypt and decrypt both use AES encrypt internally */
-    wc_AesGcmEncrypt(&enc, resultC, p, sizeof(p), iv2, sizeof(iv2),
+    result = wc_AesGcmEncrypt(&enc, resultC, p, sizeof(p), iv2, sizeof(iv2),
                                         resultT, sizeof(resultT), a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (result != 0)
+        return -229;
     if (XMEMCMP(c2, resultC, sizeof(resultC)))
         return -230;
     if (XMEMCMP(t2, resultT, sizeof(resultT)))
@@ -4227,11 +4171,16 @@ int aesgcm_test(void)
 
     result = wc_AesGcmDecrypt(&enc, resultP, resultC, sizeof(resultC),
                       iv2, sizeof(iv2), resultT, sizeof(resultT), a, sizeof(a));
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    result = wc_AsyncWait(result, &enc.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
     if (result != 0)
         return -232;
     if (XMEMCMP(p, resultP, sizeof(resultP)))
         return -233;
-#endif /* HAVE_FIPS */
+#endif /* !HAVE_FIPS && !HAVE_INTEL_QA */
+
+    wc_AesFree(&enc);
 
     return 0;
 }
@@ -4284,6 +4233,7 @@ int gmac_test(void)
 
     byte tag[16];
 
+    XMEMSET(&gmac, 0, sizeof(Gmac)); /* clear context */
     XMEMSET(tag, 0, sizeof(tag));
     wc_GmacSetKey(&gmac, k1, sizeof(k1));
     wc_GmacUpdate(&gmac, iv1, sizeof(iv1), a1, sizeof(a1), tag, sizeof(t1));
@@ -4350,6 +4300,7 @@ int aesccm_test(void)
 
     int result;
 
+    XMEMSET(&enc, 0, sizeof(Aes)); /* clear context */
     XMEMSET(t2, 0, sizeof(t2));
     XMEMSET(c2, 0, sizeof(c2));
     XMEMSET(p2, 0, sizeof(p2));
@@ -4957,7 +4908,7 @@ int idea_test(void)
 
         /* random values */
     #ifndef HAVE_FIPS
-        ret = wc_InitRng_ex(&rng, HEAP_HINT);
+        ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
     #else
         ret = wc_InitRng(&rng);
     #endif
@@ -5035,7 +4986,7 @@ static int random_rng_test(void)
     int ret, i;
 
 #ifndef HAVE_FIPS
-    ret = wc_InitRng_ex(&rng, HEAP_HINT);
+    ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
 #else
     ret = wc_InitRng(&rng);
 #endif
@@ -5772,8 +5723,11 @@ static int rsa_sig_test(RsaKey* key, word32 keyLen, int modLen, WC_RNG* rng)
      *     -101 = USER_CRYPTO_ERROR
      */
     if (ret == 0)
-#elif defined(HAVE_FIPS) || !defined(WC_RSA_BLINDING)
-    /* FIPS140 implementation doesn't do blinding. */
+#elif defined(WOLFSSL_ASYNC_CRYPT)
+    /* async may not require RNG */
+    if (ret != 0 && ret != MISSING_RNG_E)
+#elif defined(HAVE_FIPS) || defined(WOLFSSL_ASYNC_CRYPT)
+    /* FIPS140 implementation does not do blinding */
     if (ret != 0)
 #else
     if (ret != MISSING_RNG_E)
@@ -6100,6 +6054,7 @@ done:
 }
 #endif
 
+#define RSA_TEST_BYTES 256
 int rsa_test(void)
 {
     byte*   tmp;
@@ -6111,10 +6066,13 @@ int rsa_test(void)
     WC_RNG rng;
     word32 idx = 0;
     int    ret;
-    byte   in[] = "Everyone gets Friday off.";
-    word32 inLen = (word32)XSTRLEN((char*)in);
-    byte   out[256];
-    byte   plain[256];
+    const char* inStr = "Everyone gets Friday off.";
+    word32      inLen = (word32)XSTRLEN((char*)inStr);
+    const word32 outSz   = RSA_TEST_BYTES;
+    const word32 plainSz = RSA_TEST_BYTES;
+    DECLARE_VAR_INIT(in, byte, inLen, inStr, HEAP_HINT);
+    DECLARE_VAR(out, byte, RSA_TEST_BYTES, HEAP_HINT);
+    DECLARE_VAR(plain, byte, RSA_TEST_BYTES, HEAP_HINT);
     byte*  res;
 #if !defined(USE_CERT_BUFFERS_1024) && !defined(USE_CERT_BUFFERS_2048) \
                                     && !defined(NO_FILESYSTEM)
@@ -6131,8 +6089,13 @@ int rsa_test(void)
 #endif
 
     tmp = (byte*)XMALLOC(FOURK_BUF, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
-    if (tmp == NULL)
-        return -38;
+    if (tmp == NULL
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        || out == NULL || plain == NULL
+    #endif
+    ) {
+        return -40;
+    }
 
 #ifdef USE_CERT_BUFFERS_1024
     XMEMCPY(tmp, client_key_der_1024, sizeof_client_key_der_1024);
@@ -6158,7 +6121,7 @@ int rsa_test(void)
 
     ret = wc_InitRsaKey_ex(&key, HEAP_HINT, devId);
     if (ret != 0) {
-        XFREE(tmp, HEAP_HINT ,DYNAMIC_TYPE_TMP_BUFFER);
+        XFREE(tmp, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
         return -39;
     }
     ret = wc_RsaPrivateKeyDecode(tmp, &idx, &key, (word32)bytes);
@@ -6168,7 +6131,7 @@ int rsa_test(void)
     }
 
 #ifndef HAVE_FIPS
-    ret = wc_InitRng_ex(&rng, HEAP_HINT);
+    ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
 #else
     ret = wc_InitRng(&rng);
 #endif
@@ -6183,10 +6146,10 @@ int rsa_test(void)
 
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
-            ret = wc_RsaPublicEncrypt(in, inLen, out, sizeof(out), &key, &rng);
+            ret = wc_RsaPublicEncrypt(in, inLen, out, outSz, &key, &rng);
         }
     } while (ret == WC_PENDING_E);
     if (ret < 0) {
@@ -6211,10 +6174,10 @@ int rsa_test(void)
     idx = ret; /* save off encrypted length */
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
-            ret = wc_RsaPrivateDecrypt(out, idx, plain, sizeof(plain), &key);
+            ret = wc_RsaPrivateDecrypt(out, idx, plain, plainSz, &key);
         }
     } while (ret == WC_PENDING_E);
     if (ret < 0) {
@@ -6230,7 +6193,7 @@ int rsa_test(void)
     }
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
             ret = wc_RsaPrivateDecryptInline(out, idx, &res, &key);
@@ -6245,10 +6208,10 @@ int rsa_test(void)
 
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
-            ret = wc_RsaSSL_Sign(in, inLen, out, sizeof(out), &key, &rng);
+            ret = wc_RsaSSL_Sign(in, inLen, out, outSz, &key, &rng);
         }
     } while (ret == WC_PENDING_E);
     if (ret < 0) {
@@ -6258,13 +6221,13 @@ int rsa_test(void)
     }
 
     idx = ret;
-    XMEMSET(plain, 0, sizeof(plain));
+    XMEMSET(plain, 0, plainSz);
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
-            ret = wc_RsaSSL_Verify(out, idx, plain, sizeof(plain), &key);
+            ret = wc_RsaSSL_Verify(out, idx, plain, plainSz, &key);
         }
     } while (ret == WC_PENDING_E);
     if (ret < 0) {
@@ -6284,14 +6247,14 @@ int rsa_test(void)
     #if !defined(HAVE_FAST_RSA) && !defined(HAVE_USER_RSA) && \
         !defined(HAVE_FIPS)
     #ifndef NO_SHA
-    XMEMSET(plain, 0, sizeof(plain));
+    XMEMSET(plain, 0, plainSz);
 
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
-            ret = wc_RsaPublicEncrypt_ex(in, inLen, out, sizeof(out), &key, &rng,
+            ret = wc_RsaPublicEncrypt_ex(in, inLen, out, outSz, &key, &rng,
                        WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA, WC_MGF1SHA1, NULL, 0);
         }
     } while (ret == WC_PENDING_E);
@@ -6304,10 +6267,10 @@ int rsa_test(void)
     idx = ret;
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
-            ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, sizeof(plain), &key,
+            ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, plainSz, &key,
                        WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA, WC_MGF1SHA1, NULL, 0);
         }
     } while (ret == WC_PENDING_E);
@@ -6325,13 +6288,13 @@ int rsa_test(void)
     #endif /* NO_SHA */
 
     #ifndef NO_SHA256
-    XMEMSET(plain, 0, sizeof(plain));
+    XMEMSET(plain, 0, plainSz);
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
-            ret = wc_RsaPublicEncrypt_ex(in, inLen, out, sizeof(out), &key, &rng,
+            ret = wc_RsaPublicEncrypt_ex(in, inLen, out, outSz, &key, &rng,
                   WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, NULL, 0);
         }
     } while (ret == WC_PENDING_E);
@@ -6344,10 +6307,10 @@ int rsa_test(void)
     idx = ret;
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
-            ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, sizeof(plain), &key,
+            ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, plainSz, &key,
                   WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, NULL, 0);
         }
     } while (ret == WC_PENDING_E);
@@ -6365,7 +6328,7 @@ int rsa_test(void)
 
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
             ret = wc_RsaPrivateDecryptInline_ex(out, idx, &res, &key,
@@ -6380,13 +6343,13 @@ int rsa_test(void)
         return -475;
 
     /* check fails if not using the same optional label */
-    XMEMSET(plain, 0, sizeof(plain));
+    XMEMSET(plain, 0, plainSz);
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
-            ret = wc_RsaPublicEncrypt_ex(in, inLen, out, sizeof(out), &key, &rng,
+            ret = wc_RsaPublicEncrypt_ex(in, inLen, out, outSz, &key, &rng,
                   WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, NULL, 0);
         }
     } while (ret == WC_PENDING_E);
@@ -6399,11 +6362,11 @@ int rsa_test(void)
     idx = ret;
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
-            ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, sizeof(plain), &key,
-               WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, in, sizeof(in));
+            ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, plainSz, &key,
+               WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, in, inLen);
         }
     } while (ret == WC_PENDING_E);
     if (ret > 0) { /* in this case decrypt should fail */
@@ -6414,14 +6377,14 @@ int rsa_test(void)
     ret = 0;
 
     /* check using optional label with encrypt/decrypt */
-    XMEMSET(plain, 0, sizeof(plain));
+    XMEMSET(plain, 0, plainSz);
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
-            ret = wc_RsaPublicEncrypt_ex(in, inLen, out, sizeof(out), &key, &rng,
-               WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, in, sizeof(in));
+            ret = wc_RsaPublicEncrypt_ex(in, inLen, out, outSz, &key, &rng,
+               WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, in, inLen);
         }
     } while (ret == WC_PENDING_E);
     if (ret < 0) {
@@ -6433,11 +6396,11 @@ int rsa_test(void)
     idx = ret;
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
-            ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, sizeof(plain), &key,
-               WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, in, sizeof(in));
+            ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, plainSz, &key,
+               WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, in, inLen);
         }
     } while (ret == WC_PENDING_E);
     if (ret < 0) {
@@ -6454,14 +6417,14 @@ int rsa_test(void)
 
     #ifndef NO_SHA
         /* check fail using mismatch hash algorithms */
-        XMEMSET(plain, 0, sizeof(plain));
+        XMEMSET(plain, 0, plainSz);
         do {
     #if defined(WOLFSSL_ASYNC_CRYPT)
-            ret = wc_RsaAsyncWait(ret, &key);
+            ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
     #endif
             if (ret >= 0) {
-                ret = wc_RsaPublicEncrypt_ex(in, inLen, out, sizeof(out), &key, &rng,
-                    WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA, WC_MGF1SHA1, in, sizeof(in));
+                ret = wc_RsaPublicEncrypt_ex(in, inLen, out, outSz, &key, &rng,
+                    WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA, WC_MGF1SHA1, in, inLen);
             }
         } while (ret == WC_PENDING_E);
         if (ret < 0) {
@@ -6473,11 +6436,11 @@ int rsa_test(void)
         idx = ret;
         do {
     #if defined(WOLFSSL_ASYNC_CRYPT)
-            ret = wc_RsaAsyncWait(ret, &key);
+            ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
     #endif
             if (ret >= 0) {
-                ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, sizeof(plain), &key,
-                   WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, in, sizeof(in));
+                ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, plainSz, &key,
+                   WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA256, WC_MGF1SHA256, in, inLen);
             }
         } while (ret == WC_PENDING_E);
         if (ret > 0) { /* should fail */
@@ -6495,13 +6458,13 @@ int rsa_test(void)
        and test, since OAEP padding requires this.
        BAD_FUNC_ARG is returned when this case is not met */
     if (wc_RsaEncryptSize(&key) > ((int)SHA512_DIGEST_SIZE * 2) + 2) {
-        XMEMSET(plain, 0, sizeof(plain));
+        XMEMSET(plain, 0, plainSz);
         do {
     #if defined(WOLFSSL_ASYNC_CRYPT)
-            ret = wc_RsaAsyncWait(ret, &key);
+            ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
     #endif
             if (ret >= 0) {
-                ret = wc_RsaPublicEncrypt_ex(in, inLen, out, sizeof(out), &key, &rng,
+                ret = wc_RsaPublicEncrypt_ex(in, inLen, out, outSz, &key, &rng,
                   WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA512, WC_MGF1SHA512, NULL, 0);
             }
         } while (ret == WC_PENDING_E);
@@ -6514,10 +6477,10 @@ int rsa_test(void)
         idx = ret;
         do {
     #if defined(WOLFSSL_ASYNC_CRYPT)
-            ret = wc_RsaAsyncWait(ret, &key);
+            ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
     #endif
             if (ret >= 0) {
-                ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, sizeof(plain), &key,
+                ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, plainSz, &key,
                   WC_RSA_OAEP_PAD, WC_HASH_TYPE_SHA512, WC_MGF1SHA512, NULL, 0);
             }
         } while (ret == WC_PENDING_E);
@@ -6536,13 +6499,13 @@ int rsa_test(void)
     #endif /* WOLFSSL_SHA512 */
 
     /* check using pkcsv15 padding with _ex API */
-    XMEMSET(plain, 0, sizeof(plain));
+    XMEMSET(plain, 0, plainSz);
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
-            ret = wc_RsaPublicEncrypt_ex(in, inLen, out, sizeof(out), &key, &rng,
+            ret = wc_RsaPublicEncrypt_ex(in, inLen, out, outSz, &key, &rng,
                   WC_RSA_PKCSV15_PAD, WC_HASH_TYPE_NONE, 0, NULL, 0);
         }
     } while (ret == WC_PENDING_E);
@@ -6555,10 +6518,10 @@ int rsa_test(void)
     idx = ret;
     do {
 #if defined(WOLFSSL_ASYNC_CRYPT)
-        ret = wc_RsaAsyncWait(ret, &key);
+        ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
 #endif
         if (ret >= 0) {
-            ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, sizeof(plain), &key,
+            ret = wc_RsaPrivateDecrypt_ex(out, idx, plain, plainSz, &key,
                   WC_RSA_PKCSV15_PAD, WC_HASH_TYPE_NONE, 0, NULL, 0);
         }
     } while (ret == WC_PENDING_E);
@@ -6876,14 +6839,23 @@ int rsa_test(void)
         }
     #endif /* WOLFSSL_CERT_EXT */
 
-        certSz = wc_MakeSelfCert(&myCert, derCert, FOURK_BUF, &key, &rng);
-        if (certSz < 0) {
+        ret = 0;
+        do {
+    #if defined(WOLFSSL_ASYNC_CRYPT)
+            ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+    #endif
+            if (ret >= 0) {
+                ret = wc_MakeSelfCert(&myCert, derCert, FOURK_BUF, &key, &rng);
+            }
+        } while (ret == WC_PENDING_E);
+        if (ret < 0) {
             XFREE(derCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             XFREE(tmp, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             wc_FreeRng(&rng);
             return -401;
         }
+        certSz = ret;
 
     #ifdef WOLFSSL_TEST_CERT
         InitDecodedCert(&decode, derCert, certSz, HEAP_HINT);
@@ -7109,9 +7081,17 @@ int rsa_test(void)
             return -407;
         }
 
-        certSz = wc_SignCert(myCert.bodySz, myCert.sigType, derCert, FOURK_BUF,
+        ret = 0;
+        do {
+        #if defined(WOLFSSL_ASYNC_CRYPT)
+            ret = wc_AsyncWait(ret, &caKey.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+        #endif
+            if (ret >= 0) {
+                ret = wc_SignCert(myCert.bodySz, myCert.sigType, derCert, FOURK_BUF,
                           &caKey, NULL, &rng);
-        if (certSz < 0) {
+            }
+        } while (ret == WC_PENDING_E);
+        if (ret < 0) {
             XFREE(derCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             XFREE(tmp, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
@@ -7119,6 +7099,7 @@ int rsa_test(void)
             wc_FreeRng(&rng);
             return -408;
         }
+        certSz = ret;
 
     #ifdef WOLFSSL_TEST_CERT
         InitDecodedCert(&decode, derCert, certSz, HEAP_HINT);
@@ -7251,7 +7232,7 @@ int rsa_test(void)
         fclose(file3);
     #endif /* USE_CERT_BUFFERS_256 */
 
-        wc_ecc_init(&caKey);
+        wc_ecc_init_ex(&caKey, HEAP_HINT, devId);
         ret = wc_EccPrivateKeyDecode(tmp, &idx3, &caKey, (word32)bytes3);
         if (ret != 0) {
             XFREE(derCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
@@ -7297,7 +7278,7 @@ int rsa_test(void)
         fclose(file3);
     #endif
 
-        wc_ecc_init(&caKeyPub);
+        ret = wc_ecc_init_ex(&caKeyPub, HEAP_HINT, devId);
         if (ret != 0) {
             XFREE(derCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
@@ -7373,9 +7354,17 @@ int rsa_test(void)
             return -5407;
         }
 
-        certSz = wc_SignCert(myCert.bodySz, myCert.sigType, derCert, FOURK_BUF,
-                          NULL, &caKey, &rng);
-        if (certSz < 0) {
+        ret = 0;
+        do {
+        #if defined(WOLFSSL_ASYNC_CRYPT)
+            ret = wc_AsyncWait(ret, &caKey.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+        #endif
+            if (ret >= 0) {
+                ret = wc_SignCert(myCert.bodySz, myCert.sigType, derCert,
+                                  FOURK_BUF, NULL, &caKey, &rng);
+            }
+        } while (ret == WC_PENDING_E);
+        if (ret < 0) {
             XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             XFREE(derCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             wc_ecc_free(&caKey);
@@ -7383,6 +7372,7 @@ int rsa_test(void)
             wc_FreeRng(&rng);
             return -5408;
         }
+        certSz = ret;
 
     #ifdef WOLFSSL_TEST_CERT
         InitDecodedCert(&decode, derCert, certSz, 0);
@@ -7657,17 +7647,25 @@ int rsa_test(void)
             return -456;
         }
 
-        certSz = wc_SignCert(myCert.bodySz, myCert.sigType, derCert, FOURK_BUF,
+        ret = 0;
+        do {
+        #if defined(WOLFSSL_ASYNC_CRYPT)
+            ret = wc_AsyncWait(ret, &caKey.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+        #endif
+            if (ret >= 0) {
+                ret = wc_SignCert(myCert.bodySz, myCert.sigType, derCert, FOURK_BUF,
                           &caKey, NULL, &rng);
+            }
+        } while (ret == WC_PENDING_E);
         wc_FreeRsaKey(&caKey);
-        if (certSz < 0) {
+        if (ret < 0) {
             XFREE(derCert, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             XFREE(tmp, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             wc_FreeRng(&rng);
             return -457;
         }
-
+        certSz = ret;
 
     #ifdef WOLFSSL_TEST_CERT
         InitDecodedCert(&decode, derCert, certSz, HEAP_HINT);
@@ -7822,15 +7820,24 @@ int rsa_test(void)
             return -465;
         }
 
-        derSz = wc_SignCert(req.bodySz, req.sigType, der, FOURK_BUF,
+        ret = 0;
+        do {
+        #if defined(WOLFSSL_ASYNC_CRYPT)
+            ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+        #endif
+            if (ret >= 0) {
+                ret = wc_SignCert(req.bodySz, req.sigType, der, FOURK_BUF,
                           &key, NULL, &rng);
-        if (derSz < 0) {
+            }
+        } while (ret == WC_PENDING_E);
+        if (ret < 0) {
             XFREE(pem, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             XFREE(der, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             XFREE(tmp, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             wc_FreeRng(&rng);
             return -466;
         }
+        derSz = ret;
 
         pemSz = wc_DerToPem(der, derSz, pem, FOURK_BUF, CERTREQ_TYPE);
         if (pemSz < 0) {
@@ -7889,13 +7896,14 @@ int rsa_test(void)
     wc_FreeRsaKey(&key);
 #ifdef WOLFSSL_CERT_EXT
     wc_FreeRsaKey(&keypub);
-#endif
-#ifdef HAVE_CAVIUM
-    wc_RsaFreeCavium(&key);
 #endif
     XFREE(tmp, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
     wc_FreeRng(&rng);
 
+    FREE_VAR(in, HEAP_HINT);
+    FREE_VAR(out, HEAP_HINT);
+    FREE_VAR(plain, HEAP_HINT);
+
     return 0;
 }
 
@@ -7992,62 +8000,93 @@ int dh_test(void)
     (void)tmp;
     (void)bytes;
 
-    ret = wc_InitDhKey(&key);
-    if (ret != 0)
-        return -57;
-    ret = wc_InitDhKey(&key2);
-    if (ret != 0)
-        return -57;
+    ret = wc_InitDhKey_ex(&key, HEAP_HINT, devId);
+    if (ret != 0) {
+        ret = -57; goto done;
+    }
+    ret = wc_InitDhKey_ex(&key2, HEAP_HINT, devId);
+    if (ret != 0) {
+        ret = -57; goto done;
+    }
 
 #ifdef NO_ASN
     ret = wc_DhSetKey(&key, dh_p, sizeof(dh_p), dh_g, sizeof(dh_g));
-    if (ret != 0)
-        return -51;
+    if (ret != 0) {
+        ret = -51; goto done;
+    }
 
     ret = wc_DhSetKey(&key2, dh_p, sizeof(dh_p), dh_g, sizeof(dh_g));
-    if (ret != 0)
-        return -51;
+    if (ret != 0) {
+        ret = -51; goto done;
+    }
 #else
     ret = wc_DhKeyDecode(tmp, &idx, &key, bytes);
-    if (ret != 0)
-        return -51;
+    if (ret != 0) {
+        ret = -51; goto done;
+    }
 
     idx = 0;
     ret = wc_DhKeyDecode(tmp, &idx, &key2, bytes);
-    if (ret != 0)
-        return -52;
+    if (ret != 0) {
+        ret = -52; goto done;
+    }
 #endif
 
 #ifndef HAVE_FIPS
-    ret = wc_InitRng_ex(&rng, HEAP_HINT);
+    ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
 #else
     ret = wc_InitRng(&rng);
 #endif
-    if (ret != 0)
-        return -53;
+    if (ret != 0) {
+        ret = -53; goto done;
+    }
 
-    ret =  wc_DhGenerateKeyPair(&key, &rng, priv, &privSz, pub, &pubSz);
-    ret += wc_DhGenerateKeyPair(&key2, &rng, priv2, &privSz2, pub2, &pubSz2);
-    if (ret != 0)
-        return -54;
+    ret = wc_DhGenerateKeyPair(&key, &rng, priv, &privSz, pub, &pubSz);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0) {
+        ret = -54; goto done;
+    }
 
-    ret =  wc_DhAgree(&key, agree, &agreeSz, priv, privSz, pub2, pubSz2);
-    ret += wc_DhAgree(&key2, agree2, &agreeSz2, priv2, privSz2, pub, pubSz);
-    if (ret != 0)
-        return -55;
+    ret = wc_DhGenerateKeyPair(&key2, &rng, priv2, &privSz2, pub2, &pubSz2);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &key2.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0) {
+        ret = -54; goto done;
+    }
 
-    if (XMEMCMP(agree, agree2, agreeSz))
-        return -56;
+    ret = wc_DhAgree(&key, agree, &agreeSz, priv, privSz, pub2, pubSz2);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &key.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0) {
+        ret = -55; goto done;
+    }
+
+    ret = wc_DhAgree(&key2, agree2, &agreeSz2, priv2, privSz2, pub, pubSz);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &key2.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0) {
+        ret = -55; goto done;
+    }
+    if (agreeSz != agreeSz2 || XMEMCMP(agree, agree2, agreeSz)) {
+        ret = -56; goto done;
+    }
 
     ret = dh_generate_test(&rng);
     if (ret != 0)
-        return -57;
+        ret = -57;
+
+done:
 
     wc_FreeDhKey(&key);
     wc_FreeDhKey(&key2);
     wc_FreeRng(&rng);
 
-    return 0;
+    return ret;
 }
 
 #endif /* NO_DH */
@@ -8082,11 +8121,12 @@ int dsa_test(void)
     fclose(file);
 #endif /* USE_CERT_BUFFERS */
 
-    ret = wc_InitSha(&sha);
+    ret = wc_InitSha_ex(&sha, HEAP_HINT, devId);
     if (ret != 0)
         return -4002;
     wc_ShaUpdate(&sha, tmp, bytes);
     wc_ShaFinal(&sha, hash);
+    wc_ShaFree(&sha);
 
     ret = wc_InitDsaKey(&key);
     if (ret != 0) return -66;
@@ -8095,7 +8135,7 @@ int dsa_test(void)
     if (ret != 0) return -61;
 
 #ifndef HAVE_FIPS
-    ret = wc_InitRng_ex(&rng, HEAP_HINT);
+    ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
 #else
     ret = wc_InitRng(&rng);
 #endif
@@ -8242,7 +8282,7 @@ static int generate_random_salt(byte *buf, word32 size)
     if(NULL == buf || !size)
         return -1;
 
-    if (buf && size && wc_InitRng(&rng) == 0) {
+    if (buf && size && wc_InitRng_ex(&rng, HEAP_HINT, devId) == 0) {
         ret = wc_RNG_GenerateBlock(&rng, (byte *)buf, size);
 
         wc_FreeRng(&rng);
@@ -9494,6 +9534,14 @@ int x963kdf_test(void)
 
 #ifdef HAVE_ECC
 
+#ifdef BENCH_EMBEDDED
+    #define ECC_SHARED_SIZE 128
+#else
+    #define ECC_SHARED_SIZE 1024
+#endif
+#define ECC_DIGEST_SIZE     MAX_ECC_BYTES
+#define ECC_SIG_SIZE        ECC_MAX_SIG_SIZE
+
 #ifndef NO_ECC_VECTOR_TEST
     #if (defined(HAVE_ECC192) || defined(HAVE_ECC224) ||\
          !defined(NO_ECC256) || defined(HAVE_ECC384) ||\
@@ -9520,12 +9568,12 @@ static int ecc_test_vector_item(const eccVector* vector)
     int ret = 0, verify;
     word32  x;
     ecc_key userA;
-    byte    sig[1024];
+    DECLARE_VAR(sig, byte, ECC_SIG_SIZE, HEAP_HINT);
 
-    wc_ecc_init(&userA);
+    wc_ecc_init_ex(&userA, HEAP_HINT, devId);
 
-    XMEMSET(sig, 0, sizeof(sig));
-    x = sizeof(sig);
+    XMEMSET(sig, 0, ECC_SIG_SIZE);
+    x = ECC_SIG_SIZE;
 
     ret = wc_ecc_import_raw(&userA, vector->Qx, vector->Qy,
                                              vector->d, vector->curveName);
@@ -9536,8 +9584,16 @@ static int ecc_test_vector_item(const eccVector* vector)
     if (ret != 0)
         goto done;
 
-    ret = wc_ecc_verify_hash(sig, x, (byte*)vector->msg, vector->msgLen,
-                                                            &verify, &userA);
+    do {
+    #if defined(WOLFSSL_ASYNC_CRYPT)
+        ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+    #endif
+        if (ret >= 0) {
+            ret = wc_ecc_verify_hash(sig, x, (byte*)vector->msg, vector->msgLen,
+                                                               &verify, &userA);
+        }
+    } while (ret == WC_PENDING_E);
+
     if (ret != 0)
         goto done;
 
@@ -9547,6 +9603,8 @@ static int ecc_test_vector_item(const eccVector* vector)
 done:
     wc_ecc_free(&userA);
 
+    FREE_VAR(sig, HEAP_HINT);
+
     return ret;
 }
 
@@ -9812,9 +9870,14 @@ static int ecc_test_key_gen(WC_RNG* rng, int keySize)
 
     ecc_key userA;
 
-    wc_ecc_init(&userA);
+    ret = wc_ecc_init_ex(&userA, HEAP_HINT, devId);
+    if (ret != 0)
+        goto done;
 
     ret = wc_ecc_make_key(rng, keySize, &userA);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+#endif
     if (ret != 0)
         goto done;
 
@@ -9885,23 +9948,19 @@ done:
     return ret;
 }
 #endif /* WOLFSSL_KEY_GEN */
+
 static int ecc_test_curve_size(WC_RNG* rng, int keySize, int testVerifyCount,
     int curve_id)
 {
-#ifdef BENCH_EMBEDDED
-    byte    sharedA[128]; /* Needs to be at least keySize */
-    byte    sharedB[128]; /* Needs to be at least keySize */
-#else
-    byte    sharedA[1024];
-    byte    sharedB[1024];
-#endif
+    DECLARE_VAR(sharedA, byte, ECC_SHARED_SIZE, HEAP_HINT);
+    DECLARE_VAR(sharedB, byte, ECC_SHARED_SIZE, HEAP_HINT);
 #ifdef HAVE_ECC_KEY_EXPORT
     byte    exportBuf[1024];
 #endif
     word32  x, y;
 #ifdef HAVE_ECC_SIGN
-    byte    sig[1024];
-    byte    digest[20];
+    DECLARE_VAR(sig, byte, ECC_SIG_SIZE, HEAP_HINT);
+    DECLARE_VAR(digest, byte, ECC_DIGEST_SIZE, HEAP_HINT);
     int     i;
 #ifdef HAVE_ECC_VERIFY
     int     verify;
@@ -9912,11 +9971,24 @@ static int ecc_test_curve_size(WC_RNG* rng, int keySize, int testVerifyCount,
 
     (void)testVerifyCount;
 
-    wc_ecc_init(&userA);
-    wc_ecc_init(&userB);
-    wc_ecc_init(&pubKey);
+    XMEMSET(&userA, 0, sizeof(ecc_key));
+    XMEMSET(&userB, 0, sizeof(ecc_key));
+    XMEMSET(&pubKey, 0, sizeof(ecc_key));
+
+    ret = wc_ecc_init_ex(&userA, HEAP_HINT, devId);
+    if (ret != 0)
+        goto done;
+    ret = wc_ecc_init_ex(&userB, HEAP_HINT, devId);
+    if (ret != 0)
+        goto done;
+    ret = wc_ecc_init_ex(&pubKey, HEAP_HINT, devId);
+    if (ret != 0)
+        goto done;
 
     ret = wc_ecc_make_key_ex(rng, keySize, &userA, curve_id);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+#endif
     if (ret != 0)
         goto done;
 
@@ -9925,18 +9997,33 @@ static int ecc_test_curve_size(WC_RNG* rng, int keySize, int testVerifyCount,
         goto done;
 
     ret = wc_ecc_make_key_ex(rng, keySize, &userB, curve_id);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &userB.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+#endif
     if (ret != 0)
         goto done;
 
 #ifdef HAVE_ECC_DHE
-    x = sizeof(sharedA);
-    ret = wc_ecc_shared_secret(&userA, &userB, sharedA, &x);
+    x = ECC_SHARED_SIZE;
+    do {
+    #if defined(WOLFSSL_ASYNC_CRYPT)
+        ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+    #endif
+        if (ret >= 0)
+            ret = wc_ecc_shared_secret(&userA, &userB, sharedA, &x);
+    } while (ret == WC_PENDING_E);
     if (ret != 0) {
         goto done;
     }
 
-    y = sizeof(sharedB);
-    ret = wc_ecc_shared_secret(&userB, &userA, sharedB, &y);
+    y = ECC_SHARED_SIZE;
+    do {
+    #if defined(WOLFSSL_ASYNC_CRYPT)
+        ret = wc_AsyncWait(ret, &userB.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+    #endif
+        if (ret >= 0)
+            ret = wc_ecc_shared_secret(&userB, &userA, sharedB, &y);
+    } while (ret == WC_PENDING_E);
     if (ret != 0)
         goto done;
 
@@ -9986,8 +10073,14 @@ static int ecc_test_curve_size(WC_RNG* rng, int keySize, int testVerifyCount,
         goto done;
 
 #ifdef HAVE_ECC_DHE
-    y = sizeof(sharedB);
-    ret = wc_ecc_shared_secret(&userB, &pubKey, sharedB, &y);
+    y = ECC_SHARED_SIZE;
+    do {
+    #if defined(WOLFSSL_ASYNC_CRYPT)
+        ret = wc_AsyncWait(ret, &userB.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+    #endif
+        if (ret >= 0)
+            ret = wc_ecc_shared_secret(&userB, &pubKey, sharedB, &y);
+    } while (ret == WC_PENDING_E);
     if (ret != 0)
         goto done;
 
@@ -10002,15 +10095,23 @@ static int ecc_test_curve_size(WC_RNG* rng, int keySize, int testVerifyCount,
         if (ret != 0)
             goto done;
         wc_ecc_free(&pubKey);
-        wc_ecc_init(&pubKey);
+        ret = wc_ecc_init_ex(&pubKey, HEAP_HINT, devId);
+        if (ret != 0)
+            goto done;
 
         ret = wc_ecc_import_x963_ex(exportBuf, x, &pubKey, curve_id);
         if (ret != 0)
             goto done;
 
     #ifdef HAVE_ECC_DHE
-        y = sizeof(sharedB);
-        ret = wc_ecc_shared_secret(&userB, &pubKey, sharedB, &y);
+        y = ECC_SHARED_SIZE;
+        do {
+        #if defined(WOLFSSL_ASYNC_CRYPT)
+            ret = wc_AsyncWait(ret, &userB.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+        #endif
+            if (ret >= 0)
+                ret = wc_ecc_shared_secret(&userB, &pubKey, sharedB, &y);
+        } while (ret == WC_PENDING_E);
         if (ret != 0)
             goto done;
 
@@ -10023,44 +10124,73 @@ static int ecc_test_curve_size(WC_RNG* rng, int keySize, int testVerifyCount,
 #endif /* HAVE_ECC_KEY_EXPORT */
 
 #ifdef HAVE_ECC_SIGN
-#ifdef ECC_SHAMIR /* ECC w/out Shamir has issue with all 0 digest */
+    /* ECC w/out Shamir has issue with all 0 digest */
+    /* WC_BIGINT doesn't have 0 len well on hardware */
+#if defined(ECC_SHAMIR) && !defined(WOLFSSL_ASYNC_CRYPT)
     /* test DSA sign hash with zeros */
-    for (i = 0; i < (int)sizeof(digest); i++) {
+    for (i = 0; i < (int)ECC_DIGEST_SIZE; i++) {
         digest[i] = 0;
     }
 
-    x = sizeof(sig);
-    ret = wc_ecc_sign_hash(digest, sizeof(digest), sig, &x, rng, &userA);
+    x = ECC_SIG_SIZE;
+    do {
+    #if defined(WOLFSSL_ASYNC_CRYPT)
+        ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+    #endif
+        if (ret >= 0)
+            ret = wc_ecc_sign_hash(digest, ECC_DIGEST_SIZE, sig, &x, rng,
+                                                                        &userA);
+    } while (ret == WC_PENDING_E);
     if (ret != 0)
         goto done;
 
 #ifdef HAVE_ECC_VERIFY
     for (i=0; i<testVerifyCount; i++) {
         verify = 0;
-        ret = wc_ecc_verify_hash(sig, x, digest, sizeof(digest), &verify, &userA);
+        do {
+        #if defined(WOLFSSL_ASYNC_CRYPT)
+            ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+        #endif
+            if (ret >= 0)
+                ret = wc_ecc_verify_hash(sig, x, digest, ECC_DIGEST_SIZE,
+                                                               &verify, &userA);
+        } while (ret == WC_PENDING_E);
         if (ret != 0)
             goto done;
         if (verify != 1)
             ERROR_OUT(-1016, done);
     }
 #endif /* HAVE_ECC_VERIFY */
-#endif /* ECC_SHAMIR */
+#endif /* ECC_SHAMIR && !WOLFSSL_ASYNC_CRYPT */
 
     /* test DSA sign hash with sequence (0,1,2,3,4,...) */
-    for (i = 0; i < (int)sizeof(digest); i++) {
+    for (i = 0; i < (int)ECC_DIGEST_SIZE; i++) {
         digest[i] = (byte)i;
     }
 
-    x = sizeof(sig);
-    ret = wc_ecc_sign_hash(digest, sizeof(digest), sig, &x, rng, &userA);
-
+    x = ECC_SIG_SIZE;
+    do {
+    #if defined(WOLFSSL_ASYNC_CRYPT)
+        ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+    #endif
+        if (ret >= 0)
+            ret = wc_ecc_sign_hash(digest, ECC_DIGEST_SIZE, sig, &x, rng,
+                                                                        &userA);
+    } while (ret == WC_PENDING_E);
     if (ret != 0)
         ERROR_OUT(-1014, done);
 
 #ifdef HAVE_ECC_VERIFY
     for (i=0; i<testVerifyCount; i++) {
         verify = 0;
-        ret = wc_ecc_verify_hash(sig, x, digest, sizeof(digest), &verify, &userA);
+        do {
+        #if defined(WOLFSSL_ASYNC_CRYPT)
+            ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_CALL_AGAIN);
+        #endif
+            if (ret >= 0)
+                ret = wc_ecc_verify_hash(sig, x, digest, ECC_DIGEST_SIZE,
+                                                               &verify, &userA);
+        } while (ret == WC_PENDING_E);
         if (ret != 0)
             goto done;
         if (verify != 1)
@@ -10081,6 +10211,13 @@ done:
     wc_ecc_free(&userB);
     wc_ecc_free(&userA);
 
+    FREE_VAR(sharedA, HEAP_HINT);
+    FREE_VAR(sharedB, HEAP_HINT);
+#ifdef HAVE_ECC_SIGN
+    FREE_VAR(sig, HEAP_HINT);
+    FREE_VAR(digest, HEAP_HINT);
+#endif
+
     return ret;
 }
 
@@ -10092,25 +10229,37 @@ static int ecc_test_curve(WC_RNG* rng, int keySize)
 
     ret = ecc_test_curve_size(rng, keySize, ECC_TEST_VERIFY_COUNT, ECC_CURVE_DEF);
     if (ret < 0) {
-        printf("ecc_test_curve_size %d failed!: %d\n", keySize, ret);
-        return ret;
-    }
-
-    #ifdef HAVE_ECC_VECTOR_TEST
-        ret = ecc_test_vector(keySize);
-        if (ret < 0) {
-            printf("ecc_test_vector %d failed!: %d\n", keySize, ret);
+        if (ret == ECC_CURVE_OID_E) {
+            /* ignore error for curves not found */
+            /* some curve sizes are only available with:
+                HAVE_ECC_SECPR2, HAVE_ECC_SECPR3, HAVE_ECC_BRAINPOOL and HAVE_ECC_KOBLITZ */
+        }
+        else {
+            printf("ecc_test_curve_size %d failed!: %d\n", keySize, ret);
             return ret;
         }
-    #endif
+    }
 
-    #ifdef WOLFSSL_KEY_GEN
-        ret = ecc_test_key_gen(rng, keySize);
-        if (ret < 0) {
+#ifdef HAVE_ECC_VECTOR_TEST
+    ret = ecc_test_vector(keySize);
+    if (ret < 0) {
+        printf("ecc_test_vector %d failed!: %d\n", keySize, ret);
+        return ret;
+    }
+#endif
+
+#ifdef WOLFSSL_KEY_GEN
+    ret = ecc_test_key_gen(rng, keySize);
+    if (ret < 0) {
+        if (ret == ECC_CURVE_OID_E) {
+            /* ignore error for curves not found */
+        }
+        else {
             printf("ecc_test_key_gen %d failed!: %d\n", keySize, ret);
             return ret;
         }
-    #endif
+    }
+#endif
 
     return 0;
 }
@@ -10687,7 +10836,7 @@ int ecc_test(void)
 #endif
 
 #ifndef HAVE_FIPS
-    ret = wc_InitRng_ex(&rng, HEAP_HINT);
+    ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
 #else
     ret = wc_InitRng(&rng);
 #endif
@@ -10809,7 +10958,7 @@ done:
 int ecc_encrypt_test(void)
 {
     WC_RNG  rng;
-    int     ret;
+    int     ret = 0;
     ecc_key userA, userB;
     byte    msg[48];
     byte    plain[48];
@@ -10817,126 +10966,153 @@ int ecc_encrypt_test(void)
     word32  outSz   = sizeof(out);
     word32  plainSz = sizeof(plain);
     int     i;
+    ecEncCtx* cliCtx = NULL;
+    ecEncCtx* srvCtx = NULL;
+    byte cliSalt[EXCHANGE_SALT_SZ];
+    byte srvSalt[EXCHANGE_SALT_SZ];
+    const byte* tmpSalt;
+    byte    msg2[48];
+    byte    plain2[48];
+    byte    out2[80];
+    word32  outSz2   = sizeof(out2);
+    word32  plainSz2 = sizeof(plain2);
 
 #ifndef HAVE_FIPS
-    ret = wc_InitRng_ex(&rng, HEAP_HINT);
+    ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
 #else
     ret = wc_InitRng(&rng);
 #endif
     if (ret != 0)
-        return -3001;
+        return -1001;
 
-    wc_ecc_init(&userA);
-    wc_ecc_init(&userB);
+    XMEMSET(&userA, 0, sizeof(userA));
+    XMEMSET(&userB, 0, sizeof(userB));
+
+    ret = wc_ecc_init_ex(&userA, HEAP_HINT, devId);
+    if (ret != 0)
+        goto done;
+    ret = wc_ecc_init_ex(&userB, HEAP_HINT, devId);
+    if (ret != 0)
+        goto done;
 
     ret  = wc_ecc_make_key(&rng, 32, &userA);
-    ret += wc_ecc_make_key(&rng, 32, &userB);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &userA.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0){
+        ret = -3001; goto done;
+    }
 
-    if (ret != 0)
-        return -3002;
+    ret = wc_ecc_make_key(&rng, 32, &userB);
+#if defined(WOLFSSL_ASYNC_CRYPT)
+    ret = wc_AsyncWait(ret, &userB.asyncDev, WC_ASYNC_FLAG_NONE);
+#endif
+    if (ret != 0){
+        ret = -3002; goto done;
+    }
 
-    for (i = 0; i < 48; i++)
+    /* set message to incrementing 0,1,2,etc... */
+    for (i = 0; i < (int)sizeof(msg); i++)
         msg[i] = i;
 
     /* encrypt msg to B */
     ret = wc_ecc_encrypt(&userA, &userB, msg, sizeof(msg), out, &outSz, NULL);
-    if (ret != 0)
-        return -3003;
+    if (ret != 0) {
+        ret = -3003; goto done;
+    }
 
     /* decrypt msg from A */
     ret = wc_ecc_decrypt(&userB, &userA, out, outSz, plain, &plainSz, NULL);
-    if (ret != 0)
-        return -3004;
-
-    if (XMEMCMP(plain, msg, sizeof(msg)) != 0)
-        return -3005;
-
-
-    {  /* let's verify message exchange works, A is client, B is server */
-        ecEncCtx* cliCtx = wc_ecc_ctx_new(REQ_RESP_CLIENT, &rng);
-        ecEncCtx* srvCtx = wc_ecc_ctx_new(REQ_RESP_SERVER, &rng);
-
-        byte cliSalt[EXCHANGE_SALT_SZ];
-        byte srvSalt[EXCHANGE_SALT_SZ];
-        const byte* tmpSalt;
-
-        if (cliCtx == NULL || srvCtx == NULL)
-            return -3006;
-
-        /* get salt to send to peer */
-        tmpSalt = wc_ecc_ctx_get_own_salt(cliCtx);
-        if (tmpSalt == NULL)
-            return -3007;
-        XMEMCPY(cliSalt, tmpSalt, EXCHANGE_SALT_SZ);
-
-        tmpSalt = wc_ecc_ctx_get_own_salt(srvCtx);
-        if (tmpSalt == NULL)
-            return -3007;
-        XMEMCPY(srvSalt, tmpSalt, EXCHANGE_SALT_SZ);
-
-        /* in actual use, we'd get the peer's salt over the transport */
-        ret  = wc_ecc_ctx_set_peer_salt(cliCtx, srvSalt);
-        ret += wc_ecc_ctx_set_peer_salt(srvCtx, cliSalt);
-
-        ret += wc_ecc_ctx_set_info(cliCtx, (byte*)"wolfSSL MSGE", 11);
-        ret += wc_ecc_ctx_set_info(srvCtx, (byte*)"wolfSSL MSGE", 11);
-
-        if (ret != 0)
-            return -3008;
-
-        /* get encrypted msg (request) to send to B */
-        outSz  = sizeof(out);
-        ret = wc_ecc_encrypt(&userA, &userB, msg, sizeof(msg), out, &outSz,cliCtx);
-        if (ret != 0)
-            return -3009;
-
-        /* B decrypts msg (request) from A */
-        plainSz = sizeof(plain);
-        ret = wc_ecc_decrypt(&userB, &userA, out, outSz, plain, &plainSz, srvCtx);
-        if (ret != 0)
-            return -3010;
-
-        if (XMEMCMP(plain, msg, sizeof(msg)) != 0)
-            return -3011;
-
-        {
-            /* msg2 (response) from B to A */
-            byte    msg2[48];
-            byte    plain2[48];
-            byte    out2[80];
-            word32  outSz2   = sizeof(out2);
-            word32  plainSz2 = sizeof(plain2);
-
-            for (i = 0; i < 48; i++)
-                msg2[i] = i+48;
-
-            /* get encrypted msg (response) to send to B */
-            ret = wc_ecc_encrypt(&userB, &userA, msg2, sizeof(msg2), out2,
-                              &outSz2, srvCtx);
-            if (ret != 0)
-                return -3012;
-
-            /* A decrypts msg (response) from B */
-            ret = wc_ecc_decrypt(&userA, &userB, out2, outSz2, plain2, &plainSz2,
-                             cliCtx);
-            if (ret != 0)
-                return -3013;
-
-            if (XMEMCMP(plain2, msg2, sizeof(msg2)) != 0)
-                return -3014;
-        }
-
-        /* cleanup */
-        wc_ecc_ctx_free(srvCtx);
-        wc_ecc_ctx_free(cliCtx);
+    if (ret != 0) {
+        ret = -3004; goto done;
     }
 
+    if (XMEMCMP(plain, msg, sizeof(msg)) != 0) {
+        ret = -3005; goto done;
+    }
+
+    /* let's verify message exchange works, A is client, B is server */
+    cliCtx = wc_ecc_ctx_new(REQ_RESP_CLIENT, &rng);
+    srvCtx = wc_ecc_ctx_new(REQ_RESP_SERVER, &rng);
+    if (cliCtx == NULL || srvCtx == NULL) {
+        ret = -3006; goto done;
+    }
+
+    /* get salt to send to peer */
+    tmpSalt = wc_ecc_ctx_get_own_salt(cliCtx);
+    if (tmpSalt == NULL) {
+        ret = -3007; goto done;
+    }
+    XMEMCPY(cliSalt, tmpSalt, EXCHANGE_SALT_SZ);
+
+    tmpSalt = wc_ecc_ctx_get_own_salt(srvCtx);
+    if (tmpSalt == NULL) {
+        ret = -3007; goto done;
+    }
+    XMEMCPY(srvSalt, tmpSalt, EXCHANGE_SALT_SZ);
+
+    /* in actual use, we'd get the peer's salt over the transport */
+    ret = wc_ecc_ctx_set_peer_salt(cliCtx, srvSalt);
+    if (ret != 0)
+        goto done;
+    ret = wc_ecc_ctx_set_peer_salt(srvCtx, cliSalt);
+    if (ret != 0)
+        goto done;
+
+    ret = wc_ecc_ctx_set_info(cliCtx, (byte*)"wolfSSL MSGE", 11);
+    if (ret != 0)
+        goto done;
+    ret = wc_ecc_ctx_set_info(srvCtx, (byte*)"wolfSSL MSGE", 11);
+    if (ret != 0)
+        goto done;
+
+    /* get encrypted msg (request) to send to B */
+    outSz = sizeof(out);
+    ret = wc_ecc_encrypt(&userA, &userB, msg, sizeof(msg), out, &outSz,cliCtx);
+    if (ret != 0)
+        goto done;
+
+    /* B decrypts msg (request) from A */
+    plainSz = sizeof(plain);
+    ret = wc_ecc_decrypt(&userB, &userA, out, outSz, plain, &plainSz, srvCtx);
+    if (ret != 0)
+        goto done;
+
+    if (XMEMCMP(plain, msg, sizeof(msg)) != 0) {
+        ret = -3011; goto done;
+    }
+
+    /* msg2 (response) from B to A */
+    for (i = 0; i < (int)sizeof(msg2); i++)
+        msg2[i] = i + sizeof(msg2);
+
+    /* get encrypted msg (response) to send to A */
+    ret = wc_ecc_encrypt(&userB, &userA, msg2, sizeof(msg2), out2,
+                      &outSz2, srvCtx);
+    if (ret != 0)
+        goto done;
+
+    /* A decrypts msg (response) from B */
+    ret = wc_ecc_decrypt(&userA, &userB, out2, outSz2, plain2, &plainSz2,
+                     cliCtx);
+    if (ret != 0)
+        goto done;
+
+    if (XMEMCMP(plain2, msg2, sizeof(msg2)) != 0) {
+        ret = -3014; goto done;
+    }
+
+done:
+
     /* cleanup */
+    wc_ecc_ctx_free(srvCtx);
+    wc_ecc_ctx_free(cliCtx);
+
     wc_ecc_free(&userB);
     wc_ecc_free(&userA);
     wc_FreeRng(&rng);
 
-    return 0;
+    return ret;
 }
 
 #endif /* HAVE_ECC_ENCRYPT */
@@ -10974,7 +11150,7 @@ int ecc_test_buffers() {
         return -41;
 
 #ifndef HAVE_FIPS
-    ret = wc_InitRng_ex(&rng, HEAP_HINT);
+    ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
 #else
     ret = wc_InitRng(&rng);
 #endif
@@ -11100,7 +11276,7 @@ int curve25519_test(void)
 #endif /* HAVE_CURVE25519_SHARED_SECRET */
 
 #ifndef HAVE_FIPS
-    ret = wc_InitRng_ex(&rng, HEAP_HINT);
+    ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
 #else
     ret = wc_InitRng(&rng);
 #endif
@@ -11560,7 +11736,7 @@ int ed25519_test(void)
 
     /* create ed25519 keys */
 #ifndef HAVE_FIPS
-    ret = wc_InitRng_ex(&rng, HEAP_HINT);
+    ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
 #else
     ret = wc_InitRng(&rng);
 #endif
@@ -12573,7 +12749,7 @@ int pkcs7signed_test(void)
 #endif /* USE_CERT_BUFFER_ */
 
 #ifndef HAVE_FIPS
-    ret = wc_InitRng_ex(&rng, HEAP_HINT);
+    ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
 #else
     ret = wc_InitRng(&rng);
 #endif
@@ -12613,7 +12789,7 @@ int pkcs7signed_test(void)
         transId[0] = 0x13;
         transId[1] = SHA_DIGEST_SIZE * 2;
 
-        ret = wc_InitSha(&sha);
+        ret = wc_InitSha_ex(&sha, HEAP_HINT, devId);
         if (ret != 0) {
             XFREE(certDer, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
             XFREE(keyDer, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER);
@@ -12622,6 +12798,7 @@ int pkcs7signed_test(void)
         }
         wc_ShaUpdate(&sha, msg.publicKey, msg.publicKeySz);
         wc_ShaFinal(&sha, digest);
+        wc_ShaFree(&sha);
 
         for (i = 0, j = 2; i < SHA_DIGEST_SIZE; i++, j += 2) {
             snprintf((char*)&transId[j], 3, "%02x", digest[i]);
@@ -12751,7 +12928,7 @@ int mp_test()
     mp_init_copy(&p, &a);
 
 #ifndef HAVE_FIPS
-    ret = wc_InitRng_ex(&rng, HEAP_HINT);
+    ret = wc_InitRng_ex(&rng, HEAP_HINT, devId);
 #else
     ret = wc_InitRng(&rng);
 #endif
@@ -12989,28 +13166,43 @@ static void *my_Realloc_cb(void *ptr, size_t size)
 
 int memcb_test()
 {
+    int ret = 0;
     byte* b = NULL;
+    wolfSSL_Malloc_cb  mc;
+    wolfSSL_Free_cb    fc;
+    wolfSSL_Realloc_cb rc;
 
+    /* Save existing memory callbacks */
+    if (wolfSSL_GetAllocators(&mc, &fc, &rc) != 0)
+        return -12103;
+
+    /* test realloc */
     b = (byte*)XREALLOC(b, 1024, NULL, DYNAMIC_TYPE_TMP_BUFFER);
+    if (b == NULL) {
+        ERROR_OUT(-12104, exit_memcb);
+    }
     XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
     b = NULL;
 
     /* Parameter Validation testing. */
     if (wolfSSL_SetAllocators(NULL, (wolfSSL_Free_cb)&my_Free_cb,
-            (wolfSSL_Realloc_cb)&my_Realloc_cb) != BAD_FUNC_ARG)
-        return -12100;
+            (wolfSSL_Realloc_cb)&my_Realloc_cb) != BAD_FUNC_ARG) {
+        ERROR_OUT(-12100, exit_memcb);
+    }
     if (wolfSSL_SetAllocators((wolfSSL_Malloc_cb)&my_Malloc_cb, NULL,
-            (wolfSSL_Realloc_cb)&my_Realloc_cb) != BAD_FUNC_ARG)
-        return -12101;
+            (wolfSSL_Realloc_cb)&my_Realloc_cb) != BAD_FUNC_ARG) {
+        ERROR_OUT(-12101, exit_memcb);
+    }
     if (wolfSSL_SetAllocators((wolfSSL_Malloc_cb)&my_Malloc_cb,
-            (wolfSSL_Free_cb)&my_Free_cb, NULL) != BAD_FUNC_ARG)
-        return -12102;
+            (wolfSSL_Free_cb)&my_Free_cb, NULL) != BAD_FUNC_ARG) {
+        ERROR_OUT(-12102, exit_memcb);
+    }
 
     /* Use API. */
     if (wolfSSL_SetAllocators((wolfSSL_Malloc_cb)&my_Malloc_cb,
-            (wolfSSL_Free_cb)&my_Free_cb, (wolfSSL_Realloc_cb)my_Realloc_cb)
-            != 0)
-        return -12100;
+        (wolfSSL_Free_cb)&my_Free_cb, (wolfSSL_Realloc_cb)my_Realloc_cb) != 0) {
+        ERROR_OUT(-12100, exit_memcb);
+    }
 
     b = (byte*)XMALLOC(1024, NULL, DYNAMIC_TYPE_TMP_BUFFER);
     b = (byte*)XREALLOC(b, 1024, NULL, DYNAMIC_TYPE_TMP_BUFFER);
@@ -13021,8 +13213,14 @@ int memcb_test()
 #else
     if (malloc_cnt != 0 || free_cnt != 0 || realloc_cnt != 0)
 #endif
-        return -12110;
-    return 0;
+        ret = -12110;
+
+exit_memcb:
+
+    /* restore memory callbacks */
+    wolfSSL_SetAllocators(mc, fc, rc);
+
+    return ret;
 }
 #endif
 
diff --git a/wolfssl/internal.h b/wolfssl/internal.h
index 0ee4b9c83..9340a0e65 100755
--- a/wolfssl/internal.h
+++ b/wolfssl/internal.h
@@ -81,18 +81,24 @@
 #ifdef WOLFSSL_SHA512
     #include 
 #endif
-
 #ifdef HAVE_AESGCM
     #include 
 #endif
-
 #ifdef WOLFSSL_RIPEMD
     #include 
 #endif
-
 #ifdef HAVE_IDEA
     #include 
 #endif
+#ifndef NO_RSA
+    #include <wolfssl/wolfcrypt/rsa.h>
+#endif
+#ifdef HAVE_ECC
+    #include <wolfssl/wolfcrypt/ecc.h>
+#endif
+#ifndef NO_DH
+    #include <wolfssl/wolfcrypt/dh.h>
+#endif
 
 #include 
 
@@ -1778,7 +1784,7 @@ typedef struct {
 } CertificateStatusRequest;
 
 WOLFSSL_LOCAL int   TLSX_UseCertificateStatusRequest(TLSX** extensions,
-                                    byte status_type, byte options, void* heap);
+                                    byte status_type, byte options, void* heap, int devId);
 WOLFSSL_LOCAL int   TLSX_CSR_InitRequest(TLSX* extensions, DecodedCert* cert,
                                                                     void* heap);
 WOLFSSL_LOCAL void* TLSX_CSR_GetRequest(TLSX* extensions);
@@ -1800,7 +1806,7 @@ typedef struct CSRIv2 {
 } CertificateStatusRequestItemV2;
 
 WOLFSSL_LOCAL int   TLSX_UseCertificateStatusRequestV2(TLSX** extensions,
-                                    byte status_type, byte options, void* heap);
+                                    byte status_type, byte options, void* heap, int devId);
 WOLFSSL_LOCAL int   TLSX_CSR2_InitRequests(TLSX* extensions, DecodedCert* cert,
                                                        byte isPeer, void* heap);
 WOLFSSL_LOCAL void* TLSX_CSR2_GetRequest(TLSX* extensions, byte status_type,
@@ -2208,6 +2214,10 @@ typedef struct Ciphers {
 #endif
 #if defined(BUILD_AES) || defined(BUILD_AESGCM)
     Aes*    aes;
+    #if defined(BUILD_AESGCM) || defined(HAVE_AESCCM)
+        byte* additional;
+        byte* nonce;
+    #endif
 #endif
 #ifdef HAVE_CAMELLIA
     Camellia* cam;
@@ -2224,6 +2234,7 @@ typedef struct Ciphers {
 #ifdef HAVE_IDEA
     Idea* idea;
 #endif
+    byte    state;
     byte    setup;       /* have we set it up flag for detection */
 } Ciphers;
 
@@ -2356,16 +2367,6 @@ enum AcceptState {
     ACCEPT_THIRD_REPLY_DONE
 };
 
-/* sub-states for send/do key share (key exchange) */
-enum KeyShareState {
-    KEYSHARE_BEGIN = 0,
-    KEYSHARE_BUILD,
-    KEYSHARE_DO,
-    KEYSHARE_VERIFY,
-    KEYSHARE_FINALIZE,
-    KEYSHARE_END
-};
-
 /* buffers for struct WOLFSSL */
 typedef struct Buffers {
     bufferStatic    inputBuffer;
@@ -2387,6 +2388,7 @@ typedef struct Buffers {
     buffer          serverDH_G;            /* WOLFSSL_CTX owns, unless we own */
     buffer          serverDH_Pub;
     buffer          serverDH_Priv;
+    DhKey*          serverDH_Key;
 #endif
 #ifndef NO_CERTS
     DerBuffer*      certificate;           /* WOLFSSL_CTX owns, unless we own */
@@ -2498,8 +2500,8 @@ typedef struct Options {
     byte            minDowngrade;       /* minimum downgrade version */
     byte            connectState;       /* nonblocking resume */
     byte            acceptState;        /* nonblocking resume */
-    byte            keyShareState;      /* sub-state for key share (key exchange).
-                                           See enum KeyShareState. */
+    byte            keyShareState;      /* sub-state for enum keyShareState */
+    byte            buildMsgState;      /* sub-state for enum buildMsgState */
 #ifndef NO_DH
     word16          minDhKeySz;         /* minimum DH key size */
     word16          dhKeySz;            /* actual DH key size */
@@ -2515,6 +2517,7 @@ typedef struct Options {
 
 typedef struct Arrays {
     byte*           pendingMsg;         /* defrag buffer */
+    byte*           preMasterSecret;
     word32          preMasterSz;        /* differs for DH, actual size */
     word32          pendingMsgSz;       /* defrag buffer size */
     word32          pendingMsgOffset;   /* current offset into defrag buffer */
@@ -2528,7 +2531,6 @@ typedef struct Arrays {
     byte            serverRandom[RAN_LEN];
     byte            sessionID[ID_LEN];
     byte            sessionIDSz;
-    byte            preMasterSecret[ENCRYPT_LEN];
     byte            masterSecret[SECRET_LEN];
 #ifdef WOLFSSL_DTLS
     byte            cookie[MAX_COOKIE_LEN];
@@ -2750,6 +2752,17 @@ typedef struct HS_Hashes {
 } HS_Hashes;
 
 
+#ifdef WOLFSSL_ASYNC_CRYPT
+    #define MAX_ASYNC_ARGS 16
+    typedef void (*FreeArgsCb)(struct WOLFSSL* ssl, void* pArgs);
+
+    struct WOLFSSL_ASYNC {
+        WC_ASYNC_DEV* dev;
+        FreeArgsCb    freeArgs; /* function pointer to cleanup args */
+        word32        args[MAX_ASYNC_ARGS]; /* holder for current args */
+    };
+#endif
+
 #ifdef HAVE_WRITE_DUP
 
     #define WRITE_DUP_SIDE 1
@@ -2791,12 +2804,10 @@ struct WOLFSSL {
     void*           hsDoneCtx;         /*  user handshake cb context  */
 #endif
 #ifdef WOLFSSL_ASYNC_CRYPT
-    AsyncCryptSSLState  async;
-    AsyncCryptDev       asyncDev;
+    struct WOLFSSL_ASYNC async;
 #endif
-    void*           sigKey;             /* RsaKey or ecc_key allocated from heap */
-    word32          sigType;            /* Type of sigKey */
-    word32          sigLen;             /* Actual signature length */
+    void*           hsKey;              /* Handshake key (RsaKey or ecc_key) allocated from heap */
+    word32          hsType;             /* Type of Handshake key (hsKey) */
     WOLFSSL_CIPHER  cipher;
     hmacfp          hmac;
     Ciphers         encrypt;
@@ -2856,6 +2867,7 @@ struct WOLFSSL {
     ecc_key*        peerEccKey;              /* peer's  ECDHE key */
     ecc_key*        peerEccDsaKey;           /* peer's  ECDSA key */
     ecc_key*        eccTempKey;              /* private ECDHE key */
+    int             eccVerifyRes;
     word32          pkCurveOID;              /* curve Ecc_Sum     */
     word32          ecdhCurveOID;            /* curve Ecc_Sum     */
     word16          eccTempKeySz;            /* in octets 20 - 66 */
@@ -2977,9 +2989,6 @@ struct WOLFSSL {
 #ifdef WOLFSSL_JNI
         void* jObjectRef;     /* reference to WolfSSLSession in JNI wrapper */
 #endif /* WOLFSSL_JNI */
-#ifdef HAVE_WOLF_EVENT
-    WOLF_EVENT event;
-#endif /* HAVE_WOLF_EVENT */
 };
 
 
@@ -3241,7 +3250,7 @@ WOLFSSL_LOCAL word32  LowResTimer(void);
     WOLFSSL_LOCAL int  CopyDecodedToX509(WOLFSSL_X509*, DecodedCert*);
 #endif
 
-/* used by ssl.c and wolfssl_int.c */
+/* used by ssl.c and internal.c */
 WOLFSSL_LOCAL void c32to24(word32 in, word24 out);
 
 WOLFSSL_LOCAL const char* const* GetCipherNames(void);
@@ -3261,19 +3270,14 @@ WOLFSSL_LOCAL int SetKeysSide(WOLFSSL*, enum encrypt_side);
 
 
 #ifndef NO_DH
-    WOLFSSL_LOCAL int DhGenKeyPair(WOLFSSL* ssl,
-        byte* p, word32 pSz,
-        byte* g, word32 gSz,
+    WOLFSSL_LOCAL int DhGenKeyPair(WOLFSSL* ssl, DhKey* dhKey,
         byte* priv, word32* privSz,
         byte* pub, word32* pubSz);
-    WOLFSSL_LOCAL int DhAgree(WOLFSSL* ssl,
-        byte* p, word32 pSz,
-        byte* g, word32 gSz,
-        byte* priv, word32* privSz,
-        byte* pub, word32* pubSz,
+    WOLFSSL_LOCAL int DhAgree(WOLFSSL* ssl, DhKey* dhKey,
+        const byte* priv, word32 privSz,
         const byte* otherPub, word32 otherPubSz,
         byte* agree, word32* agreeSz);
-#endif
+#endif /* !NO_DH */
 
 #ifdef HAVE_ECC
     WOLFSSL_LOCAL int EccMakeKey(WOLFSSL* ssl, ecc_key* key, ecc_key* peer);
@@ -3281,7 +3285,17 @@ WOLFSSL_LOCAL int SetKeysSide(WOLFSSL*, enum encrypt_side);
 
 WOLFSSL_LOCAL int BuildMessage(WOLFSSL* ssl, byte* output, int outSz,
                         const byte* input, int inSz, int type, int hashOutput,
-                        int sizeOnly);
+                        int sizeOnly, int asyncOkay);
+
+WOLFSSL_LOCAL int AllocKey(WOLFSSL* ssl, int type, void** pKey);
+WOLFSSL_LOCAL void FreeKey(WOLFSSL* ssl, int type, void** pKey);
+
+#ifdef WOLFSSL_ASYNC_CRYPT
+    WOLFSSL_LOCAL int wolfSSL_AsyncPop(WOLFSSL* ssl, byte* state);
+    WOLFSSL_LOCAL int wolfSSL_AsyncPush(WOLFSSL* ssl, WC_ASYNC_DEV* asyncDev,
+                                        word32 flags);
+#endif
+
 
 #ifdef __cplusplus
     }  /* extern "C" */
diff --git a/wolfssl/openssl/md5.h b/wolfssl/openssl/md5.h
index 2e8620825..c4f05d30c 100644
--- a/wolfssl/openssl/md5.h
+++ b/wolfssl/openssl/md5.h
@@ -18,7 +18,7 @@
 
 
 typedef struct WOLFSSL_MD5_CTX {
-    int holder[24];   /* big enough to hold wolfcrypt md5, but check on init */
+    int holder[28 + (WC_ASYNC_DEV_SIZE / sizeof(int))];   /* big enough to hold wolfcrypt md5, but check on init */
 } WOLFSSL_MD5_CTX;
 
 WOLFSSL_API void wolfSSL_MD5_Init(WOLFSSL_MD5_CTX*);
diff --git a/wolfssl/openssl/sha.h b/wolfssl/openssl/sha.h
index d9e168129..7495d4a37 100644
--- a/wolfssl/openssl/sha.h
+++ b/wolfssl/openssl/sha.h
@@ -17,7 +17,8 @@
 
 
 typedef struct WOLFSSL_SHA_CTX {
-    int holder[24];   /* big enough to hold wolfcrypt sha, but check on init */
+    /* big enough to hold wolfcrypt Sha, but check on init */
+    int holder[28 + (WC_ASYNC_DEV_SIZE / sizeof(int))];
 } WOLFSSL_SHA_CTX;
 
 WOLFSSL_API void wolfSSL_SHA_Init(WOLFSSL_SHA_CTX*);
@@ -51,7 +52,8 @@ typedef WOLFSSL_SHA_CTX SHA_CTX;
  * struct are 16 byte aligned. Any derefrence to those elements after casting to
  * Sha224, is expected to also be 16 byte aligned addresses.  */
 typedef struct WOLFSSL_SHA224_CTX {
-    ALIGN16 long long holder[28];   /* big enough, but check on init */
+    /* big enough to hold wolfcrypt Sha224, but check on init */
+    ALIGN16 int holder[34 + (WC_ASYNC_DEV_SIZE / sizeof(int))];
 } WOLFSSL_SHA224_CTX;
 
 WOLFSSL_API void wolfSSL_SHA224_Init(WOLFSSL_SHA224_CTX*);
@@ -77,7 +79,8 @@ typedef WOLFSSL_SHA224_CTX SHA224_CTX;
  * struct are 16 byte aligned. Any derefrence to those elements after casting to
  * Sha256, is expected to also be 16 byte aligned addresses.  */
 typedef struct WOLFSSL_SHA256_CTX {
-    ALIGN16 int holder[28];   /* big enough to hold wolfcrypt sha, but check on init */
+    /* big enough to hold wolfcrypt Sha256, but check on init */
+    ALIGN16 int holder[34 + (WC_ASYNC_DEV_SIZE / sizeof(int))];
 } WOLFSSL_SHA256_CTX;
 
 WOLFSSL_API void wolfSSL_SHA256_Init(WOLFSSL_SHA256_CTX*);
@@ -100,7 +103,8 @@ typedef WOLFSSL_SHA256_CTX SHA256_CTX;
 #ifdef WOLFSSL_SHA384
 
 typedef struct WOLFSSL_SHA384_CTX {
-    long long holder[32];   /* big enough, but check on init */
+    /* big enough to hold wolfCrypt Sha384, but check on init */
+    long long holder[32 + (WC_ASYNC_DEV_SIZE / sizeof(long long))];
 } WOLFSSL_SHA384_CTX;
 
 WOLFSSL_API void wolfSSL_SHA384_Init(WOLFSSL_SHA384_CTX*);
@@ -124,7 +128,8 @@ typedef WOLFSSL_SHA384_CTX SHA384_CTX;
 #ifdef WOLFSSL_SHA512
 
 typedef struct WOLFSSL_SHA512_CTX {
-    long long holder[36];   /* big enough, but check on init */
+    /* big enough to hold wolfCrypt Sha512, but check on init */
+    long long holder[36 + (WC_ASYNC_DEV_SIZE / sizeof(long long))];
 } WOLFSSL_SHA512_CTX;
 
 WOLFSSL_API void wolfSSL_SHA512_Init(WOLFSSL_SHA512_CTX*);
diff --git a/wolfssl/test.h b/wolfssl/test.h
index d481c4b03..81fa0e8fa 100644
--- a/wolfssl/test.h
+++ b/wolfssl/test.h
@@ -10,7 +10,6 @@
 #include 
 #include 
 #include 
-#include 
 
 #ifdef ATOMIC_USER
     #include 
@@ -19,6 +18,9 @@
 #endif
 #ifdef HAVE_PK_CALLBACKS
     #include 
+    #ifndef NO_RSA
+        #include <wolfssl/wolfcrypt/rsa.h>
+    #endif
     #ifdef HAVE_ECC
         #include 
     #endif /* HAVE_ECC */
diff --git a/wolfssl/wolfcrypt/aes.h b/wolfssl/wolfcrypt/aes.h
old mode 100644
new mode 100755
index 2b3c4e576..785edc181
--- a/wolfssl/wolfcrypt/aes.h
+++ b/wolfssl/wolfcrypt/aes.h
@@ -73,6 +73,7 @@ typedef struct Aes {
     /* AESNI needs key first, rounds 2nd, not sure why yet */
     ALIGN16 word32 key[60];
     word32  rounds;
+    int     keylen;
 
     ALIGN16 word32 reg[AES_BLOCK_SIZE / sizeof(word32)];      /* for CBC mode */
     ALIGN16 word32 tmp[AES_BLOCK_SIZE / sizeof(word32)];      /* same         */
@@ -88,10 +89,9 @@ typedef struct Aes {
     byte use_aesni;
 #endif /* WOLFSSL_AESNI */
 #ifdef WOLFSSL_ASYNC_CRYPT
-    AsyncCryptDev asyncDev;
-    #ifdef HAVE_CAVIUM
-        AesType type;                       /* aes key type */
-    #endif
+    const byte* asyncKey;
+    const byte* asyncIv;
+    WC_ASYNC_DEV asyncDev;
 #endif /* WOLFSSL_ASYNC_CRYPT */
 #ifdef WOLFSSL_AES_COUNTER
     word32  left;            /* unused bytes left from last call */
@@ -99,10 +99,6 @@ typedef struct Aes {
 #ifdef WOLFSSL_PIC32MZ_CRYPT
     word32 key_ce[AES_BLOCK_SIZE*2/sizeof(word32)] ;
     word32 iv_ce [AES_BLOCK_SIZE  /sizeof(word32)] ;
-    int    keylen ;
-#endif
-#ifdef WOLFSSL_TI_CRYPT
-    int    keylen ;
 #endif
     void*  heap; /* memory hint to use */
 } Aes;
@@ -115,7 +111,20 @@ typedef struct Gmac {
 #endif /* HAVE_AESGCM */
 #endif /* HAVE_FIPS */
 
-WOLFSSL_LOCAL int  wc_InitAes_h(Aes* aes, void* h);
+
+/* Authenticate cipher function prototypes */
+typedef int (*wc_AesAuthEncryptFunc)(Aes* aes, byte* out,
+                                   const byte* in, word32 sz,
+                                   const byte* iv, word32 ivSz,
+                                   byte* authTag, word32 authTagSz,
+                                   const byte* authIn, word32 authInSz);
+typedef int (*wc_AesAuthDecryptFunc)(Aes* aes, byte* out,
+                                   const byte* in, word32 sz,
+                                   const byte* iv, word32 ivSz,
+                                   const byte* authTag, word32 authTagSz,
+                                   const byte* authIn, word32 authInSz);
+
+/* AES-CBC */
 WOLFSSL_API int  wc_AesSetKey(Aes* aes, const byte* key, word32 len,
                               const byte* iv, int dir);
 WOLFSSL_API int  wc_AesSetIV(Aes* aes, const byte* iv);
@@ -187,10 +196,8 @@ WOLFSSL_API int wc_AesEcbDecrypt(Aes* aes, byte* out,
 
 WOLFSSL_API int wc_AesGetKeySize(Aes* aes, word32* keySize);
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-     WOLFSSL_API int  wc_AesAsyncInit(Aes*, int);
-     WOLFSSL_API void wc_AesAsyncFree(Aes*);
-#endif
+WOLFSSL_API int  wc_AesInit(Aes*, void*, int);
+WOLFSSL_API void wc_AesFree(Aes*);
 
 #ifdef __cplusplus
     } /* extern "C" */
diff --git a/wolfssl/wolfcrypt/arc4.h b/wolfssl/wolfcrypt/arc4.h
index 752f1d062..aab0fb984 100644
--- a/wolfssl/wolfcrypt/arc4.h
+++ b/wolfssl/wolfcrypt/arc4.h
@@ -45,17 +45,16 @@ typedef struct Arc4 {
     byte y;
     byte state[ARC4_STATE_SIZE];
 #ifdef WOLFSSL_ASYNC_CRYPT
-    AsyncCryptDev asyncDev;
+    WC_ASYNC_DEV asyncDev;
 #endif
+    void* heap;
 } Arc4;
 
-WOLFSSL_API void wc_Arc4Process(Arc4*, byte*, const byte*, word32);
-WOLFSSL_API void wc_Arc4SetKey(Arc4*, const byte*, word32);
+WOLFSSL_API int wc_Arc4Process(Arc4*, byte*, const byte*, word32);
+WOLFSSL_API int wc_Arc4SetKey(Arc4*, const byte*, word32);
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    WOLFSSL_API int  wc_Arc4AsyncInit(Arc4*, int);
-    WOLFSSL_API void wc_Arc4AsyncFree(Arc4*);
-#endif
+WOLFSSL_API int  wc_Arc4Init(Arc4*, void*, int);
+WOLFSSL_API void wc_Arc4Free(Arc4*);
 
 #ifdef __cplusplus
     } /* extern "C" */
diff --git a/wolfssl/wolfcrypt/asn.h b/wolfssl/wolfcrypt/asn.h
index b7377b9fd..f4b0c5e0b 100644
--- a/wolfssl/wolfcrypt/asn.h
+++ b/wolfssl/wolfcrypt/asn.h
@@ -28,9 +28,6 @@
 #ifndef NO_ASN
 
 #include 
-#ifndef NO_RSA
-    #include <wolfssl/wolfcrypt/rsa.h>
-#endif
 
 /* fips declare of RsaPrivateKeyDecode @wc_fips */
 #if defined(HAVE_FIPS) && !defined(NO_RSA)
@@ -51,9 +48,7 @@
 #endif
 #include 
 #include    /* public interface */
-#ifdef HAVE_ECC
-    #include <wolfssl/wolfcrypt/ecc.h>
-#endif
+
 
 #ifdef __cplusplus
     extern "C" {
@@ -418,6 +413,60 @@ struct DecodedName {
     int     serialLen;
 };
 
+enum SignatureState {
+    SIG_STATE_BEGIN,
+    SIG_STATE_HASH,
+    SIG_STATE_KEY,
+    SIG_STATE_DO,
+    SIG_STATE_CHECK,
+};
+
+struct SignatureCtx {
+    void* heap;
+    byte* digest;
+#ifndef NO_RSA
+    byte* out;
+    byte* plain;
+#endif
+#ifdef HAVE_ECC
+    int verify;
+#endif
+    union {
+    #ifndef NO_RSA
+        struct RsaKey* rsa;
+    #endif
+    #ifdef HAVE_ECC
+        struct ecc_key* ecc;
+    #endif
+        void* ptr;
+    } key;
+    int devId;
+    int state;
+    int typeH;
+    int digestSz;
+    word32 keyOID;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    WC_ASYNC_DEV* asyncDev;
+#endif
+};
+
+enum CertSignState {
+    CERTSIGN_STATE_BEGIN,
+    CERTSIGN_STATE_DIGEST,
+    CERTSIGN_STATE_ENCODE,
+    CERTSIGN_STATE_DO,
+};
+
+struct CertSignCtx {
+    byte* sig;
+    byte* digest;
+    #ifndef NO_RSA
+        byte* encSig;
+        int encSigSz;
+    #endif
+    int state; /* enum CertSignState */
+};
+
 
 typedef struct DecodedCert DecodedCert;
 typedef struct DecodedName DecodedName;
@@ -425,6 +474,8 @@ typedef struct Signer      Signer;
 #ifdef WOLFSSL_TRUST_PEER_CERT
 typedef struct TrustedPeerCert TrustedPeerCert;
 #endif /* WOLFSSL_TRUST_PEER_CERT */
+typedef struct SignatureCtx SignatureCtx;
+typedef struct CertSignCtx  CertSignCtx;
 
 
 struct DecodedCert {
@@ -566,6 +617,9 @@ struct DecodedCert {
     char    extCertPolicies[MAX_CERTPOL_NB][MAX_CERTPOL_SZ];
     int     extCertPoliciesNb;
 #endif /* WOLFSSL_CERT_EXT */
+
+    Signer* ca;
+    SignatureCtx sigCtx;
 };
 
 
@@ -747,6 +801,10 @@ WOLFSSL_LOCAL int wc_CheckPrivateKey(byte* key, word32 keySz, DecodedCert* der);
                                        mp_int* r, mp_int* s);
 #endif
 
+WOLFSSL_LOCAL void InitSignatureCtx(SignatureCtx* sigCtx, void* heap, int devId);
+WOLFSSL_LOCAL void FreeSignatureCtx(SignatureCtx* sigCtx);
+
+
 #ifdef WOLFSSL_CERT_GEN
 
 enum cert_enums {
diff --git a/wolfssl/wolfcrypt/asn_public.h b/wolfssl/wolfcrypt/asn_public.h
index 1805deb26..f70a4bca7 100644
--- a/wolfssl/wolfcrypt/asn_public.h
+++ b/wolfssl/wolfcrypt/asn_public.h
@@ -25,17 +25,16 @@
 #define WOLF_CRYPT_ASN_PUBLIC_H
 
 #include 
-#ifdef HAVE_ECC
-    #include <wolfssl/wolfcrypt/ecc.h>
-#endif
-#if defined(WOLFSSL_CERT_GEN) && !defined(NO_RSA)
-    #include <wolfssl/wolfcrypt/rsa.h>
-#endif
 
 #ifdef __cplusplus
     extern "C" {
 #endif
 
+/* Opaque keys. Only key pointers are used for arguments */
+typedef struct ecc_key ecc_key;
+typedef struct RsaKey RsaKey;
+typedef struct WC_RNG WC_RNG;
+
 /* Certificate file Type */
 enum CertType {
     CERT_TYPE       = 0,
@@ -95,14 +94,8 @@ enum Ctc_Misc {
 #endif /* WOLFSSL_CERT_EXT */
 };
 
-#ifdef WOLFSSL_CERT_GEN
 
-#ifndef HAVE_ECC
-    typedef struct ecc_key ecc_key;
-#endif
-#ifdef NO_RSA
-    typedef struct RsaKey RsaKey;
-#endif
+#ifdef WOLFSSL_CERT_GEN
 
 typedef struct CertName {
     char country[CTC_NAME_SIZE];
diff --git a/wolfssl/wolfcrypt/des3.h b/wolfssl/wolfcrypt/des3.h
index 409aa81f7..6662501e5 100644
--- a/wolfssl/wolfcrypt/des3.h
+++ b/wolfssl/wolfcrypt/des3.h
@@ -80,11 +80,15 @@ typedef struct Des3 {
     word32 reg[DES_BLOCK_SIZE / sizeof(word32)];      /* for CBC mode */
     word32 tmp[DES_BLOCK_SIZE / sizeof(word32)];      /* same         */
 #ifdef WOLFSSL_ASYNC_CRYPT
-    AsyncCryptDev asyncDev;
+    const byte* key_raw;
+    const byte* iv_raw;
+    WC_ASYNC_DEV asyncDev;
 #endif
+    void* heap;
 } Des3;
 #endif /* HAVE_FIPS */
 
+
 WOLFSSL_API int  wc_Des_SetKey(Des* des, const byte* key,
                                const byte* iv, int dir);
 WOLFSSL_API void wc_Des_SetIV(Des* des, const byte* iv);
@@ -109,10 +113,8 @@ WOLFSSL_API int  wc_Des3_CbcEncrypt(Des3* des, byte* out,
 WOLFSSL_API int  wc_Des3_CbcDecrypt(Des3* des, byte* out,
                                     const byte* in,word32 sz);
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    WOLFSSL_API int  wc_Des3AsyncInit(Des3*, int);
-    WOLFSSL_API void wc_Des3AsyncFree(Des3*);
-#endif
+WOLFSSL_API int  wc_Des3Init(Des3*, void*, int);
+WOLFSSL_API void wc_Des3Free(Des3*);
 
 #ifdef __cplusplus
     } /* extern "C" */
diff --git a/wolfssl/wolfcrypt/dh.h b/wolfssl/wolfcrypt/dh.h
index 2410ab777..be40c5c11 100644
--- a/wolfssl/wolfcrypt/dh.h
+++ b/wolfssl/wolfcrypt/dh.h
@@ -34,14 +34,22 @@
     extern "C" {
 #endif
 
+#ifdef WOLFSSL_ASYNC_CRYPT
+    #include <wolfssl/wolfcrypt/async.h>
+#endif
 
 /* Diffie-Hellman Key */
 typedef struct DhKey {
     mp_int p, g;                            /* group parameters  */
+    void* heap;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    WC_ASYNC_DEV asyncDev;
+#endif
 } DhKey;
 
 
 WOLFSSL_API int wc_InitDhKey(DhKey* key);
+WOLFSSL_API int wc_InitDhKey_ex(DhKey* key, void* heap, int devId);
 WOLFSSL_API void wc_FreeDhKey(DhKey* key);
 
 WOLFSSL_API int wc_DhGenerateKeyPair(DhKey* key, WC_RNG* rng, byte* priv,
@@ -58,7 +66,6 @@ WOLFSSL_API int wc_DhParamsLoad(const byte* input, word32 inSz, byte* p,
                             word32* pInOutSz, byte* g, word32* gInOutSz);
 WOLFSSL_API int wc_DhCheckPubKey(DhKey* key, const byte* pub, word32 pubSz);
 
-
 #ifdef __cplusplus
     } /* extern "C" */
 #endif
diff --git a/wolfssl/wolfcrypt/ecc.h b/wolfssl/wolfcrypt/ecc.h
index 520a22679..beb3ec54f 100644
--- a/wolfssl/wolfcrypt/ecc.h
+++ b/wolfssl/wolfcrypt/ecc.h
@@ -36,6 +36,9 @@
 
 #ifdef WOLFSSL_ASYNC_CRYPT
     #include <wolfssl/wolfcrypt/async.h>
+    #ifdef WOLFSSL_CERT_GEN
+        #include <wolfssl/wolfcrypt/asn.h>
+    #endif
 #endif
 
 #ifdef WOLFSSL_ATECC508A
@@ -105,7 +108,7 @@ enum {
     ECC_MAXSIZE_GEN = 74,   /* MAX Buffer size required when generating ECC keys*/
     ECC_MAX_PAD_SZ  = 4,    /* ECC maximum padding size */
     ECC_MAX_OID_LEN = 16,
-    ECC_MAX_SIG_SIZE= ((MAX_ECC_BYTES * 2) + SIG_HEADER_SZ)
+    ECC_MAX_SIG_SIZE= ((MAX_ECC_BYTES * 2) + ECC_MAX_PAD_SZ + SIG_HEADER_SZ)
 };
 
 /* Curve Types */
@@ -234,6 +237,7 @@ typedef struct alt_fp_int {
 } alt_fp_int;
 #endif /* ALT_ECC_SIZE */
 
+
 /* A point on an ECC curve, stored in Jacbobian format such that (x,y,z) =>
    (x/z^2, y/z^3, 1) when interpreted as affine */
 typedef struct {
@@ -276,10 +280,13 @@ typedef struct ecc_key {
     mp_int    k;        /* private key */
 #endif
 #ifdef WOLFSSL_ASYNC_CRYPT
-    mp_int*   r;        /* sign/verify temps */
-    mp_int*   s;
-    AsyncCryptDev asyncDev;
-#endif
+    mp_int* r;          /* sign/verify temps */
+    mp_int* s;
+    WC_ASYNC_DEV asyncDev;
+    #ifdef WOLFSSL_CERT_GEN
+        CertSignCtx certSignCtx; /* context info for cert sign (MakeSignature) */
+    #endif
+#endif /* WOLFSSL_ASYNC_CRYPT */
 } ecc_key;
 
 
@@ -547,14 +554,10 @@ WOLFSSL_API int wc_X963_KDF(enum wc_HashType type, const byte* secret,
 #endif
 
 #ifdef ECC_CACHE_CURVE
+WOLFSSL_API int wc_ecc_curve_cache_init(void);
 WOLFSSL_API void wc_ecc_curve_cache_free(void);
 #endif
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    WOLFSSL_API int wc_ecc_async_handle(ecc_key* key,
-        WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event);
-    WOLFSSL_API int wc_ecc_async_wait(int ret, ecc_key* key);
-#endif
 
 #ifdef __cplusplus
     }    /* extern "C" */
diff --git a/wolfssl/wolfcrypt/error-crypt.h b/wolfssl/wolfcrypt/error-crypt.h
index 075336911..df29accc4 100644
--- a/wolfssl/wolfcrypt/error-crypt.h
+++ b/wolfssl/wolfcrypt/error-crypt.h
@@ -186,6 +186,9 @@ enum {
     DH_CHECK_PUB_E      = -243,  /* DH Check Pub Key error */
     BAD_PATH_ERROR      = -244,  /* Bad path for opendir */
 
+    ASYNC_OP_E          = -245,  /* Async operation error */
+
+    WC_LAST_E           = -245,  /* Update this to indicate last error */
     MIN_CODE_E          = -300   /* errors -101 - -299 */
 
     /* add new companion error id strings for any new error codes
diff --git a/wolfssl/wolfcrypt/hash.h b/wolfssl/wolfcrypt/hash.h
index fa1883bc6..beaeb65ea 100644
--- a/wolfssl/wolfcrypt/hash.h
+++ b/wolfssl/wolfcrypt/hash.h
@@ -114,59 +114,32 @@ WOLFSSL_API int wc_HashFinal(wc_HashAlg* hash, enum wc_HashType type,
 
 #ifndef NO_MD5
 #include <wolfssl/wolfcrypt/md5.h>
-WOLFSSL_API void wc_Md5GetHash(Md5*, byte*);
-WOLFSSL_API void wc_Md5RestorePos(Md5*, Md5*);
-#if defined(WOLFSSL_TI_HASH)
-    WOLFSSL_API void wc_Md5Free(Md5*);
-#else
-    #define wc_Md5Free(d)
-#endif
+WOLFSSL_API int wc_Md5Hash(const byte* data, word32 len, byte* hash);
 #endif
 
 #ifndef NO_SHA
 #include <wolfssl/wolfcrypt/sha.h>
-WOLFSSL_API int wc_ShaGetHash(Sha*, byte*);
-WOLFSSL_API void wc_ShaRestorePos(Sha*, Sha*);
 WOLFSSL_API int wc_ShaHash(const byte*, word32, byte*);
-#if defined(WOLFSSL_TI_HASH)
-     WOLFSSL_API void wc_ShaFree(Sha*);
-#else
-    #define wc_ShaFree(d)
-#endif
 #endif
 
 #ifndef NO_SHA256
 #include <wolfssl/wolfcrypt/sha256.h>
-WOLFSSL_API int wc_Sha256GetHash(Sha256*, byte*);
-WOLFSSL_API void wc_Sha256RestorePos(Sha256*, Sha256*);
 WOLFSSL_API int wc_Sha256Hash(const byte*, word32, byte*);
-#if defined(WOLFSSL_TI_HASH)
-    WOLFSSL_API void wc_Sha256Free(Sha256*);
-#else
-    #define wc_Sha256Free(d)
-#endif
 
     #if defined(WOLFSSL_SHA224)
-        WOLFSSL_API int wc_Sha224GetHash(Sha224*, byte*);
         WOLFSSL_API int wc_Sha224Hash(const byte*, word32, byte*);
-        #define wc_Sha224Free(d)
     #endif /* defined(WOLFSSL_SHA224) */
 #endif
 
 #ifdef WOLFSSL_SHA512
 #include <wolfssl/wolfcrypt/sha512.h>
-WOLFSSL_API int wc_Sha512GetHash(Sha512*, byte*);
 WOLFSSL_API int wc_Sha512Hash(const byte*, word32, byte*);
-#define wc_Sha512Free(d)
 
     #if defined(WOLFSSL_SHA384)
-        WOLFSSL_API int wc_Sha384GetHash(Sha384*, byte*);
         WOLFSSL_API int wc_Sha384Hash(const byte*, word32, byte*);
-        #define wc_Sha384Free(d)
     #endif /* defined(WOLFSSL_SHA384) */
 #endif /* WOLFSSL_SHA512 */
 
-
 #ifdef __cplusplus
     } /* extern "C" */
 #endif
diff --git a/wolfssl/wolfcrypt/hmac.h b/wolfssl/wolfcrypt/hmac.h
index 1d4930664..bf7154a64 100644
--- a/wolfssl/wolfcrypt/hmac.h
+++ b/wolfssl/wolfcrypt/hmac.h
@@ -58,7 +58,7 @@
     extern "C" {
 #endif
 #ifndef HAVE_FIPS
-        
+
 #ifdef WOLFSSL_ASYNC_CRYPT
     #include <wolfssl/wolfcrypt/async.h>
 #endif
@@ -95,7 +95,7 @@ enum {
 /* Select the largest available hash for the buffer size. */
 #if defined(WOLFSSL_SHA512)
     MAX_DIGEST_SIZE = SHA512_DIGEST_SIZE,
-    HMAC_BLOCK_SIZE = SHA512_BLOCK_SIZE
+    HMAC_BLOCK_SIZE = SHA512_BLOCK_SIZE,
 #elif defined(HAVE_BLAKE2)
     MAX_DIGEST_SIZE = BLAKE2B_OUTBYTES,
     HMAC_BLOCK_SIZE = BLAKE2B_BLOCKBYTES,
@@ -110,10 +110,10 @@ enum {
     HMAC_BLOCK_SIZE = SHA224_BLOCK_SIZE
 #elif !defined(NO_SHA)
     MAX_DIGEST_SIZE = SHA_DIGEST_SIZE,
-    HMAC_BLOCK_SIZE = SHA_BLOCK_SIZE
+    HMAC_BLOCK_SIZE = SHA_BLOCK_SIZE,
 #elif !defined(NO_MD5)
     MAX_DIGEST_SIZE = MD5_DIGEST_SIZE,
-    HMAC_BLOCK_SIZE = MD5_BLOCK_SIZE
+    HMAC_BLOCK_SIZE = MD5_BLOCK_SIZE,
 #else
     #error "You have to have some kind of hash if you want to use HMAC."
 #endif
@@ -122,27 +122,27 @@ enum {
 
 /* hash union */
 typedef union {
-    #ifndef NO_MD5
-        Md5 md5;
-    #endif
-    #ifndef NO_SHA
-        Sha sha;
-    #endif
-    #ifdef WOLFSSL_SHA224
-        Sha224 sha224;
-    #endif
-    #ifndef NO_SHA256
-        Sha256 sha256;
-    #endif
-    #ifdef WOLFSSL_SHA384
-        Sha384 sha384;
-    #endif
-    #ifdef WOLFSSL_SHA512
-        Sha512 sha512;
-    #endif
-    #ifdef HAVE_BLAKE2
-        Blake2b blake2b;
-    #endif
+#ifndef NO_MD5
+    Md5 md5;
+#endif
+#ifndef NO_SHA
+    Sha sha;
+#endif
+#ifdef WOLFSSL_SHA224
+    Sha224 sha224;
+#endif
+#ifndef NO_SHA256
+    Sha256 sha256;
+#endif
+#ifdef WOLFSSL_SHA512
+#ifdef WOLFSSL_SHA384
+    Sha384 sha384;
+#endif
+    Sha512 sha512;
+#endif
+#ifdef HAVE_BLAKE2
+    Blake2b blake2b;
+#endif
 } Hash;
 
 /* Hmac digest */
@@ -154,13 +154,14 @@ typedef struct Hmac {
     void*   heap;                 /* heap hint */
     byte    macType;              /* md5 sha or sha256 */
     byte    innerHashKeyed;       /* keyed flag */
+
 #ifdef WOLFSSL_ASYNC_CRYPT
-    AsyncCryptDev asyncDev;
+    WC_ASYNC_DEV asyncDev;
+    byte         keyRaw[HMAC_BLOCK_SIZE];
+    word16       keyLen;          /* hmac key length */
     #ifdef HAVE_CAVIUM
-        word16   keyLen;          /* hmac key length */
-        word16   dataLen;
-        HashType type;            /* hmac key type */
         byte*    data;            /* buffered input data for one call */
+        word16   dataLen;
     #endif /* HAVE_CAVIUM */
 #endif /* WOLFSSL_ASYNC_CRYPT */
 } Hmac;
@@ -172,23 +173,17 @@ WOLFSSL_API int wc_HmacSetKey(Hmac*, int type, const byte* key, word32 keySz);
 WOLFSSL_API int wc_HmacUpdate(Hmac*, const byte*, word32);
 WOLFSSL_API int wc_HmacFinal(Hmac*, byte*);
 WOLFSSL_API int wc_HmacSizeByType(int type);
-#ifdef WOLFSSL_ASYNC_CRYPT
-    WOLFSSL_API int  wc_HmacAsyncInit(Hmac*, int);
-    WOLFSSL_API void wc_HmacAsyncFree(Hmac*);
-#endif
-
 
+WOLFSSL_API int wc_HmacInit(Hmac* hmac, void* heap, int devId);
+WOLFSSL_API void wc_HmacFree(Hmac*);
 
 WOLFSSL_API int wolfSSL_GetHmacMaxSize(void);
 
-
 #ifdef HAVE_HKDF
-
-WOLFSSL_API int wc_HKDF(int type, const byte* inKey, word32 inKeySz,
-                    const byte* salt, word32 saltSz,
-                    const byte* info, word32 infoSz,
-                    byte* out, word32 outSz);
-
+    WOLFSSL_API int wc_HKDF(int type, const byte* inKey, word32 inKeySz,
+                        const byte* salt, word32 saltSz,
+                        const byte* info, word32 infoSz,
+                        byte* out, word32 outSz);
 #endif /* HAVE_HKDF */
 
 #ifdef __cplusplus
diff --git a/wolfssl/wolfcrypt/include.am b/wolfssl/wolfcrypt/include.am
index ca33c8b1e..92307a2b8 100644
--- a/wolfssl/wolfcrypt/include.am
+++ b/wolfssl/wolfcrypt/include.am
@@ -69,6 +69,16 @@ noinst_HEADERS+= \
                          wolfssl/wolfcrypt/port/nxp/ksdk_port.h \
                          wolfssl/wolfcrypt/port/atmel/atmel.h
 
-if BUILD_CAVIUM
-noinst_HEADERS+=         wolfssl/wolfcrypt/port/cavium/cavium_nitrox.h
+if BUILD_ASYNCCRYPT
+nobase_include_HEADERS+= wolfssl/wolfcrypt/async.h
 endif
+
+if BUILD_CAVIUM
+nobase_include_HEADERS+= wolfssl/wolfcrypt/port/cavium/cavium_nitrox.h
+endif
+
+if BUILD_INTEL_QA
+nobase_include_HEADERS+= wolfssl/wolfcrypt/port/intel/quickassist.h
+nobase_include_HEADERS+= wolfssl/wolfcrypt/port/intel/quickassist_mem.h
+endif
+
diff --git a/wolfssl/wolfcrypt/integer.h b/wolfssl/wolfcrypt/integer.h
index 543a832bc..9c7bc01b0 100644
--- a/wolfssl/wolfcrypt/integer.h
+++ b/wolfssl/wolfcrypt/integer.h
@@ -45,6 +45,10 @@
 
 #include 
 
+/* wolf big int and common functions */
+#include <wolfssl/wolfcrypt/wolfmath.h>
+
+
 #ifdef WOLFSSL_PUBLIC_MP
     #define MP_API   WOLFSSL_API
 #else
@@ -184,14 +188,20 @@ typedef int           mp_err;
    BITS_PER_DIGIT*2) */
 #define MP_WARRAY  (1 << (sizeof(mp_word) * CHAR_BIT - 2 * DIGIT_BIT + 1))
 
-/* the infamous mp_int structure */
+#ifdef HAVE_WOLF_BIGINT
+    struct WC_BIGINT;
+#endif
+
+/* the mp_int structure */
 typedef struct mp_int {
     int used, alloc, sign;
     mp_digit *dp;
-#ifdef WOLFSSL_ASYNC_CRYPT
-    byte* dpraw; /* Used for hardware crypto */
+
+#ifdef HAVE_WOLF_BIGINT
+    struct WC_BIGINT raw; /* unsigned binary (big endian) */
 #endif
 } mp_int;
+#define MP_INT_DEFINED
 
 /* callback for mp_prime_random, should fill dst with random bytes and return
    how many read [up to len] */
@@ -242,6 +252,7 @@ extern const char *mp_s_rmap;
 /* 6 functions needed by Rsa */
 MP_API int  mp_init (mp_int * a);
 MP_API void mp_clear (mp_int * a);
+MP_API void mp_free (mp_int * a);
 MP_API void mp_forcezero(mp_int * a);
 MP_API int  mp_unsigned_bin_size(mp_int * a);
 MP_API int  mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c);
diff --git a/wolfssl/wolfcrypt/md5.h b/wolfssl/wolfcrypt/md5.h
index 17783b173..27c690e4f 100644
--- a/wolfssl/wolfcrypt/md5.h
+++ b/wolfssl/wolfcrypt/md5.h
@@ -50,10 +50,15 @@ enum {
 };
 
 #if defined(WOLFSSL_PIC32MZ_HASH)
-#include "port/pic32/pic32mz-crypt.h"
+    #include "port/pic32/pic32mz-crypt.h"
+#endif
+#ifdef WOLFSSL_ASYNC_CRYPT
+    #include <wolfssl/wolfcrypt/async.h>
 #endif
 
-#ifndef WOLFSSL_TI_HASH
+#ifdef WOLFSSL_TI_HASH
+    #include "wolfssl/wolfcrypt/port/ti/ti-hash.h"
+#else
 
 /* MD5 digest */
 typedef struct Md5 {
@@ -61,22 +66,29 @@ typedef struct Md5 {
     word32  loLen;     /* length in bytes   */
     word32  hiLen;     /* length in bytes   */
     word32  buffer[MD5_BLOCK_SIZE  / sizeof(word32)];
-    #if !defined(WOLFSSL_PIC32MZ_HASH)
+#if !defined(WOLFSSL_PIC32MZ_HASH)
     word32  digest[MD5_DIGEST_SIZE / sizeof(word32)];
-    #else
+#else
     word32  digest[PIC32_HASH_SIZE / sizeof(word32)];
-    pic32mz_desc desc ; /* Crypt Engine descriptor */
-    #endif
+    pic32mz_desc desc; /* Crypt Engine descriptor */
+#endif
+    void*   heap;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    WC_ASYNC_DEV asyncDev;
+#endif /* WOLFSSL_ASYNC_CRYPT */
 } Md5;
 
-#else /* WOLFSSL_TI_HASH */
-    #include "wolfssl/wolfcrypt/port/ti/ti-hash.h"
-#endif
+#endif /* WOLFSSL_TI_HASH */
+
+WOLFSSL_API int wc_InitMd5(Md5*);
+WOLFSSL_API int wc_InitMd5_ex(Md5*, void*, int);
+WOLFSSL_API int wc_Md5Update(Md5*, const byte*, word32);
+WOLFSSL_API int wc_Md5Final(Md5*, byte*);
+WOLFSSL_API void wc_Md5Free(Md5*);
+
+WOLFSSL_API int  wc_Md5GetHash(Md5*, byte*);
+WOLFSSL_API int  wc_Md5Copy(Md5*, Md5*);
 
-WOLFSSL_API void wc_InitMd5(Md5*);
-WOLFSSL_API void wc_Md5Update(Md5*, const byte*, word32);
-WOLFSSL_API void wc_Md5Final(Md5*, byte*);
-WOLFSSL_API int  wc_Md5Hash(const byte*, word32, byte*);
 
 #ifdef __cplusplus
     } /* extern "C" */
diff --git a/wolfssl/wolfcrypt/mem_track.h b/wolfssl/wolfcrypt/mem_track.h
index f24325eaf..ee916d8bc 100644
--- a/wolfssl/wolfcrypt/mem_track.h
+++ b/wolfssl/wolfcrypt/mem_track.h
@@ -34,7 +34,7 @@
  *
  * On startup call:
  * InitMemoryTracker();
- * 
+ *
  * When ready to dump the memory report call:
  * ShowMemoryTracker();
  *
@@ -196,6 +196,7 @@
         return ret;
     }
 
+#ifdef WOLFSSL_TRACK_MEMORY
     STATIC INLINE int InitMemoryTracker(void)
     {
         int ret = wolfSSL_SetAllocators(TrackMalloc, TrackFree, TrackRealloc);
@@ -230,6 +231,7 @@
                                        (unsigned long)ourMemStats.currentBytes);
     #endif
     }
+#endif
 
 #endif /* USE_WOLFSSL_MEMORY */
 
diff --git a/wolfssl/wolfcrypt/memory.h b/wolfssl/wolfcrypt/memory.h
index 96dce8bdd..9ecd9cd92 100644
--- a/wolfssl/wolfcrypt/memory.h
+++ b/wolfssl/wolfcrypt/memory.h
@@ -70,10 +70,14 @@
     #endif /* WOLFSSL_DEBUG_MEMORY */
 #endif /* WOLFSSL_STATIC_MEMORY */
 
-/* Public set function */
-WOLFSSL_API int wolfSSL_SetAllocators(wolfSSL_Malloc_cb  malloc_function,
-                                    wolfSSL_Free_cb    free_function,
-                                    wolfSSL_Realloc_cb realloc_function);
+/* Public get/set functions */
+WOLFSSL_API int wolfSSL_SetAllocators(wolfSSL_Malloc_cb,
+                                      wolfSSL_Free_cb,
+                                      wolfSSL_Realloc_cb);
+
+WOLFSSL_API int wolfSSL_GetAllocators(wolfSSL_Malloc_cb*,
+                                      wolfSSL_Free_cb*,
+                                      wolfSSL_Realloc_cb*);
 
 #ifdef WOLFSSL_STATIC_MEMORY
     #define WOLFSSL_STATIC_TIMEOUT 1
@@ -95,7 +99,7 @@ WOLFSSL_API int wolfSSL_SetAllocators(wolfSSL_Malloc_cb  malloc_function,
         #endif
     #endif
     #ifndef WOLFMEM_DIST
-        #define WOLFMEM_DIST    8,4,4,12,4,5,2,1,1
+        #define WOLFMEM_DIST    8,4,4,12,4,5,8,1,1
     #endif
 
     /* flags for loading static memory (one hot bit) */
diff --git a/wolfssl/wolfcrypt/port/cavium/cavium_nitrox.h b/wolfssl/wolfcrypt/port/cavium/cavium_nitrox.h
deleted file mode 100644
index aed338f40..000000000
--- a/wolfssl/wolfcrypt/port/cavium/cavium_nitrox.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/* cavium-nitrox.h
- *
- * Copyright (C) 2006-2016 wolfSSL Inc.
- *
- * This file is part of wolfSSL. (formerly known as CyaSSL)
- *
- * wolfSSL is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * wolfSSL is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
- */
-
-#ifndef _CAVIUM_NITROX_H_
-#define _CAVIUM_NITROX_H_
-
-#ifdef HAVE_CAVIUM
-
-#include 
-
-#ifndef HAVE_CAVIUM_V
-    #include "cavium_sysdep.h"
-#endif
-#include "cavium_common.h"
-#ifndef HAVE_CAVIUM_V
-    #include "cavium_ioctl.h"
-#else
-    #include "cavium_sym_crypto.h"
-    #include "cavium_asym_crypto.h"
-#endif
-#include 
-
-#define CAVIUM_SSL_GRP      0
-#define CAVIUM_DPORT        256
-
-/* Compatibility with older Cavium SDK's */
-#ifndef HAVE_CAVIUM_V
-    typedef int CspHandle;
-    typedef word32 CavReqId;
-
-    #define AES_128 AES_128_BIT
-    #define AES_192 AES_192_BIT
-    #define AES_256 AES_256_BIT
-#else
-    #define CAVIUM_DEV_ID       0
-    #define CAVIUM_BLOCKING     BLOCKING
-    #define CAVIUM_NON_BLOCKING NON_BLOCKING
-    #define CAVIUM_DIRECT       DMA_DIRECT_DIRECT
-    typedef Uint64 CavReqId;
-#endif
-
-#ifdef WOLFSSL_ASYNC_CRYPT
-    #define CAVIUM_REQ_MODE CAVIUM_NON_BLOCKING
-#else
-    #define CAVIUM_REQ_MODE CAVIUM_BLOCKING
-#endif
-
-
-#ifdef WOLFSSL_ASYNC_CRYPT
-    #define CAVIUM_MAX_PENDING  90
-    #define CAVIUM_MAX_POLL     MAX_TO_POLL
-#endif
-
-
-typedef struct CaviumNitroxDev {
-    CspHandle   devId;                      /* nitrox device id */
-    ContextType type;                       /* Typically CONTEXT_SSL, but also ECC types */
-    Uint64      contextHandle;              /* nitrox context memory handle */
-    CavReqId    reqId;                      /* Current requestId */
-} CaviumNitroxDev;
-
-struct WOLF_EVENT;
-
-
-/* Wrapper API's */
-WOLFSSL_LOCAL int NitroxTranslateResponseCode(int ret);
-WOLFSSL_LOCAL CspHandle NitroxGetDeviceHandle(void);
-WOLFSSL_LOCAL CspHandle NitroxOpenDevice(int dma_mode, int dev_id);
-WOLFSSL_LOCAL int NitroxAllocContext(CaviumNitroxDev* nitrox, CspHandle devId,
-    ContextType type);
-WOLFSSL_LOCAL void NitroxFreeContext(CaviumNitroxDev* nitrox);
-WOLFSSL_LOCAL void NitroxCloseDevice(CspHandle devId);
-
-#if defined(WOLFSSL_ASYNC_CRYPT)
-WOLFSSL_LOCAL int NitroxCheckRequest(CspHandle devId, CavReqId reqId);
-WOLFSSL_LOCAL int NitroxCheckRequests(CspHandle devId,
-    CspMultiRequestStatusBuffer* req_stat_buf);
-#endif /* WOLFSSL_ASYNC_CRYPT */
-
-
-/* Crypto wrappers */
-#ifndef NO_RSA
-    struct RsaKey;
-    WOLFSSL_LOCAL int NitroxRsaExptMod(
-                            const byte* in, word32 inLen,
-                            byte* exponent, word32 expLen,
-                            byte* modulus, word32 modLen,
-                            byte* out, word32* outLen, struct RsaKey* key);
-    WOLFSSL_LOCAL int NitroxRsaPublicEncrypt(const byte* in, word32 inLen,
-                                byte* out, word32 outLen, struct RsaKey* key);
-    WOLFSSL_LOCAL int NitroxRsaPrivateDecrypt(const byte* in, word32 inLen,
-                                byte* out, word32 outLen, struct RsaKey* key);
-    WOLFSSL_LOCAL int NitroxRsaSSL_Sign(const byte* in, word32 inLen,
-                                byte* out, word32 outLen, struct RsaKey* key);
-    WOLFSSL_LOCAL int NitroxRsaSSL_Verify(const byte* in, word32 inLen,
-                                byte* out, word32 outLen, struct RsaKey* key);
-#endif /* !NO_RSA */
-
-#ifndef NO_AES
-    struct Aes;
-    WOLFSSL_LOCAL int NitroxAesSetKey(struct Aes* aes, const byte* key,
-                                                word32 length, const byte* iv);
-    #ifdef HAVE_AES_CBC
-        WOLFSSL_LOCAL int NitroxAesCbcEncrypt(struct Aes* aes, byte* out,
-                                                const byte* in, word32 length);
-    #ifdef HAVE_AES_DECRYPT
-        WOLFSSL_LOCAL int NitroxAesCbcDecrypt(struct Aes* aes, byte* out,
-                                                const byte* in, word32 length);
-    #endif /* HAVE_AES_DECRYPT */
-    #endif /* HAVE_AES_CBC */
-#endif /* !NO_AES */
-
-#ifndef NO_RC4
-    struct Arc4;
-    WOLFSSL_LOCAL void NitroxArc4SetKey(struct Arc4* arc4, const byte* key,
-                                                                word32 length);
-    WOLFSSL_LOCAL void NitroxArc4Process(struct Arc4* arc4, byte* out,
-                                                const byte* in, word32 length);
-#endif /* !NO_RC4 */
-
-#ifndef NO_DES3
-    struct Des3;
-    WOLFSSL_LOCAL int NitroxDes3SetKey(struct Des3* des3, const byte* key,
-                                                               const byte* iv);
-    WOLFSSL_LOCAL int NitroxDes3CbcEncrypt(struct Des3* des3, byte* out,
-                                                const byte* in, word32 length);
-    WOLFSSL_LOCAL int NitroxDes3CbcDecrypt(struct Des3* des3, byte* out,
-                                                const byte* in, word32 length);
-#endif /* !NO_DES3 */
-
-#ifndef NO_HMAC
-    struct Hmac;
-    WOLFSSL_LOCAL int NitroxHmacFinal(struct Hmac* hmac, byte* hash);
-    WOLFSSL_LOCAL int NitroxHmacUpdate(struct Hmac* hmac, const byte* msg,
-                                                                word32 length);
-    WOLFSSL_LOCAL int NitroxHmacSetKey(struct Hmac* hmac, int type,
-                                               const byte* key, word32 length);
-#endif /* NO_HMAC */
-
-#if !defined(HAVE_HASHDRBG) && !defined(NO_RC4)
-    WOLFSSL_API void NitroxRngGenerateBlock(WC_RNG* rng, byte* output, word32 sz);
-#endif
-
-
-#endif /* HAVE_CAVIUM */
-
-#endif /* _CAVIUM_NITROX_H_ */
diff --git a/wolfssl/wolfcrypt/port/ti/ti-hash.h b/wolfssl/wolfcrypt/port/ti/ti-hash.h
index c63a2ce20..93311a4e2 100644
--- a/wolfssl/wolfcrypt/port/ti/ti-hash.h
+++ b/wolfssl/wolfcrypt/port/ti/ti-hash.h
@@ -19,47 +19,43 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
  */
 
- 
+
 #ifndef WOLF_CRYPT_TI_HASH_H
 #define WOLF_CRYPT_TI_HASH_H
 
 #include 
 
 #ifndef WOLFSSL_TI_INITBUFF
-#define WOLFSSL_TI_INITBUFF 64
+    #define WOLFSSL_TI_INITBUFF    64
 #endif
 
-#define WOLFSSL_MAX_HASH_SIZE  64
+#ifndef WOLFSSL_MAX_HASH_SIZE
+    #define WOLFSSL_MAX_HASH_SIZE  64
+#endif
 
 typedef struct {
-    byte   *msg ;
-    word32 used ;
-    word32 len ;
-    byte hash[WOLFSSL_MAX_HASH_SIZE] ;
-} wolfssl_TI_Hash ;
+    byte   *msg;
+    word32 used;
+    word32 len;
+    byte hash[WOLFSSL_MAX_HASH_SIZE];
+} wolfssl_TI_Hash;
 
 
 #ifndef TI_HASH_TEST
-#if !defined(NO_MD5)
-typedef wolfssl_TI_Hash Md5 ;
 
+#if !defined(NO_MD5)
+    typedef wolfssl_TI_Hash Md5;
 #endif
 #if !defined(NO_SHA)
-typedef wolfssl_TI_Hash Sha ;
+    typedef wolfssl_TI_Hash Sha;
 #endif
 #if !defined(NO_SHA256)
-typedef wolfssl_TI_Hash Sha256 ;
+    typedef wolfssl_TI_Hash Sha256;
+#endif
+#if defined(WOLFSSL_SHA224)
+    typedef wolfssl_TI_Hash Sha224;
 #endif
 
-#if defined(HAVE_SHA224)
-typedef wolfssl_TI_Hash Sha224 ;
-#define SHA224_DIGEST_SIZE  28
+#endif /* !TI_HASH_TEST */
 
-WOLFSSL_API int wc_InitSha224(Sha224* sha224) ;
-WOLFSSL_API int wc_Sha224Update(Sha224* sha224, const byte* data, word32 len) ;
-WOLFSSL_API int wc_Sha224Final(Sha224* sha224, byte* hash) ;
-WOLFSSL_API int wc_Sha224Hash(const byte* data, word32 len, byte*hash) ;
-
-#endif
-#endif
 #endif /* WOLF_CRYPT_TI_HASH_H  */
diff --git a/wolfssl/wolfcrypt/random.h b/wolfssl/wolfcrypt/random.h
index 75fc5ebd9..6a6f104e5 100644
--- a/wolfssl/wolfcrypt/random.h
+++ b/wolfssl/wolfcrypt/random.h
@@ -140,7 +140,8 @@ struct WC_RNG {
     byte status;
 #endif
 #ifdef WOLFSSL_ASYNC_CRYPT
-    AsyncCryptDev asyncDev;
+    WC_ASYNC_DEV asyncDev;
+    int devId;
 #endif
 };
 
@@ -165,7 +166,7 @@ int wc_GenerateSeed(OS_Seed* os, byte* seed, word32 sz);
 
 
 WOLFSSL_API int  wc_InitRng(WC_RNG*);
-WOLFSSL_API int  wc_InitRng_ex(WC_RNG* rng, void* heap);
+WOLFSSL_API int  wc_InitRng_ex(WC_RNG* rng, void* heap, int devId);
 WOLFSSL_API int  wc_RNG_GenerateBlock(WC_RNG*, byte*, word32 sz);
 WOLFSSL_API int  wc_RNG_GenerateByte(WC_RNG*, byte*);
 WOLFSSL_API int  wc_FreeRng(WC_RNG*);
diff --git a/wolfssl/wolfcrypt/rsa.h b/wolfssl/wolfcrypt/rsa.h
index d7f5ccaf9..66c46d109 100644
--- a/wolfssl/wolfcrypt/rsa.h
+++ b/wolfssl/wolfcrypt/rsa.h
@@ -55,6 +55,9 @@
 
 #ifdef WOLFSSL_ASYNC_CRYPT
     #include <wolfssl/wolfcrypt/async.h>
+    #ifdef WOLFSSL_CERT_GEN
+        #include <wolfssl/wolfcrypt/asn.h>
+    #endif
 #endif
 
 enum {
@@ -80,18 +83,21 @@ enum {
 /* RSA */
 typedef struct RsaKey {
     mp_int n, e, d, p, q, dP, dQ, u;
-    int   type;                               /* public or private */
     void* heap;                               /* for user memory overrides */
+    byte* data;                               /* temp buffer for async RSA */
+    int   type;                               /* public or private */
     int   state;
-    byte*  tmp;                               /* temp buffer for async RSA */
-    word32 tmpLen;
-    byte   tmpIsAlloc;
+    word32 dataLen;
 #ifdef WC_RSA_BLINDING
     WC_RNG* rng;                              /* for PrivateDecrypt blinding */
 #endif
 #ifdef WOLFSSL_ASYNC_CRYPT
-    AsyncCryptDev asyncDev;
+    WC_ASYNC_DEV asyncDev;
+    #ifdef WOLFSSL_CERT_GEN
+        CertSignCtx certSignCtx; /* context info for cert sign (MakeSignature) */
+    #endif
 #endif /* WOLFSSL_ASYNC_CRYPT */
+    byte   dataIsAlloc;
 } RsaKey;
 #endif /*HAVE_FIPS */
 
@@ -163,11 +169,6 @@ WOLFSSL_API int  wc_RsaFlattenPublicKey(RsaKey*, byte*, word32*, byte*,
     WOLFSSL_API int wc_MakeRsaKey(RsaKey* key, int size, long e, WC_RNG* rng);
 #endif
 
-#ifdef WOLFSSL_ASYNC_CRYPT
-    WOLFSSL_API int  wc_RsaAsyncHandle(RsaKey* key, WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event);
-    WOLFSSL_API int  wc_RsaAsyncWait(int ret, RsaKey* key);
-#endif
-
 #endif /* HAVE_USER_RSA */
 
 #ifdef __cplusplus
diff --git a/wolfssl/wolfcrypt/settings.h b/wolfssl/wolfcrypt/settings.h
index 6580338ce..4fc29414a 100644
--- a/wolfssl/wolfcrypt/settings.h
+++ b/wolfssl/wolfcrypt/settings.h
@@ -1455,11 +1455,25 @@ extern void uITRON4_free(void *p) ;
     #undef HAVE_WOLF_EVENT
     #define HAVE_WOLF_EVENT
 
+    #ifdef WOLFSSL_ASYNC_CRYPT_TEST
+        #define WC_ASYNC_DEV_SIZE (320+24) /* parenthesized: one operand */
+    #else
+        #define WC_ASYNC_DEV_SIZE 320
+    #endif
+
     #if !defined(HAVE_CAVIUM) && !defined(HAVE_INTEL_QA) && \
         !defined(WOLFSSL_ASYNC_CRYPT_TEST)
         #error No async hardware defined with WOLFSSL_ASYNC_CRYPT!
     #endif
+
+    /* Enable ECC_CACHE_CURVE for ASYNC */
+    #if !defined(ECC_CACHE_CURVE)
+        #define ECC_CACHE_CURVE
+    #endif
 #endif /* WOLFSSL_ASYNC_CRYPT */
+#ifndef WC_ASYNC_DEV_SIZE
+    #define WC_ASYNC_DEV_SIZE 0
+#endif
 
 /* leantls checks */
 #ifdef WOLFSSL_LEANTLS
diff --git a/wolfssl/wolfcrypt/sha.h b/wolfssl/wolfcrypt/sha.h
index 6dbd91b87..5d5d0908f 100644
--- a/wolfssl/wolfcrypt/sha.h
+++ b/wolfssl/wolfcrypt/sha.h
@@ -34,13 +34,21 @@
 
 #ifdef FREESCALE_LTC_SHA
     #include "fsl_ltc.h"
-#endif 
+#endif
 
 #ifdef __cplusplus
     extern "C" {
 #endif
 
 #ifndef HAVE_FIPS /* avoid redefining structs */
+
+#ifdef WOLFSSL_PIC32MZ_HASH
+    #include "port/pic32/pic32mz-crypt.h"
+#endif
+#ifdef WOLFSSL_ASYNC_CRYPT
+    #include <wolfssl/wolfcrypt/async.h>
+#endif
+
 /* in bytes */
 enum {
 #if defined(STM32F2_HASH) || defined(STM32F4_HASH)
@@ -48,16 +56,16 @@ enum {
 #endif
     SHA              =  1,    /* hash type unique */
     SHA_BLOCK_SIZE   = 64,
+#ifdef WOLFSSL_PIC32MZ_HASH
+    SHA_DIGEST_SIZE  = PIC32_HASH_SIZE,
+#else
     SHA_DIGEST_SIZE  = 20,
+#endif
     SHA_PAD_SIZE     = 56
 };
 
-#ifdef WOLFSSL_PIC32MZ_HASH
-#include "port/pic32/pic32mz-crypt.h"
-#endif
 
 #ifndef WOLFSSL_TI_HASH
-      
 /* Sha digest */
 typedef struct Sha {
     #ifdef FREESCALE_LTC_SHA
@@ -67,24 +75,32 @@ typedef struct Sha {
         word32  loLen;     /* length in bytes   */
         word32  hiLen;     /* length in bytes   */
         word32  buffer[SHA_BLOCK_SIZE  / sizeof(word32)];
-        #ifndef WOLFSSL_PIC32MZ_HASH
-            word32  digest[SHA_DIGEST_SIZE / sizeof(word32)];
-        #else
-            word32  digest[PIC32_HASH_SIZE / sizeof(word32)];
-            pic32mz_desc desc; /* Crypt Engine descriptor */
-        #endif
-    #endif /* FREESCALE_LTC_SHA */
+        word32  digest[SHA_DIGEST_SIZE / sizeof(word32)];
+        void*   heap;
+    #ifdef WOLFSSL_PIC32MZ_HASH
+        pic32mz_desc desc; /* Crypt Engine descriptor */
+    #endif
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        WC_ASYNC_DEV asyncDev;
+    #endif /* WOLFSSL_ASYNC_CRYPT */
+#endif /* FREESCALE_LTC_SHA */
 } Sha;
 
-#else /* WOLFSSL_TI_HASH */
+#else
     #include "wolfssl/wolfcrypt/port/ti/ti-hash.h"
-#endif
+#endif /* WOLFSSL_TI_HASH */
+
 
 #endif /* HAVE_FIPS */
 
 WOLFSSL_API int wc_InitSha(Sha*);
+WOLFSSL_API int wc_InitSha_ex(Sha* sha, void* heap, int devId);
 WOLFSSL_API int wc_ShaUpdate(Sha*, const byte*, word32);
 WOLFSSL_API int wc_ShaFinal(Sha*, byte*);
+WOLFSSL_API void wc_ShaFree(Sha*);
+
+WOLFSSL_API int wc_ShaGetHash(Sha*, byte*);
+WOLFSSL_API int wc_ShaCopy(Sha*, Sha*);
 
 #ifdef __cplusplus
     } /* extern "C" */
diff --git a/wolfssl/wolfcrypt/sha256.h b/wolfssl/wolfcrypt/sha256.h
index 997b0c1e1..4d8ef1f64 100644
--- a/wolfssl/wolfcrypt/sha256.h
+++ b/wolfssl/wolfcrypt/sha256.h
@@ -44,9 +44,13 @@
 #endif
 
 #ifndef HAVE_FIPS /* avoid redefinition of structs */
+
 #ifdef WOLFSSL_PIC32MZ_HASH
     #include "port/pic32/pic32mz-crypt.h"
 #endif
+#ifdef WOLFSSL_ASYNC_CRYPT
+    #include <wolfssl/wolfcrypt/async.h>
+#endif
 
 /* in bytes */
 enum {
@@ -69,9 +73,13 @@ typedef struct Sha256 {
     word32  buffLen;   /* in bytes          */
     word32  loLen;     /* length in bytes   */
     word32  hiLen;     /* length in bytes   */
-    #ifdef WOLFSSL_PIC32MZ_HASH
-        pic32mz_desc desc ; /* Crypt Engine descriptor */
-    #endif
+    void*   heap;
+#ifdef WOLFSSL_PIC32MZ_HASH
+    pic32mz_desc desc; /* Crypt Engine descriptor */
+#endif
+#ifdef WOLFSSL_ASYNC_CRYPT
+    WC_ASYNC_DEV asyncDev;
+#endif /* WOLFSSL_ASYNC_CRYPT */
 #endif /* FREESCALE_LTC_SHA */
 } Sha256;
 
@@ -82,8 +90,13 @@ typedef struct Sha256 {
 #endif /* HAVE_FIPS */
 
 WOLFSSL_API int wc_InitSha256(Sha256*);
+WOLFSSL_API int wc_InitSha256_ex(Sha256*, void*, int);
 WOLFSSL_API int wc_Sha256Update(Sha256*, const byte*, word32);
 WOLFSSL_API int wc_Sha256Final(Sha256*, byte*);
+WOLFSSL_API void wc_Sha256Free(Sha256*);
+
+WOLFSSL_API int wc_Sha256GetHash(Sha256*, byte*);
+WOLFSSL_API int wc_Sha256Copy(Sha256* src, Sha256* dst);
 
 #ifdef WOLFSSL_SHA224
 
@@ -100,8 +113,13 @@ typedef Sha256 Sha224;
 #endif /* HAVE_FIPS */
 
 WOLFSSL_API int wc_InitSha224(Sha224*);
+WOLFSSL_API int wc_InitSha224_ex(Sha224*, void*, int);
 WOLFSSL_API int wc_Sha224Update(Sha224*, const byte*, word32);
 WOLFSSL_API int wc_Sha224Final(Sha224*, byte*);
+WOLFSSL_API void wc_Sha224Free(Sha224*);
+
+WOLFSSL_API int wc_Sha224GetHash(Sha224*, byte*);
+WOLFSSL_API int wc_Sha224Copy(Sha224* src, Sha224* dst);
 
 #endif /* WOLFSSL_SHA224 */
 
diff --git a/wolfssl/wolfcrypt/sha512.h b/wolfssl/wolfcrypt/sha512.h
index 2f53772e9..7fea27e6e 100644
--- a/wolfssl/wolfcrypt/sha512.h
+++ b/wolfssl/wolfcrypt/sha512.h
@@ -42,6 +42,10 @@
 
 #ifndef HAVE_FIPS /* avoid redefinition of structs */
 
+#ifdef WOLFSSL_ASYNC_CRYPT
+    #include <wolfssl/wolfcrypt/async.h> /* WC_ASYNC_DEV, used by Sha512 below */
+#endif
+
 /* in bytes */
 enum {
     SHA512              =   4,   /* hash type unique */
@@ -58,13 +62,22 @@ typedef struct Sha512 {
     word64  hiLen;     /* length in bytes   */
     word64  digest[SHA512_DIGEST_SIZE / sizeof(word64)];
     word64  buffer[SHA512_BLOCK_SIZE  / sizeof(word64)];
+    void*   heap;
+#ifdef WOLFSSL_ASYNC_CRYPT
+    WC_ASYNC_DEV asyncDev;
+#endif /* WOLFSSL_ASYNC_CRYPT */
 } Sha512;
 
 #endif /* HAVE_FIPS */
 
 WOLFSSL_API int wc_InitSha512(Sha512*);
+WOLFSSL_API int wc_InitSha512_ex(Sha512*, void*, int);
 WOLFSSL_API int wc_Sha512Update(Sha512*, const byte*, word32);
 WOLFSSL_API int wc_Sha512Final(Sha512*, byte*);
+WOLFSSL_API void wc_Sha512Free(Sha512*);
+
+WOLFSSL_API int wc_Sha512GetHash(Sha512*, byte*);
+WOLFSSL_API int wc_Sha512Copy(Sha512* src, Sha512* dst);
 
 #if defined(WOLFSSL_SHA384)
 
@@ -81,8 +94,13 @@ typedef Sha512 Sha384;
 #endif /* HAVE_FIPS */
 
 WOLFSSL_API int wc_InitSha384(Sha384*);
+WOLFSSL_API int wc_InitSha384_ex(Sha384*, void*, int);
 WOLFSSL_API int wc_Sha384Update(Sha384*, const byte*, word32);
 WOLFSSL_API int wc_Sha384Final(Sha384*, byte*);
+WOLFSSL_API void wc_Sha384Free(Sha384*);
+
+WOLFSSL_API int wc_Sha384GetHash(Sha384*, byte*);
+WOLFSSL_API int wc_Sha384Copy(Sha384* src, Sha384* dst);
 
 #endif /* WOLFSSL_SHA384 */
 
diff --git a/wolfssl/wolfcrypt/tfm.h b/wolfssl/wolfcrypt/tfm.h
index 2b9faca38..bc989a159 100644
--- a/wolfssl/wolfcrypt/tfm.h
+++ b/wolfssl/wolfcrypt/tfm.h
@@ -43,6 +43,9 @@
 
 #include <wolfssl/wolfcrypt/random.h>
 
+/* wolf big int (WC_BIGINT, embedded by value in fp_int) and common functions */
+#include <wolfssl/wolfcrypt/wolfmath.h>
+
 #ifdef __cplusplus
     extern "C" {
 #endif
@@ -288,16 +291,21 @@
 #define FP_YES        1   /* yes response */
 #define FP_NO         0   /* no response */
 
+#ifdef HAVE_WOLF_BIGINT
+    struct WC_BIGINT;
+#endif
+
 /* a FP type */
 typedef struct fp_int {
     int      used;
     int      sign;
-#if defined(ALT_ECC_SIZE) || defined(WOLFSSL_ASYNC_CRYPT)
+#if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
     int      size;
 #endif
     fp_digit dp[FP_SIZE];
-#ifdef WOLFSSL_ASYNC_CRYPT
-    byte *dpraw; /* Used for hardware crypto */
+
+#ifdef HAVE_WOLF_BIGINT
+    struct WC_BIGINT raw; /* unsigned binary (big endian) */
 #endif
 } fp_int;
 
@@ -380,6 +388,8 @@ typedef struct fp_int {
 void fp_init(fp_int *a);
 MP_API void fp_zero(fp_int *a);
 MP_API void fp_clear(fp_int *a); /* uses ForceZero to clear sensitive memory */
+MP_API void fp_forcezero (fp_int * a);
+MP_API void fp_free(fp_int* a);
 
 /* zero/even/odd ? */
 #define fp_iszero(a) (((a)->used == 0) ? FP_YES : FP_NO)
@@ -605,6 +615,7 @@ void fp_sqr_comba64(fp_int *a, fp_int *b);
 typedef fp_digit mp_digit;
 typedef fp_word  mp_word;
 typedef fp_int mp_int;
+#define MP_INT_DEFINED
 
 /* Constants */
 #define MP_LT   FP_LT   /* less than    */
@@ -627,8 +638,9 @@ typedef fp_int mp_int;
 #define mp_isneg(a)   fp_isneg(a)
 MP_API int  mp_init (mp_int * a);
 MP_API void mp_clear (mp_int * a);
-#define mp_forcezero(a) fp_clear(a)
-MP_API int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e,
+MP_API void mp_free (mp_int * a);
+MP_API void mp_forcezero (mp_int * a);
+MP_API int  mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e,
                          mp_int* f);
 
 MP_API int  mp_add (mp_int * a, mp_int * b, mp_int * c);
diff --git a/wolfssl/wolfcrypt/types.h b/wolfssl/wolfcrypt/types.h
old mode 100644
new mode 100755
index 14dd39649..ece3517b2
--- a/wolfssl/wolfcrypt/types.h
+++ b/wolfssl/wolfcrypt/types.h
@@ -140,7 +140,7 @@
     #elif defined(__MWERKS__) && TARGET_CPU_PPC
         #define PPC_INTRINSICS
         #define FAST_ROTATE
-    #elif defined(__GNUC__) && defined(__i386__)
+    #elif defined(__GNUC__)  && (defined(__i386__) || defined(__x86_64__))
         /* GCC does peephole optimizations which should result in using rotate
            instructions  */
         #define FAST_ROTATE
@@ -178,7 +178,19 @@
 		WOLFSSL_API void* XMALLOC(size_t n, void* heap, int type);
 		WOLFSSL_API void* XREALLOC(void *p, size_t n, void* heap, int type);
 		WOLFSSL_API void XFREE(void *p, void* heap, int type);
-	#elif defined(XMALLOC_USER)
+	#elif defined(WOLFSSL_ASYNC_CRYPT) && defined(HAVE_INTEL_QA)
+        #include <wolfssl/wolfcrypt/port/intel/quickassist_mem.h> /* IntelQaMalloc/Free/Realloc */
+        #undef USE_WOLFSSL_MEMORY
+        #ifdef WOLFSSL_DEBUG_MEMORY
+            #define XMALLOC(s, h, t)     IntelQaMalloc((s), (h), (t), __func__, __LINE__)
+            #define XFREE(p, h, t)       IntelQaFree((p), (h), (t), __func__, __LINE__)
+            #define XREALLOC(p, n, h, t) IntelQaRealloc((p), (n), (h), (t), __func__, __LINE__)
+        #else
+            #define XMALLOC(s, h, t)     IntelQaMalloc((s), (h), (t))
+            #define XFREE(p, h, t)       IntelQaFree((p), (h), (t))
+            #define XREALLOC(p, n, h, t) IntelQaRealloc((p), (n), (h), (t))
+        #endif /* WOLFSSL_DEBUG_MEMORY */
+    #elif defined(XMALLOC_USER)
 	    /* prototypes for user heap override functions */
 	    #include <stddef.h>  /* for size_t */
 	    extern void *XMALLOC(size_t n, void* heap, int type);
@@ -222,6 +234,41 @@
         #endif /* WOLFSSL_STATIC_MEMORY */
 	#endif
 
+    /* declare/free variable handling for async */
+    #ifdef WOLFSSL_ASYNC_CRYPT
+        #define DECLARE_VAR(VAR_NAME, VAR_TYPE, VAR_SIZE, HEAP) \
+            VAR_TYPE* VAR_NAME = (VAR_TYPE*)XMALLOC(sizeof(VAR_TYPE) * VAR_SIZE, HEAP, DYNAMIC_TYPE_WOLF_BIGINT);
+        #define DECLARE_VAR_INIT(VAR_NAME, VAR_TYPE, VAR_SIZE, INIT_VALUE, HEAP) \
+            VAR_TYPE* VAR_NAME = ({ \
+                VAR_TYPE* ptr = XMALLOC(sizeof(VAR_TYPE) * VAR_SIZE, HEAP, DYNAMIC_TYPE_WOLF_BIGINT); \
+                if (ptr && INIT_VALUE) { \
+                    XMEMCPY(ptr, INIT_VALUE, sizeof(VAR_TYPE) * VAR_SIZE); \
+                } \
+                ptr; \
+            })
+        #define DECLARE_ARRAY(VAR_NAME, VAR_TYPE, VAR_ITEMS, VAR_SIZE, HEAP) \
+            VAR_TYPE* VAR_NAME[VAR_ITEMS]; \
+            int idx##VAR_NAME; \
+            for (idx##VAR_NAME=0; idx##VAR_NAME
@@ -284,69 +331,74 @@
 
 	/* memory allocation types for user hints */
 	enum {
-	    DYNAMIC_TYPE_CA           = 1,
-	    DYNAMIC_TYPE_CERT         = 2,
-	    DYNAMIC_TYPE_KEY          = 3,
-	    DYNAMIC_TYPE_FILE         = 4,
-	    DYNAMIC_TYPE_SUBJECT_CN   = 5,
-	    DYNAMIC_TYPE_PUBLIC_KEY   = 6,
-	    DYNAMIC_TYPE_SIGNER       = 7,
-	    DYNAMIC_TYPE_NONE         = 8,
-	    DYNAMIC_TYPE_BIGINT       = 9,
-	    DYNAMIC_TYPE_RSA          = 10,
-	    DYNAMIC_TYPE_METHOD       = 11,
-	    DYNAMIC_TYPE_OUT_BUFFER   = 12,
-	    DYNAMIC_TYPE_IN_BUFFER    = 13,
-	    DYNAMIC_TYPE_INFO         = 14,
-	    DYNAMIC_TYPE_DH           = 15,
-	    DYNAMIC_TYPE_DOMAIN       = 16,
-	    DYNAMIC_TYPE_SSL          = 17,
-	    DYNAMIC_TYPE_CTX          = 18,
-	    DYNAMIC_TYPE_WRITEV       = 19,
-	    DYNAMIC_TYPE_OPENSSL      = 20,
-	    DYNAMIC_TYPE_DSA          = 21,
-	    DYNAMIC_TYPE_CRL          = 22,
-	    DYNAMIC_TYPE_REVOKED      = 23,
-	    DYNAMIC_TYPE_CRL_ENTRY    = 24,
-	    DYNAMIC_TYPE_CERT_MANAGER = 25,
-	    DYNAMIC_TYPE_CRL_MONITOR  = 26,
-	    DYNAMIC_TYPE_OCSP_STATUS  = 27,
-	    DYNAMIC_TYPE_OCSP_ENTRY   = 28,
-	    DYNAMIC_TYPE_ALTNAME      = 29,
-	    DYNAMIC_TYPE_SUITES       = 30,
-	    DYNAMIC_TYPE_CIPHER       = 31,
-	    DYNAMIC_TYPE_RNG          = 32,
-	    DYNAMIC_TYPE_ARRAYS       = 33,
-	    DYNAMIC_TYPE_DTLS_POOL    = 34,
-	    DYNAMIC_TYPE_SOCKADDR     = 35,
-	    DYNAMIC_TYPE_LIBZ         = 36,
-	    DYNAMIC_TYPE_ECC          = 37,
-	    DYNAMIC_TYPE_TMP_BUFFER   = 38,
-	    DYNAMIC_TYPE_DTLS_MSG     = 39,
-	    DYNAMIC_TYPE_ASYNC_TMP    = 40,
-	    DYNAMIC_TYPE_ASYNC_RSA    = 41,
-	    DYNAMIC_TYPE_X509         = 42,
-	    DYNAMIC_TYPE_TLSX         = 43,
-	    DYNAMIC_TYPE_OCSP         = 44,
-	    DYNAMIC_TYPE_SIGNATURE    = 45,
-	    DYNAMIC_TYPE_HASHES       = 46,
-        DYNAMIC_TYPE_SRP          = 47,
-        DYNAMIC_TYPE_COOKIE_PWD   = 48,
-        DYNAMIC_TYPE_USER_CRYPTO  = 49,
-        DYNAMIC_TYPE_OCSP_REQUEST = 50,
-        DYNAMIC_TYPE_X509_EXT     = 51,
-        DYNAMIC_TYPE_X509_STORE   = 52,
-        DYNAMIC_TYPE_X509_CTX     = 53,
-        DYNAMIC_TYPE_URL          = 54,
-        DYNAMIC_TYPE_DTLS_FRAG    = 55,
-        DYNAMIC_TYPE_DTLS_BUFFER  = 56,
-        DYNAMIC_TYPE_SESSION_TICK = 57,
-        DYNAMIC_TYPE_PKCS         = 58,
-        DYNAMIC_TYPE_MUTEX        = 59,
-        DYNAMIC_TYPE_PKCS7        = 60,
+        DYNAMIC_TYPE_CA           = 1,
+        DYNAMIC_TYPE_CERT         = 2,
+        DYNAMIC_TYPE_KEY          = 3,
+        DYNAMIC_TYPE_FILE         = 4,
+        DYNAMIC_TYPE_SUBJECT_CN   = 5,
+        DYNAMIC_TYPE_PUBLIC_KEY   = 6,
+        DYNAMIC_TYPE_SIGNER       = 7,
+        DYNAMIC_TYPE_NONE         = 8,
+        DYNAMIC_TYPE_BIGINT       = 9,
+        DYNAMIC_TYPE_RSA          = 10,
+        DYNAMIC_TYPE_METHOD       = 11,
+        DYNAMIC_TYPE_OUT_BUFFER   = 12,
+        DYNAMIC_TYPE_IN_BUFFER    = 13,
+        DYNAMIC_TYPE_INFO         = 14,
+        DYNAMIC_TYPE_DH           = 15,
+        DYNAMIC_TYPE_DOMAIN       = 16,
+        DYNAMIC_TYPE_SSL          = 17,
+        DYNAMIC_TYPE_CTX          = 18,
+        DYNAMIC_TYPE_WRITEV       = 19,
+        DYNAMIC_TYPE_OPENSSL      = 20,
+        DYNAMIC_TYPE_DSA          = 21,
+        DYNAMIC_TYPE_CRL          = 22,
+        DYNAMIC_TYPE_REVOKED      = 23,
+        DYNAMIC_TYPE_CRL_ENTRY    = 24,
+        DYNAMIC_TYPE_CERT_MANAGER = 25,
+        DYNAMIC_TYPE_CRL_MONITOR  = 26,
+        DYNAMIC_TYPE_OCSP_STATUS  = 27,
+        DYNAMIC_TYPE_OCSP_ENTRY   = 28,
+        DYNAMIC_TYPE_ALTNAME      = 29,
+        DYNAMIC_TYPE_SUITES       = 30,
+        DYNAMIC_TYPE_CIPHER       = 31,
+        DYNAMIC_TYPE_RNG          = 32,
+        DYNAMIC_TYPE_ARRAYS       = 33,
+        DYNAMIC_TYPE_DTLS_POOL    = 34,
+        DYNAMIC_TYPE_SOCKADDR     = 35,
+        DYNAMIC_TYPE_LIBZ         = 36,
+        DYNAMIC_TYPE_ECC          = 37,
+        DYNAMIC_TYPE_TMP_BUFFER   = 38,
+        DYNAMIC_TYPE_DTLS_MSG     = 39,
+        DYNAMIC_TYPE_X509         = 40,
+        DYNAMIC_TYPE_TLSX         = 41,
+        DYNAMIC_TYPE_OCSP         = 42,
+        DYNAMIC_TYPE_SIGNATURE    = 43,
+        DYNAMIC_TYPE_HASHES       = 44,
+        DYNAMIC_TYPE_SRP          = 45,
+        DYNAMIC_TYPE_COOKIE_PWD   = 46,
+        DYNAMIC_TYPE_USER_CRYPTO  = 47,
+        DYNAMIC_TYPE_OCSP_REQUEST = 48,
+        DYNAMIC_TYPE_X509_EXT     = 49,
+        DYNAMIC_TYPE_X509_STORE   = 50,
+        DYNAMIC_TYPE_X509_CTX     = 51,
+        DYNAMIC_TYPE_URL          = 52,
+        DYNAMIC_TYPE_DTLS_FRAG    = 53,
+        DYNAMIC_TYPE_DTLS_BUFFER  = 54,
+        DYNAMIC_TYPE_SESSION_TICK = 55,
+        DYNAMIC_TYPE_PKCS         = 56,
+        DYNAMIC_TYPE_MUTEX        = 57,
+        DYNAMIC_TYPE_PKCS7        = 58,
+        DYNAMIC_TYPE_AES          = 59,
+        DYNAMIC_TYPE_WOLF_BIGINT  = 60,
         DYNAMIC_TYPE_ASN1         = 61,
         DYNAMIC_TYPE_LOG          = 62,
-        DYNAMIC_TYPE_WRITEDUP     = 63
+        DYNAMIC_TYPE_WRITEDUP     = 63,
+        DYNAMIC_TYPE_DH_BUFFER    = 64,
+        DYNAMIC_TYPE_HMAC         = 65,
+        DYNAMIC_TYPE_ASYNC        = 66,
+        DYNAMIC_TYPE_ASYNC_NUMA   = 67,
+        DYNAMIC_TYPE_ASYNC_NUMA64 = 68,
 	};
 
 	/* max error buffer string size */
@@ -397,7 +449,7 @@
 
 
     /* AESNI requires alignment and ARMASM gains some performance from it */
-    #if defined(WOLFSSL_AESNI) || defined(WOLFSSL_ARMASM)
+    #if defined(WOLFSSL_AESNI) || defined(WOLFSSL_ARMASM) || defined(USE_INTEL_SPEEDUP)
         #if !defined(ALIGN16)
             #if defined(__GNUC__)
                 #define ALIGN16 __attribute__ ( (aligned (16)))
@@ -410,6 +462,18 @@
             #endif
         #endif /* !ALIGN16 */
 
+        #if !defined (ALIGN32)
+            #if defined (__GNUC__)
+                #define ALIGN32 __attribute__ ( (aligned (32)))
+            #elif defined(_MSC_VER)
+                /* disable align warning, we want alignment ! */
+                #pragma warning(disable: 4324)
+                #define ALIGN32 __declspec (align (32))
+            #else
+                #define ALIGN32
+            #endif
+        #endif
+
        #if !defined(ALIGN32)
             #if defined(__GNUC__)
                 #define ALIGN32 __attribute__ ( (aligned (32)))
diff --git a/wolfssl/wolfcrypt/wolfevent.h b/wolfssl/wolfcrypt/wolfevent.h
index 5dbf16450..4691356bb 100644
--- a/wolfssl/wolfcrypt/wolfevent.h
+++ b/wolfssl/wolfcrypt/wolfevent.h
@@ -29,6 +29,9 @@
 #ifndef SINGLE_THREADED
     #include <wolfssl/wolfcrypt/wc_port.h>
 #endif
+#ifdef HAVE_CAVIUM
+    #include <wolfssl/wolfcrypt/port/cavium/cavium_nitrox.h> /* CavReqId */
+#endif
 
 typedef struct WOLFSSL WOLFSSL;
 typedef struct WOLF_EVENT WOLF_EVENT;
@@ -38,13 +41,12 @@ typedef unsigned short WOLF_EVENT_FLAG;
 
 typedef enum WOLF_EVENT_TYPE {
     WOLF_EVENT_TYPE_NONE,
-    #ifdef WOLFSSL_ASYNC_CRYPT
-        WOLF_EVENT_TYPE_ASYNC_ANY,
-        WOLF_EVENT_TYPE_ASYNC_WOLFSSL,
-        WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT,
-        WOLF_EVENT_TYPE_ASYNC_FIRST = WOLF_EVENT_TYPE_ASYNC_WOLFSSL,
-        WOLF_EVENT_TYPE_ASYNC_LAST = WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT,
-    #endif
+#ifdef WOLFSSL_ASYNC_CRYPT
+    WOLF_EVENT_TYPE_ASYNC_WOLFSSL,    /* context is WOLFSSL* */
+    WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT,  /* context is WC_ASYNC_DEV */
+    WOLF_EVENT_TYPE_ASYNC_FIRST = WOLF_EVENT_TYPE_ASYNC_WOLFSSL,
+    WOLF_EVENT_TYPE_ASYNC_LAST = WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT,
+#endif /* WOLFSSL_ASYNC_CRYPT */
 } WOLF_EVENT_TYPE;
 
 struct WOLF_EVENT {
@@ -53,11 +55,20 @@ struct WOLF_EVENT {
     WOLF_EVENT*         prev;
 
     void*               context;
+    union {
+        void* ptr;
+#ifdef WOLFSSL_ASYNC_CRYPT
+        struct WC_ASYNC_DEV* async;
+#endif
+    } dev;
 #ifdef HAVE_CAVIUM
-    word64              reqId;
+    CavReqId            reqId;
 #endif
     int                 ret;    /* Async return code */
+    unsigned int        flags;
     WOLF_EVENT_TYPE     type;
+
+    /* event flags */
     WOLF_EVENT_FLAG     pending:1;
     WOLF_EVENT_FLAG     done:1;
     /* Future event flags can go here */
@@ -87,12 +98,16 @@ WOLFSSL_API int wolfEvent_Poll(WOLF_EVENT* event, WOLF_EVENT_FLAG flags);
 WOLFSSL_API int wolfEventQueue_Init(WOLF_EVENT_QUEUE* queue);
 WOLFSSL_API int wolfEventQueue_Push(WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event);
 WOLFSSL_API int wolfEventQueue_Pop(WOLF_EVENT_QUEUE* queue, WOLF_EVENT** event);
-WOLFSSL_API int wolfEventQueue_Remove(WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event);
 WOLFSSL_API int wolfEventQueue_Poll(WOLF_EVENT_QUEUE* queue, void* context_filter,
     WOLF_EVENT** events, int maxEvents, WOLF_EVENT_FLAG flags, int* eventCount);
 WOLFSSL_API int wolfEventQueue_Count(WOLF_EVENT_QUEUE* queue);
 WOLFSSL_API void wolfEventQueue_Free(WOLF_EVENT_QUEUE* queue);
 
+/* the queue mutex must be locked prior to calling these */
+WOLFSSL_API int wolfEventQueue_Add(WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event);
+WOLFSSL_API int wolfEventQueue_Remove(WOLF_EVENT_QUEUE* queue, WOLF_EVENT* event);
+
+
 #endif /* HAVE_WOLF_EVENT */
 
 
diff --git a/wolfssl/wolfcrypt/wolfmath.h b/wolfssl/wolfcrypt/wolfmath.h
index e6a348653..e32efc1b2 100644
--- a/wolfssl/wolfcrypt/wolfmath.h
+++ b/wolfssl/wolfcrypt/wolfmath.h
@@ -19,15 +19,43 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
  */
 
+#if defined(HAVE_WOLF_BIGINT) && !defined(WOLF_BIGINT_DEFINED)
+    /* raw big integer: byte buffer + length + heap pointer (see wc_bigint_* below) */
+    typedef struct WC_BIGINT {
+        byte*   buf;   /* integer bytes; tfm.h describes fp_int.raw as unsigned binary, big endian */
+        word32  len;   /* presumably the size of buf in bytes -- TODO confirm in wolfmath.c */
+        void*   heap;  /* presumably the heap hint used to allocate buf -- TODO confirm */
+    } WC_BIGINT;
+
+    #define WOLF_BIGINT_DEFINED /* tested by the #if above so the typedef is emitted only once */
+#endif
+
+
+/* only define functions if mp_int has been declared */
+#ifdef MP_INT_DEFINED
+
 #ifndef __WOLFMATH_H__
 #define __WOLFMATH_H__
 
+    /* common math functions */
+    int get_digit_count(mp_int* a);
+    mp_digit get_digit(mp_int* a, int n);
+    int get_rand_digit(WC_RNG* rng, mp_digit* d);
+    int mp_rand(mp_int* a, int digits, WC_RNG* rng);
 
-/* common math functions */
-WOLFSSL_LOCAL int get_digit_count(mp_int* a);
-WOLFSSL_LOCAL mp_digit get_digit(mp_int* a, int n);
-WOLFSSL_LOCAL int get_rand_digit(WC_RNG* rng, mp_digit* d);
-WOLFSSL_LOCAL int mp_rand(mp_int* a, int digits, WC_RNG* rng);
 
+    #ifdef HAVE_WOLF_BIGINT
+        void wc_bigint_init(WC_BIGINT* a);
+        int wc_bigint_alloc(WC_BIGINT* a, word32 sz);
+        int wc_bigint_from_unsigned_bin(WC_BIGINT* a, const byte* in, word32 inlen);
+        int wc_bigint_to_unsigned_bin(WC_BIGINT* a, byte* out, word32* outlen);
+        void wc_bigint_zero(WC_BIGINT* a);
+        void wc_bigint_free(WC_BIGINT* a);
+
+        int wc_mp_to_bigint(mp_int* src, WC_BIGINT* dst);
+        int wc_bigint_to_mp(WC_BIGINT* src, mp_int* dst);
+    #endif /* HAVE_WOLF_BIGINT */
 
 #endif /* __WOLFMATH_H__ */
+
+#endif /* MP_INT_DEFINED */