From 70d7b6e48be6aa20625de41f2c5e56bd42be469a Mon Sep 17 00:00:00 2001 From: Daniel Pouzzner Date: Sat, 11 May 2024 02:36:27 -0500 Subject: [PATCH 01/10] add WOLFSSL_AESXTS_STREAM, --enable-aesxts-stream, wc_AesXtsEncryptStart(), wc_AesXtsDecryptStart(), wc_AesXtsEncryptUpdate(), wc_AesXtsDecryptUpdate(), and implement fixes in linuxkm/lkcapi_glue.c to use the streaming API when needed. also added support for 2*192 bit AES-XTS, needed for Linux kernel. --- configure.ac | 25 ++ linuxkm/lkcapi_glue.c | 209 ++++++++++- linuxkm/x86_vector_register_glue.c | 3 +- wolfcrypt/src/aes.c | 585 +++++++++++++++++++++++++++-- wolfssl/wolfcrypt/aes.h | 16 + 5 files changed, 784 insertions(+), 54 deletions(-) diff --git a/configure.ac b/configure.ac index 353110418..3d04ded62 100644 --- a/configure.ac +++ b/configure.ac @@ -939,6 +939,7 @@ then if test "$ENABLED_FIPS" = "no" || test "$HAVE_FIPS_VERSION" -ge 6 || test "$FIPS_VERSION" = "v5-dev"; then test "$enable_aesxts" = "" && enable_aesxts=yes + test "$enable_aesxts_stream" = "" && test "$enable_aesxts" = "yes" && enable_aesxts_stream=yes test "$enable_aessiv" = "" && enable_aessiv=yes fi @@ -1078,6 +1079,7 @@ then if test "$ENABLED_FIPS" = "no" || test "$HAVE_FIPS_VERSION" -ge 6 || test "$FIPS_VERSION" = "v5-dev"; then test "$enable_aesxts" = "" && enable_aesxts=yes + test "$enable_aesxts_stream" = "" && test "$enable_aesxts" = "yes" && enable_aesxts_stream=yes test "$enable_aessiv" = "" && enable_aessiv=yes fi @@ -4847,6 +4849,11 @@ AC_ARG_ENABLE([aesxts], [ ENABLED_AESXTS=$enableval ], [ ENABLED_AESXTS=no ] ) +AC_ARG_ENABLE([aesxts-stream], + [AS_HELP_STRING([--enable-aesxts-stream],[Enable wolfSSL AES-XTS support with streaming APIs (default: disabled)])], + [ ENABLED_AESXTS_STREAM=$enableval ], + [ ENABLED_AESXTS_STREAM=$ENABLED_AESXTS ] + ) # legacy old option name, for compatibility: AC_ARG_ENABLE([xts], @@ -5070,6 +5077,11 @@ AS_CASE([$FIPS_VERSION], AS_IF([test "x$ENABLED_AESXTS" = "xyes" && test "x$ENABLED_AESNI" = "xyes"], [AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_AES_XTS"]) + AS_IF([test "x$ENABLED_AESXTS_STREAM" = "xno"], + [ENABLED_AESXTS_STREAM="yes"; AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_AESXTS_STREAM"]) + AS_IF([test "x$ENABLED_AESXTS_STREAM" = "xyes" && test "x$ENABLED_AESNI" = "xyes"], + [AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_AESXTS_STREAM"]) + AS_IF([(test "$ENABLED_AESCCM" = "yes" && test "$HAVE_AESCCM_PORT" != "yes") || (test "$ENABLED_AESCTR" = "yes" && test "$HAVE_AESCTR_PORT" != "yes") || (test "$ENABLED_AESGCM" = "yes" && test "$HAVE_AESGCM_PORT" != "yes") || @@ -8360,6 +8372,7 @@ then test "$ENABLED_AESGCM_STREAM" != "no" || AC_MSG_ERROR([linuxkm-lkcapi-register ${lkcapi_alg}: --enable-aesgcm-stream is required for LKCAPI.]) AM_CFLAGS="$AM_CFLAGS -DLINUXKM_LKCAPI_REGISTER_AESGCM" ;; 'xts(aes)') test "$ENABLED_AESXTS" != "no" || AC_MSG_ERROR([linuxkm-lkcapi-register ${lkcapi_alg}: AES-XTS implementation not enabled.]) + test "$ENABLED_AESXTS_STREAM" != "no" || AC_MSG_ERROR([linuxkm-lkcapi-register ${lkcapi_alg}: --enable-aesxts-stream is required for LKCAPI.]) AM_CFLAGS="$AM_CFLAGS -DLINUXKM_LKCAPI_REGISTER_AESXTS" ;; *) AC_MSG_ERROR([Unsupported LKCAPI algorithm "$lkcapi_alg".]) ;; esac @@ -9009,6 +9022,17 @@ then fi fi +if test "$ENABLED_AESXTS_STREAM" != "no" +then + if test "$ENABLED_AESXTS" = "no" + then + AC_MSG_ERROR([AES-XTS streaming enabled but AES-XTS is disabled]) + else + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_AESXTS_STREAM" + AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_AESXTS_STREAM" + fi +fi + if test "$ENABLED_IOTSAFE" != "no" then 
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_IOTSAFE" @@ -9769,6 +9793,7 @@ echo " * AES-CTR: $ENABLED_AESCTR" echo " * AES-CFB: $ENABLED_AESCFB" echo " * AES-OFB: $ENABLED_AESOFB" echo " * AES-XTS: $ENABLED_AESXTS" +echo " * AES-XTS streaming: $ENABLED_AESXTS_STREAM" echo " * AES-SIV: $ENABLED_AESSIV" echo " * AES-EAX: $ENABLED_AESEAX" echo " * AES Bitspliced: $ENABLED_AESBS" diff --git a/linuxkm/lkcapi_glue.c b/linuxkm/lkcapi_glue.c index c38d7d866..e1c1d6492 100644 --- a/linuxkm/lkcapi_glue.c +++ b/linuxkm/lkcapi_glue.c @@ -790,6 +790,10 @@ static int gcmAesAead_loaded = 0; (defined(LINUXKM_LKCAPI_REGISTER_ALL) || \ defined(LINUXKM_LKCAPI_REGISTER_AESXTS)) +#ifndef WOLFSSL_AESGCM_STREAM + #error LKCAPI registration of AES-XTS requires WOLFSSL_AESXTS_STREAM (--enable-aesxts-stream). +#endif + struct km_AesXtsCtx { XtsAes *aesXts; /* allocated in km_AesXtsInitCommon() to assure alignment * for AESNI. @@ -835,6 +839,16 @@ static int km_AesXtsSetKey(struct crypto_skcipher *tfm, const u8 *in_key, int err; struct km_AesXtsCtx * ctx = crypto_skcipher_ctx(tfm); + /* filter bad keysizes here, to avoid console noise from + * CONFIG_CRYPTO_MANAGER_EXTRA_TESTS. + */ + if ((key_len != (AES_128_KEY_SIZE*2)) && + (key_len != (AES_192_KEY_SIZE*2)) && + (key_len != (AES_256_KEY_SIZE*2))) + { + return -EINVAL; + } + err = wc_AesXtsSetKeyNoInit(ctx->aesXts, in_key, key_len, AES_ENCRYPTION_AND_DECRYPTION); @@ -852,7 +866,6 @@ static int km_AesXtsSetKey(struct crypto_skcipher *tfm, const u8 *in_key, static int km_AesXtsEncrypt(struct skcipher_request *req) { int err = 0; - struct crypto_skcipher * tfm = NULL; struct km_AesXtsCtx * ctx = NULL; struct skcipher_walk walk; @@ -861,6 +874,9 @@ static int km_AesXtsEncrypt(struct skcipher_request *req) tfm = crypto_skcipher_reqtfm(req); ctx = crypto_skcipher_ctx(tfm); + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + err = skcipher_walk_virt(&walk, req, false); if (unlikely(err)) { @@ -869,10 +885,9 @@ static int km_AesXtsEncrypt(struct skcipher_request *req) return err; } - while ((nbytes = walk.nbytes) != 0) { + if (walk.nbytes == walk.total) { err = wc_AesXtsEncrypt(ctx->aesXts, walk.dst.virt.addr, - walk.src.virt.addr, nbytes, - walk.iv, walk.ivsize); + walk.src.virt.addr, walk.nbytes, walk.iv, walk.ivsize); if (unlikely(err)) { pr_err("%s: wc_AesXtsEncrypt failed: %d\n", @@ -880,12 +895,91 @@ static int km_AesXtsEncrypt(struct skcipher_request *req) return -EINVAL; } - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + err = skcipher_walk_done(&walk, 0); + + } else { + int tail = req->cryptlen % AES_BLOCK_SIZE; + struct skcipher_request subreq; + byte tweak_block[AES_BLOCK_SIZE]; + + if (tail > 0) { + int blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2; + + skcipher_walk_abort(&walk); + + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, + skcipher_request_flags(req), + NULL, NULL); + skcipher_request_set_crypt(&subreq, req->src, req->dst, + blocks * AES_BLOCK_SIZE, req->iv); + req = &subreq; + + err = skcipher_walk_virt(&walk, req, false); + if (!walk.nbytes) + return err; + } else { + tail = 0; + } + + err = wc_AesXtsEncryptStart(ctx->aesXts, walk.iv, walk.ivsize, + tweak_block); if (unlikely(err)) { - pr_err("%s: skcipher_walk_done failed: %d\n", + pr_err("%s: wc_AesXtsEncryptStart failed: %d\n", crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)), err); - return err; + return -EINVAL; + } + + while ((nbytes = walk.nbytes) != 0) { + if (nbytes < walk.total) + nbytes &= ~(AES_BLOCK_SIZE - 1); + + err = 
wc_AesXtsEncryptUpdate(ctx->aesXts, walk.dst.virt.addr, + walk.src.virt.addr, nbytes, + tweak_block); + + if (unlikely(err)) { + pr_err("%s: wc_AesXtsEncryptUpdate failed: %d\n", + crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)), err); + return -EINVAL; + } + + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + + if (unlikely(err)) { + pr_err("%s: skcipher_walk_done failed: %d\n", + crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)), err); + return err; + } + } + + if (unlikely(tail > 0 && !err)) { + struct scatterlist sg_src[2], sg_dst[2]; + struct scatterlist *src, *dst; + + dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); + + skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, + req->iv); + + err = skcipher_walk_virt(&walk, &subreq, false); + if (err) + return err; + + err = wc_AesXtsEncryptUpdate(ctx->aesXts, walk.dst.virt.addr, + walk.src.virt.addr, walk.nbytes, + tweak_block); + + if (unlikely(err)) { + pr_err("%s: wc_AesXtsEncryptUpdate failed: %d\n", + crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)), err); + return -EINVAL; + } + + err = skcipher_walk_done(&walk, 0); } } @@ -903,6 +997,9 @@ static int km_AesXtsDecrypt(struct skcipher_request *req) tfm = crypto_skcipher_reqtfm(req); ctx = crypto_skcipher_ctx(tfm); + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + err = skcipher_walk_virt(&walk, req, false); if (unlikely(err)) { @@ -911,26 +1008,106 @@ static int km_AesXtsDecrypt(struct skcipher_request *req) return err; } - while ((nbytes = walk.nbytes) != 0) { - err = wc_AesXtsDecrypt(ctx->aesXts, walk.dst.virt.addr, - walk.src.virt.addr, nbytes, - walk.iv, walk.ivsize); + if (walk.nbytes == walk.total) { + + err = wc_AesXtsDecrypt(ctx->aesXts, + walk.dst.virt.addr, walk.src.virt.addr, + walk.nbytes, walk.iv, walk.ivsize); if (unlikely(err)) { - pr_err("%s: wc_AesCbcDecrypt failed: %d\n", + pr_err("%s: wc_AesXtsDecrypt failed: %d\n", crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)), err); return -EINVAL; } - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + err = skcipher_walk_done(&walk, 0); + + } else { + int tail = req->cryptlen % AES_BLOCK_SIZE; + struct skcipher_request subreq; + byte tweak_block[AES_BLOCK_SIZE]; + + if (unlikely(tail > 0)) { + int blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2; + + skcipher_walk_abort(&walk); + + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, + skcipher_request_flags(req), + NULL, NULL); + skcipher_request_set_crypt(&subreq, req->src, req->dst, + blocks * AES_BLOCK_SIZE, req->iv); + req = &subreq; + + err = skcipher_walk_virt(&walk, req, false); + if (!walk.nbytes) + return err; + } else { + tail = 0; + } + + err = wc_AesXtsDecryptStart(ctx->aesXts, walk.iv, walk.ivsize, + tweak_block); if (unlikely(err)) { - pr_err("%s: skcipher_walk_done failed: %d\n", + pr_err("%s: wc_AesXtsDecryptStart failed: %d\n", crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)), err); - return err; + return -EINVAL; } - } + while ((nbytes = walk.nbytes) != 0) { + if (nbytes < walk.total) + nbytes &= ~(AES_BLOCK_SIZE - 1); + + err = wc_AesXtsDecryptUpdate(ctx->aesXts, walk.dst.virt.addr, + walk.src.virt.addr, nbytes, + tweak_block); + + if (unlikely(err)) { + pr_err("%s: wc_AesXtsDecryptUpdate failed: %d\n", + crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)), err); + return -EINVAL; + } + + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + + if (unlikely(err)) { 
+ pr_err("%s: skcipher_walk_done failed: %d\n", + crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)), err); + return err; + } + } + + if (unlikely(tail > 0 && !err)) { + struct scatterlist sg_src[2], sg_dst[2]; + struct scatterlist *src, *dst; + + dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); + + skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, + req->iv); + + err = skcipher_walk_virt(&walk, &subreq, false); + if (err) + return err; + + err = wc_AesXtsDecryptUpdate(ctx->aesXts, walk.dst.virt.addr, + walk.src.virt.addr, walk.nbytes, + tweak_block); + + if (unlikely(err)) { + pr_err("%s: wc_AesXtsDecryptUpdate failed: %d\n", + crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)), err); + return -EINVAL; + } + + err = skcipher_walk_done(&walk, 0); + } + + } return err; } diff --git a/linuxkm/x86_vector_register_glue.c b/linuxkm/x86_vector_register_glue.c index ea84201c3..8f0ffb4ca 100644 --- a/linuxkm/x86_vector_register_glue.c +++ b/linuxkm/x86_vector_register_glue.c @@ -189,8 +189,7 @@ static struct wc_thread_fpu_count_ent *wc_linuxkm_fpu_state_assoc(int create_p) * dependency loop on intelasm builds, we allocate here. * this is not thread-safe and doesn't need to be. */ - int ret = allocate_wolfcrypt_linuxkm_fpu_states(); - if (ret != 0) + if ((! create_p) || (allocate_wolfcrypt_linuxkm_fpu_states() != 0)) #endif { if (_warned_on_null == 0) { diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index f53428299..fdef450fa 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -12336,12 +12336,16 @@ int wc_AesXtsSetKeyNoInit(XtsAes* aes, const byte* key, word32 len, int dir) return BAD_FUNC_ARG; } - keySz = len/2; - if (keySz != AES_128_KEY_SIZE && keySz != AES_256_KEY_SIZE) { + if ((len != (AES_128_KEY_SIZE*2)) && + (len != (AES_192_KEY_SIZE*2)) && + (len != (AES_256_KEY_SIZE*2))) + { WOLFSSL_MSG("Unsupported key size"); return WC_KEY_SIZE_E; } + keySz = len/2; + #ifdef HAVE_FIPS if (XMEMCMP(key, key + keySz, keySz) == 0) { WOLFSSL_MSG("FIPS AES-XTS main and tweak keys must differ"); @@ -12590,7 +12594,6 @@ static WARN_UNUSED_RESULT int _AesXtsHelper( } #endif /* HAVE_AES_ECB */ - /* AES with XTS mode. (XTS) XEX encryption with Tweak and cipher text Stealing. 
* * xaes AES keys to use for block encrypt/decrypt @@ -12605,25 +12608,20 @@ static WARN_UNUSED_RESULT int _AesXtsHelper( static int AesXtsEncrypt_sw(XtsAes* xaes, byte* out, const byte* in, word32 sz, const byte* i) { - int ret = 0; + int ret; word32 blocks = (sz / AES_BLOCK_SIZE); Aes *aes = &xaes->aes; - Aes *tweak = &xaes->tweak; - byte tmp[AES_BLOCK_SIZE]; - - XMEMSET(tmp, 0, AES_BLOCK_SIZE); /* set to 0's in case of improper AES - * key setup passed to encrypt direct*/ - - ret = wc_AesEncryptDirect(tweak, tmp, i); + byte tweak_block[AES_BLOCK_SIZE]; + ret = wc_AesEncryptDirect(&xaes->tweak, tweak_block, i); if (ret != 0) return ret; #ifdef HAVE_AES_ECB /* encrypt all of buffer at once when possible */ if (in != out) { /* can not handle inline */ - XMEMCPY(out, tmp, AES_BLOCK_SIZE); - if ((ret = _AesXtsHelper(aes, out, in, sz, AES_ENCRYPTION)) != 0) + XMEMCPY(out, tweak_block, AES_BLOCK_SIZE); + if ((ret = _AesXtsHelper(&xaes->aes, out, in, sz, AES_ENCRYPTION)) != 0) return ret; } #endif @@ -12639,23 +12637,23 @@ static int AesXtsEncrypt_sw(XtsAes* xaes, byte* out, const byte* in, word32 sz, byte buf[AES_BLOCK_SIZE]; XMEMCPY(buf, in, AES_BLOCK_SIZE); - xorbuf(buf, tmp, AES_BLOCK_SIZE); + xorbuf(buf, tweak_block, AES_BLOCK_SIZE); ret = wc_AesEncryptDirect(aes, out, buf); if (ret != 0) return ret; } - xorbuf(out, tmp, AES_BLOCK_SIZE); + xorbuf(out, tweak_block, AES_BLOCK_SIZE); /* multiply by shift left and propagate carry */ for (j = 0; j < AES_BLOCK_SIZE; j++) { byte tmpC; - tmpC = (tmp[j] >> 7) & 0x01; - tmp[j] = (byte)((tmp[j] << 1) + carry); + tmpC = (tweak_block[j] >> 7) & 0x01; + tweak_block[j] = (byte)((tweak_block[j] << 1) + carry); carry = tmpC; } if (carry) { - tmp[0] ^= GF_XTS; + tweak_block[0] ^= GF_XTS; } in += AES_BLOCK_SIZE; @@ -12684,15 +12682,123 @@ static int AesXtsEncrypt_sw(XtsAes* xaes, byte* out, const byte* in, word32 sz, XMEMCPY(out, buf2, sz); } - xorbuf(buf, tmp, AES_BLOCK_SIZE); + xorbuf(buf, tweak_block, AES_BLOCK_SIZE); ret = wc_AesEncryptDirect(aes, out - AES_BLOCK_SIZE, buf); if (ret == 0) - xorbuf(out - AES_BLOCK_SIZE, tmp, AES_BLOCK_SIZE); + xorbuf(out - AES_BLOCK_SIZE, tweak_block, AES_BLOCK_SIZE); } return ret; } +#ifdef WOLFSSL_AESXTS_STREAM + +/* streaming AES-XTS. (XTS) XEX encryption with Tweak and cipher text Stealing. + * + * xaes AES keys to use for block encrypt/decrypt + * i value to use for tweak + * + * returns 0 on success + */ +static int AesXtsEncryptStart_sw(XtsAes* xaes, const byte* i, byte *tweak_block) { + return wc_AesEncryptDirect(&xaes->tweak, tweak_block, i); +} + +/* streaming AES-XTS. (XTS) XEX encryption with Tweak and cipher text Stealing. 
+ * + * xaes AES keys to use for block encrypt/decrypt + * out output buffer to hold cipher text + * in input plain text buffer to encrypt + * sz size of both out and in buffers + * + * returns 0 on success + */ +/* Software AES - XTS Encrypt */ +static int AesXtsEncryptUpdate_sw(XtsAes* xaes, byte* out, const byte* in, word32 sz, + byte *tweak_block) +{ + int ret = 0; + word32 blocks = (sz / AES_BLOCK_SIZE); + Aes *aes = &xaes->aes; + +#if 0 +#ifdef HAVE_AES_ECB + /* encrypt all of buffer at once when possible */ + if (in != out) { /* can not handle inline */ + XMEMCPY(out, tweak_block, AES_BLOCK_SIZE); + if ((ret = _AesXtsHelper(aes, out, in, sz, AES_ENCRYPTION)) != 0) + return ret; + } +#endif +#endif + + while (blocks > 0) { + word32 j; + byte carry = 0; + +#if 0 && defined(HAVE_AES_ECB) + if (in == out) +#endif + { /* check for if inline */ + byte buf[AES_BLOCK_SIZE]; + + XMEMCPY(buf, in, AES_BLOCK_SIZE); + xorbuf(buf, tweak_block, AES_BLOCK_SIZE); + ret = wc_AesEncryptDirect(aes, out, buf); + if (ret != 0) + return ret; + } + xorbuf(out, tweak_block, AES_BLOCK_SIZE); + + /* multiply by shift left and propagate carry */ + for (j = 0; j < AES_BLOCK_SIZE; j++) { + byte tmpC; + + tmpC = (tweak_block[j] >> 7) & 0x01; + tweak_block[j] = (byte)((tweak_block[j] << 1) + carry); + carry = tmpC; + } + if (carry) { + tweak_block[0] ^= GF_XTS; + } + + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + sz -= AES_BLOCK_SIZE; + blocks--; + } + + /* stealing operation of XTS to handle left overs */ + if (sz > 0) { + byte buf[AES_BLOCK_SIZE]; + + XMEMCPY(buf, out - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + if (sz >= AES_BLOCK_SIZE) { /* extra sanity check before copy */ + return BUFFER_E; + } + if (in != out) { + XMEMCPY(out, buf, sz); + XMEMCPY(buf, in, sz); + } + else { + byte buf2[AES_BLOCK_SIZE]; + + XMEMCPY(buf2, buf, sz); + XMEMCPY(buf, in, sz); + XMEMCPY(out, buf2, sz); + } + + xorbuf(buf, tweak_block, AES_BLOCK_SIZE); + ret = wc_AesEncryptDirect(aes, out - AES_BLOCK_SIZE, buf); + if (ret == 0) + xorbuf(out - AES_BLOCK_SIZE, tweak_block, AES_BLOCK_SIZE); + } + + return ret; +} + +#endif /* WOLFSSL_AESXTS_STREAM */ + /* AES with XTS mode. (XTS) XEX encryption with Tweak and cipher text Stealing. * * xaes AES keys to use for block encrypt/decrypt @@ -12773,6 +12879,136 @@ int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, return ret; } +#ifdef WOLFSSL_AESXTS_STREAM + +int wc_AesXtsEncryptStart(XtsAes* xaes, const byte* i, word32 iSz, + byte *tweak_block) +{ + int ret; + + Aes *aes; + + if ((xaes == NULL) || (tweak_block == NULL)) { + return BAD_FUNC_ARG; + } + + if (iSz < AES_BLOCK_SIZE) { + return BAD_FUNC_ARG; + } + + aes = &xaes->aes; + + if (aes->keylen == 0) { + WOLFSSL_MSG("wc_AesXtsEncrypt called with unset encryption key."); + return BAD_FUNC_ARG; + } + + if (iSz < AES_BLOCK_SIZE) { + return BAD_FUNC_ARG; + } + + { +#if 0 && defined(WOLFSSL_AESNI) + if (aes->use_aesni) { + SAVE_VECTOR_REGISTERS(return _svr_ret;); +#if defined(HAVE_INTEL_AVX1) + if (IS_INTEL_AVX1(intel_flags)) { + AES_XTS_encrypt_start_avx1(i, + (const byte*)xaes->tweak.key, + tweak_block, + (int)xaes->tweak.rounds); + ret = 0; + } + else +#endif + { + AES_XTS_encrypt_start_aesni(i, + (const byte*)xaes->tweak.key, + tweak_block, + (int)xaes->tweak.rounds); + ret = 0; + } + RESTORE_VECTOR_REGISTERS(); + } + else +#endif /* 0 && defined(WOLFSSL_AESNI) */ + { + ret = AesXtsEncryptStart_sw(xaes, i, tweak_block); + } + } + + return ret; +} + +/* AES with XTS mode. 
(XTS) XEX encryption with Tweak and cipher text Stealing. + * + * xaes AES keys to use for block encrypt/decrypt + * out output buffer to hold cipher text + * in input plain text buffer to encrypt + * sz size of both out and in buffers + * i value to use for tweak + * iSz size of i buffer, should always be AES_BLOCK_SIZE but having this input + * adds a sanity check on how the user calls the function. + * + * returns 0 on success + */ +int wc_AesXtsEncryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 sz, + byte *tweak_block) +{ + int ret; + +#if 0 && defined(WOLFSSL_AESNI) + Aes *aes; +#endif + + if (xaes == NULL || out == NULL || in == NULL || tweak_block == NULL) { + return BAD_FUNC_ARG; + } + +#if 0 && defined(WOLFSSL_AESNI) + aes = &xaes->aes; +#endif + + if (sz < AES_BLOCK_SIZE) { + WOLFSSL_MSG("Plain text input too small for encryption"); + return BAD_FUNC_ARG; + } + + { +#if 0 && defined(WOLFSSL_AESNI) + if (aes->use_aesni) { + SAVE_VECTOR_REGISTERS(return _svr_ret;); +#if defined(HAVE_INTEL_AVX1) + if (IS_INTEL_AVX1(intel_flags)) { + AES_XTS_encrypt_update_avx1(in, out, sz, + (const byte*)aes->key, + tweak_block, + (int)aes->rounds); + ret = 0; + } + else +#endif + { + AES_XTS_encrypt_update_aesni(in, out, sz, + (const byte*)aes->key, + tweak_block, + (int)aes->rounds); + ret = 0; + } + RESTORE_VECTOR_REGISTERS(); + } + else +#endif /* 0 && defined(WOLFSSL_AESNI) */ + { + ret = AesXtsEncryptUpdate_sw(xaes, out, in, sz, tweak_block); + } + } + + return ret; +} + +#endif /* WOLFSSL_AESXTS_STREAM */ + /* Same process as encryption but use aes_decrypt key. * * xaes AES keys to use for block encrypt/decrypt @@ -12794,16 +13030,12 @@ static int AesXtsDecrypt_sw(XtsAes* xaes, byte* out, const byte* in, word32 sz, #else Aes *aes = &xaes->aes; #endif - Aes *tweak = &xaes->tweak; word32 j; byte carry = 0; - byte tmp[AES_BLOCK_SIZE]; + byte tweak_block[AES_BLOCK_SIZE]; byte stl = (sz % AES_BLOCK_SIZE); - XMEMSET(tmp, 0, AES_BLOCK_SIZE); /* set to 0's in case of improper AES - * key setup passed to decrypt direct*/ - - ret = wc_AesEncryptDirect(tweak, tmp, i); + ret = wc_AesEncryptDirect(&xaes->tweak, tweak_block, i); if (ret != 0) return ret; @@ -12816,7 +13048,7 @@ static int AesXtsDecrypt_sw(XtsAes* xaes, byte* out, const byte* in, word32 sz, #ifdef HAVE_AES_ECB /* decrypt all of buffer at once when possible */ if (in != out) { /* can not handle inline */ - XMEMCPY(out, tmp, AES_BLOCK_SIZE); + XMEMCPY(out, tweak_block, AES_BLOCK_SIZE); if ((ret = _AesXtsHelper(aes, out, in, sz, AES_DECRYPTION)) != 0) return ret; } @@ -12830,23 +13062,23 @@ static int AesXtsDecrypt_sw(XtsAes* xaes, byte* out, const byte* in, word32 sz, byte buf[AES_BLOCK_SIZE]; XMEMCPY(buf, in, AES_BLOCK_SIZE); - xorbuf(buf, tmp, AES_BLOCK_SIZE); + xorbuf(buf, tweak_block, AES_BLOCK_SIZE); ret = wc_AesDecryptDirect(aes, out, buf); if (ret != 0) return ret; } - xorbuf(out, tmp, AES_BLOCK_SIZE); + xorbuf(out, tweak_block, AES_BLOCK_SIZE); /* multiply by shift left and propagate carry */ for (j = 0; j < AES_BLOCK_SIZE; j++) { byte tmpC; - tmpC = (tmp[j] >> 7) & 0x01; - tmp[j] = (byte)((tmp[j] << 1) + carry); + tmpC = (tweak_block[j] >> 7) & 0x01; + tweak_block[j] = (byte)((tweak_block[j] << 1) + carry); carry = tmpC; } if (carry) { - tmp[0] ^= GF_XTS; + tweak_block[0] ^= GF_XTS; } carry = 0; @@ -12865,8 +13097,8 @@ static int AesXtsDecrypt_sw(XtsAes* xaes, byte* out, const byte* in, word32 sz, for (j = 0; j < AES_BLOCK_SIZE; j++) { byte tmpC; - tmpC = (tmp[j] >> 7) & 0x01; - tmp2[j] = (byte)((tmp[j] << 1) + carry); + 
tmpC = (tweak_block[j] >> 7) & 0x01; + tmp2[j] = (byte)((tweak_block[j] << 1) + carry); carry = tmpC; } if (carry) { @@ -12894,17 +13126,152 @@ static int AesXtsDecrypt_sw(XtsAes* xaes, byte* out, const byte* in, word32 sz, XMEMCPY(buf, in, sz); XMEMCPY(out, tmp2, sz); - xorbuf(buf, tmp, AES_BLOCK_SIZE); + xorbuf(buf, tweak_block, AES_BLOCK_SIZE); ret = wc_AesDecryptDirect(aes, tmp2, buf); if (ret != 0) return ret; - xorbuf(tmp2, tmp, AES_BLOCK_SIZE); + xorbuf(tmp2, tweak_block, AES_BLOCK_SIZE); XMEMCPY(out - AES_BLOCK_SIZE, tmp2, AES_BLOCK_SIZE); } return ret; } +#ifdef WOLFSSL_AESXTS_STREAM + +static int AesXtsDecryptStart_sw(XtsAes* xaes, const byte* i, + byte *tweak_block) +{ + return wc_AesEncryptDirect(&xaes->tweak, tweak_block, i); +} + +/* Same process as encryption but use aes_decrypt key. + * + * xaes AES keys to use for block encrypt/decrypt + * out output buffer to hold plain text + * in input cipher text buffer to decrypt + * sz size of both out and in buffers + * i value to use for tweak + * + * returns 0 on success + */ +/* Software AES - XTS Decrypt */ +static int AesXtsDecryptUpdate_sw(XtsAes* xaes, byte* out, const byte* in, + word32 sz, byte *tweak_block) +{ + int ret = 0; + word32 blocks = (sz / AES_BLOCK_SIZE); +#ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + Aes *aes = &xaes->aes_decrypt; +#else + Aes *aes = &xaes->aes; +#endif + word32 j; + byte carry = 0; + byte stl = (sz % AES_BLOCK_SIZE); + + /* if Stealing then break out of loop one block early to handle special + * case */ + if (stl > 0) { + blocks--; + } + +#if 0 +#ifdef HAVE_AES_ECB + /* decrypt all of buffer at once when possible */ + if (in != out) { /* can not handle inline */ + XMEMCPY(out, tweak_block, AES_BLOCK_SIZE); + if ((ret = _AesXtsHelper(aes, out, in, sz, AES_DECRYPTION)) != 0) + return ret; + } +#endif +#endif /* 0 */ + + while (blocks > 0) { +#if 0 && defined(HAVE_AES_ECB) + if (in == out) +#endif + { /* check for if inline */ + byte buf[AES_BLOCK_SIZE]; + + XMEMCPY(buf, in, AES_BLOCK_SIZE); + xorbuf(buf, tweak_block, AES_BLOCK_SIZE); + ret = wc_AesDecryptDirect(aes, out, buf); + if (ret != 0) + return ret; + } + xorbuf(out, tweak_block, AES_BLOCK_SIZE); + + /* multiply by shift left and propagate carry */ + for (j = 0; j < AES_BLOCK_SIZE; j++) { + byte tmpC; + + tmpC = (tweak_block[j] >> 7) & 0x01; + tweak_block[j] = (byte)((tweak_block[j] << 1) + carry); + carry = tmpC; + } + if (carry) { + tweak_block[0] ^= GF_XTS; + } + carry = 0; + + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + sz -= AES_BLOCK_SIZE; + blocks--; + } + + /* stealing operation of XTS to handle left overs */ + if (sz >= AES_BLOCK_SIZE) { + byte buf[AES_BLOCK_SIZE]; + byte tmp2[AES_BLOCK_SIZE]; + + /* multiply by shift left and propagate carry */ + for (j = 0; j < AES_BLOCK_SIZE; j++) { + byte tmpC; + + tmpC = (tweak_block[j] >> 7) & 0x01; + tmp2[j] = (byte)((tweak_block[j] << 1) + carry); + carry = tmpC; + } + if (carry) { + tmp2[0] ^= GF_XTS; + } + + XMEMCPY(buf, in, AES_BLOCK_SIZE); + xorbuf(buf, tmp2, AES_BLOCK_SIZE); + ret = wc_AesDecryptDirect(aes, out, buf); + if (ret != 0) + return ret; + xorbuf(out, tmp2, AES_BLOCK_SIZE); + + /* tmp2 holds partial | last */ + XMEMCPY(tmp2, out, AES_BLOCK_SIZE); + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + sz -= AES_BLOCK_SIZE; + + /* Make buffer with end of cipher text | last */ + XMEMCPY(buf, tmp2, AES_BLOCK_SIZE); + if (sz >= AES_BLOCK_SIZE) { /* extra sanity check before copy */ + return BUFFER_E; + } + XMEMCPY(buf, in, sz); + XMEMCPY(out, tmp2, sz); + + 
xorbuf(buf, tweak_block, AES_BLOCK_SIZE); + ret = wc_AesDecryptDirect(aes, tmp2, buf); + if (ret != 0) + return ret; + xorbuf(tmp2, tweak_block, AES_BLOCK_SIZE); + XMEMCPY(out - AES_BLOCK_SIZE, tmp2, AES_BLOCK_SIZE); + } + + return ret; +} + +#endif /* WOLFSSL_AESXTS_STREAM */ + /* Same process as encryption but Aes key is AES_DECRYPTION type. * * xaes AES keys to use for block encrypt/decrypt @@ -12987,6 +13354,152 @@ int wc_AesXtsDecrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, return ret; } } + +#ifdef WOLFSSL_AESXTS_STREAM + +/* Same process as encryption but Aes key is AES_DECRYPTION type. + * + * xaes AES keys to use for block encrypt/decrypt + * out output buffer to hold plain text + * in input cipher text buffer to decrypt + * sz size of both out and in buffers + * i value to use for tweak + * iSz size of i buffer, should always be AES_BLOCK_SIZE but having this input + * adds a sanity check on how the user calls the function. + * + * returns 0 on success + */ +int wc_AesXtsDecryptStart(XtsAes* xaes, const byte* i, word32 iSz, + byte *tweak_block) +{ + int ret; + Aes *aes; + + if (xaes == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + aes = &xaes->aes_decrypt; +#else + aes = &xaes->aes; +#endif + + if (aes->keylen == 0) { + WOLFSSL_MSG("wc_AesXtsDecrypt called with unset decryption key."); + return BAD_FUNC_ARG; + } + + if (iSz < AES_BLOCK_SIZE) { + return BAD_FUNC_ARG; + } + + { +#if 0 && defined(WOLFSSL_AESNI) + if (aes->use_aesni) { + SAVE_VECTOR_REGISTERS(return _svr_ret;); +#if defined(HAVE_INTEL_AVX1) + if (IS_INTEL_AVX1(intel_flags)) { + AES_XTS_decrypt_start_avx1(i, + (const byte*)xaes->tweak.key, + tweak_block, + (int)xaes->tweak.rounds); + ret = 0; + } + else +#endif + { + AES_XTS_decrypt_start_aesni(i, + (const byte*)xaes->tweak.key, + tweak_block, + (int)xaes->tweak.rounds); + ret = 0; + } + RESTORE_VECTOR_REGISTERS(); + } + else +#endif /* 0 && defined(WOLFSSL_AESNI) */ + { + ret = AesXtsDecryptStart_sw(xaes, i, tweak_block); + } + + } + + return ret; +} + +/* Same process as encryption but Aes key is AES_DECRYPTION type. + * + * xaes AES keys to use for block encrypt/decrypt + * out output buffer to hold plain text + * in input cipher text buffer to decrypt + * sz size of both out and in buffers + * i value to use for tweak + * iSz size of i buffer, should always be AES_BLOCK_SIZE but having this input + * adds a sanity check on how the user calls the function. 
+ * + * returns 0 on success + */ +int wc_AesXtsDecryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 sz, + byte *tweak_block) +{ + int ret; +#if 0 && defined(WOLFSSL_AESNI) + Aes *aes; +#endif + + if (xaes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + +#if 0 && defined(WOLFSSL_AESNI) +#ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + aes = &xaes->aes_decrypt; +#else + aes = &xaes->aes; +#endif +#endif + + if (sz < AES_BLOCK_SIZE) { + WOLFSSL_MSG("Cipher text input too small for decryption"); + return BAD_FUNC_ARG; + } + + { +#if 0 && defined(WOLFSSL_AESNI) + if (aes->use_aesni) { + SAVE_VECTOR_REGISTERS(return _svr_ret;); +#if defined(HAVE_INTEL_AVX1) + if (IS_INTEL_AVX1(intel_flags)) { + AES_XTS_decrypt_update_avx1(in, out, sz, + (const byte*)aes->key, + tweak_block, + (int)aes->rounds); + ret = 0; + } + else +#endif + { + AES_XTS_decrypt_update_aesni(in, out, sz, + (const byte*)aes->key, + tweak_block, + (int)aes->rounds); + ret = 0; + } + RESTORE_VECTOR_REGISTERS(); + } + else +#endif /* 0 && defined(WOLFSSL_AESNI) */ + { + ret = AesXtsDecryptUpdate_sw(xaes, out, in, sz, tweak_block); + } + } + + return ret; +} + +#endif /* WOLFSSL_AESXTS_STREAM */ + #endif /* !WOLFSSL_ARMASM || WOLFSSL_ARMASM_NO_HW_CRYPTO */ /* Same as wc_AesXtsEncryptSector but the sector gets incremented by one every diff --git a/wolfssl/wolfcrypt/aes.h b/wolfssl/wolfcrypt/aes.h index 81d135b0a..a7efaa581 100644 --- a/wolfssl/wolfcrypt/aes.h +++ b/wolfssl/wolfcrypt/aes.h @@ -669,6 +669,22 @@ WOLFSSL_API int wc_AesXtsDecryptConsecutiveSectors(XtsAes* aes, byte* out, const byte* in, word32 sz, word64 sector, word32 sectorSz); +#ifdef WOLFSSL_AESXTS_STREAM + +WOLFSSL_API int wc_AesXtsEncryptStart(XtsAes* aes, const byte* i, word32 iSz, + byte *tweak_block); + +WOLFSSL_API int wc_AesXtsDecryptStart(XtsAes* aes, const byte* i, word32 iSz, + byte *tweak_block); + +WOLFSSL_API int wc_AesXtsEncryptUpdate(XtsAes* aes, byte* out, + const byte* in, word32 sz, byte *tweak_block); + +WOLFSSL_API int wc_AesXtsDecryptUpdate(XtsAes* aes, byte* out, + const byte* in, word32 sz, byte *tweak_block); + +#endif /* WOLFSSL_AESXTS_STREAM */ + WOLFSSL_API int wc_AesXtsFree(XtsAes* aes); #endif From 3ad5ec4e0ad6b7bcef39b3d9427e3a28068dac36 Mon Sep 17 00:00:00 2001 From: Daniel Pouzzner Date: Sun, 12 May 2024 17:17:54 -0500 Subject: [PATCH 02/10] make --enable-linuxkm-lkcapi-register require --enable-experimental, except for the known-good --enable-linuxkm-lkcapi-register="xts(aes)". 
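[Reviewer orientation, before the gating change below: a minimal sketch of how a caller might drive the streaming encrypt API as it stands after PATCH 01 (two-call Start/Update split with a caller-held tweak_block). The helper name, chunk policy, and use of wc_AesXtsInit()/INVALID_DEVID for setup are illustrative assumptions; wc_AesXtsSetKeyNoInit(), wc_AesXtsEncryptStart(), wc_AesXtsEncryptUpdate(), and wc_AesXtsFree() are the names the patch itself introduces or references.]

    #include <wolfssl/wolfcrypt/aes.h>

    /* Hypothetical demo helper: stream-encrypt sz bytes (sz >= AES_BLOCK_SIZE).
     * Interior calls are block-aligned; the final call absorbs any
     * ciphertext-stealing tail. */
    static int xts_stream_encrypt_demo(const byte* key, word32 keyLen, /* 2x AES key size */
                                       const byte* iv,                 /* AES_BLOCK_SIZE bytes */
                                       const byte* in, byte* out, word32 sz)
    {
        XtsAes xts;
        byte   tweak_block[AES_BLOCK_SIZE];
        word32 done = 0;
        int    ret;

        ret = wc_AesXtsInit(&xts, NULL, INVALID_DEVID); /* assumed lifecycle */
        if (ret != 0)
            return ret;
        ret = wc_AesXtsSetKeyNoInit(&xts, key, keyLen, AES_ENCRYPTION);
        if (ret == 0)
            ret = wc_AesXtsEncryptStart(&xts, iv, AES_BLOCK_SIZE, tweak_block);

        while ((ret == 0) && (done < sz)) {
            word32 left  = sz - done;
            /* keep a full block in reserve so the final call can steal */
            word32 chunk = (left >= 5U * AES_BLOCK_SIZE) ? 4U * AES_BLOCK_SIZE
                                                         : left;
            ret   = wc_AesXtsEncryptUpdate(&xts, out + done, in + done, chunk,
                                           tweak_block);
            done += chunk;
        }

        wc_AesXtsFree(&xts);
        return ret;
    }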
--- configure.ac | 6 +++++- linuxkm/lkcapi_glue.c | 9 +++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 3d04ded62..bd9acdec8 100644 --- a/configure.ac +++ b/configure.ac @@ -8363,13 +8363,17 @@ then for lkcapi_alg in $(echo "$ENABLED_LINUXKM_LKCAPI_REGISTER" | tr ',' ' ') do case "$lkcapi_alg" in - all) AM_CFLAGS="$AM_CFLAGS -DLINUXKM_LKCAPI_REGISTER_ALL" ;; + all) test "$ENABLED_EXPERIMENTAL" = "yes" || AC_MSG_ERROR([linuxkm-lkcapi-register ${lkcapi_alg}: requires --enable-experimental.]) + AM_CFLAGS="$AM_CFLAGS -DLINUXKM_LKCAPI_REGISTER_ALL" ;; 'cbc(aes)') test "$ENABLED_AESCBC" != "no" || AC_MSG_ERROR([linuxkm-lkcapi-register ${lkcapi_alg}: AES-CBC implementation not enabled.]) + test "$ENABLED_EXPERIMENTAL" = "yes" || AC_MSG_ERROR([linuxkm-lkcapi-register ${lkcapi_alg}: requires --enable-experimental.]) AM_CFLAGS="$AM_CFLAGS -DLINUXKM_LKCAPI_REGISTER_AESCBC" ;; 'cfb(aes)') test "$ENABLED_AESCFB" != "no" || AC_MSG_ERROR([linuxkm-lkcapi-register ${lkcapi_alg}: AES-CFB implementation not enabled.]) + test "$ENABLED_EXPERIMENTAL" = "yes" || AC_MSG_ERROR([linuxkm-lkcapi-register ${lkcapi_alg}: requires --enable-experimental.]) AM_CFLAGS="$AM_CFLAGS -DLINUXKM_LKCAPI_REGISTER_AESCFB" ;; 'gcm(aes)') test "$ENABLED_AESGCM" != "no" || AC_MSG_ERROR([linuxkm-lkcapi-register ${lkcapi_alg}: AES-GCM implementation not enabled.]) test "$ENABLED_AESGCM_STREAM" != "no" || AC_MSG_ERROR([linuxkm-lkcapi-register ${lkcapi_alg}: --enable-aesgcm-stream is required for LKCAPI.]) + test "$ENABLED_EXPERIMENTAL" = "yes" || AC_MSG_ERROR([linuxkm-lkcapi-register ${lkcapi_alg}: requires --enable-experimental.]) AM_CFLAGS="$AM_CFLAGS -DLINUXKM_LKCAPI_REGISTER_AESGCM" ;; 'xts(aes)') test "$ENABLED_AESXTS" != "no" || AC_MSG_ERROR([linuxkm-lkcapi-register ${lkcapi_alg}: AES-XTS implementation not enabled.]) test "$ENABLED_AESXTS_STREAM" != "no" || AC_MSG_ERROR([linuxkm-lkcapi-register ${lkcapi_alg}: --enable-aesxts-stream is required for LKCAPI.]) diff --git a/linuxkm/lkcapi_glue.c b/linuxkm/lkcapi_glue.c index e1c1d6492..3c9d58c4f 100644 --- a/linuxkm/lkcapi_glue.c +++ b/linuxkm/lkcapi_glue.c @@ -75,16 +75,25 @@ #if defined(HAVE_AES_CBC) && \ (defined(LINUXKM_LKCAPI_REGISTER_ALL) || \ defined(LINUXKM_LKCAPI_REGISTER_AESCBC)) +#ifndef WOLFSSL_EXPERIMENTAL_SETTINGS + #error Experimental settings without WOLFSSL_EXPERIMENTAL_SETTINGS +#endif static int linuxkm_test_aescbc(void); #endif #if defined(WOLFSSL_AES_CFB) && \ (defined(LINUXKM_LKCAPI_REGISTER_ALL) || \ defined(LINUXKM_LKCAPI_REGISTER_AESCFB)) +#ifndef WOLFSSL_EXPERIMENTAL_SETTINGS + #error Experimental settings without WOLFSSL_EXPERIMENTAL_SETTINGS +#endif static int linuxkm_test_aescfb(void); #endif #if defined(HAVE_AESGCM) && \ (defined(LINUXKM_LKCAPI_REGISTER_ALL) || \ defined(LINUXKM_LKCAPI_REGISTER_AESGCM)) +#ifndef WOLFSSL_EXPERIMENTAL_SETTINGS + #error Experimental settings without WOLFSSL_EXPERIMENTAL_SETTINGS +#endif static int linuxkm_test_aesgcm(void); #endif #if defined(WOLFSSL_AES_XTS) && \ From 9e06524c6f8b51bc4726e945ba8c8e7f20ae5ae0 Mon Sep 17 00:00:00 2001 From: Daniel Pouzzner Date: Mon, 13 May 2024 16:11:37 -0500 Subject: [PATCH 03/10] wolfcrypt/src/aes.c: add prototypes and linkages for AES_XTS_{encrypt,decrypt}_{start,update}_{avx1,aesni}. 
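[A note on what these new *_start/*_update entry points must preserve: the start routines produce the initial encrypted tweak, and each update routine advances it once per processed block and stores it back, so the state survives across calls. For reference, the portable equivalent of that per-block advance, restated from the C loops in PATCH 01 as a standalone helper (helper name is illustrative; 0x87 is the GF_XTS reduction constant from aes.c):]

    /* Advance an XTS tweak by one block: multiply by x in GF(2^128), i.e. a
     * little-endian bytewise left shift with carry, reduced by 0x87 on
     * overflow. */
    static void xts_tweak_advance(unsigned char tweak[16])
    {
        unsigned char carry = 0;
        int j;

        for (j = 0; j < 16; j++) {
            unsigned char tmpC = (tweak[j] >> 7) & 0x01;
            tweak[j] = (unsigned char)((tweak[j] << 1) + carry);
            carry = tmpC;
        }
        if (carry)
            tweak[0] ^= 0x87;
    }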
--- wolfcrypt/src/aes.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index fdef450fa..3c912ce25 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -12529,12 +12529,28 @@ void AES_XTS_encrypt_aesni(const unsigned char *in, unsigned char *out, word32 s const unsigned char* i, const unsigned char* key, const unsigned char* key2, int nr) XASM_LINK("AES_XTS_encrypt_aesni"); +#ifdef WOLFSSL_AESXTS_STREAM +void AES_XTS_encrypt_start_aesni(const unsigned char* i, const unsigned char* tweak_key, + unsigned char *tweak_block, int tweak_nr) + XASM_LINK("AES_XTS_encrypt_start_aesni"); +void AES_XTS_encrypt_update_aesni(const unsigned char *in, unsigned char *out, word32 sz, + const unsigned char* key, unsigned char *tweak_block, int nr) + XASM_LINK("AES_XTS_encrypt_update_aesni"); +#endif #ifdef HAVE_INTEL_AVX1 void AES_XTS_encrypt_avx1(const unsigned char *in, unsigned char *out, word32 sz, const unsigned char* i, const unsigned char* key, const unsigned char* key2, int nr) XASM_LINK("AES_XTS_encrypt_avx1"); +#ifdef WOLFSSL_AESXTS_STREAM +void AES_XTS_encrypt_start_avx1(const unsigned char* i, const unsigned char* tweak_key, + unsigned char *tweak_block, int tweak_nr) + XASM_LINK("AES_XTS_encrypt_start_avx1"); +void AES_XTS_encrypt_update_avx1(const unsigned char *in, unsigned char *out, word32 sz, + const unsigned char* key, unsigned char *tweak_block, int nr) + XASM_LINK("AES_XTS_encrypt_update_avx1"); +#endif #endif /* HAVE_INTEL_AVX1 */ #ifdef HAVE_AES_DECRYPT @@ -12542,12 +12558,28 @@ void AES_XTS_decrypt_aesni(const unsigned char *in, unsigned char *out, word32 s const unsigned char* i, const unsigned char* key, const unsigned char* key2, int nr) XASM_LINK("AES_XTS_decrypt_aesni"); +#ifdef WOLFSSL_AESXTS_STREAM +void AES_XTS_decrypt_start_aesni(const unsigned char* i, const unsigned char* tweak_key, + unsigned char *tweak_block, int tweak_nr) + XASM_LINK("AES_XTS_decrypt_start_aesni"); +void AES_XTS_decrypt_update_aesni(const unsigned char *in, unsigned char *out, word32 sz, + const unsigned char* key, unsigned char *tweak_block, int nr) + XASM_LINK("AES_XTS_decrypt_update_aesni"); +#endif #ifdef HAVE_INTEL_AVX1 void AES_XTS_decrypt_avx1(const unsigned char *in, unsigned char *out, word32 sz, const unsigned char* i, const unsigned char* key, const unsigned char* key2, int nr) XASM_LINK("AES_XTS_decrypt_avx1"); +#ifdef WOLFSSL_AESXTS_STREAM +void AES_XTS_decrypt_start_avx1(const unsigned char* i, const unsigned char* tweak_key, + unsigned char *tweak_block, int tweak_nr) + XASM_LINK("AES_XTS_decrypt_start_avx1"); +void AES_XTS_decrypt_update_avx1(const unsigned char *in, unsigned char *out, word32 sz, + const unsigned char* key, unsigned char *tweak_block, int nr) + XASM_LINK("AES_XTS_decrypt_update_avx1"); +#endif #endif /* HAVE_INTEL_AVX1 */ #endif /* HAVE_AES_DECRYPT */ From f874d8753d0b62715b1ac199ce5043191f967a95 Mon Sep 17 00:00:00 2001 From: Daniel Pouzzner Date: Mon, 13 May 2024 23:36:22 -0500 Subject: [PATCH 04/10] AES-XTS-streaming: refactor API to eliminate caller-supplied tweak_block. instead, caller-supplied iv is used as a readwrite buffer. 
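[Under this refactor the separate tweak_block argument disappears: the caller's i/iv buffer itself carries the running tweak, encrypted in place by wc_AesXts*Start() and advanced by each wc_AesXts*Update(). A compact sketch of the adjusted call sequence, assuming an already-keyed XtsAes and the same chunk policy as the earlier encrypt sketch (helper name illustrative):]

    /* Hypothetical demo helper, post-refactor: xts is already keyed; iv
     * (AES_BLOCK_SIZE bytes) doubles as the running tweak state and is
     * modified in place across calls. */
    static int xts_stream_encrypt_demo2(XtsAes* xts, byte* iv,
                                        const byte* in, byte* out, word32 sz)
    {
        word32 done = 0;
        int    ret  = wc_AesXtsEncryptStart(xts, iv, AES_BLOCK_SIZE);
                      /* iv -> E_K2(iv), in place */

        while ((ret == 0) && (done < sz)) {
            word32 left  = sz - done;
            word32 chunk = (left >= 5U * AES_BLOCK_SIZE) ? 4U * AES_BLOCK_SIZE
                                                         : left;
            ret   = wc_AesXtsEncryptUpdate(xts, out + done, in + done, chunk,
                                           iv); /* iv advances per block */
            done += chunk;
        }
        return ret;
    }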
--- linuxkm/lkcapi_glue.c | 16 ++-- wolfcrypt/src/aes.c | 170 ++++++++++++++++++++-------------------- wolfssl/wolfcrypt/aes.h | 10 +-- 3 files changed, 93 insertions(+), 103 deletions(-) diff --git a/linuxkm/lkcapi_glue.c b/linuxkm/lkcapi_glue.c index 3c9d58c4f..b9711560d 100644 --- a/linuxkm/lkcapi_glue.c +++ b/linuxkm/lkcapi_glue.c @@ -909,7 +909,6 @@ static int km_AesXtsEncrypt(struct skcipher_request *req) } else { int tail = req->cryptlen % AES_BLOCK_SIZE; struct skcipher_request subreq; - byte tweak_block[AES_BLOCK_SIZE]; if (tail > 0) { int blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2; @@ -931,8 +930,7 @@ static int km_AesXtsEncrypt(struct skcipher_request *req) tail = 0; } - err = wc_AesXtsEncryptStart(ctx->aesXts, walk.iv, walk.ivsize, - tweak_block); + err = wc_AesXtsEncryptStart(ctx->aesXts, walk.iv, walk.ivsize); if (unlikely(err)) { pr_err("%s: wc_AesXtsEncryptStart failed: %d\n", @@ -946,7 +944,7 @@ static int km_AesXtsEncrypt(struct skcipher_request *req) err = wc_AesXtsEncryptUpdate(ctx->aesXts, walk.dst.virt.addr, walk.src.virt.addr, nbytes, - tweak_block); + walk.iv); if (unlikely(err)) { pr_err("%s: wc_AesXtsEncryptUpdate failed: %d\n", @@ -980,7 +978,7 @@ static int km_AesXtsEncrypt(struct skcipher_request *req) err = wc_AesXtsEncryptUpdate(ctx->aesXts, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes, - tweak_block); + walk.iv); if (unlikely(err)) { pr_err("%s: wc_AesXtsEncryptUpdate failed: %d\n", @@ -1034,7 +1032,6 @@ static int km_AesXtsDecrypt(struct skcipher_request *req) } else { int tail = req->cryptlen % AES_BLOCK_SIZE; struct skcipher_request subreq; - byte tweak_block[AES_BLOCK_SIZE]; if (unlikely(tail > 0)) { int blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2; @@ -1056,8 +1053,7 @@ static int km_AesXtsDecrypt(struct skcipher_request *req) tail = 0; } - err = wc_AesXtsDecryptStart(ctx->aesXts, walk.iv, walk.ivsize, - tweak_block); + err = wc_AesXtsDecryptStart(ctx->aesXts, walk.iv, walk.ivsize); if (unlikely(err)) { pr_err("%s: wc_AesXtsDecryptStart failed: %d\n", @@ -1071,7 +1067,7 @@ static int km_AesXtsDecrypt(struct skcipher_request *req) err = wc_AesXtsDecryptUpdate(ctx->aesXts, walk.dst.virt.addr, walk.src.virt.addr, nbytes, - tweak_block); + walk.iv); if (unlikely(err)) { pr_err("%s: wc_AesXtsDecryptUpdate failed: %d\n", @@ -1105,7 +1101,7 @@ static int km_AesXtsDecrypt(struct skcipher_request *req) err = wc_AesXtsDecryptUpdate(ctx->aesXts, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes, - tweak_block); + walk.iv); if (unlikely(err)) { pr_err("%s: wc_AesXtsDecryptUpdate failed: %d\n", diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index 3c912ce25..cce13b1a8 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -12530,25 +12530,25 @@ void AES_XTS_encrypt_aesni(const unsigned char *in, unsigned char *out, word32 s const unsigned char* key2, int nr) XASM_LINK("AES_XTS_encrypt_aesni"); #ifdef WOLFSSL_AESXTS_STREAM -void AES_XTS_encrypt_start_aesni(const unsigned char* i, const unsigned char* tweak_key, - unsigned char *tweak_block, int tweak_nr) +void AES_XTS_encrypt_start_aesni(unsigned char* i, const unsigned char* tweak_key, + int tweak_nr) XASM_LINK("AES_XTS_encrypt_start_aesni"); void AES_XTS_encrypt_update_aesni(const unsigned char *in, unsigned char *out, word32 sz, - const unsigned char* key, unsigned char *tweak_block, int nr) + const unsigned char* key, unsigned char *i, int nr) XASM_LINK("AES_XTS_encrypt_update_aesni"); #endif #ifdef HAVE_INTEL_AVX1 void AES_XTS_encrypt_avx1(const unsigned char *in, 
unsigned char *out, - word32 sz, const unsigned char* i, - const unsigned char* key, const unsigned char* key2, - int nr) - XASM_LINK("AES_XTS_encrypt_avx1"); + word32 sz, const unsigned char* i, + const unsigned char* key, const unsigned char* key2, + int nr) + XASM_LINK("AES_XTS_encrypt_avx1"); #ifdef WOLFSSL_AESXTS_STREAM -void AES_XTS_encrypt_start_avx1(const unsigned char* i, const unsigned char* tweak_key, - unsigned char *tweak_block, int tweak_nr) +void AES_XTS_encrypt_start_avx1(unsigned char* i, const unsigned char* tweak_key, + int tweak_nr) XASM_LINK("AES_XTS_encrypt_start_avx1"); void AES_XTS_encrypt_update_avx1(const unsigned char *in, unsigned char *out, word32 sz, - const unsigned char* key, unsigned char *tweak_block, int nr) + const unsigned char* key, unsigned char *i, int nr) XASM_LINK("AES_XTS_encrypt_update_avx1"); #endif #endif /* HAVE_INTEL_AVX1 */ @@ -12559,25 +12559,25 @@ void AES_XTS_decrypt_aesni(const unsigned char *in, unsigned char *out, word32 s const unsigned char* key2, int nr) XASM_LINK("AES_XTS_decrypt_aesni"); #ifdef WOLFSSL_AESXTS_STREAM -void AES_XTS_decrypt_start_aesni(const unsigned char* i, const unsigned char* tweak_key, - unsigned char *tweak_block, int tweak_nr) +void AES_XTS_decrypt_start_aesni(unsigned char* i, const unsigned char* tweak_key, + int tweak_nr) XASM_LINK("AES_XTS_decrypt_start_aesni"); void AES_XTS_decrypt_update_aesni(const unsigned char *in, unsigned char *out, word32 sz, - const unsigned char* key, unsigned char *tweak_block, int nr) + const unsigned char* key, unsigned char *i, int nr) XASM_LINK("AES_XTS_decrypt_update_aesni"); #endif #ifdef HAVE_INTEL_AVX1 void AES_XTS_decrypt_avx1(const unsigned char *in, unsigned char *out, - word32 sz, const unsigned char* i, - const unsigned char* key, const unsigned char* key2, - int nr) - XASM_LINK("AES_XTS_decrypt_avx1"); + word32 sz, const unsigned char* i, + const unsigned char* key, const unsigned char* key2, + int nr) + XASM_LINK("AES_XTS_decrypt_avx1"); #ifdef WOLFSSL_AESXTS_STREAM -void AES_XTS_decrypt_start_avx1(const unsigned char* i, const unsigned char* tweak_key, - unsigned char *tweak_block, int tweak_nr) +void AES_XTS_decrypt_start_avx1(unsigned char* i, const unsigned char* tweak_key, + int tweak_nr) XASM_LINK("AES_XTS_decrypt_start_avx1"); void AES_XTS_decrypt_update_avx1(const unsigned char *in, unsigned char *out, word32 sz, - const unsigned char* key, unsigned char *tweak_block, int nr) + const unsigned char* key, unsigned char *i, int nr) XASM_LINK("AES_XTS_decrypt_update_avx1"); #endif #endif /* HAVE_INTEL_AVX1 */ @@ -12725,18 +12725,21 @@ static int AesXtsEncrypt_sw(XtsAes* xaes, byte* out, const byte* in, word32 sz, #ifdef WOLFSSL_AESXTS_STREAM -/* streaming AES-XTS. (XTS) XEX encryption with Tweak and cipher text Stealing. +/* Block-streaming AES-XTS tweak setup. * * xaes AES keys to use for block encrypt/decrypt - * i value to use for tweak + * i readwrite value to use for tweak * * returns 0 on success */ -static int AesXtsEncryptStart_sw(XtsAes* xaes, const byte* i, byte *tweak_block) { - return wc_AesEncryptDirect(&xaes->tweak, tweak_block, i); +static int AesXtsEncryptStart_sw(XtsAes* xaes, byte* i) { + return wc_AesEncryptDirect(&xaes->tweak, i, i); } -/* streaming AES-XTS. (XTS) XEX encryption with Tweak and cipher text Stealing. +/* Block-streaming AES-XTS. + * + * Supply block-aligned input data with successive calls. Final call need not + * be block aligned. 
* * xaes AES keys to use for block encrypt/decrypt * out output buffer to hold cipher text @@ -12746,22 +12749,22 @@ static int AesXtsEncryptStart_sw(XtsAes* xaes, const byte* i, byte *tweak_block) * returns 0 on success */ /* Software AES - XTS Encrypt */ -static int AesXtsEncryptUpdate_sw(XtsAes* xaes, byte* out, const byte* in, word32 sz, - byte *tweak_block) +static int AesXtsEncryptUpdate_sw(XtsAes* xaes, byte* out, const byte* in, + word32 sz, + byte *i) { int ret = 0; word32 blocks = (sz / AES_BLOCK_SIZE); Aes *aes = &xaes->aes; -#if 0 -#ifdef HAVE_AES_ECB +#if 0 && defined(HAVE_AES_ECB) /* encrypt all of buffer at once when possible */ - if (in != out) { /* can not handle inline */ - XMEMCPY(out, tweak_block, AES_BLOCK_SIZE); + if ((in != out) && ((sz & (AES_BLOCK_SIZE - 1)) == 0)) { /* can not handle inline */ + XMEMCPY(out, i, AES_BLOCK_SIZE); if ((ret = _AesXtsHelper(aes, out, in, sz, AES_ENCRYPTION)) != 0) return ret; + XMEMCPY(i, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); } -#endif #endif while (blocks > 0) { @@ -12769,29 +12772,29 @@ static int AesXtsEncryptUpdate_sw(XtsAes* xaes, byte* out, const byte* in, word3 byte carry = 0; #if 0 && defined(HAVE_AES_ECB) - if (in == out) + if ((in == out) || ((sz & (AES_BLOCK_SIZE - 1)) != 0)) #endif { /* check for if inline */ byte buf[AES_BLOCK_SIZE]; XMEMCPY(buf, in, AES_BLOCK_SIZE); - xorbuf(buf, tweak_block, AES_BLOCK_SIZE); + xorbuf(buf, i, AES_BLOCK_SIZE); ret = wc_AesEncryptDirect(aes, out, buf); if (ret != 0) return ret; } - xorbuf(out, tweak_block, AES_BLOCK_SIZE); + xorbuf(out, i, AES_BLOCK_SIZE); /* multiply by shift left and propagate carry */ for (j = 0; j < AES_BLOCK_SIZE; j++) { byte tmpC; - tmpC = (tweak_block[j] >> 7) & 0x01; - tweak_block[j] = (byte)((tweak_block[j] << 1) + carry); + tmpC = (i[j] >> 7) & 0x01; + i[j] = (byte)((i[j] << 1) + carry); carry = tmpC; } if (carry) { - tweak_block[0] ^= GF_XTS; + i[0] ^= GF_XTS; } in += AES_BLOCK_SIZE; @@ -12820,10 +12823,10 @@ static int AesXtsEncryptUpdate_sw(XtsAes* xaes, byte* out, const byte* in, word3 XMEMCPY(out, buf2, sz); } - xorbuf(buf, tweak_block, AES_BLOCK_SIZE); + xorbuf(buf, i, AES_BLOCK_SIZE); ret = wc_AesEncryptDirect(aes, out - AES_BLOCK_SIZE, buf); if (ret == 0) - xorbuf(out - AES_BLOCK_SIZE, tweak_block, AES_BLOCK_SIZE); + xorbuf(out - AES_BLOCK_SIZE, i, AES_BLOCK_SIZE); } return ret; @@ -12913,14 +12916,13 @@ int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, #ifdef WOLFSSL_AESXTS_STREAM -int wc_AesXtsEncryptStart(XtsAes* xaes, const byte* i, word32 iSz, - byte *tweak_block) +int wc_AesXtsEncryptStart(XtsAes* xaes, byte* i, word32 iSz) { int ret; Aes *aes; - if ((xaes == NULL) || (tweak_block == NULL)) { + if ((xaes == NULL) || (i == NULL)) { return BAD_FUNC_ARG; } @@ -12947,7 +12949,6 @@ int wc_AesXtsEncryptStart(XtsAes* xaes, const byte* i, word32 iSz, if (IS_INTEL_AVX1(intel_flags)) { AES_XTS_encrypt_start_avx1(i, (const byte*)xaes->tweak.key, - tweak_block, (int)xaes->tweak.rounds); ret = 0; } @@ -12956,7 +12957,6 @@ int wc_AesXtsEncryptStart(XtsAes* xaes, const byte* i, word32 iSz, { AES_XTS_encrypt_start_aesni(i, (const byte*)xaes->tweak.key, - tweak_block, (int)xaes->tweak.rounds); ret = 0; } @@ -12965,7 +12965,7 @@ int wc_AesXtsEncryptStart(XtsAes* xaes, const byte* i, word32 iSz, else #endif /* 0 && defined(WOLFSSL_AESNI) */ { - ret = AesXtsEncryptStart_sw(xaes, i, tweak_block); + ret = AesXtsEncryptStart_sw(xaes, i); } } @@ -12985,7 +12985,7 @@ int wc_AesXtsEncryptStart(XtsAes* xaes, const byte* i, word32 iSz, * returns 0 
on success */ int wc_AesXtsEncryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 sz, - byte *tweak_block) + byte *i) { int ret; @@ -12993,7 +12993,7 @@ int wc_AesXtsEncryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 sz, Aes *aes; #endif - if (xaes == NULL || out == NULL || in == NULL || tweak_block == NULL) { + if (xaes == NULL || out == NULL || in == NULL || i == NULL) { return BAD_FUNC_ARG; } @@ -13014,7 +13014,7 @@ int wc_AesXtsEncryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 sz, if (IS_INTEL_AVX1(intel_flags)) { AES_XTS_encrypt_update_avx1(in, out, sz, (const byte*)aes->key, - tweak_block, + i, (int)aes->rounds); ret = 0; } @@ -13023,7 +13023,7 @@ int wc_AesXtsEncryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 sz, { AES_XTS_encrypt_update_aesni(in, out, sz, (const byte*)aes->key, - tweak_block, + i, (int)aes->rounds); ret = 0; } @@ -13032,7 +13032,7 @@ int wc_AesXtsEncryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 sz, else #endif /* 0 && defined(WOLFSSL_AESNI) */ { - ret = AesXtsEncryptUpdate_sw(xaes, out, in, sz, tweak_block); + ret = AesXtsEncryptUpdate_sw(xaes, out, in, sz, i); } } @@ -13171,13 +13171,17 @@ static int AesXtsDecrypt_sw(XtsAes* xaes, byte* out, const byte* in, word32 sz, #ifdef WOLFSSL_AESXTS_STREAM -static int AesXtsDecryptStart_sw(XtsAes* xaes, const byte* i, - byte *tweak_block) +static int AesXtsDecryptStart_sw(XtsAes* xaes, byte* i) { - return wc_AesEncryptDirect(&xaes->tweak, tweak_block, i); + return wc_AesEncryptDirect(&xaes->tweak, i, i); } -/* Same process as encryption but use aes_decrypt key. +/* Block-streaming AES-XTS. + * + * Same process as encryption but use decrypt key. + * + * Supply block-aligned input data with successive calls. Final call need not + * be block aligned. 
* * xaes AES keys to use for block encrypt/decrypt * out output buffer to hold plain text @@ -13189,7 +13193,7 @@ static int AesXtsDecryptStart_sw(XtsAes* xaes, const byte* i, */ /* Software AES - XTS Decrypt */ static int AesXtsDecryptUpdate_sw(XtsAes* xaes, byte* out, const byte* in, - word32 sz, byte *tweak_block) + word32 sz, byte *i) { int ret = 0; word32 blocks = (sz / AES_BLOCK_SIZE); @@ -13208,42 +13212,41 @@ static int AesXtsDecryptUpdate_sw(XtsAes* xaes, byte* out, const byte* in, blocks--; } -#if 0 -#ifdef HAVE_AES_ECB +#if 0 && defined(HAVE_AES_ECB) /* decrypt all of buffer at once when possible */ - if (in != out) { /* can not handle inline */ - XMEMCPY(out, tweak_block, AES_BLOCK_SIZE); + if ((in != out) && ((sz & (AES_BLOCK_SIZE - 1)) == 0)) { /* can not handle inline */ + XMEMCPY(out, i, AES_BLOCK_SIZE); if ((ret = _AesXtsHelper(aes, out, in, sz, AES_DECRYPTION)) != 0) return ret; + XMEMCPY(i, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); } #endif -#endif /* 0 */ while (blocks > 0) { #if 0 && defined(HAVE_AES_ECB) - if (in == out) + if ((in == out) || ((sz & (AES_BLOCK_SIZE - 1)) != 0)) #endif { /* check for if inline */ byte buf[AES_BLOCK_SIZE]; XMEMCPY(buf, in, AES_BLOCK_SIZE); - xorbuf(buf, tweak_block, AES_BLOCK_SIZE); + xorbuf(buf, i, AES_BLOCK_SIZE); ret = wc_AesDecryptDirect(aes, out, buf); if (ret != 0) return ret; } - xorbuf(out, tweak_block, AES_BLOCK_SIZE); + xorbuf(out, i, AES_BLOCK_SIZE); /* multiply by shift left and propagate carry */ for (j = 0; j < AES_BLOCK_SIZE; j++) { byte tmpC; - tmpC = (tweak_block[j] >> 7) & 0x01; - tweak_block[j] = (byte)((tweak_block[j] << 1) + carry); + tmpC = (i[j] >> 7) & 0x01; + i[j] = (byte)((i[j] << 1) + carry); carry = tmpC; } if (carry) { - tweak_block[0] ^= GF_XTS; + i[0] ^= GF_XTS; } carry = 0; @@ -13262,8 +13265,8 @@ static int AesXtsDecryptUpdate_sw(XtsAes* xaes, byte* out, const byte* in, for (j = 0; j < AES_BLOCK_SIZE; j++) { byte tmpC; - tmpC = (tweak_block[j] >> 7) & 0x01; - tmp2[j] = (byte)((tweak_block[j] << 1) + carry); + tmpC = (i[j] >> 7) & 0x01; + tmp2[j] = (byte)((i[j] << 1) + carry); carry = tmpC; } if (carry) { @@ -13291,11 +13294,11 @@ static int AesXtsDecryptUpdate_sw(XtsAes* xaes, byte* out, const byte* in, XMEMCPY(buf, in, sz); XMEMCPY(out, tmp2, sz); - xorbuf(buf, tweak_block, AES_BLOCK_SIZE); + xorbuf(buf, i, AES_BLOCK_SIZE); ret = wc_AesDecryptDirect(aes, tmp2, buf); if (ret != 0) return ret; - xorbuf(tmp2, tweak_block, AES_BLOCK_SIZE); + xorbuf(tmp2, i, AES_BLOCK_SIZE); XMEMCPY(out - AES_BLOCK_SIZE, tmp2, AES_BLOCK_SIZE); } @@ -13392,17 +13395,14 @@ int wc_AesXtsDecrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, /* Same process as encryption but Aes key is AES_DECRYPTION type. * * xaes AES keys to use for block encrypt/decrypt - * out output buffer to hold plain text - * in input cipher text buffer to decrypt - * sz size of both out and in buffers - * i value to use for tweak + * i readwrite value to use for tweak * iSz size of i buffer, should always be AES_BLOCK_SIZE but having this input * adds a sanity check on how the user calls the function. 
+ * tweak_block buffer of size AES_BLOCK_SIZE to use for tweak state * * returns 0 on success */ -int wc_AesXtsDecryptStart(XtsAes* xaes, const byte* i, word32 iSz, - byte *tweak_block) +int wc_AesXtsDecryptStart(XtsAes* xaes, byte* i, word32 iSz) { int ret; Aes *aes; @@ -13434,7 +13434,6 @@ int wc_AesXtsDecryptStart(XtsAes* xaes, const byte* i, word32 iSz, if (IS_INTEL_AVX1(intel_flags)) { AES_XTS_decrypt_start_avx1(i, (const byte*)xaes->tweak.key, - tweak_block, (int)xaes->tweak.rounds); ret = 0; } @@ -13443,7 +13442,6 @@ int wc_AesXtsDecryptStart(XtsAes* xaes, const byte* i, word32 iSz, { AES_XTS_decrypt_start_aesni(i, (const byte*)xaes->tweak.key, - tweak_block, (int)xaes->tweak.rounds); ret = 0; } @@ -13452,7 +13450,7 @@ int wc_AesXtsDecryptStart(XtsAes* xaes, const byte* i, word32 iSz, else #endif /* 0 && defined(WOLFSSL_AESNI) */ { - ret = AesXtsDecryptStart_sw(xaes, i, tweak_block); + ret = AesXtsDecryptStart_sw(xaes, i); } } @@ -13466,14 +13464,12 @@ int wc_AesXtsDecryptStart(XtsAes* xaes, const byte* i, word32 iSz, * out output buffer to hold plain text * in input cipher text buffer to decrypt * sz size of both out and in buffers - * i value to use for tweak - * iSz size of i buffer, should always be AES_BLOCK_SIZE but having this input - * adds a sanity check on how the user calls the function. + * i tweak buffer of size AES_BLOCK_SIZE. * * returns 0 on success */ int wc_AesXtsDecryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 sz, - byte *tweak_block) + byte *i) { int ret; #if 0 && defined(WOLFSSL_AESNI) @@ -13505,7 +13501,7 @@ int wc_AesXtsDecryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 sz, if (IS_INTEL_AVX1(intel_flags)) { AES_XTS_decrypt_update_avx1(in, out, sz, (const byte*)aes->key, - tweak_block, + i, (int)aes->rounds); ret = 0; } @@ -13514,7 +13510,7 @@ int wc_AesXtsDecryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 sz, { AES_XTS_decrypt_update_aesni(in, out, sz, (const byte*)aes->key, - tweak_block, + i, (int)aes->rounds); ret = 0; } @@ -13523,7 +13519,7 @@ int wc_AesXtsDecryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 sz, else #endif /* 0 && defined(WOLFSSL_AESNI) */ { - ret = AesXtsDecryptUpdate_sw(xaes, out, in, sz, tweak_block); + ret = AesXtsDecryptUpdate_sw(xaes, out, in, sz, i); } } diff --git a/wolfssl/wolfcrypt/aes.h b/wolfssl/wolfcrypt/aes.h index a7efaa581..df0636dfe 100644 --- a/wolfssl/wolfcrypt/aes.h +++ b/wolfssl/wolfcrypt/aes.h @@ -671,17 +671,15 @@ WOLFSSL_API int wc_AesXtsDecryptConsecutiveSectors(XtsAes* aes, #ifdef WOLFSSL_AESXTS_STREAM -WOLFSSL_API int wc_AesXtsEncryptStart(XtsAes* aes, const byte* i, word32 iSz, - byte *tweak_block); +WOLFSSL_API int wc_AesXtsEncryptStart(XtsAes* aes, byte* i, word32 iSz); -WOLFSSL_API int wc_AesXtsDecryptStart(XtsAes* aes, const byte* i, word32 iSz, - byte *tweak_block); +WOLFSSL_API int wc_AesXtsDecryptStart(XtsAes* aes, byte* i, word32 iSz); WOLFSSL_API int wc_AesXtsEncryptUpdate(XtsAes* aes, byte* out, - const byte* in, word32 sz, byte *tweak_block); + const byte* in, word32 sz, byte *i); WOLFSSL_API int wc_AesXtsDecryptUpdate(XtsAes* aes, byte* out, - const byte* in, word32 sz, byte *tweak_block); + const byte* in, word32 sz, byte *i); #endif /* WOLFSSL_AESXTS_STREAM */ From 643f472cfb45f54be9cdfb8fe838f00e6853ee49 Mon Sep 17 00:00:00 2001 From: Sean Parkinson Date: Tue, 14 May 2024 15:17:50 +1000 Subject: [PATCH 05/10] AES-XTS ASM x64: Add Intel x64 implementation of streaming Changed APIs from wc_AesXts*Start -> wc_AesXts*Init. Enabled ASM for x64 in aes.c. 
AesXtsDecryptStart_sw same as AesXtsEncryptStart_sw so changed them to AesXtsInit_sw. --- wolfcrypt/src/aes.c | 73 +- wolfcrypt/src/aes_xts_asm.S | 1339 +++++++++++++++++++++++++++++++- wolfcrypt/src/aes_xts_asm.asm | 1359 ++++++++++++++++++++++++++++++++- wolfssl/wolfcrypt/aes.h | 4 +- 4 files changed, 2724 insertions(+), 51 deletions(-) diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index cce13b1a8..796683234 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -12530,9 +12530,9 @@ void AES_XTS_encrypt_aesni(const unsigned char *in, unsigned char *out, word32 s const unsigned char* key2, int nr) XASM_LINK("AES_XTS_encrypt_aesni"); #ifdef WOLFSSL_AESXTS_STREAM -void AES_XTS_encrypt_start_aesni(unsigned char* i, const unsigned char* tweak_key, +void AES_XTS_init_aesni(unsigned char* i, const unsigned char* tweak_key, int tweak_nr) - XASM_LINK("AES_XTS_encrypt_start_aesni"); + XASM_LINK("AES_XTS_init_aesni"); void AES_XTS_encrypt_update_aesni(const unsigned char *in, unsigned char *out, word32 sz, const unsigned char* key, unsigned char *i, int nr) XASM_LINK("AES_XTS_encrypt_update_aesni"); @@ -12544,9 +12544,9 @@ void AES_XTS_encrypt_avx1(const unsigned char *in, unsigned char *out, int nr) XASM_LINK("AES_XTS_encrypt_avx1"); #ifdef WOLFSSL_AESXTS_STREAM -void AES_XTS_encrypt_start_avx1(unsigned char* i, const unsigned char* tweak_key, +void AES_XTS_init_avx1(unsigned char* i, const unsigned char* tweak_key, int tweak_nr) - XASM_LINK("AES_XTS_encrypt_start_avx1"); + XASM_LINK("AES_XTS_init_avx1"); void AES_XTS_encrypt_update_avx1(const unsigned char *in, unsigned char *out, word32 sz, const unsigned char* key, unsigned char *i, int nr) XASM_LINK("AES_XTS_encrypt_update_avx1"); @@ -12559,9 +12559,6 @@ void AES_XTS_decrypt_aesni(const unsigned char *in, unsigned char *out, word32 s const unsigned char* key2, int nr) XASM_LINK("AES_XTS_decrypt_aesni"); #ifdef WOLFSSL_AESXTS_STREAM -void AES_XTS_decrypt_start_aesni(unsigned char* i, const unsigned char* tweak_key, - int tweak_nr) - XASM_LINK("AES_XTS_decrypt_start_aesni"); void AES_XTS_decrypt_update_aesni(const unsigned char *in, unsigned char *out, word32 sz, const unsigned char* key, unsigned char *i, int nr) XASM_LINK("AES_XTS_decrypt_update_aesni"); @@ -12573,9 +12570,6 @@ void AES_XTS_decrypt_avx1(const unsigned char *in, unsigned char *out, int nr) XASM_LINK("AES_XTS_decrypt_avx1"); #ifdef WOLFSSL_AESXTS_STREAM -void AES_XTS_decrypt_start_avx1(unsigned char* i, const unsigned char* tweak_key, - int tweak_nr) - XASM_LINK("AES_XTS_decrypt_start_avx1"); void AES_XTS_decrypt_update_avx1(const unsigned char *in, unsigned char *out, word32 sz, const unsigned char* key, unsigned char *i, int nr) XASM_LINK("AES_XTS_decrypt_update_avx1"); @@ -12732,7 +12726,7 @@ static int AesXtsEncrypt_sw(XtsAes* xaes, byte* out, const byte* in, word32 sz, * * returns 0 on success */ -static int AesXtsEncryptStart_sw(XtsAes* xaes, byte* i) { +static int AesXtsInit_sw(XtsAes* xaes, byte* i) { return wc_AesEncryptDirect(&xaes->tweak, i, i); } @@ -12916,7 +12910,7 @@ int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, #ifdef WOLFSSL_AESXTS_STREAM -int wc_AesXtsEncryptStart(XtsAes* xaes, byte* i, word32 iSz) +int wc_AesXtsEncryptInit(XtsAes* xaes, byte* i, word32 iSz) { int ret; @@ -12942,30 +12936,28 @@ int wc_AesXtsEncryptStart(XtsAes* xaes, byte* i, word32 iSz) } { -#if 0 && defined(WOLFSSL_AESNI) +#ifdef WOLFSSL_AESNI if (aes->use_aesni) { SAVE_VECTOR_REGISTERS(return _svr_ret;); #if defined(HAVE_INTEL_AVX1) if 
(IS_INTEL_AVX1(intel_flags)) { - AES_XTS_encrypt_start_avx1(i, - (const byte*)xaes->tweak.key, - (int)xaes->tweak.rounds); + AES_XTS_init_avx1(i, (const byte*)xaes->tweak.key, + (int)xaes->tweak.rounds); ret = 0; } else #endif { - AES_XTS_encrypt_start_aesni(i, - (const byte*)xaes->tweak.key, - (int)xaes->tweak.rounds); + AES_XTS_init_aesni(i, (const byte*)xaes->tweak.key, + (int)xaes->tweak.rounds); ret = 0; } RESTORE_VECTOR_REGISTERS(); } else -#endif /* 0 && defined(WOLFSSL_AESNI) */ +#endif /* WOLFSSL_AESNI */ { - ret = AesXtsEncryptStart_sw(xaes, i); + ret = AesXtsInit_sw(xaes, i); } } @@ -12989,7 +12981,7 @@ int wc_AesXtsEncryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 sz, { int ret; -#if 0 && defined(WOLFSSL_AESNI) +#ifdef WOLFSSL_AESNI Aes *aes; #endif @@ -12997,7 +12989,7 @@ int wc_AesXtsEncryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 sz, return BAD_FUNC_ARG; } -#if 0 && defined(WOLFSSL_AESNI) +#ifdef WOLFSSL_AESNI aes = &xaes->aes; #endif @@ -13007,7 +12999,7 @@ int wc_AesXtsEncryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 sz, } { -#if 0 && defined(WOLFSSL_AESNI) +#ifdef WOLFSSL_AESNI if (aes->use_aesni) { SAVE_VECTOR_REGISTERS(return _svr_ret;); #if defined(HAVE_INTEL_AVX1) @@ -13030,7 +13022,7 @@ int wc_AesXtsEncryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 sz, RESTORE_VECTOR_REGISTERS(); } else -#endif /* 0 && defined(WOLFSSL_AESNI) */ +#endif /* WOLFSSL_AESNI */ { ret = AesXtsEncryptUpdate_sw(xaes, out, in, sz, i); } @@ -13171,11 +13163,6 @@ static int AesXtsDecrypt_sw(XtsAes* xaes, byte* out, const byte* in, word32 sz, #ifdef WOLFSSL_AESXTS_STREAM -static int AesXtsDecryptStart_sw(XtsAes* xaes, byte* i) -{ - return wc_AesEncryptDirect(&xaes->tweak, i, i); -} - /* Block-streaming AES-XTS. * * Same process as encryption but use decrypt key. 
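Aside, not part of the diff: the removed AesXtsDecryptStart_sw, like its encrypt-side twin, only pre-encrypted the tweak in place; between 16-byte blocks each *Update path then multiplies the tweak by alpha in GF(2^128), which is what the psrad/pslld/pshufd/pand/pxor sequences against the 135,1,1,1 constant in the assembly below compute. A portable byte-wise sketch of that step, for reference only:

    #include <stdint.h>

    /* Illustrative equivalent of the per-block tweak advance, not the
     * shipped code: multiply the little-endian 128-bit tweak by x,
     * reducing modulo x^128 + x^7 + x^2 + x + 1 (feedback 0x87 == 135). */
    static void xts_tweak_mul_alpha(uint8_t t[16])
    {
        uint8_t carry = 0;
        int j;
        for (j = 0; j < 16; j++) {
            uint8_t msb = (uint8_t)(t[j] >> 7);
            t[j] = (uint8_t)((t[j] << 1) | carry);
            carry = msb;
        }
        if (carry)
            t[0] ^= 0x87;   /* fold the carry back in per the GF modulus */
    }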
@@ -13402,7 +13389,7 @@ int wc_AesXtsDecrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, * * returns 0 on success */ -int wc_AesXtsDecryptStart(XtsAes* xaes, byte* i, word32 iSz) +int wc_AesXtsDecryptInit(XtsAes* xaes, byte* i, word32 iSz) { int ret; Aes *aes; @@ -13427,30 +13414,28 @@ int wc_AesXtsDecryptStart(XtsAes* xaes, byte* i, word32 iSz) } { -#if 0 && defined(WOLFSSL_AESNI) +#ifdef WOLFSSL_AESNI if (aes->use_aesni) { SAVE_VECTOR_REGISTERS(return _svr_ret;); #if defined(HAVE_INTEL_AVX1) if (IS_INTEL_AVX1(intel_flags)) { - AES_XTS_decrypt_start_avx1(i, - (const byte*)xaes->tweak.key, - (int)xaes->tweak.rounds); + AES_XTS_init_avx1(i, (const byte*)xaes->tweak.key, + (int)xaes->tweak.rounds); ret = 0; } else #endif { - AES_XTS_decrypt_start_aesni(i, - (const byte*)xaes->tweak.key, - (int)xaes->tweak.rounds); + AES_XTS_init_aesni(i, (const byte*)xaes->tweak.key, + (int)xaes->tweak.rounds); ret = 0; } RESTORE_VECTOR_REGISTERS(); } else -#endif /* 0 && defined(WOLFSSL_AESNI) */ +#endif /* WOLFSSL_AESNI */ { - ret = AesXtsDecryptStart_sw(xaes, i); + ret = AesXtsInit_sw(xaes, i); } } @@ -13472,7 +13457,7 @@ int wc_AesXtsDecryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 sz, byte *i) { int ret; -#if 0 && defined(WOLFSSL_AESNI) +#ifdef WOLFSSL_AESNI Aes *aes; #endif @@ -13480,7 +13465,7 @@ int wc_AesXtsDecryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 sz, return BAD_FUNC_ARG; } -#if 0 && defined(WOLFSSL_AESNI) +#ifdef WOLFSSL_AESNI #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS aes = &xaes->aes_decrypt; #else @@ -13494,7 +13479,7 @@ int wc_AesXtsDecryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 sz, } { -#if 0 && defined(WOLFSSL_AESNI) +#ifdef WOLFSSL_AESNI if (aes->use_aesni) { SAVE_VECTOR_REGISTERS(return _svr_ret;); #if defined(HAVE_INTEL_AVX1) @@ -13517,7 +13502,7 @@ int wc_AesXtsDecryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 sz, RESTORE_VECTOR_REGISTERS(); } else -#endif /* 0 && defined(WOLFSSL_AESNI) */ +#endif /* WOLFSSL_AESNI */ { ret = AesXtsDecryptUpdate_sw(xaes, out, in, sz, i); } diff --git a/wolfcrypt/src/aes_xts_asm.S b/wolfcrypt/src/aes_xts_asm.S index fedead84f..f65c01525 100644 --- a/wolfcrypt/src/aes_xts_asm.S +++ b/wolfcrypt/src/aes_xts_asm.S @@ -1,6 +1,6 @@ /* aes_xts_asm.S */ /* - * Copyright (C) 2006-2023 wolfSSL Inc. + * Copyright (C) 2006-2024 wolfSSL Inc. * * This file is part of wolfSSL. 
* @@ -48,6 +48,59 @@ #ifdef WOLFSSL_AES_XTS #ifdef WOLFSSL_X86_64_BUILD #ifndef __APPLE__ +.text +.globl AES_XTS_init_aesni +.type AES_XTS_init_aesni,@function +.align 16 +AES_XTS_init_aesni: +#else +.section __TEXT,__text +.globl _AES_XTS_init_aesni +.p2align 4 +_AES_XTS_init_aesni: +#endif /* __APPLE__ */ + movdqu (%rdi), %xmm0 + # aes_enc_block + pxor (%rsi), %xmm0 + movdqu 16(%rsi), %xmm2 + aesenc %xmm2, %xmm0 + movdqu 32(%rsi), %xmm2 + aesenc %xmm2, %xmm0 + movdqu 48(%rsi), %xmm2 + aesenc %xmm2, %xmm0 + movdqu 64(%rsi), %xmm2 + aesenc %xmm2, %xmm0 + movdqu 80(%rsi), %xmm2 + aesenc %xmm2, %xmm0 + movdqu 96(%rsi), %xmm2 + aesenc %xmm2, %xmm0 + movdqu 112(%rsi), %xmm2 + aesenc %xmm2, %xmm0 + movdqu 128(%rsi), %xmm2 + aesenc %xmm2, %xmm0 + movdqu 144(%rsi), %xmm2 + aesenc %xmm2, %xmm0 + cmpl $11, %edx + movdqu 160(%rsi), %xmm2 + jl L_AES_XTS_init_aesni_tweak_aes_enc_block_last + aesenc %xmm2, %xmm0 + movdqu 176(%rsi), %xmm3 + aesenc %xmm3, %xmm0 + cmpl $13, %edx + movdqu 192(%rsi), %xmm2 + jl L_AES_XTS_init_aesni_tweak_aes_enc_block_last + aesenc %xmm2, %xmm0 + movdqu 208(%rsi), %xmm3 + aesenc %xmm3, %xmm0 + movdqu 224(%rsi), %xmm2 +L_AES_XTS_init_aesni_tweak_aes_enc_block_last: + aesenclast %xmm2, %xmm0 + movdqu %xmm0, (%rdi) + repz retq +#ifndef __APPLE__ +.size AES_XTS_init_aesni,.-AES_XTS_init_aesni +#endif /* __APPLE__ */ +#ifndef __APPLE__ .data #else .section __DATA,__data @@ -378,6 +431,291 @@ L_AES_XTS_encrypt_aesni_done_enc: #endif /* __APPLE__ */ #ifndef __APPLE__ .text +.globl AES_XTS_encrypt_update_aesni +.type AES_XTS_encrypt_update_aesni,@function +.align 16 +AES_XTS_encrypt_update_aesni: +#else +.section __TEXT,__text +.globl _AES_XTS_encrypt_update_aesni +.p2align 4 +_AES_XTS_encrypt_update_aesni: +#endif /* __APPLE__ */ + pushq %r12 + movq %rdx, %rax + movq %rcx, %r10 + subq $0x40, %rsp + movdqu L_aes_xts_gc_xts(%rip), %xmm12 + movdqu (%r8), %xmm0 + xorl %r12d, %r12d + cmpl $0x40, %eax + movl %eax, %r11d + jl L_AES_XTS_encrypt_update_aesni_done_64 + andl $0xffffffc0, %r11d +L_AES_XTS_encrypt_update_aesni_enc_64: + # 64 bytes of input + # aes_enc_64 + leaq (%rdi,%r12,1), %rcx + leaq (%rsi,%r12,1), %rdx + movdqu (%rcx), %xmm8 + movdqu 16(%rcx), %xmm9 + movdqu 32(%rcx), %xmm10 + movdqu 48(%rcx), %xmm11 + movdqa %xmm0, %xmm4 + movdqa %xmm0, %xmm1 + psrad $31, %xmm4 + pslld $0x01, %xmm1 + pshufd $0x93, %xmm4, %xmm4 + pand %xmm12, %xmm4 + pxor %xmm4, %xmm1 + movdqa %xmm1, %xmm4 + movdqa %xmm1, %xmm2 + psrad $31, %xmm4 + pslld $0x01, %xmm2 + pshufd $0x93, %xmm4, %xmm4 + pand %xmm12, %xmm4 + pxor %xmm4, %xmm2 + movdqa %xmm2, %xmm4 + movdqa %xmm2, %xmm3 + psrad $31, %xmm4 + pslld $0x01, %xmm3 + pshufd $0x93, %xmm4, %xmm4 + pand %xmm12, %xmm4 + pxor %xmm4, %xmm3 + pxor %xmm0, %xmm8 + pxor %xmm1, %xmm9 + pxor %xmm2, %xmm10 + pxor %xmm3, %xmm11 + # aes_enc_block + movdqu (%r10), %xmm4 + pxor %xmm4, %xmm8 + pxor %xmm4, %xmm9 + pxor %xmm4, %xmm10 + pxor %xmm4, %xmm11 + movdqu 16(%r10), %xmm4 + aesenc %xmm4, %xmm8 + aesenc %xmm4, %xmm9 + aesenc %xmm4, %xmm10 + aesenc %xmm4, %xmm11 + movdqu 32(%r10), %xmm4 + aesenc %xmm4, %xmm8 + aesenc %xmm4, %xmm9 + aesenc %xmm4, %xmm10 + aesenc %xmm4, %xmm11 + movdqu 48(%r10), %xmm4 + aesenc %xmm4, %xmm8 + aesenc %xmm4, %xmm9 + aesenc %xmm4, %xmm10 + aesenc %xmm4, %xmm11 + movdqu 64(%r10), %xmm4 + aesenc %xmm4, %xmm8 + aesenc %xmm4, %xmm9 + aesenc %xmm4, %xmm10 + aesenc %xmm4, %xmm11 + movdqu 80(%r10), %xmm4 + aesenc %xmm4, %xmm8 + aesenc %xmm4, %xmm9 + aesenc %xmm4, %xmm10 + aesenc %xmm4, %xmm11 + movdqu 96(%r10), %xmm4 + aesenc %xmm4, %xmm8 + 
aesenc %xmm4, %xmm9 + aesenc %xmm4, %xmm10 + aesenc %xmm4, %xmm11 + movdqu 112(%r10), %xmm4 + aesenc %xmm4, %xmm8 + aesenc %xmm4, %xmm9 + aesenc %xmm4, %xmm10 + aesenc %xmm4, %xmm11 + movdqu 128(%r10), %xmm4 + aesenc %xmm4, %xmm8 + aesenc %xmm4, %xmm9 + aesenc %xmm4, %xmm10 + aesenc %xmm4, %xmm11 + movdqu 144(%r10), %xmm4 + aesenc %xmm4, %xmm8 + aesenc %xmm4, %xmm9 + aesenc %xmm4, %xmm10 + aesenc %xmm4, %xmm11 + cmpl $11, %r9d + movdqu 160(%r10), %xmm4 + jl L_AES_XTS_encrypt_update_aesni_aes_enc_64_aes_enc_block_last + aesenc %xmm4, %xmm8 + aesenc %xmm4, %xmm9 + aesenc %xmm4, %xmm10 + aesenc %xmm4, %xmm11 + movdqu 176(%r10), %xmm4 + aesenc %xmm4, %xmm8 + aesenc %xmm4, %xmm9 + aesenc %xmm4, %xmm10 + aesenc %xmm4, %xmm11 + cmpl $13, %r9d + movdqu 192(%r10), %xmm4 + jl L_AES_XTS_encrypt_update_aesni_aes_enc_64_aes_enc_block_last + aesenc %xmm4, %xmm8 + aesenc %xmm4, %xmm9 + aesenc %xmm4, %xmm10 + aesenc %xmm4, %xmm11 + movdqu 208(%r10), %xmm4 + aesenc %xmm4, %xmm8 + aesenc %xmm4, %xmm9 + aesenc %xmm4, %xmm10 + aesenc %xmm4, %xmm11 + movdqu 224(%r10), %xmm4 +L_AES_XTS_encrypt_update_aesni_aes_enc_64_aes_enc_block_last: + aesenclast %xmm4, %xmm8 + aesenclast %xmm4, %xmm9 + aesenclast %xmm4, %xmm10 + aesenclast %xmm4, %xmm11 + pxor %xmm0, %xmm8 + pxor %xmm1, %xmm9 + pxor %xmm2, %xmm10 + pxor %xmm3, %xmm11 + movdqu %xmm8, (%rdx) + movdqu %xmm9, 16(%rdx) + movdqu %xmm10, 32(%rdx) + movdqu %xmm11, 48(%rdx) + movdqa %xmm3, %xmm4 + movdqa %xmm3, %xmm0 + psrad $31, %xmm4 + pslld $0x01, %xmm0 + pshufd $0x93, %xmm4, %xmm4 + pand %xmm12, %xmm4 + pxor %xmm4, %xmm0 + addl $0x40, %r12d + cmpl %r11d, %r12d + jl L_AES_XTS_encrypt_update_aesni_enc_64 +L_AES_XTS_encrypt_update_aesni_done_64: + cmpl %eax, %r12d + movl %eax, %r11d + je L_AES_XTS_encrypt_update_aesni_done_enc + subl %r12d, %r11d + cmpl $16, %r11d + movl %eax, %r11d + jl L_AES_XTS_encrypt_update_aesni_last_15 + andl $0xfffffff0, %r11d + # 16 bytes of input +L_AES_XTS_encrypt_update_aesni_enc_16: + leaq (%rdi,%r12,1), %rcx + movdqu (%rcx), %xmm8 + pxor %xmm0, %xmm8 + # aes_enc_block + pxor (%r10), %xmm8 + movdqu 16(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 32(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 48(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 64(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 80(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 96(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 112(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 128(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 144(%r10), %xmm5 + aesenc %xmm5, %xmm8 + cmpl $11, %r9d + movdqu 160(%r10), %xmm5 + jl L_AES_XTS_encrypt_update_aesni_aes_enc_block_last + aesenc %xmm5, %xmm8 + movdqu 176(%r10), %xmm6 + aesenc %xmm6, %xmm8 + cmpl $13, %r9d + movdqu 192(%r10), %xmm5 + jl L_AES_XTS_encrypt_update_aesni_aes_enc_block_last + aesenc %xmm5, %xmm8 + movdqu 208(%r10), %xmm6 + aesenc %xmm6, %xmm8 + movdqu 224(%r10), %xmm5 +L_AES_XTS_encrypt_update_aesni_aes_enc_block_last: + aesenclast %xmm5, %xmm8 + pxor %xmm0, %xmm8 + leaq (%rsi,%r12,1), %rcx + movdqu %xmm8, (%rcx) + movdqa %xmm0, %xmm4 + psrad $31, %xmm4 + pslld $0x01, %xmm0 + pshufd $0x93, %xmm4, %xmm4 + pand %xmm12, %xmm4 + pxor %xmm4, %xmm0 + addl $16, %r12d + cmpl %r11d, %r12d + jl L_AES_XTS_encrypt_update_aesni_enc_16 + cmpl %eax, %r12d + je L_AES_XTS_encrypt_update_aesni_done_enc +L_AES_XTS_encrypt_update_aesni_last_15: + subq $16, %r12 + leaq (%rsi,%r12,1), %rcx + movdqu (%rcx), %xmm8 + addq $16, %r12 + movdqu %xmm8, (%rsp) + xorq %rdx, %rdx +L_AES_XTS_encrypt_update_aesni_last_15_byte_loop: + movb (%rsp,%rdx,1), %r11b + movb 
(%rdi,%r12,1), %cl + movb %r11b, (%rsi,%r12,1) + movb %cl, (%rsp,%rdx,1) + incl %r12d + incl %edx + cmpl %eax, %r12d + jl L_AES_XTS_encrypt_update_aesni_last_15_byte_loop + subq %rdx, %r12 + movdqu (%rsp), %xmm8 + subq $16, %r12 + pxor %xmm0, %xmm8 + # aes_enc_block + pxor (%r10), %xmm8 + movdqu 16(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 32(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 48(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 64(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 80(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 96(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 112(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 128(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 144(%r10), %xmm5 + aesenc %xmm5, %xmm8 + cmpl $11, %r9d + movdqu 160(%r10), %xmm5 + jl L_AES_XTS_encrypt_update_aesni_last_15_aes_enc_block_last + aesenc %xmm5, %xmm8 + movdqu 176(%r10), %xmm6 + aesenc %xmm6, %xmm8 + cmpl $13, %r9d + movdqu 192(%r10), %xmm5 + jl L_AES_XTS_encrypt_update_aesni_last_15_aes_enc_block_last + aesenc %xmm5, %xmm8 + movdqu 208(%r10), %xmm6 + aesenc %xmm6, %xmm8 + movdqu 224(%r10), %xmm5 +L_AES_XTS_encrypt_update_aesni_last_15_aes_enc_block_last: + aesenclast %xmm5, %xmm8 + pxor %xmm0, %xmm8 + leaq (%rsi,%r12,1), %rcx + movdqu %xmm8, (%rcx) +L_AES_XTS_encrypt_update_aesni_done_enc: + movdqu %xmm0, (%r8) + addq $0x40, %rsp + popq %r12 + repz retq +#ifndef __APPLE__ +.size AES_XTS_encrypt_update_aesni,.-AES_XTS_encrypt_update_aesni +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl AES_XTS_decrypt_aesni .type AES_XTS_decrypt_aesni,@function .align 16 @@ -752,8 +1090,401 @@ L_AES_XTS_decrypt_aesni_done_dec: #ifndef __APPLE__ .size AES_XTS_decrypt_aesni,.-AES_XTS_decrypt_aesni #endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl AES_XTS_decrypt_update_aesni +.type AES_XTS_decrypt_update_aesni,@function +.align 16 +AES_XTS_decrypt_update_aesni: +#else +.section __TEXT,__text +.globl _AES_XTS_decrypt_update_aesni +.p2align 4 +_AES_XTS_decrypt_update_aesni: +#endif /* __APPLE__ */ + pushq %r12 + movq %rdx, %rax + movq %rcx, %r10 + subq $16, %rsp + movdqu L_aes_xts_gc_xts(%rip), %xmm12 + movdqu (%r8), %xmm0 + xorl %r12d, %r12d + movl %eax, %r11d + andl $0xfffffff0, %r11d + cmpl %eax, %r11d + je L_AES_XTS_decrypt_update_aesni_mul16_64 + subl $16, %r11d + cmpl $16, %r11d + jl L_AES_XTS_decrypt_update_aesni_last_31_start +L_AES_XTS_decrypt_update_aesni_mul16_64: + cmpl $0x40, %r11d + jl L_AES_XTS_decrypt_update_aesni_done_64 + andl $0xffffffc0, %r11d +L_AES_XTS_decrypt_update_aesni_dec_64: + # 64 bytes of input + # aes_dec_64 + leaq (%rdi,%r12,1), %rcx + leaq (%rsi,%r12,1), %rdx + movdqu (%rcx), %xmm8 + movdqu 16(%rcx), %xmm9 + movdqu 32(%rcx), %xmm10 + movdqu 48(%rcx), %xmm11 + movdqa %xmm0, %xmm4 + movdqa %xmm0, %xmm1 + psrad $31, %xmm4 + pslld $0x01, %xmm1 + pshufd $0x93, %xmm4, %xmm4 + pand %xmm12, %xmm4 + pxor %xmm4, %xmm1 + movdqa %xmm1, %xmm4 + movdqa %xmm1, %xmm2 + psrad $31, %xmm4 + pslld $0x01, %xmm2 + pshufd $0x93, %xmm4, %xmm4 + pand %xmm12, %xmm4 + pxor %xmm4, %xmm2 + movdqa %xmm2, %xmm4 + movdqa %xmm2, %xmm3 + psrad $31, %xmm4 + pslld $0x01, %xmm3 + pshufd $0x93, %xmm4, %xmm4 + pand %xmm12, %xmm4 + pxor %xmm4, %xmm3 + pxor %xmm0, %xmm8 + pxor %xmm1, %xmm9 + pxor %xmm2, %xmm10 + pxor %xmm3, %xmm11 + # aes_dec_block + movdqu (%r10), %xmm4 + pxor %xmm4, %xmm8 + pxor %xmm4, %xmm9 + pxor %xmm4, %xmm10 + pxor %xmm4, %xmm11 + movdqu 16(%r10), %xmm4 + aesdec %xmm4, %xmm8 + aesdec %xmm4, %xmm9 + aesdec %xmm4, %xmm10 + aesdec %xmm4, %xmm11 + movdqu 32(%r10), %xmm4 + aesdec %xmm4, %xmm8 + aesdec 
%xmm4, %xmm9 + aesdec %xmm4, %xmm10 + aesdec %xmm4, %xmm11 + movdqu 48(%r10), %xmm4 + aesdec %xmm4, %xmm8 + aesdec %xmm4, %xmm9 + aesdec %xmm4, %xmm10 + aesdec %xmm4, %xmm11 + movdqu 64(%r10), %xmm4 + aesdec %xmm4, %xmm8 + aesdec %xmm4, %xmm9 + aesdec %xmm4, %xmm10 + aesdec %xmm4, %xmm11 + movdqu 80(%r10), %xmm4 + aesdec %xmm4, %xmm8 + aesdec %xmm4, %xmm9 + aesdec %xmm4, %xmm10 + aesdec %xmm4, %xmm11 + movdqu 96(%r10), %xmm4 + aesdec %xmm4, %xmm8 + aesdec %xmm4, %xmm9 + aesdec %xmm4, %xmm10 + aesdec %xmm4, %xmm11 + movdqu 112(%r10), %xmm4 + aesdec %xmm4, %xmm8 + aesdec %xmm4, %xmm9 + aesdec %xmm4, %xmm10 + aesdec %xmm4, %xmm11 + movdqu 128(%r10), %xmm4 + aesdec %xmm4, %xmm8 + aesdec %xmm4, %xmm9 + aesdec %xmm4, %xmm10 + aesdec %xmm4, %xmm11 + movdqu 144(%r10), %xmm4 + aesdec %xmm4, %xmm8 + aesdec %xmm4, %xmm9 + aesdec %xmm4, %xmm10 + aesdec %xmm4, %xmm11 + cmpl $11, %r9d + movdqu 160(%r10), %xmm4 + jl L_AES_XTS_decrypt_update_aesni_aes_dec_64_aes_dec_block_last + aesdec %xmm4, %xmm8 + aesdec %xmm4, %xmm9 + aesdec %xmm4, %xmm10 + aesdec %xmm4, %xmm11 + movdqu 176(%r10), %xmm4 + aesdec %xmm4, %xmm8 + aesdec %xmm4, %xmm9 + aesdec %xmm4, %xmm10 + aesdec %xmm4, %xmm11 + cmpl $13, %r9d + movdqu 192(%r10), %xmm4 + jl L_AES_XTS_decrypt_update_aesni_aes_dec_64_aes_dec_block_last + aesdec %xmm4, %xmm8 + aesdec %xmm4, %xmm9 + aesdec %xmm4, %xmm10 + aesdec %xmm4, %xmm11 + movdqu 208(%r10), %xmm4 + aesdec %xmm4, %xmm8 + aesdec %xmm4, %xmm9 + aesdec %xmm4, %xmm10 + aesdec %xmm4, %xmm11 + movdqu 224(%r10), %xmm4 +L_AES_XTS_decrypt_update_aesni_aes_dec_64_aes_dec_block_last: + aesdeclast %xmm4, %xmm8 + aesdeclast %xmm4, %xmm9 + aesdeclast %xmm4, %xmm10 + aesdeclast %xmm4, %xmm11 + pxor %xmm0, %xmm8 + pxor %xmm1, %xmm9 + pxor %xmm2, %xmm10 + pxor %xmm3, %xmm11 + movdqu %xmm8, (%rdx) + movdqu %xmm9, 16(%rdx) + movdqu %xmm10, 32(%rdx) + movdqu %xmm11, 48(%rdx) + movdqa %xmm3, %xmm4 + movdqa %xmm3, %xmm0 + psrad $31, %xmm4 + pslld $0x01, %xmm0 + pshufd $0x93, %xmm4, %xmm4 + pand %xmm12, %xmm4 + pxor %xmm4, %xmm0 + addl $0x40, %r12d + cmpl %r11d, %r12d + jl L_AES_XTS_decrypt_update_aesni_dec_64 +L_AES_XTS_decrypt_update_aesni_done_64: + cmpl %eax, %r12d + movl %eax, %r11d + je L_AES_XTS_decrypt_update_aesni_done_dec + andl $0xfffffff0, %r11d + cmpl %eax, %r11d + je L_AES_XTS_decrypt_update_aesni_mul16 + subl $16, %r11d + subl %r12d, %r11d + cmpl $16, %r11d + jl L_AES_XTS_decrypt_update_aesni_last_31_start + addl %r12d, %r11d +L_AES_XTS_decrypt_update_aesni_mul16: +L_AES_XTS_decrypt_update_aesni_dec_16: + # 16 bytes of input + leaq (%rdi,%r12,1), %rcx + movdqu (%rcx), %xmm8 + pxor %xmm0, %xmm8 + # aes_dec_block + pxor (%r10), %xmm8 + movdqu 16(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 32(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 48(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 64(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 80(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 96(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 112(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 128(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 144(%r10), %xmm5 + aesdec %xmm5, %xmm8 + cmpl $11, %r9d + movdqu 160(%r10), %xmm5 + jl L_AES_XTS_decrypt_update_aesni_aes_dec_block_last + aesdec %xmm5, %xmm8 + movdqu 176(%r10), %xmm6 + aesdec %xmm6, %xmm8 + cmpl $13, %r9d + movdqu 192(%r10), %xmm5 + jl L_AES_XTS_decrypt_update_aesni_aes_dec_block_last + aesdec %xmm5, %xmm8 + movdqu 208(%r10), %xmm6 + aesdec %xmm6, %xmm8 + movdqu 224(%r10), %xmm5 +L_AES_XTS_decrypt_update_aesni_aes_dec_block_last: + aesdeclast %xmm5, %xmm8 + pxor %xmm0, %xmm8 + leaq 
(%rsi,%r12,1), %rcx + movdqu %xmm8, (%rcx) + movdqa %xmm0, %xmm4 + psrad $31, %xmm4 + pslld $0x01, %xmm0 + pshufd $0x93, %xmm4, %xmm4 + pand %xmm12, %xmm4 + pxor %xmm4, %xmm0 + addl $16, %r12d + cmpl %r11d, %r12d + jl L_AES_XTS_decrypt_update_aesni_dec_16 + cmpl %eax, %r12d + je L_AES_XTS_decrypt_update_aesni_done_dec +L_AES_XTS_decrypt_update_aesni_last_31_start: + movdqa %xmm0, %xmm4 + movdqa %xmm0, %xmm7 + psrad $31, %xmm4 + pslld $0x01, %xmm7 + pshufd $0x93, %xmm4, %xmm4 + pand %xmm12, %xmm4 + pxor %xmm4, %xmm7 + leaq (%rdi,%r12,1), %rcx + movdqu (%rcx), %xmm8 + pxor %xmm7, %xmm8 + # aes_dec_block + pxor (%r10), %xmm8 + movdqu 16(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 32(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 48(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 64(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 80(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 96(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 112(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 128(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 144(%r10), %xmm5 + aesdec %xmm5, %xmm8 + cmpl $11, %r9d + movdqu 160(%r10), %xmm5 + jl L_AES_XTS_decrypt_update_aesni_last_31_aes_dec_block_last + aesdec %xmm5, %xmm8 + movdqu 176(%r10), %xmm6 + aesdec %xmm6, %xmm8 + cmpl $13, %r9d + movdqu 192(%r10), %xmm5 + jl L_AES_XTS_decrypt_update_aesni_last_31_aes_dec_block_last + aesdec %xmm5, %xmm8 + movdqu 208(%r10), %xmm6 + aesdec %xmm6, %xmm8 + movdqu 224(%r10), %xmm5 +L_AES_XTS_decrypt_update_aesni_last_31_aes_dec_block_last: + aesdeclast %xmm5, %xmm8 + pxor %xmm7, %xmm8 + movdqu %xmm8, (%rsp) + addq $16, %r12 + xorq %rdx, %rdx +L_AES_XTS_decrypt_update_aesni_last_31_byte_loop: + movb (%rsp,%rdx,1), %r11b + movb (%rdi,%r12,1), %cl + movb %r11b, (%rsi,%r12,1) + movb %cl, (%rsp,%rdx,1) + incl %r12d + incl %edx + cmpl %eax, %r12d + jl L_AES_XTS_decrypt_update_aesni_last_31_byte_loop + subq %rdx, %r12 + movdqu (%rsp), %xmm8 + pxor %xmm0, %xmm8 + # aes_dec_block + pxor (%r10), %xmm8 + movdqu 16(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 32(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 48(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 64(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 80(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 96(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 112(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 128(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 144(%r10), %xmm5 + aesdec %xmm5, %xmm8 + cmpl $11, %r9d + movdqu 160(%r10), %xmm5 + jl L_AES_XTS_decrypt_update_aesni_last_31_2_aes_dec_block_last + aesdec %xmm5, %xmm8 + movdqu 176(%r10), %xmm6 + aesdec %xmm6, %xmm8 + cmpl $13, %r9d + movdqu 192(%r10), %xmm5 + jl L_AES_XTS_decrypt_update_aesni_last_31_2_aes_dec_block_last + aesdec %xmm5, %xmm8 + movdqu 208(%r10), %xmm6 + aesdec %xmm6, %xmm8 + movdqu 224(%r10), %xmm5 +L_AES_XTS_decrypt_update_aesni_last_31_2_aes_dec_block_last: + aesdeclast %xmm5, %xmm8 + pxor %xmm0, %xmm8 + subq $16, %r12 + leaq (%rsi,%r12,1), %rcx + movdqu %xmm8, (%rcx) +L_AES_XTS_decrypt_update_aesni_done_dec: + movdqu %xmm0, (%r8) + addq $16, %rsp + popq %r12 + repz retq +#ifndef __APPLE__ +.size AES_XTS_decrypt_update_aesni,.-AES_XTS_decrypt_update_aesni +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX1 #ifndef __APPLE__ +.text +.globl AES_XTS_init_avx1 +.type AES_XTS_init_avx1,@function +.align 16 +AES_XTS_init_avx1: +#else +.section __TEXT,__text +.globl _AES_XTS_init_avx1 +.p2align 4 +_AES_XTS_init_avx1: +#endif /* __APPLE__ */ + movl %edx, %eax + vmovdqu (%rdi), %xmm0 + # aes_enc_block + vpxor (%rsi), %xmm0, %xmm0 + vmovdqu 16(%rsi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + 
vmovdqu 32(%rsi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqu 48(%rsi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqu 64(%rsi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqu 80(%rsi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqu 96(%rsi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqu 112(%rsi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqu 128(%rsi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqu 144(%rsi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + cmpl $11, %eax + vmovdqu 160(%rsi), %xmm2 + jl L_AES_XTS_init_avx1_tweak_aes_enc_block_last + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqu 176(%rsi), %xmm3 + vaesenc %xmm3, %xmm0, %xmm0 + cmpl $13, %eax + vmovdqu 192(%rsi), %xmm2 + jl L_AES_XTS_init_avx1_tweak_aes_enc_block_last + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqu 208(%rsi), %xmm3 + vaesenc %xmm3, %xmm0, %xmm0 + vmovdqu 224(%rsi), %xmm2 +L_AES_XTS_init_avx1_tweak_aes_enc_block_last: + vaesenclast %xmm2, %xmm0, %xmm0 + vmovdqu %xmm0, (%rdi) + repz retq +#ifndef __APPLE__ +.size AES_XTS_init_avx1,.-AES_XTS_init_avx1 +#endif /* __APPLE__ */ +#ifndef __APPLE__ .data #else .section __DATA,__data @@ -1066,7 +1797,6 @@ L_AES_XTS_encrypt_avx1_last_15_aes_enc_block_last: leaq (%rsi,%r13,1), %rcx vmovdqu %xmm8, (%rcx) L_AES_XTS_encrypt_avx1_done_enc: - vzeroupper addq $0x40, %rsp popq %r13 popq %r12 @@ -1076,6 +1806,282 @@ L_AES_XTS_encrypt_avx1_done_enc: #endif /* __APPLE__ */ #ifndef __APPLE__ .text +.globl AES_XTS_encrypt_update_avx1 +.type AES_XTS_encrypt_update_avx1,@function +.align 16 +AES_XTS_encrypt_update_avx1: +#else +.section __TEXT,__text +.globl _AES_XTS_encrypt_update_avx1 +.p2align 4 +_AES_XTS_encrypt_update_avx1: +#endif /* __APPLE__ */ + pushq %r12 + movq %rdx, %rax + movq %rcx, %r10 + subq $0x40, %rsp + vmovdqu L_avx1_aes_xts_gc_xts(%rip), %xmm12 + vmovdqu (%r8), %xmm0 + xorl %r12d, %r12d + cmpl $0x40, %eax + movl %eax, %r11d + jl L_AES_XTS_encrypt_update_avx1_done_64 + andl $0xffffffc0, %r11d +L_AES_XTS_encrypt_update_avx1_enc_64: + # 64 bytes of input + # aes_enc_64 + leaq (%rdi,%r12,1), %rcx + leaq (%rsi,%r12,1), %rdx + vmovdqu (%rcx), %xmm8 + vmovdqu 16(%rcx), %xmm9 + vmovdqu 32(%rcx), %xmm10 + vmovdqu 48(%rcx), %xmm11 + vpsrad $31, %xmm0, %xmm4 + vpslld $0x01, %xmm0, %xmm1 + vpshufd $0x93, %xmm4, %xmm4 + vpand %xmm12, %xmm4, %xmm4 + vpxor %xmm4, %xmm1, %xmm1 + vpsrad $31, %xmm1, %xmm4 + vpslld $0x01, %xmm1, %xmm2 + vpshufd $0x93, %xmm4, %xmm4 + vpand %xmm12, %xmm4, %xmm4 + vpxor %xmm4, %xmm2, %xmm2 + vpsrad $31, %xmm2, %xmm4 + vpslld $0x01, %xmm2, %xmm3 + vpshufd $0x93, %xmm4, %xmm4 + vpand %xmm12, %xmm4, %xmm4 + vpxor %xmm4, %xmm3, %xmm3 + vpxor %xmm0, %xmm8, %xmm8 + vpxor %xmm1, %xmm9, %xmm9 + vpxor %xmm2, %xmm10, %xmm10 + vpxor %xmm3, %xmm11, %xmm11 + # aes_enc_block + vmovdqu (%r10), %xmm4 + vpxor %xmm4, %xmm8, %xmm8 + vpxor %xmm4, %xmm9, %xmm9 + vpxor %xmm4, %xmm10, %xmm10 + vpxor %xmm4, %xmm11, %xmm11 + vmovdqu 16(%r10), %xmm4 + vaesenc %xmm4, %xmm8, %xmm8 + vaesenc %xmm4, %xmm9, %xmm9 + vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + vmovdqu 32(%r10), %xmm4 + vaesenc %xmm4, %xmm8, %xmm8 + vaesenc %xmm4, %xmm9, %xmm9 + vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + vmovdqu 48(%r10), %xmm4 + vaesenc %xmm4, %xmm8, %xmm8 + vaesenc %xmm4, %xmm9, %xmm9 + vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + vmovdqu 64(%r10), %xmm4 + vaesenc %xmm4, %xmm8, %xmm8 + vaesenc %xmm4, %xmm9, %xmm9 + vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + vmovdqu 80(%r10), %xmm4 + vaesenc %xmm4, %xmm8, %xmm8 + vaesenc %xmm4, %xmm9, %xmm9 + 
vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + vmovdqu 96(%r10), %xmm4 + vaesenc %xmm4, %xmm8, %xmm8 + vaesenc %xmm4, %xmm9, %xmm9 + vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + vmovdqu 112(%r10), %xmm4 + vaesenc %xmm4, %xmm8, %xmm8 + vaesenc %xmm4, %xmm9, %xmm9 + vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + vmovdqu 128(%r10), %xmm4 + vaesenc %xmm4, %xmm8, %xmm8 + vaesenc %xmm4, %xmm9, %xmm9 + vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + vmovdqu 144(%r10), %xmm4 + vaesenc %xmm4, %xmm8, %xmm8 + vaesenc %xmm4, %xmm9, %xmm9 + vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + cmpl $11, %r9d + vmovdqu 160(%r10), %xmm4 + jl L_AES_XTS_encrypt_update_avx1_aes_enc_64_aes_enc_block_last + vaesenc %xmm4, %xmm8, %xmm8 + vaesenc %xmm4, %xmm9, %xmm9 + vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + vmovdqu 176(%r10), %xmm4 + vaesenc %xmm4, %xmm8, %xmm8 + vaesenc %xmm4, %xmm9, %xmm9 + vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + cmpl $13, %r9d + vmovdqu 192(%r10), %xmm4 + jl L_AES_XTS_encrypt_update_avx1_aes_enc_64_aes_enc_block_last + vaesenc %xmm4, %xmm8, %xmm8 + vaesenc %xmm4, %xmm9, %xmm9 + vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + vmovdqu 208(%r10), %xmm4 + vaesenc %xmm4, %xmm8, %xmm8 + vaesenc %xmm4, %xmm9, %xmm9 + vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + vmovdqu 224(%r10), %xmm4 +L_AES_XTS_encrypt_update_avx1_aes_enc_64_aes_enc_block_last: + vaesenclast %xmm4, %xmm8, %xmm8 + vaesenclast %xmm4, %xmm9, %xmm9 + vaesenclast %xmm4, %xmm10, %xmm10 + vaesenclast %xmm4, %xmm11, %xmm11 + vpxor %xmm0, %xmm8, %xmm8 + vpxor %xmm1, %xmm9, %xmm9 + vpxor %xmm2, %xmm10, %xmm10 + vpxor %xmm3, %xmm11, %xmm11 + vmovdqu %xmm8, (%rdx) + vmovdqu %xmm9, 16(%rdx) + vmovdqu %xmm10, 32(%rdx) + vmovdqu %xmm11, 48(%rdx) + vpsrad $31, %xmm3, %xmm4 + vpslld $0x01, %xmm3, %xmm0 + vpshufd $0x93, %xmm4, %xmm4 + vpand %xmm12, %xmm4, %xmm4 + vpxor %xmm4, %xmm0, %xmm0 + addl $0x40, %r12d + cmpl %r11d, %r12d + jl L_AES_XTS_encrypt_update_avx1_enc_64 +L_AES_XTS_encrypt_update_avx1_done_64: + cmpl %eax, %r12d + movl %eax, %r11d + je L_AES_XTS_encrypt_update_avx1_done_enc + subl %r12d, %r11d + cmpl $16, %r11d + movl %eax, %r11d + jl L_AES_XTS_encrypt_update_avx1_last_15 + andl $0xfffffff0, %r11d + # 16 bytes of input +L_AES_XTS_encrypt_update_avx1_enc_16: + leaq (%rdi,%r12,1), %rcx + vmovdqu (%rcx), %xmm8 + vpxor %xmm0, %xmm8, %xmm8 + # aes_enc_block + vpxor (%r10), %xmm8, %xmm8 + vmovdqu 16(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 32(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 48(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 64(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 80(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 96(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 112(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 128(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 144(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + cmpl $11, %r9d + vmovdqu 160(%r10), %xmm5 + jl L_AES_XTS_encrypt_update_avx1_aes_enc_block_last + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 176(%r10), %xmm6 + vaesenc %xmm6, %xmm8, %xmm8 + cmpl $13, %r9d + vmovdqu 192(%r10), %xmm5 + jl L_AES_XTS_encrypt_update_avx1_aes_enc_block_last + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 208(%r10), %xmm6 + vaesenc %xmm6, %xmm8, %xmm8 + vmovdqu 224(%r10), %xmm5 +L_AES_XTS_encrypt_update_avx1_aes_enc_block_last: + vaesenclast %xmm5, %xmm8, %xmm8 + vpxor %xmm0, %xmm8, 
%xmm8 + leaq (%rsi,%r12,1), %rcx + vmovdqu %xmm8, (%rcx) + vpsrad $31, %xmm0, %xmm4 + vpslld $0x01, %xmm0, %xmm0 + vpshufd $0x93, %xmm4, %xmm4 + vpand %xmm12, %xmm4, %xmm4 + vpxor %xmm4, %xmm0, %xmm0 + addl $16, %r12d + cmpl %r11d, %r12d + jl L_AES_XTS_encrypt_update_avx1_enc_16 + cmpl %eax, %r12d + je L_AES_XTS_encrypt_update_avx1_done_enc +L_AES_XTS_encrypt_update_avx1_last_15: + subq $16, %r12 + leaq (%rsi,%r12,1), %rcx + vmovdqu (%rcx), %xmm8 + addq $16, %r12 + vmovdqu %xmm8, (%rsp) + xorq %rdx, %rdx +L_AES_XTS_encrypt_update_avx1_last_15_byte_loop: + movb (%rsp,%rdx,1), %r11b + movb (%rdi,%r12,1), %cl + movb %r11b, (%rsi,%r12,1) + movb %cl, (%rsp,%rdx,1) + incl %r12d + incl %edx + cmpl %eax, %r12d + jl L_AES_XTS_encrypt_update_avx1_last_15_byte_loop + subq %rdx, %r12 + vmovdqu (%rsp), %xmm8 + subq $16, %r12 + vpxor %xmm0, %xmm8, %xmm8 + # aes_enc_block + vpxor (%r10), %xmm8, %xmm8 + vmovdqu 16(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 32(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 48(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 64(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 80(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 96(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 112(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 128(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 144(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + cmpl $11, %r9d + vmovdqu 160(%r10), %xmm5 + jl L_AES_XTS_encrypt_update_avx1_last_15_aes_enc_block_last + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 176(%r10), %xmm6 + vaesenc %xmm6, %xmm8, %xmm8 + cmpl $13, %r9d + vmovdqu 192(%r10), %xmm5 + jl L_AES_XTS_encrypt_update_avx1_last_15_aes_enc_block_last + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 208(%r10), %xmm6 + vaesenc %xmm6, %xmm8, %xmm8 + vmovdqu 224(%r10), %xmm5 +L_AES_XTS_encrypt_update_avx1_last_15_aes_enc_block_last: + vaesenclast %xmm5, %xmm8, %xmm8 + vpxor %xmm0, %xmm8, %xmm8 + leaq (%rsi,%r12,1), %rcx + vmovdqu %xmm8, (%rcx) +L_AES_XTS_encrypt_update_avx1_done_enc: + vmovdqu %xmm0, (%r8) + addq $0x40, %rsp + popq %r12 + repz retq +#ifndef __APPLE__ +.size AES_XTS_encrypt_update_avx1,.-AES_XTS_encrypt_update_avx1 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl AES_XTS_decrypt_avx1 .type AES_XTS_decrypt_avx1,@function .align 16 @@ -1432,7 +2438,6 @@ L_AES_XTS_decrypt_avx1_last_31_2_aes_dec_block_last: leaq (%rsi,%r13,1), %rcx vmovdqu %xmm8, (%rcx) L_AES_XTS_decrypt_avx1_done_dec: - vzeroupper addq $16, %rsp popq %r13 popq %r12 @@ -1440,6 +2445,334 @@ L_AES_XTS_decrypt_avx1_done_dec: #ifndef __APPLE__ .size AES_XTS_decrypt_avx1,.-AES_XTS_decrypt_avx1 #endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl AES_XTS_decrypt_update_avx1 +.type AES_XTS_decrypt_update_avx1,@function +.align 16 +AES_XTS_decrypt_update_avx1: +#else +.section __TEXT,__text +.globl _AES_XTS_decrypt_update_avx1 +.p2align 4 +_AES_XTS_decrypt_update_avx1: +#endif /* __APPLE__ */ + pushq %r12 + movq %rdx, %rax + movq %rcx, %r10 + subq $16, %rsp + vmovdqu L_avx1_aes_xts_gc_xts(%rip), %xmm12 + vmovdqu (%r8), %xmm0 + xorl %r12d, %r12d + movl %eax, %r11d + andl $0xfffffff0, %r11d + cmpl %eax, %r11d + je L_AES_XTS_decrypt_update_avx1_mul16_64 + subl $16, %r11d + cmpl $16, %r11d + jl L_AES_XTS_decrypt_update_avx1_last_31_start +L_AES_XTS_decrypt_update_avx1_mul16_64: + cmpl $0x40, %r11d + jl L_AES_XTS_decrypt_update_avx1_done_64 + andl $0xffffffc0, %r11d +L_AES_XTS_decrypt_update_avx1_dec_64: + # 64 bytes of input + # aes_dec_64 + leaq (%rdi,%r12,1), %rcx + leaq (%rsi,%r12,1), %rdx + 
vmovdqu (%rcx), %xmm8 + vmovdqu 16(%rcx), %xmm9 + vmovdqu 32(%rcx), %xmm10 + vmovdqu 48(%rcx), %xmm11 + vpsrad $31, %xmm0, %xmm4 + vpslld $0x01, %xmm0, %xmm1 + vpshufd $0x93, %xmm4, %xmm4 + vpand %xmm12, %xmm4, %xmm4 + vpxor %xmm4, %xmm1, %xmm1 + vpsrad $31, %xmm1, %xmm4 + vpslld $0x01, %xmm1, %xmm2 + vpshufd $0x93, %xmm4, %xmm4 + vpand %xmm12, %xmm4, %xmm4 + vpxor %xmm4, %xmm2, %xmm2 + vpsrad $31, %xmm2, %xmm4 + vpslld $0x01, %xmm2, %xmm3 + vpshufd $0x93, %xmm4, %xmm4 + vpand %xmm12, %xmm4, %xmm4 + vpxor %xmm4, %xmm3, %xmm3 + vpxor %xmm0, %xmm8, %xmm8 + vpxor %xmm1, %xmm9, %xmm9 + vpxor %xmm2, %xmm10, %xmm10 + vpxor %xmm3, %xmm11, %xmm11 + # aes_dec_block + vmovdqu (%r10), %xmm4 + vpxor %xmm4, %xmm8, %xmm8 + vpxor %xmm4, %xmm9, %xmm9 + vpxor %xmm4, %xmm10, %xmm10 + vpxor %xmm4, %xmm11, %xmm11 + vmovdqu 16(%r10), %xmm4 + vaesdec %xmm4, %xmm8, %xmm8 + vaesdec %xmm4, %xmm9, %xmm9 + vaesdec %xmm4, %xmm10, %xmm10 + vaesdec %xmm4, %xmm11, %xmm11 + vmovdqu 32(%r10), %xmm4 + vaesdec %xmm4, %xmm8, %xmm8 + vaesdec %xmm4, %xmm9, %xmm9 + vaesdec %xmm4, %xmm10, %xmm10 + vaesdec %xmm4, %xmm11, %xmm11 + vmovdqu 48(%r10), %xmm4 + vaesdec %xmm4, %xmm8, %xmm8 + vaesdec %xmm4, %xmm9, %xmm9 + vaesdec %xmm4, %xmm10, %xmm10 + vaesdec %xmm4, %xmm11, %xmm11 + vmovdqu 64(%r10), %xmm4 + vaesdec %xmm4, %xmm8, %xmm8 + vaesdec %xmm4, %xmm9, %xmm9 + vaesdec %xmm4, %xmm10, %xmm10 + vaesdec %xmm4, %xmm11, %xmm11 + vmovdqu 80(%r10), %xmm4 + vaesdec %xmm4, %xmm8, %xmm8 + vaesdec %xmm4, %xmm9, %xmm9 + vaesdec %xmm4, %xmm10, %xmm10 + vaesdec %xmm4, %xmm11, %xmm11 + vmovdqu 96(%r10), %xmm4 + vaesdec %xmm4, %xmm8, %xmm8 + vaesdec %xmm4, %xmm9, %xmm9 + vaesdec %xmm4, %xmm10, %xmm10 + vaesdec %xmm4, %xmm11, %xmm11 + vmovdqu 112(%r10), %xmm4 + vaesdec %xmm4, %xmm8, %xmm8 + vaesdec %xmm4, %xmm9, %xmm9 + vaesdec %xmm4, %xmm10, %xmm10 + vaesdec %xmm4, %xmm11, %xmm11 + vmovdqu 128(%r10), %xmm4 + vaesdec %xmm4, %xmm8, %xmm8 + vaesdec %xmm4, %xmm9, %xmm9 + vaesdec %xmm4, %xmm10, %xmm10 + vaesdec %xmm4, %xmm11, %xmm11 + vmovdqu 144(%r10), %xmm4 + vaesdec %xmm4, %xmm8, %xmm8 + vaesdec %xmm4, %xmm9, %xmm9 + vaesdec %xmm4, %xmm10, %xmm10 + vaesdec %xmm4, %xmm11, %xmm11 + cmpl $11, %r9d + vmovdqu 160(%r10), %xmm4 + jl L_AES_XTS_decrypt_update_avx1_aes_dec_64_aes_dec_block_last + vaesdec %xmm4, %xmm8, %xmm8 + vaesdec %xmm4, %xmm9, %xmm9 + vaesdec %xmm4, %xmm10, %xmm10 + vaesdec %xmm4, %xmm11, %xmm11 + vmovdqu 176(%r10), %xmm4 + vaesdec %xmm4, %xmm8, %xmm8 + vaesdec %xmm4, %xmm9, %xmm9 + vaesdec %xmm4, %xmm10, %xmm10 + vaesdec %xmm4, %xmm11, %xmm11 + cmpl $13, %r9d + vmovdqu 192(%r10), %xmm4 + jl L_AES_XTS_decrypt_update_avx1_aes_dec_64_aes_dec_block_last + vaesdec %xmm4, %xmm8, %xmm8 + vaesdec %xmm4, %xmm9, %xmm9 + vaesdec %xmm4, %xmm10, %xmm10 + vaesdec %xmm4, %xmm11, %xmm11 + vmovdqu 208(%r10), %xmm4 + vaesdec %xmm4, %xmm8, %xmm8 + vaesdec %xmm4, %xmm9, %xmm9 + vaesdec %xmm4, %xmm10, %xmm10 + vaesdec %xmm4, %xmm11, %xmm11 + vmovdqu 224(%r10), %xmm4 +L_AES_XTS_decrypt_update_avx1_aes_dec_64_aes_dec_block_last: + vaesdeclast %xmm4, %xmm8, %xmm8 + vaesdeclast %xmm4, %xmm9, %xmm9 + vaesdeclast %xmm4, %xmm10, %xmm10 + vaesdeclast %xmm4, %xmm11, %xmm11 + vpxor %xmm0, %xmm8, %xmm8 + vpxor %xmm1, %xmm9, %xmm9 + vpxor %xmm2, %xmm10, %xmm10 + vpxor %xmm3, %xmm11, %xmm11 + vmovdqu %xmm8, (%rdx) + vmovdqu %xmm9, 16(%rdx) + vmovdqu %xmm10, 32(%rdx) + vmovdqu %xmm11, 48(%rdx) + vpsrad $31, %xmm3, %xmm4 + vpslld $0x01, %xmm3, %xmm0 + vpshufd $0x93, %xmm4, %xmm4 + vpand %xmm12, %xmm4, %xmm4 + vpxor %xmm4, %xmm0, %xmm0 + addl $0x40, %r12d + cmpl %r11d, 
%r12d + jl L_AES_XTS_decrypt_update_avx1_dec_64 +L_AES_XTS_decrypt_update_avx1_done_64: + cmpl %eax, %r12d + movl %eax, %r11d + je L_AES_XTS_decrypt_update_avx1_done_dec + andl $0xfffffff0, %r11d + cmpl %eax, %r11d + je L_AES_XTS_decrypt_update_avx1_mul16 + subl $16, %r11d + subl %r12d, %r11d + cmpl $16, %r11d + jl L_AES_XTS_decrypt_update_avx1_last_31_start + addl %r12d, %r11d +L_AES_XTS_decrypt_update_avx1_mul16: +L_AES_XTS_decrypt_update_avx1_dec_16: + # 16 bytes of input + leaq (%rdi,%r12,1), %rcx + vmovdqu (%rcx), %xmm8 + vpxor %xmm0, %xmm8, %xmm8 + # aes_dec_block + vpxor (%r10), %xmm8, %xmm8 + vmovdqu 16(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 32(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 48(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 64(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 80(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 96(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 112(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 128(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 144(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + cmpl $11, %r9d + vmovdqu 160(%r10), %xmm5 + jl L_AES_XTS_decrypt_update_avx1_aes_dec_block_last + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 176(%r10), %xmm6 + vaesdec %xmm6, %xmm8, %xmm8 + cmpl $13, %r9d + vmovdqu 192(%r10), %xmm5 + jl L_AES_XTS_decrypt_update_avx1_aes_dec_block_last + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 208(%r10), %xmm6 + vaesdec %xmm6, %xmm8, %xmm8 + vmovdqu 224(%r10), %xmm5 +L_AES_XTS_decrypt_update_avx1_aes_dec_block_last: + vaesdeclast %xmm5, %xmm8, %xmm8 + vpxor %xmm0, %xmm8, %xmm8 + leaq (%rsi,%r12,1), %rcx + vmovdqu %xmm8, (%rcx) + vpsrad $31, %xmm0, %xmm4 + vpslld $0x01, %xmm0, %xmm0 + vpshufd $0x93, %xmm4, %xmm4 + vpand %xmm12, %xmm4, %xmm4 + vpxor %xmm4, %xmm0, %xmm0 + addl $16, %r12d + cmpl %r11d, %r12d + jl L_AES_XTS_decrypt_update_avx1_dec_16 + cmpl %eax, %r12d + je L_AES_XTS_decrypt_update_avx1_done_dec +L_AES_XTS_decrypt_update_avx1_last_31_start: + vpsrad $31, %xmm0, %xmm4 + vpslld $0x01, %xmm0, %xmm7 + vpshufd $0x93, %xmm4, %xmm4 + vpand %xmm12, %xmm4, %xmm4 + vpxor %xmm4, %xmm7, %xmm7 + leaq (%rdi,%r12,1), %rcx + vmovdqu (%rcx), %xmm8 + vpxor %xmm7, %xmm8, %xmm8 + # aes_dec_block + vpxor (%r10), %xmm8, %xmm8 + vmovdqu 16(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 32(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 48(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 64(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 80(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 96(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 112(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 128(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 144(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + cmpl $11, %r9d + vmovdqu 160(%r10), %xmm5 + jl L_AES_XTS_decrypt_update_avx1_last_31_aes_dec_block_last + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 176(%r10), %xmm6 + vaesdec %xmm6, %xmm8, %xmm8 + cmpl $13, %r9d + vmovdqu 192(%r10), %xmm5 + jl L_AES_XTS_decrypt_update_avx1_last_31_aes_dec_block_last + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 208(%r10), %xmm6 + vaesdec %xmm6, %xmm8, %xmm8 + vmovdqu 224(%r10), %xmm5 +L_AES_XTS_decrypt_update_avx1_last_31_aes_dec_block_last: + vaesdeclast %xmm5, %xmm8, %xmm8 + vpxor %xmm7, %xmm8, %xmm8 + vmovdqu %xmm8, (%rsp) + addq $16, %r12 + xorq %rdx, %rdx +L_AES_XTS_decrypt_update_avx1_last_31_byte_loop: + movb (%rsp,%rdx,1), %r11b + movb (%rdi,%r12,1), %cl + movb %r11b, (%rsi,%r12,1) + movb %cl, (%rsp,%rdx,1) + incl %r12d + incl 
%edx + cmpl %eax, %r12d + jl L_AES_XTS_decrypt_update_avx1_last_31_byte_loop + subq %rdx, %r12 + vmovdqu (%rsp), %xmm8 + vpxor %xmm0, %xmm8, %xmm8 + # aes_dec_block + vpxor (%r10), %xmm8, %xmm8 + vmovdqu 16(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 32(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 48(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 64(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 80(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 96(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 112(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 128(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 144(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + cmpl $11, %r9d + vmovdqu 160(%r10), %xmm5 + jl L_AES_XTS_decrypt_update_avx1_last_31_2_aes_dec_block_last + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 176(%r10), %xmm6 + vaesdec %xmm6, %xmm8, %xmm8 + cmpl $13, %r9d + vmovdqu 192(%r10), %xmm5 + jl L_AES_XTS_decrypt_update_avx1_last_31_2_aes_dec_block_last + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 208(%r10), %xmm6 + vaesdec %xmm6, %xmm8, %xmm8 + vmovdqu 224(%r10), %xmm5 +L_AES_XTS_decrypt_update_avx1_last_31_2_aes_dec_block_last: + vaesdeclast %xmm5, %xmm8, %xmm8 + vpxor %xmm0, %xmm8, %xmm8 + subq $16, %r12 + leaq (%rsi,%r12,1), %rcx + vmovdqu %xmm8, (%rcx) +L_AES_XTS_decrypt_update_avx1_done_dec: + vmovdqu %xmm0, (%r8) + addq $16, %rsp + popq %r12 + repz retq +#ifndef __APPLE__ +.size AES_XTS_decrypt_update_avx1,.-AES_XTS_decrypt_update_avx1 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX1 */ #endif /* WOLFSSL_X86_64_BUILD */ #endif /* WOLFSSL_AES_XTS */ diff --git a/wolfcrypt/src/aes_xts_asm.asm b/wolfcrypt/src/aes_xts_asm.asm index 3185ec224..7194a06b8 100644 --- a/wolfcrypt/src/aes_xts_asm.asm +++ b/wolfcrypt/src/aes_xts_asm.asm @@ -40,6 +40,48 @@ IFNDEF _WIN64 _WIN64 = 1 ENDIF +_text SEGMENT READONLY PARA +AES_XTS_init_aesni PROC + movdqu xmm0, OWORD PTR [rcx] + ; aes_enc_block + pxor xmm0, [rdx] + movdqu xmm2, OWORD PTR [rdx+16] + aesenc xmm0, xmm2 + movdqu xmm2, OWORD PTR [rdx+32] + aesenc xmm0, xmm2 + movdqu xmm2, OWORD PTR [rdx+48] + aesenc xmm0, xmm2 + movdqu xmm2, OWORD PTR [rdx+64] + aesenc xmm0, xmm2 + movdqu xmm2, OWORD PTR [rdx+80] + aesenc xmm0, xmm2 + movdqu xmm2, OWORD PTR [rdx+96] + aesenc xmm0, xmm2 + movdqu xmm2, OWORD PTR [rdx+112] + aesenc xmm0, xmm2 + movdqu xmm2, OWORD PTR [rdx+128] + aesenc xmm0, xmm2 + movdqu xmm2, OWORD PTR [rdx+144] + aesenc xmm0, xmm2 + cmp r8d, 11 + movdqu xmm2, OWORD PTR [rdx+160] + jl L_AES_XTS_init_aesni_tweak_aes_enc_block_last + aesenc xmm0, xmm2 + movdqu xmm3, OWORD PTR [rdx+176] + aesenc xmm0, xmm3 + cmp r8d, 13 + movdqu xmm2, OWORD PTR [rdx+192] + jl L_AES_XTS_init_aesni_tweak_aes_enc_block_last + aesenc xmm0, xmm2 + movdqu xmm3, OWORD PTR [rdx+208] + aesenc xmm0, xmm3 + movdqu xmm2, OWORD PTR [rdx+224] +L_AES_XTS_init_aesni_tweak_aes_enc_block_last: + aesenclast xmm0, xmm2 + movdqu OWORD PTR [rcx], xmm0 + ret +AES_XTS_init_aesni ENDP +_text ENDS _DATA SEGMENT ALIGN 16 L_aes_xts_gc_xts DWORD 135,1,1,1 @@ -379,6 +421,302 @@ L_AES_XTS_encrypt_aesni_done_enc: AES_XTS_encrypt_aesni ENDP _text ENDS _text SEGMENT READONLY PARA +AES_XTS_encrypt_update_aesni PROC + push rdi + push rsi + push r12 + mov rdi, rcx + mov rsi, rdx + mov rax, r8 + mov r10, r9 + mov r8, QWORD PTR [rsp+64] + mov r9d, DWORD PTR [rsp+72] + sub rsp, 176 + movdqu OWORD PTR [rsp+64], xmm6 + movdqu OWORD PTR [rsp+80], xmm7 + movdqu OWORD PTR [rsp+96], xmm8 + movdqu OWORD PTR [rsp+112], xmm9 + movdqu OWORD PTR [rsp+128], xmm10 + movdqu OWORD 
PTR [rsp+144], xmm11 + movdqu OWORD PTR [rsp+160], xmm12 + movdqu xmm12, OWORD PTR L_aes_xts_gc_xts + movdqu xmm0, OWORD PTR [r8] + xor r12d, r12d + cmp eax, 64 + mov r11d, eax + jl L_AES_XTS_encrypt_update_aesni_done_64 + and r11d, 4294967232 +L_AES_XTS_encrypt_update_aesni_enc_64: + ; 64 bytes of input + ; aes_enc_64 + lea rcx, QWORD PTR [rdi+r12] + lea rdx, QWORD PTR [rsi+r12] + movdqu xmm8, OWORD PTR [rcx] + movdqu xmm9, OWORD PTR [rcx+16] + movdqu xmm10, OWORD PTR [rcx+32] + movdqu xmm11, OWORD PTR [rcx+48] + movdqa xmm4, xmm0 + movdqa xmm1, xmm0 + psrad xmm4, 31 + pslld xmm1, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm1, xmm4 + movdqa xmm4, xmm1 + movdqa xmm2, xmm1 + psrad xmm4, 31 + pslld xmm2, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm2, xmm4 + movdqa xmm4, xmm2 + movdqa xmm3, xmm2 + psrad xmm4, 31 + pslld xmm3, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm3, xmm4 + pxor xmm8, xmm0 + pxor xmm9, xmm1 + pxor xmm10, xmm2 + pxor xmm11, xmm3 + ; aes_enc_block + movdqu xmm4, OWORD PTR [r10] + pxor xmm8, xmm4 + pxor xmm9, xmm4 + pxor xmm10, xmm4 + pxor xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+16] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+32] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+48] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+64] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+80] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+96] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+112] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+128] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+144] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + cmp r9d, 11 + movdqu xmm4, OWORD PTR [r10+160] + jl L_AES_XTS_encrypt_update_aesni_aes_enc_64_aes_enc_block_last + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+176] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + cmp r9d, 13 + movdqu xmm4, OWORD PTR [r10+192] + jl L_AES_XTS_encrypt_update_aesni_aes_enc_64_aes_enc_block_last + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+208] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+224] +L_AES_XTS_encrypt_update_aesni_aes_enc_64_aes_enc_block_last: + aesenclast xmm8, xmm4 + aesenclast xmm9, xmm4 + aesenclast xmm10, xmm4 + aesenclast xmm11, xmm4 + pxor xmm8, xmm0 + pxor xmm9, xmm1 + pxor xmm10, xmm2 + pxor xmm11, xmm3 + movdqu OWORD PTR [rdx], xmm8 + movdqu OWORD PTR [rdx+16], xmm9 + movdqu OWORD PTR [rdx+32], xmm10 + movdqu OWORD PTR [rdx+48], xmm11 + movdqa xmm4, xmm3 + movdqa xmm0, xmm3 + psrad xmm4, 31 + pslld xmm0, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm0, xmm4 + add r12d, 64 + cmp r12d, r11d + jl L_AES_XTS_encrypt_update_aesni_enc_64 +L_AES_XTS_encrypt_update_aesni_done_64: + cmp r12d, eax + mov r11d, eax + je L_AES_XTS_encrypt_update_aesni_done_enc + sub r11d, r12d + cmp 
r11d, 16 + mov r11d, eax + jl L_AES_XTS_encrypt_update_aesni_last_15 + and r11d, 4294967280 + ; 16 bytes of input +L_AES_XTS_encrypt_update_aesni_enc_16: + lea rcx, QWORD PTR [rdi+r12] + movdqu xmm8, OWORD PTR [rcx] + pxor xmm8, xmm0 + ; aes_enc_block + pxor xmm8, [r10] + movdqu xmm5, OWORD PTR [r10+16] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+32] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+48] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+64] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+80] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+96] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+112] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+128] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+144] + aesenc xmm8, xmm5 + cmp r9d, 11 + movdqu xmm5, OWORD PTR [r10+160] + jl L_AES_XTS_encrypt_update_aesni_aes_enc_block_last + aesenc xmm8, xmm5 + movdqu xmm6, OWORD PTR [r10+176] + aesenc xmm8, xmm6 + cmp r9d, 13 + movdqu xmm5, OWORD PTR [r10+192] + jl L_AES_XTS_encrypt_update_aesni_aes_enc_block_last + aesenc xmm8, xmm5 + movdqu xmm6, OWORD PTR [r10+208] + aesenc xmm8, xmm6 + movdqu xmm5, OWORD PTR [r10+224] +L_AES_XTS_encrypt_update_aesni_aes_enc_block_last: + aesenclast xmm8, xmm5 + pxor xmm8, xmm0 + lea rcx, QWORD PTR [rsi+r12] + movdqu OWORD PTR [rcx], xmm8 + movdqa xmm4, xmm0 + psrad xmm4, 31 + pslld xmm0, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm0, xmm4 + add r12d, 16 + cmp r12d, r11d + jl L_AES_XTS_encrypt_update_aesni_enc_16 + cmp r12d, eax + je L_AES_XTS_encrypt_update_aesni_done_enc +L_AES_XTS_encrypt_update_aesni_last_15: + sub r12, 16 + lea rcx, QWORD PTR [rsi+r12] + movdqu xmm8, OWORD PTR [rcx] + add r12, 16 + movdqu OWORD PTR [rsp], xmm8 + xor rdx, rdx +L_AES_XTS_encrypt_update_aesni_last_15_byte_loop: + mov r11b, BYTE PTR [rsp+rdx] + mov cl, BYTE PTR [rdi+r12] + mov BYTE PTR [rsi+r12], r11b + mov BYTE PTR [rsp+rdx], cl + inc r12d + inc edx + cmp r12d, eax + jl L_AES_XTS_encrypt_update_aesni_last_15_byte_loop + sub r12, rdx + movdqu xmm8, OWORD PTR [rsp] + sub r12, 16 + pxor xmm8, xmm0 + ; aes_enc_block + pxor xmm8, [r10] + movdqu xmm5, OWORD PTR [r10+16] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+32] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+48] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+64] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+80] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+96] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+112] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+128] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+144] + aesenc xmm8, xmm5 + cmp r9d, 11 + movdqu xmm5, OWORD PTR [r10+160] + jl L_AES_XTS_encrypt_update_aesni_last_15_aes_enc_block_last + aesenc xmm8, xmm5 + movdqu xmm6, OWORD PTR [r10+176] + aesenc xmm8, xmm6 + cmp r9d, 13 + movdqu xmm5, OWORD PTR [r10+192] + jl L_AES_XTS_encrypt_update_aesni_last_15_aes_enc_block_last + aesenc xmm8, xmm5 + movdqu xmm6, OWORD PTR [r10+208] + aesenc xmm8, xmm6 + movdqu xmm5, OWORD PTR [r10+224] +L_AES_XTS_encrypt_update_aesni_last_15_aes_enc_block_last: + aesenclast xmm8, xmm5 + pxor xmm8, xmm0 + lea rcx, QWORD PTR [rsi+r12] + movdqu OWORD PTR [rcx], xmm8 +L_AES_XTS_encrypt_update_aesni_done_enc: + movdqu OWORD PTR [r8], xmm0 + movdqu xmm6, OWORD PTR [rsp+64] + movdqu xmm7, OWORD PTR [rsp+80] + movdqu xmm8, OWORD PTR [rsp+96] + movdqu xmm9, OWORD PTR [rsp+112] + movdqu xmm10, OWORD PTR [rsp+128] + movdqu xmm11, OWORD PTR [rsp+144] + movdqu xmm12, OWORD PTR [rsp+160] + add rsp, 176 + pop r12 + pop rsi + pop rdi + ret 
+AES_XTS_encrypt_update_aesni ENDP +_text ENDS +_text SEGMENT READONLY PARA AES_XTS_decrypt_aesni PROC push rdi push rsi @@ -765,7 +1103,400 @@ L_AES_XTS_decrypt_aesni_done_dec: ret AES_XTS_decrypt_aesni ENDP _text ENDS +_text SEGMENT READONLY PARA +AES_XTS_decrypt_update_aesni PROC + push rdi + push rsi + push r12 + mov rdi, rcx + mov rsi, rdx + mov rax, r8 + mov r10, r9 + mov r8, QWORD PTR [rsp+64] + mov r9d, DWORD PTR [rsp+72] + sub rsp, 128 + movdqu OWORD PTR [rsp+16], xmm6 + movdqu OWORD PTR [rsp+32], xmm7 + movdqu OWORD PTR [rsp+48], xmm8 + movdqu OWORD PTR [rsp+64], xmm9 + movdqu OWORD PTR [rsp+80], xmm10 + movdqu OWORD PTR [rsp+96], xmm11 + movdqu OWORD PTR [rsp+112], xmm12 + movdqu xmm12, OWORD PTR L_aes_xts_gc_xts + movdqu xmm0, OWORD PTR [r8] + xor r12d, r12d + mov r11d, eax + and r11d, 4294967280 + cmp r11d, eax + je L_AES_XTS_decrypt_update_aesni_mul16_64 + sub r11d, 16 + cmp r11d, 16 + jl L_AES_XTS_decrypt_update_aesni_last_31_start +L_AES_XTS_decrypt_update_aesni_mul16_64: + cmp r11d, 64 + jl L_AES_XTS_decrypt_update_aesni_done_64 + and r11d, 4294967232 +L_AES_XTS_decrypt_update_aesni_dec_64: + ; 64 bytes of input + ; aes_dec_64 + lea rcx, QWORD PTR [rdi+r12] + lea rdx, QWORD PTR [rsi+r12] + movdqu xmm8, OWORD PTR [rcx] + movdqu xmm9, OWORD PTR [rcx+16] + movdqu xmm10, OWORD PTR [rcx+32] + movdqu xmm11, OWORD PTR [rcx+48] + movdqa xmm4, xmm0 + movdqa xmm1, xmm0 + psrad xmm4, 31 + pslld xmm1, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm1, xmm4 + movdqa xmm4, xmm1 + movdqa xmm2, xmm1 + psrad xmm4, 31 + pslld xmm2, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm2, xmm4 + movdqa xmm4, xmm2 + movdqa xmm3, xmm2 + psrad xmm4, 31 + pslld xmm3, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm3, xmm4 + pxor xmm8, xmm0 + pxor xmm9, xmm1 + pxor xmm10, xmm2 + pxor xmm11, xmm3 + ; aes_dec_block + movdqu xmm4, OWORD PTR [r10] + pxor xmm8, xmm4 + pxor xmm9, xmm4 + pxor xmm10, xmm4 + pxor xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+16] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+32] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+48] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+64] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+80] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+96] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+112] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+128] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+144] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + cmp r9d, 11 + movdqu xmm4, OWORD PTR [r10+160] + jl L_AES_XTS_decrypt_update_aesni_aes_dec_64_aes_dec_block_last + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+176] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + cmp r9d, 13 + movdqu xmm4, OWORD PTR [r10+192] + jl L_AES_XTS_decrypt_update_aesni_aes_dec_64_aes_dec_block_last + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+208] 
+ aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+224] +L_AES_XTS_decrypt_update_aesni_aes_dec_64_aes_dec_block_last: + aesdeclast xmm8, xmm4 + aesdeclast xmm9, xmm4 + aesdeclast xmm10, xmm4 + aesdeclast xmm11, xmm4 + pxor xmm8, xmm0 + pxor xmm9, xmm1 + pxor xmm10, xmm2 + pxor xmm11, xmm3 + movdqu OWORD PTR [rdx], xmm8 + movdqu OWORD PTR [rdx+16], xmm9 + movdqu OWORD PTR [rdx+32], xmm10 + movdqu OWORD PTR [rdx+48], xmm11 + movdqa xmm4, xmm3 + movdqa xmm0, xmm3 + psrad xmm4, 31 + pslld xmm0, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm0, xmm4 + add r12d, 64 + cmp r12d, r11d + jl L_AES_XTS_decrypt_update_aesni_dec_64 +L_AES_XTS_decrypt_update_aesni_done_64: + cmp r12d, eax + mov r11d, eax + je L_AES_XTS_decrypt_update_aesni_done_dec + and r11d, 4294967280 + cmp r11d, eax + je L_AES_XTS_decrypt_update_aesni_mul16 + sub r11d, 16 + sub r11d, r12d + cmp r11d, 16 + jl L_AES_XTS_decrypt_update_aesni_last_31_start + add r11d, r12d +L_AES_XTS_decrypt_update_aesni_mul16: +L_AES_XTS_decrypt_update_aesni_dec_16: + ; 16 bytes of input + lea rcx, QWORD PTR [rdi+r12] + movdqu xmm8, OWORD PTR [rcx] + pxor xmm8, xmm0 + ; aes_dec_block + pxor xmm8, [r10] + movdqu xmm5, OWORD PTR [r10+16] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+32] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+48] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+64] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+80] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+96] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+112] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+128] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+144] + aesdec xmm8, xmm5 + cmp r9d, 11 + movdqu xmm5, OWORD PTR [r10+160] + jl L_AES_XTS_decrypt_update_aesni_aes_dec_block_last + aesdec xmm8, xmm5 + movdqu xmm6, OWORD PTR [r10+176] + aesdec xmm8, xmm6 + cmp r9d, 13 + movdqu xmm5, OWORD PTR [r10+192] + jl L_AES_XTS_decrypt_update_aesni_aes_dec_block_last + aesdec xmm8, xmm5 + movdqu xmm6, OWORD PTR [r10+208] + aesdec xmm8, xmm6 + movdqu xmm5, OWORD PTR [r10+224] +L_AES_XTS_decrypt_update_aesni_aes_dec_block_last: + aesdeclast xmm8, xmm5 + pxor xmm8, xmm0 + lea rcx, QWORD PTR [rsi+r12] + movdqu OWORD PTR [rcx], xmm8 + movdqa xmm4, xmm0 + psrad xmm4, 31 + pslld xmm0, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm0, xmm4 + add r12d, 16 + cmp r12d, r11d + jl L_AES_XTS_decrypt_update_aesni_dec_16 + cmp r12d, eax + je L_AES_XTS_decrypt_update_aesni_done_dec +L_AES_XTS_decrypt_update_aesni_last_31_start: + movdqa xmm4, xmm0 + movdqa xmm7, xmm0 + psrad xmm4, 31 + pslld xmm7, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm7, xmm4 + lea rcx, QWORD PTR [rdi+r12] + movdqu xmm8, OWORD PTR [rcx] + pxor xmm8, xmm7 + ; aes_dec_block + pxor xmm8, [r10] + movdqu xmm5, OWORD PTR [r10+16] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+32] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+48] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+64] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+80] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+96] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+112] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+128] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+144] + aesdec xmm8, xmm5 + cmp r9d, 11 + movdqu xmm5, OWORD PTR [r10+160] + jl L_AES_XTS_decrypt_update_aesni_last_31_aes_dec_block_last + aesdec xmm8, xmm5 + movdqu xmm6, OWORD PTR [r10+176] + aesdec xmm8, xmm6 + cmp r9d, 13 + movdqu xmm5, OWORD PTR [r10+192] + jl 
L_AES_XTS_decrypt_update_aesni_last_31_aes_dec_block_last + aesdec xmm8, xmm5 + movdqu xmm6, OWORD PTR [r10+208] + aesdec xmm8, xmm6 + movdqu xmm5, OWORD PTR [r10+224] +L_AES_XTS_decrypt_update_aesni_last_31_aes_dec_block_last: + aesdeclast xmm8, xmm5 + pxor xmm8, xmm7 + movdqu OWORD PTR [rsp], xmm8 + add r12, 16 + xor rdx, rdx +L_AES_XTS_decrypt_update_aesni_last_31_byte_loop: + mov r11b, BYTE PTR [rsp+rdx] + mov cl, BYTE PTR [rdi+r12] + mov BYTE PTR [rsi+r12], r11b + mov BYTE PTR [rsp+rdx], cl + inc r12d + inc edx + cmp r12d, eax + jl L_AES_XTS_decrypt_update_aesni_last_31_byte_loop + sub r12, rdx + movdqu xmm8, OWORD PTR [rsp] + pxor xmm8, xmm0 + ; aes_dec_block + pxor xmm8, [r10] + movdqu xmm5, OWORD PTR [r10+16] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+32] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+48] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+64] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+80] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+96] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+112] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+128] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+144] + aesdec xmm8, xmm5 + cmp r9d, 11 + movdqu xmm5, OWORD PTR [r10+160] + jl L_AES_XTS_decrypt_update_aesni_last_31_2_aes_dec_block_last + aesdec xmm8, xmm5 + movdqu xmm6, OWORD PTR [r10+176] + aesdec xmm8, xmm6 + cmp r9d, 13 + movdqu xmm5, OWORD PTR [r10+192] + jl L_AES_XTS_decrypt_update_aesni_last_31_2_aes_dec_block_last + aesdec xmm8, xmm5 + movdqu xmm6, OWORD PTR [r10+208] + aesdec xmm8, xmm6 + movdqu xmm5, OWORD PTR [r10+224] +L_AES_XTS_decrypt_update_aesni_last_31_2_aes_dec_block_last: + aesdeclast xmm8, xmm5 + pxor xmm8, xmm0 + sub r12, 16 + lea rcx, QWORD PTR [rsi+r12] + movdqu OWORD PTR [rcx], xmm8 +L_AES_XTS_decrypt_update_aesni_done_dec: + movdqu OWORD PTR [r8], xmm0 + movdqu xmm6, OWORD PTR [rsp+16] + movdqu xmm7, OWORD PTR [rsp+32] + movdqu xmm8, OWORD PTR [rsp+48] + movdqu xmm9, OWORD PTR [rsp+64] + movdqu xmm10, OWORD PTR [rsp+80] + movdqu xmm11, OWORD PTR [rsp+96] + movdqu xmm12, OWORD PTR [rsp+112] + add rsp, 128 + pop r12 + pop rsi + pop rdi + ret +AES_XTS_decrypt_update_aesni ENDP +_text ENDS IFDEF HAVE_INTEL_AVX1 +_text SEGMENT READONLY PARA +AES_XTS_init_avx1 PROC + mov eax, r8d + vmovdqu xmm0, OWORD PTR [rcx] + ; aes_enc_block + vpxor xmm0, xmm0, [rdx] + vmovdqu xmm2, OWORD PTR [rdx+16] + vaesenc xmm0, xmm0, xmm2 + vmovdqu xmm2, OWORD PTR [rdx+32] + vaesenc xmm0, xmm0, xmm2 + vmovdqu xmm2, OWORD PTR [rdx+48] + vaesenc xmm0, xmm0, xmm2 + vmovdqu xmm2, OWORD PTR [rdx+64] + vaesenc xmm0, xmm0, xmm2 + vmovdqu xmm2, OWORD PTR [rdx+80] + vaesenc xmm0, xmm0, xmm2 + vmovdqu xmm2, OWORD PTR [rdx+96] + vaesenc xmm0, xmm0, xmm2 + vmovdqu xmm2, OWORD PTR [rdx+112] + vaesenc xmm0, xmm0, xmm2 + vmovdqu xmm2, OWORD PTR [rdx+128] + vaesenc xmm0, xmm0, xmm2 + vmovdqu xmm2, OWORD PTR [rdx+144] + vaesenc xmm0, xmm0, xmm2 + cmp eax, 11 + vmovdqu xmm2, OWORD PTR [rdx+160] + jl L_AES_XTS_init_avx1_tweak_aes_enc_block_last + vaesenc xmm0, xmm0, xmm2 + vmovdqu xmm3, OWORD PTR [rdx+176] + vaesenc xmm0, xmm0, xmm3 + cmp eax, 13 + vmovdqu xmm2, OWORD PTR [rdx+192] + jl L_AES_XTS_init_avx1_tweak_aes_enc_block_last + vaesenc xmm0, xmm0, xmm2 + vmovdqu xmm3, OWORD PTR [rdx+208] + vaesenc xmm0, xmm0, xmm3 + vmovdqu xmm2, OWORD PTR [rdx+224] +L_AES_XTS_init_avx1_tweak_aes_enc_block_last: + vaesenclast xmm0, xmm0, xmm2 + vmovdqu OWORD PTR [rcx], xmm0 + ret +AES_XTS_init_avx1 ENDP +_text ENDS _DATA SEGMENT ALIGN 16 L_avx1_aes_xts_gc_xts DWORD 135,1,1,1 @@ -1080,7 
+1811,6 @@ L_AES_XTS_encrypt_avx1_last_15_aes_enc_block_last: lea rcx, QWORD PTR [rsi+r13] vmovdqu OWORD PTR [rcx], xmm8 L_AES_XTS_encrypt_avx1_done_enc: - vzeroupper vmovdqu xmm6, OWORD PTR [rsp+64] vmovdqu xmm7, OWORD PTR [rsp+80] vmovdqu xmm8, OWORD PTR [rsp+96] @@ -1097,6 +1827,293 @@ L_AES_XTS_encrypt_avx1_done_enc: AES_XTS_encrypt_avx1 ENDP _text ENDS _text SEGMENT READONLY PARA +AES_XTS_encrypt_update_avx1 PROC + push rdi + push rsi + push r12 + mov rdi, rcx + mov rsi, rdx + mov rax, r8 + mov r10, r9 + mov r8, QWORD PTR [rsp+64] + mov r9d, DWORD PTR [rsp+72] + sub rsp, 176 + vmovdqu OWORD PTR [rsp+64], xmm6 + vmovdqu OWORD PTR [rsp+80], xmm7 + vmovdqu OWORD PTR [rsp+96], xmm8 + vmovdqu OWORD PTR [rsp+112], xmm9 + vmovdqu OWORD PTR [rsp+128], xmm10 + vmovdqu OWORD PTR [rsp+144], xmm11 + vmovdqu OWORD PTR [rsp+160], xmm12 + vmovdqu xmm12, OWORD PTR L_avx1_aes_xts_gc_xts + vmovdqu xmm0, OWORD PTR [r8] + xor r12d, r12d + cmp eax, 64 + mov r11d, eax + jl L_AES_XTS_encrypt_update_avx1_done_64 + and r11d, 4294967232 +L_AES_XTS_encrypt_update_avx1_enc_64: + ; 64 bytes of input + ; aes_enc_64 + lea rcx, QWORD PTR [rdi+r12] + lea rdx, QWORD PTR [rsi+r12] + vmovdqu xmm8, OWORD PTR [rcx] + vmovdqu xmm9, OWORD PTR [rcx+16] + vmovdqu xmm10, OWORD PTR [rcx+32] + vmovdqu xmm11, OWORD PTR [rcx+48] + vpsrad xmm4, xmm0, 31 + vpslld xmm1, xmm0, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm1, xmm1, xmm4 + vpsrad xmm4, xmm1, 31 + vpslld xmm2, xmm1, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm2, xmm2, xmm4 + vpsrad xmm4, xmm2, 31 + vpslld xmm3, xmm2, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm3, xmm3, xmm4 + vpxor xmm8, xmm8, xmm0 + vpxor xmm9, xmm9, xmm1 + vpxor xmm10, xmm10, xmm2 + vpxor xmm11, xmm11, xmm3 + ; aes_enc_block + vmovdqu xmm4, OWORD PTR [r10] + vpxor xmm8, xmm8, xmm4 + vpxor xmm9, xmm9, xmm4 + vpxor xmm10, xmm10, xmm4 + vpxor xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+16] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+32] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+48] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+64] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+80] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+96] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+112] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+128] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+144] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + cmp r9d, 11 + vmovdqu xmm4, OWORD PTR [r10+160] + jl L_AES_XTS_encrypt_update_avx1_aes_enc_64_aes_enc_block_last + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+176] + vaesenc xmm8, xmm8, xmm4 
+ vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + cmp r9d, 13 + vmovdqu xmm4, OWORD PTR [r10+192] + jl L_AES_XTS_encrypt_update_avx1_aes_enc_64_aes_enc_block_last + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+208] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+224] +L_AES_XTS_encrypt_update_avx1_aes_enc_64_aes_enc_block_last: + vaesenclast xmm8, xmm8, xmm4 + vaesenclast xmm9, xmm9, xmm4 + vaesenclast xmm10, xmm10, xmm4 + vaesenclast xmm11, xmm11, xmm4 + vpxor xmm8, xmm8, xmm0 + vpxor xmm9, xmm9, xmm1 + vpxor xmm10, xmm10, xmm2 + vpxor xmm11, xmm11, xmm3 + vmovdqu OWORD PTR [rdx], xmm8 + vmovdqu OWORD PTR [rdx+16], xmm9 + vmovdqu OWORD PTR [rdx+32], xmm10 + vmovdqu OWORD PTR [rdx+48], xmm11 + vpsrad xmm4, xmm3, 31 + vpslld xmm0, xmm3, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm0, xmm0, xmm4 + add r12d, 64 + cmp r12d, r11d + jl L_AES_XTS_encrypt_update_avx1_enc_64 +L_AES_XTS_encrypt_update_avx1_done_64: + cmp r12d, eax + mov r11d, eax + je L_AES_XTS_encrypt_update_avx1_done_enc + sub r11d, r12d + cmp r11d, 16 + mov r11d, eax + jl L_AES_XTS_encrypt_update_avx1_last_15 + and r11d, 4294967280 + ; 16 bytes of input +L_AES_XTS_encrypt_update_avx1_enc_16: + lea rcx, QWORD PTR [rdi+r12] + vmovdqu xmm8, OWORD PTR [rcx] + vpxor xmm8, xmm8, xmm0 + ; aes_enc_block + vpxor xmm8, xmm8, [r10] + vmovdqu xmm5, OWORD PTR [r10+16] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+32] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+48] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+64] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+80] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+96] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+112] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+128] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+144] + vaesenc xmm8, xmm8, xmm5 + cmp r9d, 11 + vmovdqu xmm5, OWORD PTR [r10+160] + jl L_AES_XTS_encrypt_update_avx1_aes_enc_block_last + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r10+176] + vaesenc xmm8, xmm8, xmm6 + cmp r9d, 13 + vmovdqu xmm5, OWORD PTR [r10+192] + jl L_AES_XTS_encrypt_update_avx1_aes_enc_block_last + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r10+208] + vaesenc xmm8, xmm8, xmm6 + vmovdqu xmm5, OWORD PTR [r10+224] +L_AES_XTS_encrypt_update_avx1_aes_enc_block_last: + vaesenclast xmm8, xmm8, xmm5 + vpxor xmm8, xmm8, xmm0 + lea rcx, QWORD PTR [rsi+r12] + vmovdqu OWORD PTR [rcx], xmm8 + vpsrad xmm4, xmm0, 31 + vpslld xmm0, xmm0, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm0, xmm0, xmm4 + add r12d, 16 + cmp r12d, r11d + jl L_AES_XTS_encrypt_update_avx1_enc_16 + cmp r12d, eax + je L_AES_XTS_encrypt_update_avx1_done_enc +L_AES_XTS_encrypt_update_avx1_last_15: + sub r12, 16 + lea rcx, QWORD PTR [rsi+r12] + vmovdqu xmm8, OWORD PTR [rcx] + add r12, 16 + vmovdqu OWORD PTR [rsp], xmm8 + xor rdx, rdx +L_AES_XTS_encrypt_update_avx1_last_15_byte_loop: + mov r11b, BYTE PTR [rsp+rdx] + mov cl, BYTE PTR [rdi+r12] + mov BYTE PTR [rsi+r12], r11b + mov BYTE PTR [rsp+rdx], cl + inc r12d + inc edx + cmp r12d, eax + jl L_AES_XTS_encrypt_update_avx1_last_15_byte_loop + sub r12, rdx + vmovdqu xmm8, OWORD PTR [rsp] + sub r12, 16 + vpxor xmm8, xmm8, xmm0 + ; aes_enc_block + vpxor xmm8, xmm8, [r10] + vmovdqu xmm5, OWORD 
PTR [r10+16] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+32] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+48] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+64] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+80] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+96] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+112] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+128] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+144] + vaesenc xmm8, xmm8, xmm5 + cmp r9d, 11 + vmovdqu xmm5, OWORD PTR [r10+160] + jl L_AES_XTS_encrypt_update_avx1_last_15_aes_enc_block_last + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r10+176] + vaesenc xmm8, xmm8, xmm6 + cmp r9d, 13 + vmovdqu xmm5, OWORD PTR [r10+192] + jl L_AES_XTS_encrypt_update_avx1_last_15_aes_enc_block_last + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r10+208] + vaesenc xmm8, xmm8, xmm6 + vmovdqu xmm5, OWORD PTR [r10+224] +L_AES_XTS_encrypt_update_avx1_last_15_aes_enc_block_last: + vaesenclast xmm8, xmm8, xmm5 + vpxor xmm8, xmm8, xmm0 + lea rcx, QWORD PTR [rsi+r12] + vmovdqu OWORD PTR [rcx], xmm8 +L_AES_XTS_encrypt_update_avx1_done_enc: + vmovdqu OWORD PTR [r8], xmm0 + vmovdqu xmm6, OWORD PTR [rsp+64] + vmovdqu xmm7, OWORD PTR [rsp+80] + vmovdqu xmm8, OWORD PTR [rsp+96] + vmovdqu xmm9, OWORD PTR [rsp+112] + vmovdqu xmm10, OWORD PTR [rsp+128] + vmovdqu xmm11, OWORD PTR [rsp+144] + vmovdqu xmm12, OWORD PTR [rsp+160] + add rsp, 176 + pop r12 + pop rsi + pop rdi + ret +AES_XTS_encrypt_update_avx1 ENDP +_text ENDS +_text SEGMENT READONLY PARA AES_XTS_decrypt_avx1 PROC push rdi push rsi @@ -1457,7 +2474,6 @@ L_AES_XTS_decrypt_avx1_last_31_2_aes_dec_block_last: lea rcx, QWORD PTR [rsi+r13] vmovdqu OWORD PTR [rcx], xmm8 L_AES_XTS_decrypt_avx1_done_dec: - vzeroupper vmovdqu xmm6, OWORD PTR [rsp+16] vmovdqu xmm7, OWORD PTR [rsp+32] vmovdqu xmm8, OWORD PTR [rsp+48] @@ -1473,5 +2489,344 @@ L_AES_XTS_decrypt_avx1_done_dec: ret AES_XTS_decrypt_avx1 ENDP _text ENDS +_text SEGMENT READONLY PARA +AES_XTS_decrypt_update_avx1 PROC + push rdi + push rsi + push r12 + mov rdi, rcx + mov rsi, rdx + mov rax, r8 + mov r10, r9 + mov r8, QWORD PTR [rsp+64] + mov r9d, DWORD PTR [rsp+72] + sub rsp, 128 + vmovdqu OWORD PTR [rsp+16], xmm6 + vmovdqu OWORD PTR [rsp+32], xmm7 + vmovdqu OWORD PTR [rsp+48], xmm8 + vmovdqu OWORD PTR [rsp+64], xmm9 + vmovdqu OWORD PTR [rsp+80], xmm10 + vmovdqu OWORD PTR [rsp+96], xmm11 + vmovdqu OWORD PTR [rsp+112], xmm12 + vmovdqu xmm12, OWORD PTR L_avx1_aes_xts_gc_xts + vmovdqu xmm0, OWORD PTR [r8] + xor r12d, r12d + mov r11d, eax + and r11d, 4294967280 + cmp r11d, eax + je L_AES_XTS_decrypt_update_avx1_mul16_64 + sub r11d, 16 + cmp r11d, 16 + jl L_AES_XTS_decrypt_update_avx1_last_31_start +L_AES_XTS_decrypt_update_avx1_mul16_64: + cmp r11d, 64 + jl L_AES_XTS_decrypt_update_avx1_done_64 + and r11d, 4294967232 +L_AES_XTS_decrypt_update_avx1_dec_64: + ; 64 bytes of input + ; aes_dec_64 + lea rcx, QWORD PTR [rdi+r12] + lea rdx, QWORD PTR [rsi+r12] + vmovdqu xmm8, OWORD PTR [rcx] + vmovdqu xmm9, OWORD PTR [rcx+16] + vmovdqu xmm10, OWORD PTR [rcx+32] + vmovdqu xmm11, OWORD PTR [rcx+48] + vpsrad xmm4, xmm0, 31 + vpslld xmm1, xmm0, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm1, xmm1, xmm4 + vpsrad xmm4, xmm1, 31 + vpslld xmm2, xmm1, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm2, xmm2, xmm4 + vpsrad xmm4, xmm2, 31 + vpslld xmm3, xmm2, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm3, xmm3, xmm4 + 
vpxor xmm8, xmm8, xmm0 + vpxor xmm9, xmm9, xmm1 + vpxor xmm10, xmm10, xmm2 + vpxor xmm11, xmm11, xmm3 + ; aes_dec_block + vmovdqu xmm4, OWORD PTR [r10] + vpxor xmm8, xmm8, xmm4 + vpxor xmm9, xmm9, xmm4 + vpxor xmm10, xmm10, xmm4 + vpxor xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+16] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+32] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+48] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+64] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+80] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+96] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+112] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+128] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+144] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + cmp r9d, 11 + vmovdqu xmm4, OWORD PTR [r10+160] + jl L_AES_XTS_decrypt_update_avx1_aes_dec_64_aes_dec_block_last + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+176] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + cmp r9d, 13 + vmovdqu xmm4, OWORD PTR [r10+192] + jl L_AES_XTS_decrypt_update_avx1_aes_dec_64_aes_dec_block_last + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+208] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+224] +L_AES_XTS_decrypt_update_avx1_aes_dec_64_aes_dec_block_last: + vaesdeclast xmm8, xmm8, xmm4 + vaesdeclast xmm9, xmm9, xmm4 + vaesdeclast xmm10, xmm10, xmm4 + vaesdeclast xmm11, xmm11, xmm4 + vpxor xmm8, xmm8, xmm0 + vpxor xmm9, xmm9, xmm1 + vpxor xmm10, xmm10, xmm2 + vpxor xmm11, xmm11, xmm3 + vmovdqu OWORD PTR [rdx], xmm8 + vmovdqu OWORD PTR [rdx+16], xmm9 + vmovdqu OWORD PTR [rdx+32], xmm10 + vmovdqu OWORD PTR [rdx+48], xmm11 + vpsrad xmm4, xmm3, 31 + vpslld xmm0, xmm3, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm0, xmm0, xmm4 + add r12d, 64 + cmp r12d, r11d + jl L_AES_XTS_decrypt_update_avx1_dec_64 +L_AES_XTS_decrypt_update_avx1_done_64: + cmp r12d, eax + mov r11d, eax + je L_AES_XTS_decrypt_update_avx1_done_dec + and r11d, 4294967280 + cmp r11d, eax + je L_AES_XTS_decrypt_update_avx1_mul16 + sub r11d, 16 + sub r11d, r12d + cmp r11d, 16 + jl L_AES_XTS_decrypt_update_avx1_last_31_start + add r11d, r12d +L_AES_XTS_decrypt_update_avx1_mul16: +L_AES_XTS_decrypt_update_avx1_dec_16: + ; 16 bytes of input + lea rcx, QWORD PTR [rdi+r12] + vmovdqu xmm8, OWORD PTR [rcx] + vpxor xmm8, xmm8, xmm0 + ; aes_dec_block + vpxor xmm8, xmm8, [r10] + vmovdqu xmm5, 
OWORD PTR [r10+16] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+32] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+48] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+64] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+80] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+96] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+112] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+128] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+144] + vaesdec xmm8, xmm8, xmm5 + cmp r9d, 11 + vmovdqu xmm5, OWORD PTR [r10+160] + jl L_AES_XTS_decrypt_update_avx1_aes_dec_block_last + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r10+176] + vaesdec xmm8, xmm8, xmm6 + cmp r9d, 13 + vmovdqu xmm5, OWORD PTR [r10+192] + jl L_AES_XTS_decrypt_update_avx1_aes_dec_block_last + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r10+208] + vaesdec xmm8, xmm8, xmm6 + vmovdqu xmm5, OWORD PTR [r10+224] +L_AES_XTS_decrypt_update_avx1_aes_dec_block_last: + vaesdeclast xmm8, xmm8, xmm5 + vpxor xmm8, xmm8, xmm0 + lea rcx, QWORD PTR [rsi+r12] + vmovdqu OWORD PTR [rcx], xmm8 + vpsrad xmm4, xmm0, 31 + vpslld xmm0, xmm0, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm0, xmm0, xmm4 + add r12d, 16 + cmp r12d, r11d + jl L_AES_XTS_decrypt_update_avx1_dec_16 + cmp r12d, eax + je L_AES_XTS_decrypt_update_avx1_done_dec +L_AES_XTS_decrypt_update_avx1_last_31_start: + vpsrad xmm4, xmm0, 31 + vpslld xmm7, xmm0, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm7, xmm7, xmm4 + lea rcx, QWORD PTR [rdi+r12] + vmovdqu xmm8, OWORD PTR [rcx] + vpxor xmm8, xmm8, xmm7 + ; aes_dec_block + vpxor xmm8, xmm8, [r10] + vmovdqu xmm5, OWORD PTR [r10+16] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+32] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+48] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+64] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+80] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+96] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+112] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+128] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+144] + vaesdec xmm8, xmm8, xmm5 + cmp r9d, 11 + vmovdqu xmm5, OWORD PTR [r10+160] + jl L_AES_XTS_decrypt_update_avx1_last_31_aes_dec_block_last + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r10+176] + vaesdec xmm8, xmm8, xmm6 + cmp r9d, 13 + vmovdqu xmm5, OWORD PTR [r10+192] + jl L_AES_XTS_decrypt_update_avx1_last_31_aes_dec_block_last + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r10+208] + vaesdec xmm8, xmm8, xmm6 + vmovdqu xmm5, OWORD PTR [r10+224] +L_AES_XTS_decrypt_update_avx1_last_31_aes_dec_block_last: + vaesdeclast xmm8, xmm8, xmm5 + vpxor xmm8, xmm8, xmm7 + vmovdqu OWORD PTR [rsp], xmm8 + add r12, 16 + xor rdx, rdx +L_AES_XTS_decrypt_update_avx1_last_31_byte_loop: + mov r11b, BYTE PTR [rsp+rdx] + mov cl, BYTE PTR [rdi+r12] + mov BYTE PTR [rsi+r12], r11b + mov BYTE PTR [rsp+rdx], cl + inc r12d + inc edx + cmp r12d, eax + jl L_AES_XTS_decrypt_update_avx1_last_31_byte_loop + sub r12, rdx + vmovdqu xmm8, OWORD PTR [rsp] + vpxor xmm8, xmm8, xmm0 + ; aes_dec_block + vpxor xmm8, xmm8, [r10] + vmovdqu xmm5, OWORD PTR [r10+16] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+32] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+48] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+64] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+80] + vaesdec xmm8, 
xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+96] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+112] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+128] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+144] + vaesdec xmm8, xmm8, xmm5 + cmp r9d, 11 + vmovdqu xmm5, OWORD PTR [r10+160] + jl L_AES_XTS_decrypt_update_avx1_last_31_2_aes_dec_block_last + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r10+176] + vaesdec xmm8, xmm8, xmm6 + cmp r9d, 13 + vmovdqu xmm5, OWORD PTR [r10+192] + jl L_AES_XTS_decrypt_update_avx1_last_31_2_aes_dec_block_last + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r10+208] + vaesdec xmm8, xmm8, xmm6 + vmovdqu xmm5, OWORD PTR [r10+224] +L_AES_XTS_decrypt_update_avx1_last_31_2_aes_dec_block_last: + vaesdeclast xmm8, xmm8, xmm5 + vpxor xmm8, xmm8, xmm0 + sub r12, 16 + lea rcx, QWORD PTR [rsi+r12] + vmovdqu OWORD PTR [rcx], xmm8 +L_AES_XTS_decrypt_update_avx1_done_dec: + vmovdqu OWORD PTR [r8], xmm0 + vmovdqu xmm6, OWORD PTR [rsp+16] + vmovdqu xmm7, OWORD PTR [rsp+32] + vmovdqu xmm8, OWORD PTR [rsp+48] + vmovdqu xmm9, OWORD PTR [rsp+64] + vmovdqu xmm10, OWORD PTR [rsp+80] + vmovdqu xmm11, OWORD PTR [rsp+96] + vmovdqu xmm12, OWORD PTR [rsp+112] + add rsp, 128 + pop r12 + pop rsi + pop rdi + ret +AES_XTS_decrypt_update_avx1 ENDP +_text ENDS ENDIF END diff --git a/wolfssl/wolfcrypt/aes.h b/wolfssl/wolfcrypt/aes.h index df0636dfe..772e6e8eb 100644 --- a/wolfssl/wolfcrypt/aes.h +++ b/wolfssl/wolfcrypt/aes.h @@ -671,9 +671,9 @@ WOLFSSL_API int wc_AesXtsDecryptConsecutiveSectors(XtsAes* aes, #ifdef WOLFSSL_AESXTS_STREAM -WOLFSSL_API int wc_AesXtsEncryptStart(XtsAes* aes, byte* i, word32 iSz); +WOLFSSL_API int wc_AesXtsEncryptInit(XtsAes* aes, byte* i, word32 iSz); -WOLFSSL_API int wc_AesXtsDecryptStart(XtsAes* aes, byte* i, word32 iSz); +WOLFSSL_API int wc_AesXtsDecryptInit(XtsAes* aes, byte* i, word32 iSz); WOLFSSL_API int wc_AesXtsEncryptUpdate(XtsAes* aes, byte* out, const byte* in, word32 sz, byte *i); From 4f1f7b3a4d1ce93665c617cafb92e3ff52fa20dd Mon Sep 17 00:00:00 2001 From: Daniel Pouzzner Date: Tue, 14 May 2024 01:01:47 -0500 Subject: [PATCH 06/10] linuxkm/lkcapi_glue.c: update names of wc_AesXts{En,De}cryptInit(). wolfcrypt/src/aes.c: activate _AesXtsHelper() in AesXts{En,De}cryptUpdate_sw(). 
--- linuxkm/lkcapi_glue.c | 8 ++++---- wolfcrypt/src/aes.c | 18 ++++++++---------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/linuxkm/lkcapi_glue.c b/linuxkm/lkcapi_glue.c index b9711560d..467aa4856 100644 --- a/linuxkm/lkcapi_glue.c +++ b/linuxkm/lkcapi_glue.c @@ -930,10 +930,10 @@ static int km_AesXtsEncrypt(struct skcipher_request *req) tail = 0; } - err = wc_AesXtsEncryptStart(ctx->aesXts, walk.iv, walk.ivsize); + err = wc_AesXtsEncryptInit(ctx->aesXts, walk.iv, walk.ivsize); if (unlikely(err)) { - pr_err("%s: wc_AesXtsEncryptStart failed: %d\n", + pr_err("%s: wc_AesXtsEncryptInit failed: %d\n", crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)), err); return -EINVAL; } @@ -1053,10 +1053,10 @@ static int km_AesXtsDecrypt(struct skcipher_request *req) tail = 0; } - err = wc_AesXtsDecryptStart(ctx->aesXts, walk.iv, walk.ivsize); + err = wc_AesXtsDecryptInit(ctx->aesXts, walk.iv, walk.ivsize); if (unlikely(err)) { - pr_err("%s: wc_AesXtsDecryptStart failed: %d\n", + pr_err("%s: wc_AesXtsDecryptInit failed: %d\n", crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)), err); return -EINVAL; } diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index 796683234..f9b448112 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -12751,13 +12751,12 @@ static int AesXtsEncryptUpdate_sw(XtsAes* xaes, byte* out, const byte* in, word32 blocks = (sz / AES_BLOCK_SIZE); Aes *aes = &xaes->aes; -#if 0 && defined(HAVE_AES_ECB) +#ifdef HAVE_AES_ECB /* encrypt all of buffer at once when possible */ - if ((in != out) && ((sz & (AES_BLOCK_SIZE - 1)) == 0)) { /* can not handle inline */ + if (in != out) { /* can not handle inline */ XMEMCPY(out, i, AES_BLOCK_SIZE); if ((ret = _AesXtsHelper(aes, out, in, sz, AES_ENCRYPTION)) != 0) return ret; - XMEMCPY(i, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); } #endif @@ -12765,8 +12764,8 @@ static int AesXtsEncryptUpdate_sw(XtsAes* xaes, byte* out, const byte* in, word32 j; byte carry = 0; -#if 0 && defined(HAVE_AES_ECB) - if ((in == out) || ((sz & (AES_BLOCK_SIZE - 1)) != 0)) +#ifdef HAVE_AES_ECB + if (in == out) #endif { /* check for if inline */ byte buf[AES_BLOCK_SIZE]; @@ -13199,19 +13198,18 @@ static int AesXtsDecryptUpdate_sw(XtsAes* xaes, byte* out, const byte* in, blocks--; } -#if 0 && defined(HAVE_AES_ECB) +#ifdef HAVE_AES_ECB /* decrypt all of buffer at once when possible */ - if ((in != out) && ((sz & (AES_BLOCK_SIZE - 1)) == 0)) { /* can not handle inline */ + if (in != out) { /* can not handle inline */ XMEMCPY(out, i, AES_BLOCK_SIZE); if ((ret = _AesXtsHelper(aes, out, in, sz, AES_DECRYPTION)) != 0) return ret; - XMEMCPY(i, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); } #endif while (blocks > 0) { -#if 0 && defined(HAVE_AES_ECB) - if ((in == out) || ((sz & (AES_BLOCK_SIZE - 1)) != 0)) +#ifdef HAVE_AES_ECB + if (in == out) #endif { /* check for if inline */ byte buf[AES_BLOCK_SIZE]; From 8392748cda3e3c0e9071e50cc31e6ec989f49bc4 Mon Sep 17 00:00:00 2001 From: Daniel Pouzzner Date: Tue, 14 May 2024 12:40:24 -0500 Subject: [PATCH 07/10] wolfcrypt/src/aes.c: deduplicate code, AesXts{En,De}crypt_sw() vs AesXts{En,De}cryptUpdate_sw().
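
After this change the one-shot AesXts{En,De}crypt_sw() routines just derive the starting tweak with wc_AesEncryptDirect() and delegate to the corresponding Update_sw() routine, so the per-block tweak advance lives in exactly one place. For reference, that shared step is the GF(2^128) multiply-by-x shown below -- a standalone restatement of the carry loop retained in the Update_sw paths, not code added by this patch (the helper name xts_tweak_mul_x is illustrative; GF_XTS is the library's 0x87 reduction constant):

    /* Multiply the 128-bit tweak by x in GF(2^128), least-significant byte
     * first, reducing modulo x^128 + x^7 + x^2 + x + 1 when the shifted-out
     * bit wraps around. */
    static void xts_tweak_mul_x(byte tweak_block[AES_BLOCK_SIZE])
    {
        word32 j;
        byte carry = 0;

        for (j = 0; j < AES_BLOCK_SIZE; j++) {
            byte tmpC = (tweak_block[j] >> 7) & 0x01; /* outgoing high bit */
            tweak_block[j] = (byte)((tweak_block[j] << 1) + carry);
            carry = tmpC;
        }
        if (carry)
            tweak_block[0] ^= GF_XTS; /* fold x^128 back into the low byte */
    }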
--- wolfcrypt/src/aes.c | 206 ++++---------------------------------------- 1 file changed, 17 insertions(+), 189 deletions(-) diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index f9b448112..931a69f3f 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -12631,90 +12631,21 @@ static WARN_UNUSED_RESULT int _AesXtsHelper( * returns 0 on success */ /* Software AES - XTS Encrypt */ + +static int AesXtsEncryptUpdate_sw(XtsAes* xaes, byte* out, const byte* in, + word32 sz, + byte *i); static int AesXtsEncrypt_sw(XtsAes* xaes, byte* out, const byte* in, word32 sz, const byte* i) { int ret; - word32 blocks = (sz / AES_BLOCK_SIZE); - Aes *aes = &xaes->aes; byte tweak_block[AES_BLOCK_SIZE]; ret = wc_AesEncryptDirect(&xaes->tweak, tweak_block, i); if (ret != 0) return ret; -#ifdef HAVE_AES_ECB - /* encrypt all of buffer at once when possible */ - if (in != out) { /* can not handle inline */ - XMEMCPY(out, tweak_block, AES_BLOCK_SIZE); - if ((ret = _AesXtsHelper(&xaes->aes, out, in, sz, AES_ENCRYPTION)) != 0) - return ret; - } -#endif - - while (blocks > 0) { - word32 j; - byte carry = 0; - -#ifdef HAVE_AES_ECB - if (in == out) -#endif - { /* check for if inline */ - byte buf[AES_BLOCK_SIZE]; - - XMEMCPY(buf, in, AES_BLOCK_SIZE); - xorbuf(buf, tweak_block, AES_BLOCK_SIZE); - ret = wc_AesEncryptDirect(aes, out, buf); - if (ret != 0) - return ret; - } - xorbuf(out, tweak_block, AES_BLOCK_SIZE); - - /* multiply by shift left and propagate carry */ - for (j = 0; j < AES_BLOCK_SIZE; j++) { - byte tmpC; - - tmpC = (tweak_block[j] >> 7) & 0x01; - tweak_block[j] = (byte)((tweak_block[j] << 1) + carry); - carry = tmpC; - } - if (carry) { - tweak_block[0] ^= GF_XTS; - } - - in += AES_BLOCK_SIZE; - out += AES_BLOCK_SIZE; - sz -= AES_BLOCK_SIZE; - blocks--; - } - - /* stealing operation of XTS to handle left overs */ - if (sz > 0) { - byte buf[AES_BLOCK_SIZE]; - - XMEMCPY(buf, out - AES_BLOCK_SIZE, AES_BLOCK_SIZE); - if (sz >= AES_BLOCK_SIZE) { /* extra sanity check before copy */ - return BUFFER_E; - } - if (in != out) { - XMEMCPY(out, buf, sz); - XMEMCPY(buf, in, sz); - } - else { - byte buf2[AES_BLOCK_SIZE]; - - XMEMCPY(buf2, buf, sz); - XMEMCPY(buf, in, sz); - XMEMCPY(out, buf2, sz); - } - - xorbuf(buf, tweak_block, AES_BLOCK_SIZE); - ret = wc_AesEncryptDirect(aes, out - AES_BLOCK_SIZE, buf); - if (ret == 0) - xorbuf(out - AES_BLOCK_SIZE, tweak_block, AES_BLOCK_SIZE); - } - - return ret; + return AesXtsEncryptUpdate_sw(xaes, out, in, sz, tweak_block); } #ifdef WOLFSSL_AESXTS_STREAM @@ -12726,10 +12657,12 @@ static int AesXtsEncrypt_sw(XtsAes* xaes, byte* out, const byte* in, word32 sz, * * returns 0 on success */ -static int AesXtsInit_sw(XtsAes* xaes, byte* i) { +static int AesXtsInitTweak_sw(XtsAes* xaes, byte* i) { return wc_AesEncryptDirect(&xaes->tweak, i, i); } +#endif /* WOLFSSL_AESXTS_STREAM */ + /* Block-streaming AES-XTS. * * Supply block-aligned input data with successive calls. Final call need not @@ -12825,8 +12758,6 @@ static int AesXtsEncryptUpdate_sw(XtsAes* xaes, byte* out, const byte* in, return ret; } -#endif /* WOLFSSL_AESXTS_STREAM */ - /* AES with XTS mode. (XTS) XEX encryption with Tweak and cipher text Stealing. 
* * xaes AES keys to use for block encrypt/decrypt @@ -12956,7 +12887,7 @@ int wc_AesXtsEncryptInit(XtsAes* xaes, byte* i, word32 iSz) else #endif /* WOLFSSL_AESNI */ { - ret = AesXtsInit_sw(xaes, i); + ret = AesXtsInitTweak_sw(xaes, i); } } @@ -13032,6 +12963,7 @@ int wc_AesXtsEncryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 sz, #endif /* WOLFSSL_AESXTS_STREAM */ + /* Same process as encryption but use aes_decrypt key. * * xaes AES keys to use for block encrypt/decrypt @@ -13043,125 +12975,23 @@ int wc_AesXtsEncryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 sz, * returns 0 on success */ /* Software AES - XTS Decrypt */ + +static int AesXtsDecryptUpdate_sw(XtsAes* xaes, byte* out, const byte* in, + word32 sz, byte *i); + static int AesXtsDecrypt_sw(XtsAes* xaes, byte* out, const byte* in, word32 sz, const byte* i) { - int ret = 0; - word32 blocks = (sz / AES_BLOCK_SIZE); -#ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS - Aes *aes = &xaes->aes_decrypt; -#else - Aes *aes = &xaes->aes; -#endif - word32 j; - byte carry = 0; + int ret; byte tweak_block[AES_BLOCK_SIZE]; - byte stl = (sz % AES_BLOCK_SIZE); ret = wc_AesEncryptDirect(&xaes->tweak, tweak_block, i); if (ret != 0) return ret; - /* if Stealing then break out of loop one block early to handle special - * case */ - if (stl > 0) { - blocks--; - } - -#ifdef HAVE_AES_ECB - /* decrypt all of buffer at once when possible */ - if (in != out) { /* can not handle inline */ - XMEMCPY(out, tweak_block, AES_BLOCK_SIZE); - if ((ret = _AesXtsHelper(aes, out, in, sz, AES_DECRYPTION)) != 0) - return ret; - } -#endif - - while (blocks > 0) { -#ifdef HAVE_AES_ECB - if (in == out) -#endif - { /* check for if inline */ - byte buf[AES_BLOCK_SIZE]; - - XMEMCPY(buf, in, AES_BLOCK_SIZE); - xorbuf(buf, tweak_block, AES_BLOCK_SIZE); - ret = wc_AesDecryptDirect(aes, out, buf); - if (ret != 0) - return ret; - } - xorbuf(out, tweak_block, AES_BLOCK_SIZE); - - /* multiply by shift left and propagate carry */ - for (j = 0; j < AES_BLOCK_SIZE; j++) { - byte tmpC; - - tmpC = (tweak_block[j] >> 7) & 0x01; - tweak_block[j] = (byte)((tweak_block[j] << 1) + carry); - carry = tmpC; - } - if (carry) { - tweak_block[0] ^= GF_XTS; - } - carry = 0; - - in += AES_BLOCK_SIZE; - out += AES_BLOCK_SIZE; - sz -= AES_BLOCK_SIZE; - blocks--; - } - - /* stealing operation of XTS to handle left overs */ - if (sz >= AES_BLOCK_SIZE) { - byte buf[AES_BLOCK_SIZE]; - byte tmp2[AES_BLOCK_SIZE]; - - /* multiply by shift left and propagate carry */ - for (j = 0; j < AES_BLOCK_SIZE; j++) { - byte tmpC; - - tmpC = (tweak_block[j] >> 7) & 0x01; - tmp2[j] = (byte)((tweak_block[j] << 1) + carry); - carry = tmpC; - } - if (carry) { - tmp2[0] ^= GF_XTS; - } - - XMEMCPY(buf, in, AES_BLOCK_SIZE); - xorbuf(buf, tmp2, AES_BLOCK_SIZE); - ret = wc_AesDecryptDirect(aes, out, buf); - if (ret != 0) - return ret; - xorbuf(out, tmp2, AES_BLOCK_SIZE); - - /* tmp2 holds partial | last */ - XMEMCPY(tmp2, out, AES_BLOCK_SIZE); - in += AES_BLOCK_SIZE; - out += AES_BLOCK_SIZE; - sz -= AES_BLOCK_SIZE; - - /* Make buffer with end of cipher text | last */ - XMEMCPY(buf, tmp2, AES_BLOCK_SIZE); - if (sz >= AES_BLOCK_SIZE) { /* extra sanity check before copy */ - return BUFFER_E; - } - XMEMCPY(buf, in, sz); - XMEMCPY(out, tmp2, sz); - - xorbuf(buf, tweak_block, AES_BLOCK_SIZE); - ret = wc_AesDecryptDirect(aes, tmp2, buf); - if (ret != 0) - return ret; - xorbuf(tmp2, tweak_block, AES_BLOCK_SIZE); - XMEMCPY(out - AES_BLOCK_SIZE, tmp2, AES_BLOCK_SIZE); - } - - return ret; + return 
AesXtsDecryptUpdate_sw(xaes, out, in, sz, tweak_block); } -#ifdef WOLFSSL_AESXTS_STREAM - /* Block-streaming AES-XTS. * * Same process as encryption but use decrypt key. @@ -13290,8 +13120,6 @@ static int AesXtsDecryptUpdate_sw(XtsAes* xaes, byte* out, const byte* in, return ret; } -#endif /* WOLFSSL_AESXTS_STREAM */ - /* Same process as encryption but Aes key is AES_DECRYPTION type. * * xaes AES keys to use for block encrypt/decrypt @@ -13433,7 +13261,7 @@ int wc_AesXtsDecryptInit(XtsAes* xaes, byte* i, word32 iSz) else #endif /* WOLFSSL_AESNI */ { - ret = AesXtsInit_sw(xaes, i); + ret = AesXtsInitTweak_sw(xaes, i); } } From 2fe366cc743d27c4496578bcd5541b39713fc0ec Mon Sep 17 00:00:00 2001 From: Daniel Pouzzner Date: Tue, 14 May 2024 18:23:22 -0500 Subject: [PATCH 08/10] wolfcrypt/test/test.c: add test coverage for WOLFSSL_AESXTS_STREAM. linuxkm/lkcapi_glue.c: typographic cleanups, and failsafe error return constructs when skcipher_walk_virt() returns zero walk.nbytes. wolfcrypt/src/aes.c: additional comments and inline documentation. .github/workflows/openvpn.yml: disable test on master branch. --- .github/workflows/openvpn.yml | 3 +- linuxkm/lkcapi_glue.c | 52 +++--- wolfcrypt/src/aes.c | 22 ++- wolfcrypt/test/test.c | 316 +++++++++++++++++++++++++++++++++- 4 files changed, 363 insertions(+), 30 deletions(-) diff --git a/.github/workflows/openvpn.yml b/.github/workflows/openvpn.yml index 97243cb9e..10a20b065 100644 --- a/.github/workflows/openvpn.yml +++ b/.github/workflows/openvpn.yml @@ -39,7 +39,8 @@ jobs: fail-fast: false matrix: # List of refs to test - ref: [ release/2.6, v2.6.0, master ] + # disabled master on 20240514 -- see https://github.com/wolfSSL/wolfssl/issues/7508 + ref: [ release/2.6, v2.6.0 ] name: ${{ matrix.ref }} runs-on: ubuntu-latest # This should be a safe limit for the tests to run. diff --git a/linuxkm/lkcapi_glue.c b/linuxkm/lkcapi_glue.c index 467aa4856..746edc929 100644 --- a/linuxkm/lkcapi_glue.c +++ b/linuxkm/lkcapi_glue.c @@ -925,7 +925,7 @@ static int km_AesXtsEncrypt(struct skcipher_request *req) err = skcipher_walk_virt(&walk, req, false); if (!walk.nbytes) - return err; + return err ? : -EINVAL; } else { tail = 0; } @@ -939,6 +939,9 @@ static int km_AesXtsEncrypt(struct skcipher_request *req) } while ((nbytes = walk.nbytes) != 0) { + /* if this isn't the final call, pass block-aligned data to prevent + * end-of-message ciphertext stealing. + */ if (nbytes < walk.total) nbytes &= ~(AES_BLOCK_SIZE - 1); @@ -961,7 +964,7 @@ static int km_AesXtsEncrypt(struct skcipher_request *req) } } - if (unlikely(tail > 0 && !err)) { + if (unlikely(tail > 0)) { struct scatterlist sg_src[2], sg_dst[2]; struct scatterlist *src, *dst; @@ -1048,7 +1051,7 @@ static int km_AesXtsDecrypt(struct skcipher_request *req) err = skcipher_walk_virt(&walk, req, false); if (!walk.nbytes) - return err; + return err ? : -EINVAL; } else { tail = 0; } @@ -1062,6 +1065,9 @@ static int km_AesXtsDecrypt(struct skcipher_request *req) } while ((nbytes = walk.nbytes) != 0) { + /* if this isn't the final call, pass block-aligned data to prevent + * end-of-message ciphertext stealing. 
+ */ if (nbytes < walk.total) nbytes &= ~(AES_BLOCK_SIZE - 1); @@ -1084,32 +1090,32 @@ static int km_AesXtsDecrypt(struct skcipher_request *req) } } - if (unlikely(tail > 0 && !err)) { - struct scatterlist sg_src[2], sg_dst[2]; - struct scatterlist *src, *dst; + if (unlikely(tail > 0)) { + struct scatterlist sg_src[2], sg_dst[2]; + struct scatterlist *src, *dst; - dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); - if (req->dst != req->src) - dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); + dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); - skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, - req->iv); + skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, + req->iv); - err = skcipher_walk_virt(&walk, &subreq, false); - if (err) - return err; + err = skcipher_walk_virt(&walk, &subreq, false); + if (err) + return err; - err = wc_AesXtsDecryptUpdate(ctx->aesXts, walk.dst.virt.addr, - walk.src.virt.addr, walk.nbytes, - walk.iv); + err = wc_AesXtsDecryptUpdate(ctx->aesXts, walk.dst.virt.addr, + walk.src.virt.addr, walk.nbytes, + walk.iv); - if (unlikely(err)) { - pr_err("%s: wc_AesXtsDecryptUpdate failed: %d\n", - crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)), err); - return -EINVAL; - } + if (unlikely(err)) { + pr_err("%s: wc_AesXtsDecryptUpdate failed: %d\n", + crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)), err); + return -EINVAL; + } - err = skcipher_walk_done(&walk, 0); + err = skcipher_walk_done(&walk, 0); } } diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index 931a69f3f..e19ec0eed 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -12840,6 +12840,15 @@ int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, #ifdef WOLFSSL_AESXTS_STREAM +/* Block-streaming AES-XTS. + * + * xaes AES keys to use for block encrypt/decrypt + * i readwrite value to use for tweak + * iSz size of i buffer, should always be AES_BLOCK_SIZE but having this input + * adds a sanity check on how the user calls the function. + * + * returns 0 on success + */ int wc_AesXtsEncryptInit(XtsAes* xaes, byte* i, word32 iSz) { int ret; @@ -12894,12 +12903,15 @@ int wc_AesXtsEncryptInit(XtsAes* xaes, byte* i, word32 iSz) return ret; } -/* AES with XTS mode. (XTS) XEX encryption with Tweak and cipher text Stealing. +/* Block-streaming AES-XTS + * + * Note that sz must be greater than AES_BLOCK_SIZE in each call, and must be a + * multiple of AES_BLOCK_SIZE in all but the final call. * * xaes AES keys to use for block encrypt/decrypt * out output buffer to hold cipher text * in input plain text buffer to encrypt - * sz size of both out and in buffers + * sz size of both out and in buffers -- must be >= AES_BLOCK_SIZE. * i value to use for tweak * iSz size of i buffer, should always be AES_BLOCK_SIZE but having this input * adds a sanity check on how the user calls the function. @@ -13211,7 +13223,6 @@ int wc_AesXtsDecrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, * i readwrite value to use for tweak * iSz size of i buffer, should always be AES_BLOCK_SIZE but having this input * adds a sanity check on how the user calls the function. - * tweak_block buffer of size AES_BLOCK_SIZE to use for tweak state * * returns 0 on success */ @@ -13269,7 +13280,10 @@ int wc_AesXtsDecryptInit(XtsAes* xaes, byte* i, word32 iSz) return ret; } -/* Same process as encryption but Aes key is AES_DECRYPTION type. 
+/* Block-streaming AES-XTS + * + * Note that sz must be greater than AES_BLOCK_SIZE in each call, and must be a + * multiple of AES_BLOCK_SIZE in all but the final call. * * xaes AES keys to use for block encrypt/decrypt * out output buffer to hold plain text diff --git a/wolfcrypt/test/test.c b/wolfcrypt/test/test.c index e1bde7b1b..e07329a6d 100644 --- a/wolfcrypt/test/test.c +++ b/wolfcrypt/test/test.c @@ -9863,6 +9863,9 @@ static wc_test_ret_t aes_xts_128_test(void) wc_test_ret_t ret = 0; unsigned char buf[AES_BLOCK_SIZE * 2 + 8]; unsigned char cipher[AES_BLOCK_SIZE * 2 + 8]; +#ifdef WOLFSSL_AESXTS_STREAM + unsigned char i_copy[AES_BLOCK_SIZE]; +#endif /* 128 key tests */ WOLFSSL_SMALL_STACK_STATIC unsigned char k1[] = { @@ -9987,6 +9990,34 @@ static wc_test_ret_t aes_xts_128_test(void) if (XMEMCMP(c2, buf, sizeof(c2))) ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i2, sizeof(i2)); + + ret = wc_AesXtsEncryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf, p2, AES_BLOCK_SIZE, i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf + AES_BLOCK_SIZE, p2 + AES_BLOCK_SIZE, sizeof(p2) - AES_BLOCK_SIZE, i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(c2, buf, sizeof(c2))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + #if defined(DEBUG_VECTOR_REGISTER_ACCESS_AESXTS) && \ defined(WC_C_DYNAMIC_FALLBACK) WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); @@ -10015,6 +10046,27 @@ static wc_test_ret_t aes_xts_128_test(void) if (XMEMCMP(c1, buf, AES_BLOCK_SIZE)) ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i1, sizeof(i1)); + + ret = wc_AesXtsEncryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf, p1, sizeof(p1), i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(c1, buf, sizeof(c1))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + #if defined(DEBUG_VECTOR_REGISTER_ACCESS_AESXTS) && \ defined(WC_C_DYNAMIC_FALLBACK) WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); @@ -10040,6 +10092,27 @@ static wc_test_ret_t aes_xts_128_test(void) if (XMEMCMP(cp2, cipher, sizeof(cp2))) ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i1, sizeof(i1)); + + ret = wc_AesXtsEncryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf, pp, sizeof(pp), i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + 
if (XMEMCMP(cp2, buf, sizeof(cp2))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + #if defined(DEBUG_VECTOR_REGISTER_ACCESS_AESXTS) && \ defined(WC_C_DYNAMIC_FALLBACK) WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); @@ -10073,6 +10146,27 @@ static wc_test_ret_t aes_xts_128_test(void) if (XMEMCMP(pp, buf, sizeof(pp))) ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i1, sizeof(i1)); + + ret = wc_AesXtsDecryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsDecryptUpdate(aes, buf, cipher, sizeof(pp), i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(pp, buf, sizeof(pp))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + #if defined(DEBUG_VECTOR_REGISTER_ACCESS_AESXTS) && \ defined(WC_C_DYNAMIC_FALLBACK) WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); @@ -10104,9 +10198,30 @@ static wc_test_ret_t aes_xts_128_test(void) #endif if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); - if (XMEMCMP(p1, buf, AES_BLOCK_SIZE)) + if (XMEMCMP(p1, buf, sizeof(p1))) ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i1, sizeof(i1)); + + ret = wc_AesXtsDecryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsDecryptUpdate(aes, buf, c1, sizeof(c1), i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(p1, buf, sizeof(p1))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + #if defined(DEBUG_VECTOR_REGISTER_ACCESS_AESXTS) && \ defined(WC_C_DYNAMIC_FALLBACK) WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); @@ -10176,6 +10291,34 @@ static wc_test_ret_t aes_xts_128_test(void) if (XMEMCMP(c3, buf, sizeof(c3))) ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i3, sizeof(i3)); + + ret = wc_AesXtsEncryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf, p3, AES_BLOCK_SIZE, i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf + AES_BLOCK_SIZE, p3 + AES_BLOCK_SIZE, sizeof(p3) - AES_BLOCK_SIZE, i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(c3, buf, sizeof(c3))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + ret = wc_AesXtsSetKeyNoInit(aes, k3, sizeof(k3), AES_DECRYPTION); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); @@ -10192,6 +10335,34 @@ static wc_test_ret_t aes_xts_128_test(void) if (XMEMCMP(p3, buf, sizeof(p3))) ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#ifdef 
WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i3, sizeof(i3)); + + ret = wc_AesXtsDecryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsDecryptUpdate(aes, buf, c3, AES_BLOCK_SIZE, i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsDecryptUpdate(aes, buf + AES_BLOCK_SIZE, c3 + AES_BLOCK_SIZE, sizeof(c3) - AES_BLOCK_SIZE, i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(p3, buf, sizeof(p3))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + #endif /* !HAVE_FIPS */ #if !defined(BENCH_EMBEDDED) && !defined(HAVE_CAVIUM) && \ @@ -10206,6 +10377,9 @@ static wc_test_ret_t aes_xts_128_test(void) #endif int i; int j; +#ifdef WOLFSSL_AESXTS_STREAM + int k; +#endif #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) if (large_input == NULL) ERROR_OUT(WC_TEST_RET_ENC_EC(MEMORY_E), out); @@ -10247,6 +10421,71 @@ static wc_test_ret_t aes_xts_128_test(void) } } } + +#ifdef WOLFSSL_AESXTS_STREAM + for (i = 0; i < (int)LARGE_XTS_SZ; i++) + large_input[i] = (byte)i; + + for (j = 16; j < (int)LARGE_XTS_SZ; j++) { + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + XMEMCPY(i_copy, i1, sizeof(i1)); + ret = wc_AesXtsEncryptInit(aes, i_copy, sizeof(i_copy)); + #if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + for (k = 0; k < j; k += AES_BLOCK_SIZE) { + ret = wc_AesXtsEncryptUpdate(aes, large_input + k, large_input + k, (j - k) < AES_BLOCK_SIZE*2 ? j - k : AES_BLOCK_SIZE, i_copy); + #if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if ((j - k) < AES_BLOCK_SIZE*2) + break; + } + + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_DECRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + XMEMCPY(i_copy, i1, sizeof(i1)); + ret = wc_AesXtsDecryptInit(aes, i_copy, sizeof(i_copy)); + #if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + for (k = 0; k < j; k += AES_BLOCK_SIZE) { + ret = wc_AesXtsDecryptUpdate(aes, large_input + k, large_input + k, (j - k) < AES_BLOCK_SIZE*2 ? 
j - k : AES_BLOCK_SIZE, i_copy); + #if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, + WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if ((j - k) < AES_BLOCK_SIZE*2) + break; + } + + for (i = 0; i < j; i++) { + if (large_input[i] != (byte)i) { + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + } + } + } +#endif /* WOLFSSL_AESXTS_STREAM */ + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) XFREE(large_input, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); #endif @@ -10282,6 +10521,9 @@ static wc_test_ret_t aes_xts_256_test(void) wc_test_ret_t ret = 0; unsigned char buf[AES_BLOCK_SIZE * 3]; unsigned char cipher[AES_BLOCK_SIZE * 3]; +#ifdef WOLFSSL_AESXTS_STREAM + unsigned char i_copy[AES_BLOCK_SIZE]; +#endif /* 256 key tests */ WOLFSSL_SMALL_STACK_STATIC unsigned char k1[] = { @@ -10389,6 +10631,34 @@ static wc_test_ret_t aes_xts_256_test(void) if (XMEMCMP(c2, buf, sizeof(c2))) ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i2, sizeof(i2)); + + ret = wc_AesXtsEncryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf, p2, AES_BLOCK_SIZE, i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf + AES_BLOCK_SIZE, p2 + AES_BLOCK_SIZE, sizeof(p2) - AES_BLOCK_SIZE, i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(c2, buf, sizeof(c2))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + XMEMSET(buf, 0, sizeof(buf)); ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); if (ret != 0) @@ -10402,6 +10672,27 @@ static wc_test_ret_t aes_xts_256_test(void) if (XMEMCMP(c1, buf, AES_BLOCK_SIZE)) ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i1, sizeof(i1)); + + ret = wc_AesXtsEncryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf, p1, sizeof(p1), i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(c1, buf, sizeof(c1))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + /* partial block encryption test */ XMEMSET(cipher, 0, sizeof(cipher)); ret = wc_AesXtsEncrypt(aes, cipher, pp, sizeof(pp), i1, sizeof(i1)); @@ -10441,9 +10732,30 @@ static wc_test_ret_t aes_xts_256_test(void) #endif if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); - if (XMEMCMP(p1, buf, AES_BLOCK_SIZE)) + if (XMEMCMP(p1, buf, sizeof(p1))) ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i1, sizeof(i1)); + + ret = wc_AesXtsDecryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, 
&aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsDecryptUpdate(aes, buf, c1, sizeof(c1), i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(p1, buf, sizeof(p1))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + XMEMSET(buf, 0, sizeof(buf)); ret = wc_AesXtsSetKeyNoInit(aes, k2, sizeof(k2), AES_DECRYPTION); if (ret != 0) From 4331bc092be1d2a4162dbd818c554d511a658822 Mon Sep 17 00:00:00 2001 From: Daniel Pouzzner Date: Tue, 14 May 2024 19:24:27 -0500 Subject: [PATCH 09/10] configure.ac: on armasm, disable ENABLED_AESXTS_STREAM by default (not implemented). --- configure.ac | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/configure.ac b/configure.ac index bd9acdec8..09bfa9094 100644 --- a/configure.ac +++ b/configure.ac @@ -939,7 +939,7 @@ then if test "$ENABLED_FIPS" = "no" || test "$HAVE_FIPS_VERSION" -ge 6 || test "$FIPS_VERSION" = "v5-dev"; then test "$enable_aesxts" = "" && enable_aesxts=yes - test "$enable_aesxts_stream" = "" && test "$enable_aesxts" = "yes" && enable_aesxts_stream=yes + test "$enable_aesxts_stream" = "" && test "$enable_aesxts" = "yes" && (test "$enable_armasm" = "" || test "$enable_armasm" = "no") && enable_aesxts_stream=yes test "$enable_aessiv" = "" && enable_aessiv=yes fi @@ -1079,7 +1079,7 @@ then if test "$ENABLED_FIPS" = "no" || test "$HAVE_FIPS_VERSION" -ge 6 || test "$FIPS_VERSION" = "v5-dev"; then test "$enable_aesxts" = "" && enable_aesxts=yes - test "$enable_aesxts_stream" = "" && test "$enable_aesxts" = "yes" && enable_aesxts_stream=yes + test "$enable_aesxts_stream" = "" && test "$enable_aesxts" = "yes" && (test "$enable_armasm" = "" || test "$enable_armasm" = "no") && enable_aesxts_stream=yes test "$enable_aessiv" = "" && enable_aessiv=yes fi @@ -4849,10 +4849,16 @@ AC_ARG_ENABLE([aesxts], [ ENABLED_AESXTS=$enableval ], [ ENABLED_AESXTS=no ] ) + +AS_IF([test "$ENABLED_AESXTS" = "yes" && test "$ENABLED_ARMASM" = "no"], + [ ENABLED_AESXTS_STREAM_DEFAULT=yes ], + [ ENABLED_AESXTS_STREAM_DEFAULT=no ] + ) + AC_ARG_ENABLE([aesxts-stream], [AS_HELP_STRING([--enable-aesxts-stream],[Enable wolfSSL AES-XTS support with streaming APIs (default: disabled)])], [ ENABLED_AESXTS_STREAM=$enableval ], - [ ENABLED_AESXTS_STREAM=$ENABLED_AESXTS ] + [ ENABLED_AESXTS_STREAM=$ENABLED_AESXTS_STREAM_DEFAULT ] ) # legacy old option name, for compatibility: @@ -5077,10 +5083,8 @@ AS_CASE([$FIPS_VERSION], AS_IF([test "x$ENABLED_AESXTS" = "xyes" && test "x$ENABLED_AESNI" = "xyes"], [AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_AES_XTS"]) - AS_IF([test "x$ENABLED_AESXTS_STREAM" = "xno"], - [ENABLED_AESXTS_STREAM="yes"; AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_AESXTS_STREAM"]) - AS_IF([test "x$ENABLED_AESXTS_STREAM" = "xyes" && test "x$ENABLED_AESNI" = "xyes"], - [AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_AESXTS_STREAM"]) + AS_IF([test "x$ENABLED_AESXTS_STREAM" = "xno" && ! 
(test "$ENABLED_ARMASM" = "yes" || test "$ENABLED_ARMASM_CRYPTO" = "no")], + [ENABLED_AESXTS_STREAM="yes"]) AS_IF([(test "$ENABLED_AESCCM" = "yes" && test "$HAVE_AESCCM_PORT" != "yes") || (test "$ENABLED_AESCTR" = "yes" && test "$HAVE_AESCTR_PORT" != "yes") || From 1469aab109b68b8787b846fff5a5902e31eea91a Mon Sep 17 00:00:00 2001 From: Daniel Pouzzner Date: Wed, 15 May 2024 00:45:51 -0500 Subject: [PATCH 10/10] linuxkm/lkcapi_glue.c: add native test coverage for WOLFSSL_AESXTS_STREAM. wolfcrypt/test/test.c: * add WOLFSSL_AESXTS_STREAM testing to the LARGE_XTS_SZ exercise in aes_xts_128_test(). * add the LARGE_XTS_SZ exercise to aes_xts_256_test(). * add aes_xts_192_test(). * fix -Werror=frame-larger-than=2048 in ed25519_test(). --- linuxkm/lkcapi_glue.c | 168 +++++++- wolfcrypt/test/test.c | 953 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 1102 insertions(+), 19 deletions(-) diff --git a/linuxkm/lkcapi_glue.c b/linuxkm/lkcapi_glue.c index 746edc929..047f3258b 100644 --- a/linuxkm/lkcapi_glue.c +++ b/linuxkm/lkcapi_glue.c @@ -799,7 +799,7 @@ static int gcmAesAead_loaded = 0; (defined(LINUXKM_LKCAPI_REGISTER_ALL) || \ defined(LINUXKM_LKCAPI_REGISTER_AESXTS)) -#ifndef WOLFSSL_AESGCM_STREAM +#ifndef WOLFSSL_AESXTS_STREAM #error LKCAPI registration of AES-XTS requires WOLFSSL_AESXTS_STREAM (--enable-aesxts-stream). #endif @@ -2022,6 +2022,25 @@ static int aes_xts_128_test(void) XMEMSET(buf, 0, AES_XTS_128_TEST_BUF_SIZ); + XMEMCPY(iv, i2, sizeof(i2)); + ret = wc_AesXtsEncryptInit(aes, iv, sizeof(iv)); + if (ret != 0) + goto out; + ret = wc_AesXtsEncryptUpdate(aes, buf, p2, AES_BLOCK_SIZE, iv); + if (ret != 0) + goto out; + ret = wc_AesXtsEncryptUpdate(aes, buf + AES_BLOCK_SIZE, + p2 + AES_BLOCK_SIZE, + sizeof(p2) - AES_BLOCK_SIZE, iv); + if (ret != 0) + goto out; + if (XMEMCMP(c2, buf, sizeof(c2))) { + ret = LINUXKM_LKCAPI_AES_KAT_MISMATCH_E; + goto out; + } + + XMEMSET(buf, 0, AES_XTS_128_TEST_BUF_SIZ); + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); if (ret != 0) goto out; @@ -2173,6 +2192,7 @@ static int aes_xts_128_test(void) #define LARGE_XTS_SZ 1024 int i; int j; + int k; large_input = (byte *)XMALLOC(LARGE_XTS_SZ, NULL, DYNAMIC_TYPE_TMP_BUFFER); @@ -2184,6 +2204,38 @@ static int aes_xts_128_test(void) for (i = 0; i < (int)LARGE_XTS_SZ; i++) large_input[i] = (byte)i; + /* first, encrypt block by block then decrypt with a one-shot call. */ + for (j = 16; j < (int)LARGE_XTS_SZ; j++) { + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); + if (ret != 0) + goto out; + XMEMCPY(iv, i1, sizeof(i1)); + ret = wc_AesXtsEncryptInit(aes, iv, sizeof(iv)); + if (ret != 0) + goto out; + for (k = 0; k < j; k += AES_BLOCK_SIZE) { + ret = wc_AesXtsEncryptUpdate(aes, large_input + k, large_input + k, (j - k) < AES_BLOCK_SIZE*2 ? j - k : AES_BLOCK_SIZE, iv); + if (ret != 0) + goto out; + if ((j - k) < AES_BLOCK_SIZE*2) + break; + } + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_DECRYPTION); + if (ret != 0) + goto out; + ret = wc_AesXtsDecrypt(aes, large_input, large_input, j, i1, + sizeof(i1)); + if (ret != 0) + goto out; + for (i = 0; i < j; i++) { + if (large_input[i] != (byte)i) { + ret = LINUXKM_LKCAPI_AES_KAT_MISMATCH_E; + goto out; + } + } + } + + /* second, encrypt with a one-shot call then decrypt block by block. 
*/ for (j = 16; j < (int)LARGE_XTS_SZ; j++) { ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); if (ret != 0) @@ -2192,14 +2244,20 @@ static int aes_xts_128_test(void) sizeof(i1)); if (ret != 0) goto out; - ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_DECRYPTION); if (ret != 0) goto out; - ret = wc_AesXtsDecrypt(aes, large_input, large_input, j, i1, - sizeof(i1)); + XMEMCPY(iv, i1, sizeof(i1)); + ret = wc_AesXtsDecryptInit(aes, iv, sizeof(iv)); if (ret != 0) goto out; + for (k = 0; k < j; k += AES_BLOCK_SIZE) { + ret = wc_AesXtsDecryptUpdate(aes, large_input + k, large_input + k, (j - k) < AES_BLOCK_SIZE*2 ? j - k : AES_BLOCK_SIZE, iv); + if (ret != 0) + goto out; + if ((j - k) < AES_BLOCK_SIZE*2) + break; + } for (i = 0; i < j; i++) { if (large_input[i] != (byte)i) { ret = LINUXKM_LKCAPI_AES_KAT_MISMATCH_E; @@ -2425,6 +2483,7 @@ static int aes_xts_256_test(void) struct crypto_skcipher *tfm = NULL; struct skcipher_request *req = NULL; u8 iv[AES_BLOCK_SIZE]; + byte* large_input = NULL; /* 256 key tests */ static const unsigned char k1[] = { @@ -2543,6 +2602,25 @@ static int aes_xts_256_test(void) goto out; } + XMEMSET(buf, 0, AES_XTS_256_TEST_BUF_SIZ); + + XMEMCPY(iv, i2, sizeof(i2)); + ret = wc_AesXtsEncryptInit(aes, iv, sizeof(iv)); + if (ret != 0) + goto out; + ret = wc_AesXtsEncryptUpdate(aes, buf, p2, AES_BLOCK_SIZE, iv); + if (ret != 0) + goto out; + ret = wc_AesXtsEncryptUpdate(aes, buf + AES_BLOCK_SIZE, + p2 + AES_BLOCK_SIZE, + sizeof(p2) - AES_BLOCK_SIZE, iv); + if (ret != 0) + goto out; + if (XMEMCMP(c2, buf, sizeof(c2))) { + ret = LINUXKM_LKCAPI_AES_KAT_MISMATCH_E; + goto out; + } + XMEMSET(buf, 0, AES_XTS_256_TEST_BUF_SIZ); ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); if (ret != 0) @@ -2596,6 +2674,85 @@ static int aes_xts_256_test(void) goto out; } + { + #define LARGE_XTS_SZ 1024 + int i; + int j; + int k; + + large_input = (byte *)XMALLOC(LARGE_XTS_SZ, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (large_input == NULL) { + ret = MEMORY_E; + goto out; + } + + for (i = 0; i < (int)LARGE_XTS_SZ; i++) + large_input[i] = (byte)i; + + /* first, encrypt block by block then decrypt with a one-shot call. */ + for (j = 16; j < (int)LARGE_XTS_SZ; j++) { + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); + if (ret != 0) + goto out; + XMEMCPY(iv, i1, sizeof(i1)); + ret = wc_AesXtsEncryptInit(aes, iv, sizeof(iv)); + if (ret != 0) + goto out; + for (k = 0; k < j; k += AES_BLOCK_SIZE) { + ret = wc_AesXtsEncryptUpdate(aes, large_input + k, large_input + k, (j - k) < AES_BLOCK_SIZE*2 ? j - k : AES_BLOCK_SIZE, iv); + if (ret != 0) + goto out; + if ((j - k) < AES_BLOCK_SIZE*2) + break; + } + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_DECRYPTION); + if (ret != 0) + goto out; + ret = wc_AesXtsDecrypt(aes, large_input, large_input, j, i1, + sizeof(i1)); + if (ret != 0) + goto out; + for (i = 0; i < j; i++) { + if (large_input[i] != (byte)i) { + ret = LINUXKM_LKCAPI_AES_KAT_MISMATCH_E; + goto out; + } + } + } + + /* second, encrypt with a one-shot call then decrypt block by block. 
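+           As in the 128-bit test, the tweak is threaded through iv across
+           the wc_AesXtsDecryptUpdate() calls, and any tail shorter than
+           AES_BLOCK_SIZE*2 goes to the last call in one piece.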
*/ + for (j = 16; j < (int)LARGE_XTS_SZ; j++) { + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); + if (ret != 0) + goto out; + ret = wc_AesXtsEncrypt(aes, large_input, large_input, j, i1, + sizeof(i1)); + if (ret != 0) + goto out; + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_DECRYPTION); + if (ret != 0) + goto out; + XMEMCPY(iv, i1, sizeof(i1)); + ret = wc_AesXtsDecryptInit(aes, iv, sizeof(iv)); + if (ret != 0) + goto out; + for (k = 0; k < j; k += AES_BLOCK_SIZE) { + ret = wc_AesXtsDecryptUpdate(aes, large_input + k, large_input + k, (j - k) < AES_BLOCK_SIZE*2 ? j - k : AES_BLOCK_SIZE, iv); + if (ret != 0) + goto out; + if ((j - k) < AES_BLOCK_SIZE*2) + break; + } + for (i = 0; i < j; i++) { + if (large_input[i] != (byte)i) { + ret = LINUXKM_LKCAPI_AES_KAT_MISMATCH_E; + goto out; + } + } + } + } + /* now the kernel crypto part */ enc2 = XMALLOC(sizeof(p1), NULL, DYNAMIC_TYPE_AES); @@ -2775,6 +2932,9 @@ static int aes_xts_256_test(void) out: + if (large_input) + XFREE(large_input, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (aes_inited) wc_AesXtsFree(aes); diff --git a/wolfcrypt/test/test.c b/wolfcrypt/test/test.c index e07329a6d..d67acc32e 100644 --- a/wolfcrypt/test/test.c +++ b/wolfcrypt/test/test.c @@ -9866,6 +9866,15 @@ static wc_test_ret_t aes_xts_128_test(void) #ifdef WOLFSSL_AESXTS_STREAM unsigned char i_copy[AES_BLOCK_SIZE]; #endif +#if !defined(BENCH_EMBEDDED) && !defined(HAVE_CAVIUM) && \ + !defined(WOLFSSL_AFALG) + #define LARGE_XTS_SZ 1024 + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) + byte* large_input = NULL; + #else + byte large_input[LARGE_XTS_SZ]; + #endif +#endif /* 128 key tests */ WOLFSSL_SMALL_STACK_STATIC unsigned char k1[] = { @@ -10368,19 +10377,14 @@ static wc_test_ret_t aes_xts_128_test(void) #if !defined(BENCH_EMBEDDED) && !defined(HAVE_CAVIUM) && \ !defined(WOLFSSL_AFALG) { - #define LARGE_XTS_SZ 1024 - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) - byte* large_input = (byte *)XMALLOC(LARGE_XTS_SZ, HEAP_HINT, - DYNAMIC_TYPE_TMP_BUFFER); - #else - byte large_input[LARGE_XTS_SZ]; - #endif int i; int j; #ifdef WOLFSSL_AESXTS_STREAM int k; #endif #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) + large_input = (byte *)XMALLOC(LARGE_XTS_SZ, HEAP_HINT, + DYNAMIC_TYPE_TMP_BUFFER); if (large_input == NULL) ERROR_OUT(WC_TEST_RET_ENC_EC(MEMORY_E), out); #endif @@ -10426,6 +10430,7 @@ static wc_test_ret_t aes_xts_128_test(void) for (i = 0; i < (int)LARGE_XTS_SZ; i++) large_input[i] = (byte)i; + /* first, encrypt block by block then decrypt with a one-shot call. */ for (j = 16; j < (int)LARGE_XTS_SZ; j++) { ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); if (ret != 0) @@ -10450,6 +10455,43 @@ static wc_test_ret_t aes_xts_128_test(void) break; } + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_DECRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsDecrypt(aes, large_input, large_input, (word32)j, i1, + sizeof(i1)); + #if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, + WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + for (i = 0; i < j; i++) { + if (large_input[i] != (byte)i) { + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + } + } + } + + /* second, encrypt with a one-shot call then decrypt block by block. 
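+           The one-shot wc_AesXtsEncrypt() output is fed back through the
+           streaming decrypt path, with the tweak re-seeded from i1 before
+           the update loop.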
*/ + for (j = 16; j < (int)LARGE_XTS_SZ; j++) { + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncrypt(aes, large_input, large_input, (word32)j, i1, + sizeof(i1)); + #if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_DECRYPTION); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); @@ -10485,10 +10527,6 @@ static wc_test_ret_t aes_xts_128_test(void) } } #endif /* WOLFSSL_AESXTS_STREAM */ - - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) - XFREE(large_input, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); - #endif } #endif /* !BENCH_EMBEDDED && !HAVE_CAVIUM && * !WOLFSSL_AFALG @@ -10496,6 +10534,13 @@ static wc_test_ret_t aes_xts_128_test(void) out: + #if !defined(BENCH_EMBEDDED) && !defined(HAVE_CAVIUM) && \ + !defined(WOLFSSL_AFALG) && defined(WOLFSSL_SMALL_STACK) && \ + !defined(WOLFSSL_NO_MALLOC) + if (large_input) + XFREE(large_input, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + #endif + if (aes_inited) wc_AesXtsFree(aes); @@ -10508,6 +10553,704 @@ static wc_test_ret_t aes_xts_128_test(void) } #endif /* WOLFSSL_AES_128 */ +#ifdef WOLFSSL_AES_192 +static wc_test_ret_t aes_xts_192_test(void) +{ +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) + XtsAes *aes = NULL; +#else + XtsAes aes[1]; +#endif + int aes_inited = 0; + wc_test_ret_t ret = 0; + unsigned char buf[AES_BLOCK_SIZE * 2 + 8]; + unsigned char cipher[AES_BLOCK_SIZE * 2 + 8]; +#ifdef WOLFSSL_AESXTS_STREAM + unsigned char i_copy[AES_BLOCK_SIZE]; +#endif +#if !defined(BENCH_EMBEDDED) && !defined(HAVE_CAVIUM) && \ + !defined(WOLFSSL_AFALG) + #define LARGE_XTS_SZ 1024 + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) + byte* large_input = NULL; + #else + byte large_input[LARGE_XTS_SZ]; + #endif +#endif + + /* 192 bit key tests */ + WOLFSSL_SMALL_STACK_STATIC unsigned char k1[] = { + 0x1e, 0xa6, 0x61, 0xc5, 0x8d, 0x94, 0x3a, 0x0e, + 0x48, 0x01, 0xe4, 0x2f, 0x4b, 0x09, 0x47, 0x14, + 0x9e, 0x7f, 0x9f, 0x8e, 0x3e, 0x68, 0xd0, 0xc7, + 0x50, 0x52, 0x10, 0xbd, 0x31, 0x1a, 0x0e, 0x7c, + 0xd6, 0xe1, 0x3f, 0xfd, 0xf2, 0x41, 0x8d, 0x8d, + 0x19, 0x11, 0xc0, 0x04, 0xcd, 0xa5, 0x8d, 0xa3 + }; + + WOLFSSL_SMALL_STACK_STATIC unsigned char i1[] = { + 0x4f, 0xae, 0xf7, 0x11, 0x7c, 0xda, 0x59, 0xc6, + 0x6e, 0x4b, 0x92, 0x01, 0x3e, 0x76, 0x8a, 0xd5 + }; + + WOLFSSL_SMALL_STACK_STATIC unsigned char p1[] = { + 0xeb, 0xab, 0xce, 0x95, 0xb1, 0x4d, 0x3c, 0x8d, + 0x6f, 0xb3, 0x50, 0x39, 0x07, 0x90, 0x31, 0x1c + }; + + WOLFSSL_SMALL_STACK_STATIC unsigned char pp[] = { + 0xeb, 0xab, 0xce, 0x95, 0xb1, 0x4d, 0x3c, 0x8d, + 0x6f, 0xb3, 0x50, 0x39, 0x07, 0x90, 0x31, 0x1c, + 0x6e, 0x4b, 0x92, 0x01, 0x3e, 0x76, 0x8a, 0xd5 + }; + + WOLFSSL_SMALL_STACK_STATIC unsigned char c1[] = { + 0x65, 0x37, 0x15, 0x53, 0xf1, 0x98, 0xab, 0xb4, + 0xdb, 0x4e, 0xd3, 0x69, 0xdf, 0x8e, 0x3a, 0xe0 + }; + + WOLFSSL_SMALL_STACK_STATIC unsigned char k2[] = { + 0xad, 0x50, 0x4b, 0x85, 0xd7, 0x51, 0xbf, 0xba, + 0x69, 0x13, 0xb4, 0xcc, 0x79, 0xb6, 0x5a, 0x62, + 0xf7, 0xf3, 0x9d, 0x36, 0x0f, 0x35, 0xb5, 0xec, + 0x4a, 0x7e, 0x95, 0xbd, 0x9b, 0xa5, 0xf2, 0xec, + 0xc1, 0xd7, 0x7e, 0xa3, 0xc3, 0x74, 0xbd, 0x4b, + 0x13, 0x1b, 0x07, 0x83, 0x87, 0xdd, 0x55, 0x5a + }; + + WOLFSSL_SMALL_STACK_STATIC unsigned char i2[] = { + 0x5c, 0xf7, 0x9d, 0xb6, 0xc5, 0xcd, 0x99, 0x1a, + 0x1c, 0x78, 0x81, 0x42, 0x24, 
0x95, 0x1e, 0x84 + }; + + WOLFSSL_SMALL_STACK_STATIC unsigned char p2[] = { + 0xbd, 0xc5, 0x46, 0x8f, 0xbc, 0x8d, 0x50, 0xa1, + 0x0d, 0x1c, 0x85, 0x7f, 0x79, 0x1c, 0x5c, 0xba, + 0xb3, 0x81, 0x0d, 0x0d, 0x73, 0xcf, 0x8f, 0x20, + 0x46, 0xb1, 0xd1, 0x9e, 0x7d, 0x5d, 0x8a, 0x56 + }; + + WOLFSSL_SMALL_STACK_STATIC unsigned char c2[] = { + 0x6c, 0xa6, 0xb5, 0x73, 0x48, 0xf1, 0x89, 0xfa, + 0xdd, 0x80, 0x72, 0x1f, 0xb8, 0x56, 0x0c, 0xa2, + 0x35, 0xd4, 0x08, 0xbf, 0x24, 0xcb, 0xec, 0xdb, + 0x81, 0xe0, 0xe6, 0x4f, 0x3d, 0x1c, 0x5c, 0x46 + }; + + WOLFSSL_SMALL_STACK_STATIC unsigned char cp2[] = { + 0xe9, 0x58, 0xfe, 0xab, 0x66, 0xb4, 0xf1, 0x79, + 0x91, 0x3f, 0x91, 0xdc, 0x6f, 0xdf, 0xd6, 0xac, + 0x65, 0x37, 0x15, 0x53, 0xf1, 0x98, 0xab, 0xb4 + }; + +#ifndef HAVE_FIPS /* FIPS requires different keys for main and tweak. */ + WOLFSSL_SMALL_STACK_STATIC unsigned char k3[] = { + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 + }; + WOLFSSL_SMALL_STACK_STATIC unsigned char i3[] = { + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + }; + WOLFSSL_SMALL_STACK_STATIC unsigned char p3[] = { + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0xff, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 + }; + WOLFSSL_SMALL_STACK_STATIC unsigned char c3[] = { + 0xa4, 0xf2, 0x71, 0x5d, 0x80, 0x60, 0x68, 0xa0, + 0x80, 0x61, 0xd7, 0xc1, 0x55, 0xc8, 0x3a, 0x2e, + 0xd7, 0xf4, 0x62, 0xaf, 0xbd, 0x2d, 0xf9, 0x5f, + 0xe8, 0xc5, 0x99, 0x3d, 0x58, 0x3c, 0xeb, 0xba, + 0x86, 0xea, 0x2c, 0x7e, 0x1f, 0xba, 0x81, 0xde + }; +#endif /* HAVE_FIPS */ + +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) + if ((aes = (XtsAes *)XMALLOC(sizeof *aes, HEAP_HINT, DYNAMIC_TYPE_AES)) == NULL) + ERROR_OUT(WC_TEST_RET_ENC_ERRNO, out); +#endif + + XMEMSET(buf, 0, sizeof(buf)); + ret = wc_AesXtsInit(aes, HEAP_HINT, devId); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + else + aes_inited = 1; + + ret = wc_AesXtsSetKeyNoInit(aes, k2, sizeof(k2), AES_ENCRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncrypt(aes, buf, p2, sizeof(p2), i2, sizeof(i2)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if (XMEMCMP(c2, buf, sizeof(c2))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i2, sizeof(i2)); + + ret = wc_AesXtsEncryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf, p2, AES_BLOCK_SIZE, i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf + AES_BLOCK_SIZE, p2 + AES_BLOCK_SIZE, sizeof(p2) - AES_BLOCK_SIZE, i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 
0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(c2, buf, sizeof(c2))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + +#if defined(DEBUG_VECTOR_REGISTER_ACCESS_AESXTS) && \ + defined(WC_C_DYNAMIC_FALLBACK) + WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); + ret = wc_AesXtsEncrypt(aes, buf, p2, sizeof(p2), i2, sizeof(i2)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(0); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if (XMEMCMP(c2, buf, sizeof(c2))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif + + XMEMSET(buf, 0, sizeof(buf)); + + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + ret = wc_AesXtsEncrypt(aes, buf, p1, sizeof(p1), i1, sizeof(i1)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if (XMEMCMP(c1, buf, AES_BLOCK_SIZE)) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i1, sizeof(i1)); + + ret = wc_AesXtsEncryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf, p1, sizeof(p1), i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(c1, buf, sizeof(c1))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + +#if defined(DEBUG_VECTOR_REGISTER_ACCESS_AESXTS) && \ + defined(WC_C_DYNAMIC_FALLBACK) + WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); + ret = wc_AesXtsEncrypt(aes, buf, p1, sizeof(p1), i1, sizeof(i1)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(0); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if (XMEMCMP(c1, buf, AES_BLOCK_SIZE)) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif + + /* partial block encryption test */ + XMEMSET(cipher, 0, sizeof(cipher)); + ret = wc_AesXtsEncrypt(aes, cipher, pp, sizeof(pp), i1, sizeof(i1)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if (XMEMCMP(cp2, cipher, sizeof(cp2))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i1, sizeof(i1)); + + ret = wc_AesXtsEncryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf, pp, sizeof(pp), i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(cp2, buf, sizeof(cp2))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + +#if defined(DEBUG_VECTOR_REGISTER_ACCESS_AESXTS) && \ + defined(WC_C_DYNAMIC_FALLBACK) + WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); + XMEMSET(cipher, 0, sizeof(cipher)); + ret = 
wc_AesXtsEncrypt(aes, cipher, pp, sizeof(pp), i1, sizeof(i1)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(0); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if (XMEMCMP(cp2, cipher, sizeof(cp2))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif + + /* partial block decrypt test */ + XMEMSET(buf, 0, sizeof(buf)); + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_DECRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + ret = wc_AesXtsDecrypt(aes, buf, cipher, sizeof(pp), i1, sizeof(i1)); +#if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if (XMEMCMP(pp, buf, sizeof(pp))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i1, sizeof(i1)); + + ret = wc_AesXtsDecryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsDecryptUpdate(aes, buf, cipher, sizeof(pp), i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(pp, buf, sizeof(pp))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + +#if defined(DEBUG_VECTOR_REGISTER_ACCESS_AESXTS) && \ + defined(WC_C_DYNAMIC_FALLBACK) + WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); + XMEMSET(buf, 0, sizeof(buf)); + ret = wc_AesXtsDecrypt(aes, buf, cipher, sizeof(pp), i1, sizeof(i1)); +#if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif +#endif + WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(0); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if (XMEMCMP(pp, buf, sizeof(pp))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif + + /* NIST decrypt test vector */ + XMEMSET(buf, 0, sizeof(buf)); + ret = wc_AesXtsDecrypt(aes, buf, c1, sizeof(c1), i1, sizeof(i1)); +#if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if (XMEMCMP(p1, buf, sizeof(p1))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i1, sizeof(i1)); + + ret = wc_AesXtsDecryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsDecryptUpdate(aes, buf, c1, sizeof(c1), i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(p1, buf, sizeof(p1))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + +#if 
defined(DEBUG_VECTOR_REGISTER_ACCESS_AESXTS) && \ + defined(WC_C_DYNAMIC_FALLBACK) + WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); + XMEMSET(buf, 0, sizeof(buf)); + ret = wc_AesXtsDecrypt(aes, buf, c1, sizeof(c1), i1, sizeof(i1)); +#if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif +#endif + WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(0); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if (XMEMCMP(p1, buf, AES_BLOCK_SIZE)) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif + + /* fail case with decrypting using wrong key */ + XMEMSET(buf, 0, sizeof(buf)); + ret = wc_AesXtsDecrypt(aes, buf, c2, sizeof(c2), i2, sizeof(i2)); +#if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if (XMEMCMP(p2, buf, sizeof(p2)) == 0) /* fail case with wrong key */ + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + + /* set correct key and retest */ + XMEMSET(buf, 0, sizeof(buf)); + ret = wc_AesXtsSetKeyNoInit(aes, k2, sizeof(k2), AES_DECRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + ret = wc_AesXtsDecrypt(aes, buf, c2, sizeof(c2), i2, sizeof(i2)); +#if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if (XMEMCMP(p2, buf, sizeof(p2))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + +#ifndef HAVE_FIPS + + /* Test ciphertext stealing in-place. 
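+       p3 is 40 bytes (two full blocks plus an 8-byte tail) and buf is both
+       input and output, so this exercises the stealing path in place.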
*/ + XMEMCPY(buf, p3, sizeof(p3)); + ret = wc_AesXtsSetKeyNoInit(aes, k3, sizeof(k3), AES_ENCRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncrypt(aes, buf, buf, sizeof(p3), i3, sizeof(i3)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if (XMEMCMP(c3, buf, sizeof(c3))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i3, sizeof(i3)); + + ret = wc_AesXtsEncryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf, p3, AES_BLOCK_SIZE, i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf + AES_BLOCK_SIZE, p3 + AES_BLOCK_SIZE, sizeof(p3) - AES_BLOCK_SIZE, i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(c3, buf, sizeof(c3))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + + ret = wc_AesXtsSetKeyNoInit(aes, k3, sizeof(k3), AES_DECRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + ret = wc_AesXtsDecrypt(aes, buf, buf, sizeof(c3), i3, sizeof(i3)); +#if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if (XMEMCMP(p3, buf, sizeof(p3))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i3, sizeof(i3)); + + ret = wc_AesXtsDecryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsDecryptUpdate(aes, buf, c3, AES_BLOCK_SIZE, i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsDecryptUpdate(aes, buf + AES_BLOCK_SIZE, c3 + AES_BLOCK_SIZE, sizeof(c3) - AES_BLOCK_SIZE, i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(p3, buf, sizeof(p3))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + +#endif /* !HAVE_FIPS */ + +#if !defined(BENCH_EMBEDDED) && !defined(HAVE_CAVIUM) && \ + !defined(WOLFSSL_AFALG) + { + int i; + int j; +#ifdef WOLFSSL_AESXTS_STREAM + int k; +#endif + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) + large_input = (byte *)XMALLOC(LARGE_XTS_SZ, HEAP_HINT, + DYNAMIC_TYPE_TMP_BUFFER); + if (large_input == NULL) + ERROR_OUT(WC_TEST_RET_ENC_EC(MEMORY_E), out); + #endif + + for (i = 0; i < (int)LARGE_XTS_SZ; i++) + large_input[i] = (byte)i; + + for (j = 16; j < (int)LARGE_XTS_SZ; j++) { + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); + if (ret != 
0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + ret = wc_AesXtsEncrypt(aes, large_input, large_input, j, i1, + sizeof(i1)); + #if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_DECRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + ret = wc_AesXtsDecrypt(aes, large_input, large_input, j, i1, + sizeof(i1)); + #if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, + WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + for (i = 0; i < j; i++) { + if (large_input[i] != (byte)i) { + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + } + } + } + +#ifdef WOLFSSL_AESXTS_STREAM + for (i = 0; i < (int)LARGE_XTS_SZ; i++) + large_input[i] = (byte)i; + + /* first, encrypt block by block then decrypt with a one-shot call. */ + for (j = 16; j < (int)LARGE_XTS_SZ; j++) { + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + XMEMCPY(i_copy, i1, sizeof(i1)); + ret = wc_AesXtsEncryptInit(aes, i_copy, sizeof(i_copy)); + #if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + for (k = 0; k < j; k += AES_BLOCK_SIZE) { + ret = wc_AesXtsEncryptUpdate(aes, large_input + k, large_input + k, (j - k) < AES_BLOCK_SIZE*2 ? j - k : AES_BLOCK_SIZE, i_copy); + #if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if ((j - k) < AES_BLOCK_SIZE*2) + break; + } + + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_DECRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsDecrypt(aes, large_input, large_input, (word32)j, i1, + sizeof(i1)); + #if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, + WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + for (i = 0; i < j; i++) { + if (large_input[i] != (byte)i) { + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + } + } + } + + /* second, encrypt with a one-shot call then decrypt block by block. 
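+           The tweak is re-seeded from i1 into i_copy, then
+           wc_AesXtsDecryptUpdate() consumes one AES_BLOCK_SIZE chunk per
+           call until fewer than two blocks remain, at which point the
+           remaining tail goes to the final call.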
*/ + for (j = 16; j < (int)LARGE_XTS_SZ; j++) { + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncrypt(aes, large_input, large_input, (word32)j, i1, + sizeof(i1)); + #if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_DECRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + XMEMCPY(i_copy, i1, sizeof(i1)); + ret = wc_AesXtsDecryptInit(aes, i_copy, sizeof(i_copy)); + #if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + for (k = 0; k < j; k += AES_BLOCK_SIZE) { + ret = wc_AesXtsDecryptUpdate(aes, large_input + k, large_input + k, (j - k) < AES_BLOCK_SIZE*2 ? j - k : AES_BLOCK_SIZE, i_copy); + #if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, + WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if ((j - k) < AES_BLOCK_SIZE*2) + break; + } + + for (i = 0; i < j; i++) { + if (large_input[i] != (byte)i) { + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + } + } + } +#endif /* WOLFSSL_AESXTS_STREAM */ + } +#endif /* !BENCH_EMBEDDED && !HAVE_CAVIUM && + * !WOLFSSL_AFALG + */ + + out: + + #if !defined(BENCH_EMBEDDED) && !defined(HAVE_CAVIUM) && \ + !defined(WOLFSSL_AFALG) && defined(WOLFSSL_SMALL_STACK) && \ + !defined(WOLFSSL_NO_MALLOC) + if (large_input) + XFREE(large_input, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + #endif + + if (aes_inited) + wc_AesXtsFree(aes); + +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) + if (aes) + XFREE(aes, HEAP_HINT, DYNAMIC_TYPE_AES); +#endif + + return ret; +} +#endif /* WOLFSSL_AES_192 */ + #ifdef WOLFSSL_AES_256 static wc_test_ret_t aes_xts_256_test(void) @@ -10524,6 +11267,15 @@ static wc_test_ret_t aes_xts_256_test(void) #ifdef WOLFSSL_AESXTS_STREAM unsigned char i_copy[AES_BLOCK_SIZE]; #endif +#if !defined(BENCH_EMBEDDED) && !defined(HAVE_CAVIUM) && \ + !defined(WOLFSSL_AFALG) + #define LARGE_XTS_SZ 1024 + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) + byte* large_input = NULL; + #else + byte large_input[LARGE_XTS_SZ]; + #endif +#endif /* 256 key tests */ WOLFSSL_SMALL_STACK_STATIC unsigned char k1[] = { @@ -10773,8 +11525,172 @@ static wc_test_ret_t aes_xts_256_test(void) if (XMEMCMP(p2, buf, sizeof(p2))) ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#if !defined(BENCH_EMBEDDED) && !defined(HAVE_CAVIUM) && \ + !defined(WOLFSSL_AFALG) + { + int i; + int j; +#ifdef WOLFSSL_AESXTS_STREAM + int k; +#endif + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) + large_input = (byte *)XMALLOC(LARGE_XTS_SZ, HEAP_HINT, + DYNAMIC_TYPE_TMP_BUFFER); + if (large_input == NULL) + ERROR_OUT(WC_TEST_RET_ENC_EC(MEMORY_E), out); + #endif + + for (i = 0; i < (int)LARGE_XTS_SZ; i++) + large_input[i] = (byte)i; + + for (j = 16; j < (int)LARGE_XTS_SZ; j++) { + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + ret = wc_AesXtsEncrypt(aes, large_input, large_input, j, i1, + sizeof(i1)); + #if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, 
WC_ASYNC_FLAG_NONE); + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_DECRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + ret = wc_AesXtsDecrypt(aes, large_input, large_input, j, i1, + sizeof(i1)); + #if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, + WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + for (i = 0; i < j; i++) { + if (large_input[i] != (byte)i) { + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + } + } + } + +#ifdef WOLFSSL_AESXTS_STREAM + for (i = 0; i < (int)LARGE_XTS_SZ; i++) + large_input[i] = (byte)i; + + /* first, encrypt block by block then decrypt with a one-shot call. */ + for (j = 16; j < (int)LARGE_XTS_SZ; j++) { + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + XMEMCPY(i_copy, i1, sizeof(i1)); + ret = wc_AesXtsEncryptInit(aes, i_copy, sizeof(i_copy)); + #if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + for (k = 0; k < j; k += AES_BLOCK_SIZE) { + ret = wc_AesXtsEncryptUpdate(aes, large_input + k, large_input + k, (j - k) < AES_BLOCK_SIZE*2 ? j - k : AES_BLOCK_SIZE, i_copy); + #if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if ((j - k) < AES_BLOCK_SIZE*2) + break; + } + + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_DECRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + ret = wc_AesXtsDecrypt(aes, large_input, large_input, (word32)j, i1, + sizeof(i1)); + #if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, + WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + for (i = 0; i < j; i++) { + if (large_input[i] != (byte)i) { + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + } + } + } + + /* second, encrypt with a one-shot call then decrypt block by block. */ + for (j = 16; j < (int)LARGE_XTS_SZ; j++) { + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncrypt(aes, large_input, large_input, (word32)j, i1, + sizeof(i1)); + #if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_DECRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + XMEMCPY(i_copy, i1, sizeof(i1)); + ret = wc_AesXtsDecryptInit(aes, i_copy, sizeof(i_copy)); + #if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + for (k = 0; k < j; k += AES_BLOCK_SIZE) { + ret = wc_AesXtsDecryptUpdate(aes, large_input + k, large_input + k, (j - k) < AES_BLOCK_SIZE*2 ? 
j - k : AES_BLOCK_SIZE, i_copy); + #if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, + WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if ((j - k) < AES_BLOCK_SIZE*2) + break; + } + + for (i = 0; i < j; i++) { + if (large_input[i] != (byte)i) { + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + } + } + } +#endif /* WOLFSSL_AESXTS_STREAM */ + } +#endif /* !BENCH_EMBEDDED && !HAVE_CAVIUM && + * !WOLFSSL_AFALG + */ + out: + #if !defined(BENCH_EMBEDDED) && !defined(HAVE_CAVIUM) && \ + !defined(WOLFSSL_AFALG) && defined(WOLFSSL_SMALL_STACK) && \ + !defined(WOLFSSL_NO_MALLOC) + if (large_input) + XFREE(large_input, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + #endif + if (aes_inited) wc_AesXtsFree(aes); @@ -12660,6 +13576,13 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t aes_xts_test(void) if (ret != 0) return ret; #endif + + #ifdef WOLFSSL_AES_192 + ret = aes_xts_192_test(); + if (ret != 0) + return ret; + #endif + #ifdef WOLFSSL_AES_256 ret = aes_xts_256_test(); if (ret != 0) @@ -34349,7 +35272,7 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t ed25519_test(void) { /* Run tests for some rare code paths */ /* sig is exactly equal to the order */ - const byte rareEd1[] = { + static const byte rareEd1[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -34360,7 +35283,7 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t ed25519_test(void) 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10 }; /* sig is larger than the order before we get to the low part */ - const byte rareEd2[] = { + static const byte rareEd2[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -34371,7 +35294,7 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t ed25519_test(void) 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x10 }; /* sig is larger than the order in the low part */ - const byte rareEd3[] = { + static const byte rareEd3[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -34382,7 +35305,7 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t ed25519_test(void) 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10 }; /* sig is smaller than the order */ - const byte rareEd4[] = { + static const byte rareEd4[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,