diff --git a/.github/workflows/openvpn.yml b/.github/workflows/openvpn.yml index 97243cb9e..10a20b065 100644 --- a/.github/workflows/openvpn.yml +++ b/.github/workflows/openvpn.yml @@ -39,7 +39,8 @@ jobs: fail-fast: false matrix: # List of refs to test - ref: [ release/2.6, v2.6.0, master ] + # disabled master on 20240514 -- see https://github.com/wolfSSL/wolfssl/issues/7508 + ref: [ release/2.6, v2.6.0 ] name: ${{ matrix.ref }} runs-on: ubuntu-latest # This should be a safe limit for the tests to run. diff --git a/configure.ac b/configure.ac index 353110418..09bfa9094 100644 --- a/configure.ac +++ b/configure.ac @@ -939,6 +939,7 @@ then if test "$ENABLED_FIPS" = "no" || test "$HAVE_FIPS_VERSION" -ge 6 || test "$FIPS_VERSION" = "v5-dev"; then test "$enable_aesxts" = "" && enable_aesxts=yes + test "$enable_aesxts_stream" = "" && test "$enable_aesxts" = "yes" && (test "$enable_armasm" = "" || test "$enable_armasm" = "no") && enable_aesxts_stream=yes test "$enable_aessiv" = "" && enable_aessiv=yes fi @@ -1078,6 +1079,7 @@ then if test "$ENABLED_FIPS" = "no" || test "$HAVE_FIPS_VERSION" -ge 6 || test "$FIPS_VERSION" = "v5-dev"; then test "$enable_aesxts" = "" && enable_aesxts=yes + test "$enable_aesxts_stream" = "" && test "$enable_aesxts" = "yes" && (test "$enable_armasm" = "" || test "$enable_armasm" = "no") && enable_aesxts_stream=yes test "$enable_aessiv" = "" && enable_aessiv=yes fi @@ -4848,6 +4850,17 @@ AC_ARG_ENABLE([aesxts], [ ENABLED_AESXTS=no ] ) +AS_IF([test "$ENABLED_AESXTS" = "yes" && test "$ENABLED_ARMASM" = "no"], + [ ENABLED_AESXTS_STREAM_DEFAULT=yes ], + [ ENABLED_AESXTS_STREAM_DEFAULT=no ] + ) + +AC_ARG_ENABLE([aesxts-stream], + [AS_HELP_STRING([--enable-aesxts-stream],[Enable wolfSSL AES-XTS support with streaming APIs (default: disabled)])], + [ ENABLED_AESXTS_STREAM=$enableval ], + [ ENABLED_AESXTS_STREAM=$ENABLED_AESXTS_STREAM_DEFAULT ] + ) + # legacy old option name, for compatibility: AC_ARG_ENABLE([xts], [AS_HELP_STRING([--enable-xts],[Please use --enable-aesxts])], @@ -5070,6 +5083,9 @@ AS_CASE([$FIPS_VERSION], AS_IF([test "x$ENABLED_AESXTS" = "xyes" && test "x$ENABLED_AESNI" = "xyes"], [AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_AES_XTS"]) + AS_IF([test "x$ENABLED_AESXTS_STREAM" = "xno" && ! 
(test "$ENABLED_ARMASM" = "yes" || test "$ENABLED_ARMASM_CRYPTO" = "no")], + [ENABLED_AESXTS_STREAM="yes"]) + AS_IF([(test "$ENABLED_AESCCM" = "yes" && test "$HAVE_AESCCM_PORT" != "yes") || (test "$ENABLED_AESCTR" = "yes" && test "$HAVE_AESCTR_PORT" != "yes") || (test "$ENABLED_AESGCM" = "yes" && test "$HAVE_AESGCM_PORT" != "yes") || @@ -8351,15 +8367,20 @@ then for lkcapi_alg in $(echo "$ENABLED_LINUXKM_LKCAPI_REGISTER" | tr ',' ' ') do case "$lkcapi_alg" in - all) AM_CFLAGS="$AM_CFLAGS -DLINUXKM_LKCAPI_REGISTER_ALL" ;; + all) test "$ENABLED_EXPERIMENTAL" = "yes" || AC_MSG_ERROR([linuxkm-lkcapi-register ${lkcapi_alg}: requires --enable-experimental.]) + AM_CFLAGS="$AM_CFLAGS -DLINUXKM_LKCAPI_REGISTER_ALL" ;; 'cbc(aes)') test "$ENABLED_AESCBC" != "no" || AC_MSG_ERROR([linuxkm-lkcapi-register ${lkcapi_alg}: AES-CBC implementation not enabled.]) + test "$ENABLED_EXPERIMENTAL" = "yes" || AC_MSG_ERROR([linuxkm-lkcapi-register ${lkcapi_alg}: requires --enable-experimental.]) AM_CFLAGS="$AM_CFLAGS -DLINUXKM_LKCAPI_REGISTER_AESCBC" ;; 'cfb(aes)') test "$ENABLED_AESCFB" != "no" || AC_MSG_ERROR([linuxkm-lkcapi-register ${lkcapi_alg}: AES-CFB implementation not enabled.]) + test "$ENABLED_EXPERIMENTAL" = "yes" || AC_MSG_ERROR([linuxkm-lkcapi-register ${lkcapi_alg}: requires --enable-experimental.]) AM_CFLAGS="$AM_CFLAGS -DLINUXKM_LKCAPI_REGISTER_AESCFB" ;; 'gcm(aes)') test "$ENABLED_AESGCM" != "no" || AC_MSG_ERROR([linuxkm-lkcapi-register ${lkcapi_alg}: AES-GCM implementation not enabled.]) test "$ENABLED_AESGCM_STREAM" != "no" || AC_MSG_ERROR([linuxkm-lkcapi-register ${lkcapi_alg}: --enable-aesgcm-stream is required for LKCAPI.]) + test "$ENABLED_EXPERIMENTAL" = "yes" || AC_MSG_ERROR([linuxkm-lkcapi-register ${lkcapi_alg}: requires --enable-experimental.]) AM_CFLAGS="$AM_CFLAGS -DLINUXKM_LKCAPI_REGISTER_AESGCM" ;; 'xts(aes)') test "$ENABLED_AESXTS" != "no" || AC_MSG_ERROR([linuxkm-lkcapi-register ${lkcapi_alg}: AES-XTS implementation not enabled.]) + test "$ENABLED_AESXTS_STREAM" != "no" || AC_MSG_ERROR([linuxkm-lkcapi-register ${lkcapi_alg}: --enable-aesxts-stream is required for LKCAPI.]) AM_CFLAGS="$AM_CFLAGS -DLINUXKM_LKCAPI_REGISTER_AESXTS" ;; *) AC_MSG_ERROR([Unsupported LKCAPI algorithm "$lkcapi_alg".]) ;; esac @@ -9009,6 +9030,17 @@ then fi fi +if test "$ENABLED_AESXTS_STREAM" != "no" +then + if test "$ENABLED_AESXTS" = "no" + then + AC_MSG_ERROR([AES-XTS streaming enabled but AES-XTS is disabled]) + else + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_AESXTS_STREAM" + AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_AESXTS_STREAM" + fi +fi + if test "$ENABLED_IOTSAFE" != "no" then AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_IOTSAFE" @@ -9769,6 +9801,7 @@ echo " * AES-CTR: $ENABLED_AESCTR" echo " * AES-CFB: $ENABLED_AESCFB" echo " * AES-OFB: $ENABLED_AESOFB" echo " * AES-XTS: $ENABLED_AESXTS" +echo " * AES-XTS streaming: $ENABLED_AESXTS_STREAM" echo " * AES-SIV: $ENABLED_AESSIV" echo " * AES-EAX: $ENABLED_AESEAX" echo " * AES Bitspliced: $ENABLED_AESBS" diff --git a/linuxkm/lkcapi_glue.c b/linuxkm/lkcapi_glue.c index c38d7d866..047f3258b 100644 --- a/linuxkm/lkcapi_glue.c +++ b/linuxkm/lkcapi_glue.c @@ -75,16 +75,25 @@ #if defined(HAVE_AES_CBC) && \ (defined(LINUXKM_LKCAPI_REGISTER_ALL) || \ defined(LINUXKM_LKCAPI_REGISTER_AESCBC)) +#ifndef WOLFSSL_EXPERIMENTAL_SETTINGS + #error Experimental settings without WOLFSSL_EXPERIMENTAL_SETTINGS +#endif static int linuxkm_test_aescbc(void); #endif #if defined(WOLFSSL_AES_CFB) && \ (defined(LINUXKM_LKCAPI_REGISTER_ALL) || \ defined(LINUXKM_LKCAPI_REGISTER_AESCFB)) +#ifndef 
WOLFSSL_EXPERIMENTAL_SETTINGS + #error Experimental settings without WOLFSSL_EXPERIMENTAL_SETTINGS +#endif static int linuxkm_test_aescfb(void); #endif #if defined(HAVE_AESGCM) && \ (defined(LINUXKM_LKCAPI_REGISTER_ALL) || \ defined(LINUXKM_LKCAPI_REGISTER_AESGCM)) +#ifndef WOLFSSL_EXPERIMENTAL_SETTINGS + #error Experimental settings without WOLFSSL_EXPERIMENTAL_SETTINGS +#endif static int linuxkm_test_aesgcm(void); #endif #if defined(WOLFSSL_AES_XTS) && \ @@ -790,6 +799,10 @@ static int gcmAesAead_loaded = 0; (defined(LINUXKM_LKCAPI_REGISTER_ALL) || \ defined(LINUXKM_LKCAPI_REGISTER_AESXTS)) +#ifndef WOLFSSL_AESXTS_STREAM + #error LKCAPI registration of AES-XTS requires WOLFSSL_AESXTS_STREAM (--enable-aesxts-stream). +#endif + struct km_AesXtsCtx { XtsAes *aesXts; /* allocated in km_AesXtsInitCommon() to assure alignment * for AESNI. @@ -835,6 +848,16 @@ static int km_AesXtsSetKey(struct crypto_skcipher *tfm, const u8 *in_key, int err; struct km_AesXtsCtx * ctx = crypto_skcipher_ctx(tfm); + /* filter bad keysizes here, to avoid console noise from + * CONFIG_CRYPTO_MANAGER_EXTRA_TESTS. + */ + if ((key_len != (AES_128_KEY_SIZE*2)) && + (key_len != (AES_192_KEY_SIZE*2)) && + (key_len != (AES_256_KEY_SIZE*2))) + { + return -EINVAL; + } + err = wc_AesXtsSetKeyNoInit(ctx->aesXts, in_key, key_len, AES_ENCRYPTION_AND_DECRYPTION); @@ -852,7 +875,6 @@ static int km_AesXtsSetKey(struct crypto_skcipher *tfm, const u8 *in_key, static int km_AesXtsEncrypt(struct skcipher_request *req) { int err = 0; - struct crypto_skcipher * tfm = NULL; struct km_AesXtsCtx * ctx = NULL; struct skcipher_walk walk; @@ -861,6 +883,9 @@ static int km_AesXtsEncrypt(struct skcipher_request *req) tfm = crypto_skcipher_reqtfm(req); ctx = crypto_skcipher_ctx(tfm); + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + err = skcipher_walk_virt(&walk, req, false); if (unlikely(err)) { @@ -869,10 +894,9 @@ static int km_AesXtsEncrypt(struct skcipher_request *req) return err; } - while ((nbytes = walk.nbytes) != 0) { + if (walk.nbytes == walk.total) { err = wc_AesXtsEncrypt(ctx->aesXts, walk.dst.virt.addr, - walk.src.virt.addr, nbytes, - walk.iv, walk.ivsize); + walk.src.virt.addr, walk.nbytes, walk.iv, walk.ivsize); if (unlikely(err)) { pr_err("%s: wc_AesXtsEncrypt failed: %d\n", @@ -880,12 +904,92 @@ static int km_AesXtsEncrypt(struct skcipher_request *req) return -EINVAL; } - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + err = skcipher_walk_done(&walk, 0); + + } else { + int tail = req->cryptlen % AES_BLOCK_SIZE; + struct skcipher_request subreq; + + if (tail > 0) { + int blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2; + + skcipher_walk_abort(&walk); + + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, + skcipher_request_flags(req), + NULL, NULL); + skcipher_request_set_crypt(&subreq, req->src, req->dst, + blocks * AES_BLOCK_SIZE, req->iv); + req = &subreq; + + err = skcipher_walk_virt(&walk, req, false); + if (!walk.nbytes) + return err ? : -EINVAL; + } else { + tail = 0; + } + + err = wc_AesXtsEncryptInit(ctx->aesXts, walk.iv, walk.ivsize); if (unlikely(err)) { - pr_err("%s: skcipher_walk_done failed: %d\n", + pr_err("%s: wc_AesXtsEncryptInit failed: %d\n", crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)), err); - return err; + return -EINVAL; + } + + while ((nbytes = walk.nbytes) != 0) { + /* if this isn't the final call, pass block-aligned data to prevent + * end-of-message ciphertext stealing. 
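This clamp is the crux of mapping the kernel's scatterwalk onto wolfCrypt's streaming XTS: the wolfCrypt update applies ciphertext stealing to any non-block-multiple length, so every chunk except the last must be rounded down to a block boundary. A standalone sketch of that rounding, with AES_BLOCK_SIZE assumed to be 16 as in the kernel headers:

#include <stddef.h>

#define AES_BLOCK_SIZE 16

/* Round a walk chunk down to a block multiple unless it is the final chunk
 * of the request, mirroring the nbytes masking in the walk loop. */
static size_t clamp_to_blocks(size_t nbytes, size_t total_remaining)
{
    if (nbytes < total_remaining)
        nbytes &= ~(size_t)(AES_BLOCK_SIZE - 1);
    return nbytes;
}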
+ */ + if (nbytes < walk.total) + nbytes &= ~(AES_BLOCK_SIZE - 1); + + err = wc_AesXtsEncryptUpdate(ctx->aesXts, walk.dst.virt.addr, + walk.src.virt.addr, nbytes, + walk.iv); + + if (unlikely(err)) { + pr_err("%s: wc_AesXtsEncryptUpdate failed: %d\n", + crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)), err); + return -EINVAL; + } + + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + + if (unlikely(err)) { + pr_err("%s: skcipher_walk_done failed: %d\n", + crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)), err); + return err; + } + } + + if (unlikely(tail > 0)) { + struct scatterlist sg_src[2], sg_dst[2]; + struct scatterlist *src, *dst; + + dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); + + skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, + req->iv); + + err = skcipher_walk_virt(&walk, &subreq, false); + if (err) + return err; + + err = wc_AesXtsEncryptUpdate(ctx->aesXts, walk.dst.virt.addr, + walk.src.virt.addr, walk.nbytes, + walk.iv); + + if (unlikely(err)) { + pr_err("%s: wc_AesXtsEncryptUpdate failed: %d\n", + crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)), err); + return -EINVAL; + } + + err = skcipher_walk_done(&walk, 0); } } @@ -903,6 +1007,9 @@ static int km_AesXtsDecrypt(struct skcipher_request *req) tfm = crypto_skcipher_reqtfm(req); ctx = crypto_skcipher_ctx(tfm); + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + err = skcipher_walk_virt(&walk, req, false); if (unlikely(err)) { @@ -911,26 +1018,107 @@ static int km_AesXtsDecrypt(struct skcipher_request *req) return err; } - while ((nbytes = walk.nbytes) != 0) { - err = wc_AesXtsDecrypt(ctx->aesXts, walk.dst.virt.addr, - walk.src.virt.addr, nbytes, - walk.iv, walk.ivsize); + if (walk.nbytes == walk.total) { + + err = wc_AesXtsDecrypt(ctx->aesXts, + walk.dst.virt.addr, walk.src.virt.addr, + walk.nbytes, walk.iv, walk.ivsize); if (unlikely(err)) { - pr_err("%s: wc_AesCbcDecrypt failed: %d\n", + pr_err("%s: wc_AesXtsDecrypt failed: %d\n", crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)), err); return -EINVAL; } - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + err = skcipher_walk_done(&walk, 0); + + } else { + int tail = req->cryptlen % AES_BLOCK_SIZE; + struct skcipher_request subreq; + + if (unlikely(tail > 0)) { + int blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2; + + skcipher_walk_abort(&walk); + + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, + skcipher_request_flags(req), + NULL, NULL); + skcipher_request_set_crypt(&subreq, req->src, req->dst, + blocks * AES_BLOCK_SIZE, req->iv); + req = &subreq; + + err = skcipher_walk_virt(&walk, req, false); + if (!walk.nbytes) + return err ? : -EINVAL; + } else { + tail = 0; + } + + err = wc_AesXtsDecryptInit(ctx->aesXts, walk.iv, walk.ivsize); if (unlikely(err)) { - pr_err("%s: skcipher_walk_done failed: %d\n", + pr_err("%s: wc_AesXtsDecryptInit failed: %d\n", crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)), err); - return err; + return -EINVAL; } - } + while ((nbytes = walk.nbytes) != 0) { + /* if this isn't the final call, pass block-aligned data to prevent + * end-of-message ciphertext stealing. 
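Both directions handle a ragged request length the same way, as seen in the encrypt path above: a sub-request streams everything except the final full block and the partial tail, then scatterwalk_ffwd() locates the AES_BLOCK_SIZE + tail remainder so ciphertext stealing sees both pieces together. The split arithmetic as a standalone program (AES_BLOCK_SIZE assumed to be 16):

#include <stdio.h>

#define AES_BLOCK_SIZE 16
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
    unsigned cryptlen = 100;                              /* example length */
    unsigned tail = cryptlen % AES_BLOCK_SIZE;            /* 4 */
    unsigned blocks = DIV_ROUND_UP(cryptlen, AES_BLOCK_SIZE) - 2; /* 5 */

    printf("stream %u bytes, then one call of %u bytes\n",
           blocks * AES_BLOCK_SIZE,   /* 80: streamed block by block */
           AES_BLOCK_SIZE + tail);    /* 20: stealing unit, one call */
    return 0;
}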
+ */ + if (nbytes < walk.total) + nbytes &= ~(AES_BLOCK_SIZE - 1); + + err = wc_AesXtsDecryptUpdate(ctx->aesXts, walk.dst.virt.addr, + walk.src.virt.addr, nbytes, + walk.iv); + + if (unlikely(err)) { + pr_err("%s: wc_AesXtsDecryptUpdate failed: %d\n", + crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)), err); + return -EINVAL; + } + + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + + if (unlikely(err)) { + pr_err("%s: skcipher_walk_done failed: %d\n", + crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)), err); + return err; + } + } + + if (unlikely(tail > 0)) { + struct scatterlist sg_src[2], sg_dst[2]; + struct scatterlist *src, *dst; + + dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); + + skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, + req->iv); + + err = skcipher_walk_virt(&walk, &subreq, false); + if (err) + return err; + + err = wc_AesXtsDecryptUpdate(ctx->aesXts, walk.dst.virt.addr, + walk.src.virt.addr, walk.nbytes, + walk.iv); + + if (unlikely(err)) { + pr_err("%s: wc_AesXtsDecryptUpdate failed: %d\n", + crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)), err); + return -EINVAL; + } + + err = skcipher_walk_done(&walk, 0); + } + + } return err; } @@ -1834,6 +2022,25 @@ static int aes_xts_128_test(void) XMEMSET(buf, 0, AES_XTS_128_TEST_BUF_SIZ); + XMEMCPY(iv, i2, sizeof(i2)); + ret = wc_AesXtsEncryptInit(aes, iv, sizeof(iv)); + if (ret != 0) + goto out; + ret = wc_AesXtsEncryptUpdate(aes, buf, p2, AES_BLOCK_SIZE, iv); + if (ret != 0) + goto out; + ret = wc_AesXtsEncryptUpdate(aes, buf + AES_BLOCK_SIZE, + p2 + AES_BLOCK_SIZE, + sizeof(p2) - AES_BLOCK_SIZE, iv); + if (ret != 0) + goto out; + if (XMEMCMP(c2, buf, sizeof(c2))) { + ret = LINUXKM_LKCAPI_AES_KAT_MISMATCH_E; + goto out; + } + + XMEMSET(buf, 0, AES_XTS_128_TEST_BUF_SIZ); + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); if (ret != 0) goto out; @@ -1985,6 +2192,7 @@ static int aes_xts_128_test(void) #define LARGE_XTS_SZ 1024 int i; int j; + int k; large_input = (byte *)XMALLOC(LARGE_XTS_SZ, NULL, DYNAMIC_TYPE_TMP_BUFFER); @@ -1996,6 +2204,38 @@ static int aes_xts_128_test(void) for (i = 0; i < (int)LARGE_XTS_SZ; i++) large_input[i] = (byte)i; + /* first, encrypt block by block then decrypt with a one-shot call. */ + for (j = 16; j < (int)LARGE_XTS_SZ; j++) { + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); + if (ret != 0) + goto out; + XMEMCPY(iv, i1, sizeof(i1)); + ret = wc_AesXtsEncryptInit(aes, iv, sizeof(iv)); + if (ret != 0) + goto out; + for (k = 0; k < j; k += AES_BLOCK_SIZE) { + ret = wc_AesXtsEncryptUpdate(aes, large_input + k, large_input + k, (j - k) < AES_BLOCK_SIZE*2 ? j - k : AES_BLOCK_SIZE, iv); + if (ret != 0) + goto out; + if ((j - k) < AES_BLOCK_SIZE*2) + break; + } + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_DECRYPTION); + if (ret != 0) + goto out; + ret = wc_AesXtsDecrypt(aes, large_input, large_input, j, i1, + sizeof(i1)); + if (ret != 0) + goto out; + for (i = 0; i < j; i++) { + if (large_input[i] != (byte)i) { + ret = LINUXKM_LKCAPI_AES_KAT_MISMATCH_E; + goto out; + } + } + } + + /* second, encrypt with a one-shot call then decrypt block by block. 
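The loop above cross-checks the new streaming API against the established one-shot decrypt for every length from 16 up to LARGE_XTS_SZ. Condensed into a hypothetical helper (XtsAes, byte, and word32 as declared in wolfssl/wolfcrypt/aes.h; j must be at least one block), the streamed encrypt half is:

static int stream_encrypt(XtsAes *aes, byte *buf, int j, byte *iv)
{
    int k;
    int ret = wc_AesXtsEncryptInit(aes, iv, AES_BLOCK_SIZE);

    for (k = 0; (ret == 0) && (k < j); k += AES_BLOCK_SIZE) {
        /* fewer than two blocks left: pass block + tail in one final call */
        word32 chunk = ((j - k) < AES_BLOCK_SIZE*2) ? (word32)(j - k)
                                                    : (word32)AES_BLOCK_SIZE;
        ret = wc_AesXtsEncryptUpdate(aes, buf + k, buf + k, chunk, iv);
        if ((j - k) < AES_BLOCK_SIZE*2)
            break;
    }
    return ret;
}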
*/ for (j = 16; j < (int)LARGE_XTS_SZ; j++) { ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); if (ret != 0) @@ -2004,14 +2244,20 @@ static int aes_xts_128_test(void) sizeof(i1)); if (ret != 0) goto out; - ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_DECRYPTION); if (ret != 0) goto out; - ret = wc_AesXtsDecrypt(aes, large_input, large_input, j, i1, - sizeof(i1)); + XMEMCPY(iv, i1, sizeof(i1)); + ret = wc_AesXtsDecryptInit(aes, iv, sizeof(iv)); if (ret != 0) goto out; + for (k = 0; k < j; k += AES_BLOCK_SIZE) { + ret = wc_AesXtsDecryptUpdate(aes, large_input + k, large_input + k, (j - k) < AES_BLOCK_SIZE*2 ? j - k : AES_BLOCK_SIZE, iv); + if (ret != 0) + goto out; + if ((j - k) < AES_BLOCK_SIZE*2) + break; + } for (i = 0; i < j; i++) { if (large_input[i] != (byte)i) { ret = LINUXKM_LKCAPI_AES_KAT_MISMATCH_E; @@ -2237,6 +2483,7 @@ static int aes_xts_256_test(void) struct crypto_skcipher *tfm = NULL; struct skcipher_request *req = NULL; u8 iv[AES_BLOCK_SIZE]; + byte* large_input = NULL; /* 256 key tests */ static const unsigned char k1[] = { @@ -2355,6 +2602,25 @@ static int aes_xts_256_test(void) goto out; } + XMEMSET(buf, 0, AES_XTS_256_TEST_BUF_SIZ); + + XMEMCPY(iv, i2, sizeof(i2)); + ret = wc_AesXtsEncryptInit(aes, iv, sizeof(iv)); + if (ret != 0) + goto out; + ret = wc_AesXtsEncryptUpdate(aes, buf, p2, AES_BLOCK_SIZE, iv); + if (ret != 0) + goto out; + ret = wc_AesXtsEncryptUpdate(aes, buf + AES_BLOCK_SIZE, + p2 + AES_BLOCK_SIZE, + sizeof(p2) - AES_BLOCK_SIZE, iv); + if (ret != 0) + goto out; + if (XMEMCMP(c2, buf, sizeof(c2))) { + ret = LINUXKM_LKCAPI_AES_KAT_MISMATCH_E; + goto out; + } + XMEMSET(buf, 0, AES_XTS_256_TEST_BUF_SIZ); ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); if (ret != 0) @@ -2408,6 +2674,85 @@ static int aes_xts_256_test(void) goto out; } + { + #define LARGE_XTS_SZ 1024 + int i; + int j; + int k; + + large_input = (byte *)XMALLOC(LARGE_XTS_SZ, NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (large_input == NULL) { + ret = MEMORY_E; + goto out; + } + + for (i = 0; i < (int)LARGE_XTS_SZ; i++) + large_input[i] = (byte)i; + + /* first, encrypt block by block then decrypt with a one-shot call. */ + for (j = 16; j < (int)LARGE_XTS_SZ; j++) { + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); + if (ret != 0) + goto out; + XMEMCPY(iv, i1, sizeof(i1)); + ret = wc_AesXtsEncryptInit(aes, iv, sizeof(iv)); + if (ret != 0) + goto out; + for (k = 0; k < j; k += AES_BLOCK_SIZE) { + ret = wc_AesXtsEncryptUpdate(aes, large_input + k, large_input + k, (j - k) < AES_BLOCK_SIZE*2 ? j - k : AES_BLOCK_SIZE, iv); + if (ret != 0) + goto out; + if ((j - k) < AES_BLOCK_SIZE*2) + break; + } + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_DECRYPTION); + if (ret != 0) + goto out; + ret = wc_AesXtsDecrypt(aes, large_input, large_input, j, i1, + sizeof(i1)); + if (ret != 0) + goto out; + for (i = 0; i < j; i++) { + if (large_input[i] != (byte)i) { + ret = LINUXKM_LKCAPI_AES_KAT_MISMATCH_E; + goto out; + } + } + } + + /* second, encrypt with a one-shot call then decrypt block by block. 
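This second direction closes the loop: a one-shot wc_AesXtsEncrypt, then wc_AesXtsDecryptInit plus block-by-block wc_AesXtsDecryptUpdate with the same keep-the-last-two-blocks-together rule. A hypothetical helper for the streamed half (types from wolfssl/wolfcrypt/aes.h):

static int stream_decrypt(XtsAes *aes, byte *buf, int j, byte *iv)
{
    int k;
    int ret = wc_AesXtsDecryptInit(aes, iv, AES_BLOCK_SIZE);

    for (k = 0; (ret == 0) && (k < j); k += AES_BLOCK_SIZE) {
        /* fewer than two blocks left: pass block + tail in one final call */
        word32 chunk = ((j - k) < AES_BLOCK_SIZE*2) ? (word32)(j - k)
                                                    : (word32)AES_BLOCK_SIZE;
        ret = wc_AesXtsDecryptUpdate(aes, buf + k, buf + k, chunk, iv);
        if ((j - k) < AES_BLOCK_SIZE*2)
            break;
    }
    return ret;
}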
*/ + for (j = 16; j < (int)LARGE_XTS_SZ; j++) { + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); + if (ret != 0) + goto out; + ret = wc_AesXtsEncrypt(aes, large_input, large_input, j, i1, + sizeof(i1)); + if (ret != 0) + goto out; + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_DECRYPTION); + if (ret != 0) + goto out; + XMEMCPY(iv, i1, sizeof(i1)); + ret = wc_AesXtsDecryptInit(aes, iv, sizeof(iv)); + if (ret != 0) + goto out; + for (k = 0; k < j; k += AES_BLOCK_SIZE) { + ret = wc_AesXtsDecryptUpdate(aes, large_input + k, large_input + k, (j - k) < AES_BLOCK_SIZE*2 ? j - k : AES_BLOCK_SIZE, iv); + if (ret != 0) + goto out; + if ((j - k) < AES_BLOCK_SIZE*2) + break; + } + for (i = 0; i < j; i++) { + if (large_input[i] != (byte)i) { + ret = LINUXKM_LKCAPI_AES_KAT_MISMATCH_E; + goto out; + } + } + } + } + /* now the kernel crypto part */ enc2 = XMALLOC(sizeof(p1), NULL, DYNAMIC_TYPE_AES); @@ -2587,6 +2932,9 @@ static int aes_xts_256_test(void) out: + if (large_input) + XFREE(large_input, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (aes_inited) wc_AesXtsFree(aes); diff --git a/linuxkm/x86_vector_register_glue.c b/linuxkm/x86_vector_register_glue.c index ea84201c3..8f0ffb4ca 100644 --- a/linuxkm/x86_vector_register_glue.c +++ b/linuxkm/x86_vector_register_glue.c @@ -189,8 +189,7 @@ static struct wc_thread_fpu_count_ent *wc_linuxkm_fpu_state_assoc(int create_p) * dependency loop on intelasm builds, we allocate here. * this is not thread-safe and doesn't need to be. */ - int ret = allocate_wolfcrypt_linuxkm_fpu_states(); - if (ret != 0) + if ((! create_p) || (allocate_wolfcrypt_linuxkm_fpu_states() != 0)) #endif { if (_warned_on_null == 0) { diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index f53428299..e19ec0eed 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -12336,12 +12336,16 @@ int wc_AesXtsSetKeyNoInit(XtsAes* aes, const byte* key, word32 len, int dir) return BAD_FUNC_ARG; } - keySz = len/2; - if (keySz != AES_128_KEY_SIZE && keySz != AES_256_KEY_SIZE) { + if ((len != (AES_128_KEY_SIZE*2)) && + (len != (AES_192_KEY_SIZE*2)) && + (len != (AES_256_KEY_SIZE*2))) + { WOLFSSL_MSG("Unsupported key size"); return WC_KEY_SIZE_E; } + keySz = len/2; + #ifdef HAVE_FIPS if (XMEMCMP(key, key + keySz, keySz) == 0) { WOLFSSL_MSG("FIPS AES-XTS main and tweak keys must differ"); @@ -12525,12 +12529,28 @@ void AES_XTS_encrypt_aesni(const unsigned char *in, unsigned char *out, word32 s const unsigned char* i, const unsigned char* key, const unsigned char* key2, int nr) XASM_LINK("AES_XTS_encrypt_aesni"); +#ifdef WOLFSSL_AESXTS_STREAM +void AES_XTS_init_aesni(unsigned char* i, const unsigned char* tweak_key, + int tweak_nr) + XASM_LINK("AES_XTS_init_aesni"); +void AES_XTS_encrypt_update_aesni(const unsigned char *in, unsigned char *out, word32 sz, + const unsigned char* key, unsigned char *i, int nr) + XASM_LINK("AES_XTS_encrypt_update_aesni"); +#endif #ifdef HAVE_INTEL_AVX1 void AES_XTS_encrypt_avx1(const unsigned char *in, unsigned char *out, - word32 sz, const unsigned char* i, - const unsigned char* key, const unsigned char* key2, - int nr) - XASM_LINK("AES_XTS_encrypt_avx1"); + word32 sz, const unsigned char* i, + const unsigned char* key, const unsigned char* key2, + int nr) + XASM_LINK("AES_XTS_encrypt_avx1"); +#ifdef WOLFSSL_AESXTS_STREAM +void AES_XTS_init_avx1(unsigned char* i, const unsigned char* tweak_key, + int tweak_nr) + XASM_LINK("AES_XTS_init_avx1"); +void AES_XTS_encrypt_update_avx1(const unsigned char *in, unsigned char *out, word32 sz, + const 
unsigned char* key, unsigned char *i, int nr) + XASM_LINK("AES_XTS_encrypt_update_avx1"); +#endif #endif /* HAVE_INTEL_AVX1 */ #ifdef HAVE_AES_DECRYPT @@ -12538,12 +12558,22 @@ void AES_XTS_decrypt_aesni(const unsigned char *in, unsigned char *out, word32 s const unsigned char* i, const unsigned char* key, const unsigned char* key2, int nr) XASM_LINK("AES_XTS_decrypt_aesni"); +#ifdef WOLFSSL_AESXTS_STREAM +void AES_XTS_decrypt_update_aesni(const unsigned char *in, unsigned char *out, word32 sz, + const unsigned char* key, unsigned char *i, int nr) + XASM_LINK("AES_XTS_decrypt_update_aesni"); +#endif #ifdef HAVE_INTEL_AVX1 void AES_XTS_decrypt_avx1(const unsigned char *in, unsigned char *out, - word32 sz, const unsigned char* i, - const unsigned char* key, const unsigned char* key2, - int nr) - XASM_LINK("AES_XTS_decrypt_avx1"); + word32 sz, const unsigned char* i, + const unsigned char* key, const unsigned char* key2, + int nr) + XASM_LINK("AES_XTS_decrypt_avx1"); +#ifdef WOLFSSL_AESXTS_STREAM +void AES_XTS_decrypt_update_avx1(const unsigned char *in, unsigned char *out, word32 sz, + const unsigned char* key, unsigned char *i, int nr) + XASM_LINK("AES_XTS_decrypt_update_avx1"); +#endif #endif /* HAVE_INTEL_AVX1 */ #endif /* HAVE_AES_DECRYPT */ @@ -12590,7 +12620,6 @@ static WARN_UNUSED_RESULT int _AesXtsHelper( } #endif /* HAVE_AES_ECB */ - /* AES with XTS mode. (XTS) XEX encryption with Tweak and cipher text Stealing. * * xaes AES keys to use for block encrypt/decrypt @@ -12602,27 +12631,63 @@ static WARN_UNUSED_RESULT int _AesXtsHelper( * returns 0 on success */ /* Software AES - XTS Encrypt */ + +static int AesXtsEncryptUpdate_sw(XtsAes* xaes, byte* out, const byte* in, + word32 sz, + byte *i); static int AesXtsEncrypt_sw(XtsAes* xaes, byte* out, const byte* in, word32 sz, const byte* i) +{ + int ret; + byte tweak_block[AES_BLOCK_SIZE]; + + ret = wc_AesEncryptDirect(&xaes->tweak, tweak_block, i); + if (ret != 0) + return ret; + + return AesXtsEncryptUpdate_sw(xaes, out, in, sz, tweak_block); +} + +#ifdef WOLFSSL_AESXTS_STREAM + +/* Block-streaming AES-XTS tweak setup. + * + * xaes AES keys to use for block encrypt/decrypt + * i readwrite value to use for tweak + * + * returns 0 on success + */ +static int AesXtsInitTweak_sw(XtsAes* xaes, byte* i) { + return wc_AesEncryptDirect(&xaes->tweak, i, i); +} + +#endif /* WOLFSSL_AESXTS_STREAM */ + +/* Block-streaming AES-XTS. + * + * Supply block-aligned input data with successive calls. Final call need not + * be block aligned. 
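The difference from the one-shot path is that the update routine keeps the running tweak in the caller-visible i buffer, advancing it after each block by multiplying by x in GF(2^128). In scalar form, matching the byte loop in the code below (GF_XTS is the 0x87 reduction constant from aes.c):

#include <stdint.h>

#define GF_XTS 0x87

/* Multiply the 128-bit tweak by x in GF(2^128): shift left one bit across
 * the bytes, then fold the outgoing carry back in with GF_XTS. */
static void xts_tweak_double(uint8_t i[16])
{
    uint8_t carry = 0;
    int j;

    for (j = 0; j < 16; j++) {
        uint8_t msb = (uint8_t)((i[j] >> 7) & 0x01); /* bit shifting out */
        i[j] = (uint8_t)((i[j] << 1) + carry);
        carry = msb;
    }
    if (carry)
        i[0] ^= GF_XTS; /* reduction by the XTS polynomial */
}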
+ * + * xaes AES keys to use for block encrypt/decrypt + * out output buffer to hold cipher text + * in input plain text buffer to encrypt + * sz size of both out and in buffers + * + * returns 0 on success + */ +/* Software AES - XTS Encrypt */ +static int AesXtsEncryptUpdate_sw(XtsAes* xaes, byte* out, const byte* in, + word32 sz, + byte *i) { int ret = 0; word32 blocks = (sz / AES_BLOCK_SIZE); Aes *aes = &xaes->aes; - Aes *tweak = &xaes->tweak; - byte tmp[AES_BLOCK_SIZE]; - - XMEMSET(tmp, 0, AES_BLOCK_SIZE); /* set to 0's in case of improper AES - * key setup passed to encrypt direct*/ - - ret = wc_AesEncryptDirect(tweak, tmp, i); - - if (ret != 0) - return ret; #ifdef HAVE_AES_ECB /* encrypt all of buffer at once when possible */ if (in != out) { /* can not handle inline */ - XMEMCPY(out, tmp, AES_BLOCK_SIZE); + XMEMCPY(out, i, AES_BLOCK_SIZE); if ((ret = _AesXtsHelper(aes, out, in, sz, AES_ENCRYPTION)) != 0) return ret; } @@ -12639,23 +12704,23 @@ static int AesXtsEncrypt_sw(XtsAes* xaes, byte* out, const byte* in, word32 sz, byte buf[AES_BLOCK_SIZE]; XMEMCPY(buf, in, AES_BLOCK_SIZE); - xorbuf(buf, tmp, AES_BLOCK_SIZE); + xorbuf(buf, i, AES_BLOCK_SIZE); ret = wc_AesEncryptDirect(aes, out, buf); if (ret != 0) return ret; } - xorbuf(out, tmp, AES_BLOCK_SIZE); + xorbuf(out, i, AES_BLOCK_SIZE); /* multiply by shift left and propagate carry */ for (j = 0; j < AES_BLOCK_SIZE; j++) { byte tmpC; - tmpC = (tmp[j] >> 7) & 0x01; - tmp[j] = (byte)((tmp[j] << 1) + carry); + tmpC = (i[j] >> 7) & 0x01; + i[j] = (byte)((i[j] << 1) + carry); carry = tmpC; } if (carry) { - tmp[0] ^= GF_XTS; + i[0] ^= GF_XTS; } in += AES_BLOCK_SIZE; @@ -12684,10 +12749,10 @@ static int AesXtsEncrypt_sw(XtsAes* xaes, byte* out, const byte* in, word32 sz, XMEMCPY(out, buf2, sz); } - xorbuf(buf, tmp, AES_BLOCK_SIZE); + xorbuf(buf, i, AES_BLOCK_SIZE); ret = wc_AesEncryptDirect(aes, out - AES_BLOCK_SIZE, buf); if (ret == 0) - xorbuf(out - AES_BLOCK_SIZE, tmp, AES_BLOCK_SIZE); + xorbuf(out - AES_BLOCK_SIZE, i, AES_BLOCK_SIZE); } return ret; @@ -12773,6 +12838,144 @@ int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, return ret; } +#ifdef WOLFSSL_AESXTS_STREAM + +/* Block-streaming AES-XTS. + * + * xaes AES keys to use for block encrypt/decrypt + * i readwrite value to use for tweak + * iSz size of i buffer, should always be AES_BLOCK_SIZE but having this input + * adds a sanity check on how the user calls the function. 
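From the caller's perspective this forms a conventional init/update stream. A minimal hypothetical usage sketch under the constraints documented here (head is an arbitrary block multiple; the final update may carry the unaligned tail but must still be at least one block):

static int encrypt_in_two_calls(XtsAes *xaes, byte *out, const byte *in,
                                word32 sz, const byte *tweak)
{
    byte iv[AES_BLOCK_SIZE];
    const word32 head = 4 * AES_BLOCK_SIZE;
    int ret;

    if (sz < head + AES_BLOCK_SIZE)
        return BAD_FUNC_ARG;             /* final call needs >= one block */

    XMEMCPY(iv, tweak, AES_BLOCK_SIZE);  /* Init consumes iv in place */
    ret = wc_AesXtsEncryptInit(xaes, iv, sizeof(iv));
    if (ret == 0)                        /* block-aligned middle chunk */
        ret = wc_AesXtsEncryptUpdate(xaes, out, in, head, iv);
    if (ret == 0)                        /* final chunk, tail allowed */
        ret = wc_AesXtsEncryptUpdate(xaes, out + head, in + head,
                                     sz - head, iv);
    return ret;
}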
+ * + * returns 0 on success + */ +int wc_AesXtsEncryptInit(XtsAes* xaes, byte* i, word32 iSz) +{ + int ret; + + Aes *aes; + + if ((xaes == NULL) || (i == NULL)) { + return BAD_FUNC_ARG; + } + + if (iSz < AES_BLOCK_SIZE) { + return BAD_FUNC_ARG; + } + + aes = &xaes->aes; + + if (aes->keylen == 0) { + WOLFSSL_MSG("wc_AesXtsEncrypt called with unset encryption key."); + return BAD_FUNC_ARG; + } + + if (iSz < AES_BLOCK_SIZE) { + return BAD_FUNC_ARG; + } + + { +#ifdef WOLFSSL_AESNI + if (aes->use_aesni) { + SAVE_VECTOR_REGISTERS(return _svr_ret;); +#if defined(HAVE_INTEL_AVX1) + if (IS_INTEL_AVX1(intel_flags)) { + AES_XTS_init_avx1(i, (const byte*)xaes->tweak.key, + (int)xaes->tweak.rounds); + ret = 0; + } + else +#endif + { + AES_XTS_init_aesni(i, (const byte*)xaes->tweak.key, + (int)xaes->tweak.rounds); + ret = 0; + } + RESTORE_VECTOR_REGISTERS(); + } + else +#endif /* WOLFSSL_AESNI */ + { + ret = AesXtsInitTweak_sw(xaes, i); + } + } + + return ret; +} + +/* Block-streaming AES-XTS + * + * Note that sz must be greater than AES_BLOCK_SIZE in each call, and must be a + * multiple of AES_BLOCK_SIZE in all but the final call. + * + * xaes AES keys to use for block encrypt/decrypt + * out output buffer to hold cipher text + * in input plain text buffer to encrypt + * sz size of both out and in buffers -- must be >= AES_BLOCK_SIZE. + * i value to use for tweak + * iSz size of i buffer, should always be AES_BLOCK_SIZE but having this input + * adds a sanity check on how the user calls the function. + * + * returns 0 on success + */ +int wc_AesXtsEncryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 sz, + byte *i) +{ + int ret; + +#ifdef WOLFSSL_AESNI + Aes *aes; +#endif + + if (xaes == NULL || out == NULL || in == NULL || i == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef WOLFSSL_AESNI + aes = &xaes->aes; +#endif + + if (sz < AES_BLOCK_SIZE) { + WOLFSSL_MSG("Plain text input too small for encryption"); + return BAD_FUNC_ARG; + } + + { +#ifdef WOLFSSL_AESNI + if (aes->use_aesni) { + SAVE_VECTOR_REGISTERS(return _svr_ret;); +#if defined(HAVE_INTEL_AVX1) + if (IS_INTEL_AVX1(intel_flags)) { + AES_XTS_encrypt_update_avx1(in, out, sz, + (const byte*)aes->key, + i, + (int)aes->rounds); + ret = 0; + } + else +#endif + { + AES_XTS_encrypt_update_aesni(in, out, sz, + (const byte*)aes->key, + i, + (int)aes->rounds); + ret = 0; + } + RESTORE_VECTOR_REGISTERS(); + } + else +#endif /* WOLFSSL_AESNI */ + { + ret = AesXtsEncryptUpdate_sw(xaes, out, in, sz, i); + } + } + + return ret; +} + +#endif /* WOLFSSL_AESXTS_STREAM */ + + /* Same process as encryption but use aes_decrypt key. * * xaes AES keys to use for block encrypt/decrypt @@ -12784,8 +12987,41 @@ int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, * returns 0 on success */ /* Software AES - XTS Decrypt */ + +static int AesXtsDecryptUpdate_sw(XtsAes* xaes, byte* out, const byte* in, + word32 sz, byte *i); + static int AesXtsDecrypt_sw(XtsAes* xaes, byte* out, const byte* in, word32 sz, const byte* i) +{ + int ret; + byte tweak_block[AES_BLOCK_SIZE]; + + ret = wc_AesEncryptDirect(&xaes->tweak, tweak_block, i); + if (ret != 0) + return ret; + + return AesXtsDecryptUpdate_sw(xaes, out, in, sz, tweak_block); +} + +/* Block-streaming AES-XTS. + * + * Same process as encryption but use decrypt key. + * + * Supply block-aligned input data with successive calls. Final call need not + * be block aligned. 
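A subtlety in the decrypt update that the encrypt side does not share: with a partial tail, the main block loop must stop one full block early, because that last full ciphertext block is processed under the next tweak value while the stolen tail reuses the current one. The resulting schedule, as a standalone illustration (AES_BLOCK_SIZE assumed to be 16):

#include <stdio.h>

#define AES_BLOCK_SIZE 16

int main(void)
{
    unsigned sz = 100;
    unsigned blocks = sz / AES_BLOCK_SIZE;  /* 6 */
    unsigned stl = sz % AES_BLOCK_SIZE;     /* 4 */

    if (stl > 0)
        blocks--;  /* 5 plain blocks, then block + tail via stealing */
    printf("%u whole blocks, then %u+%u stealing bytes\n",
           blocks, AES_BLOCK_SIZE, stl);
    return 0;
}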
+ * + * xaes AES keys to use for block encrypt/decrypt + * out output buffer to hold plain text + * in input cipher text buffer to decrypt + * sz size of both out and in buffers + * i value to use for tweak + * + * returns 0 on success + */ +/* Software AES - XTS Decrypt */ +static int AesXtsDecryptUpdate_sw(XtsAes* xaes, byte* out, const byte* in, + word32 sz, byte *i) { int ret = 0; word32 blocks = (sz / AES_BLOCK_SIZE); @@ -12794,19 +13030,10 @@ static int AesXtsDecrypt_sw(XtsAes* xaes, byte* out, const byte* in, word32 sz, #else Aes *aes = &xaes->aes; #endif - Aes *tweak = &xaes->tweak; word32 j; byte carry = 0; - byte tmp[AES_BLOCK_SIZE]; byte stl = (sz % AES_BLOCK_SIZE); - XMEMSET(tmp, 0, AES_BLOCK_SIZE); /* set to 0's in case of improper AES - * key setup passed to decrypt direct*/ - - ret = wc_AesEncryptDirect(tweak, tmp, i); - if (ret != 0) - return ret; - /* if Stealing then break out of loop one block early to handle special * case */ if (stl > 0) { @@ -12816,7 +13043,7 @@ static int AesXtsDecrypt_sw(XtsAes* xaes, byte* out, const byte* in, word32 sz, #ifdef HAVE_AES_ECB /* decrypt all of buffer at once when possible */ if (in != out) { /* can not handle inline */ - XMEMCPY(out, tmp, AES_BLOCK_SIZE); + XMEMCPY(out, i, AES_BLOCK_SIZE); if ((ret = _AesXtsHelper(aes, out, in, sz, AES_DECRYPTION)) != 0) return ret; } @@ -12830,23 +13057,23 @@ static int AesXtsDecrypt_sw(XtsAes* xaes, byte* out, const byte* in, word32 sz, byte buf[AES_BLOCK_SIZE]; XMEMCPY(buf, in, AES_BLOCK_SIZE); - xorbuf(buf, tmp, AES_BLOCK_SIZE); + xorbuf(buf, i, AES_BLOCK_SIZE); ret = wc_AesDecryptDirect(aes, out, buf); if (ret != 0) return ret; } - xorbuf(out, tmp, AES_BLOCK_SIZE); + xorbuf(out, i, AES_BLOCK_SIZE); /* multiply by shift left and propagate carry */ for (j = 0; j < AES_BLOCK_SIZE; j++) { byte tmpC; - tmpC = (tmp[j] >> 7) & 0x01; - tmp[j] = (byte)((tmp[j] << 1) + carry); + tmpC = (i[j] >> 7) & 0x01; + i[j] = (byte)((i[j] << 1) + carry); carry = tmpC; } if (carry) { - tmp[0] ^= GF_XTS; + i[0] ^= GF_XTS; } carry = 0; @@ -12865,8 +13092,8 @@ static int AesXtsDecrypt_sw(XtsAes* xaes, byte* out, const byte* in, word32 sz, for (j = 0; j < AES_BLOCK_SIZE; j++) { byte tmpC; - tmpC = (tmp[j] >> 7) & 0x01; - tmp2[j] = (byte)((tmp[j] << 1) + carry); + tmpC = (i[j] >> 7) & 0x01; + tmp2[j] = (byte)((i[j] << 1) + carry); carry = tmpC; } if (carry) { @@ -12894,11 +13121,11 @@ static int AesXtsDecrypt_sw(XtsAes* xaes, byte* out, const byte* in, word32 sz, XMEMCPY(buf, in, sz); XMEMCPY(out, tmp2, sz); - xorbuf(buf, tmp, AES_BLOCK_SIZE); + xorbuf(buf, i, AES_BLOCK_SIZE); ret = wc_AesDecryptDirect(aes, tmp2, buf); if (ret != 0) return ret; - xorbuf(tmp2, tmp, AES_BLOCK_SIZE); + xorbuf(tmp2, i, AES_BLOCK_SIZE); XMEMCPY(out - AES_BLOCK_SIZE, tmp2, AES_BLOCK_SIZE); } @@ -12987,6 +13214,145 @@ int wc_AesXtsDecrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, return ret; } } + +#ifdef WOLFSSL_AESXTS_STREAM + +/* Same process as encryption but Aes key is AES_DECRYPTION type. + * + * xaes AES keys to use for block encrypt/decrypt + * i readwrite value to use for tweak + * iSz size of i buffer, should always be AES_BLOCK_SIZE but having this input + * adds a sanity check on how the user calls the function. 
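Note that both Init entry points perform identical work, since XTS only ever encrypts the tweak, for decryption as well as encryption; the streams diverge only in their Update halves. Were the two ever folded together, the shared core would amount to this hypothetical helper (not part of this patch; wc_AesEncryptDirect as used elsewhere in aes.c):

static int xts_init_tweak(XtsAes *xaes, byte *i, word32 iSz)
{
    if ((xaes == NULL) || (i == NULL) || (iSz < AES_BLOCK_SIZE))
        return BAD_FUNC_ARG;
    /* one ECB encryption of the tweak under the tweak key, in place */
    return wc_AesEncryptDirect(&xaes->tweak, i, i);
}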
+ * + * returns 0 on success + */ +int wc_AesXtsDecryptInit(XtsAes* xaes, byte* i, word32 iSz) +{ + int ret; + Aes *aes; + + if (xaes == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + aes = &xaes->aes_decrypt; +#else + aes = &xaes->aes; +#endif + + if (aes->keylen == 0) { + WOLFSSL_MSG("wc_AesXtsDecrypt called with unset decryption key."); + return BAD_FUNC_ARG; + } + + if (iSz < AES_BLOCK_SIZE) { + return BAD_FUNC_ARG; + } + + { +#ifdef WOLFSSL_AESNI + if (aes->use_aesni) { + SAVE_VECTOR_REGISTERS(return _svr_ret;); +#if defined(HAVE_INTEL_AVX1) + if (IS_INTEL_AVX1(intel_flags)) { + AES_XTS_init_avx1(i, (const byte*)xaes->tweak.key, + (int)xaes->tweak.rounds); + ret = 0; + } + else +#endif + { + AES_XTS_init_aesni(i, (const byte*)xaes->tweak.key, + (int)xaes->tweak.rounds); + ret = 0; + } + RESTORE_VECTOR_REGISTERS(); + } + else +#endif /* WOLFSSL_AESNI */ + { + ret = AesXtsInitTweak_sw(xaes, i); + } + + } + + return ret; +} + +/* Block-streaming AES-XTS + * + * Note that sz must be greater than AES_BLOCK_SIZE in each call, and must be a + * multiple of AES_BLOCK_SIZE in all but the final call. + * + * xaes AES keys to use for block encrypt/decrypt + * out output buffer to hold plain text + * in input cipher text buffer to decrypt + * sz size of both out and in buffers + * i tweak buffer of size AES_BLOCK_SIZE. + * + * returns 0 on success + */ +int wc_AesXtsDecryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 sz, + byte *i) +{ + int ret; +#ifdef WOLFSSL_AESNI + Aes *aes; +#endif + + if (xaes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef WOLFSSL_AESNI +#ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + aes = &xaes->aes_decrypt; +#else + aes = &xaes->aes; +#endif +#endif + + if (sz < AES_BLOCK_SIZE) { + WOLFSSL_MSG("Cipher text input too small for decryption"); + return BAD_FUNC_ARG; + } + + { +#ifdef WOLFSSL_AESNI + if (aes->use_aesni) { + SAVE_VECTOR_REGISTERS(return _svr_ret;); +#if defined(HAVE_INTEL_AVX1) + if (IS_INTEL_AVX1(intel_flags)) { + AES_XTS_decrypt_update_avx1(in, out, sz, + (const byte*)aes->key, + i, + (int)aes->rounds); + ret = 0; + } + else +#endif + { + AES_XTS_decrypt_update_aesni(in, out, sz, + (const byte*)aes->key, + i, + (int)aes->rounds); + ret = 0; + } + RESTORE_VECTOR_REGISTERS(); + } + else +#endif /* WOLFSSL_AESNI */ + { + ret = AesXtsDecryptUpdate_sw(xaes, out, in, sz, i); + } + } + + return ret; +} + +#endif /* WOLFSSL_AESXTS_STREAM */ + #endif /* !WOLFSSL_ARMASM || WOLFSSL_ARMASM_NO_HW_CRYPTO */ /* Same as wc_AesXtsEncryptSector but the sector gets incremented by one every diff --git a/wolfcrypt/src/aes_xts_asm.S b/wolfcrypt/src/aes_xts_asm.S index fedead84f..f65c01525 100644 --- a/wolfcrypt/src/aes_xts_asm.S +++ b/wolfcrypt/src/aes_xts_asm.S @@ -1,6 +1,6 @@ /* aes_xts_asm.S */ /* - * Copyright (C) 2006-2023 wolfSSL Inc. + * Copyright (C) 2006-2024 wolfSSL Inc. * * This file is part of wolfSSL. 
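For matching the C prototypes in aes.c against the assembly that follows, the operands line up with the SysV AMD64 calling convention (arguments in rdi, rsi, rdx, rcx, r8, r9). Annotated declarations as a reading aid (word32 is wolfSSL's 32-bit type):

void AES_XTS_init_aesni(unsigned char *i,          /* %rdi: tweak, in place */
                        const unsigned char *key,  /* %rsi: tweak-key schedule */
                        int nr);                   /* %edx: tweak rounds */

void AES_XTS_encrypt_update_aesni(const unsigned char *in,  /* %rdi */
                                  unsigned char *out,       /* %rsi */
                                  word32 sz,                /* %rdx */
                                  const unsigned char *key, /* %rcx */
                                  unsigned char *i,  /* %r8: running tweak,
                                                      * stored back on exit */
                                  int nr);                  /* %r9d */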
* @@ -48,6 +48,59 @@ #ifdef WOLFSSL_AES_XTS #ifdef WOLFSSL_X86_64_BUILD #ifndef __APPLE__ +.text +.globl AES_XTS_init_aesni +.type AES_XTS_init_aesni,@function +.align 16 +AES_XTS_init_aesni: +#else +.section __TEXT,__text +.globl _AES_XTS_init_aesni +.p2align 4 +_AES_XTS_init_aesni: +#endif /* __APPLE__ */ + movdqu (%rdi), %xmm0 + # aes_enc_block + pxor (%rsi), %xmm0 + movdqu 16(%rsi), %xmm2 + aesenc %xmm2, %xmm0 + movdqu 32(%rsi), %xmm2 + aesenc %xmm2, %xmm0 + movdqu 48(%rsi), %xmm2 + aesenc %xmm2, %xmm0 + movdqu 64(%rsi), %xmm2 + aesenc %xmm2, %xmm0 + movdqu 80(%rsi), %xmm2 + aesenc %xmm2, %xmm0 + movdqu 96(%rsi), %xmm2 + aesenc %xmm2, %xmm0 + movdqu 112(%rsi), %xmm2 + aesenc %xmm2, %xmm0 + movdqu 128(%rsi), %xmm2 + aesenc %xmm2, %xmm0 + movdqu 144(%rsi), %xmm2 + aesenc %xmm2, %xmm0 + cmpl $11, %edx + movdqu 160(%rsi), %xmm2 + jl L_AES_XTS_init_aesni_tweak_aes_enc_block_last + aesenc %xmm2, %xmm0 + movdqu 176(%rsi), %xmm3 + aesenc %xmm3, %xmm0 + cmpl $13, %edx + movdqu 192(%rsi), %xmm2 + jl L_AES_XTS_init_aesni_tweak_aes_enc_block_last + aesenc %xmm2, %xmm0 + movdqu 208(%rsi), %xmm3 + aesenc %xmm3, %xmm0 + movdqu 224(%rsi), %xmm2 +L_AES_XTS_init_aesni_tweak_aes_enc_block_last: + aesenclast %xmm2, %xmm0 + movdqu %xmm0, (%rdi) + repz retq +#ifndef __APPLE__ +.size AES_XTS_init_aesni,.-AES_XTS_init_aesni +#endif /* __APPLE__ */ +#ifndef __APPLE__ .data #else .section __DATA,__data @@ -378,6 +431,291 @@ L_AES_XTS_encrypt_aesni_done_enc: #endif /* __APPLE__ */ #ifndef __APPLE__ .text +.globl AES_XTS_encrypt_update_aesni +.type AES_XTS_encrypt_update_aesni,@function +.align 16 +AES_XTS_encrypt_update_aesni: +#else +.section __TEXT,__text +.globl _AES_XTS_encrypt_update_aesni +.p2align 4 +_AES_XTS_encrypt_update_aesni: +#endif /* __APPLE__ */ + pushq %r12 + movq %rdx, %rax + movq %rcx, %r10 + subq $0x40, %rsp + movdqu L_aes_xts_gc_xts(%rip), %xmm12 + movdqu (%r8), %xmm0 + xorl %r12d, %r12d + cmpl $0x40, %eax + movl %eax, %r11d + jl L_AES_XTS_encrypt_update_aesni_done_64 + andl $0xffffffc0, %r11d +L_AES_XTS_encrypt_update_aesni_enc_64: + # 64 bytes of input + # aes_enc_64 + leaq (%rdi,%r12,1), %rcx + leaq (%rsi,%r12,1), %rdx + movdqu (%rcx), %xmm8 + movdqu 16(%rcx), %xmm9 + movdqu 32(%rcx), %xmm10 + movdqu 48(%rcx), %xmm11 + movdqa %xmm0, %xmm4 + movdqa %xmm0, %xmm1 + psrad $31, %xmm4 + pslld $0x01, %xmm1 + pshufd $0x93, %xmm4, %xmm4 + pand %xmm12, %xmm4 + pxor %xmm4, %xmm1 + movdqa %xmm1, %xmm4 + movdqa %xmm1, %xmm2 + psrad $31, %xmm4 + pslld $0x01, %xmm2 + pshufd $0x93, %xmm4, %xmm4 + pand %xmm12, %xmm4 + pxor %xmm4, %xmm2 + movdqa %xmm2, %xmm4 + movdqa %xmm2, %xmm3 + psrad $31, %xmm4 + pslld $0x01, %xmm3 + pshufd $0x93, %xmm4, %xmm4 + pand %xmm12, %xmm4 + pxor %xmm4, %xmm3 + pxor %xmm0, %xmm8 + pxor %xmm1, %xmm9 + pxor %xmm2, %xmm10 + pxor %xmm3, %xmm11 + # aes_enc_block + movdqu (%r10), %xmm4 + pxor %xmm4, %xmm8 + pxor %xmm4, %xmm9 + pxor %xmm4, %xmm10 + pxor %xmm4, %xmm11 + movdqu 16(%r10), %xmm4 + aesenc %xmm4, %xmm8 + aesenc %xmm4, %xmm9 + aesenc %xmm4, %xmm10 + aesenc %xmm4, %xmm11 + movdqu 32(%r10), %xmm4 + aesenc %xmm4, %xmm8 + aesenc %xmm4, %xmm9 + aesenc %xmm4, %xmm10 + aesenc %xmm4, %xmm11 + movdqu 48(%r10), %xmm4 + aesenc %xmm4, %xmm8 + aesenc %xmm4, %xmm9 + aesenc %xmm4, %xmm10 + aesenc %xmm4, %xmm11 + movdqu 64(%r10), %xmm4 + aesenc %xmm4, %xmm8 + aesenc %xmm4, %xmm9 + aesenc %xmm4, %xmm10 + aesenc %xmm4, %xmm11 + movdqu 80(%r10), %xmm4 + aesenc %xmm4, %xmm8 + aesenc %xmm4, %xmm9 + aesenc %xmm4, %xmm10 + aesenc %xmm4, %xmm11 + movdqu 96(%r10), %xmm4 + aesenc %xmm4, %xmm8 + 
aesenc %xmm4, %xmm9 + aesenc %xmm4, %xmm10 + aesenc %xmm4, %xmm11 + movdqu 112(%r10), %xmm4 + aesenc %xmm4, %xmm8 + aesenc %xmm4, %xmm9 + aesenc %xmm4, %xmm10 + aesenc %xmm4, %xmm11 + movdqu 128(%r10), %xmm4 + aesenc %xmm4, %xmm8 + aesenc %xmm4, %xmm9 + aesenc %xmm4, %xmm10 + aesenc %xmm4, %xmm11 + movdqu 144(%r10), %xmm4 + aesenc %xmm4, %xmm8 + aesenc %xmm4, %xmm9 + aesenc %xmm4, %xmm10 + aesenc %xmm4, %xmm11 + cmpl $11, %r9d + movdqu 160(%r10), %xmm4 + jl L_AES_XTS_encrypt_update_aesni_aes_enc_64_aes_enc_block_last + aesenc %xmm4, %xmm8 + aesenc %xmm4, %xmm9 + aesenc %xmm4, %xmm10 + aesenc %xmm4, %xmm11 + movdqu 176(%r10), %xmm4 + aesenc %xmm4, %xmm8 + aesenc %xmm4, %xmm9 + aesenc %xmm4, %xmm10 + aesenc %xmm4, %xmm11 + cmpl $13, %r9d + movdqu 192(%r10), %xmm4 + jl L_AES_XTS_encrypt_update_aesni_aes_enc_64_aes_enc_block_last + aesenc %xmm4, %xmm8 + aesenc %xmm4, %xmm9 + aesenc %xmm4, %xmm10 + aesenc %xmm4, %xmm11 + movdqu 208(%r10), %xmm4 + aesenc %xmm4, %xmm8 + aesenc %xmm4, %xmm9 + aesenc %xmm4, %xmm10 + aesenc %xmm4, %xmm11 + movdqu 224(%r10), %xmm4 +L_AES_XTS_encrypt_update_aesni_aes_enc_64_aes_enc_block_last: + aesenclast %xmm4, %xmm8 + aesenclast %xmm4, %xmm9 + aesenclast %xmm4, %xmm10 + aesenclast %xmm4, %xmm11 + pxor %xmm0, %xmm8 + pxor %xmm1, %xmm9 + pxor %xmm2, %xmm10 + pxor %xmm3, %xmm11 + movdqu %xmm8, (%rdx) + movdqu %xmm9, 16(%rdx) + movdqu %xmm10, 32(%rdx) + movdqu %xmm11, 48(%rdx) + movdqa %xmm3, %xmm4 + movdqa %xmm3, %xmm0 + psrad $31, %xmm4 + pslld $0x01, %xmm0 + pshufd $0x93, %xmm4, %xmm4 + pand %xmm12, %xmm4 + pxor %xmm4, %xmm0 + addl $0x40, %r12d + cmpl %r11d, %r12d + jl L_AES_XTS_encrypt_update_aesni_enc_64 +L_AES_XTS_encrypt_update_aesni_done_64: + cmpl %eax, %r12d + movl %eax, %r11d + je L_AES_XTS_encrypt_update_aesni_done_enc + subl %r12d, %r11d + cmpl $16, %r11d + movl %eax, %r11d + jl L_AES_XTS_encrypt_update_aesni_last_15 + andl $0xfffffff0, %r11d + # 16 bytes of input +L_AES_XTS_encrypt_update_aesni_enc_16: + leaq (%rdi,%r12,1), %rcx + movdqu (%rcx), %xmm8 + pxor %xmm0, %xmm8 + # aes_enc_block + pxor (%r10), %xmm8 + movdqu 16(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 32(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 48(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 64(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 80(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 96(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 112(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 128(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 144(%r10), %xmm5 + aesenc %xmm5, %xmm8 + cmpl $11, %r9d + movdqu 160(%r10), %xmm5 + jl L_AES_XTS_encrypt_update_aesni_aes_enc_block_last + aesenc %xmm5, %xmm8 + movdqu 176(%r10), %xmm6 + aesenc %xmm6, %xmm8 + cmpl $13, %r9d + movdqu 192(%r10), %xmm5 + jl L_AES_XTS_encrypt_update_aesni_aes_enc_block_last + aesenc %xmm5, %xmm8 + movdqu 208(%r10), %xmm6 + aesenc %xmm6, %xmm8 + movdqu 224(%r10), %xmm5 +L_AES_XTS_encrypt_update_aesni_aes_enc_block_last: + aesenclast %xmm5, %xmm8 + pxor %xmm0, %xmm8 + leaq (%rsi,%r12,1), %rcx + movdqu %xmm8, (%rcx) + movdqa %xmm0, %xmm4 + psrad $31, %xmm4 + pslld $0x01, %xmm0 + pshufd $0x93, %xmm4, %xmm4 + pand %xmm12, %xmm4 + pxor %xmm4, %xmm0 + addl $16, %r12d + cmpl %r11d, %r12d + jl L_AES_XTS_encrypt_update_aesni_enc_16 + cmpl %eax, %r12d + je L_AES_XTS_encrypt_update_aesni_done_enc +L_AES_XTS_encrypt_update_aesni_last_15: + subq $16, %r12 + leaq (%rsi,%r12,1), %rcx + movdqu (%rcx), %xmm8 + addq $16, %r12 + movdqu %xmm8, (%rsp) + xorq %rdx, %rdx +L_AES_XTS_encrypt_update_aesni_last_15_byte_loop: + movb (%rsp,%rdx,1), %r11b + movb 
(%rdi,%r12,1), %cl + movb %r11b, (%rsi,%r12,1) + movb %cl, (%rsp,%rdx,1) + incl %r12d + incl %edx + cmpl %eax, %r12d + jl L_AES_XTS_encrypt_update_aesni_last_15_byte_loop + subq %rdx, %r12 + movdqu (%rsp), %xmm8 + subq $16, %r12 + pxor %xmm0, %xmm8 + # aes_enc_block + pxor (%r10), %xmm8 + movdqu 16(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 32(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 48(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 64(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 80(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 96(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 112(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 128(%r10), %xmm5 + aesenc %xmm5, %xmm8 + movdqu 144(%r10), %xmm5 + aesenc %xmm5, %xmm8 + cmpl $11, %r9d + movdqu 160(%r10), %xmm5 + jl L_AES_XTS_encrypt_update_aesni_last_15_aes_enc_block_last + aesenc %xmm5, %xmm8 + movdqu 176(%r10), %xmm6 + aesenc %xmm6, %xmm8 + cmpl $13, %r9d + movdqu 192(%r10), %xmm5 + jl L_AES_XTS_encrypt_update_aesni_last_15_aes_enc_block_last + aesenc %xmm5, %xmm8 + movdqu 208(%r10), %xmm6 + aesenc %xmm6, %xmm8 + movdqu 224(%r10), %xmm5 +L_AES_XTS_encrypt_update_aesni_last_15_aes_enc_block_last: + aesenclast %xmm5, %xmm8 + pxor %xmm0, %xmm8 + leaq (%rsi,%r12,1), %rcx + movdqu %xmm8, (%rcx) +L_AES_XTS_encrypt_update_aesni_done_enc: + movdqu %xmm0, (%r8) + addq $0x40, %rsp + popq %r12 + repz retq +#ifndef __APPLE__ +.size AES_XTS_encrypt_update_aesni,.-AES_XTS_encrypt_update_aesni +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl AES_XTS_decrypt_aesni .type AES_XTS_decrypt_aesni,@function .align 16 @@ -752,8 +1090,401 @@ L_AES_XTS_decrypt_aesni_done_dec: #ifndef __APPLE__ .size AES_XTS_decrypt_aesni,.-AES_XTS_decrypt_aesni #endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl AES_XTS_decrypt_update_aesni +.type AES_XTS_decrypt_update_aesni,@function +.align 16 +AES_XTS_decrypt_update_aesni: +#else +.section __TEXT,__text +.globl _AES_XTS_decrypt_update_aesni +.p2align 4 +_AES_XTS_decrypt_update_aesni: +#endif /* __APPLE__ */ + pushq %r12 + movq %rdx, %rax + movq %rcx, %r10 + subq $16, %rsp + movdqu L_aes_xts_gc_xts(%rip), %xmm12 + movdqu (%r8), %xmm0 + xorl %r12d, %r12d + movl %eax, %r11d + andl $0xfffffff0, %r11d + cmpl %eax, %r11d + je L_AES_XTS_decrypt_update_aesni_mul16_64 + subl $16, %r11d + cmpl $16, %r11d + jl L_AES_XTS_decrypt_update_aesni_last_31_start +L_AES_XTS_decrypt_update_aesni_mul16_64: + cmpl $0x40, %r11d + jl L_AES_XTS_decrypt_update_aesni_done_64 + andl $0xffffffc0, %r11d +L_AES_XTS_decrypt_update_aesni_dec_64: + # 64 bytes of input + # aes_dec_64 + leaq (%rdi,%r12,1), %rcx + leaq (%rsi,%r12,1), %rdx + movdqu (%rcx), %xmm8 + movdqu 16(%rcx), %xmm9 + movdqu 32(%rcx), %xmm10 + movdqu 48(%rcx), %xmm11 + movdqa %xmm0, %xmm4 + movdqa %xmm0, %xmm1 + psrad $31, %xmm4 + pslld $0x01, %xmm1 + pshufd $0x93, %xmm4, %xmm4 + pand %xmm12, %xmm4 + pxor %xmm4, %xmm1 + movdqa %xmm1, %xmm4 + movdqa %xmm1, %xmm2 + psrad $31, %xmm4 + pslld $0x01, %xmm2 + pshufd $0x93, %xmm4, %xmm4 + pand %xmm12, %xmm4 + pxor %xmm4, %xmm2 + movdqa %xmm2, %xmm4 + movdqa %xmm2, %xmm3 + psrad $31, %xmm4 + pslld $0x01, %xmm3 + pshufd $0x93, %xmm4, %xmm4 + pand %xmm12, %xmm4 + pxor %xmm4, %xmm3 + pxor %xmm0, %xmm8 + pxor %xmm1, %xmm9 + pxor %xmm2, %xmm10 + pxor %xmm3, %xmm11 + # aes_dec_block + movdqu (%r10), %xmm4 + pxor %xmm4, %xmm8 + pxor %xmm4, %xmm9 + pxor %xmm4, %xmm10 + pxor %xmm4, %xmm11 + movdqu 16(%r10), %xmm4 + aesdec %xmm4, %xmm8 + aesdec %xmm4, %xmm9 + aesdec %xmm4, %xmm10 + aesdec %xmm4, %xmm11 + movdqu 32(%r10), %xmm4 + aesdec %xmm4, %xmm8 + aesdec 
%xmm4, %xmm9 + aesdec %xmm4, %xmm10 + aesdec %xmm4, %xmm11 + movdqu 48(%r10), %xmm4 + aesdec %xmm4, %xmm8 + aesdec %xmm4, %xmm9 + aesdec %xmm4, %xmm10 + aesdec %xmm4, %xmm11 + movdqu 64(%r10), %xmm4 + aesdec %xmm4, %xmm8 + aesdec %xmm4, %xmm9 + aesdec %xmm4, %xmm10 + aesdec %xmm4, %xmm11 + movdqu 80(%r10), %xmm4 + aesdec %xmm4, %xmm8 + aesdec %xmm4, %xmm9 + aesdec %xmm4, %xmm10 + aesdec %xmm4, %xmm11 + movdqu 96(%r10), %xmm4 + aesdec %xmm4, %xmm8 + aesdec %xmm4, %xmm9 + aesdec %xmm4, %xmm10 + aesdec %xmm4, %xmm11 + movdqu 112(%r10), %xmm4 + aesdec %xmm4, %xmm8 + aesdec %xmm4, %xmm9 + aesdec %xmm4, %xmm10 + aesdec %xmm4, %xmm11 + movdqu 128(%r10), %xmm4 + aesdec %xmm4, %xmm8 + aesdec %xmm4, %xmm9 + aesdec %xmm4, %xmm10 + aesdec %xmm4, %xmm11 + movdqu 144(%r10), %xmm4 + aesdec %xmm4, %xmm8 + aesdec %xmm4, %xmm9 + aesdec %xmm4, %xmm10 + aesdec %xmm4, %xmm11 + cmpl $11, %r9d + movdqu 160(%r10), %xmm4 + jl L_AES_XTS_decrypt_update_aesni_aes_dec_64_aes_dec_block_last + aesdec %xmm4, %xmm8 + aesdec %xmm4, %xmm9 + aesdec %xmm4, %xmm10 + aesdec %xmm4, %xmm11 + movdqu 176(%r10), %xmm4 + aesdec %xmm4, %xmm8 + aesdec %xmm4, %xmm9 + aesdec %xmm4, %xmm10 + aesdec %xmm4, %xmm11 + cmpl $13, %r9d + movdqu 192(%r10), %xmm4 + jl L_AES_XTS_decrypt_update_aesni_aes_dec_64_aes_dec_block_last + aesdec %xmm4, %xmm8 + aesdec %xmm4, %xmm9 + aesdec %xmm4, %xmm10 + aesdec %xmm4, %xmm11 + movdqu 208(%r10), %xmm4 + aesdec %xmm4, %xmm8 + aesdec %xmm4, %xmm9 + aesdec %xmm4, %xmm10 + aesdec %xmm4, %xmm11 + movdqu 224(%r10), %xmm4 +L_AES_XTS_decrypt_update_aesni_aes_dec_64_aes_dec_block_last: + aesdeclast %xmm4, %xmm8 + aesdeclast %xmm4, %xmm9 + aesdeclast %xmm4, %xmm10 + aesdeclast %xmm4, %xmm11 + pxor %xmm0, %xmm8 + pxor %xmm1, %xmm9 + pxor %xmm2, %xmm10 + pxor %xmm3, %xmm11 + movdqu %xmm8, (%rdx) + movdqu %xmm9, 16(%rdx) + movdqu %xmm10, 32(%rdx) + movdqu %xmm11, 48(%rdx) + movdqa %xmm3, %xmm4 + movdqa %xmm3, %xmm0 + psrad $31, %xmm4 + pslld $0x01, %xmm0 + pshufd $0x93, %xmm4, %xmm4 + pand %xmm12, %xmm4 + pxor %xmm4, %xmm0 + addl $0x40, %r12d + cmpl %r11d, %r12d + jl L_AES_XTS_decrypt_update_aesni_dec_64 +L_AES_XTS_decrypt_update_aesni_done_64: + cmpl %eax, %r12d + movl %eax, %r11d + je L_AES_XTS_decrypt_update_aesni_done_dec + andl $0xfffffff0, %r11d + cmpl %eax, %r11d + je L_AES_XTS_decrypt_update_aesni_mul16 + subl $16, %r11d + subl %r12d, %r11d + cmpl $16, %r11d + jl L_AES_XTS_decrypt_update_aesni_last_31_start + addl %r12d, %r11d +L_AES_XTS_decrypt_update_aesni_mul16: +L_AES_XTS_decrypt_update_aesni_dec_16: + # 16 bytes of input + leaq (%rdi,%r12,1), %rcx + movdqu (%rcx), %xmm8 + pxor %xmm0, %xmm8 + # aes_dec_block + pxor (%r10), %xmm8 + movdqu 16(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 32(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 48(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 64(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 80(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 96(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 112(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 128(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 144(%r10), %xmm5 + aesdec %xmm5, %xmm8 + cmpl $11, %r9d + movdqu 160(%r10), %xmm5 + jl L_AES_XTS_decrypt_update_aesni_aes_dec_block_last + aesdec %xmm5, %xmm8 + movdqu 176(%r10), %xmm6 + aesdec %xmm6, %xmm8 + cmpl $13, %r9d + movdqu 192(%r10), %xmm5 + jl L_AES_XTS_decrypt_update_aesni_aes_dec_block_last + aesdec %xmm5, %xmm8 + movdqu 208(%r10), %xmm6 + aesdec %xmm6, %xmm8 + movdqu 224(%r10), %xmm5 +L_AES_XTS_decrypt_update_aesni_aes_dec_block_last: + aesdeclast %xmm5, %xmm8 + pxor %xmm0, %xmm8 + leaq 
(%rsi,%r12,1), %rcx + movdqu %xmm8, (%rcx) + movdqa %xmm0, %xmm4 + psrad $31, %xmm4 + pslld $0x01, %xmm0 + pshufd $0x93, %xmm4, %xmm4 + pand %xmm12, %xmm4 + pxor %xmm4, %xmm0 + addl $16, %r12d + cmpl %r11d, %r12d + jl L_AES_XTS_decrypt_update_aesni_dec_16 + cmpl %eax, %r12d + je L_AES_XTS_decrypt_update_aesni_done_dec +L_AES_XTS_decrypt_update_aesni_last_31_start: + movdqa %xmm0, %xmm4 + movdqa %xmm0, %xmm7 + psrad $31, %xmm4 + pslld $0x01, %xmm7 + pshufd $0x93, %xmm4, %xmm4 + pand %xmm12, %xmm4 + pxor %xmm4, %xmm7 + leaq (%rdi,%r12,1), %rcx + movdqu (%rcx), %xmm8 + pxor %xmm7, %xmm8 + # aes_dec_block + pxor (%r10), %xmm8 + movdqu 16(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 32(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 48(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 64(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 80(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 96(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 112(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 128(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 144(%r10), %xmm5 + aesdec %xmm5, %xmm8 + cmpl $11, %r9d + movdqu 160(%r10), %xmm5 + jl L_AES_XTS_decrypt_update_aesni_last_31_aes_dec_block_last + aesdec %xmm5, %xmm8 + movdqu 176(%r10), %xmm6 + aesdec %xmm6, %xmm8 + cmpl $13, %r9d + movdqu 192(%r10), %xmm5 + jl L_AES_XTS_decrypt_update_aesni_last_31_aes_dec_block_last + aesdec %xmm5, %xmm8 + movdqu 208(%r10), %xmm6 + aesdec %xmm6, %xmm8 + movdqu 224(%r10), %xmm5 +L_AES_XTS_decrypt_update_aesni_last_31_aes_dec_block_last: + aesdeclast %xmm5, %xmm8 + pxor %xmm7, %xmm8 + movdqu %xmm8, (%rsp) + addq $16, %r12 + xorq %rdx, %rdx +L_AES_XTS_decrypt_update_aesni_last_31_byte_loop: + movb (%rsp,%rdx,1), %r11b + movb (%rdi,%r12,1), %cl + movb %r11b, (%rsi,%r12,1) + movb %cl, (%rsp,%rdx,1) + incl %r12d + incl %edx + cmpl %eax, %r12d + jl L_AES_XTS_decrypt_update_aesni_last_31_byte_loop + subq %rdx, %r12 + movdqu (%rsp), %xmm8 + pxor %xmm0, %xmm8 + # aes_dec_block + pxor (%r10), %xmm8 + movdqu 16(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 32(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 48(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 64(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 80(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 96(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 112(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 128(%r10), %xmm5 + aesdec %xmm5, %xmm8 + movdqu 144(%r10), %xmm5 + aesdec %xmm5, %xmm8 + cmpl $11, %r9d + movdqu 160(%r10), %xmm5 + jl L_AES_XTS_decrypt_update_aesni_last_31_2_aes_dec_block_last + aesdec %xmm5, %xmm8 + movdqu 176(%r10), %xmm6 + aesdec %xmm6, %xmm8 + cmpl $13, %r9d + movdqu 192(%r10), %xmm5 + jl L_AES_XTS_decrypt_update_aesni_last_31_2_aes_dec_block_last + aesdec %xmm5, %xmm8 + movdqu 208(%r10), %xmm6 + aesdec %xmm6, %xmm8 + movdqu 224(%r10), %xmm5 +L_AES_XTS_decrypt_update_aesni_last_31_2_aes_dec_block_last: + aesdeclast %xmm5, %xmm8 + pxor %xmm0, %xmm8 + subq $16, %r12 + leaq (%rsi,%r12,1), %rcx + movdqu %xmm8, (%rcx) +L_AES_XTS_decrypt_update_aesni_done_dec: + movdqu %xmm0, (%r8) + addq $16, %rsp + popq %r12 + repz retq +#ifndef __APPLE__ +.size AES_XTS_decrypt_update_aesni,.-AES_XTS_decrypt_update_aesni +#endif /* __APPLE__ */ #ifdef HAVE_INTEL_AVX1 #ifndef __APPLE__ +.text +.globl AES_XTS_init_avx1 +.type AES_XTS_init_avx1,@function +.align 16 +AES_XTS_init_avx1: +#else +.section __TEXT,__text +.globl _AES_XTS_init_avx1 +.p2align 4 +_AES_XTS_init_avx1: +#endif /* __APPLE__ */ + movl %edx, %eax + vmovdqu (%rdi), %xmm0 + # aes_enc_block + vpxor (%rsi), %xmm0, %xmm0 + vmovdqu 16(%rsi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + 
vmovdqu 32(%rsi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqu 48(%rsi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqu 64(%rsi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqu 80(%rsi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqu 96(%rsi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqu 112(%rsi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqu 128(%rsi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqu 144(%rsi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + cmpl $11, %eax + vmovdqu 160(%rsi), %xmm2 + jl L_AES_XTS_init_avx1_tweak_aes_enc_block_last + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqu 176(%rsi), %xmm3 + vaesenc %xmm3, %xmm0, %xmm0 + cmpl $13, %eax + vmovdqu 192(%rsi), %xmm2 + jl L_AES_XTS_init_avx1_tweak_aes_enc_block_last + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqu 208(%rsi), %xmm3 + vaesenc %xmm3, %xmm0, %xmm0 + vmovdqu 224(%rsi), %xmm2 +L_AES_XTS_init_avx1_tweak_aes_enc_block_last: + vaesenclast %xmm2, %xmm0, %xmm0 + vmovdqu %xmm0, (%rdi) + repz retq +#ifndef __APPLE__ +.size AES_XTS_init_avx1,.-AES_XTS_init_avx1 +#endif /* __APPLE__ */ +#ifndef __APPLE__ .data #else .section __DATA,__data @@ -1066,7 +1797,6 @@ L_AES_XTS_encrypt_avx1_last_15_aes_enc_block_last: leaq (%rsi,%r13,1), %rcx vmovdqu %xmm8, (%rcx) L_AES_XTS_encrypt_avx1_done_enc: - vzeroupper addq $0x40, %rsp popq %r13 popq %r12 @@ -1076,6 +1806,282 @@ L_AES_XTS_encrypt_avx1_done_enc: #endif /* __APPLE__ */ #ifndef __APPLE__ .text +.globl AES_XTS_encrypt_update_avx1 +.type AES_XTS_encrypt_update_avx1,@function +.align 16 +AES_XTS_encrypt_update_avx1: +#else +.section __TEXT,__text +.globl _AES_XTS_encrypt_update_avx1 +.p2align 4 +_AES_XTS_encrypt_update_avx1: +#endif /* __APPLE__ */ + pushq %r12 + movq %rdx, %rax + movq %rcx, %r10 + subq $0x40, %rsp + vmovdqu L_avx1_aes_xts_gc_xts(%rip), %xmm12 + vmovdqu (%r8), %xmm0 + xorl %r12d, %r12d + cmpl $0x40, %eax + movl %eax, %r11d + jl L_AES_XTS_encrypt_update_avx1_done_64 + andl $0xffffffc0, %r11d +L_AES_XTS_encrypt_update_avx1_enc_64: + # 64 bytes of input + # aes_enc_64 + leaq (%rdi,%r12,1), %rcx + leaq (%rsi,%r12,1), %rdx + vmovdqu (%rcx), %xmm8 + vmovdqu 16(%rcx), %xmm9 + vmovdqu 32(%rcx), %xmm10 + vmovdqu 48(%rcx), %xmm11 + vpsrad $31, %xmm0, %xmm4 + vpslld $0x01, %xmm0, %xmm1 + vpshufd $0x93, %xmm4, %xmm4 + vpand %xmm12, %xmm4, %xmm4 + vpxor %xmm4, %xmm1, %xmm1 + vpsrad $31, %xmm1, %xmm4 + vpslld $0x01, %xmm1, %xmm2 + vpshufd $0x93, %xmm4, %xmm4 + vpand %xmm12, %xmm4, %xmm4 + vpxor %xmm4, %xmm2, %xmm2 + vpsrad $31, %xmm2, %xmm4 + vpslld $0x01, %xmm2, %xmm3 + vpshufd $0x93, %xmm4, %xmm4 + vpand %xmm12, %xmm4, %xmm4 + vpxor %xmm4, %xmm3, %xmm3 + vpxor %xmm0, %xmm8, %xmm8 + vpxor %xmm1, %xmm9, %xmm9 + vpxor %xmm2, %xmm10, %xmm10 + vpxor %xmm3, %xmm11, %xmm11 + # aes_enc_block + vmovdqu (%r10), %xmm4 + vpxor %xmm4, %xmm8, %xmm8 + vpxor %xmm4, %xmm9, %xmm9 + vpxor %xmm4, %xmm10, %xmm10 + vpxor %xmm4, %xmm11, %xmm11 + vmovdqu 16(%r10), %xmm4 + vaesenc %xmm4, %xmm8, %xmm8 + vaesenc %xmm4, %xmm9, %xmm9 + vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + vmovdqu 32(%r10), %xmm4 + vaesenc %xmm4, %xmm8, %xmm8 + vaesenc %xmm4, %xmm9, %xmm9 + vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + vmovdqu 48(%r10), %xmm4 + vaesenc %xmm4, %xmm8, %xmm8 + vaesenc %xmm4, %xmm9, %xmm9 + vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + vmovdqu 64(%r10), %xmm4 + vaesenc %xmm4, %xmm8, %xmm8 + vaesenc %xmm4, %xmm9, %xmm9 + vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + vmovdqu 80(%r10), %xmm4 + vaesenc %xmm4, %xmm8, %xmm8 + vaesenc %xmm4, %xmm9, %xmm9 + 
vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + vmovdqu 96(%r10), %xmm4 + vaesenc %xmm4, %xmm8, %xmm8 + vaesenc %xmm4, %xmm9, %xmm9 + vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + vmovdqu 112(%r10), %xmm4 + vaesenc %xmm4, %xmm8, %xmm8 + vaesenc %xmm4, %xmm9, %xmm9 + vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + vmovdqu 128(%r10), %xmm4 + vaesenc %xmm4, %xmm8, %xmm8 + vaesenc %xmm4, %xmm9, %xmm9 + vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + vmovdqu 144(%r10), %xmm4 + vaesenc %xmm4, %xmm8, %xmm8 + vaesenc %xmm4, %xmm9, %xmm9 + vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + cmpl $11, %r9d + vmovdqu 160(%r10), %xmm4 + jl L_AES_XTS_encrypt_update_avx1_aes_enc_64_aes_enc_block_last + vaesenc %xmm4, %xmm8, %xmm8 + vaesenc %xmm4, %xmm9, %xmm9 + vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + vmovdqu 176(%r10), %xmm4 + vaesenc %xmm4, %xmm8, %xmm8 + vaesenc %xmm4, %xmm9, %xmm9 + vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + cmpl $13, %r9d + vmovdqu 192(%r10), %xmm4 + jl L_AES_XTS_encrypt_update_avx1_aes_enc_64_aes_enc_block_last + vaesenc %xmm4, %xmm8, %xmm8 + vaesenc %xmm4, %xmm9, %xmm9 + vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + vmovdqu 208(%r10), %xmm4 + vaesenc %xmm4, %xmm8, %xmm8 + vaesenc %xmm4, %xmm9, %xmm9 + vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + vmovdqu 224(%r10), %xmm4 +L_AES_XTS_encrypt_update_avx1_aes_enc_64_aes_enc_block_last: + vaesenclast %xmm4, %xmm8, %xmm8 + vaesenclast %xmm4, %xmm9, %xmm9 + vaesenclast %xmm4, %xmm10, %xmm10 + vaesenclast %xmm4, %xmm11, %xmm11 + vpxor %xmm0, %xmm8, %xmm8 + vpxor %xmm1, %xmm9, %xmm9 + vpxor %xmm2, %xmm10, %xmm10 + vpxor %xmm3, %xmm11, %xmm11 + vmovdqu %xmm8, (%rdx) + vmovdqu %xmm9, 16(%rdx) + vmovdqu %xmm10, 32(%rdx) + vmovdqu %xmm11, 48(%rdx) + vpsrad $31, %xmm3, %xmm4 + vpslld $0x01, %xmm3, %xmm0 + vpshufd $0x93, %xmm4, %xmm4 + vpand %xmm12, %xmm4, %xmm4 + vpxor %xmm4, %xmm0, %xmm0 + addl $0x40, %r12d + cmpl %r11d, %r12d + jl L_AES_XTS_encrypt_update_avx1_enc_64 +L_AES_XTS_encrypt_update_avx1_done_64: + cmpl %eax, %r12d + movl %eax, %r11d + je L_AES_XTS_encrypt_update_avx1_done_enc + subl %r12d, %r11d + cmpl $16, %r11d + movl %eax, %r11d + jl L_AES_XTS_encrypt_update_avx1_last_15 + andl $0xfffffff0, %r11d + # 16 bytes of input +L_AES_XTS_encrypt_update_avx1_enc_16: + leaq (%rdi,%r12,1), %rcx + vmovdqu (%rcx), %xmm8 + vpxor %xmm0, %xmm8, %xmm8 + # aes_enc_block + vpxor (%r10), %xmm8, %xmm8 + vmovdqu 16(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 32(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 48(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 64(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 80(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 96(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 112(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 128(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 144(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + cmpl $11, %r9d + vmovdqu 160(%r10), %xmm5 + jl L_AES_XTS_encrypt_update_avx1_aes_enc_block_last + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 176(%r10), %xmm6 + vaesenc %xmm6, %xmm8, %xmm8 + cmpl $13, %r9d + vmovdqu 192(%r10), %xmm5 + jl L_AES_XTS_encrypt_update_avx1_aes_enc_block_last + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 208(%r10), %xmm6 + vaesenc %xmm6, %xmm8, %xmm8 + vmovdqu 224(%r10), %xmm5 +L_AES_XTS_encrypt_update_avx1_aes_enc_block_last: + vaesenclast %xmm5, %xmm8, %xmm8 + vpxor %xmm0, %xmm8, 
%xmm8 + leaq (%rsi,%r12,1), %rcx + vmovdqu %xmm8, (%rcx) + vpsrad $31, %xmm0, %xmm4 + vpslld $0x01, %xmm0, %xmm0 + vpshufd $0x93, %xmm4, %xmm4 + vpand %xmm12, %xmm4, %xmm4 + vpxor %xmm4, %xmm0, %xmm0 + addl $16, %r12d + cmpl %r11d, %r12d + jl L_AES_XTS_encrypt_update_avx1_enc_16 + cmpl %eax, %r12d + je L_AES_XTS_encrypt_update_avx1_done_enc +L_AES_XTS_encrypt_update_avx1_last_15: + subq $16, %r12 + leaq (%rsi,%r12,1), %rcx + vmovdqu (%rcx), %xmm8 + addq $16, %r12 + vmovdqu %xmm8, (%rsp) + xorq %rdx, %rdx +L_AES_XTS_encrypt_update_avx1_last_15_byte_loop: + movb (%rsp,%rdx,1), %r11b + movb (%rdi,%r12,1), %cl + movb %r11b, (%rsi,%r12,1) + movb %cl, (%rsp,%rdx,1) + incl %r12d + incl %edx + cmpl %eax, %r12d + jl L_AES_XTS_encrypt_update_avx1_last_15_byte_loop + subq %rdx, %r12 + vmovdqu (%rsp), %xmm8 + subq $16, %r12 + vpxor %xmm0, %xmm8, %xmm8 + # aes_enc_block + vpxor (%r10), %xmm8, %xmm8 + vmovdqu 16(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 32(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 48(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 64(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 80(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 96(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 112(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 128(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 144(%r10), %xmm5 + vaesenc %xmm5, %xmm8, %xmm8 + cmpl $11, %r9d + vmovdqu 160(%r10), %xmm5 + jl L_AES_XTS_encrypt_update_avx1_last_15_aes_enc_block_last + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 176(%r10), %xmm6 + vaesenc %xmm6, %xmm8, %xmm8 + cmpl $13, %r9d + vmovdqu 192(%r10), %xmm5 + jl L_AES_XTS_encrypt_update_avx1_last_15_aes_enc_block_last + vaesenc %xmm5, %xmm8, %xmm8 + vmovdqu 208(%r10), %xmm6 + vaesenc %xmm6, %xmm8, %xmm8 + vmovdqu 224(%r10), %xmm5 +L_AES_XTS_encrypt_update_avx1_last_15_aes_enc_block_last: + vaesenclast %xmm5, %xmm8, %xmm8 + vpxor %xmm0, %xmm8, %xmm8 + leaq (%rsi,%r12,1), %rcx + vmovdqu %xmm8, (%rcx) +L_AES_XTS_encrypt_update_avx1_done_enc: + vmovdqu %xmm0, (%r8) + addq $0x40, %rsp + popq %r12 + repz retq +#ifndef __APPLE__ +.size AES_XTS_encrypt_update_avx1,.-AES_XTS_encrypt_update_avx1 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text .globl AES_XTS_decrypt_avx1 .type AES_XTS_decrypt_avx1,@function .align 16 @@ -1432,7 +2438,6 @@ L_AES_XTS_decrypt_avx1_last_31_2_aes_dec_block_last: leaq (%rsi,%r13,1), %rcx vmovdqu %xmm8, (%rcx) L_AES_XTS_decrypt_avx1_done_dec: - vzeroupper addq $16, %rsp popq %r13 popq %r12 @@ -1440,6 +2445,334 @@ L_AES_XTS_decrypt_avx1_done_dec: #ifndef __APPLE__ .size AES_XTS_decrypt_avx1,.-AES_XTS_decrypt_avx1 #endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl AES_XTS_decrypt_update_avx1 +.type AES_XTS_decrypt_update_avx1,@function +.align 16 +AES_XTS_decrypt_update_avx1: +#else +.section __TEXT,__text +.globl _AES_XTS_decrypt_update_avx1 +.p2align 4 +_AES_XTS_decrypt_update_avx1: +#endif /* __APPLE__ */ + pushq %r12 + movq %rdx, %rax + movq %rcx, %r10 + subq $16, %rsp + vmovdqu L_avx1_aes_xts_gc_xts(%rip), %xmm12 + vmovdqu (%r8), %xmm0 + xorl %r12d, %r12d + movl %eax, %r11d + andl $0xfffffff0, %r11d + cmpl %eax, %r11d + je L_AES_XTS_decrypt_update_avx1_mul16_64 + subl $16, %r11d + cmpl $16, %r11d + jl L_AES_XTS_decrypt_update_avx1_last_31_start +L_AES_XTS_decrypt_update_avx1_mul16_64: + cmpl $0x40, %r11d + jl L_AES_XTS_decrypt_update_avx1_done_64 + andl $0xffffffc0, %r11d +L_AES_XTS_decrypt_update_avx1_dec_64: + # 64 bytes of input + # aes_dec_64 + leaq (%rdi,%r12,1), %rcx + leaq (%rsi,%r12,1), %rdx + 
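+ # Note: the block-count setup before this loop holds one full 16-byte
+ # block back whenever sz is not a multiple of 16 (the subl $16
+ # adjustments), reserving it for the last_31 ciphertext-stealing path.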
vmovdqu (%rcx), %xmm8 + vmovdqu 16(%rcx), %xmm9 + vmovdqu 32(%rcx), %xmm10 + vmovdqu 48(%rcx), %xmm11 + vpsrad $31, %xmm0, %xmm4 + vpslld $0x01, %xmm0, %xmm1 + vpshufd $0x93, %xmm4, %xmm4 + vpand %xmm12, %xmm4, %xmm4 + vpxor %xmm4, %xmm1, %xmm1 + vpsrad $31, %xmm1, %xmm4 + vpslld $0x01, %xmm1, %xmm2 + vpshufd $0x93, %xmm4, %xmm4 + vpand %xmm12, %xmm4, %xmm4 + vpxor %xmm4, %xmm2, %xmm2 + vpsrad $31, %xmm2, %xmm4 + vpslld $0x01, %xmm2, %xmm3 + vpshufd $0x93, %xmm4, %xmm4 + vpand %xmm12, %xmm4, %xmm4 + vpxor %xmm4, %xmm3, %xmm3 + vpxor %xmm0, %xmm8, %xmm8 + vpxor %xmm1, %xmm9, %xmm9 + vpxor %xmm2, %xmm10, %xmm10 + vpxor %xmm3, %xmm11, %xmm11 + # aes_dec_block + vmovdqu (%r10), %xmm4 + vpxor %xmm4, %xmm8, %xmm8 + vpxor %xmm4, %xmm9, %xmm9 + vpxor %xmm4, %xmm10, %xmm10 + vpxor %xmm4, %xmm11, %xmm11 + vmovdqu 16(%r10), %xmm4 + vaesdec %xmm4, %xmm8, %xmm8 + vaesdec %xmm4, %xmm9, %xmm9 + vaesdec %xmm4, %xmm10, %xmm10 + vaesdec %xmm4, %xmm11, %xmm11 + vmovdqu 32(%r10), %xmm4 + vaesdec %xmm4, %xmm8, %xmm8 + vaesdec %xmm4, %xmm9, %xmm9 + vaesdec %xmm4, %xmm10, %xmm10 + vaesdec %xmm4, %xmm11, %xmm11 + vmovdqu 48(%r10), %xmm4 + vaesdec %xmm4, %xmm8, %xmm8 + vaesdec %xmm4, %xmm9, %xmm9 + vaesdec %xmm4, %xmm10, %xmm10 + vaesdec %xmm4, %xmm11, %xmm11 + vmovdqu 64(%r10), %xmm4 + vaesdec %xmm4, %xmm8, %xmm8 + vaesdec %xmm4, %xmm9, %xmm9 + vaesdec %xmm4, %xmm10, %xmm10 + vaesdec %xmm4, %xmm11, %xmm11 + vmovdqu 80(%r10), %xmm4 + vaesdec %xmm4, %xmm8, %xmm8 + vaesdec %xmm4, %xmm9, %xmm9 + vaesdec %xmm4, %xmm10, %xmm10 + vaesdec %xmm4, %xmm11, %xmm11 + vmovdqu 96(%r10), %xmm4 + vaesdec %xmm4, %xmm8, %xmm8 + vaesdec %xmm4, %xmm9, %xmm9 + vaesdec %xmm4, %xmm10, %xmm10 + vaesdec %xmm4, %xmm11, %xmm11 + vmovdqu 112(%r10), %xmm4 + vaesdec %xmm4, %xmm8, %xmm8 + vaesdec %xmm4, %xmm9, %xmm9 + vaesdec %xmm4, %xmm10, %xmm10 + vaesdec %xmm4, %xmm11, %xmm11 + vmovdqu 128(%r10), %xmm4 + vaesdec %xmm4, %xmm8, %xmm8 + vaesdec %xmm4, %xmm9, %xmm9 + vaesdec %xmm4, %xmm10, %xmm10 + vaesdec %xmm4, %xmm11, %xmm11 + vmovdqu 144(%r10), %xmm4 + vaesdec %xmm4, %xmm8, %xmm8 + vaesdec %xmm4, %xmm9, %xmm9 + vaesdec %xmm4, %xmm10, %xmm10 + vaesdec %xmm4, %xmm11, %xmm11 + cmpl $11, %r9d + vmovdqu 160(%r10), %xmm4 + jl L_AES_XTS_decrypt_update_avx1_aes_dec_64_aes_dec_block_last + vaesdec %xmm4, %xmm8, %xmm8 + vaesdec %xmm4, %xmm9, %xmm9 + vaesdec %xmm4, %xmm10, %xmm10 + vaesdec %xmm4, %xmm11, %xmm11 + vmovdqu 176(%r10), %xmm4 + vaesdec %xmm4, %xmm8, %xmm8 + vaesdec %xmm4, %xmm9, %xmm9 + vaesdec %xmm4, %xmm10, %xmm10 + vaesdec %xmm4, %xmm11, %xmm11 + cmpl $13, %r9d + vmovdqu 192(%r10), %xmm4 + jl L_AES_XTS_decrypt_update_avx1_aes_dec_64_aes_dec_block_last + vaesdec %xmm4, %xmm8, %xmm8 + vaesdec %xmm4, %xmm9, %xmm9 + vaesdec %xmm4, %xmm10, %xmm10 + vaesdec %xmm4, %xmm11, %xmm11 + vmovdqu 208(%r10), %xmm4 + vaesdec %xmm4, %xmm8, %xmm8 + vaesdec %xmm4, %xmm9, %xmm9 + vaesdec %xmm4, %xmm10, %xmm10 + vaesdec %xmm4, %xmm11, %xmm11 + vmovdqu 224(%r10), %xmm4 +L_AES_XTS_decrypt_update_avx1_aes_dec_64_aes_dec_block_last: + vaesdeclast %xmm4, %xmm8, %xmm8 + vaesdeclast %xmm4, %xmm9, %xmm9 + vaesdeclast %xmm4, %xmm10, %xmm10 + vaesdeclast %xmm4, %xmm11, %xmm11 + vpxor %xmm0, %xmm8, %xmm8 + vpxor %xmm1, %xmm9, %xmm9 + vpxor %xmm2, %xmm10, %xmm10 + vpxor %xmm3, %xmm11, %xmm11 + vmovdqu %xmm8, (%rdx) + vmovdqu %xmm9, 16(%rdx) + vmovdqu %xmm10, 32(%rdx) + vmovdqu %xmm11, 48(%rdx) + vpsrad $31, %xmm3, %xmm4 + vpslld $0x01, %xmm3, %xmm0 + vpshufd $0x93, %xmm4, %xmm4 + vpand %xmm12, %xmm4, %xmm4 + vpxor %xmm4, %xmm0, %xmm0 + addl $0x40, %r12d + cmpl %r11d, 
%r12d + jl L_AES_XTS_decrypt_update_avx1_dec_64 +L_AES_XTS_decrypt_update_avx1_done_64: + cmpl %eax, %r12d + movl %eax, %r11d + je L_AES_XTS_decrypt_update_avx1_done_dec + andl $0xfffffff0, %r11d + cmpl %eax, %r11d + je L_AES_XTS_decrypt_update_avx1_mul16 + subl $16, %r11d + subl %r12d, %r11d + cmpl $16, %r11d + jl L_AES_XTS_decrypt_update_avx1_last_31_start + addl %r12d, %r11d +L_AES_XTS_decrypt_update_avx1_mul16: +L_AES_XTS_decrypt_update_avx1_dec_16: + # 16 bytes of input + leaq (%rdi,%r12,1), %rcx + vmovdqu (%rcx), %xmm8 + vpxor %xmm0, %xmm8, %xmm8 + # aes_dec_block + vpxor (%r10), %xmm8, %xmm8 + vmovdqu 16(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 32(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 48(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 64(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 80(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 96(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 112(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 128(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 144(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + cmpl $11, %r9d + vmovdqu 160(%r10), %xmm5 + jl L_AES_XTS_decrypt_update_avx1_aes_dec_block_last + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 176(%r10), %xmm6 + vaesdec %xmm6, %xmm8, %xmm8 + cmpl $13, %r9d + vmovdqu 192(%r10), %xmm5 + jl L_AES_XTS_decrypt_update_avx1_aes_dec_block_last + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 208(%r10), %xmm6 + vaesdec %xmm6, %xmm8, %xmm8 + vmovdqu 224(%r10), %xmm5 +L_AES_XTS_decrypt_update_avx1_aes_dec_block_last: + vaesdeclast %xmm5, %xmm8, %xmm8 + vpxor %xmm0, %xmm8, %xmm8 + leaq (%rsi,%r12,1), %rcx + vmovdqu %xmm8, (%rcx) + vpsrad $31, %xmm0, %xmm4 + vpslld $0x01, %xmm0, %xmm0 + vpshufd $0x93, %xmm4, %xmm4 + vpand %xmm12, %xmm4, %xmm4 + vpxor %xmm4, %xmm0, %xmm0 + addl $16, %r12d + cmpl %r11d, %r12d + jl L_AES_XTS_decrypt_update_avx1_dec_16 + cmpl %eax, %r12d + je L_AES_XTS_decrypt_update_avx1_done_dec +L_AES_XTS_decrypt_update_avx1_last_31_start: + vpsrad $31, %xmm0, %xmm4 + vpslld $0x01, %xmm0, %xmm7 + vpshufd $0x93, %xmm4, %xmm4 + vpand %xmm12, %xmm4, %xmm4 + vpxor %xmm4, %xmm7, %xmm7 + leaq (%rdi,%r12,1), %rcx + vmovdqu (%rcx), %xmm8 + vpxor %xmm7, %xmm8, %xmm8 + # aes_dec_block + vpxor (%r10), %xmm8, %xmm8 + vmovdqu 16(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 32(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 48(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 64(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 80(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 96(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 112(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 128(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 144(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + cmpl $11, %r9d + vmovdqu 160(%r10), %xmm5 + jl L_AES_XTS_decrypt_update_avx1_last_31_aes_dec_block_last + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 176(%r10), %xmm6 + vaesdec %xmm6, %xmm8, %xmm8 + cmpl $13, %r9d + vmovdqu 192(%r10), %xmm5 + jl L_AES_XTS_decrypt_update_avx1_last_31_aes_dec_block_last + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 208(%r10), %xmm6 + vaesdec %xmm6, %xmm8, %xmm8 + vmovdqu 224(%r10), %xmm5 +L_AES_XTS_decrypt_update_avx1_last_31_aes_dec_block_last: + vaesdeclast %xmm5, %xmm8, %xmm8 + vpxor %xmm7, %xmm8, %xmm8 + vmovdqu %xmm8, (%rsp) + addq $16, %r12 + xorq %rdx, %rdx +L_AES_XTS_decrypt_update_avx1_last_31_byte_loop: + movb (%rsp,%rdx,1), %r11b + movb (%rdi,%r12,1), %cl + movb %r11b, (%rsi,%r12,1) + movb %cl, (%rsp,%rdx,1) + incl %r12d + incl 
%edx + cmpl %eax, %r12d + jl L_AES_XTS_decrypt_update_avx1_last_31_byte_loop + subq %rdx, %r12 + vmovdqu (%rsp), %xmm8 + vpxor %xmm0, %xmm8, %xmm8 + # aes_dec_block + vpxor (%r10), %xmm8, %xmm8 + vmovdqu 16(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 32(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 48(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 64(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 80(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 96(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 112(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 128(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 144(%r10), %xmm5 + vaesdec %xmm5, %xmm8, %xmm8 + cmpl $11, %r9d + vmovdqu 160(%r10), %xmm5 + jl L_AES_XTS_decrypt_update_avx1_last_31_2_aes_dec_block_last + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 176(%r10), %xmm6 + vaesdec %xmm6, %xmm8, %xmm8 + cmpl $13, %r9d + vmovdqu 192(%r10), %xmm5 + jl L_AES_XTS_decrypt_update_avx1_last_31_2_aes_dec_block_last + vaesdec %xmm5, %xmm8, %xmm8 + vmovdqu 208(%r10), %xmm6 + vaesdec %xmm6, %xmm8, %xmm8 + vmovdqu 224(%r10), %xmm5 +L_AES_XTS_decrypt_update_avx1_last_31_2_aes_dec_block_last: + vaesdeclast %xmm5, %xmm8, %xmm8 + vpxor %xmm0, %xmm8, %xmm8 + subq $16, %r12 + leaq (%rsi,%r12,1), %rcx + vmovdqu %xmm8, (%rcx) +L_AES_XTS_decrypt_update_avx1_done_dec: + vmovdqu %xmm0, (%r8) + addq $16, %rsp + popq %r12 + repz retq +#ifndef __APPLE__ +.size AES_XTS_decrypt_update_avx1,.-AES_XTS_decrypt_update_avx1 +#endif /* __APPLE__ */ #endif /* HAVE_INTEL_AVX1 */ #endif /* WOLFSSL_X86_64_BUILD */ #endif /* WOLFSSL_AES_XTS */ diff --git a/wolfcrypt/src/aes_xts_asm.asm b/wolfcrypt/src/aes_xts_asm.asm index 3185ec224..7194a06b8 100644 --- a/wolfcrypt/src/aes_xts_asm.asm +++ b/wolfcrypt/src/aes_xts_asm.asm @@ -40,6 +40,48 @@ IFNDEF _WIN64 _WIN64 = 1 ENDIF +_text SEGMENT READONLY PARA +AES_XTS_init_aesni PROC + movdqu xmm0, OWORD PTR [rcx] + ; aes_enc_block + pxor xmm0, [rdx] + movdqu xmm2, OWORD PTR [rdx+16] + aesenc xmm0, xmm2 + movdqu xmm2, OWORD PTR [rdx+32] + aesenc xmm0, xmm2 + movdqu xmm2, OWORD PTR [rdx+48] + aesenc xmm0, xmm2 + movdqu xmm2, OWORD PTR [rdx+64] + aesenc xmm0, xmm2 + movdqu xmm2, OWORD PTR [rdx+80] + aesenc xmm0, xmm2 + movdqu xmm2, OWORD PTR [rdx+96] + aesenc xmm0, xmm2 + movdqu xmm2, OWORD PTR [rdx+112] + aesenc xmm0, xmm2 + movdqu xmm2, OWORD PTR [rdx+128] + aesenc xmm0, xmm2 + movdqu xmm2, OWORD PTR [rdx+144] + aesenc xmm0, xmm2 + cmp r8d, 11 + movdqu xmm2, OWORD PTR [rdx+160] + jl L_AES_XTS_init_aesni_tweak_aes_enc_block_last + aesenc xmm0, xmm2 + movdqu xmm3, OWORD PTR [rdx+176] + aesenc xmm0, xmm3 + cmp r8d, 13 + movdqu xmm2, OWORD PTR [rdx+192] + jl L_AES_XTS_init_aesni_tweak_aes_enc_block_last + aesenc xmm0, xmm2 + movdqu xmm3, OWORD PTR [rdx+208] + aesenc xmm0, xmm3 + movdqu xmm2, OWORD PTR [rdx+224] +L_AES_XTS_init_aesni_tweak_aes_enc_block_last: + aesenclast xmm0, xmm2 + movdqu OWORD PTR [rcx], xmm0 + ret +AES_XTS_init_aesni ENDP +_text ENDS _DATA SEGMENT ALIGN 16 L_aes_xts_gc_xts DWORD 135,1,1,1 @@ -379,6 +421,302 @@ L_AES_XTS_encrypt_aesni_done_enc: AES_XTS_encrypt_aesni ENDP _text ENDS _text SEGMENT READONLY PARA +AES_XTS_encrypt_update_aesni PROC + push rdi + push rsi + push r12 + mov rdi, rcx + mov rsi, rdx + mov rax, r8 + mov r10, r9 + mov r8, QWORD PTR [rsp+64] + mov r9d, DWORD PTR [rsp+72] + sub rsp, 176 + movdqu OWORD PTR [rsp+64], xmm6 + movdqu OWORD PTR [rsp+80], xmm7 + movdqu OWORD PTR [rsp+96], xmm8 + movdqu OWORD PTR [rsp+112], xmm9 + movdqu OWORD PTR [rsp+128], xmm10 + movdqu OWORD 
PTR [rsp+144], xmm11 + movdqu OWORD PTR [rsp+160], xmm12 + movdqu xmm12, OWORD PTR L_aes_xts_gc_xts + movdqu xmm0, OWORD PTR [r8] + xor r12d, r12d + cmp eax, 64 + mov r11d, eax + jl L_AES_XTS_encrypt_update_aesni_done_64 + and r11d, 4294967232 +L_AES_XTS_encrypt_update_aesni_enc_64: + ; 64 bytes of input + ; aes_enc_64 + lea rcx, QWORD PTR [rdi+r12] + lea rdx, QWORD PTR [rsi+r12] + movdqu xmm8, OWORD PTR [rcx] + movdqu xmm9, OWORD PTR [rcx+16] + movdqu xmm10, OWORD PTR [rcx+32] + movdqu xmm11, OWORD PTR [rcx+48] + movdqa xmm4, xmm0 + movdqa xmm1, xmm0 + psrad xmm4, 31 + pslld xmm1, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm1, xmm4 + movdqa xmm4, xmm1 + movdqa xmm2, xmm1 + psrad xmm4, 31 + pslld xmm2, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm2, xmm4 + movdqa xmm4, xmm2 + movdqa xmm3, xmm2 + psrad xmm4, 31 + pslld xmm3, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm3, xmm4 + pxor xmm8, xmm0 + pxor xmm9, xmm1 + pxor xmm10, xmm2 + pxor xmm11, xmm3 + ; aes_enc_block + movdqu xmm4, OWORD PTR [r10] + pxor xmm8, xmm4 + pxor xmm9, xmm4 + pxor xmm10, xmm4 + pxor xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+16] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+32] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+48] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+64] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+80] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+96] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+112] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+128] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+144] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + cmp r9d, 11 + movdqu xmm4, OWORD PTR [r10+160] + jl L_AES_XTS_encrypt_update_aesni_aes_enc_64_aes_enc_block_last + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+176] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + cmp r9d, 13 + movdqu xmm4, OWORD PTR [r10+192] + jl L_AES_XTS_encrypt_update_aesni_aes_enc_64_aes_enc_block_last + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+208] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+224] +L_AES_XTS_encrypt_update_aesni_aes_enc_64_aes_enc_block_last: + aesenclast xmm8, xmm4 + aesenclast xmm9, xmm4 + aesenclast xmm10, xmm4 + aesenclast xmm11, xmm4 + pxor xmm8, xmm0 + pxor xmm9, xmm1 + pxor xmm10, xmm2 + pxor xmm11, xmm3 + movdqu OWORD PTR [rdx], xmm8 + movdqu OWORD PTR [rdx+16], xmm9 + movdqu OWORD PTR [rdx+32], xmm10 + movdqu OWORD PTR [rdx+48], xmm11 + movdqa xmm4, xmm3 + movdqa xmm0, xmm3 + psrad xmm4, 31 + pslld xmm0, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm0, xmm4 + add r12d, 64 + cmp r12d, r11d + jl L_AES_XTS_encrypt_update_aesni_enc_64 +L_AES_XTS_encrypt_update_aesni_done_64: + cmp r12d, eax + mov r11d, eax + je L_AES_XTS_encrypt_update_aesni_done_enc + sub r11d, r12d + cmp 
r11d, 16 + mov r11d, eax + jl L_AES_XTS_encrypt_update_aesni_last_15 + and r11d, 4294967280 + ; 16 bytes of input +L_AES_XTS_encrypt_update_aesni_enc_16: + lea rcx, QWORD PTR [rdi+r12] + movdqu xmm8, OWORD PTR [rcx] + pxor xmm8, xmm0 + ; aes_enc_block + pxor xmm8, [r10] + movdqu xmm5, OWORD PTR [r10+16] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+32] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+48] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+64] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+80] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+96] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+112] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+128] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+144] + aesenc xmm8, xmm5 + cmp r9d, 11 + movdqu xmm5, OWORD PTR [r10+160] + jl L_AES_XTS_encrypt_update_aesni_aes_enc_block_last + aesenc xmm8, xmm5 + movdqu xmm6, OWORD PTR [r10+176] + aesenc xmm8, xmm6 + cmp r9d, 13 + movdqu xmm5, OWORD PTR [r10+192] + jl L_AES_XTS_encrypt_update_aesni_aes_enc_block_last + aesenc xmm8, xmm5 + movdqu xmm6, OWORD PTR [r10+208] + aesenc xmm8, xmm6 + movdqu xmm5, OWORD PTR [r10+224] +L_AES_XTS_encrypt_update_aesni_aes_enc_block_last: + aesenclast xmm8, xmm5 + pxor xmm8, xmm0 + lea rcx, QWORD PTR [rsi+r12] + movdqu OWORD PTR [rcx], xmm8 + movdqa xmm4, xmm0 + psrad xmm4, 31 + pslld xmm0, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm0, xmm4 + add r12d, 16 + cmp r12d, r11d + jl L_AES_XTS_encrypt_update_aesni_enc_16 + cmp r12d, eax + je L_AES_XTS_encrypt_update_aesni_done_enc +L_AES_XTS_encrypt_update_aesni_last_15: + sub r12, 16 + lea rcx, QWORD PTR [rsi+r12] + movdqu xmm8, OWORD PTR [rcx] + add r12, 16 + movdqu OWORD PTR [rsp], xmm8 + xor rdx, rdx +L_AES_XTS_encrypt_update_aesni_last_15_byte_loop: + mov r11b, BYTE PTR [rsp+rdx] + mov cl, BYTE PTR [rdi+r12] + mov BYTE PTR [rsi+r12], r11b + mov BYTE PTR [rsp+rdx], cl + inc r12d + inc edx + cmp r12d, eax + jl L_AES_XTS_encrypt_update_aesni_last_15_byte_loop + sub r12, rdx + movdqu xmm8, OWORD PTR [rsp] + sub r12, 16 + pxor xmm8, xmm0 + ; aes_enc_block + pxor xmm8, [r10] + movdqu xmm5, OWORD PTR [r10+16] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+32] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+48] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+64] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+80] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+96] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+112] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+128] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+144] + aesenc xmm8, xmm5 + cmp r9d, 11 + movdqu xmm5, OWORD PTR [r10+160] + jl L_AES_XTS_encrypt_update_aesni_last_15_aes_enc_block_last + aesenc xmm8, xmm5 + movdqu xmm6, OWORD PTR [r10+176] + aesenc xmm8, xmm6 + cmp r9d, 13 + movdqu xmm5, OWORD PTR [r10+192] + jl L_AES_XTS_encrypt_update_aesni_last_15_aes_enc_block_last + aesenc xmm8, xmm5 + movdqu xmm6, OWORD PTR [r10+208] + aesenc xmm8, xmm6 + movdqu xmm5, OWORD PTR [r10+224] +L_AES_XTS_encrypt_update_aesni_last_15_aes_enc_block_last: + aesenclast xmm8, xmm5 + pxor xmm8, xmm0 + lea rcx, QWORD PTR [rsi+r12] + movdqu OWORD PTR [rcx], xmm8 +L_AES_XTS_encrypt_update_aesni_done_enc: + movdqu OWORD PTR [r8], xmm0 + movdqu xmm6, OWORD PTR [rsp+64] + movdqu xmm7, OWORD PTR [rsp+80] + movdqu xmm8, OWORD PTR [rsp+96] + movdqu xmm9, OWORD PTR [rsp+112] + movdqu xmm10, OWORD PTR [rsp+128] + movdqu xmm11, OWORD PTR [rsp+144] + movdqu xmm12, OWORD PTR [rsp+160] + add rsp, 176 + pop r12 + pop rsi + pop rdi + ret 
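+ ; The caller-supplied tweak block is both input and output here: it is
+ ; loaded into xmm0 on entry and the advanced value is stored back
+ ; through r8 above, so successive _update calls continue one XTS stream.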
+AES_XTS_encrypt_update_aesni ENDP +_text ENDS +_text SEGMENT READONLY PARA AES_XTS_decrypt_aesni PROC push rdi push rsi @@ -765,7 +1103,400 @@ L_AES_XTS_decrypt_aesni_done_dec: ret AES_XTS_decrypt_aesni ENDP _text ENDS +_text SEGMENT READONLY PARA +AES_XTS_decrypt_update_aesni PROC + push rdi + push rsi + push r12 + mov rdi, rcx + mov rsi, rdx + mov rax, r8 + mov r10, r9 + mov r8, QWORD PTR [rsp+64] + mov r9d, DWORD PTR [rsp+72] + sub rsp, 128 + movdqu OWORD PTR [rsp+16], xmm6 + movdqu OWORD PTR [rsp+32], xmm7 + movdqu OWORD PTR [rsp+48], xmm8 + movdqu OWORD PTR [rsp+64], xmm9 + movdqu OWORD PTR [rsp+80], xmm10 + movdqu OWORD PTR [rsp+96], xmm11 + movdqu OWORD PTR [rsp+112], xmm12 + movdqu xmm12, OWORD PTR L_aes_xts_gc_xts + movdqu xmm0, OWORD PTR [r8] + xor r12d, r12d + mov r11d, eax + and r11d, 4294967280 + cmp r11d, eax + je L_AES_XTS_decrypt_update_aesni_mul16_64 + sub r11d, 16 + cmp r11d, 16 + jl L_AES_XTS_decrypt_update_aesni_last_31_start +L_AES_XTS_decrypt_update_aesni_mul16_64: + cmp r11d, 64 + jl L_AES_XTS_decrypt_update_aesni_done_64 + and r11d, 4294967232 +L_AES_XTS_decrypt_update_aesni_dec_64: + ; 64 bytes of input + ; aes_dec_64 + lea rcx, QWORD PTR [rdi+r12] + lea rdx, QWORD PTR [rsi+r12] + movdqu xmm8, OWORD PTR [rcx] + movdqu xmm9, OWORD PTR [rcx+16] + movdqu xmm10, OWORD PTR [rcx+32] + movdqu xmm11, OWORD PTR [rcx+48] + movdqa xmm4, xmm0 + movdqa xmm1, xmm0 + psrad xmm4, 31 + pslld xmm1, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm1, xmm4 + movdqa xmm4, xmm1 + movdqa xmm2, xmm1 + psrad xmm4, 31 + pslld xmm2, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm2, xmm4 + movdqa xmm4, xmm2 + movdqa xmm3, xmm2 + psrad xmm4, 31 + pslld xmm3, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm3, xmm4 + pxor xmm8, xmm0 + pxor xmm9, xmm1 + pxor xmm10, xmm2 + pxor xmm11, xmm3 + ; aes_dec_block + movdqu xmm4, OWORD PTR [r10] + pxor xmm8, xmm4 + pxor xmm9, xmm4 + pxor xmm10, xmm4 + pxor xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+16] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+32] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+48] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+64] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+80] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+96] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+112] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+128] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+144] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + cmp r9d, 11 + movdqu xmm4, OWORD PTR [r10+160] + jl L_AES_XTS_decrypt_update_aesni_aes_dec_64_aes_dec_block_last + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+176] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + cmp r9d, 13 + movdqu xmm4, OWORD PTR [r10+192] + jl L_AES_XTS_decrypt_update_aesni_aes_dec_64_aes_dec_block_last + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+208] 
+ aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r10+224] +L_AES_XTS_decrypt_update_aesni_aes_dec_64_aes_dec_block_last: + aesdeclast xmm8, xmm4 + aesdeclast xmm9, xmm4 + aesdeclast xmm10, xmm4 + aesdeclast xmm11, xmm4 + pxor xmm8, xmm0 + pxor xmm9, xmm1 + pxor xmm10, xmm2 + pxor xmm11, xmm3 + movdqu OWORD PTR [rdx], xmm8 + movdqu OWORD PTR [rdx+16], xmm9 + movdqu OWORD PTR [rdx+32], xmm10 + movdqu OWORD PTR [rdx+48], xmm11 + movdqa xmm4, xmm3 + movdqa xmm0, xmm3 + psrad xmm4, 31 + pslld xmm0, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm0, xmm4 + add r12d, 64 + cmp r12d, r11d + jl L_AES_XTS_decrypt_update_aesni_dec_64 +L_AES_XTS_decrypt_update_aesni_done_64: + cmp r12d, eax + mov r11d, eax + je L_AES_XTS_decrypt_update_aesni_done_dec + and r11d, 4294967280 + cmp r11d, eax + je L_AES_XTS_decrypt_update_aesni_mul16 + sub r11d, 16 + sub r11d, r12d + cmp r11d, 16 + jl L_AES_XTS_decrypt_update_aesni_last_31_start + add r11d, r12d +L_AES_XTS_decrypt_update_aesni_mul16: +L_AES_XTS_decrypt_update_aesni_dec_16: + ; 16 bytes of input + lea rcx, QWORD PTR [rdi+r12] + movdqu xmm8, OWORD PTR [rcx] + pxor xmm8, xmm0 + ; aes_dec_block + pxor xmm8, [r10] + movdqu xmm5, OWORD PTR [r10+16] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+32] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+48] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+64] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+80] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+96] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+112] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+128] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+144] + aesdec xmm8, xmm5 + cmp r9d, 11 + movdqu xmm5, OWORD PTR [r10+160] + jl L_AES_XTS_decrypt_update_aesni_aes_dec_block_last + aesdec xmm8, xmm5 + movdqu xmm6, OWORD PTR [r10+176] + aesdec xmm8, xmm6 + cmp r9d, 13 + movdqu xmm5, OWORD PTR [r10+192] + jl L_AES_XTS_decrypt_update_aesni_aes_dec_block_last + aesdec xmm8, xmm5 + movdqu xmm6, OWORD PTR [r10+208] + aesdec xmm8, xmm6 + movdqu xmm5, OWORD PTR [r10+224] +L_AES_XTS_decrypt_update_aesni_aes_dec_block_last: + aesdeclast xmm8, xmm5 + pxor xmm8, xmm0 + lea rcx, QWORD PTR [rsi+r12] + movdqu OWORD PTR [rcx], xmm8 + movdqa xmm4, xmm0 + psrad xmm4, 31 + pslld xmm0, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm0, xmm4 + add r12d, 16 + cmp r12d, r11d + jl L_AES_XTS_decrypt_update_aesni_dec_16 + cmp r12d, eax + je L_AES_XTS_decrypt_update_aesni_done_dec +L_AES_XTS_decrypt_update_aesni_last_31_start: + movdqa xmm4, xmm0 + movdqa xmm7, xmm0 + psrad xmm4, 31 + pslld xmm7, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm7, xmm4 + lea rcx, QWORD PTR [rdi+r12] + movdqu xmm8, OWORD PTR [rcx] + pxor xmm8, xmm7 + ; aes_dec_block + pxor xmm8, [r10] + movdqu xmm5, OWORD PTR [r10+16] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+32] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+48] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+64] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+80] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+96] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+112] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+128] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+144] + aesdec xmm8, xmm5 + cmp r9d, 11 + movdqu xmm5, OWORD PTR [r10+160] + jl L_AES_XTS_decrypt_update_aesni_last_31_aes_dec_block_last + aesdec xmm8, xmm5 + movdqu xmm6, OWORD PTR [r10+176] + aesdec xmm8, xmm6 + cmp r9d, 13 + movdqu xmm5, OWORD PTR [r10+192] + jl 
L_AES_XTS_decrypt_update_aesni_last_31_aes_dec_block_last + aesdec xmm8, xmm5 + movdqu xmm6, OWORD PTR [r10+208] + aesdec xmm8, xmm6 + movdqu xmm5, OWORD PTR [r10+224] +L_AES_XTS_decrypt_update_aesni_last_31_aes_dec_block_last: + aesdeclast xmm8, xmm5 + pxor xmm8, xmm7 + movdqu OWORD PTR [rsp], xmm8 + add r12, 16 + xor rdx, rdx +L_AES_XTS_decrypt_update_aesni_last_31_byte_loop: + mov r11b, BYTE PTR [rsp+rdx] + mov cl, BYTE PTR [rdi+r12] + mov BYTE PTR [rsi+r12], r11b + mov BYTE PTR [rsp+rdx], cl + inc r12d + inc edx + cmp r12d, eax + jl L_AES_XTS_decrypt_update_aesni_last_31_byte_loop + sub r12, rdx + movdqu xmm8, OWORD PTR [rsp] + pxor xmm8, xmm0 + ; aes_dec_block + pxor xmm8, [r10] + movdqu xmm5, OWORD PTR [r10+16] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+32] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+48] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+64] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+80] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+96] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+112] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+128] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r10+144] + aesdec xmm8, xmm5 + cmp r9d, 11 + movdqu xmm5, OWORD PTR [r10+160] + jl L_AES_XTS_decrypt_update_aesni_last_31_2_aes_dec_block_last + aesdec xmm8, xmm5 + movdqu xmm6, OWORD PTR [r10+176] + aesdec xmm8, xmm6 + cmp r9d, 13 + movdqu xmm5, OWORD PTR [r10+192] + jl L_AES_XTS_decrypt_update_aesni_last_31_2_aes_dec_block_last + aesdec xmm8, xmm5 + movdqu xmm6, OWORD PTR [r10+208] + aesdec xmm8, xmm6 + movdqu xmm5, OWORD PTR [r10+224] +L_AES_XTS_decrypt_update_aesni_last_31_2_aes_dec_block_last: + aesdeclast xmm8, xmm5 + pxor xmm8, xmm0 + sub r12, 16 + lea rcx, QWORD PTR [rsi+r12] + movdqu OWORD PTR [rcx], xmm8 +L_AES_XTS_decrypt_update_aesni_done_dec: + movdqu OWORD PTR [r8], xmm0 + movdqu xmm6, OWORD PTR [rsp+16] + movdqu xmm7, OWORD PTR [rsp+32] + movdqu xmm8, OWORD PTR [rsp+48] + movdqu xmm9, OWORD PTR [rsp+64] + movdqu xmm10, OWORD PTR [rsp+80] + movdqu xmm11, OWORD PTR [rsp+96] + movdqu xmm12, OWORD PTR [rsp+112] + add rsp, 128 + pop r12 + pop rsi + pop rdi + ret +AES_XTS_decrypt_update_aesni ENDP +_text ENDS IFDEF HAVE_INTEL_AVX1 +_text SEGMENT READONLY PARA +AES_XTS_init_avx1 PROC + mov eax, r8d + vmovdqu xmm0, OWORD PTR [rcx] + ; aes_enc_block + vpxor xmm0, xmm0, [rdx] + vmovdqu xmm2, OWORD PTR [rdx+16] + vaesenc xmm0, xmm0, xmm2 + vmovdqu xmm2, OWORD PTR [rdx+32] + vaesenc xmm0, xmm0, xmm2 + vmovdqu xmm2, OWORD PTR [rdx+48] + vaesenc xmm0, xmm0, xmm2 + vmovdqu xmm2, OWORD PTR [rdx+64] + vaesenc xmm0, xmm0, xmm2 + vmovdqu xmm2, OWORD PTR [rdx+80] + vaesenc xmm0, xmm0, xmm2 + vmovdqu xmm2, OWORD PTR [rdx+96] + vaesenc xmm0, xmm0, xmm2 + vmovdqu xmm2, OWORD PTR [rdx+112] + vaesenc xmm0, xmm0, xmm2 + vmovdqu xmm2, OWORD PTR [rdx+128] + vaesenc xmm0, xmm0, xmm2 + vmovdqu xmm2, OWORD PTR [rdx+144] + vaesenc xmm0, xmm0, xmm2 + cmp eax, 11 + vmovdqu xmm2, OWORD PTR [rdx+160] + jl L_AES_XTS_init_avx1_tweak_aes_enc_block_last + vaesenc xmm0, xmm0, xmm2 + vmovdqu xmm3, OWORD PTR [rdx+176] + vaesenc xmm0, xmm0, xmm3 + cmp eax, 13 + vmovdqu xmm2, OWORD PTR [rdx+192] + jl L_AES_XTS_init_avx1_tweak_aes_enc_block_last + vaesenc xmm0, xmm0, xmm2 + vmovdqu xmm3, OWORD PTR [rdx+208] + vaesenc xmm0, xmm0, xmm3 + vmovdqu xmm2, OWORD PTR [rdx+224] +L_AES_XTS_init_avx1_tweak_aes_enc_block_last: + vaesenclast xmm0, xmm0, xmm2 + vmovdqu OWORD PTR [rcx], xmm0 + ret +AES_XTS_init_avx1 ENDP +_text ENDS _DATA SEGMENT ALIGN 16 L_avx1_aes_xts_gc_xts DWORD 135,1,1,1 @@ -1080,7 
+1811,6 @@ L_AES_XTS_encrypt_avx1_last_15_aes_enc_block_last: lea rcx, QWORD PTR [rsi+r13] vmovdqu OWORD PTR [rcx], xmm8 L_AES_XTS_encrypt_avx1_done_enc: - vzeroupper vmovdqu xmm6, OWORD PTR [rsp+64] vmovdqu xmm7, OWORD PTR [rsp+80] vmovdqu xmm8, OWORD PTR [rsp+96] @@ -1097,6 +1827,293 @@ L_AES_XTS_encrypt_avx1_done_enc: AES_XTS_encrypt_avx1 ENDP _text ENDS _text SEGMENT READONLY PARA +AES_XTS_encrypt_update_avx1 PROC + push rdi + push rsi + push r12 + mov rdi, rcx + mov rsi, rdx + mov rax, r8 + mov r10, r9 + mov r8, QWORD PTR [rsp+64] + mov r9d, DWORD PTR [rsp+72] + sub rsp, 176 + vmovdqu OWORD PTR [rsp+64], xmm6 + vmovdqu OWORD PTR [rsp+80], xmm7 + vmovdqu OWORD PTR [rsp+96], xmm8 + vmovdqu OWORD PTR [rsp+112], xmm9 + vmovdqu OWORD PTR [rsp+128], xmm10 + vmovdqu OWORD PTR [rsp+144], xmm11 + vmovdqu OWORD PTR [rsp+160], xmm12 + vmovdqu xmm12, OWORD PTR L_avx1_aes_xts_gc_xts + vmovdqu xmm0, OWORD PTR [r8] + xor r12d, r12d + cmp eax, 64 + mov r11d, eax + jl L_AES_XTS_encrypt_update_avx1_done_64 + and r11d, 4294967232 +L_AES_XTS_encrypt_update_avx1_enc_64: + ; 64 bytes of input + ; aes_enc_64 + lea rcx, QWORD PTR [rdi+r12] + lea rdx, QWORD PTR [rsi+r12] + vmovdqu xmm8, OWORD PTR [rcx] + vmovdqu xmm9, OWORD PTR [rcx+16] + vmovdqu xmm10, OWORD PTR [rcx+32] + vmovdqu xmm11, OWORD PTR [rcx+48] + vpsrad xmm4, xmm0, 31 + vpslld xmm1, xmm0, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm1, xmm1, xmm4 + vpsrad xmm4, xmm1, 31 + vpslld xmm2, xmm1, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm2, xmm2, xmm4 + vpsrad xmm4, xmm2, 31 + vpslld xmm3, xmm2, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm3, xmm3, xmm4 + vpxor xmm8, xmm8, xmm0 + vpxor xmm9, xmm9, xmm1 + vpxor xmm10, xmm10, xmm2 + vpxor xmm11, xmm11, xmm3 + ; aes_enc_block + vmovdqu xmm4, OWORD PTR [r10] + vpxor xmm8, xmm8, xmm4 + vpxor xmm9, xmm9, xmm4 + vpxor xmm10, xmm10, xmm4 + vpxor xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+16] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+32] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+48] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+64] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+80] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+96] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+112] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+128] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+144] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + cmp r9d, 11 + vmovdqu xmm4, OWORD PTR [r10+160] + jl L_AES_XTS_encrypt_update_avx1_aes_enc_64_aes_enc_block_last + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+176] + vaesenc xmm8, xmm8, xmm4 
+ vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + cmp r9d, 13 + vmovdqu xmm4, OWORD PTR [r10+192] + jl L_AES_XTS_encrypt_update_avx1_aes_enc_64_aes_enc_block_last + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+208] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+224] +L_AES_XTS_encrypt_update_avx1_aes_enc_64_aes_enc_block_last: + vaesenclast xmm8, xmm8, xmm4 + vaesenclast xmm9, xmm9, xmm4 + vaesenclast xmm10, xmm10, xmm4 + vaesenclast xmm11, xmm11, xmm4 + vpxor xmm8, xmm8, xmm0 + vpxor xmm9, xmm9, xmm1 + vpxor xmm10, xmm10, xmm2 + vpxor xmm11, xmm11, xmm3 + vmovdqu OWORD PTR [rdx], xmm8 + vmovdqu OWORD PTR [rdx+16], xmm9 + vmovdqu OWORD PTR [rdx+32], xmm10 + vmovdqu OWORD PTR [rdx+48], xmm11 + vpsrad xmm4, xmm3, 31 + vpslld xmm0, xmm3, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm0, xmm0, xmm4 + add r12d, 64 + cmp r12d, r11d + jl L_AES_XTS_encrypt_update_avx1_enc_64 +L_AES_XTS_encrypt_update_avx1_done_64: + cmp r12d, eax + mov r11d, eax + je L_AES_XTS_encrypt_update_avx1_done_enc + sub r11d, r12d + cmp r11d, 16 + mov r11d, eax + jl L_AES_XTS_encrypt_update_avx1_last_15 + and r11d, 4294967280 + ; 16 bytes of input +L_AES_XTS_encrypt_update_avx1_enc_16: + lea rcx, QWORD PTR [rdi+r12] + vmovdqu xmm8, OWORD PTR [rcx] + vpxor xmm8, xmm8, xmm0 + ; aes_enc_block + vpxor xmm8, xmm8, [r10] + vmovdqu xmm5, OWORD PTR [r10+16] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+32] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+48] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+64] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+80] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+96] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+112] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+128] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+144] + vaesenc xmm8, xmm8, xmm5 + cmp r9d, 11 + vmovdqu xmm5, OWORD PTR [r10+160] + jl L_AES_XTS_encrypt_update_avx1_aes_enc_block_last + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r10+176] + vaesenc xmm8, xmm8, xmm6 + cmp r9d, 13 + vmovdqu xmm5, OWORD PTR [r10+192] + jl L_AES_XTS_encrypt_update_avx1_aes_enc_block_last + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r10+208] + vaesenc xmm8, xmm8, xmm6 + vmovdqu xmm5, OWORD PTR [r10+224] +L_AES_XTS_encrypt_update_avx1_aes_enc_block_last: + vaesenclast xmm8, xmm8, xmm5 + vpxor xmm8, xmm8, xmm0 + lea rcx, QWORD PTR [rsi+r12] + vmovdqu OWORD PTR [rcx], xmm8 + vpsrad xmm4, xmm0, 31 + vpslld xmm0, xmm0, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm0, xmm0, xmm4 + add r12d, 16 + cmp r12d, r11d + jl L_AES_XTS_encrypt_update_avx1_enc_16 + cmp r12d, eax + je L_AES_XTS_encrypt_update_avx1_done_enc +L_AES_XTS_encrypt_update_avx1_last_15: + sub r12, 16 + lea rcx, QWORD PTR [rsi+r12] + vmovdqu xmm8, OWORD PTR [rcx] + add r12, 16 + vmovdqu OWORD PTR [rsp], xmm8 + xor rdx, rdx +L_AES_XTS_encrypt_update_avx1_last_15_byte_loop: + mov r11b, BYTE PTR [rsp+rdx] + mov cl, BYTE PTR [rdi+r12] + mov BYTE PTR [rsi+r12], r11b + mov BYTE PTR [rsp+rdx], cl + inc r12d + inc edx + cmp r12d, eax + jl L_AES_XTS_encrypt_update_avx1_last_15_byte_loop + sub r12, rdx + vmovdqu xmm8, OWORD PTR [rsp] + sub r12, 16 + vpxor xmm8, xmm8, xmm0 + ; aes_enc_block + vpxor xmm8, xmm8, [r10] + vmovdqu xmm5, OWORD 
PTR [r10+16] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+32] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+48] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+64] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+80] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+96] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+112] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+128] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+144] + vaesenc xmm8, xmm8, xmm5 + cmp r9d, 11 + vmovdqu xmm5, OWORD PTR [r10+160] + jl L_AES_XTS_encrypt_update_avx1_last_15_aes_enc_block_last + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r10+176] + vaesenc xmm8, xmm8, xmm6 + cmp r9d, 13 + vmovdqu xmm5, OWORD PTR [r10+192] + jl L_AES_XTS_encrypt_update_avx1_last_15_aes_enc_block_last + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r10+208] + vaesenc xmm8, xmm8, xmm6 + vmovdqu xmm5, OWORD PTR [r10+224] +L_AES_XTS_encrypt_update_avx1_last_15_aes_enc_block_last: + vaesenclast xmm8, xmm8, xmm5 + vpxor xmm8, xmm8, xmm0 + lea rcx, QWORD PTR [rsi+r12] + vmovdqu OWORD PTR [rcx], xmm8 +L_AES_XTS_encrypt_update_avx1_done_enc: + vmovdqu OWORD PTR [r8], xmm0 + vmovdqu xmm6, OWORD PTR [rsp+64] + vmovdqu xmm7, OWORD PTR [rsp+80] + vmovdqu xmm8, OWORD PTR [rsp+96] + vmovdqu xmm9, OWORD PTR [rsp+112] + vmovdqu xmm10, OWORD PTR [rsp+128] + vmovdqu xmm11, OWORD PTR [rsp+144] + vmovdqu xmm12, OWORD PTR [rsp+160] + add rsp, 176 + pop r12 + pop rsi + pop rdi + ret +AES_XTS_encrypt_update_avx1 ENDP +_text ENDS +_text SEGMENT READONLY PARA AES_XTS_decrypt_avx1 PROC push rdi push rsi @@ -1457,7 +2474,6 @@ L_AES_XTS_decrypt_avx1_last_31_2_aes_dec_block_last: lea rcx, QWORD PTR [rsi+r13] vmovdqu OWORD PTR [rcx], xmm8 L_AES_XTS_decrypt_avx1_done_dec: - vzeroupper vmovdqu xmm6, OWORD PTR [rsp+16] vmovdqu xmm7, OWORD PTR [rsp+32] vmovdqu xmm8, OWORD PTR [rsp+48] @@ -1473,5 +2489,344 @@ L_AES_XTS_decrypt_avx1_done_dec: ret AES_XTS_decrypt_avx1 ENDP _text ENDS +_text SEGMENT READONLY PARA +AES_XTS_decrypt_update_avx1 PROC + push rdi + push rsi + push r12 + mov rdi, rcx + mov rsi, rdx + mov rax, r8 + mov r10, r9 + mov r8, QWORD PTR [rsp+64] + mov r9d, DWORD PTR [rsp+72] + sub rsp, 128 + vmovdqu OWORD PTR [rsp+16], xmm6 + vmovdqu OWORD PTR [rsp+32], xmm7 + vmovdqu OWORD PTR [rsp+48], xmm8 + vmovdqu OWORD PTR [rsp+64], xmm9 + vmovdqu OWORD PTR [rsp+80], xmm10 + vmovdqu OWORD PTR [rsp+96], xmm11 + vmovdqu OWORD PTR [rsp+112], xmm12 + vmovdqu xmm12, OWORD PTR L_avx1_aes_xts_gc_xts + vmovdqu xmm0, OWORD PTR [r8] + xor r12d, r12d + mov r11d, eax + and r11d, 4294967280 + cmp r11d, eax + je L_AES_XTS_decrypt_update_avx1_mul16_64 + sub r11d, 16 + cmp r11d, 16 + jl L_AES_XTS_decrypt_update_avx1_last_31_start +L_AES_XTS_decrypt_update_avx1_mul16_64: + cmp r11d, 64 + jl L_AES_XTS_decrypt_update_avx1_done_64 + and r11d, 4294967232 +L_AES_XTS_decrypt_update_avx1_dec_64: + ; 64 bytes of input + ; aes_dec_64 + lea rcx, QWORD PTR [rdi+r12] + lea rdx, QWORD PTR [rsi+r12] + vmovdqu xmm8, OWORD PTR [rcx] + vmovdqu xmm9, OWORD PTR [rcx+16] + vmovdqu xmm10, OWORD PTR [rcx+32] + vmovdqu xmm11, OWORD PTR [rcx+48] + vpsrad xmm4, xmm0, 31 + vpslld xmm1, xmm0, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm1, xmm1, xmm4 + vpsrad xmm4, xmm1, 31 + vpslld xmm2, xmm1, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm2, xmm2, xmm4 + vpsrad xmm4, xmm2, 31 + vpslld xmm3, xmm2, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm3, xmm3, xmm4 + 
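+ ; Each vpsrad/vpslld/vpshufd/vpand/vpxor group above multiplies the
+ ; running tweak by x in GF(2^128): the shift doubles each 32-bit lane,
+ ; and the rotated sign mask ANDed with L_avx1_aes_xts_gc_xts (135,1,1,1)
+ ; carries bits between lanes and folds the top bit back in with the
+ ; 0x87 reduction polynomial, yielding the tweaks for blocks 2 through 4.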
vpxor xmm8, xmm8, xmm0 + vpxor xmm9, xmm9, xmm1 + vpxor xmm10, xmm10, xmm2 + vpxor xmm11, xmm11, xmm3 + ; aes_dec_block + vmovdqu xmm4, OWORD PTR [r10] + vpxor xmm8, xmm8, xmm4 + vpxor xmm9, xmm9, xmm4 + vpxor xmm10, xmm10, xmm4 + vpxor xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+16] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+32] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+48] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+64] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+80] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+96] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+112] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+128] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+144] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + cmp r9d, 11 + vmovdqu xmm4, OWORD PTR [r10+160] + jl L_AES_XTS_decrypt_update_avx1_aes_dec_64_aes_dec_block_last + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+176] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + cmp r9d, 13 + vmovdqu xmm4, OWORD PTR [r10+192] + jl L_AES_XTS_decrypt_update_avx1_aes_dec_64_aes_dec_block_last + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+208] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r10+224] +L_AES_XTS_decrypt_update_avx1_aes_dec_64_aes_dec_block_last: + vaesdeclast xmm8, xmm8, xmm4 + vaesdeclast xmm9, xmm9, xmm4 + vaesdeclast xmm10, xmm10, xmm4 + vaesdeclast xmm11, xmm11, xmm4 + vpxor xmm8, xmm8, xmm0 + vpxor xmm9, xmm9, xmm1 + vpxor xmm10, xmm10, xmm2 + vpxor xmm11, xmm11, xmm3 + vmovdqu OWORD PTR [rdx], xmm8 + vmovdqu OWORD PTR [rdx+16], xmm9 + vmovdqu OWORD PTR [rdx+32], xmm10 + vmovdqu OWORD PTR [rdx+48], xmm11 + vpsrad xmm4, xmm3, 31 + vpslld xmm0, xmm3, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm0, xmm0, xmm4 + add r12d, 64 + cmp r12d, r11d + jl L_AES_XTS_decrypt_update_avx1_dec_64 +L_AES_XTS_decrypt_update_avx1_done_64: + cmp r12d, eax + mov r11d, eax + je L_AES_XTS_decrypt_update_avx1_done_dec + and r11d, 4294967280 + cmp r11d, eax + je L_AES_XTS_decrypt_update_avx1_mul16 + sub r11d, 16 + sub r11d, r12d + cmp r11d, 16 + jl L_AES_XTS_decrypt_update_avx1_last_31_start + add r11d, r12d +L_AES_XTS_decrypt_update_avx1_mul16: +L_AES_XTS_decrypt_update_avx1_dec_16: + ; 16 bytes of input + lea rcx, QWORD PTR [rdi+r12] + vmovdqu xmm8, OWORD PTR [rcx] + vpxor xmm8, xmm8, xmm0 + ; aes_dec_block + vpxor xmm8, xmm8, [r10] + vmovdqu xmm5, 
OWORD PTR [r10+16] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+32] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+48] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+64] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+80] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+96] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+112] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+128] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+144] + vaesdec xmm8, xmm8, xmm5 + cmp r9d, 11 + vmovdqu xmm5, OWORD PTR [r10+160] + jl L_AES_XTS_decrypt_update_avx1_aes_dec_block_last + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r10+176] + vaesdec xmm8, xmm8, xmm6 + cmp r9d, 13 + vmovdqu xmm5, OWORD PTR [r10+192] + jl L_AES_XTS_decrypt_update_avx1_aes_dec_block_last + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r10+208] + vaesdec xmm8, xmm8, xmm6 + vmovdqu xmm5, OWORD PTR [r10+224] +L_AES_XTS_decrypt_update_avx1_aes_dec_block_last: + vaesdeclast xmm8, xmm8, xmm5 + vpxor xmm8, xmm8, xmm0 + lea rcx, QWORD PTR [rsi+r12] + vmovdqu OWORD PTR [rcx], xmm8 + vpsrad xmm4, xmm0, 31 + vpslld xmm0, xmm0, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm0, xmm0, xmm4 + add r12d, 16 + cmp r12d, r11d + jl L_AES_XTS_decrypt_update_avx1_dec_16 + cmp r12d, eax + je L_AES_XTS_decrypt_update_avx1_done_dec +L_AES_XTS_decrypt_update_avx1_last_31_start: + vpsrad xmm4, xmm0, 31 + vpslld xmm7, xmm0, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm7, xmm7, xmm4 + lea rcx, QWORD PTR [rdi+r12] + vmovdqu xmm8, OWORD PTR [rcx] + vpxor xmm8, xmm8, xmm7 + ; aes_dec_block + vpxor xmm8, xmm8, [r10] + vmovdqu xmm5, OWORD PTR [r10+16] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+32] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+48] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+64] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+80] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+96] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+112] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+128] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+144] + vaesdec xmm8, xmm8, xmm5 + cmp r9d, 11 + vmovdqu xmm5, OWORD PTR [r10+160] + jl L_AES_XTS_decrypt_update_avx1_last_31_aes_dec_block_last + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r10+176] + vaesdec xmm8, xmm8, xmm6 + cmp r9d, 13 + vmovdqu xmm5, OWORD PTR [r10+192] + jl L_AES_XTS_decrypt_update_avx1_last_31_aes_dec_block_last + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r10+208] + vaesdec xmm8, xmm8, xmm6 + vmovdqu xmm5, OWORD PTR [r10+224] +L_AES_XTS_decrypt_update_avx1_last_31_aes_dec_block_last: + vaesdeclast xmm8, xmm8, xmm5 + vpxor xmm8, xmm8, xmm7 + vmovdqu OWORD PTR [rsp], xmm8 + add r12, 16 + xor rdx, rdx +L_AES_XTS_decrypt_update_avx1_last_31_byte_loop: + mov r11b, BYTE PTR [rsp+rdx] + mov cl, BYTE PTR [rdi+r12] + mov BYTE PTR [rsi+r12], r11b + mov BYTE PTR [rsp+rdx], cl + inc r12d + inc edx + cmp r12d, eax + jl L_AES_XTS_decrypt_update_avx1_last_31_byte_loop + sub r12, rdx + vmovdqu xmm8, OWORD PTR [rsp] + vpxor xmm8, xmm8, xmm0 + ; aes_dec_block + vpxor xmm8, xmm8, [r10] + vmovdqu xmm5, OWORD PTR [r10+16] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+32] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+48] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+64] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+80] + vaesdec xmm8, 
xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+96] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+112] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+128] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r10+144] + vaesdec xmm8, xmm8, xmm5 + cmp r9d, 11 + vmovdqu xmm5, OWORD PTR [r10+160] + jl L_AES_XTS_decrypt_update_avx1_last_31_2_aes_dec_block_last + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r10+176] + vaesdec xmm8, xmm8, xmm6 + cmp r9d, 13 + vmovdqu xmm5, OWORD PTR [r10+192] + jl L_AES_XTS_decrypt_update_avx1_last_31_2_aes_dec_block_last + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r10+208] + vaesdec xmm8, xmm8, xmm6 + vmovdqu xmm5, OWORD PTR [r10+224] +L_AES_XTS_decrypt_update_avx1_last_31_2_aes_dec_block_last: + vaesdeclast xmm8, xmm8, xmm5 + vpxor xmm8, xmm8, xmm0 + sub r12, 16 + lea rcx, QWORD PTR [rsi+r12] + vmovdqu OWORD PTR [rcx], xmm8 +L_AES_XTS_decrypt_update_avx1_done_dec: + vmovdqu OWORD PTR [r8], xmm0 + vmovdqu xmm6, OWORD PTR [rsp+16] + vmovdqu xmm7, OWORD PTR [rsp+32] + vmovdqu xmm8, OWORD PTR [rsp+48] + vmovdqu xmm9, OWORD PTR [rsp+64] + vmovdqu xmm10, OWORD PTR [rsp+80] + vmovdqu xmm11, OWORD PTR [rsp+96] + vmovdqu xmm12, OWORD PTR [rsp+112] + add rsp, 128 + pop r12 + pop rsi + pop rdi + ret +AES_XTS_decrypt_update_avx1 ENDP +_text ENDS ENDIF END diff --git a/wolfcrypt/test/test.c b/wolfcrypt/test/test.c index e1bde7b1b..d67acc32e 100644 --- a/wolfcrypt/test/test.c +++ b/wolfcrypt/test/test.c @@ -9863,6 +9863,18 @@ static wc_test_ret_t aes_xts_128_test(void) wc_test_ret_t ret = 0; unsigned char buf[AES_BLOCK_SIZE * 2 + 8]; unsigned char cipher[AES_BLOCK_SIZE * 2 + 8]; +#ifdef WOLFSSL_AESXTS_STREAM + unsigned char i_copy[AES_BLOCK_SIZE]; +#endif +#if !defined(BENCH_EMBEDDED) && !defined(HAVE_CAVIUM) && \ + !defined(WOLFSSL_AFALG) + #define LARGE_XTS_SZ 1024 + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) + byte* large_input = NULL; + #else + byte large_input[LARGE_XTS_SZ]; + #endif +#endif /* 128 key tests */ WOLFSSL_SMALL_STACK_STATIC unsigned char k1[] = { @@ -9987,6 +9999,34 @@ static wc_test_ret_t aes_xts_128_test(void) if (XMEMCMP(c2, buf, sizeof(c2))) ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i2, sizeof(i2)); + + ret = wc_AesXtsEncryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf, p2, AES_BLOCK_SIZE, i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf + AES_BLOCK_SIZE, p2 + AES_BLOCK_SIZE, sizeof(p2) - AES_BLOCK_SIZE, i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(c2, buf, sizeof(c2))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + #if defined(DEBUG_VECTOR_REGISTER_ACCESS_AESXTS) && \ defined(WC_C_DYNAMIC_FALLBACK) WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); @@ -10015,6 +10055,27 @@ static wc_test_ret_t aes_xts_128_test(void) if (XMEMCMP(c1, buf, AES_BLOCK_SIZE)) ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i1, sizeof(i1)); + + ret = wc_AesXtsEncryptInit(aes, i_copy, sizeof(i_copy)); 
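+ /* i_copy carries the running tweak state: Init converts the IV in
+ * i_copy into the starting tweak in place, and each EncryptUpdate
+ * advances it, so the same buffer must be passed to every call in
+ * the stream. */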
+#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf, p1, sizeof(p1), i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(c1, buf, sizeof(c1))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + #if defined(DEBUG_VECTOR_REGISTER_ACCESS_AESXTS) && \ defined(WC_C_DYNAMIC_FALLBACK) WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); @@ -10040,6 +10101,27 @@ static wc_test_ret_t aes_xts_128_test(void) if (XMEMCMP(cp2, cipher, sizeof(cp2))) ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i1, sizeof(i1)); + + ret = wc_AesXtsEncryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf, pp, sizeof(pp), i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(cp2, buf, sizeof(cp2))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + #if defined(DEBUG_VECTOR_REGISTER_ACCESS_AESXTS) && \ defined(WC_C_DYNAMIC_FALLBACK) WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); @@ -10073,6 +10155,27 @@ static wc_test_ret_t aes_xts_128_test(void) if (XMEMCMP(pp, buf, sizeof(pp))) ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i1, sizeof(i1)); + + ret = wc_AesXtsDecryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsDecryptUpdate(aes, buf, cipher, sizeof(pp), i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(pp, buf, sizeof(pp))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + #if defined(DEBUG_VECTOR_REGISTER_ACCESS_AESXTS) && \ defined(WC_C_DYNAMIC_FALLBACK) WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); @@ -10104,9 +10207,30 @@ static wc_test_ret_t aes_xts_128_test(void) #endif if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); - if (XMEMCMP(p1, buf, AES_BLOCK_SIZE)) + if (XMEMCMP(p1, buf, sizeof(p1))) ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i1, sizeof(i1)); + + ret = wc_AesXtsDecryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsDecryptUpdate(aes, buf, c1, sizeof(c1), i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(p1, buf, sizeof(p1))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + #if defined(DEBUG_VECTOR_REGISTER_ACCESS_AESXTS) && \ defined(WC_C_DYNAMIC_FALLBACK) WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); @@ -10176,6 
+10300,34 @@ static wc_test_ret_t aes_xts_128_test(void) if (XMEMCMP(c3, buf, sizeof(c3))) ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i3, sizeof(i3)); + + ret = wc_AesXtsEncryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf, p3, AES_BLOCK_SIZE, i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf + AES_BLOCK_SIZE, p3 + AES_BLOCK_SIZE, sizeof(p3) - AES_BLOCK_SIZE, i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(c3, buf, sizeof(c3))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + ret = wc_AesXtsSetKeyNoInit(aes, k3, sizeof(k3), AES_DECRYPTION); if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); @@ -10192,21 +10344,47 @@ static wc_test_ret_t aes_xts_128_test(void) if (XMEMCMP(p3, buf, sizeof(p3))) ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i3, sizeof(i3)); + + ret = wc_AesXtsDecryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsDecryptUpdate(aes, buf, c3, AES_BLOCK_SIZE, i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsDecryptUpdate(aes, buf + AES_BLOCK_SIZE, c3 + AES_BLOCK_SIZE, sizeof(c3) - AES_BLOCK_SIZE, i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(p3, buf, sizeof(p3))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + #endif /* !HAVE_FIPS */ #if !defined(BENCH_EMBEDDED) && !defined(HAVE_CAVIUM) && \ !defined(WOLFSSL_AFALG) { - #define LARGE_XTS_SZ 1024 - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) - byte* large_input = (byte *)XMALLOC(LARGE_XTS_SZ, HEAP_HINT, - DYNAMIC_TYPE_TMP_BUFFER); - #else - byte large_input[LARGE_XTS_SZ]; - #endif int i; int j; +#ifdef WOLFSSL_AESXTS_STREAM + int k; +#endif #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) + large_input = (byte *)XMALLOC(LARGE_XTS_SZ, HEAP_HINT, + DYNAMIC_TYPE_TMP_BUFFER); if (large_input == NULL) ERROR_OUT(WC_TEST_RET_ENC_EC(MEMORY_E), out); #endif @@ -10247,9 +10425,108 @@ static wc_test_ret_t aes_xts_128_test(void) } } } - #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) - XFREE(large_input, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); - #endif + +#ifdef WOLFSSL_AESXTS_STREAM + for (i = 0; i < (int)LARGE_XTS_SZ; i++) + large_input[i] = (byte)i; + + /* first, encrypt block by block then decrypt with a one-shot call. 
*/ + for (j = 16; j < (int)LARGE_XTS_SZ; j++) { + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + XMEMCPY(i_copy, i1, sizeof(i1)); + ret = wc_AesXtsEncryptInit(aes, i_copy, sizeof(i_copy)); + #if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + for (k = 0; k < j; k += AES_BLOCK_SIZE) { + ret = wc_AesXtsEncryptUpdate(aes, large_input + k, large_input + k, (j - k) < AES_BLOCK_SIZE*2 ? j - k : AES_BLOCK_SIZE, i_copy); + #if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if ((j - k) < AES_BLOCK_SIZE*2) + break; + } + + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_DECRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsDecrypt(aes, large_input, large_input, (word32)j, i1, + sizeof(i1)); + #if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, + WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + for (i = 0; i < j; i++) { + if (large_input[i] != (byte)i) { + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + } + } + } + + /* second, encrypt with a one-shot call then decrypt block by block. */ + for (j = 16; j < (int)LARGE_XTS_SZ; j++) { + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncrypt(aes, large_input, large_input, (word32)j, i1, + sizeof(i1)); + #if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_DECRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + XMEMCPY(i_copy, i1, sizeof(i1)); + ret = wc_AesXtsDecryptInit(aes, i_copy, sizeof(i_copy)); + #if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + for (k = 0; k < j; k += AES_BLOCK_SIZE) { + ret = wc_AesXtsDecryptUpdate(aes, large_input + k, large_input + k, (j - k) < AES_BLOCK_SIZE*2 ? 
j - k : AES_BLOCK_SIZE, i_copy); + #if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, + WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if ((j - k) < AES_BLOCK_SIZE*2) + break; + } + + for (i = 0; i < j; i++) { + if (large_input[i] != (byte)i) { + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + } + } + } +#endif /* WOLFSSL_AESXTS_STREAM */ } #endif /* !BENCH_EMBEDDED && !HAVE_CAVIUM && * !WOLFSSL_AFALG @@ -10257,6 +10534,13 @@ static wc_test_ret_t aes_xts_128_test(void) out: + #if !defined(BENCH_EMBEDDED) && !defined(HAVE_CAVIUM) && \ + !defined(WOLFSSL_AFALG) && defined(WOLFSSL_SMALL_STACK) && \ + !defined(WOLFSSL_NO_MALLOC) + if (large_input) + XFREE(large_input, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + #endif + if (aes_inited) wc_AesXtsFree(aes); @@ -10269,6 +10553,704 @@ static wc_test_ret_t aes_xts_128_test(void) } #endif /* WOLFSSL_AES_128 */ +#ifdef WOLFSSL_AES_192 +static wc_test_ret_t aes_xts_192_test(void) +{ +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) + XtsAes *aes = NULL; +#else + XtsAes aes[1]; +#endif + int aes_inited = 0; + wc_test_ret_t ret = 0; + unsigned char buf[AES_BLOCK_SIZE * 2 + 8]; + unsigned char cipher[AES_BLOCK_SIZE * 2 + 8]; +#ifdef WOLFSSL_AESXTS_STREAM + unsigned char i_copy[AES_BLOCK_SIZE]; +#endif +#if !defined(BENCH_EMBEDDED) && !defined(HAVE_CAVIUM) && \ + !defined(WOLFSSL_AFALG) + #define LARGE_XTS_SZ 1024 + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) + byte* large_input = NULL; + #else + byte large_input[LARGE_XTS_SZ]; + #endif +#endif + + /* 192 bit key tests */ + WOLFSSL_SMALL_STACK_STATIC unsigned char k1[] = { + 0x1e, 0xa6, 0x61, 0xc5, 0x8d, 0x94, 0x3a, 0x0e, + 0x48, 0x01, 0xe4, 0x2f, 0x4b, 0x09, 0x47, 0x14, + 0x9e, 0x7f, 0x9f, 0x8e, 0x3e, 0x68, 0xd0, 0xc7, + 0x50, 0x52, 0x10, 0xbd, 0x31, 0x1a, 0x0e, 0x7c, + 0xd6, 0xe1, 0x3f, 0xfd, 0xf2, 0x41, 0x8d, 0x8d, + 0x19, 0x11, 0xc0, 0x04, 0xcd, 0xa5, 0x8d, 0xa3 + }; + + WOLFSSL_SMALL_STACK_STATIC unsigned char i1[] = { + 0x4f, 0xae, 0xf7, 0x11, 0x7c, 0xda, 0x59, 0xc6, + 0x6e, 0x4b, 0x92, 0x01, 0x3e, 0x76, 0x8a, 0xd5 + }; + + WOLFSSL_SMALL_STACK_STATIC unsigned char p1[] = { + 0xeb, 0xab, 0xce, 0x95, 0xb1, 0x4d, 0x3c, 0x8d, + 0x6f, 0xb3, 0x50, 0x39, 0x07, 0x90, 0x31, 0x1c + }; + + WOLFSSL_SMALL_STACK_STATIC unsigned char pp[] = { + 0xeb, 0xab, 0xce, 0x95, 0xb1, 0x4d, 0x3c, 0x8d, + 0x6f, 0xb3, 0x50, 0x39, 0x07, 0x90, 0x31, 0x1c, + 0x6e, 0x4b, 0x92, 0x01, 0x3e, 0x76, 0x8a, 0xd5 + }; + + WOLFSSL_SMALL_STACK_STATIC unsigned char c1[] = { + 0x65, 0x37, 0x15, 0x53, 0xf1, 0x98, 0xab, 0xb4, + 0xdb, 0x4e, 0xd3, 0x69, 0xdf, 0x8e, 0x3a, 0xe0 + }; + + WOLFSSL_SMALL_STACK_STATIC unsigned char k2[] = { + 0xad, 0x50, 0x4b, 0x85, 0xd7, 0x51, 0xbf, 0xba, + 0x69, 0x13, 0xb4, 0xcc, 0x79, 0xb6, 0x5a, 0x62, + 0xf7, 0xf3, 0x9d, 0x36, 0x0f, 0x35, 0xb5, 0xec, + 0x4a, 0x7e, 0x95, 0xbd, 0x9b, 0xa5, 0xf2, 0xec, + 0xc1, 0xd7, 0x7e, 0xa3, 0xc3, 0x74, 0xbd, 0x4b, + 0x13, 0x1b, 0x07, 0x83, 0x87, 0xdd, 0x55, 0x5a + }; + + WOLFSSL_SMALL_STACK_STATIC unsigned char i2[] = { + 0x5c, 0xf7, 0x9d, 0xb6, 0xc5, 0xcd, 0x99, 0x1a, + 0x1c, 0x78, 0x81, 0x42, 0x24, 0x95, 0x1e, 0x84 + }; + + WOLFSSL_SMALL_STACK_STATIC unsigned char p2[] = { + 0xbd, 0xc5, 0x46, 0x8f, 0xbc, 0x8d, 0x50, 0xa1, + 0x0d, 0x1c, 0x85, 0x7f, 0x79, 0x1c, 0x5c, 0xba, + 0xb3, 0x81, 0x0d, 0x0d, 0x73, 0xcf, 0x8f, 0x20, + 0x46, 0xb1, 0xd1, 
0x9e, 0x7d, 0x5d, 0x8a, 0x56 + }; + + WOLFSSL_SMALL_STACK_STATIC unsigned char c2[] = { + 0x6c, 0xa6, 0xb5, 0x73, 0x48, 0xf1, 0x89, 0xfa, + 0xdd, 0x80, 0x72, 0x1f, 0xb8, 0x56, 0x0c, 0xa2, + 0x35, 0xd4, 0x08, 0xbf, 0x24, 0xcb, 0xec, 0xdb, + 0x81, 0xe0, 0xe6, 0x4f, 0x3d, 0x1c, 0x5c, 0x46 + }; + + WOLFSSL_SMALL_STACK_STATIC unsigned char cp2[] = { + 0xe9, 0x58, 0xfe, 0xab, 0x66, 0xb4, 0xf1, 0x79, + 0x91, 0x3f, 0x91, 0xdc, 0x6f, 0xdf, 0xd6, 0xac, + 0x65, 0x37, 0x15, 0x53, 0xf1, 0x98, 0xab, 0xb4 + }; + +#ifndef HAVE_FIPS /* FIPS requires different keys for main and tweak. */ + WOLFSSL_SMALL_STACK_STATIC unsigned char k3[] = { + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 + }; + WOLFSSL_SMALL_STACK_STATIC unsigned char i3[] = { + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + }; + WOLFSSL_SMALL_STACK_STATIC unsigned char p3[] = { + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0xff, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 + }; + WOLFSSL_SMALL_STACK_STATIC unsigned char c3[] = { + 0xa4, 0xf2, 0x71, 0x5d, 0x80, 0x60, 0x68, 0xa0, + 0x80, 0x61, 0xd7, 0xc1, 0x55, 0xc8, 0x3a, 0x2e, + 0xd7, 0xf4, 0x62, 0xaf, 0xbd, 0x2d, 0xf9, 0x5f, + 0xe8, 0xc5, 0x99, 0x3d, 0x58, 0x3c, 0xeb, 0xba, + 0x86, 0xea, 0x2c, 0x7e, 0x1f, 0xba, 0x81, 0xde + }; +#endif /* HAVE_FIPS */ + +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) + if ((aes = (XtsAes *)XMALLOC(sizeof *aes, HEAP_HINT, DYNAMIC_TYPE_AES)) == NULL) + ERROR_OUT(WC_TEST_RET_ENC_ERRNO, out); +#endif + + XMEMSET(buf, 0, sizeof(buf)); + ret = wc_AesXtsInit(aes, HEAP_HINT, devId); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + else + aes_inited = 1; + + ret = wc_AesXtsSetKeyNoInit(aes, k2, sizeof(k2), AES_ENCRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncrypt(aes, buf, p2, sizeof(p2), i2, sizeof(i2)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if (XMEMCMP(c2, buf, sizeof(c2))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i2, sizeof(i2)); + + ret = wc_AesXtsEncryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf, p2, AES_BLOCK_SIZE, i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf + AES_BLOCK_SIZE, p2 + AES_BLOCK_SIZE, sizeof(p2) - AES_BLOCK_SIZE, i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(c2, buf, sizeof(c2))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + +#if defined(DEBUG_VECTOR_REGISTER_ACCESS_AESXTS) && \ + defined(WC_C_DYNAMIC_FALLBACK) 
+ WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); + ret = wc_AesXtsEncrypt(aes, buf, p2, sizeof(p2), i2, sizeof(i2)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(0); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if (XMEMCMP(c2, buf, sizeof(c2))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif + + XMEMSET(buf, 0, sizeof(buf)); + + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + ret = wc_AesXtsEncrypt(aes, buf, p1, sizeof(p1), i1, sizeof(i1)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if (XMEMCMP(c1, buf, AES_BLOCK_SIZE)) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i1, sizeof(i1)); + + ret = wc_AesXtsEncryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf, p1, sizeof(p1), i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(c1, buf, sizeof(c1))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + +#if defined(DEBUG_VECTOR_REGISTER_ACCESS_AESXTS) && \ + defined(WC_C_DYNAMIC_FALLBACK) + WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); + ret = wc_AesXtsEncrypt(aes, buf, p1, sizeof(p1), i1, sizeof(i1)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(0); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if (XMEMCMP(c1, buf, AES_BLOCK_SIZE)) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif + + /* partial block encryption test */ + XMEMSET(cipher, 0, sizeof(cipher)); + ret = wc_AesXtsEncrypt(aes, cipher, pp, sizeof(pp), i1, sizeof(i1)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if (XMEMCMP(cp2, cipher, sizeof(cp2))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i1, sizeof(i1)); + + ret = wc_AesXtsEncryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf, pp, sizeof(pp), i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(cp2, buf, sizeof(cp2))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + +#if defined(DEBUG_VECTOR_REGISTER_ACCESS_AESXTS) && \ + defined(WC_C_DYNAMIC_FALLBACK) + WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); + XMEMSET(cipher, 0, sizeof(cipher)); + ret = wc_AesXtsEncrypt(aes, cipher, pp, sizeof(pp), i1, sizeof(i1)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(0); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), 
out); + if (XMEMCMP(cp2, cipher, sizeof(cp2))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif + + /* partial block decrypt test */ + XMEMSET(buf, 0, sizeof(buf)); + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_DECRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + ret = wc_AesXtsDecrypt(aes, buf, cipher, sizeof(pp), i1, sizeof(i1)); +#if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if (XMEMCMP(pp, buf, sizeof(pp))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i1, sizeof(i1)); + + ret = wc_AesXtsDecryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsDecryptUpdate(aes, buf, cipher, sizeof(pp), i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(pp, buf, sizeof(pp))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + +#if defined(DEBUG_VECTOR_REGISTER_ACCESS_AESXTS) && \ + defined(WC_C_DYNAMIC_FALLBACK) + WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); + XMEMSET(buf, 0, sizeof(buf)); + ret = wc_AesXtsDecrypt(aes, buf, cipher, sizeof(pp), i1, sizeof(i1)); +#if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif +#endif + WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(0); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if (XMEMCMP(pp, buf, sizeof(pp))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif + + /* NIST decrypt test vector */ + XMEMSET(buf, 0, sizeof(buf)); + ret = wc_AesXtsDecrypt(aes, buf, c1, sizeof(c1), i1, sizeof(i1)); +#if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if (XMEMCMP(p1, buf, sizeof(p1))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i1, sizeof(i1)); + + ret = wc_AesXtsDecryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsDecryptUpdate(aes, buf, c1, sizeof(c1), i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(p1, buf, sizeof(p1))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + +#if defined(DEBUG_VECTOR_REGISTER_ACCESS_AESXTS) && \ + defined(WC_C_DYNAMIC_FALLBACK) + WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(SYSLIB_FAILED_E); + XMEMSET(buf, 0, sizeof(buf)); + ret = wc_AesXtsDecrypt(aes, buf, c1, sizeof(c1), i1, sizeof(i1)); +#if defined(WOLFSSL_ASYNC_CRYPT) + 
#ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif +#endif + WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(0); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if (XMEMCMP(p1, buf, AES_BLOCK_SIZE)) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif + + /* fail case with decrypting using wrong key */ + XMEMSET(buf, 0, sizeof(buf)); + ret = wc_AesXtsDecrypt(aes, buf, c2, sizeof(c2), i2, sizeof(i2)); +#if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if (XMEMCMP(p2, buf, sizeof(p2)) == 0) /* fail case with wrong key */ + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + + /* set correct key and retest */ + XMEMSET(buf, 0, sizeof(buf)); + ret = wc_AesXtsSetKeyNoInit(aes, k2, sizeof(k2), AES_DECRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + ret = wc_AesXtsDecrypt(aes, buf, c2, sizeof(c2), i2, sizeof(i2)); +#if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if (XMEMCMP(p2, buf, sizeof(p2))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + +#ifndef HAVE_FIPS + + /* Test ciphertext stealing in-place. */ + XMEMCPY(buf, p3, sizeof(p3)); + ret = wc_AesXtsSetKeyNoInit(aes, k3, sizeof(k3), AES_ENCRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncrypt(aes, buf, buf, sizeof(p3), i3, sizeof(i3)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if (XMEMCMP(c3, buf, sizeof(c3))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i3, sizeof(i3)); + + ret = wc_AesXtsEncryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf, p3, AES_BLOCK_SIZE, i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf + AES_BLOCK_SIZE, p3 + AES_BLOCK_SIZE, sizeof(p3) - AES_BLOCK_SIZE, i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(c3, buf, sizeof(c3))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + + ret = wc_AesXtsSetKeyNoInit(aes, k3, sizeof(k3), AES_DECRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + ret = wc_AesXtsDecrypt(aes, buf, buf, sizeof(c3), i3, sizeof(i3)); +#if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, 
WC_ASYNC_FLAG_NONE); + #endif +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if (XMEMCMP(p3, buf, sizeof(p3))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i3, sizeof(i3)); + + ret = wc_AesXtsDecryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsDecryptUpdate(aes, buf, c3, AES_BLOCK_SIZE, i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsDecryptUpdate(aes, buf + AES_BLOCK_SIZE, c3 + AES_BLOCK_SIZE, sizeof(c3) - AES_BLOCK_SIZE, i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(p3, buf, sizeof(p3))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + +#endif /* !HAVE_FIPS */ + +#if !defined(BENCH_EMBEDDED) && !defined(HAVE_CAVIUM) && \ + !defined(WOLFSSL_AFALG) + { + int i; + int j; +#ifdef WOLFSSL_AESXTS_STREAM + int k; +#endif + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) + large_input = (byte *)XMALLOC(LARGE_XTS_SZ, HEAP_HINT, + DYNAMIC_TYPE_TMP_BUFFER); + if (large_input == NULL) + ERROR_OUT(WC_TEST_RET_ENC_EC(MEMORY_E), out); + #endif + + for (i = 0; i < (int)LARGE_XTS_SZ; i++) + large_input[i] = (byte)i; + + for (j = 16; j < (int)LARGE_XTS_SZ; j++) { + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + ret = wc_AesXtsEncrypt(aes, large_input, large_input, j, i1, + sizeof(i1)); + #if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_DECRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + ret = wc_AesXtsDecrypt(aes, large_input, large_input, j, i1, + sizeof(i1)); + #if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, + WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + for (i = 0; i < j; i++) { + if (large_input[i] != (byte)i) { + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + } + } + } + +#ifdef WOLFSSL_AESXTS_STREAM + for (i = 0; i < (int)LARGE_XTS_SZ; i++) + large_input[i] = (byte)i; + + /* first, encrypt block by block then decrypt with a one-shot call. */ + for (j = 16; j < (int)LARGE_XTS_SZ; j++) { + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + XMEMCPY(i_copy, i1, sizeof(i1)); + ret = wc_AesXtsEncryptInit(aes, i_copy, sizeof(i_copy)); + #if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + for (k = 0; k < j; k += AES_BLOCK_SIZE) { + ret = wc_AesXtsEncryptUpdate(aes, large_input + k, large_input + k, (j - k) < AES_BLOCK_SIZE*2 ? 
j - k : AES_BLOCK_SIZE, i_copy); + #if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if ((j - k) < AES_BLOCK_SIZE*2) + break; + } + + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_DECRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsDecrypt(aes, large_input, large_input, (word32)j, i1, + sizeof(i1)); + #if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, + WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + for (i = 0; i < j; i++) { + if (large_input[i] != (byte)i) { + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + } + } + } + + /* second, encrypt with a one-shot call then decrypt block by block. */ + for (j = 16; j < (int)LARGE_XTS_SZ; j++) { + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncrypt(aes, large_input, large_input, (word32)j, i1, + sizeof(i1)); + #if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_DECRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + XMEMCPY(i_copy, i1, sizeof(i1)); + ret = wc_AesXtsDecryptInit(aes, i_copy, sizeof(i_copy)); + #if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + for (k = 0; k < j; k += AES_BLOCK_SIZE) { + ret = wc_AesXtsDecryptUpdate(aes, large_input + k, large_input + k, (j - k) < AES_BLOCK_SIZE*2 ? 
j - k : AES_BLOCK_SIZE, i_copy); + #if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, + WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if ((j - k) < AES_BLOCK_SIZE*2) + break; + } + + for (i = 0; i < j; i++) { + if (large_input[i] != (byte)i) { + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + } + } + } +#endif /* WOLFSSL_AESXTS_STREAM */ + } +#endif /* !BENCH_EMBEDDED && !HAVE_CAVIUM && + * !WOLFSSL_AFALG + */ + + out: + + #if !defined(BENCH_EMBEDDED) && !defined(HAVE_CAVIUM) && \ + !defined(WOLFSSL_AFALG) && defined(WOLFSSL_SMALL_STACK) && \ + !defined(WOLFSSL_NO_MALLOC) + if (large_input) + XFREE(large_input, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + #endif + + if (aes_inited) + wc_AesXtsFree(aes); + +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) + if (aes) + XFREE(aes, HEAP_HINT, DYNAMIC_TYPE_AES); +#endif + + return ret; +} +#endif /* WOLFSSL_AES_192 */ + #ifdef WOLFSSL_AES_256 static wc_test_ret_t aes_xts_256_test(void) @@ -10282,6 +11264,18 @@ static wc_test_ret_t aes_xts_256_test(void) wc_test_ret_t ret = 0; unsigned char buf[AES_BLOCK_SIZE * 3]; unsigned char cipher[AES_BLOCK_SIZE * 3]; +#ifdef WOLFSSL_AESXTS_STREAM + unsigned char i_copy[AES_BLOCK_SIZE]; +#endif +#if !defined(BENCH_EMBEDDED) && !defined(HAVE_CAVIUM) && \ + !defined(WOLFSSL_AFALG) + #define LARGE_XTS_SZ 1024 + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) + byte* large_input = NULL; + #else + byte large_input[LARGE_XTS_SZ]; + #endif +#endif /* 256 key tests */ WOLFSSL_SMALL_STACK_STATIC unsigned char k1[] = { @@ -10389,6 +11383,34 @@ static wc_test_ret_t aes_xts_256_test(void) if (XMEMCMP(c2, buf, sizeof(c2))) ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i2, sizeof(i2)); + + ret = wc_AesXtsEncryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf, p2, AES_BLOCK_SIZE, i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf + AES_BLOCK_SIZE, p2 + AES_BLOCK_SIZE, sizeof(p2) - AES_BLOCK_SIZE, i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(c2, buf, sizeof(c2))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + XMEMSET(buf, 0, sizeof(buf)); ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); if (ret != 0) @@ -10402,6 +11424,27 @@ static wc_test_ret_t aes_xts_256_test(void) if (XMEMCMP(c1, buf, AES_BLOCK_SIZE)) ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i1, sizeof(i1)); + + ret = wc_AesXtsEncryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncryptUpdate(aes, buf, p1, sizeof(p1), i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + 
if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(c1, buf, sizeof(c1))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + /* partial block encryption test */ XMEMSET(cipher, 0, sizeof(cipher)); ret = wc_AesXtsEncrypt(aes, cipher, pp, sizeof(pp), i1, sizeof(i1)); @@ -10441,9 +11484,30 @@ static wc_test_ret_t aes_xts_256_test(void) #endif if (ret != 0) ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); - if (XMEMCMP(p1, buf, AES_BLOCK_SIZE)) + if (XMEMCMP(p1, buf, sizeof(p1))) ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#ifdef WOLFSSL_AESXTS_STREAM + XMEMCPY(i_copy, i1, sizeof(i1)); + + ret = wc_AesXtsDecryptInit(aes, i_copy, sizeof(i_copy)); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsDecryptUpdate(aes, buf, c1, sizeof(c1), i_copy); +#if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); +#endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + if (XMEMCMP(p1, buf, sizeof(p1))) + ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#endif /* WOLFSSL_AESXTS_STREAM */ + XMEMSET(buf, 0, sizeof(buf)); ret = wc_AesXtsSetKeyNoInit(aes, k2, sizeof(k2), AES_DECRYPTION); if (ret != 0) @@ -10461,8 +11525,172 @@ static wc_test_ret_t aes_xts_256_test(void) if (XMEMCMP(p2, buf, sizeof(p2))) ERROR_OUT(WC_TEST_RET_ENC_NC, out); +#if !defined(BENCH_EMBEDDED) && !defined(HAVE_CAVIUM) && \ + !defined(WOLFSSL_AFALG) + { + int i; + int j; +#ifdef WOLFSSL_AESXTS_STREAM + int k; +#endif + #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) + large_input = (byte *)XMALLOC(LARGE_XTS_SZ, HEAP_HINT, + DYNAMIC_TYPE_TMP_BUFFER); + if (large_input == NULL) + ERROR_OUT(WC_TEST_RET_ENC_EC(MEMORY_E), out); + #endif + + for (i = 0; i < (int)LARGE_XTS_SZ; i++) + large_input[i] = (byte)i; + + for (j = 16; j < (int)LARGE_XTS_SZ; j++) { + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + ret = wc_AesXtsEncrypt(aes, large_input, large_input, j, i1, + sizeof(i1)); + #if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_DECRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + ret = wc_AesXtsDecrypt(aes, large_input, large_input, j, i1, + sizeof(i1)); + #if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, + WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + for (i = 0; i < j; i++) { + if (large_input[i] != (byte)i) { + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + } + } + } + +#ifdef WOLFSSL_AESXTS_STREAM + for (i = 0; i < (int)LARGE_XTS_SZ; i++) + large_input[i] = (byte)i; + + /* first, encrypt block by block then decrypt with a one-shot call. 
*/ + for (j = 16; j < (int)LARGE_XTS_SZ; j++) { + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + XMEMCPY(i_copy, i1, sizeof(i1)); + ret = wc_AesXtsEncryptInit(aes, i_copy, sizeof(i_copy)); + #if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + for (k = 0; k < j; k += AES_BLOCK_SIZE) { + ret = wc_AesXtsEncryptUpdate(aes, large_input + k, large_input + k, (j - k) < AES_BLOCK_SIZE*2 ? j - k : AES_BLOCK_SIZE, i_copy); + #if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if ((j - k) < AES_BLOCK_SIZE*2) + break; + } + + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_DECRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + ret = wc_AesXtsDecrypt(aes, large_input, large_input, (word32)j, i1, + sizeof(i1)); + #if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, + WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + for (i = 0; i < j; i++) { + if (large_input[i] != (byte)i) { + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + } + } + } + + /* second, encrypt with a one-shot call then decrypt block by block. */ + for (j = 16; j < (int)LARGE_XTS_SZ; j++) { + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_ENCRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsEncrypt(aes, large_input, large_input, (word32)j, i1, + sizeof(i1)); + #if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + ret = wc_AesXtsSetKeyNoInit(aes, k1, sizeof(k1), AES_DECRYPTION); + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + XMEMCPY(i_copy, i1, sizeof(i1)); + ret = wc_AesXtsDecryptInit(aes, i_copy, sizeof(i_copy)); + #if defined(WOLFSSL_ASYNC_CRYPT) + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + + for (k = 0; k < j; k += AES_BLOCK_SIZE) { + ret = wc_AesXtsDecryptUpdate(aes, large_input + k, large_input + k, (j - k) < AES_BLOCK_SIZE*2 ? 
j - k : AES_BLOCK_SIZE, i_copy); + #if defined(WOLFSSL_ASYNC_CRYPT) + #ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS + ret = wc_AsyncWait(ret, &aes->aes_decrypt.asyncDev, + WC_ASYNC_FLAG_NONE); + #else + ret = wc_AsyncWait(ret, &aes->aes.asyncDev, WC_ASYNC_FLAG_NONE); + #endif + #endif + if (ret != 0) + ERROR_OUT(WC_TEST_RET_ENC_EC(ret), out); + if ((j - k) < AES_BLOCK_SIZE*2) + break; + } + + for (i = 0; i < j; i++) { + if (large_input[i] != (byte)i) { + ERROR_OUT(WC_TEST_RET_ENC_NC, out); + } + } + } +#endif /* WOLFSSL_AESXTS_STREAM */ + } +#endif /* !BENCH_EMBEDDED && !HAVE_CAVIUM && + * !WOLFSSL_AFALG + */ + out: + #if !defined(BENCH_EMBEDDED) && !defined(HAVE_CAVIUM) && \ + !defined(WOLFSSL_AFALG) && defined(WOLFSSL_SMALL_STACK) && \ + !defined(WOLFSSL_NO_MALLOC) + if (large_input) + XFREE(large_input, HEAP_HINT, DYNAMIC_TYPE_TMP_BUFFER); + #endif + if (aes_inited) wc_AesXtsFree(aes); @@ -12348,6 +13576,13 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t aes_xts_test(void) if (ret != 0) return ret; #endif + + #ifdef WOLFSSL_AES_192 + ret = aes_xts_192_test(); + if (ret != 0) + return ret; + #endif + #ifdef WOLFSSL_AES_256 ret = aes_xts_256_test(); if (ret != 0) @@ -34037,7 +35272,7 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t ed25519_test(void) { /* Run tests for some rare code paths */ /* sig is exactly equal to the order */ - const byte rareEd1[] = { + static const byte rareEd1[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -34048,7 +35283,7 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t ed25519_test(void) 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10 }; /* sig is larger than the order before we get to the low part */ - const byte rareEd2[] = { + static const byte rareEd2[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -34059,7 +35294,7 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t ed25519_test(void) 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x10 }; /* sig is larger than the order in the low part */ - const byte rareEd3[] = { + static const byte rareEd3[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -34070,7 +35305,7 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t ed25519_test(void) 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10 }; /* sig is smaller than the order */ - const byte rareEd4[] = { + static const byte rareEd4[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, diff --git a/wolfssl/wolfcrypt/aes.h b/wolfssl/wolfcrypt/aes.h index 81d135b0a..772e6e8eb 100644 --- a/wolfssl/wolfcrypt/aes.h +++ b/wolfssl/wolfcrypt/aes.h @@ -669,6 +669,20 @@ WOLFSSL_API int wc_AesXtsDecryptConsecutiveSectors(XtsAes* aes, byte* out, const byte* in, word32 sz, word64 sector, word32 sectorSz); +#ifdef WOLFSSL_AESXTS_STREAM + +WOLFSSL_API int wc_AesXtsEncryptInit(XtsAes* aes, byte* i, word32 iSz); + +WOLFSSL_API int wc_AesXtsDecryptInit(XtsAes* aes, byte* i, word32 iSz); + +WOLFSSL_API int wc_AesXtsEncryptUpdate(XtsAes* aes, byte* out, + const byte* in, word32 sz, byte *i); + +WOLFSSL_API int wc_AesXtsDecryptUpdate(XtsAes* aes, byte* out, + const byte* in, word32 sz, byte *i); + +#endif /* WOLFSSL_AESXTS_STREAM */ + WOLFSSL_API int wc_AesXtsFree(XtsAes* aes); #endif
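
For reference, the streaming entry points declared above follow an Init/Update pattern rather than the one-shot wc_AesXtsEncrypt()/wc_AesXtsDecrypt() calls. Below is a minimal sketch of the encrypt path under WOLFSSL_AESXTS_STREAM, mirroring the block-by-block loops in the new tests; the helper name, NULL heap hint, and INVALID_DEVID are illustrative assumptions (not part of the patch), and the wc_AsyncWait() wrappers the tests add for WOLFSSL_ASYNC_CRYPT builds are omitted for brevity.

/* Sketch of the streaming AES-XTS encrypt calling pattern; assumes a build
 * with WOLFSSL_AESXTS_STREAM.  Helper name and heap/devId values are
 * illustrative only. */
#include <wolfssl/wolfcrypt/aes.h>

static int xts_stream_encrypt_sketch(const byte* key, word32 keySz,
                                     const byte* tweak, /* AES_BLOCK_SIZE bytes */
                                     byte* data, word32 sz) /* sz >= AES_BLOCK_SIZE */
{
    XtsAes aes;
    byte i_copy[AES_BLOCK_SIZE];
    word32 k, chunk;
    int ret;

    ret = wc_AesXtsInit(&aes, NULL, INVALID_DEVID);
    if (ret != 0)
        return ret;

    ret = wc_AesXtsSetKeyNoInit(&aes, key, keySz, AES_ENCRYPTION);

    if (ret == 0) {
        /* Init/Update advance the tweak in the caller's buffer, so hand
         * them a scratch copy, as the tests do with i_copy. */
        XMEMCPY(i_copy, tweak, AES_BLOCK_SIZE);
        ret = wc_AesXtsEncryptInit(&aes, i_copy, sizeof(i_copy));
    }

    for (k = 0; ret == 0 && k < sz; k += AES_BLOCK_SIZE) {
        /* Feed one block per call until fewer than two full blocks remain,
         * then pass the whole tail so the final call can apply ciphertext
         * stealing when sz is not a multiple of AES_BLOCK_SIZE. */
        chunk = ((sz - k) < AES_BLOCK_SIZE*2) ? (sz - k) : AES_BLOCK_SIZE;
        ret = wc_AesXtsEncryptUpdate(&aes, data + k, data + k, chunk, i_copy);
        if ((sz - k) < AES_BLOCK_SIZE*2)
            break;
    }

    wc_AesXtsFree(&aes);
    return ret;
}

Decryption mirrors this pattern with wc_AesXtsDecryptInit() and wc_AesXtsDecryptUpdate() after setting the key with wc_AesXtsSetKeyNoInit(..., AES_DECRYPTION), exactly as the block-by-block loops in the tests above do.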