diff --git a/configure.ac b/configure.ac index 22aa8b22c..1800ba856 100644 --- a/configure.ac +++ b/configure.ac @@ -1300,6 +1300,11 @@ AC_ARG_ENABLE([aesgcm], [ ENABLED_AESGCM=$enableval ], [ ENABLED_AESGCM=yes ] ) +AC_ARG_ENABLE([aesgcm-stream], + [AS_HELP_STRING([--enable-aesgcm-stream],[Enable wolfSSL AES-GCM support with streaming APIs (default: enabled)])], + [ ENABLED_AESGCM_STREAM=$enableval ], + [ ENABLED_AESGCM_STREAM=no ] + ) # leanpsk and leantls don't need gcm if test "$ENABLED_LEANPSK" = "yes" || ( test "$ENABLED_LEANTLS" = "yes" && @@ -5979,6 +5984,16 @@ then AM_CFLAGS="$AM_CFLAGS -DHAVE_AESGCM" fi +if test "$ENABLED_AESGCM_STREAM" != "no" +then + if test "$ENABLED_AESGCM" = "no" + then + AC_MSG_ERROR([AES-GCM streaming enabled but AES-GCM is disabled]) + else + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_AESGCM_STREAM" + AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_AESGCM_STREAM" + fi +fi AS_IF([test "x$ENABLED_MAXSTRENGTH" = "xyes"], diff --git a/tests/api.c b/tests/api.c index 37cdc2bd4..dde3d5406 100644 --- a/tests/api.c +++ b/tests/api.c @@ -11614,6 +11614,208 @@ static int test_wc_AesCmacGenerate (void) } /* END test_wc_AesCmacGenerate */ +/* + * Testing streaming AES-GCM API. + */ +static int test_wc_AesGcmStream (void) +{ + int ret = 0; +#if !defined(NO_AES) && defined(WOLFSSL_AES_128) && defined(HAVE_AESGCM) && \ + defined(WOLFSSL_AESGCM_STREAM) + + int i; + WC_RNG rng[1]; + Aes aesEnc[1]; + Aes aesDec[1]; + byte tag[AES_BLOCK_SIZE]; + byte in[AES_BLOCK_SIZE * 3 + 2] = { 0, }; + byte out[AES_BLOCK_SIZE * 3 + 2]; + byte plain[AES_BLOCK_SIZE * 3 + 2]; + byte aad[AES_BLOCK_SIZE * 3 + 2] = { 0, }; + byte key[AES_128_KEY_SIZE] = { 0, }; + byte iv[AES_IV_SIZE] = { 1, }; + byte ivOut[AES_IV_SIZE]; + static const byte expTagAAD1[AES_BLOCK_SIZE] = { + 0x6c, 0x35, 0xe6, 0x7f, 0x59, 0x9e, 0xa9, 0x2f, + 0x27, 0x2d, 0x5f, 0x8e, 0x7e, 0x42, 0xd3, 0x05 + }; + static const byte expTagPlain1[AES_BLOCK_SIZE] = { + 0x24, 0xba, 0x57, 0x95, 0xd0, 0x27, 0x9e, 0x78, + 0x3a, 0x88, 0x4c, 0x0a, 0x5d, 0x50, 0x23, 0xd1 + }; + static const byte expTag[AES_BLOCK_SIZE] = { + 0x22, 0x91, 0x70, 0xad, 0x42, 0xc3, 0xad, 0x96, + 0xe0, 0x31, 0x57, 0x60, 0xb7, 0x92, 0xa3, 0x6d + }; + + /* Create a random for generating IV/nonce. */ + AssertIntEQ(wc_InitRng(rng), 0); + + /* Initialize data structures. */ + AssertIntEQ(wc_AesInit(aesEnc, NULL, INVALID_DEVID), 0); + AssertIntEQ(wc_AesInit(aesDec, NULL, INVALID_DEVID), 0); + + /* BadParameters to streaming init. */ + AssertIntEQ(wc_AesGcmEncryptInit(NULL, NULL, 0, NULL, 0), BAD_FUNC_ARG); + AssertIntEQ(wc_AesGcmDecryptInit(NULL, NULL, 0, NULL, 0), BAD_FUNC_ARG); + AssertIntEQ(wc_AesGcmDecryptInit(aesEnc, NULL, AES_128_KEY_SIZE, NULL, 0), + BAD_FUNC_ARG); + AssertIntEQ(wc_AesGcmDecryptInit(aesEnc, NULL, 0, NULL, GCM_NONCE_MID_SZ), + BAD_FUNC_ARG); + + /* Bad parameters to encrypt update. */ + AssertIntEQ(wc_AesGcmEncryptUpdate(NULL, NULL, NULL, 0, NULL, 0), + BAD_FUNC_ARG); + AssertIntEQ(wc_AesGcmEncryptUpdate(aesEnc, NULL, NULL, 1, NULL, 0), + BAD_FUNC_ARG); + AssertIntEQ(wc_AesGcmEncryptUpdate(aesEnc, NULL, in, 1, NULL, 0), + BAD_FUNC_ARG); + AssertIntEQ(wc_AesGcmEncryptUpdate(aesEnc, out, NULL, 1, NULL, 0), + BAD_FUNC_ARG); + AssertIntEQ(wc_AesGcmEncryptUpdate(aesEnc, NULL, NULL, 0, NULL, 1), + BAD_FUNC_ARG); + /* Bad parameters to decrypt update. 
*/ + AssertIntEQ(wc_AesGcmDecryptUpdate(NULL, NULL, NULL, 0, NULL, 0), + BAD_FUNC_ARG); + AssertIntEQ(wc_AesGcmDecryptUpdate(aesDec, NULL, NULL, 1, NULL, 0), + BAD_FUNC_ARG); + AssertIntEQ(wc_AesGcmDecryptUpdate(aesDec, NULL, in, 1, NULL, 0), + BAD_FUNC_ARG); + AssertIntEQ(wc_AesGcmDecryptUpdate(aesDec, out, NULL, 1, NULL, 0), + BAD_FUNC_ARG); + AssertIntEQ(wc_AesGcmDecryptUpdate(aesDec, NULL, NULL, 0, NULL, 1), + BAD_FUNC_ARG); + + /* Bad parameters to encrypt final. */ + AssertIntEQ(wc_AesGcmEncryptFinal(NULL, NULL, 0), BAD_FUNC_ARG); + AssertIntEQ(wc_AesGcmEncryptFinal(NULL, tag, 0), BAD_FUNC_ARG); + AssertIntEQ(wc_AesGcmEncryptFinal(NULL, NULL, AES_BLOCK_SIZE), + BAD_FUNC_ARG); + AssertIntEQ(wc_AesGcmEncryptFinal(aesEnc, tag, 0), BAD_FUNC_ARG); + AssertIntEQ(wc_AesGcmEncryptFinal(aesEnc, NULL, AES_BLOCK_SIZE), + BAD_FUNC_ARG); + AssertIntEQ(wc_AesGcmEncryptFinal(aesEnc, tag, AES_BLOCK_SIZE + 1), + BAD_FUNC_ARG); + /* Bad parameters to decrypt final. */ + AssertIntEQ(wc_AesGcmDecryptFinal(NULL, NULL, 0), BAD_FUNC_ARG); + AssertIntEQ(wc_AesGcmDecryptFinal(NULL, tag, 0), BAD_FUNC_ARG); + AssertIntEQ(wc_AesGcmDecryptFinal(NULL, NULL, AES_BLOCK_SIZE), + BAD_FUNC_ARG); + AssertIntEQ(wc_AesGcmDecryptFinal(aesDec, tag, 0), BAD_FUNC_ARG); + AssertIntEQ(wc_AesGcmDecryptFinal(aesDec, NULL, AES_BLOCK_SIZE), + BAD_FUNC_ARG); + AssertIntEQ(wc_AesGcmDecryptFinal(aesDec, tag, AES_BLOCK_SIZE + 1), + BAD_FUNC_ARG); + + /* Check calling final before setting key fails. */ + AssertIntEQ(wc_AesGcmEncryptFinal(aesEnc, tag, sizeof(tag)), MISSING_KEY); + AssertIntEQ(wc_AesGcmEncryptFinal(aesDec, tag, sizeof(tag)), MISSING_KEY); + /* Check calling update before setting key else fails. */ + AssertIntEQ(wc_AesGcmEncryptUpdate(aesEnc, NULL, NULL, 0, aad, 1), + MISSING_KEY); + AssertIntEQ(wc_AesGcmDecryptUpdate(aesDec, NULL, NULL, 0, aad, 1), + MISSING_KEY); + + /* Set key but not IV. */ + AssertIntEQ(wc_AesGcmInit(aesEnc, key, sizeof(key), NULL, 0), 0); + AssertIntEQ(wc_AesGcmInit(aesDec, key, sizeof(key), NULL, 0), 0); + /* Check calling final before setting IV fails. */ + AssertIntEQ(wc_AesGcmEncryptFinal(aesEnc, tag, sizeof(tag)), MISSING_IV); + AssertIntEQ(wc_AesGcmEncryptFinal(aesDec, tag, sizeof(tag)), MISSING_IV); + /* Check calling update before setting IV else fails. */ + AssertIntEQ(wc_AesGcmEncryptUpdate(aesEnc, NULL, NULL, 0, aad, 1), + MISSING_IV); + AssertIntEQ(wc_AesGcmDecryptUpdate(aesDec, NULL, NULL, 0, aad, 1), + MISSING_IV); + + /* Set IV using fixed part IV and external IV APIs. */ + AssertIntEQ(wc_AesGcmSetIV(aesEnc, GCM_NONCE_MID_SZ, iv, AES_IV_FIXED_SZ, + rng), 0); + AssertIntEQ(wc_AesGcmEncryptInit_ex(aesEnc, NULL, 0, ivOut, + GCM_NONCE_MID_SZ), 0); + AssertIntEQ(wc_AesGcmSetExtIV(aesDec, ivOut, GCM_NONCE_MID_SZ), 0); + AssertIntEQ(wc_AesGcmInit(aesDec, NULL, 0, NULL, 0), 0); + /* Encrypt and decrypt data. */ + AssertIntEQ(wc_AesGcmEncryptUpdate(aesEnc, out, in, 1, aad, 1), 0); + AssertIntEQ(wc_AesGcmDecryptUpdate(aesDec, plain, out, 1, aad, 1), 0); + AssertIntEQ(XMEMCMP(plain, in, 1), 0); + /* Finalize and check tag matches. */ + AssertIntEQ(wc_AesGcmEncryptFinal(aesEnc, tag, AES_BLOCK_SIZE), 0); + AssertIntEQ(wc_AesGcmDecryptFinal(aesDec, tag, AES_BLOCK_SIZE), 0); + + /* Set key and IV through streaming init API. */ + AssertIntEQ(wc_AesGcmInit(aesEnc, key, sizeof(key), iv, AES_IV_SIZE), 0); + AssertIntEQ(wc_AesGcmInit(aesDec, key, sizeof(key), iv, AES_IV_SIZE), 0); + /* Encrypt/decrypt one block and AAD of one block. 
*/ + AssertIntEQ(wc_AesGcmEncryptUpdate(aesEnc, out, in, AES_BLOCK_SIZE, aad, + AES_BLOCK_SIZE), 0); + AssertIntEQ(wc_AesGcmDecryptUpdate(aesDec, plain, out, AES_BLOCK_SIZE, aad, + AES_BLOCK_SIZE), 0); + AssertIntEQ(XMEMCMP(plain, in, AES_BLOCK_SIZE), 0); + /* Finalize and check tag matches. */ + AssertIntEQ(wc_AesGcmEncryptFinal(aesEnc, tag, AES_BLOCK_SIZE), 0); + AssertIntEQ(wc_AesGcmDecryptFinal(aesDec, tag, AES_BLOCK_SIZE), 0); + + /* Set key and IV through streaming init API. */ + AssertIntEQ(wc_AesGcmInit(aesEnc, key, sizeof(key), iv, AES_IV_SIZE), 0); + AssertIntEQ(wc_AesGcmInit(aesDec, key, sizeof(key), iv, AES_IV_SIZE), 0); + /* No data to encrypt/decrypt one byte of AAD. */ + AssertIntEQ(wc_AesGcmEncryptUpdate(aesEnc, NULL, NULL, 0, aad, 1), 0); + AssertIntEQ(wc_AesGcmDecryptUpdate(aesDec, NULL, NULL, 0, aad, 1), 0); + /* Finalize and check tag matches. */ + AssertIntEQ(wc_AesGcmEncryptFinal(aesEnc, tag, AES_BLOCK_SIZE), 0); + AssertIntEQ(XMEMCMP(tag, expTagAAD1, AES_BLOCK_SIZE), 0); + AssertIntEQ(wc_AesGcmDecryptFinal(aesDec, tag, AES_BLOCK_SIZE), 0); + + /* Set key and IV through streaming init API. */ + AssertIntEQ(wc_AesGcmInit(aesEnc, key, sizeof(key), iv, AES_IV_SIZE), 0); + AssertIntEQ(wc_AesGcmInit(aesDec, key, sizeof(key), iv, AES_IV_SIZE), 0); + /* Encrypt/decrypt one byte and no AAD. */ + AssertIntEQ(wc_AesGcmEncryptUpdate(aesEnc, out, in, 1, NULL, 0), 0); + AssertIntEQ(wc_AesGcmDecryptUpdate(aesDec, plain, out, 1, NULL, 0), 0); + AssertIntEQ(XMEMCMP(plain, in, 1), 0); + /* Finalize and check tag matches. */ + AssertIntEQ(wc_AesGcmEncryptFinal(aesEnc, tag, AES_BLOCK_SIZE), 0); + AssertIntEQ(XMEMCMP(tag, expTagPlain1, AES_BLOCK_SIZE), 0); + AssertIntEQ(wc_AesGcmDecryptFinal(aesDec, tag, AES_BLOCK_SIZE), 0); + + /* Set key and IV through streaming init API. */ + AssertIntEQ(wc_AesGcmInit(aesEnc, key, sizeof(key), iv, AES_IV_SIZE), 0); + AssertIntEQ(wc_AesGcmInit(aesDec, key, sizeof(key), iv, AES_IV_SIZE), 0); + /* Encryption AES is one byte at a time */ + for (i = 0; i < (int)sizeof(aad); i++) { + AssertIntEQ(wc_AesGcmEncryptUpdate(aesEnc, NULL, NULL, 0, aad + i, 1), + 0); + } + for (i = 0; i < (int)sizeof(in); i++) { + AssertIntEQ(wc_AesGcmEncryptUpdate(aesEnc, out + i, in + i, 1, NULL, 0), + 0); + } + /* Decryption AES is two bytes at a time */ + for (i = 0; i < (int)sizeof(aad); i += 2) { + AssertIntEQ(wc_AesGcmDecryptUpdate(aesDec, NULL, NULL, 0, aad + i, 2), + 0); + } + for (i = 0; i < (int)sizeof(aad); i += 2) { + AssertIntEQ(wc_AesGcmDecryptUpdate(aesDec, plain + i, out + i, 2, NULL, + 0), 0); + } + AssertIntEQ(XMEMCMP(plain, in, sizeof(in)), 0); + /* Finalize and check tag matches. */ + AssertIntEQ(wc_AesGcmEncryptFinal(aesEnc, tag, AES_BLOCK_SIZE), 0); + AssertIntEQ(XMEMCMP(tag, expTag, AES_BLOCK_SIZE), 0); + AssertIntEQ(wc_AesGcmDecryptFinal(aesDec, tag, AES_BLOCK_SIZE), 0); + + /* Check streaming encryption can be decrypted with one shot. 
*/ + AssertIntEQ(wc_AesGcmSetKey(aesDec, key, sizeof(key)), 0); + AssertIntEQ(wc_AesGcmDecrypt(aesDec, plain, out, sizeof(in), iv, + AES_IV_SIZE, tag, AES_BLOCK_SIZE, aad, sizeof(aad)), 0); + AssertIntEQ(XMEMCMP(plain, in, sizeof(in)), 0); + +#endif + return ret; + +} /* END test_wc_AesGcmStream */ /* @@ -39748,15 +39950,19 @@ static void test_wolfssl_EVP_aes_gcm_AAD_2_parts(void) const byte iv[12] = { 0 }; const byte key[16] = { 0 }; const byte cleartext[16] = { 0 }; - const byte aad[] = {0x01, 0x10, 0x00, 0x2a, 0x08, 0x00, 0x04, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0xdc, 0x4d, - 0xad, 0x6b, 0x06, 0x93, 0x4f}; + const byte aad[] = { + 0x01, 0x10, 0x00, 0x2a, 0x08, 0x00, 0x04, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, + 0x00, 0x00, 0xdc, 0x4d, 0xad, 0x6b, 0x06, 0x93, + 0x4f + }; byte out1Part[16]; byte outTag1Part[16]; byte out2Part[16]; byte outTag2Part[16]; byte decryptBuf[16]; int len; + int tlen; EVP_CIPHER_CTX* ctx = NULL; printf(testingFmt, "wolfssl_EVP_aes_gcm_AAD_2_parts"); @@ -39764,25 +39970,37 @@ static void test_wolfssl_EVP_aes_gcm_AAD_2_parts(void) /* ENCRYPT */ /* Send AAD and data in 1 part */ AssertNotNull(ctx = EVP_CIPHER_CTX_new()); - AssertIntEQ(EVP_EncryptInit_ex(ctx, EVP_aes_128_gcm(), NULL, NULL, NULL), 1); + tlen = 0; + AssertIntEQ(EVP_EncryptInit_ex(ctx, EVP_aes_128_gcm(), NULL, NULL, NULL), + 1); AssertIntEQ(EVP_EncryptInit_ex(ctx, NULL, NULL, key, iv), 1); AssertIntEQ(EVP_EncryptUpdate(ctx, NULL, &len, aad, sizeof(aad)), 1); - AssertIntEQ(EVP_EncryptUpdate(ctx, out1Part, &len, cleartext, sizeof(cleartext)), 1); + AssertIntEQ(EVP_EncryptUpdate(ctx, out1Part, &len, cleartext, + sizeof(cleartext)), 1); + tlen += len; AssertIntEQ(EVP_EncryptFinal_ex(ctx, out1Part, &len), 1); - AssertIntEQ(len, sizeof(cleartext)); - AssertIntEQ(EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_GET_TAG, 16, outTag1Part), 1); + tlen += len; + AssertIntEQ(tlen, sizeof(cleartext)); + AssertIntEQ(EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_GET_TAG, 16, + outTag1Part), 1); EVP_CIPHER_CTX_free(ctx); /* DECRYPT */ /* Send AAD and data in 1 part */ AssertNotNull(ctx = EVP_CIPHER_CTX_new()); - AssertIntEQ(EVP_DecryptInit_ex(ctx, EVP_aes_128_gcm(), NULL, NULL, NULL), 1); + tlen = 0; + AssertIntEQ(EVP_DecryptInit_ex(ctx, EVP_aes_128_gcm(), NULL, NULL, NULL), + 1); AssertIntEQ(EVP_DecryptInit_ex(ctx, NULL, NULL, key, iv), 1); AssertIntEQ(EVP_DecryptUpdate(ctx, NULL, &len, aad, sizeof(aad)), 1); - AssertIntEQ(EVP_DecryptUpdate(ctx, decryptBuf, &len, out1Part, sizeof(cleartext)), 1); - AssertIntEQ(EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_TAG, 16, outTag1Part), 1); + AssertIntEQ(EVP_DecryptUpdate(ctx, decryptBuf, &len, out1Part, + sizeof(cleartext)), 1); + tlen += len; + AssertIntEQ(EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_TAG, 16, + outTag1Part), 1); AssertIntEQ(EVP_DecryptFinal_ex(ctx, decryptBuf, &len), 1); - AssertIntEQ(len, sizeof(cleartext)); + tlen += len; + AssertIntEQ(tlen, sizeof(cleartext)); EVP_CIPHER_CTX_free(ctx); AssertIntEQ(XMEMCMP(decryptBuf, cleartext, len), 0); @@ -39790,16 +40008,23 @@ static void test_wolfssl_EVP_aes_gcm_AAD_2_parts(void) /* ENCRYPT */ /* Send AAD and data in 2 parts */ AssertNotNull(ctx = EVP_CIPHER_CTX_new()); - AssertIntEQ(EVP_EncryptInit_ex(ctx, EVP_aes_128_gcm(), NULL, NULL, NULL), 1); + tlen = 0; + AssertIntEQ(EVP_EncryptInit_ex(ctx, EVP_aes_128_gcm(), NULL, NULL, NULL), + 1); AssertIntEQ(EVP_EncryptInit_ex(ctx, NULL, NULL, key, iv), 1); AssertIntEQ(EVP_EncryptUpdate(ctx, NULL, &len, aad, 1), 1); - 
AssertIntEQ(EVP_EncryptUpdate(ctx, NULL, &len, aad + 1, sizeof(aad) - 1), 1); + AssertIntEQ(EVP_EncryptUpdate(ctx, NULL, &len, aad + 1, sizeof(aad) - 1), + 1); AssertIntEQ(EVP_EncryptUpdate(ctx, out2Part, &len, cleartext, 1), 1); - AssertIntEQ(EVP_EncryptUpdate(ctx, out2Part, &len, cleartext + 1, + tlen += len; + AssertIntEQ(EVP_EncryptUpdate(ctx, out2Part + tlen, &len, cleartext + 1, sizeof(cleartext) - 1), 1); - AssertIntEQ(EVP_EncryptFinal_ex(ctx, out2Part, &len), 1); - AssertIntEQ(len, sizeof(cleartext)); - AssertIntEQ(EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_GET_TAG, 16, outTag2Part), 1); + tlen += len; + AssertIntEQ(EVP_EncryptFinal_ex(ctx, out2Part + tlen, &len), 1); + tlen += len; + AssertIntEQ(tlen, sizeof(cleartext)); + AssertIntEQ(EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_GET_TAG, 16, + outTag2Part), 1); AssertIntEQ(XMEMCMP(out1Part, out2Part, sizeof(out1Part)), 0); AssertIntEQ(XMEMCMP(outTag1Part, outTag2Part, sizeof(outTag1Part)), 0); @@ -39808,16 +40033,23 @@ static void test_wolfssl_EVP_aes_gcm_AAD_2_parts(void) /* DECRYPT */ /* Send AAD and data in 2 parts */ AssertNotNull(ctx = EVP_CIPHER_CTX_new()); - AssertIntEQ(EVP_DecryptInit_ex(ctx, EVP_aes_128_gcm(), NULL, NULL, NULL), 1); + tlen = 0; + AssertIntEQ(EVP_DecryptInit_ex(ctx, EVP_aes_128_gcm(), NULL, NULL, NULL), + 1); AssertIntEQ(EVP_DecryptInit_ex(ctx, NULL, NULL, key, iv), 1); AssertIntEQ(EVP_DecryptUpdate(ctx, NULL, &len, aad, 1), 1); - AssertIntEQ(EVP_DecryptUpdate(ctx, NULL, &len, aad + 1, sizeof(aad) - 1), 1); + AssertIntEQ(EVP_DecryptUpdate(ctx, NULL, &len, aad + 1, sizeof(aad) - 1), + 1); AssertIntEQ(EVP_DecryptUpdate(ctx, decryptBuf, &len, out1Part, 1), 1); - AssertIntEQ(EVP_DecryptUpdate(ctx, decryptBuf, &len, out1Part + 1, + tlen += len; + AssertIntEQ(EVP_DecryptUpdate(ctx, decryptBuf + tlen, &len, out1Part + 1, sizeof(cleartext) - 1), 1); - AssertIntEQ(EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_TAG, 16, outTag1Part), 1); - AssertIntEQ(EVP_DecryptFinal_ex(ctx, decryptBuf, &len), 1); - AssertIntEQ(len, sizeof(cleartext)); + tlen += len; + AssertIntEQ(EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_TAG, 16, + outTag1Part), 1); + AssertIntEQ(EVP_DecryptFinal_ex(ctx, decryptBuf + tlen, &len), 1); + tlen += len; + AssertIntEQ(tlen, sizeof(cleartext)); AssertIntEQ(XMEMCMP(decryptBuf, cleartext, len), 0); @@ -39834,14 +40066,15 @@ static void test_wolfssl_EVP_aes_gcm_zeroLen(void) { /* Zero length plain text */ - byte key[] = - {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + byte key[] = { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; /* align */ - byte iv[] = - {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; - /* align */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 + }; /* align */ + byte iv[] = { + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 + }; /* align */ byte plaintxt[0]; int ivSz = 12; int plaintxtSz = 0; @@ -39861,7 +40094,8 @@ static void test_wolfssl_EVP_aes_gcm_zeroLen(void) AssertIntEQ(1, EVP_EncryptInit_ex(en, EVP_aes_256_gcm(), NULL, key, iv)); AssertIntEQ(1, EVP_CIPHER_CTX_ctrl(en, EVP_CTRL_GCM_SET_IVLEN, ivSz, NULL)); - AssertIntEQ(1, EVP_EncryptUpdate(en, ciphertxt, &ciphertxtSz , plaintxt, plaintxtSz)); + AssertIntEQ(1, EVP_EncryptUpdate(en, ciphertxt, &ciphertxtSz , plaintxt, + plaintxtSz)); AssertIntEQ(1, EVP_EncryptFinal_ex(en, ciphertxt, &len)); ciphertxtSz += len; AssertIntEQ(1, EVP_CIPHER_CTX_ctrl(en, EVP_CTRL_GCM_GET_TAG, 16, tag)); @@ -41833,6 +42067,7 @@ 
void ApiTest(void) AssertIntEQ(test_wc_CmacUpdate(), 0); AssertIntEQ(test_wc_CmacFinal(), 0); AssertIntEQ(test_wc_AesCmacGenerate(), 0); + AssertIntEQ(test_wc_AesGcmStream(), 0); AssertIntEQ(test_wc_Des3_SetIV(), 0); AssertIntEQ(test_wc_Des3_SetKey(), 0); diff --git a/wolfcrypt/benchmark/benchmark.c b/wolfcrypt/benchmark/benchmark.c index adea5980f..cee0a90c4 100644 --- a/wolfcrypt/benchmark/benchmark.c +++ b/wolfcrypt/benchmark/benchmark.c @@ -2394,10 +2394,23 @@ static void bench_aesgcm_internal(int doAsync, const byte* key, word32 keySz, /* while free pending slots in queue, submit ops */ for (i = 0; i < BENCH_MAX_PENDING; i++) { if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, ×, numBlocks, &pending)) { +#ifndef BENCHMARK_AESGCM_STREAM ret = wc_AesGcmEncrypt(&enc[i], bench_cipher, bench_plain, BENCH_SIZE, iv, ivSz, bench_tag, AES_AUTH_TAG_SZ, bench_additional, aesAuthAddSz); +#else + ret = wc_AesGcmEncryptInit(&enc[i], NULL, 0, iv, ivSz); + if (ret == 0) { + ret = wc_AesGcmEncryptUpdate(&enc[i], bench_cipher, + bench_plain, BENCH_SIZE, bench_additional, + aesAuthAddSz); + } + if (ret == 0) { + ret = wc_AesGcmEncryptFinal(&enc[i], bench_tag, + AES_AUTH_TAG_SZ); + } +#endif if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&enc[i]), 0, ×, &pending)) { goto exit_aes_gcm; } @@ -2433,10 +2446,23 @@ exit_aes_gcm: /* while free pending slots in queue, submit ops */ for (i = 0; i < BENCH_MAX_PENDING; i++) { if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&dec[i]), 0, ×, numBlocks, &pending)) { +#ifndef BENCHMARK_AESGCM_STREAM ret = wc_AesGcmDecrypt(&dec[i], bench_plain, bench_cipher, BENCH_SIZE, iv, ivSz, bench_tag, AES_AUTH_TAG_SZ, bench_additional, aesAuthAddSz); +#else + ret = wc_AesGcmDecryptInit(&enc[i], NULL, 0, iv, ivSz); + if (ret == 0) { + ret = wc_AesGcmDecryptUpdate(&enc[i], bench_plain, + bench_cipher, BENCH_SIZE, bench_additional, + aesAuthAddSz); + } + if (ret == 0) { + ret = wc_AesGcmDecryptFinal(&enc[i], bench_tag, + AES_AUTH_TAG_SZ); + } +#endif if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&dec[i]), 0, ×, &pending)) { goto exit_aes_gcm_dec; } diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index b1c0d6a1f..4cb300b1b 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -4171,6 +4171,19 @@ static WC_INLINE void IncCtr(byte* ctr, word32 ctrSz) #ifdef HAVE_AESGCM +#ifdef WOLFSSL_AESGCM_STREAM + /* Access initialization counter data. */ + #define AES_INITCTR(aes) ((aes)->streamData + 0 * AES_BLOCK_SIZE) + /* Access counter data. */ + #define AES_COUNTER(aes) ((aes)->streamData + 1 * AES_BLOCK_SIZE) + /* Access tag data. */ + #define AES_TAG(aes) ((aes)->streamData + 2 * AES_BLOCK_SIZE) + /* Access last GHASH block. */ + #define AES_LASTGBLOCK(aes) ((aes)->streamData + 3 * AES_BLOCK_SIZE) + /* Access last encrypted block. */ + #define AES_LASTBLOCK(aes) ((aes)->streamData + 4 * AES_BLOCK_SIZE) +#endif + #if defined(HAVE_COLDFIRE_SEC) #error "Coldfire SEC doesn't currently support AES-GCM mode" @@ -4364,6 +4377,9 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) #endif XMEMSET(iv, 0, AES_BLOCK_SIZE); ret = wc_AesSetKey(aes, key, len, iv, AES_ENCRYPTION); +#ifdef WOLFSSL_AESGCM_STREAM + aes->gcmKeySet = 1; +#endif #ifdef WOLFSSL_AESNI /* AES-NI code generates its own H value. 
*/ @@ -5569,7 +5585,6 @@ static void AES_GCM_decrypt(const unsigned char *in, unsigned char *out, #endif /* _MSC_VER */ #endif /* WOLFSSL_AESNI */ - #if defined(GCM_SMALL) static void GMULT(byte* X, byte* Y) { @@ -5650,6 +5665,27 @@ void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c, XMEMCPY(s, x, sSz); } +#ifdef WOLFSSL_AESGCM_STREAM +/* No extra initialization for small implementation. + * + * @param [in] aes AES GCM object. + */ +#define GHASH_INIT_EXTRA(aes) + +/* GHASH one block of data.. + * + * XOR block into tag and GMULT with H. + * + * @param [in, out] aes AES GCM object. + * @param [in] block Block of AAD or cipher text. + */ +#define GHASH_ONE_BLOCK(aes, block) \ + do { \ + xorbuf(AES_TAG(aes), block, AES_BLOCK_SIZE); \ + GMULT(AES_TAG(aes), aes->H); \ + } \ + while (0) +#endif /* WOLFSSL_AESGCM_STREAM */ /* end GCM_SMALL */ #elif defined(GCM_TABLE) @@ -5826,6 +5862,27 @@ void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c, XMEMCPY(s, x, sSz); } +#ifdef WOLFSSL_AESGCM_STREAM +/* No extra initialization for table implementation. + * + * @param [in] aes AES GCM object. + */ +#define GHASH_INIT_EXTRA(aes) + +/* GHASH one block of data.. + * + * XOR block into tag and GMULT with H using pre-computed table. + * + * @param [in, out] aes AES GCM object. + * @param [in] block Block of AAD or cipher text. + */ +#define GHASH_ONE_BLOCK(aes, block) \ + do { \ + xorbuf(AES_TAG(aes), block, AES_BLOCK_SIZE); \ + GMULT(AES_TAG(aes), aes->M0); \ + } \ + while (0) +#endif /* WOLFSSL_AESGCM_STREAM */ /* end GCM_TABLE */ #elif defined(GCM_TABLE_4BIT) @@ -6101,6 +6158,27 @@ void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c, XMEMCPY(s, x, sSz); } +#ifdef WOLFSSL_AESGCM_STREAM +/* No extra initialization for 4-bit table implementation. + * + * @param [in] aes AES GCM object. + */ +#define GHASH_INIT_EXTRA(aes) + +/* GHASH one block of data.. + * + * XOR block into tag and GMULT with H using pre-computed table. + * + * @param [in, out] aes AES GCM object. + * @param [in] block Block of AAD or cipher text. + */ +#define GHASH_ONE_BLOCK(aes, block) \ + do { \ + xorbuf(AES_TAG(aes), block, AES_BLOCK_SIZE); \ + GMULT(AES_TAG(aes), aes->M0); \ + } \ + while (0) +#endif /* WOLFSSL_AESGCM_STREAM */ #elif defined(WORD64_AVAILABLE) && !defined(GCM_WORD32) #if !defined(FREESCALE_LTC_AES_GCM) @@ -6409,10 +6487,282 @@ void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c, XMEMCPY(s, x, sSz); } +#ifdef WOLFSSL_AESGCM_STREAM +#ifdef LITTLE_ENDIAN_ORDER +/* Little-endian 32-bit word implementation requires byte reversal of H. + * + * H is all-zeros block encrypted with key. + * + * @param [in, out] aes AES GCM object. + */ +#define GHASH_INIT_EXTRA(aes) \ + ByteReverseWords((word32*)aes->H, (word32*)aes->H, AES_BLOCK_SIZE) + +/* GHASH one block of data.. + * + * XOR block, in big-endian form, into tag and GMULT with H. + * + * @param [in, out] aes AES GCM object. + * @param [in] block Block of AAD or cipher text. + */ +#define GHASH_ONE_BLOCK(aes, block) \ + do { \ + word32* x = (word32*)AES_TAG(aes); \ + word32* h = (word32*)aes->H; \ + word32 bigEnd[4]; \ + XMEMCPY(bigEnd, block, AES_BLOCK_SIZE); \ + ByteReverseWords(bigEnd, bigEnd, AES_BLOCK_SIZE); \ + x[0] ^= bigEnd[0]; \ + x[1] ^= bigEnd[1]; \ + x[2] ^= bigEnd[2]; \ + x[3] ^= bigEnd[3]; \ + GMULT(x, h); \ + } \ + while (0) + +/* GHASH in AAD and cipher text lengths in bits. + * + * Convert tag back to little-endian. + * + * @param [in, out] aes AES GCM object. 
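+ *
+ * The length block is the 64-bit AAD length followed by the 64-bit cipher
+ * text length, both counted in bits. For example, aSz = 20 and cSz = 40
+ * (bytes) hash in len[4] = { 0x00000000, 0x000000a0, 0x00000000, 0x00000140 },
+ * i.e. 160 and 320 bits.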
+ */ +#define GHASH_LEN_BLOCK(aes) \ + do { \ + word32 len[4]; \ + word32* x = (word32*)AES_TAG(aes); \ + word32* h = (word32*)aes->H; \ + len[0] = (aes->aSz >> (8*sizeof(aes->aSz) - 3)); \ + len[1] = aes->aSz << 3; \ + len[2] = (aes->cSz >> (8*sizeof(aes->cSz) - 3)); \ + len[3] = aes->cSz << 3; \ + x[0] ^= len[0]; \ + x[1] ^= len[1]; \ + x[2] ^= len[2]; \ + x[3] ^= len[3]; \ + GMULT(x, h); \ + ByteReverseWords(x, x, AES_BLOCK_SIZE); \ + } \ + while (0) +#else +/* No extra initialization for 32-bit word implementation. + * + * @param [in] aes AES GCM object. + */ +#define GHASH_INIT_EXTRA(aes) + +/* GHASH one block of data.. + * + * XOR block into tag and GMULT with H. + * + * @param [in, out] aes AES GCM object. + * @param [in] block Block of AAD or cipher text. + */ +#define GHASH_ONE_BLOCK(aes, block) \ + do { \ + word32* x = (word32*)AES_TAG(aes); \ + word32* h = (word32*)aes->H; \ + word32 block32[4]; \ + XMEMCPY(block32, block, AES_BLOCK_SIZE); \ + x[0] ^= block32[0]; \ + x[1] ^= block32[1]; \ + x[2] ^= block32[2]; \ + x[3] ^= block32[3]; \ + GMULT(x, h); \ + } \ + while (0) + +/* GHASH in AAD and cipher text lengths in bits. + * + * @param [in, out] aes AES GCM object. + */ +#define GHASH_LEN_BLOCK(aes) \ + do { \ + word32 len[4]; \ + word32* x = (word32*)AES_TAG(aes); \ + word32* h = (word32*)aes->H; \ + len[0] = (aes->aSz >> (8*sizeof(aes->aSz) - 3)); \ + len[1] = aes->aSz << 3; \ + len[2] = (aes->cSz >> (8*sizeof(aes->cSz) - 3)); \ + len[3] = aes->cSz << 3; \ + x[0] ^= len[0]; \ + x[1] ^= len[1]; \ + x[2] ^= len[2]; \ + x[3] ^= len[3]; \ + GMULT(x, h); \ + } \ + while (0) +#endif /* LITTLE_ENDIAN_ORDER */ +#endif /* WOLFSSL_AESGCM_STREAM */ #endif /* end GCM_WORD32 */ - #if !defined(WOLFSSL_XILINX_CRYPT) && !defined(WOLFSSL_AFALG_XILINX_AES) +#ifdef WOLFSSL_AESGCM_STREAM +#ifndef GHASH_LEN_BLOCK +/* Hash in the lengths of the AAD and cipher text in bits. + * + * Default implementation. + * + * @param [in, out] aes AES GCM object. + */ +#define GHASH_LEN_BLOCK(aes) \ + do { \ + byte scratch[AES_BLOCK_SIZE]; \ + FlattenSzInBits(&scratch[0], aes->aSz); \ + FlattenSzInBits(&scratch[8], aes->cSz); \ + GHASH_ONE_BLOCK(aes, scratch); \ + } \ + while (0) +#endif + +/* Initialize a GHASH for streaming operations. + * + * @param [in, out] aes AES GCM object. + */ +static void GHASH_INIT(Aes* aes) { + /* Set tag to all zeros as initial value. */ + XMEMSET(AES_TAG(aes), 0, AES_BLOCK_SIZE); + /* Reset counts of AAD and cipher text. */ + aes->aOver = 0; + aes->cOver = 0; + /* Extra initialization baed on implementation. */ + GHASH_INIT_EXTRA(aes); +} + +/* Update the GHASH with AAD and/or cipher text. + * + * @param [in,out] aes AES GCM object. + * @param [in] a Additional authentication data buffer. + * @param [in] aSz Size of data in AAD buffer. + * @param [in] c Cipher text buffer. + * @param [in] cSz Size of data in cipher text buffer. + */ +static void GHASH_UPDATE(Aes* aes, const byte* a, word32 aSz, const byte* c, + word32 cSz) +{ + word32 blocks; + word32 partial; + + /* Hash in A, the Additional Authentication Data */ + if (aSz != 0 && a != NULL) { + /* Update count of AAD we have hashed. */ + aes->aSz += aSz; + /* Check if we have unprocessed data. */ + if (aes->aOver > 0) { + /* Calculate amount we can use - fill up the block. */ + byte sz = AES_BLOCK_SIZE - aes->aOver; + if (sz > aSz) { + sz = aSz; + } + /* Copy extra into last GHASH block array and update count. 
*/ + XMEMCPY(AES_LASTGBLOCK(aes) + aes->aOver, a, sz); + aes->aOver += sz; + if (aes->aOver == AES_BLOCK_SIZE) { + /* We have filled up the block and can process. */ + GHASH_ONE_BLOCK(aes, AES_LASTGBLOCK(aes)); + /* Reset count. */ + aes->aOver = 0; + } + /* Used up some data. */ + aSz -= sz; + a += sz; + } + + /* Calculate number of blocks of AAD and the leftover. */ + blocks = aSz / AES_BLOCK_SIZE; + partial = aSz % AES_BLOCK_SIZE; + /* GHASH full blocks now. */ + while (blocks--) { + GHASH_ONE_BLOCK(aes, a); + a += AES_BLOCK_SIZE; + } + if (partial != 0) { + /* Cache the partial block. */ + XMEMCPY(AES_LASTGBLOCK(aes), a, partial); + aes->aOver = (byte)partial; + } + } + if (aes->aOver > 0 && cSz > 0 && c != NULL) { + /* No more AAD coming and we have a partial block. */ + /* Fill the rest of the block with zeros. */ + byte sz = AES_BLOCK_SIZE - aes->aOver; + XMEMSET(AES_LASTGBLOCK(aes) + aes->aOver, 0, sz); + /* GHASH last AAD block. */ + GHASH_ONE_BLOCK(aes, AES_LASTGBLOCK(aes)); + /* Clear partial count for next time through. */ + aes->aOver = 0; + } + + /* Hash in C, the Ciphertext */ + if (cSz != 0 && c != NULL) { + /* Update count of cipher text we have hashed. */ + aes->cSz += cSz; + if (aes->cOver > 0) { + /* Calculate amount we can use - fill up the block. */ + byte sz = AES_BLOCK_SIZE - aes->cOver; + if (sz > cSz) { + sz = cSz; + } + XMEMCPY(AES_LASTGBLOCK(aes) + aes->cOver, c, sz); + /* Update count of unsed encrypted counter. */ + aes->cOver += sz; + if (aes->cOver == AES_BLOCK_SIZE) { + /* We have filled up the block and can process. */ + GHASH_ONE_BLOCK(aes, AES_LASTGBLOCK(aes)); + /* Reset count. */ + aes->cOver = 0; + } + /* Used up some data. */ + cSz -= sz; + c += sz; + } + + /* Calculate number of blocks of cipher text and the leftover. */ + blocks = cSz / AES_BLOCK_SIZE; + partial = cSz % AES_BLOCK_SIZE; + /* GHASH full blocks now. */ + while (blocks--) { + GHASH_ONE_BLOCK(aes, c); + c += AES_BLOCK_SIZE; + } + if (partial != 0) { + /* Cache the partial block. */ + XMEMCPY(AES_LASTGBLOCK(aes), c, partial); + aes->cOver = (byte)partial; + } + } +} + +/* Finalize the GHASH calculation. + * + * Complete hashing cipher text and hash the AAD and cipher text lengths. + * + * @param [in, out] aes AES GCM object. + * @param [out] s Authentication tag. + * @param [in] sSz Size of authentication tag required. + */ +static void GHASH_FINAL(Aes* aes, byte* s, word32 sSz) +{ + /* AAD block incomplete when > 0 */ + byte over = aes->aOver; + + if (aes->cOver > 0) { + /* Cipher text block incomplete. */ + over = aes->cOver; + } + if (over > 0) { + /* Zeroize the unused part of the block. */ + XMEMSET(AES_LASTGBLOCK(aes) + over, 0, AES_BLOCK_SIZE - over); + /* Hash the last block of cipher text. */ + GHASH_ONE_BLOCK(aes, AES_LASTGBLOCK(aes)); + } + /* Hash in the lengths of AAD and cipher text in bits */ + GHASH_LEN_BLOCK(aes); + /* Copy the result into s. 
*/ + XMEMCPY(s, AES_TAG(aes), sSz); +} +#endif /* WOLFSSL_AESGCM_STREAM */ + + #ifdef FREESCALE_LTC_AES_GCM int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, const byte* iv, word32 ivSz, @@ -6452,8 +6802,8 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, #ifdef STM32_CRYPTO_AES_GCM /* this function supports inline encrypt */ -/* define STM32_AESGCM_PARTIAL for newer STM Cube HAL's with workaround - for handling partial packets to improve auth tag calculation performance by +/* define STM32_AESGCM_PARTIAL for newer STM Cube HAL's with workaround + for handling partial packets to improve auth tag calculation performance by using hardware */ static int wc_AesGcmEncrypt_STM32(Aes* aes, byte* out, const byte* in, word32 sz, const byte* iv, word32 ivSz, @@ -6668,6 +7018,7 @@ static int wc_AesGcmEncrypt_STM32(Aes* aes, byte* out, const byte* in, word32 sz #endif /* STM32_CRYPTO_AES_GCM */ #ifdef WOLFSSL_AESNI +/* For performance reasons, this code needs to be not inlined. */ int AES_GCM_encrypt_C(Aes* aes, byte* out, const byte* in, word32 sz, const byte* iv, word32 ivSz, byte* authTag, word32 authTagSz, @@ -6885,7 +7236,6 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, #endif - /* AES GCM Decrypt */ #if defined(HAVE_AES_DECRYPT) || defined(HAVE_AESGCM_DECRYPT) #ifdef FREESCALE_LTC_AES_GCM @@ -6973,7 +7323,7 @@ static int wc_AesGcmDecrypt_STM32(Aes* aes, byte* out, GHASH(aes, NULL, 0, iv, ivSz, (byte*)ctr, AES_BLOCK_SIZE); } - /* Make copy of expected authTag, which could get corrupted in some + /* Make copy of expected authTag, which could get corrupted in some * Cube HAL versions without proper partial block support. * For TLS blocks the authTag is after the output buffer, so save it */ XMEMCPY(tagExpected, authTag, authTagSz); @@ -7150,6 +7500,7 @@ static int wc_AesGcmDecrypt_STM32(Aes* aes, byte* out, #endif /* STM32_CRYPTO_AES_GCM */ #ifdef WOLFSSL_AESNI +/* For performance reasons, this code needs to be not inlined. */ int AES_GCM_decrypt_C(Aes* aes, byte* out, const byte* in, word32 sz, const byte* iv, word32 ivSz, const byte* authTag, word32 authTagSz, @@ -7384,6 +7735,1177 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, } #endif #endif /* HAVE_AES_DECRYPT || HAVE_AESGCM_DECRYPT */ + +#ifdef WOLFSSL_AESGCM_STREAM +/* Initialize the AES GCM cipher with an IV. C implementation. + * + * @param [in, out] aes AES object. + * @param [in] iv IV/nonce buffer. + * @param [in] ivSz Length of IV/nonce data. + */ +static void AesGcmInit_C(Aes* aes, const byte* iv, word32 ivSz) +{ + ALIGN32 byte counter[AES_BLOCK_SIZE]; + + if (ivSz == GCM_NONCE_MID_SZ) { + /* Counter is IV with bottom 4 bytes set to: 0x00,0x00,0x00,0x01. */ + XMEMCPY(counter, iv, ivSz); + XMEMSET(counter + GCM_NONCE_MID_SZ, 0, + AES_BLOCK_SIZE - GCM_NONCE_MID_SZ - 1); + counter[AES_BLOCK_SIZE - 1] = 1; + } + else { + /* Counter is GHASH of IV. */ + #ifdef OPENSSL_EXTRA + word32 aadTemp = aes->aadLen; + aes->aadLen = 0; + #endif + GHASH(aes, NULL, 0, iv, ivSz, counter, AES_BLOCK_SIZE); + #ifdef OPENSSL_EXTRA + aes->aadLen = aadTemp; + #endif + } + + /* Copy in the counter for use with cipher. */ + XMEMCPY(AES_COUNTER(aes), counter, AES_BLOCK_SIZE); + /* Encrypt initial counter into a buffer for GCM. */ + wc_AesEncrypt(aes, counter, AES_INITCTR(aes)); + /* Reset state fields. */ + aes->over = 0; + aes->aSz = 0; + aes->cSz = 0; + /* Initialization for GHASH. */ + GHASH_INIT(aes); +} + +/* Update the AES GCM cipher with data. C implementation. 
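+ *
+ * Counter-mode only: each incremented counter block is encrypted with the
+ * AES key and XORed into the input; authentication is done separately via
+ * GHASH_UPDATE. Keystream left over from a partial final block is cached in
+ * AES_LASTBLOCK(aes), with the bytes already consumed tracked in aes->over,
+ * so the next call uses it up before generating a new counter block.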
+ * + * Only enciphers data. + * + * @param [in, out] aes AES object. + * @param [in] out Cipher text or plaintext buffer. + * @param [in] in Plaintext or cipher text buffer. + * @param [in] sz Length of data. + */ +static void AesGcmCryptUpdate_C(Aes* aes, byte* out, const byte* in, word32 sz) +{ + word32 blocks; + word32 partial; + + /* Check if previous encrypted block was not used up. */ + if (aes->over > 0) { + byte pSz = AES_BLOCK_SIZE - aes->over; + if (pSz > sz) pSz = sz; + + /* Use some/all of last encrypted block. */ + xorbufout(out, AES_LASTBLOCK(aes) + aes->over, in, pSz); + aes->over = (aes->over + pSz) & (AES_BLOCK_SIZE - 1); + + /* Some data used. */ + sz -= pSz; + in += pSz; + out += pSz; + } + + /* Calculate the number of blocks needing to be encrypted and any leftover. + */ + blocks = sz / AES_BLOCK_SIZE; + partial = sz & (AES_BLOCK_SIZE - 1); + +#if defined(HAVE_AES_ECB) + /* Some hardware acceleration can gain performance from doing AES encryption + * of the whole buffer at once. + * Overwrites the cipher text before using plaintext - no inline encryption. + */ + if ((out != in) && blocks > 0) { + word32 b; + /* Place incrementing counter blocks into cipher text. */ + for (b = 0; b < blocks; b++) { + IncrementGcmCounter(AES_COUNTER(aes)); + XMEMCPY(out + b * AES_BLOCK_SIZE, AES_COUNTER(aes), AES_BLOCK_SIZE); + } + + /* Encrypt counter blocks. */ + wc_AesEcbEncrypt(aes, out, out, AES_BLOCK_SIZE * blocks); + /* XOR in plaintext. */ + xorbuf(out, in, AES_BLOCK_SIZE * blocks); + /* Skip over processed data. */ + in += AES_BLOCK_SIZE * blocks; + out += AES_BLOCK_SIZE * blocks; + } + else +#endif /* HAVE_AES_ECB */ + { + /* Encrypt block by block. */ + while (blocks--) { + ALIGN32 byte scratch[AES_BLOCK_SIZE]; + IncrementGcmCounter(AES_COUNTER(aes)); + /* Encrypt counter into a buffer. */ + wc_AesEncrypt(aes, AES_COUNTER(aes), scratch); + /* XOR plain text into encrypted counter into cipher text buffer. */ + xorbufout(out, scratch, in, AES_BLOCK_SIZE); + /* Data complete. */ + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + } + } + + if (partial != 0) { + /* Generate an extra block and use up as much as needed. */ + IncrementGcmCounter(AES_COUNTER(aes)); + /* Encrypt counter into cache. */ + wc_AesEncrypt(aes, AES_COUNTER(aes), AES_LASTBLOCK(aes)); + /* XOR plain text into encrypted counter into cipher text buffer. */ + xorbufout(out, AES_LASTBLOCK(aes), in, partial); + /* Keep amount of encrypted block used. */ + aes->over = partial; + } +} + +/* Calculates authentication tag for AES GCM. C implementation. + * + * @param [in, out] aes AES object. + * @param [out] authTag Buffer to store authentication tag in. + * @param [in] authTagSz Length of tag to create. + */ +static void AesGcmFinal_C(Aes* aes, byte* authTag, word32 authTagSz) +{ + /* Calculate authentication tag. */ + GHASH_FINAL(aes, authTag, authTagSz); + /* XOR in as much of encrypted counter as is required. */ + xorbuf(authTag, AES_INITCTR(aes), authTagSz); +#ifdef OPENSSL_EXTRA + /* store AAD size for next call */ + aes->aadLen = aes->aSz; +#endif + /* Zeroize last block to protect sensitive data. 
*/ + ForceZero(AES_LASTBLOCK(aes), sizeof(AES_LASTBLOCK(aes))); +} + +#ifdef WOLFSSL_AESNI +/* Assembly code implementations in: aes_gcm_asm.S */ +#ifdef HAVE_INTEL_AVX2 +extern void AES_GCM_init_avx2(const unsigned char* key, int nr, + const unsigned char* ivec, unsigned int ibytes, unsigned char* h, + unsigned char* counter, unsigned char* initCtr); +extern void AES_GCM_aad_update_avx2(const unsigned char* addt, + unsigned int abytes, unsigned char* tag, unsigned char* h); +extern void AES_GCM_encrypt_block_avx2(const unsigned char* key, int nr, + unsigned char* out, const unsigned char* in, unsigned char* counter); +extern void AES_GCM_ghash_block_avx2(const unsigned char* data, + unsigned char* tag, unsigned char* h); + +extern void AES_GCM_encrypt_update_avx2(const unsigned char* key, int nr, + unsigned char* out, const unsigned char* in, unsigned int nbytes, + unsigned char* tag, unsigned char* h, unsigned char* counter); +extern void AES_GCM_encrypt_final_avx2(unsigned char* tag, + unsigned char* authTag, unsigned int tbytes, unsigned int nbytes, + unsigned int abytes, unsigned char* h, unsigned char* initCtr); +#endif +#ifdef HAVE_INTEL_AVX1 +extern void AES_GCM_init_avx1(const unsigned char* key, int nr, + const unsigned char* ivec, unsigned int ibytes, unsigned char* h, + unsigned char* counter, unsigned char* initCtr); +extern void AES_GCM_aad_update_avx1(const unsigned char* addt, + unsigned int abytes, unsigned char* tag, unsigned char* h); +extern void AES_GCM_encrypt_block_avx1(const unsigned char* key, int nr, + unsigned char* out, const unsigned char* in, unsigned char* counter); +extern void AES_GCM_ghash_block_avx1(const unsigned char* data, + unsigned char* tag, unsigned char* h); + +extern void AES_GCM_encrypt_update_avx1(const unsigned char* key, int nr, + unsigned char* out, const unsigned char* in, unsigned int nbytes, + unsigned char* tag, unsigned char* h, unsigned char* counter); +extern void AES_GCM_encrypt_final_avx1(unsigned char* tag, + unsigned char* authTag, unsigned int tbytes, unsigned int nbytes, + unsigned int abytes, unsigned char* h, unsigned char* initCtr); +#endif +extern void AES_GCM_init_aesni(const unsigned char* key, int nr, + const unsigned char* ivec, unsigned int ibytes, unsigned char* h, + unsigned char* counter, unsigned char* initCtr); +extern void AES_GCM_aad_update_aesni(const unsigned char* addt, + unsigned int abytes, unsigned char* tag, unsigned char* h); +extern void AES_GCM_encrypt_block_aesni(const unsigned char* key, int nr, + unsigned char* out, const unsigned char* in, unsigned char* counter); +extern void AES_GCM_ghash_block_aesni(const unsigned char* data, + unsigned char* tag, unsigned char* h); + +extern void AES_GCM_encrypt_update_aesni(const unsigned char* key, int nr, + unsigned char* out, const unsigned char* in, unsigned int nbytes, + unsigned char* tag, unsigned char* h, unsigned char* counter); +extern void AES_GCM_encrypt_final_aesni(unsigned char* tag, + unsigned char* authTag, unsigned int tbytes, unsigned int nbytes, + unsigned int abytes, unsigned char* h, unsigned char* initCtr); + +/* Initialize the AES GCM cipher with an IV. AES-NI implementations. + * + * @param [in, out] aes AES object. + * @param [in] iv IV/nonce buffer. + * @param [in] ivSz Length of IV/nonce data. + */ +static void AesGcmInit_aesni(Aes* aes, const byte* iv, word32 ivSz) +{ + /* Reset state fields. */ + aes->aSz = 0; + aes->cSz = 0; + /* Set tag to all zeros as initial value. 
*/ + XMEMSET(AES_TAG(aes), 0, AES_BLOCK_SIZE); + /* Reset counts of AAD and cipher text. */ + aes->aOver = 0; + aes->cOver = 0; + +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_AVX2(intel_flags)) { + SAVE_VECTOR_REGISTERS(); + AES_GCM_init_avx2((byte*)aes->key, aes->rounds, iv, ivSz, aes->H, + AES_COUNTER(aes), AES_INITCTR(aes)); + RESTORE_VECTOR_REGISTERS(); + } + else +#endif +#ifdef HAVE_INTEL_AVX1 + if (IS_INTEL_AVX1(intel_flags)) { + SAVE_VECTOR_REGISTERS(); + AES_GCM_init_avx1((byte*)aes->key, aes->rounds, iv, ivSz, aes->H, + AES_COUNTER(aes), AES_INITCTR(aes)); + RESTORE_VECTOR_REGISTERS(); + } + else +#endif + { + SAVE_VECTOR_REGISTERS(); + AES_GCM_init_aesni((byte*)aes->key, aes->rounds, iv, ivSz, aes->H, + AES_COUNTER(aes), AES_INITCTR(aes)); + RESTORE_VECTOR_REGISTERS(); + } +} + +/* Update the AES GCM for encryption with authentication data. + * + * Implementation uses AVX2, AVX1 or straight AES-NI optimized assembly code. + * + * @param [in, out] aes AES object. + * @param [in] a Buffer holding authentication data. + * @param [in] aSz Length of authentication data in bytes. + * @param [in] endA Whether no more authentication data is expected. + */ +static void AesGcmAadUpdate_aesni(Aes* aes, const byte* a, word32 aSz, int endA) +{ + word32 blocks; + int partial; + + if (aSz != 0 && a != NULL) { + /* Total count of AAD updated. */ + aes->aSz += aSz; + /* Check if we have unprocessed data. */ + if (aes->aOver > 0) { + /* Calculate amount we can use - fill up the block. */ + byte sz = AES_BLOCK_SIZE - aes->aOver; + if (sz > aSz) { + sz = aSz; + } + /* Copy extra into last GHASH block array and update count. */ + XMEMCPY(AES_LASTGBLOCK(aes) + aes->aOver, a, sz); + aes->aOver += sz; + if (aes->aOver == AES_BLOCK_SIZE) { + /* We have filled up the block and can process. */ + #ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_AVX2(intel_flags)) { + AES_GCM_ghash_block_avx2(AES_LASTGBLOCK(aes), AES_TAG(aes), + aes->H); + } + else + #endif + #ifdef HAVE_INTEL_AVX1 + if (IS_INTEL_AVX1(intel_flags)) { + AES_GCM_ghash_block_avx1(AES_LASTGBLOCK(aes), AES_TAG(aes), + aes->H); + } + else + #endif + { + AES_GCM_ghash_block_aesni(AES_LASTGBLOCK(aes), AES_TAG(aes), + aes->H); + } + /* Reset count. */ + aes->aOver = 0; + } + /* Used up some data. */ + aSz -= sz; + a += sz; + } + + /* Calculate number of blocks of AAD and the leftover. */ + blocks = aSz / AES_BLOCK_SIZE; + partial = aSz % AES_BLOCK_SIZE; + if (blocks > 0) { + /* GHASH full blocks now. */ + #ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_AVX2(intel_flags)) { + AES_GCM_aad_update_avx2(a, blocks * AES_BLOCK_SIZE, + AES_TAG(aes), aes->H); + } + else + #endif + #ifdef HAVE_INTEL_AVX1 + if (IS_INTEL_AVX1(intel_flags)) { + AES_GCM_aad_update_avx1(a, blocks * AES_BLOCK_SIZE, + AES_TAG(aes), aes->H); + } + else + #endif + { + AES_GCM_aad_update_aesni(a, blocks * AES_BLOCK_SIZE, + AES_TAG(aes), aes->H); + } + /* Skip over to end of AAD blocks. */ + a += blocks * AES_BLOCK_SIZE; + } + if (partial != 0) { + /* Cache the partial block. */ + XMEMCPY(AES_LASTGBLOCK(aes), a, partial); + aes->aOver = (byte)partial; + } + } + if (endA && (aes->aOver > 0)) { + /* No more AAD coming and we have a partial block. */ + /* Fill the rest of the block with zeros. */ + XMEMSET(AES_LASTGBLOCK(aes) + aes->aOver, 0, + AES_BLOCK_SIZE - aes->aOver); + /* GHASH last AAD block. 
*/ + #ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_AVX2(intel_flags)) { + AES_GCM_ghash_block_avx2(AES_LASTGBLOCK(aes), AES_TAG(aes), aes->H); + } + else + #endif + #ifdef HAVE_INTEL_AVX1 + if (IS_INTEL_AVX1(intel_flags)) { + AES_GCM_ghash_block_avx1(AES_LASTGBLOCK(aes), AES_TAG(aes), aes->H); + } + else + #endif + { + AES_GCM_ghash_block_aesni(AES_LASTGBLOCK(aes), AES_TAG(aes), + aes->H); + } + /* Clear partial count for next time through. */ + aes->aOver = 0; + } +} + +/* Update the AES GCM for encryption with data and/or authentication data. + * + * Implementation uses AVX2, AVX1 or straight AES-NI optimized assembly code. + * + * @param [in, out] aes AES object. + * @param [out] c Buffer to hold cipher text. + * @param [in] p Buffer holding plaintext. + * @param [in] cSz Length of cipher text/plaintext in bytes. + * @param [in] a Buffer holding authentication data. + * @param [in] aSz Length of authentication data in bytes. + */ +static void AesGcmEncryptUpdate_aesni(Aes* aes, byte* c, const byte* p, + word32 cSz, const byte* a, word32 aSz) +{ + word32 blocks; + int partial; + + SAVE_VECTOR_REGISTERS(); + /* Hash in A, the Authentication Data */ + AesGcmAadUpdate_aesni(aes, a, aSz, (cSz > 0) && (c != NULL)); + + /* Encrypt plaintext and Hash in C, the Cipher text */ + if (cSz != 0 && c != NULL) { + /* Update count of cipher text we have hashed. */ + aes->cSz += cSz; + if (aes->cOver > 0) { + /* Calculate amount we can use - fill up the block. */ + byte sz = AES_BLOCK_SIZE - aes->cOver; + if (sz > cSz) { + sz = cSz; + } + /* Encrypt some of the plaintext. */ + xorbuf(AES_LASTGBLOCK(aes) + aes->cOver, p, sz); + XMEMCPY(c, AES_LASTGBLOCK(aes) + aes->cOver, sz); + /* Update count of unsed encrypted counter. */ + aes->cOver += sz; + if (aes->cOver == AES_BLOCK_SIZE) { + /* We have filled up the block and can process. */ + #ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_AVX2(intel_flags)) { + AES_GCM_ghash_block_avx2(AES_LASTGBLOCK(aes), AES_TAG(aes), + aes->H); + } + else + #endif + #ifdef HAVE_INTEL_AVX1 + if (IS_INTEL_AVX1(intel_flags)) { + AES_GCM_ghash_block_avx1(AES_LASTGBLOCK(aes), AES_TAG(aes), + aes->H); + } + else + #endif + { + AES_GCM_ghash_block_aesni(AES_LASTGBLOCK(aes), AES_TAG(aes), + aes->H); + } + /* Reset count. */ + aes->cOver = 0; + } + /* Used up some data. */ + cSz -= sz; + p += sz; + c += sz; + } + + /* Calculate number of blocks of plaintext and the leftover. */ + blocks = cSz / AES_BLOCK_SIZE; + partial = cSz % AES_BLOCK_SIZE; + if (blocks > 0) { + /* Encrypt and GHASH full blocks now. */ + #ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_AVX2(intel_flags)) { + AES_GCM_encrypt_update_avx2((byte*)aes->key, aes->rounds, c, p, + blocks * AES_BLOCK_SIZE, AES_TAG(aes), aes->H, + AES_COUNTER(aes)); + } + else + #endif + #ifdef HAVE_INTEL_AVX1 + if (IS_INTEL_AVX1(intel_flags)) { + AES_GCM_encrypt_update_avx1((byte*)aes->key, aes->rounds, c, p, + blocks * AES_BLOCK_SIZE, AES_TAG(aes), aes->H, + AES_COUNTER(aes)); + } + else + #endif + { + AES_GCM_encrypt_update_aesni((byte*)aes->key, aes->rounds, c, p, + blocks * AES_BLOCK_SIZE, AES_TAG(aes), aes->H, + AES_COUNTER(aes)); + } + /* Skip over to end of blocks. */ + p += blocks * AES_BLOCK_SIZE; + c += blocks * AES_BLOCK_SIZE; + } + if (partial != 0) { + /* Encrypt the counter - XOR in zeros as proxy for plaintext. 
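+             * Zero-filling the block first makes the encrypt-and-XOR routine
+             * leave the raw keystream in AES_LASTGBLOCK, ready to be XORed
+             * with the real partial plaintext below.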
*/ + XMEMSET(AES_LASTGBLOCK(aes), 0, AES_BLOCK_SIZE); + #ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_AVX2(intel_flags)) { + AES_GCM_encrypt_block_avx2((byte*)aes->key, aes->rounds, + AES_LASTGBLOCK(aes), AES_LASTGBLOCK(aes), AES_COUNTER(aes)); + } + else + #endif + #ifdef HAVE_INTEL_AVX1 + if (IS_INTEL_AVX1(intel_flags)) { + AES_GCM_encrypt_block_avx1((byte*)aes->key, aes->rounds, + AES_LASTGBLOCK(aes), AES_LASTGBLOCK(aes), AES_COUNTER(aes)); + } + else + #endif + { + AES_GCM_encrypt_block_aesni((byte*)aes->key, aes->rounds, + AES_LASTGBLOCK(aes), AES_LASTGBLOCK(aes), AES_COUNTER(aes)); + } + /* XOR the remaining plaintext to calculate cipher text. + * Keep cipher text for GHASH of last partial block. + */ + xorbuf(AES_LASTGBLOCK(aes), p, partial); + XMEMCPY(c, AES_LASTGBLOCK(aes), partial); + /* Update count of the block used. */ + aes->cOver = (byte)partial; + } + } + RESTORE_VECTOR_REGISTERS(); +} + +/* Finalize the AES GCM for encryption and calculate the authentication tag. + * + * Calls AVX2, AVX1 or straight AES-NI optimized assembly code. + * + * @param [in, out] aes AES object. + * @param [in] authTag Buffer to hold authentication tag. + * @param [in] authTagSz Length of authentication tag in bytes. + * @return 0 on success. + */ +static void AesGcmEncryptFinal_aesni(Aes* aes, byte* authTag, word32 authTagSz) +{ + /* AAD block incomplete when > 0 */ + byte over = aes->aOver; + + SAVE_VECTOR_REGISTERS(); + if (aes->cOver > 0) { + /* Cipher text block incomplete. */ + over = aes->cOver; + } + if (over > 0) { + /* Fill the rest of the block with zeros. */ + XMEMSET(AES_LASTGBLOCK(aes) + over, 0, AES_BLOCK_SIZE - over); + /* GHASH last cipher block. */ + #ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_AVX2(intel_flags)) { + AES_GCM_ghash_block_avx2(AES_LASTGBLOCK(aes), AES_TAG(aes), aes->H); + } + else + #endif + #ifdef HAVE_INTEL_AVX1 + if (IS_INTEL_AVX1(intel_flags)) { + AES_GCM_ghash_block_avx1(AES_LASTGBLOCK(aes), AES_TAG(aes), aes->H); + } + else + #endif + { + AES_GCM_ghash_block_aesni(AES_LASTGBLOCK(aes), AES_TAG(aes), + aes->H); + } + } + /* Calculate the authentication tag. 
*/ +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_AVX2(intel_flags)) { + AES_GCM_encrypt_final_avx2(AES_TAG(aes), authTag, authTagSz, aes->cSz, + aes->aSz, aes->H, AES_INITCTR(aes)); + } + else +#endif +#ifdef HAVE_INTEL_AVX1 + if (IS_INTEL_AVX1(intel_flags)) { + AES_GCM_encrypt_final_avx1(AES_TAG(aes), authTag, authTagSz, aes->cSz, + aes->aSz, aes->H, AES_INITCTR(aes)); + } + else +#endif + { + AES_GCM_encrypt_final_aesni(AES_TAG(aes), authTag, authTagSz, aes->cSz, + aes->aSz, aes->H, AES_INITCTR(aes)); + } + RESTORE_VECTOR_REGISTERS(); +} + +#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AESGCM_DECRYPT) +/* Assembly code implementations in: aes_gcm_asm.S */ +#ifdef HAVE_INTEL_AVX2 +extern void AES_GCM_decrypt_update_avx2(const unsigned char* key, int nr, + unsigned char* out, const unsigned char* in, unsigned int nbytes, + unsigned char* tag, unsigned char* h, unsigned char* counter); +extern void AES_GCM_decrypt_final_avx2(unsigned char* tag, + const unsigned char* authTag, unsigned int tbytes, unsigned int nbytes, + unsigned int abytes, unsigned char* h, unsigned char* initCtr, int* res); +#endif +#ifdef HAVE_INTEL_AVX1 +extern void AES_GCM_decrypt_update_avx1(const unsigned char* key, int nr, + unsigned char* out, const unsigned char* in, unsigned int nbytes, + unsigned char* tag, unsigned char* h, unsigned char* counter); +extern void AES_GCM_decrypt_final_avx1(unsigned char* tag, + const unsigned char* authTag, unsigned int tbytes, unsigned int nbytes, + unsigned int abytes, unsigned char* h, unsigned char* initCtr, int* res); +#endif +extern void AES_GCM_decrypt_update_aesni(const unsigned char* key, int nr, + unsigned char* out, const unsigned char* in, unsigned int nbytes, + unsigned char* tag, unsigned char* h, unsigned char* counter); +extern void AES_GCM_decrypt_final_aesni(unsigned char* tag, + const unsigned char* authTag, unsigned int tbytes, unsigned int nbytes, + unsigned int abytes, unsigned char* h, unsigned char* initCtr, int* res); + +/* Update the AES GCM for decryption with data and/or authentication data. + * + * @param [in, out] aes AES object. + * @param [out] p Buffer to hold plaintext. + * @param [in] c Buffer holding ciper text. + * @param [in] cSz Length of cipher text/plaintext in bytes. + * @param [in] a Buffer holding authentication data. + * @param [in] aSz Length of authentication data in bytes. + */ +static void AesGcmDecryptUpdate_aesni(Aes* aes, byte* p, const byte* c, + word32 cSz, const byte* a, word32 aSz) +{ + word32 blocks; + int partial; + + SAVE_VECTOR_REGISTERS(); + /* Hash in A, the Authentication Data */ + AesGcmAadUpdate_aesni(aes, a, aSz, (cSz > 0) && (c != NULL)); + + /* Hash in C, the Cipher text, and decrypt. */ + if (cSz != 0 && p != NULL) { + /* Update count of cipher text we have hashed. */ + aes->cSz += cSz; + if (aes->cOver > 0) { + /* Calculate amount we can use - fill up the block. */ + byte sz = AES_BLOCK_SIZE - aes->cOver; + if (sz > cSz) { + sz = cSz; + } + /* Keep a copy of the cipher text for GHASH. */ + XMEMCPY(AES_LASTBLOCK(aes) + aes->cOver, c, sz); + /* Decrypt some of the cipher text. */ + xorbuf(AES_LASTGBLOCK(aes) + aes->cOver, c, sz); + XMEMCPY(p, AES_LASTGBLOCK(aes) + aes->cOver, sz); + /* Update count of unsed encrypted counter. */ + aes->cOver += sz; + if (aes->cOver == AES_BLOCK_SIZE) { + /* We have filled up the block and can process. 
*/ + #ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_AVX2(intel_flags)) { + AES_GCM_ghash_block_avx2(AES_LASTBLOCK(aes), AES_TAG(aes), + aes->H); + } + else + #endif + #ifdef HAVE_INTEL_AVX1 + if (IS_INTEL_AVX1(intel_flags)) { + AES_GCM_ghash_block_avx1(AES_LASTBLOCK(aes), AES_TAG(aes), + aes->H); + } + else + #endif + { + AES_GCM_ghash_block_aesni(AES_LASTBLOCK(aes), AES_TAG(aes), + aes->H); + } + /* Reset count. */ + aes->cOver = 0; + } + /* Used up some data. */ + cSz -= sz; + c += sz; + p += sz; + } + + /* Calculate number of blocks of plaintext and the leftover. */ + blocks = cSz / AES_BLOCK_SIZE; + partial = cSz % AES_BLOCK_SIZE; + if (blocks > 0) { + /* Decrypt and GHASH full blocks now. */ + #ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_AVX2(intel_flags)) { + AES_GCM_decrypt_update_avx2((byte*)aes->key, aes->rounds, p, c, + blocks * AES_BLOCK_SIZE, AES_TAG(aes), aes->H, + AES_COUNTER(aes)); + } + else + #endif + #ifdef HAVE_INTEL_AVX1 + if (IS_INTEL_AVX1(intel_flags)) { + AES_GCM_decrypt_update_avx1((byte*)aes->key, aes->rounds, p, c, + blocks * AES_BLOCK_SIZE, AES_TAG(aes), aes->H, + AES_COUNTER(aes)); + } + else + #endif + { + AES_GCM_decrypt_update_aesni((byte*)aes->key, aes->rounds, p, c, + blocks * AES_BLOCK_SIZE, AES_TAG(aes), aes->H, + AES_COUNTER(aes)); + } + /* Skip over to end of blocks. */ + c += blocks * AES_BLOCK_SIZE; + p += blocks * AES_BLOCK_SIZE; + } + if (partial != 0) { + /* Encrypt the counter - XOR in zeros as proxy for cipher text. */ + XMEMSET(AES_LASTGBLOCK(aes), 0, AES_BLOCK_SIZE); + #ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_AVX2(intel_flags)) { + AES_GCM_encrypt_block_avx2((byte*)aes->key, aes->rounds, + AES_LASTGBLOCK(aes), AES_LASTGBLOCK(aes), AES_COUNTER(aes)); + } + else + #endif + #ifdef HAVE_INTEL_AVX1 + if (IS_INTEL_AVX1(intel_flags)) { + AES_GCM_encrypt_block_avx1((byte*)aes->key, aes->rounds, + AES_LASTGBLOCK(aes), AES_LASTGBLOCK(aes), AES_COUNTER(aes)); + } + else + #endif + { + AES_GCM_encrypt_block_aesni((byte*)aes->key, aes->rounds, + AES_LASTGBLOCK(aes), AES_LASTGBLOCK(aes), AES_COUNTER(aes)); + } + /* Keep cipher text for GHASH of last partial block. */ + XMEMCPY(AES_LASTBLOCK(aes), c, partial); + /* XOR the remaining cipher text to calculate plaintext. */ + xorbuf(AES_LASTGBLOCK(aes), c, partial); + XMEMCPY(p, AES_LASTGBLOCK(aes), partial); + /* Update count of the block used. */ + aes->cOver = (byte)partial; + } + } + RESTORE_VECTOR_REGISTERS(); +} + +/* Finalize the AES GCM for decryption and check the authentication tag. + * + * Calls AVX2, AVX1 or straight AES-NI optimized assembly code. + * + * @param [in, out] aes AES object. + * @param [in] authTag Buffer holding authentication tag. + * @param [in] authTagSz Length of authentication tag in bytes. + * @return 0 on success. + * @return AES_GCM_AUTH_E when authentication tag doesn't match calculated + * value. + */ +static int AesGcmDecryptFinal_aesni(Aes* aes, const byte* authTag, + word32 authTagSz) +{ + int ret = 0; + int res; + /* AAD block incomplete when > 0 */ + byte over = aes->aOver; + byte *lastBlock = AES_LASTGBLOCK(aes); + + SAVE_VECTOR_REGISTERS(); + if (aes->cOver > 0) { + /* Cipher text block incomplete. */ + over = aes->cOver; + lastBlock = AES_LASTBLOCK(aes); + } + if (over > 0) { + /* Zeroize the unused part of the block. */ + XMEMSET(lastBlock + over, 0, AES_BLOCK_SIZE - over); + /* Hash the last block of cipher text. 
*/ + #ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_AVX2(intel_flags)) { + AES_GCM_ghash_block_avx2(lastBlock, AES_TAG(aes), aes->H); + } + else + #endif + #ifdef HAVE_INTEL_AVX1 + if (IS_INTEL_AVX1(intel_flags)) { + AES_GCM_ghash_block_avx1(lastBlock, AES_TAG(aes), aes->H); + } + else + #endif + { + AES_GCM_ghash_block_aesni(lastBlock, AES_TAG(aes), aes->H); + } + } + /* Calculate and compare the authentication tag. */ +#ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_AVX2(intel_flags)) { + AES_GCM_decrypt_final_avx2(AES_TAG(aes), authTag, authTagSz, aes->cSz, + aes->aSz, aes->H, AES_INITCTR(aes), &res); + } + else +#endif +#ifdef HAVE_INTEL_AVX1 + if (IS_INTEL_AVX1(intel_flags)) { + AES_GCM_decrypt_final_avx1(AES_TAG(aes), authTag, authTagSz, aes->cSz, + aes->aSz, aes->H, AES_INITCTR(aes), &res); + } + else +#endif + { + AES_GCM_decrypt_final_aesni(AES_TAG(aes), authTag, authTagSz, aes->cSz, + aes->aSz, aes->H, AES_INITCTR(aes), &res); + } + RESTORE_VECTOR_REGISTERS(); + /* Return error code when calculated doesn't match input. */ + if (res == 0) { + ret = AES_GCM_AUTH_E; + } + return ret; +} +#endif /* HAVE_AES_DECRYPT || HAVE_AESGCM_DECRYPT */ +#endif /* WOLFSSL_AESNI */ + +/* Initialize an AES GCM cipher for encryption or decryption. + * + * Must call wc_AesInit() before calling this function. + * + * @param [in, out] aes AES object. + * @param [in] key Buffer holding key. + * @param [in] len Length of key in bytes. + * @param [in] iv Buffer holding IV/nonce. + * @param [in] ivSz Length of IV/nonce in bytes. + * @return 0 on success. + * @return BAD_FUNC_ARG when aes is NULL, or a length is non-zero but buffer + * is NULL, or the IV is NULL and no previous IV has been set. + * @return MEMORY_E when dynamic memory allocation fails. (WOLFSSL_SMALL_STACK) + */ +int wc_AesGcmInit(Aes* aes, const byte* key, word32 len, const byte* iv, + word32 ivSz) +{ + int ret = 0; + + /* Check validity of parameters. */ + if ((aes == NULL) || ((len > 0) && (key == NULL)) || + ((ivSz == 0) && (iv != NULL)) || (ivSz > AES_BLOCK_SIZE) || + ((ivSz > 0) && (iv == NULL))) { + ret = BAD_FUNC_ARG; + } + +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_AESNI) + if ((ret == 0) && (aes->streamData == NULL)) { + /* Allocate buffers for streaming. */ + aes->streamData = (byte*)XMALLOC(5 * AES_BLOCK_SIZE, aes->heap, + DYNAMIC_TYPE_AES); + if (aes->streamData == NULL) { + ret = MEMORY_E; + } + } +#endif + + /* Set the key if passed in. */ + if ((ret == 0) && (key != NULL)) { + ret = wc_AesGcmSetKey(aes, key, len); + } + + if (ret == 0) { + /* Setup with IV if needed. */ + if (iv != NULL) { + /* Cache the IV in AES GCM object. */ + XMEMCPY((byte*)aes->reg, iv, ivSz); + aes->nonceSz = ivSz; + } + else if (aes->nonceSz != 0) { + /* Copy out the cached copy. */ + iv = (byte*)aes->reg; + ivSz = aes->nonceSz; + } + + if (iv != NULL) { + /* Initialize with the IV. */ + #ifdef WOLFSSL_AESNI + if (haveAESNI + #ifdef HAVE_INTEL_AVX2 + || IS_INTEL_AVX2(intel_flags) + #endif + #ifdef HAVE_INTEL_AVX1 + || IS_INTEL_AVX1(intel_flags) + #endif + ) { + AesGcmInit_aesni(aes, iv, ivSz); + } + else + #endif + { + AesGcmInit_C(aes, iv, ivSz); + } + + aes->nonceSet = 1; + } + } + + return ret; +} + +/* Initialize an AES GCM cipher for encryption. + * + * Must call wc_AesInit() before calling this function. + * + * @param [in, out] aes AES object. + * @param [in] key Buffer holding key. + * @param [in] len Length of key in bytes. + * @param [in] iv Buffer holding IV/nonce. + * @param [in] ivSz Length of IV/nonce in bytes. + * @return 0 on success. 
+ * @return BAD_FUNC_ARG when aes is NULL, or a length is non-zero but buffer + * is NULL, or the IV is NULL and no previous IV has been set. + */ +int wc_AesGcmEncryptInit(Aes* aes, const byte* key, word32 len, const byte* iv, + word32 ivSz) +{ + return wc_AesGcmInit(aes, key, len, iv, ivSz); +} + +/* Initialize an AES GCM cipher for encryption or decryption. Get IV. + * + * Must call wc_AesInit() before calling this function. + * + * @param [in, out] aes AES object. + * @param [in] key Buffer holding key. + * @param [in] len Length of key in bytes. + * @param [in] iv Buffer holding IV/nonce. + * @param [in] ivSz Length of IV/nonce in bytes. + * @return 0 on success. + * @return BAD_FUNC_ARG when aes is NULL, or a length is non-zero but buffer + * is NULL, or the IV is NULL and no previous IV has been set. + */ +int wc_AesGcmEncryptInit_ex(Aes* aes, const byte* key, word32 len, byte* ivOut, + word32 ivOutSz) +{ + XMEMCPY(ivOut, aes->reg, ivOutSz); + return wc_AesGcmInit(aes, key, len, NULL, 0); +} + +/* Update the AES GCM for encryption with data and/or authentication data. + * + * All the AAD must be passed to update before the plaintext. + * Last part of AAD can be passed with first part of plaintext. + * + * Must set key and IV before calling this function. + * Must call wc_AesGcmInit() before calling this function. + * + * @param [in, out] aes AES object. + * @param [out] out Buffer to hold cipher text. + * @param [in] in Buffer holding plaintext. + * @param [in] sz Length of plaintext in bytes. + * @param [in] authIn Buffer holding authentication data. + * @param [in] authInSz Length of authentication data in bytes. + * @return 0 on success. + * @return BAD_FUNC_ARG when aes is NULL, or a length is non-zero but buffer + * is NULL. + */ +int wc_AesGcmEncryptUpdate(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* authIn, word32 authInSz) +{ + int ret = 0; + + /* Check validity of parameters. */ + if ((aes == NULL) || ((authInSz > 0) && (authIn == NULL)) || ((sz > 0) && + ((out == NULL) || (in == NULL)))) { + ret = BAD_FUNC_ARG; + } + + /* Check key has been set. */ + if ((ret == 0) && (!aes->gcmKeySet)) { + ret = MISSING_KEY; + } + /* Check IV has been set. */ + if ((ret == 0) && (!aes->nonceSet)) { + ret = MISSING_IV; + } + + if ((ret == 0) && aes->ctrSet && (aes->aSz == 0) && (aes->cSz == 0)) { + aes->invokeCtr[0]++; + if (aes->invokeCtr[0] == 0) { + aes->invokeCtr[1]++; + if (aes->invokeCtr[1] == 0) + ret = AES_GCM_OVERFLOW_E; + } + } + + if (ret == 0) { + /* Encrypt with AAD and/or plaintext. */ + #if defined(WOLFSSL_AESNI) + if (haveAESNI + #ifdef HAVE_INTEL_AVX2 + || IS_INTEL_AVX2(intel_flags) + #endif + #ifdef HAVE_INTEL_AVX1 + || IS_INTEL_AVX1(intel_flags) + #endif + ) { + AesGcmEncryptUpdate_aesni(aes, out, in, sz, authIn, authInSz); + } + else + #endif + { + /* Encrypt the plaintext. */ + AesGcmCryptUpdate_C(aes, out, in, sz); + /* Update the authenication tag with any authentication data and the + * new cipher text. */ + GHASH_UPDATE(aes, authIn, authInSz, out, sz); + } + } + + return ret; +} + +/* Finalize the AES GCM for encryption and return the authentication tag. + * + * Must set key and IV before calling this function. + * Must call wc_AesGcmInit() before calling this function. + * + * @param [in, out] aes AES object. + * @param [out] authTag Buffer to hold authentication tag. + * @param [in] authTagSz Length of authentication tag in bytes. + * @return 0 on success. 
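A minimal encrypt-side sketch of the streaming calls (illustrative only, not part of the patch): all AAD is passed before any plaintext, the plaintext may arrive in arbitrary chunks, and the tag is produced by wc_AesGcmEncryptFinal() defined just below. The aes object is assumed to have been set up with wc_AesGcmInit(); buffer names are hypothetical.

/* Illustrative streaming encryption; error handling trimmed for brevity. */
#include <wolfssl/wolfcrypt/aes.h>

static int stream_encrypt(Aes* aes, const byte* aad, word32 aadSz,
                          const byte* msg, word32 msgSz,
                          byte* cipher, byte* tag)
{
    int    ret;
    word32 half = msgSz / 2;

    /* AAD only: no output is produced by this call. */
    ret = wc_AesGcmEncryptUpdate(aes, NULL, NULL, 0, aad, aadSz);
    /* Plaintext in two chunks; ciphertext is written incrementally. */
    if (ret == 0)
        ret = wc_AesGcmEncryptUpdate(aes, cipher, msg, half, NULL, 0);
    if (ret == 0)
        ret = wc_AesGcmEncryptUpdate(aes, cipher + half, msg + half,
                                     msgSz - half, NULL, 0);
    /* Produce the authentication tag. */
    if (ret == 0)
        ret = wc_AesGcmEncryptFinal(aes, tag, AES_BLOCK_SIZE);
    return ret;
}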
+ */ +int wc_AesGcmEncryptFinal(Aes* aes, byte* authTag, word32 authTagSz) +{ + int ret = 0; + + /* Check validity of parameters. */ + if ((aes == NULL) || (authTag == NULL) || (authTagSz > AES_BLOCK_SIZE) || + (authTagSz == 0)) { + ret = BAD_FUNC_ARG; + } + + /* Check key has been set. */ + if ((ret == 0) && (!aes->gcmKeySet)) { + ret = MISSING_KEY; + } + /* Check IV has been set. */ + if ((ret == 0) && (!aes->nonceSet)) { + ret = MISSING_IV; + } + + if (ret == 0) { + /* Calculate authentication tag. */ + #ifdef WOLFSSL_AESNI + if (haveAESNI + #ifdef HAVE_INTEL_AVX2 + || IS_INTEL_AVX2(intel_flags) + #endif + #ifdef HAVE_INTEL_AVX1 + || IS_INTEL_AVX1(intel_flags) + #endif + ) { + AesGcmEncryptFinal_aesni(aes, authTag, authTagSz); + } + else + #endif + { + AesGcmFinal_C(aes, authTag, authTagSz); + } + } + + if ((ret == 0) && aes->ctrSet) { + IncCtr((byte*)aes->reg, aes->nonceSz); + } + + return ret; +} + +#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AESGCM_DECRYPT) +/* Initialize an AES GCM cipher for decryption. + * + * Must call wc_AesInit() before calling this function. + * + * @param [in, out] aes AES object. + * @param [in] key Buffer holding key. + * @param [in] len Length of key in bytes. + * @param [in] iv Buffer holding IV/nonce. + * @param [in] ivSz Length of IV/nonce in bytes. + * @return 0 on success. + * @return BAD_FUNC_ARG when aes is NULL, or a length is non-zero but buffer + * is NULL, or the IV is NULL and no previous IV has been set. + */ +int wc_AesGcmDecryptInit(Aes* aes, const byte* key, word32 len, const byte* iv, + word32 ivSz) +{ + return wc_AesGcmInit(aes, key, len, iv, ivSz); +} + +/* Update the AES GCM for decryption with data and/or authentication data. + * + * All the AAD must be passed to update before the cipher text. + * Last part of AAD can be passed with first part of cipher text. + * + * Must set key and IV before calling this function. + * Must call wc_AesGcmInit() before calling this function. + * + * @param [in, out] aes AES object. + * @param [out] out Buffer to hold plaintext. + * @param [in] in Buffer holding cipher text. + * @param [in] sz Length of cipher text in bytes. + * @param [in] authIn Buffer holding authentication data. + * @param [in] authInSz Length of authentication data in bytes. + * @return 0 on success. + * @return BAD_FUNC_ARG when aes is NULL, or a length is non-zero but buffer + * is NULL. + */ +int wc_AesGcmDecryptUpdate(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* authIn, word32 authInSz) +{ + int ret = 0; + + /* Check validity of parameters. */ + if ((aes == NULL) || ((authInSz > 0) && (authIn == NULL)) || ((sz > 0) && + ((out == NULL) || (in == NULL)))) { + ret = BAD_FUNC_ARG; + } + + /* Check key has been set. */ + if ((ret == 0) && (!aes->gcmKeySet)) { + ret = MISSING_KEY; + } + /* Check IV has been set. */ + if ((ret == 0) && (!aes->nonceSet)) { + ret = MISSING_IV; + } + + if (ret == 0) { + /* Decrypt with AAD and/or cipher text. */ + #if defined(WOLFSSL_AESNI) + if (haveAESNI + #ifdef HAVE_INTEL_AVX2 + || IS_INTEL_AVX2(intel_flags) + #endif + #ifdef HAVE_INTEL_AVX1 + || IS_INTEL_AVX1(intel_flags) + #endif + ) { + AesGcmDecryptUpdate_aesni(aes, out, in, sz, authIn, authInSz); + } + else + #endif + { + /* Update the authenication tag with any authentication data and + * cipher text. */ + GHASH_UPDATE(aes, authIn, authInSz, in, sz); + /* Decrypt the cipher text. */ + AesGcmCryptUpdate_C(aes, out, in, sz); + } + } + + return ret; +} + +/* Finalize the AES GCM for decryption and check the authentication tag. 
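The decrypt side mirrors this; a minimal sketch (illustrative only, not part of the patch) using wc_AesGcmDecryptFinal() defined just below, again assuming wc_AesGcmInit() has been called and using hypothetical buffers:

/* Illustrative streaming decryption with tag verification at the end. */
#include <wolfssl/wolfcrypt/aes.h>

static int stream_decrypt(Aes* aes, const byte* aad, word32 aadSz,
                          const byte* cipher, word32 cipherSz,
                          byte* plain, const byte* tag)
{
    int ret;

    /* AAD first, then the ciphertext (here in a single chunk). */
    ret = wc_AesGcmDecryptUpdate(aes, NULL, NULL, 0, aad, aadSz);
    if (ret == 0)
        ret = wc_AesGcmDecryptUpdate(aes, plain, cipher, cipherSz, NULL, 0);
    if (ret == 0) {
        ret = wc_AesGcmDecryptFinal(aes, tag, AES_BLOCK_SIZE);
        if (ret == AES_GCM_AUTH_E) {
            /* Tag mismatch: the plaintext produced so far must be discarded. */
        }
    }
    return ret;
}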
+ *
+ * Must set key and IV before calling this function.
+ * Must call wc_AesGcmInit() before calling this function.
+ *
+ * @param [in, out] aes        AES object.
+ * @param [in]      authTag    Buffer holding authentication tag.
+ * @param [in]      authTagSz  Length of authentication tag in bytes.
+ * @return  0 on success.
+ * @return  AES_GCM_AUTH_E when authentication tag doesn't match calculated
+ *          value.
+ */
+int wc_AesGcmDecryptFinal(Aes* aes, const byte* authTag, word32 authTagSz)
+{
+    int ret = 0;
+
+    /* Check validity of parameters. */
+    if ((aes == NULL) || (authTag == NULL) || (authTagSz > AES_BLOCK_SIZE) ||
+            (authTagSz == 0)) {
+        ret = BAD_FUNC_ARG;
+    }
+
+    /* Check key has been set. */
+    if ((ret == 0) && (!aes->gcmKeySet)) {
+        ret = MISSING_KEY;
+    }
+    /* Check IV has been set. */
+    if ((ret == 0) && (!aes->nonceSet)) {
+        ret = MISSING_IV;
+    }
+
+    if (ret == 0) {
+        /* Calculate authentication tag and compare with the one passed in. */
+    #ifdef WOLFSSL_AESNI
+        if (haveAESNI
+        #ifdef HAVE_INTEL_AVX2
+            || IS_INTEL_AVX2(intel_flags)
+        #endif
+        #ifdef HAVE_INTEL_AVX1
+            || IS_INTEL_AVX1(intel_flags)
+        #endif
+            ) {
+            ret = AesGcmDecryptFinal_aesni(aes, authTag, authTagSz);
+        }
+        else
+    #endif
+        {
+            ALIGN32 byte calcTag[AES_BLOCK_SIZE];
+            /* Calculate authentication tag. */
+            AesGcmFinal_C(aes, calcTag, authTagSz);
+            /* Check calculated tag matches the one passed in. */
+            if (ConstantCompare(authTag, calcTag, authTagSz) != 0) {
+                ret = AES_GCM_AUTH_E;
+            }
+        }
+    }
+
+    return ret;
+}
+#endif /* HAVE_AES_DECRYPT || HAVE_AESGCM_DECRYPT */
+#endif /* WOLFSSL_AESGCM_STREAM */
 #endif /* WOLFSSL_XILINX_CRYPT */
 #endif /* end of block for AESGCM implementation selection */
@@ -7411,6 +8933,9 @@ int wc_AesGcmSetExtIV(Aes* aes, const byte* iv, word32 ivSz)
          * counter to 32-bits. (SP 800-38D 8.3) */
         aes->invokeCtr[0] = 0;
         aes->invokeCtr[1] = (ivSz == GCM_NONCE_MID_SZ) ? 0 : 0xFFFFFFFF;
+    #ifdef WOLFSSL_AESGCM_STREAM
+        aes->ctrSet = 1;
+    #endif
         aes->nonceSz = ivSz;
     }
@@ -7448,6 +8973,9 @@ int wc_AesGcmSetIV(Aes* aes, word32 ivSz,
          * counter to 32-bits. (SP 800-38D 8.3) */
         aes->invokeCtr[0] = 0;
         aes->invokeCtr[1] = (ivSz == GCM_NONCE_MID_SZ) ?
0 : 0xFFFFFFFF; + #ifdef WOLFSSL_AESGCM_STREAM + aes->ctrSet = 1; + #endif aes->nonceSz = ivSz; } @@ -7734,7 +9262,7 @@ int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz, if (status != 0) { return status; } - + status = wolfSSL_CryptHwMutexLock(); if (status != 0) return status; @@ -8210,6 +9738,18 @@ int wc_AesInit(Aes* aes, void* heap, int devId) aes->aadLen = 0; #endif #endif + +#ifdef WOLFSSL_AESGCM_STREAM +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_AESNI) + aes->streamData = NULL; +#endif + aes->keylen = 0; + aes->nonceSz = 0; + aes->gcmKeySet = 0; + aes->nonceSet = 0; + aes->ctrSet = 0; +#endif + return ret; } @@ -8288,6 +9828,12 @@ void wc_AesFree(Aes* aes) #if defined(WOLFSSL_IMXRT_DCP) DCPAesFree(aes); #endif +#if defined(WOLFSSL_AESGCM_STREAM) && defined(WOLFSSL_SMALL_STACK) && \ + !defined(WOLFSSL_AESNI) + if (aes->streamData != NULL) { + XFREE(aes->streamData, aes->heap, DYNAMIC_TYPE_AES); + } +#endif } diff --git a/wolfcrypt/src/aes_gcm_asm.S b/wolfcrypt/src/aes_gcm_asm.S index 53546af66..1a1641056 100644 --- a/wolfcrypt/src/aes_gcm_asm.S +++ b/wolfcrypt/src/aes_gcm_asm.S @@ -3418,6 +3418,2778 @@ L_AES_GCM_decrypt_cmp_tag_done: #ifndef __APPLE__ .size AES_GCM_decrypt,.-AES_GCM_decrypt #endif /* __APPLE__ */ +#ifdef WOLFSSL_AESGCM_STREAM +#ifndef __APPLE__ +.text +.globl AES_GCM_init_aesni +.type AES_GCM_init_aesni,@function +.align 16 +AES_GCM_init_aesni: +#else +.section __TEXT,__text +.globl _AES_GCM_init_aesni +.p2align 4 +_AES_GCM_init_aesni: +#endif /* __APPLE__ */ + pushq %r12 + pushq %r13 + pushq %r14 + movq %rdx, %r10 + movl %ecx, %r11d + movq 32(%rsp), %rax + subq $16, %rsp + pxor %xmm4, %xmm4 + movl %r11d, %edx + cmpl $12, %edx + jne L_AES_GCM_init_aesni_iv_not_12 + # # Calculate values when IV is 12 bytes + # Set counter based on IV + movl $0x1000000, %ecx + pinsrq $0x00, (%r10), %xmm4 + pinsrd $2, 8(%r10), %xmm4 + pinsrd $3, %ecx, %xmm4 + # H = Encrypt X(=0) and T = Encrypt counter + movdqa %xmm4, %xmm1 + movdqa (%rdi), %xmm5 + pxor %xmm5, %xmm1 + movdqa 16(%rdi), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 32(%rdi), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 48(%rdi), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 64(%rdi), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 80(%rdi), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 96(%rdi), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 112(%rdi), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 128(%rdi), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 144(%rdi), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + cmpl $11, %esi + movdqa 160(%rdi), %xmm7 + jl L_AES_GCM_init_aesni_calc_iv_12_last + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 176(%rdi), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + cmpl $13, %esi + movdqa 192(%rdi), %xmm7 + jl L_AES_GCM_init_aesni_calc_iv_12_last + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 208(%rdi), %xmm7 + aesenc %xmm7, %xmm5 + aesenc %xmm7, %xmm1 + movdqa 224(%rdi), %xmm7 +L_AES_GCM_init_aesni_calc_iv_12_last: + aesenclast %xmm7, %xmm5 + aesenclast %xmm7, %xmm1 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm5 + movdqa %xmm1, %xmm15 + jmp L_AES_GCM_init_aesni_iv_done +L_AES_GCM_init_aesni_iv_not_12: + # Calculate values when IV is not 12 bytes + # H = Encrypt X(=0) + movdqa (%rdi), %xmm5 + aesenc 16(%rdi), %xmm5 + aesenc 32(%rdi), %xmm5 + aesenc 48(%rdi), %xmm5 + aesenc 64(%rdi), %xmm5 + aesenc 80(%rdi), %xmm5 + aesenc 96(%rdi), %xmm5 + 
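In the 12-byte-IV fast path of AES_GCM_init_aesni above, the initial counter block is formed directly as IV || 0x00000001 and H is the AES encryption of the all-zero block. In byte terms the counter construction is simply the following (illustrative, per SP 800-38D, not the patch's code):

/* Illustrative construction of the pre-counter block J0 for a 96-bit IV. */
#include <string.h>

static void gcm_j0_96bit(unsigned char j0[16], const unsigned char iv[12])
{
    memcpy(j0, iv, 12);
    j0[12] = 0x00;
    j0[13] = 0x00;
    j0[14] = 0x00;
    j0[15] = 0x01;   /* 32-bit big-endian counter starts at 1 */
}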
aesenc 112(%rdi), %xmm5 + aesenc 128(%rdi), %xmm5 + aesenc 144(%rdi), %xmm5 + cmpl $11, %esi + movdqa 160(%rdi), %xmm9 + jl L_AES_GCM_init_aesni_calc_iv_1_aesenc_avx_last + aesenc %xmm9, %xmm5 + aesenc 176(%rdi), %xmm5 + cmpl $13, %esi + movdqa 192(%rdi), %xmm9 + jl L_AES_GCM_init_aesni_calc_iv_1_aesenc_avx_last + aesenc %xmm9, %xmm5 + aesenc 208(%rdi), %xmm5 + movdqa 224(%rdi), %xmm9 +L_AES_GCM_init_aesni_calc_iv_1_aesenc_avx_last: + aesenclast %xmm9, %xmm5 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm5 + # Calc counter + # Initialization vector + cmpl $0x00, %edx + movq $0x00, %rcx + je L_AES_GCM_init_aesni_calc_iv_done + cmpl $16, %edx + jl L_AES_GCM_init_aesni_calc_iv_lt16 + andl $0xfffffff0, %edx +L_AES_GCM_init_aesni_calc_iv_16_loop: + movdqu (%r10,%rcx,1), %xmm8 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm8 + pxor %xmm8, %xmm4 + pshufd $0x4e, %xmm4, %xmm1 + pshufd $0x4e, %xmm5, %xmm2 + movdqa %xmm5, %xmm3 + movdqa %xmm5, %xmm0 + pclmulqdq $0x11, %xmm4, %xmm3 + pclmulqdq $0x00, %xmm4, %xmm0 + pxor %xmm4, %xmm1 + pxor %xmm5, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + movdqa %xmm0, %xmm7 + movdqa %xmm3, %xmm4 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm7 + pxor %xmm1, %xmm4 + movdqa %xmm7, %xmm0 + movdqa %xmm4, %xmm1 + psrld $31, %xmm0 + psrld $31, %xmm1 + pslld $0x01, %xmm7 + pslld $0x01, %xmm4 + movdqa %xmm0, %xmm2 + pslldq $4, %xmm0 + psrldq $12, %xmm2 + pslldq $4, %xmm1 + por %xmm2, %xmm4 + por %xmm0, %xmm7 + por %xmm1, %xmm4 + movdqa %xmm7, %xmm0 + movdqa %xmm7, %xmm1 + movdqa %xmm7, %xmm2 + pslld $31, %xmm0 + pslld $30, %xmm1 + pslld $25, %xmm2 + pxor %xmm1, %xmm0 + pxor %xmm2, %xmm0 + movdqa %xmm0, %xmm1 + psrldq $4, %xmm1 + pslldq $12, %xmm0 + pxor %xmm0, %xmm7 + movdqa %xmm7, %xmm2 + movdqa %xmm7, %xmm3 + movdqa %xmm7, %xmm0 + psrld $0x01, %xmm2 + psrld $2, %xmm3 + psrld $7, %xmm0 + pxor %xmm3, %xmm2 + pxor %xmm0, %xmm2 + pxor %xmm1, %xmm2 + pxor %xmm7, %xmm2 + pxor %xmm2, %xmm4 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_init_aesni_calc_iv_16_loop + movl %r11d, %edx + cmpl %edx, %ecx + je L_AES_GCM_init_aesni_calc_iv_done +L_AES_GCM_init_aesni_calc_iv_lt16: + subq $16, %rsp + pxor %xmm8, %xmm8 + xorl %r13d, %r13d + movdqa %xmm8, (%rsp) +L_AES_GCM_init_aesni_calc_iv_loop: + movzbl (%r10,%rcx,1), %r12d + movb %r12b, (%rsp,%r13,1) + incl %ecx + incl %r13d + cmpl %edx, %ecx + jl L_AES_GCM_init_aesni_calc_iv_loop + movdqa (%rsp), %xmm8 + addq $16, %rsp + pshufb L_aes_gcm_bswap_mask(%rip), %xmm8 + pxor %xmm8, %xmm4 + pshufd $0x4e, %xmm4, %xmm1 + pshufd $0x4e, %xmm5, %xmm2 + movdqa %xmm5, %xmm3 + movdqa %xmm5, %xmm0 + pclmulqdq $0x11, %xmm4, %xmm3 + pclmulqdq $0x00, %xmm4, %xmm0 + pxor %xmm4, %xmm1 + pxor %xmm5, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + movdqa %xmm0, %xmm7 + movdqa %xmm3, %xmm4 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm7 + pxor %xmm1, %xmm4 + movdqa %xmm7, %xmm0 + movdqa %xmm4, %xmm1 + psrld $31, %xmm0 + psrld $31, %xmm1 + pslld $0x01, %xmm7 + pslld $0x01, %xmm4 + movdqa %xmm0, %xmm2 + pslldq $4, %xmm0 + psrldq $12, %xmm2 + pslldq $4, %xmm1 + por %xmm2, %xmm4 + por %xmm0, %xmm7 + por %xmm1, %xmm4 + movdqa %xmm7, %xmm0 + movdqa %xmm7, %xmm1 + movdqa %xmm7, %xmm2 + pslld $31, %xmm0 + pslld $30, %xmm1 + pslld $25, %xmm2 + pxor %xmm1, %xmm0 + pxor %xmm2, %xmm0 + movdqa %xmm0, %xmm1 + psrldq $4, %xmm1 + pslldq $12, %xmm0 + pxor %xmm0, %xmm7 + movdqa %xmm7, %xmm2 + movdqa %xmm7, %xmm3 + movdqa %xmm7, %xmm0 + psrld $0x01, %xmm2 + 
psrld $2, %xmm3 + psrld $7, %xmm0 + pxor %xmm3, %xmm2 + pxor %xmm0, %xmm2 + pxor %xmm1, %xmm2 + pxor %xmm7, %xmm2 + pxor %xmm2, %xmm4 +L_AES_GCM_init_aesni_calc_iv_done: + # T = Encrypt counter + pxor %xmm0, %xmm0 + shll $3, %edx + pinsrq $0x00, %rdx, %xmm0 + pxor %xmm0, %xmm4 + pshufd $0x4e, %xmm4, %xmm1 + pshufd $0x4e, %xmm5, %xmm2 + movdqa %xmm5, %xmm3 + movdqa %xmm5, %xmm0 + pclmulqdq $0x11, %xmm4, %xmm3 + pclmulqdq $0x00, %xmm4, %xmm0 + pxor %xmm4, %xmm1 + pxor %xmm5, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + movdqa %xmm0, %xmm7 + movdqa %xmm3, %xmm4 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm7 + pxor %xmm1, %xmm4 + movdqa %xmm7, %xmm0 + movdqa %xmm4, %xmm1 + psrld $31, %xmm0 + psrld $31, %xmm1 + pslld $0x01, %xmm7 + pslld $0x01, %xmm4 + movdqa %xmm0, %xmm2 + pslldq $4, %xmm0 + psrldq $12, %xmm2 + pslldq $4, %xmm1 + por %xmm2, %xmm4 + por %xmm0, %xmm7 + por %xmm1, %xmm4 + movdqa %xmm7, %xmm0 + movdqa %xmm7, %xmm1 + movdqa %xmm7, %xmm2 + pslld $31, %xmm0 + pslld $30, %xmm1 + pslld $25, %xmm2 + pxor %xmm1, %xmm0 + pxor %xmm2, %xmm0 + movdqa %xmm0, %xmm1 + psrldq $4, %xmm1 + pslldq $12, %xmm0 + pxor %xmm0, %xmm7 + movdqa %xmm7, %xmm2 + movdqa %xmm7, %xmm3 + movdqa %xmm7, %xmm0 + psrld $0x01, %xmm2 + psrld $2, %xmm3 + psrld $7, %xmm0 + pxor %xmm3, %xmm2 + pxor %xmm0, %xmm2 + pxor %xmm1, %xmm2 + pxor %xmm7, %xmm2 + pxor %xmm2, %xmm4 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm4 + # Encrypt counter + movdqa (%rdi), %xmm8 + pxor %xmm4, %xmm8 + aesenc 16(%rdi), %xmm8 + aesenc 32(%rdi), %xmm8 + aesenc 48(%rdi), %xmm8 + aesenc 64(%rdi), %xmm8 + aesenc 80(%rdi), %xmm8 + aesenc 96(%rdi), %xmm8 + aesenc 112(%rdi), %xmm8 + aesenc 128(%rdi), %xmm8 + aesenc 144(%rdi), %xmm8 + cmpl $11, %esi + movdqa 160(%rdi), %xmm9 + jl L_AES_GCM_init_aesni_calc_iv_2_aesenc_avx_last + aesenc %xmm9, %xmm8 + aesenc 176(%rdi), %xmm8 + cmpl $13, %esi + movdqa 192(%rdi), %xmm9 + jl L_AES_GCM_init_aesni_calc_iv_2_aesenc_avx_last + aesenc %xmm9, %xmm8 + aesenc 208(%rdi), %xmm8 + movdqa 224(%rdi), %xmm9 +L_AES_GCM_init_aesni_calc_iv_2_aesenc_avx_last: + aesenclast %xmm9, %xmm8 + movdqa %xmm8, %xmm15 +L_AES_GCM_init_aesni_iv_done: + movdqa %xmm15, (%rax) + pshufb L_aes_gcm_bswap_epi64(%rip), %xmm4 + paddd L_aes_gcm_one(%rip), %xmm4 + movdqa %xmm5, (%r8) + movdqa %xmm4, (%r9) + addq $16, %rsp + popq %r14 + popq %r13 + popq %r12 + repz retq +#ifndef __APPLE__ +.size AES_GCM_init_aesni,.-AES_GCM_init_aesni +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl AES_GCM_aad_update_aesni +.type AES_GCM_aad_update_aesni,@function +.align 16 +AES_GCM_aad_update_aesni: +#else +.section __TEXT,__text +.globl _AES_GCM_aad_update_aesni +.p2align 4 +_AES_GCM_aad_update_aesni: +#endif /* __APPLE__ */ + movq %rcx, %rax + movdqa (%rdx), %xmm5 + movdqa (%rax), %xmm6 + xorl %ecx, %ecx +L_AES_GCM_aad_update_aesni_16_loop: + movdqu (%rdi,%rcx,1), %xmm8 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm8 + pxor %xmm8, %xmm5 + pshufd $0x4e, %xmm5, %xmm1 + pshufd $0x4e, %xmm6, %xmm2 + movdqa %xmm6, %xmm3 + movdqa %xmm6, %xmm0 + pclmulqdq $0x11, %xmm5, %xmm3 + pclmulqdq $0x00, %xmm5, %xmm0 + pxor %xmm5, %xmm1 + pxor %xmm6, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + movdqa %xmm0, %xmm4 + movdqa %xmm3, %xmm5 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm4 + pxor %xmm1, %xmm5 + movdqa %xmm4, %xmm0 + movdqa %xmm5, %xmm1 + psrld $31, %xmm0 + psrld $31, %xmm1 + pslld $0x01, %xmm4 + pslld $0x01, %xmm5 + movdqa %xmm0, %xmm2 + 
pslldq $4, %xmm0 + psrldq $12, %xmm2 + pslldq $4, %xmm1 + por %xmm2, %xmm5 + por %xmm0, %xmm4 + por %xmm1, %xmm5 + movdqa %xmm4, %xmm0 + movdqa %xmm4, %xmm1 + movdqa %xmm4, %xmm2 + pslld $31, %xmm0 + pslld $30, %xmm1 + pslld $25, %xmm2 + pxor %xmm1, %xmm0 + pxor %xmm2, %xmm0 + movdqa %xmm0, %xmm1 + psrldq $4, %xmm1 + pslldq $12, %xmm0 + pxor %xmm0, %xmm4 + movdqa %xmm4, %xmm2 + movdqa %xmm4, %xmm3 + movdqa %xmm4, %xmm0 + psrld $0x01, %xmm2 + psrld $2, %xmm3 + psrld $7, %xmm0 + pxor %xmm3, %xmm2 + pxor %xmm0, %xmm2 + pxor %xmm1, %xmm2 + pxor %xmm4, %xmm2 + pxor %xmm2, %xmm5 + addl $16, %ecx + cmpl %esi, %ecx + jl L_AES_GCM_aad_update_aesni_16_loop + movdqa %xmm5, (%rdx) + repz retq +#ifndef __APPLE__ +.size AES_GCM_aad_update_aesni,.-AES_GCM_aad_update_aesni +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl AES_GCM_encrypt_block_aesni +.type AES_GCM_encrypt_block_aesni,@function +.align 16 +AES_GCM_encrypt_block_aesni: +#else +.section __TEXT,__text +.globl _AES_GCM_encrypt_block_aesni +.p2align 4 +_AES_GCM_encrypt_block_aesni: +#endif /* __APPLE__ */ + movq %rdx, %r10 + movq %rcx, %r11 + movdqa (%r8), %xmm8 + movdqa %xmm8, %xmm9 + pshufb L_aes_gcm_bswap_epi64(%rip), %xmm8 + paddd L_aes_gcm_one(%rip), %xmm9 + pxor (%rdi), %xmm8 + movdqa %xmm9, (%r8) + aesenc 16(%rdi), %xmm8 + aesenc 32(%rdi), %xmm8 + aesenc 48(%rdi), %xmm8 + aesenc 64(%rdi), %xmm8 + aesenc 80(%rdi), %xmm8 + aesenc 96(%rdi), %xmm8 + aesenc 112(%rdi), %xmm8 + aesenc 128(%rdi), %xmm8 + aesenc 144(%rdi), %xmm8 + cmpl $11, %esi + movdqa 160(%rdi), %xmm9 + jl L_AES_GCM_encrypt_block_aesni_aesenc_block_aesenc_avx_last + aesenc %xmm9, %xmm8 + aesenc 176(%rdi), %xmm8 + cmpl $13, %esi + movdqa 192(%rdi), %xmm9 + jl L_AES_GCM_encrypt_block_aesni_aesenc_block_aesenc_avx_last + aesenc %xmm9, %xmm8 + aesenc 208(%rdi), %xmm8 + movdqa 224(%rdi), %xmm9 +L_AES_GCM_encrypt_block_aesni_aesenc_block_aesenc_avx_last: + aesenclast %xmm9, %xmm8 + movdqu (%r11), %xmm9 + pxor %xmm9, %xmm8 + movdqu %xmm8, (%r10) + pshufb L_aes_gcm_bswap_mask(%rip), %xmm8 + repz retq +#ifndef __APPLE__ +.size AES_GCM_encrypt_block_aesni,.-AES_GCM_encrypt_block_aesni +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl AES_GCM_ghash_block_aesni +.type AES_GCM_ghash_block_aesni,@function +.align 16 +AES_GCM_ghash_block_aesni: +#else +.section __TEXT,__text +.globl _AES_GCM_ghash_block_aesni +.p2align 4 +_AES_GCM_ghash_block_aesni: +#endif /* __APPLE__ */ + movdqa (%rsi), %xmm4 + movdqa (%rdx), %xmm5 + movdqu (%rdi), %xmm8 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm8 + pxor %xmm8, %xmm4 + pshufd $0x4e, %xmm4, %xmm1 + pshufd $0x4e, %xmm5, %xmm2 + movdqa %xmm5, %xmm3 + movdqa %xmm5, %xmm0 + pclmulqdq $0x11, %xmm4, %xmm3 + pclmulqdq $0x00, %xmm4, %xmm0 + pxor %xmm4, %xmm1 + pxor %xmm5, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + movdqa %xmm0, %xmm6 + movdqa %xmm3, %xmm4 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm6 + pxor %xmm1, %xmm4 + movdqa %xmm6, %xmm0 + movdqa %xmm4, %xmm1 + psrld $31, %xmm0 + psrld $31, %xmm1 + pslld $0x01, %xmm6 + pslld $0x01, %xmm4 + movdqa %xmm0, %xmm2 + pslldq $4, %xmm0 + psrldq $12, %xmm2 + pslldq $4, %xmm1 + por %xmm2, %xmm4 + por %xmm0, %xmm6 + por %xmm1, %xmm4 + movdqa %xmm6, %xmm0 + movdqa %xmm6, %xmm1 + movdqa %xmm6, %xmm2 + pslld $31, %xmm0 + pslld $30, %xmm1 + pslld $25, %xmm2 + pxor %xmm1, %xmm0 + pxor %xmm2, %xmm0 + movdqa %xmm0, %xmm1 + psrldq $4, %xmm1 + pslldq $12, %xmm0 + pxor %xmm0, %xmm6 + movdqa %xmm6, %xmm2 + movdqa %xmm6, %xmm3 + movdqa %xmm6, %xmm0 
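The pclmulqdq/shift/reduce sequence of AES_GCM_ghash_block_aesni above (and continuing just below) computes the GF(2^128) product at the heart of GHASH. For reference, a bit-serial C equivalent of that multiply, x = x * h in GCM's bit ordering; a sketch only, not the optimized path in the patch:

/* Bit-serial GHASH multiply: x = x * h in GF(2^128) with the GCM
 * reduction polynomial (0xE1 in the top byte). Reference only. */
#include <string.h>

static void ghash_gfmul(unsigned char x[16], const unsigned char h[16])
{
    unsigned char z[16] = { 0 };   /* accumulator             */
    unsigned char v[16];           /* running multiple of x   */
    int i, j, k;

    memcpy(v, x, 16);
    for (i = 0; i < 16; i++) {
        unsigned char y = h[i];
        for (j = 0; j < 8; j++) {
            int carry = v[15] & 0x01;

            if (y & 0x80) {
                /* This bit of h is set: add (XOR) v into the result. */
                for (k = 0; k < 16; k++)
                    z[k] ^= v[k];
            }
            /* Multiply v by the field element: one-bit shift in GCM bit
             * order, reducing with 0xE1 || 0^120 if a bit fell off. */
            for (k = 15; k > 0; k--)
                v[k] = (unsigned char)((v[k] >> 1) | (v[k - 1] << 7));
            v[0] >>= 1;
            if (carry)
                v[0] ^= 0xE1;

            y = (unsigned char)(y << 1);
        }
    }
    memcpy(x, z, 16);
}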
+ psrld $0x01, %xmm2 + psrld $2, %xmm3 + psrld $7, %xmm0 + pxor %xmm3, %xmm2 + pxor %xmm0, %xmm2 + pxor %xmm1, %xmm2 + pxor %xmm6, %xmm2 + pxor %xmm2, %xmm4 + movdqa %xmm4, (%rsi) + repz retq +#ifndef __APPLE__ +.size AES_GCM_ghash_block_aesni,.-AES_GCM_ghash_block_aesni +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl AES_GCM_encrypt_update_aesni +.type AES_GCM_encrypt_update_aesni,@function +.align 16 +AES_GCM_encrypt_update_aesni: +#else +.section __TEXT,__text +.globl _AES_GCM_encrypt_update_aesni +.p2align 4 +_AES_GCM_encrypt_update_aesni: +#endif /* __APPLE__ */ + pushq %r13 + pushq %r12 + pushq %r14 + movq %rdx, %r10 + movq %rcx, %r11 + movq 32(%rsp), %rax + movq 40(%rsp), %r12 + subq $0xa0, %rsp + movdqa (%r9), %xmm6 + movdqa (%rax), %xmm5 + movdqa %xmm5, %xmm9 + movdqa %xmm5, %xmm8 + psrlq $63, %xmm9 + psllq $0x01, %xmm8 + pslldq $8, %xmm9 + por %xmm9, %xmm8 + pshufd $0xff, %xmm5, %xmm5 + psrad $31, %xmm5 + pand L_aes_gcm_mod2_128(%rip), %xmm5 + pxor %xmm8, %xmm5 + xorq %r14, %r14 + cmpl $0x80, %r8d + movl %r8d, %r13d + jl L_AES_GCM_encrypt_update_aesni_done_128 + andl $0xffffff80, %r13d + movdqa %xmm6, %xmm2 + # H ^ 1 + movdqa %xmm5, (%rsp) + # H ^ 2 + pshufd $0x4e, %xmm5, %xmm9 + pshufd $0x4e, %xmm5, %xmm10 + movdqa %xmm5, %xmm11 + movdqa %xmm5, %xmm8 + pclmulqdq $0x11, %xmm5, %xmm11 + pclmulqdq $0x00, %xmm5, %xmm8 + pxor %xmm5, %xmm9 + pxor %xmm5, %xmm10 + pclmulqdq $0x00, %xmm10, %xmm9 + pxor %xmm8, %xmm9 + pxor %xmm11, %xmm9 + movdqa %xmm9, %xmm10 + movdqa %xmm11, %xmm0 + pslldq $8, %xmm10 + psrldq $8, %xmm9 + pxor %xmm10, %xmm8 + pxor %xmm9, %xmm0 + movdqa %xmm8, %xmm12 + movdqa %xmm8, %xmm13 + movdqa %xmm8, %xmm14 + pslld $31, %xmm12 + pslld $30, %xmm13 + pslld $25, %xmm14 + pxor %xmm13, %xmm12 + pxor %xmm14, %xmm12 + movdqa %xmm12, %xmm13 + psrldq $4, %xmm13 + pslldq $12, %xmm12 + pxor %xmm12, %xmm8 + movdqa %xmm8, %xmm14 + movdqa %xmm8, %xmm10 + movdqa %xmm8, %xmm9 + psrld $0x01, %xmm14 + psrld $2, %xmm10 + psrld $7, %xmm9 + pxor %xmm10, %xmm14 + pxor %xmm9, %xmm14 + pxor %xmm13, %xmm14 + pxor %xmm8, %xmm14 + pxor %xmm14, %xmm0 + movdqa %xmm0, 16(%rsp) + # H ^ 3 + pshufd $0x4e, %xmm5, %xmm9 + pshufd $0x4e, %xmm0, %xmm10 + movdqa %xmm0, %xmm11 + movdqa %xmm0, %xmm8 + pclmulqdq $0x11, %xmm5, %xmm11 + pclmulqdq $0x00, %xmm5, %xmm8 + pxor %xmm5, %xmm9 + pxor %xmm0, %xmm10 + pclmulqdq $0x00, %xmm10, %xmm9 + pxor %xmm8, %xmm9 + pxor %xmm11, %xmm9 + movdqa %xmm9, %xmm10 + movdqa %xmm11, %xmm1 + pslldq $8, %xmm10 + psrldq $8, %xmm9 + pxor %xmm10, %xmm8 + pxor %xmm9, %xmm1 + movdqa %xmm8, %xmm12 + movdqa %xmm8, %xmm13 + movdqa %xmm8, %xmm14 + pslld $31, %xmm12 + pslld $30, %xmm13 + pslld $25, %xmm14 + pxor %xmm13, %xmm12 + pxor %xmm14, %xmm12 + movdqa %xmm12, %xmm13 + psrldq $4, %xmm13 + pslldq $12, %xmm12 + pxor %xmm12, %xmm8 + movdqa %xmm8, %xmm14 + movdqa %xmm8, %xmm10 + movdqa %xmm8, %xmm9 + psrld $0x01, %xmm14 + psrld $2, %xmm10 + psrld $7, %xmm9 + pxor %xmm10, %xmm14 + pxor %xmm9, %xmm14 + pxor %xmm13, %xmm14 + pxor %xmm8, %xmm14 + pxor %xmm14, %xmm1 + movdqa %xmm1, 32(%rsp) + # H ^ 4 + pshufd $0x4e, %xmm0, %xmm9 + pshufd $0x4e, %xmm0, %xmm10 + movdqa %xmm0, %xmm11 + movdqa %xmm0, %xmm8 + pclmulqdq $0x11, %xmm0, %xmm11 + pclmulqdq $0x00, %xmm0, %xmm8 + pxor %xmm0, %xmm9 + pxor %xmm0, %xmm10 + pclmulqdq $0x00, %xmm10, %xmm9 + pxor %xmm8, %xmm9 + pxor %xmm11, %xmm9 + movdqa %xmm9, %xmm10 + movdqa %xmm11, %xmm3 + pslldq $8, %xmm10 + psrldq $8, %xmm9 + pxor %xmm10, %xmm8 + pxor %xmm9, %xmm3 + movdqa %xmm8, %xmm12 + movdqa %xmm8, %xmm13 + movdqa %xmm8, %xmm14 + pslld 
$31, %xmm12 + pslld $30, %xmm13 + pslld $25, %xmm14 + pxor %xmm13, %xmm12 + pxor %xmm14, %xmm12 + movdqa %xmm12, %xmm13 + psrldq $4, %xmm13 + pslldq $12, %xmm12 + pxor %xmm12, %xmm8 + movdqa %xmm8, %xmm14 + movdqa %xmm8, %xmm10 + movdqa %xmm8, %xmm9 + psrld $0x01, %xmm14 + psrld $2, %xmm10 + psrld $7, %xmm9 + pxor %xmm10, %xmm14 + pxor %xmm9, %xmm14 + pxor %xmm13, %xmm14 + pxor %xmm8, %xmm14 + pxor %xmm14, %xmm3 + movdqa %xmm3, 48(%rsp) + # H ^ 5 + pshufd $0x4e, %xmm0, %xmm9 + pshufd $0x4e, %xmm1, %xmm10 + movdqa %xmm1, %xmm11 + movdqa %xmm1, %xmm8 + pclmulqdq $0x11, %xmm0, %xmm11 + pclmulqdq $0x00, %xmm0, %xmm8 + pxor %xmm0, %xmm9 + pxor %xmm1, %xmm10 + pclmulqdq $0x00, %xmm10, %xmm9 + pxor %xmm8, %xmm9 + pxor %xmm11, %xmm9 + movdqa %xmm9, %xmm10 + movdqa %xmm11, %xmm7 + pslldq $8, %xmm10 + psrldq $8, %xmm9 + pxor %xmm10, %xmm8 + pxor %xmm9, %xmm7 + movdqa %xmm8, %xmm12 + movdqa %xmm8, %xmm13 + movdqa %xmm8, %xmm14 + pslld $31, %xmm12 + pslld $30, %xmm13 + pslld $25, %xmm14 + pxor %xmm13, %xmm12 + pxor %xmm14, %xmm12 + movdqa %xmm12, %xmm13 + psrldq $4, %xmm13 + pslldq $12, %xmm12 + pxor %xmm12, %xmm8 + movdqa %xmm8, %xmm14 + movdqa %xmm8, %xmm10 + movdqa %xmm8, %xmm9 + psrld $0x01, %xmm14 + psrld $2, %xmm10 + psrld $7, %xmm9 + pxor %xmm10, %xmm14 + pxor %xmm9, %xmm14 + pxor %xmm13, %xmm14 + pxor %xmm8, %xmm14 + pxor %xmm14, %xmm7 + movdqa %xmm7, 64(%rsp) + # H ^ 6 + pshufd $0x4e, %xmm1, %xmm9 + pshufd $0x4e, %xmm1, %xmm10 + movdqa %xmm1, %xmm11 + movdqa %xmm1, %xmm8 + pclmulqdq $0x11, %xmm1, %xmm11 + pclmulqdq $0x00, %xmm1, %xmm8 + pxor %xmm1, %xmm9 + pxor %xmm1, %xmm10 + pclmulqdq $0x00, %xmm10, %xmm9 + pxor %xmm8, %xmm9 + pxor %xmm11, %xmm9 + movdqa %xmm9, %xmm10 + movdqa %xmm11, %xmm7 + pslldq $8, %xmm10 + psrldq $8, %xmm9 + pxor %xmm10, %xmm8 + pxor %xmm9, %xmm7 + movdqa %xmm8, %xmm12 + movdqa %xmm8, %xmm13 + movdqa %xmm8, %xmm14 + pslld $31, %xmm12 + pslld $30, %xmm13 + pslld $25, %xmm14 + pxor %xmm13, %xmm12 + pxor %xmm14, %xmm12 + movdqa %xmm12, %xmm13 + psrldq $4, %xmm13 + pslldq $12, %xmm12 + pxor %xmm12, %xmm8 + movdqa %xmm8, %xmm14 + movdqa %xmm8, %xmm10 + movdqa %xmm8, %xmm9 + psrld $0x01, %xmm14 + psrld $2, %xmm10 + psrld $7, %xmm9 + pxor %xmm10, %xmm14 + pxor %xmm9, %xmm14 + pxor %xmm13, %xmm14 + pxor %xmm8, %xmm14 + pxor %xmm14, %xmm7 + movdqa %xmm7, 80(%rsp) + # H ^ 7 + pshufd $0x4e, %xmm1, %xmm9 + pshufd $0x4e, %xmm3, %xmm10 + movdqa %xmm3, %xmm11 + movdqa %xmm3, %xmm8 + pclmulqdq $0x11, %xmm1, %xmm11 + pclmulqdq $0x00, %xmm1, %xmm8 + pxor %xmm1, %xmm9 + pxor %xmm3, %xmm10 + pclmulqdq $0x00, %xmm10, %xmm9 + pxor %xmm8, %xmm9 + pxor %xmm11, %xmm9 + movdqa %xmm9, %xmm10 + movdqa %xmm11, %xmm7 + pslldq $8, %xmm10 + psrldq $8, %xmm9 + pxor %xmm10, %xmm8 + pxor %xmm9, %xmm7 + movdqa %xmm8, %xmm12 + movdqa %xmm8, %xmm13 + movdqa %xmm8, %xmm14 + pslld $31, %xmm12 + pslld $30, %xmm13 + pslld $25, %xmm14 + pxor %xmm13, %xmm12 + pxor %xmm14, %xmm12 + movdqa %xmm12, %xmm13 + psrldq $4, %xmm13 + pslldq $12, %xmm12 + pxor %xmm12, %xmm8 + movdqa %xmm8, %xmm14 + movdqa %xmm8, %xmm10 + movdqa %xmm8, %xmm9 + psrld $0x01, %xmm14 + psrld $2, %xmm10 + psrld $7, %xmm9 + pxor %xmm10, %xmm14 + pxor %xmm9, %xmm14 + pxor %xmm13, %xmm14 + pxor %xmm8, %xmm14 + pxor %xmm14, %xmm7 + movdqa %xmm7, 96(%rsp) + # H ^ 8 + pshufd $0x4e, %xmm3, %xmm9 + pshufd $0x4e, %xmm3, %xmm10 + movdqa %xmm3, %xmm11 + movdqa %xmm3, %xmm8 + pclmulqdq $0x11, %xmm3, %xmm11 + pclmulqdq $0x00, %xmm3, %xmm8 + pxor %xmm3, %xmm9 + pxor %xmm3, %xmm10 + pclmulqdq $0x00, %xmm10, %xmm9 + pxor %xmm8, %xmm9 + pxor %xmm11, %xmm9 + 
movdqa %xmm9, %xmm10 + movdqa %xmm11, %xmm7 + pslldq $8, %xmm10 + psrldq $8, %xmm9 + pxor %xmm10, %xmm8 + pxor %xmm9, %xmm7 + movdqa %xmm8, %xmm12 + movdqa %xmm8, %xmm13 + movdqa %xmm8, %xmm14 + pslld $31, %xmm12 + pslld $30, %xmm13 + pslld $25, %xmm14 + pxor %xmm13, %xmm12 + pxor %xmm14, %xmm12 + movdqa %xmm12, %xmm13 + psrldq $4, %xmm13 + pslldq $12, %xmm12 + pxor %xmm12, %xmm8 + movdqa %xmm8, %xmm14 + movdqa %xmm8, %xmm10 + movdqa %xmm8, %xmm9 + psrld $0x01, %xmm14 + psrld $2, %xmm10 + psrld $7, %xmm9 + pxor %xmm10, %xmm14 + pxor %xmm9, %xmm14 + pxor %xmm13, %xmm14 + pxor %xmm8, %xmm14 + pxor %xmm14, %xmm7 + movdqa %xmm7, 112(%rsp) + # First 128 bytes of input + movdqa (%r12), %xmm8 + movdqa L_aes_gcm_bswap_epi64(%rip), %xmm1 + movdqa %xmm8, %xmm0 + pshufb %xmm1, %xmm8 + movdqa %xmm0, %xmm9 + paddd L_aes_gcm_one(%rip), %xmm9 + pshufb %xmm1, %xmm9 + movdqa %xmm0, %xmm10 + paddd L_aes_gcm_two(%rip), %xmm10 + pshufb %xmm1, %xmm10 + movdqa %xmm0, %xmm11 + paddd L_aes_gcm_three(%rip), %xmm11 + pshufb %xmm1, %xmm11 + movdqa %xmm0, %xmm12 + paddd L_aes_gcm_four(%rip), %xmm12 + pshufb %xmm1, %xmm12 + movdqa %xmm0, %xmm13 + paddd L_aes_gcm_five(%rip), %xmm13 + pshufb %xmm1, %xmm13 + movdqa %xmm0, %xmm14 + paddd L_aes_gcm_six(%rip), %xmm14 + pshufb %xmm1, %xmm14 + movdqa %xmm0, %xmm15 + paddd L_aes_gcm_seven(%rip), %xmm15 + pshufb %xmm1, %xmm15 + paddd L_aes_gcm_eight(%rip), %xmm0 + movdqa (%rdi), %xmm7 + movdqa %xmm0, (%r12) + pxor %xmm7, %xmm8 + pxor %xmm7, %xmm9 + pxor %xmm7, %xmm10 + pxor %xmm7, %xmm11 + pxor %xmm7, %xmm12 + pxor %xmm7, %xmm13 + pxor %xmm7, %xmm14 + pxor %xmm7, %xmm15 + movdqa 16(%rdi), %xmm7 + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 32(%rdi), %xmm7 + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 48(%rdi), %xmm7 + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 64(%rdi), %xmm7 + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 80(%rdi), %xmm7 + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 96(%rdi), %xmm7 + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 112(%rdi), %xmm7 + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 128(%rdi), %xmm7 + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 144(%rdi), %xmm7 + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + cmpl $11, %esi + movdqa 160(%rdi), %xmm7 + jl 
L_AES_GCM_encrypt_update_aesni_enc_done + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 176(%rdi), %xmm7 + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + cmpl $13, %esi + movdqa 192(%rdi), %xmm7 + jl L_AES_GCM_encrypt_update_aesni_enc_done + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 208(%rdi), %xmm7 + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 224(%rdi), %xmm7 +L_AES_GCM_encrypt_update_aesni_enc_done: + aesenclast %xmm7, %xmm8 + aesenclast %xmm7, %xmm9 + movdqu (%r11), %xmm0 + movdqu 16(%r11), %xmm1 + pxor %xmm0, %xmm8 + pxor %xmm1, %xmm9 + movdqu %xmm8, (%r10) + movdqu %xmm9, 16(%r10) + aesenclast %xmm7, %xmm10 + aesenclast %xmm7, %xmm11 + movdqu 32(%r11), %xmm0 + movdqu 48(%r11), %xmm1 + pxor %xmm0, %xmm10 + pxor %xmm1, %xmm11 + movdqu %xmm10, 32(%r10) + movdqu %xmm11, 48(%r10) + aesenclast %xmm7, %xmm12 + aesenclast %xmm7, %xmm13 + movdqu 64(%r11), %xmm0 + movdqu 80(%r11), %xmm1 + pxor %xmm0, %xmm12 + pxor %xmm1, %xmm13 + movdqu %xmm12, 64(%r10) + movdqu %xmm13, 80(%r10) + aesenclast %xmm7, %xmm14 + aesenclast %xmm7, %xmm15 + movdqu 96(%r11), %xmm0 + movdqu 112(%r11), %xmm1 + pxor %xmm0, %xmm14 + pxor %xmm1, %xmm15 + movdqu %xmm14, 96(%r10) + movdqu %xmm15, 112(%r10) + cmpl $0x80, %r13d + movl $0x80, %r14d + jle L_AES_GCM_encrypt_update_aesni_end_128 + # More 128 bytes of input +L_AES_GCM_encrypt_update_aesni_ghash_128: + leaq (%r11,%r14,1), %rcx + leaq (%r10,%r14,1), %rdx + movdqa (%r12), %xmm8 + movdqa L_aes_gcm_bswap_epi64(%rip), %xmm1 + movdqa %xmm8, %xmm0 + pshufb %xmm1, %xmm8 + movdqa %xmm0, %xmm9 + paddd L_aes_gcm_one(%rip), %xmm9 + pshufb %xmm1, %xmm9 + movdqa %xmm0, %xmm10 + paddd L_aes_gcm_two(%rip), %xmm10 + pshufb %xmm1, %xmm10 + movdqa %xmm0, %xmm11 + paddd L_aes_gcm_three(%rip), %xmm11 + pshufb %xmm1, %xmm11 + movdqa %xmm0, %xmm12 + paddd L_aes_gcm_four(%rip), %xmm12 + pshufb %xmm1, %xmm12 + movdqa %xmm0, %xmm13 + paddd L_aes_gcm_five(%rip), %xmm13 + pshufb %xmm1, %xmm13 + movdqa %xmm0, %xmm14 + paddd L_aes_gcm_six(%rip), %xmm14 + pshufb %xmm1, %xmm14 + movdqa %xmm0, %xmm15 + paddd L_aes_gcm_seven(%rip), %xmm15 + pshufb %xmm1, %xmm15 + paddd L_aes_gcm_eight(%rip), %xmm0 + movdqa (%rdi), %xmm7 + movdqa %xmm0, (%r12) + pxor %xmm7, %xmm8 + pxor %xmm7, %xmm9 + pxor %xmm7, %xmm10 + pxor %xmm7, %xmm11 + pxor %xmm7, %xmm12 + pxor %xmm7, %xmm13 + pxor %xmm7, %xmm14 + pxor %xmm7, %xmm15 + movdqa 112(%rsp), %xmm7 + movdqu -128(%rdx), %xmm0 + aesenc 16(%rdi), %xmm8 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm0 + pxor %xmm2, %xmm0 + pshufd $0x4e, %xmm7, %xmm1 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm7, %xmm1 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm3 + pclmulqdq $0x11, %xmm7, %xmm3 + aesenc 16(%rdi), %xmm9 + aesenc 16(%rdi), %xmm10 + movdqa %xmm0, %xmm2 + pclmulqdq $0x00, %xmm7, %xmm2 + aesenc 16(%rdi), %xmm11 + aesenc 16(%rdi), %xmm12 + pclmulqdq $0x00, %xmm5, %xmm1 + aesenc 16(%rdi), %xmm13 + aesenc 16(%rdi), %xmm14 + aesenc 16(%rdi), %xmm15 + pxor %xmm2, %xmm1 + pxor %xmm3, %xmm1 + movdqa 96(%rsp), %xmm7 + movdqu -112(%rdx), 
%xmm0 + pshufd $0x4e, %xmm7, %xmm4 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm0 + aesenc 32(%rdi), %xmm8 + pxor %xmm7, %xmm4 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + aesenc 32(%rdi), %xmm9 + aesenc 32(%rdi), %xmm10 + pclmulqdq $0x00, %xmm0, %xmm7 + aesenc 32(%rdi), %xmm11 + aesenc 32(%rdi), %xmm12 + pclmulqdq $0x00, %xmm5, %xmm4 + aesenc 32(%rdi), %xmm13 + aesenc 32(%rdi), %xmm14 + aesenc 32(%rdi), %xmm15 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, %xmm3 + pxor %xmm4, %xmm1 + movdqa 80(%rsp), %xmm7 + movdqu -96(%rdx), %xmm0 + pshufd $0x4e, %xmm7, %xmm4 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm0 + aesenc 48(%rdi), %xmm8 + pxor %xmm7, %xmm4 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + aesenc 48(%rdi), %xmm9 + aesenc 48(%rdi), %xmm10 + pclmulqdq $0x00, %xmm0, %xmm7 + aesenc 48(%rdi), %xmm11 + aesenc 48(%rdi), %xmm12 + pclmulqdq $0x00, %xmm5, %xmm4 + aesenc 48(%rdi), %xmm13 + aesenc 48(%rdi), %xmm14 + aesenc 48(%rdi), %xmm15 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, %xmm3 + pxor %xmm4, %xmm1 + movdqa 64(%rsp), %xmm7 + movdqu -80(%rdx), %xmm0 + pshufd $0x4e, %xmm7, %xmm4 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm0 + aesenc 64(%rdi), %xmm8 + pxor %xmm7, %xmm4 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + aesenc 64(%rdi), %xmm9 + aesenc 64(%rdi), %xmm10 + pclmulqdq $0x00, %xmm0, %xmm7 + aesenc 64(%rdi), %xmm11 + aesenc 64(%rdi), %xmm12 + pclmulqdq $0x00, %xmm5, %xmm4 + aesenc 64(%rdi), %xmm13 + aesenc 64(%rdi), %xmm14 + aesenc 64(%rdi), %xmm15 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, %xmm3 + pxor %xmm4, %xmm1 + movdqa 48(%rsp), %xmm7 + movdqu -64(%rdx), %xmm0 + pshufd $0x4e, %xmm7, %xmm4 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm0 + aesenc 80(%rdi), %xmm8 + pxor %xmm7, %xmm4 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + aesenc 80(%rdi), %xmm9 + aesenc 80(%rdi), %xmm10 + pclmulqdq $0x00, %xmm0, %xmm7 + aesenc 80(%rdi), %xmm11 + aesenc 80(%rdi), %xmm12 + pclmulqdq $0x00, %xmm5, %xmm4 + aesenc 80(%rdi), %xmm13 + aesenc 80(%rdi), %xmm14 + aesenc 80(%rdi), %xmm15 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, %xmm3 + pxor %xmm4, %xmm1 + movdqa 32(%rsp), %xmm7 + movdqu -48(%rdx), %xmm0 + pshufd $0x4e, %xmm7, %xmm4 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm0 + aesenc 96(%rdi), %xmm8 + pxor %xmm7, %xmm4 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + aesenc 96(%rdi), %xmm9 + aesenc 96(%rdi), %xmm10 + pclmulqdq $0x00, %xmm0, %xmm7 + aesenc 96(%rdi), %xmm11 + aesenc 96(%rdi), %xmm12 + pclmulqdq $0x00, %xmm5, %xmm4 + aesenc 96(%rdi), %xmm13 + aesenc 96(%rdi), %xmm14 + aesenc 96(%rdi), %xmm15 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, %xmm3 + pxor %xmm4, %xmm1 + movdqa 16(%rsp), %xmm7 + movdqu -32(%rdx), %xmm0 + pshufd $0x4e, %xmm7, %xmm4 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm0 + aesenc 112(%rdi), %xmm8 + pxor %xmm7, %xmm4 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + aesenc 112(%rdi), %xmm9 + aesenc 112(%rdi), %xmm10 + pclmulqdq $0x00, %xmm0, %xmm7 + aesenc 112(%rdi), %xmm11 + aesenc 112(%rdi), %xmm12 + pclmulqdq $0x00, %xmm5, %xmm4 + aesenc 112(%rdi), %xmm13 + aesenc 112(%rdi), %xmm14 + aesenc 112(%rdi), 
%xmm15 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, %xmm3 + pxor %xmm4, %xmm1 + movdqa (%rsp), %xmm7 + movdqu -16(%rdx), %xmm0 + pshufd $0x4e, %xmm7, %xmm4 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm0 + aesenc 128(%rdi), %xmm8 + pxor %xmm7, %xmm4 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + aesenc 128(%rdi), %xmm9 + aesenc 128(%rdi), %xmm10 + pclmulqdq $0x00, %xmm0, %xmm7 + aesenc 128(%rdi), %xmm11 + aesenc 128(%rdi), %xmm12 + pclmulqdq $0x00, %xmm5, %xmm4 + aesenc 128(%rdi), %xmm13 + aesenc 128(%rdi), %xmm14 + aesenc 128(%rdi), %xmm15 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, %xmm3 + pxor %xmm4, %xmm1 + movdqa %xmm1, %xmm5 + psrldq $8, %xmm1 + pslldq $8, %xmm5 + aesenc 144(%rdi), %xmm8 + pxor %xmm5, %xmm2 + pxor %xmm1, %xmm3 + movdqa %xmm2, %xmm7 + movdqa %xmm2, %xmm4 + movdqa %xmm2, %xmm5 + aesenc 144(%rdi), %xmm9 + pslld $31, %xmm7 + pslld $30, %xmm4 + pslld $25, %xmm5 + aesenc 144(%rdi), %xmm10 + pxor %xmm4, %xmm7 + pxor %xmm5, %xmm7 + aesenc 144(%rdi), %xmm11 + movdqa %xmm7, %xmm4 + pslldq $12, %xmm7 + psrldq $4, %xmm4 + aesenc 144(%rdi), %xmm12 + pxor %xmm7, %xmm2 + movdqa %xmm2, %xmm5 + movdqa %xmm2, %xmm1 + movdqa %xmm2, %xmm0 + aesenc 144(%rdi), %xmm13 + psrld $0x01, %xmm5 + psrld $2, %xmm1 + psrld $7, %xmm0 + aesenc 144(%rdi), %xmm14 + pxor %xmm1, %xmm5 + pxor %xmm0, %xmm5 + aesenc 144(%rdi), %xmm15 + pxor %xmm4, %xmm5 + pxor %xmm5, %xmm2 + pxor %xmm3, %xmm2 + cmpl $11, %esi + movdqa 160(%rdi), %xmm7 + jl L_AES_GCM_encrypt_update_aesni_aesenc_128_ghash_avx_done + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 176(%rdi), %xmm7 + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + cmpl $13, %esi + movdqa 192(%rdi), %xmm7 + jl L_AES_GCM_encrypt_update_aesni_aesenc_128_ghash_avx_done + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 208(%rdi), %xmm7 + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 224(%rdi), %xmm7 +L_AES_GCM_encrypt_update_aesni_aesenc_128_ghash_avx_done: + aesenclast %xmm7, %xmm8 + aesenclast %xmm7, %xmm9 + movdqu (%rcx), %xmm0 + movdqu 16(%rcx), %xmm1 + pxor %xmm0, %xmm8 + pxor %xmm1, %xmm9 + movdqu %xmm8, (%rdx) + movdqu %xmm9, 16(%rdx) + aesenclast %xmm7, %xmm10 + aesenclast %xmm7, %xmm11 + movdqu 32(%rcx), %xmm0 + movdqu 48(%rcx), %xmm1 + pxor %xmm0, %xmm10 + pxor %xmm1, %xmm11 + movdqu %xmm10, 32(%rdx) + movdqu %xmm11, 48(%rdx) + aesenclast %xmm7, %xmm12 + aesenclast %xmm7, %xmm13 + movdqu 64(%rcx), %xmm0 + movdqu 80(%rcx), %xmm1 + pxor %xmm0, %xmm12 + pxor %xmm1, %xmm13 + movdqu %xmm12, 64(%rdx) + movdqu %xmm13, 80(%rdx) + aesenclast %xmm7, %xmm14 + aesenclast %xmm7, %xmm15 + movdqu 96(%rcx), %xmm0 + movdqu 112(%rcx), %xmm1 + pxor %xmm0, %xmm14 + pxor %xmm1, %xmm15 + movdqu %xmm14, 96(%rdx) + movdqu %xmm15, 112(%rdx) + addl $0x80, %r14d + cmpl %r13d, %r14d + jl L_AES_GCM_encrypt_update_aesni_ghash_128 +L_AES_GCM_encrypt_update_aesni_end_128: + movdqa L_aes_gcm_bswap_mask(%rip), %xmm4 + pshufb %xmm4, 
%xmm8 + pshufb %xmm4, %xmm9 + pshufb %xmm4, %xmm10 + pshufb %xmm4, %xmm11 + pxor %xmm2, %xmm8 + pshufb %xmm4, %xmm12 + pshufb %xmm4, %xmm13 + pshufb %xmm4, %xmm14 + pshufb %xmm4, %xmm15 + movdqa 112(%rsp), %xmm7 + pshufd $0x4e, %xmm8, %xmm1 + pshufd $0x4e, %xmm7, %xmm2 + movdqa %xmm7, %xmm3 + movdqa %xmm7, %xmm0 + pclmulqdq $0x11, %xmm8, %xmm3 + pclmulqdq $0x00, %xmm8, %xmm0 + pxor %xmm8, %xmm1 + pxor %xmm7, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + movdqa %xmm0, %xmm4 + movdqa %xmm3, %xmm6 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm4 + pxor %xmm1, %xmm6 + movdqa 96(%rsp), %xmm7 + pshufd $0x4e, %xmm9, %xmm1 + pshufd $0x4e, %xmm7, %xmm2 + movdqa %xmm7, %xmm3 + movdqa %xmm7, %xmm0 + pclmulqdq $0x11, %xmm9, %xmm3 + pclmulqdq $0x00, %xmm9, %xmm0 + pxor %xmm9, %xmm1 + pxor %xmm7, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + pxor %xmm0, %xmm4 + pxor %xmm3, %xmm6 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm4 + pxor %xmm1, %xmm6 + movdqa 80(%rsp), %xmm7 + pshufd $0x4e, %xmm10, %xmm1 + pshufd $0x4e, %xmm7, %xmm2 + movdqa %xmm7, %xmm3 + movdqa %xmm7, %xmm0 + pclmulqdq $0x11, %xmm10, %xmm3 + pclmulqdq $0x00, %xmm10, %xmm0 + pxor %xmm10, %xmm1 + pxor %xmm7, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + pxor %xmm0, %xmm4 + pxor %xmm3, %xmm6 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm4 + pxor %xmm1, %xmm6 + movdqa 64(%rsp), %xmm7 + pshufd $0x4e, %xmm11, %xmm1 + pshufd $0x4e, %xmm7, %xmm2 + movdqa %xmm7, %xmm3 + movdqa %xmm7, %xmm0 + pclmulqdq $0x11, %xmm11, %xmm3 + pclmulqdq $0x00, %xmm11, %xmm0 + pxor %xmm11, %xmm1 + pxor %xmm7, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + pxor %xmm0, %xmm4 + pxor %xmm3, %xmm6 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm4 + pxor %xmm1, %xmm6 + movdqa 48(%rsp), %xmm7 + pshufd $0x4e, %xmm12, %xmm1 + pshufd $0x4e, %xmm7, %xmm2 + movdqa %xmm7, %xmm3 + movdqa %xmm7, %xmm0 + pclmulqdq $0x11, %xmm12, %xmm3 + pclmulqdq $0x00, %xmm12, %xmm0 + pxor %xmm12, %xmm1 + pxor %xmm7, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + pxor %xmm0, %xmm4 + pxor %xmm3, %xmm6 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm4 + pxor %xmm1, %xmm6 + movdqa 32(%rsp), %xmm7 + pshufd $0x4e, %xmm13, %xmm1 + pshufd $0x4e, %xmm7, %xmm2 + movdqa %xmm7, %xmm3 + movdqa %xmm7, %xmm0 + pclmulqdq $0x11, %xmm13, %xmm3 + pclmulqdq $0x00, %xmm13, %xmm0 + pxor %xmm13, %xmm1 + pxor %xmm7, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + pxor %xmm0, %xmm4 + pxor %xmm3, %xmm6 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm4 + pxor %xmm1, %xmm6 + movdqa 16(%rsp), %xmm7 + pshufd $0x4e, %xmm14, %xmm1 + pshufd $0x4e, %xmm7, %xmm2 + movdqa %xmm7, %xmm3 + movdqa %xmm7, %xmm0 + pclmulqdq $0x11, %xmm14, %xmm3 + pclmulqdq $0x00, %xmm14, %xmm0 + pxor %xmm14, %xmm1 + pxor %xmm7, %xmm2 + pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + pxor %xmm0, %xmm4 + pxor %xmm3, %xmm6 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm4 + pxor %xmm1, %xmm6 + movdqa (%rsp), %xmm7 + pshufd $0x4e, %xmm15, %xmm1 + pshufd $0x4e, %xmm7, %xmm2 + movdqa %xmm7, %xmm3 + movdqa %xmm7, %xmm0 + pclmulqdq $0x11, %xmm15, %xmm3 + pclmulqdq $0x00, %xmm15, %xmm0 + pxor %xmm15, %xmm1 + pxor %xmm7, %xmm2 + 
pclmulqdq $0x00, %xmm2, %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm3, %xmm1 + movdqa %xmm1, %xmm2 + pxor %xmm0, %xmm4 + pxor %xmm3, %xmm6 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + pxor %xmm2, %xmm4 + pxor %xmm1, %xmm6 + movdqa %xmm4, %xmm0 + movdqa %xmm4, %xmm1 + movdqa %xmm4, %xmm2 + pslld $31, %xmm0 + pslld $30, %xmm1 + pslld $25, %xmm2 + pxor %xmm1, %xmm0 + pxor %xmm2, %xmm0 + movdqa %xmm0, %xmm1 + psrldq $4, %xmm1 + pslldq $12, %xmm0 + pxor %xmm0, %xmm4 + movdqa %xmm4, %xmm2 + movdqa %xmm4, %xmm3 + movdqa %xmm4, %xmm0 + psrld $0x01, %xmm2 + psrld $2, %xmm3 + psrld $7, %xmm0 + pxor %xmm3, %xmm2 + pxor %xmm0, %xmm2 + pxor %xmm1, %xmm2 + pxor %xmm4, %xmm2 + pxor %xmm2, %xmm6 + movdqa (%rsp), %xmm5 +L_AES_GCM_encrypt_update_aesni_done_128: + movl %r8d, %edx + cmpl %edx, %r14d + jge L_AES_GCM_encrypt_update_aesni_done_enc + movl %r8d, %r13d + andl $0xfffffff0, %r13d + cmpl %r13d, %r14d + jge L_AES_GCM_encrypt_update_aesni_last_block_done + leaq (%r11,%r14,1), %rcx + leaq (%r10,%r14,1), %rdx + movdqa (%r12), %xmm8 + movdqa %xmm8, %xmm9 + pshufb L_aes_gcm_bswap_epi64(%rip), %xmm8 + paddd L_aes_gcm_one(%rip), %xmm9 + pxor (%rdi), %xmm8 + movdqa %xmm9, (%r12) + aesenc 16(%rdi), %xmm8 + aesenc 32(%rdi), %xmm8 + aesenc 48(%rdi), %xmm8 + aesenc 64(%rdi), %xmm8 + aesenc 80(%rdi), %xmm8 + aesenc 96(%rdi), %xmm8 + aesenc 112(%rdi), %xmm8 + aesenc 128(%rdi), %xmm8 + aesenc 144(%rdi), %xmm8 + cmpl $11, %esi + movdqa 160(%rdi), %xmm9 + jl L_AES_GCM_encrypt_update_aesni_aesenc_block_aesenc_avx_last + aesenc %xmm9, %xmm8 + aesenc 176(%rdi), %xmm8 + cmpl $13, %esi + movdqa 192(%rdi), %xmm9 + jl L_AES_GCM_encrypt_update_aesni_aesenc_block_aesenc_avx_last + aesenc %xmm9, %xmm8 + aesenc 208(%rdi), %xmm8 + movdqa 224(%rdi), %xmm9 +L_AES_GCM_encrypt_update_aesni_aesenc_block_aesenc_avx_last: + aesenclast %xmm9, %xmm8 + movdqu (%rcx), %xmm9 + pxor %xmm9, %xmm8 + movdqu %xmm8, (%rdx) + pshufb L_aes_gcm_bswap_mask(%rip), %xmm8 + pxor %xmm8, %xmm6 + addl $16, %r14d + cmpl %r13d, %r14d + jge L_AES_GCM_encrypt_update_aesni_last_block_ghash +L_AES_GCM_encrypt_update_aesni_last_block_start: + leaq (%r11,%r14,1), %rcx + leaq (%r10,%r14,1), %rdx + movdqa (%r12), %xmm8 + movdqa %xmm8, %xmm9 + pshufb L_aes_gcm_bswap_epi64(%rip), %xmm8 + paddd L_aes_gcm_one(%rip), %xmm9 + pxor (%rdi), %xmm8 + movdqa %xmm9, (%r12) + movdqa %xmm6, %xmm10 + pclmulqdq $16, %xmm5, %xmm10 + aesenc 16(%rdi), %xmm8 + aesenc 32(%rdi), %xmm8 + movdqa %xmm6, %xmm11 + pclmulqdq $0x01, %xmm5, %xmm11 + aesenc 48(%rdi), %xmm8 + aesenc 64(%rdi), %xmm8 + movdqa %xmm6, %xmm12 + pclmulqdq $0x00, %xmm5, %xmm12 + aesenc 80(%rdi), %xmm8 + movdqa %xmm6, %xmm1 + pclmulqdq $0x11, %xmm5, %xmm1 + aesenc 96(%rdi), %xmm8 + pxor %xmm11, %xmm10 + movdqa %xmm10, %xmm2 + psrldq $8, %xmm10 + pslldq $8, %xmm2 + aesenc 112(%rdi), %xmm8 + movdqa %xmm1, %xmm3 + pxor %xmm12, %xmm2 + pxor %xmm10, %xmm3 + movdqa L_aes_gcm_mod2_128(%rip), %xmm0 + movdqa %xmm2, %xmm11 + pclmulqdq $16, %xmm0, %xmm11 + aesenc 128(%rdi), %xmm8 + pshufd $0x4e, %xmm2, %xmm10 + pxor %xmm11, %xmm10 + movdqa %xmm10, %xmm11 + pclmulqdq $16, %xmm0, %xmm11 + aesenc 144(%rdi), %xmm8 + pshufd $0x4e, %xmm10, %xmm6 + pxor %xmm11, %xmm6 + pxor %xmm3, %xmm6 + cmpl $11, %esi + movdqa 160(%rdi), %xmm9 + jl L_AES_GCM_encrypt_update_aesni_aesenc_gfmul_last + aesenc %xmm9, %xmm8 + aesenc 176(%rdi), %xmm8 + cmpl $13, %esi + movdqa 192(%rdi), %xmm9 + jl L_AES_GCM_encrypt_update_aesni_aesenc_gfmul_last + aesenc %xmm9, %xmm8 + aesenc 208(%rdi), %xmm8 + movdqa 224(%rdi), %xmm9 +L_AES_GCM_encrypt_update_aesni_aesenc_gfmul_last: + 
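The pshufb L_aes_gcm_bswap_epi64 / paddd L_aes_gcm_one pairs above keep the counter byte-swapped so a 32-bit add can be used; in plain byte terms each step is a big-endian increment of the counter block's last word, roughly (illustrative, not the patch's code):

/* Increment the rightmost 32 bits of a GCM counter block, big-endian. */
static void gcm_inc_counter32(unsigned char ctr[16])
{
    int i;
    for (i = 15; i >= 12; i--) {
        if (++ctr[i] != 0)
            break;   /* stop once a byte does not wrap to zero */
    }
}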
aesenclast %xmm9, %xmm8 + movdqu (%rcx), %xmm9 + pxor %xmm9, %xmm8 + movdqu %xmm8, (%rdx) + pshufb L_aes_gcm_bswap_mask(%rip), %xmm8 + pxor %xmm8, %xmm6 + addl $16, %r14d + cmpl %r13d, %r14d + jl L_AES_GCM_encrypt_update_aesni_last_block_start +L_AES_GCM_encrypt_update_aesni_last_block_ghash: + pshufd $0x4e, %xmm5, %xmm9 + pshufd $0x4e, %xmm6, %xmm10 + movdqa %xmm6, %xmm11 + movdqa %xmm6, %xmm8 + pclmulqdq $0x11, %xmm5, %xmm11 + pclmulqdq $0x00, %xmm5, %xmm8 + pxor %xmm5, %xmm9 + pxor %xmm6, %xmm10 + pclmulqdq $0x00, %xmm10, %xmm9 + pxor %xmm8, %xmm9 + pxor %xmm11, %xmm9 + movdqa %xmm9, %xmm10 + movdqa %xmm11, %xmm6 + pslldq $8, %xmm10 + psrldq $8, %xmm9 + pxor %xmm10, %xmm8 + pxor %xmm9, %xmm6 + movdqa %xmm8, %xmm12 + movdqa %xmm8, %xmm13 + movdqa %xmm8, %xmm14 + pslld $31, %xmm12 + pslld $30, %xmm13 + pslld $25, %xmm14 + pxor %xmm13, %xmm12 + pxor %xmm14, %xmm12 + movdqa %xmm12, %xmm13 + psrldq $4, %xmm13 + pslldq $12, %xmm12 + pxor %xmm12, %xmm8 + movdqa %xmm8, %xmm14 + movdqa %xmm8, %xmm10 + movdqa %xmm8, %xmm9 + psrld $0x01, %xmm14 + psrld $2, %xmm10 + psrld $7, %xmm9 + pxor %xmm10, %xmm14 + pxor %xmm9, %xmm14 + pxor %xmm13, %xmm14 + pxor %xmm8, %xmm14 + pxor %xmm14, %xmm6 +L_AES_GCM_encrypt_update_aesni_last_block_done: +L_AES_GCM_encrypt_update_aesni_done_enc: + movdqa %xmm6, (%r9) + addq $0xa0, %rsp + popq %r14 + popq %r12 + popq %r13 + repz retq +#ifndef __APPLE__ +.size AES_GCM_encrypt_update_aesni,.-AES_GCM_encrypt_update_aesni +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl AES_GCM_encrypt_final_aesni +.type AES_GCM_encrypt_final_aesni,@function +.align 16 +AES_GCM_encrypt_final_aesni: +#else +.section __TEXT,__text +.globl _AES_GCM_encrypt_final_aesni +.p2align 4 +_AES_GCM_encrypt_final_aesni: +#endif /* __APPLE__ */ + pushq %r13 + movq %rdx, %rax + movl %ecx, %r10d + movl %r8d, %r11d + movq 16(%rsp), %r8 + subq $16, %rsp + movdqa (%rdi), %xmm4 + movdqa (%r9), %xmm5 + movdqa (%r8), %xmm6 + movdqa %xmm5, %xmm9 + movdqa %xmm5, %xmm8 + psrlq $63, %xmm9 + psllq $0x01, %xmm8 + pslldq $8, %xmm9 + por %xmm9, %xmm8 + pshufd $0xff, %xmm5, %xmm5 + psrad $31, %xmm5 + pand L_aes_gcm_mod2_128(%rip), %xmm5 + pxor %xmm8, %xmm5 + movl %r10d, %edx + movl %r11d, %ecx + shlq $3, %rdx + shlq $3, %rcx + pinsrq $0x00, %rdx, %xmm0 + pinsrq $0x01, %rcx, %xmm0 + pxor %xmm0, %xmm4 + pshufd $0x4e, %xmm5, %xmm9 + pshufd $0x4e, %xmm4, %xmm10 + movdqa %xmm4, %xmm11 + movdqa %xmm4, %xmm8 + pclmulqdq $0x11, %xmm5, %xmm11 + pclmulqdq $0x00, %xmm5, %xmm8 + pxor %xmm5, %xmm9 + pxor %xmm4, %xmm10 + pclmulqdq $0x00, %xmm10, %xmm9 + pxor %xmm8, %xmm9 + pxor %xmm11, %xmm9 + movdqa %xmm9, %xmm10 + movdqa %xmm11, %xmm4 + pslldq $8, %xmm10 + psrldq $8, %xmm9 + pxor %xmm10, %xmm8 + pxor %xmm9, %xmm4 + movdqa %xmm8, %xmm12 + movdqa %xmm8, %xmm13 + movdqa %xmm8, %xmm14 + pslld $31, %xmm12 + pslld $30, %xmm13 + pslld $25, %xmm14 + pxor %xmm13, %xmm12 + pxor %xmm14, %xmm12 + movdqa %xmm12, %xmm13 + psrldq $4, %xmm13 + pslldq $12, %xmm12 + pxor %xmm12, %xmm8 + movdqa %xmm8, %xmm14 + movdqa %xmm8, %xmm10 + movdqa %xmm8, %xmm9 + psrld $0x01, %xmm14 + psrld $2, %xmm10 + psrld $7, %xmm9 + pxor %xmm10, %xmm14 + pxor %xmm9, %xmm14 + pxor %xmm13, %xmm14 + pxor %xmm8, %xmm14 + pxor %xmm14, %xmm4 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm4 + movdqa %xmm6, %xmm0 + pxor %xmm4, %xmm0 + cmpl $16, %eax + je L_AES_GCM_encrypt_final_aesni_store_tag_16 + xorq %rcx, %rcx + movdqa %xmm0, (%rsp) +L_AES_GCM_encrypt_final_aesni_store_tag_loop: + movzbl (%rsp,%rcx,1), %r13d + movb %r13b, (%rsi,%rcx,1) + incl %ecx + cmpl %eax, %ecx + jne 
L_AES_GCM_encrypt_final_aesni_store_tag_loop + jmp L_AES_GCM_encrypt_final_aesni_store_tag_done +L_AES_GCM_encrypt_final_aesni_store_tag_16: + movdqu %xmm0, (%rsi) +L_AES_GCM_encrypt_final_aesni_store_tag_done: + addq $16, %rsp + popq %r13 + repz retq +#ifndef __APPLE__ +.size AES_GCM_encrypt_final_aesni,.-AES_GCM_encrypt_final_aesni +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl AES_GCM_decrypt_update_aesni +.type AES_GCM_decrypt_update_aesni,@function +.align 16 +AES_GCM_decrypt_update_aesni: +#else +.section __TEXT,__text +.globl _AES_GCM_decrypt_update_aesni +.p2align 4 +_AES_GCM_decrypt_update_aesni: +#endif /* __APPLE__ */ + pushq %r13 + pushq %r12 + pushq %r14 + pushq %r15 + movq %rdx, %r10 + movq %rcx, %r11 + movq 40(%rsp), %rax + movq 48(%rsp), %r12 + subq $0xa8, %rsp + movdqa (%r9), %xmm6 + movdqa (%rax), %xmm5 + movdqa %xmm5, %xmm9 + movdqa %xmm5, %xmm8 + psrlq $63, %xmm9 + psllq $0x01, %xmm8 + pslldq $8, %xmm9 + por %xmm9, %xmm8 + pshufd $0xff, %xmm5, %xmm5 + psrad $31, %xmm5 + pand L_aes_gcm_mod2_128(%rip), %xmm5 + pxor %xmm8, %xmm5 + xorl %r14d, %r14d + cmpl $0x80, %r8d + movl %r8d, %r13d + jl L_AES_GCM_decrypt_update_aesni_done_128 + andl $0xffffff80, %r13d + movdqa %xmm6, %xmm2 + # H ^ 1 + movdqa %xmm5, (%rsp) + # H ^ 2 + pshufd $0x4e, %xmm5, %xmm9 + pshufd $0x4e, %xmm5, %xmm10 + movdqa %xmm5, %xmm11 + movdqa %xmm5, %xmm8 + pclmulqdq $0x11, %xmm5, %xmm11 + pclmulqdq $0x00, %xmm5, %xmm8 + pxor %xmm5, %xmm9 + pxor %xmm5, %xmm10 + pclmulqdq $0x00, %xmm10, %xmm9 + pxor %xmm8, %xmm9 + pxor %xmm11, %xmm9 + movdqa %xmm9, %xmm10 + movdqa %xmm11, %xmm0 + pslldq $8, %xmm10 + psrldq $8, %xmm9 + pxor %xmm10, %xmm8 + pxor %xmm9, %xmm0 + movdqa %xmm8, %xmm12 + movdqa %xmm8, %xmm13 + movdqa %xmm8, %xmm14 + pslld $31, %xmm12 + pslld $30, %xmm13 + pslld $25, %xmm14 + pxor %xmm13, %xmm12 + pxor %xmm14, %xmm12 + movdqa %xmm12, %xmm13 + psrldq $4, %xmm13 + pslldq $12, %xmm12 + pxor %xmm12, %xmm8 + movdqa %xmm8, %xmm14 + movdqa %xmm8, %xmm10 + movdqa %xmm8, %xmm9 + psrld $0x01, %xmm14 + psrld $2, %xmm10 + psrld $7, %xmm9 + pxor %xmm10, %xmm14 + pxor %xmm9, %xmm14 + pxor %xmm13, %xmm14 + pxor %xmm8, %xmm14 + pxor %xmm14, %xmm0 + movdqa %xmm0, 16(%rsp) + # H ^ 3 + pshufd $0x4e, %xmm5, %xmm9 + pshufd $0x4e, %xmm0, %xmm10 + movdqa %xmm0, %xmm11 + movdqa %xmm0, %xmm8 + pclmulqdq $0x11, %xmm5, %xmm11 + pclmulqdq $0x00, %xmm5, %xmm8 + pxor %xmm5, %xmm9 + pxor %xmm0, %xmm10 + pclmulqdq $0x00, %xmm10, %xmm9 + pxor %xmm8, %xmm9 + pxor %xmm11, %xmm9 + movdqa %xmm9, %xmm10 + movdqa %xmm11, %xmm1 + pslldq $8, %xmm10 + psrldq $8, %xmm9 + pxor %xmm10, %xmm8 + pxor %xmm9, %xmm1 + movdqa %xmm8, %xmm12 + movdqa %xmm8, %xmm13 + movdqa %xmm8, %xmm14 + pslld $31, %xmm12 + pslld $30, %xmm13 + pslld $25, %xmm14 + pxor %xmm13, %xmm12 + pxor %xmm14, %xmm12 + movdqa %xmm12, %xmm13 + psrldq $4, %xmm13 + pslldq $12, %xmm12 + pxor %xmm12, %xmm8 + movdqa %xmm8, %xmm14 + movdqa %xmm8, %xmm10 + movdqa %xmm8, %xmm9 + psrld $0x01, %xmm14 + psrld $2, %xmm10 + psrld $7, %xmm9 + pxor %xmm10, %xmm14 + pxor %xmm9, %xmm14 + pxor %xmm13, %xmm14 + pxor %xmm8, %xmm14 + pxor %xmm14, %xmm1 + movdqa %xmm1, 32(%rsp) + # H ^ 4 + pshufd $0x4e, %xmm0, %xmm9 + pshufd $0x4e, %xmm0, %xmm10 + movdqa %xmm0, %xmm11 + movdqa %xmm0, %xmm8 + pclmulqdq $0x11, %xmm0, %xmm11 + pclmulqdq $0x00, %xmm0, %xmm8 + pxor %xmm0, %xmm9 + pxor %xmm0, %xmm10 + pclmulqdq $0x00, %xmm10, %xmm9 + pxor %xmm8, %xmm9 + pxor %xmm11, %xmm9 + movdqa %xmm9, %xmm10 + movdqa %xmm11, %xmm3 + pslldq $8, %xmm10 + psrldq $8, %xmm9 + pxor %xmm10, %xmm8 + pxor %xmm9, %xmm3 + 
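AES_GCM_encrypt_final_aesni above folds the bit lengths of the AAD and ciphertext into the GHASH state before XORing with the encrypted initial counter to form the tag. The length block it hashes can be built as follows (illustrative, per the GCM specification, not the patch's code):

/* Illustrative: build the final GHASH length block from the byte counts
 * of the AAD and ciphertext (bit counts, big-endian halves). */
static void gcm_len_block(unsigned char block[16],
                          unsigned long long aadSz, unsigned long long cSz)
{
    unsigned long long aadBits = aadSz * 8;
    unsigned long long cBits   = cSz * 8;
    int i;

    for (i = 0; i < 8; i++) {
        block[7 - i]  = (unsigned char)(aadBits >> (8 * i));
        block[15 - i] = (unsigned char)(cBits   >> (8 * i));
    }
}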
movdqa %xmm8, %xmm12 + movdqa %xmm8, %xmm13 + movdqa %xmm8, %xmm14 + pslld $31, %xmm12 + pslld $30, %xmm13 + pslld $25, %xmm14 + pxor %xmm13, %xmm12 + pxor %xmm14, %xmm12 + movdqa %xmm12, %xmm13 + psrldq $4, %xmm13 + pslldq $12, %xmm12 + pxor %xmm12, %xmm8 + movdqa %xmm8, %xmm14 + movdqa %xmm8, %xmm10 + movdqa %xmm8, %xmm9 + psrld $0x01, %xmm14 + psrld $2, %xmm10 + psrld $7, %xmm9 + pxor %xmm10, %xmm14 + pxor %xmm9, %xmm14 + pxor %xmm13, %xmm14 + pxor %xmm8, %xmm14 + pxor %xmm14, %xmm3 + movdqa %xmm3, 48(%rsp) + # H ^ 5 + pshufd $0x4e, %xmm0, %xmm9 + pshufd $0x4e, %xmm1, %xmm10 + movdqa %xmm1, %xmm11 + movdqa %xmm1, %xmm8 + pclmulqdq $0x11, %xmm0, %xmm11 + pclmulqdq $0x00, %xmm0, %xmm8 + pxor %xmm0, %xmm9 + pxor %xmm1, %xmm10 + pclmulqdq $0x00, %xmm10, %xmm9 + pxor %xmm8, %xmm9 + pxor %xmm11, %xmm9 + movdqa %xmm9, %xmm10 + movdqa %xmm11, %xmm7 + pslldq $8, %xmm10 + psrldq $8, %xmm9 + pxor %xmm10, %xmm8 + pxor %xmm9, %xmm7 + movdqa %xmm8, %xmm12 + movdqa %xmm8, %xmm13 + movdqa %xmm8, %xmm14 + pslld $31, %xmm12 + pslld $30, %xmm13 + pslld $25, %xmm14 + pxor %xmm13, %xmm12 + pxor %xmm14, %xmm12 + movdqa %xmm12, %xmm13 + psrldq $4, %xmm13 + pslldq $12, %xmm12 + pxor %xmm12, %xmm8 + movdqa %xmm8, %xmm14 + movdqa %xmm8, %xmm10 + movdqa %xmm8, %xmm9 + psrld $0x01, %xmm14 + psrld $2, %xmm10 + psrld $7, %xmm9 + pxor %xmm10, %xmm14 + pxor %xmm9, %xmm14 + pxor %xmm13, %xmm14 + pxor %xmm8, %xmm14 + pxor %xmm14, %xmm7 + movdqa %xmm7, 64(%rsp) + # H ^ 6 + pshufd $0x4e, %xmm1, %xmm9 + pshufd $0x4e, %xmm1, %xmm10 + movdqa %xmm1, %xmm11 + movdqa %xmm1, %xmm8 + pclmulqdq $0x11, %xmm1, %xmm11 + pclmulqdq $0x00, %xmm1, %xmm8 + pxor %xmm1, %xmm9 + pxor %xmm1, %xmm10 + pclmulqdq $0x00, %xmm10, %xmm9 + pxor %xmm8, %xmm9 + pxor %xmm11, %xmm9 + movdqa %xmm9, %xmm10 + movdqa %xmm11, %xmm7 + pslldq $8, %xmm10 + psrldq $8, %xmm9 + pxor %xmm10, %xmm8 + pxor %xmm9, %xmm7 + movdqa %xmm8, %xmm12 + movdqa %xmm8, %xmm13 + movdqa %xmm8, %xmm14 + pslld $31, %xmm12 + pslld $30, %xmm13 + pslld $25, %xmm14 + pxor %xmm13, %xmm12 + pxor %xmm14, %xmm12 + movdqa %xmm12, %xmm13 + psrldq $4, %xmm13 + pslldq $12, %xmm12 + pxor %xmm12, %xmm8 + movdqa %xmm8, %xmm14 + movdqa %xmm8, %xmm10 + movdqa %xmm8, %xmm9 + psrld $0x01, %xmm14 + psrld $2, %xmm10 + psrld $7, %xmm9 + pxor %xmm10, %xmm14 + pxor %xmm9, %xmm14 + pxor %xmm13, %xmm14 + pxor %xmm8, %xmm14 + pxor %xmm14, %xmm7 + movdqa %xmm7, 80(%rsp) + # H ^ 7 + pshufd $0x4e, %xmm1, %xmm9 + pshufd $0x4e, %xmm3, %xmm10 + movdqa %xmm3, %xmm11 + movdqa %xmm3, %xmm8 + pclmulqdq $0x11, %xmm1, %xmm11 + pclmulqdq $0x00, %xmm1, %xmm8 + pxor %xmm1, %xmm9 + pxor %xmm3, %xmm10 + pclmulqdq $0x00, %xmm10, %xmm9 + pxor %xmm8, %xmm9 + pxor %xmm11, %xmm9 + movdqa %xmm9, %xmm10 + movdqa %xmm11, %xmm7 + pslldq $8, %xmm10 + psrldq $8, %xmm9 + pxor %xmm10, %xmm8 + pxor %xmm9, %xmm7 + movdqa %xmm8, %xmm12 + movdqa %xmm8, %xmm13 + movdqa %xmm8, %xmm14 + pslld $31, %xmm12 + pslld $30, %xmm13 + pslld $25, %xmm14 + pxor %xmm13, %xmm12 + pxor %xmm14, %xmm12 + movdqa %xmm12, %xmm13 + psrldq $4, %xmm13 + pslldq $12, %xmm12 + pxor %xmm12, %xmm8 + movdqa %xmm8, %xmm14 + movdqa %xmm8, %xmm10 + movdqa %xmm8, %xmm9 + psrld $0x01, %xmm14 + psrld $2, %xmm10 + psrld $7, %xmm9 + pxor %xmm10, %xmm14 + pxor %xmm9, %xmm14 + pxor %xmm13, %xmm14 + pxor %xmm8, %xmm14 + pxor %xmm14, %xmm7 + movdqa %xmm7, 96(%rsp) + # H ^ 8 + pshufd $0x4e, %xmm3, %xmm9 + pshufd $0x4e, %xmm3, %xmm10 + movdqa %xmm3, %xmm11 + movdqa %xmm3, %xmm8 + pclmulqdq $0x11, %xmm3, %xmm11 + pclmulqdq $0x00, %xmm3, %xmm8 + pxor %xmm3, %xmm9 + pxor %xmm3, %xmm10 + 
pclmulqdq $0x00, %xmm10, %xmm9 + pxor %xmm8, %xmm9 + pxor %xmm11, %xmm9 + movdqa %xmm9, %xmm10 + movdqa %xmm11, %xmm7 + pslldq $8, %xmm10 + psrldq $8, %xmm9 + pxor %xmm10, %xmm8 + pxor %xmm9, %xmm7 + movdqa %xmm8, %xmm12 + movdqa %xmm8, %xmm13 + movdqa %xmm8, %xmm14 + pslld $31, %xmm12 + pslld $30, %xmm13 + pslld $25, %xmm14 + pxor %xmm13, %xmm12 + pxor %xmm14, %xmm12 + movdqa %xmm12, %xmm13 + psrldq $4, %xmm13 + pslldq $12, %xmm12 + pxor %xmm12, %xmm8 + movdqa %xmm8, %xmm14 + movdqa %xmm8, %xmm10 + movdqa %xmm8, %xmm9 + psrld $0x01, %xmm14 + psrld $2, %xmm10 + psrld $7, %xmm9 + pxor %xmm10, %xmm14 + pxor %xmm9, %xmm14 + pxor %xmm13, %xmm14 + pxor %xmm8, %xmm14 + pxor %xmm14, %xmm7 + movdqa %xmm7, 112(%rsp) +L_AES_GCM_decrypt_update_aesni_ghash_128: + leaq (%r11,%r14,1), %rcx + leaq (%r10,%r14,1), %rdx + movdqa (%r12), %xmm8 + movdqa L_aes_gcm_bswap_epi64(%rip), %xmm1 + movdqa %xmm8, %xmm0 + pshufb %xmm1, %xmm8 + movdqa %xmm0, %xmm9 + paddd L_aes_gcm_one(%rip), %xmm9 + pshufb %xmm1, %xmm9 + movdqa %xmm0, %xmm10 + paddd L_aes_gcm_two(%rip), %xmm10 + pshufb %xmm1, %xmm10 + movdqa %xmm0, %xmm11 + paddd L_aes_gcm_three(%rip), %xmm11 + pshufb %xmm1, %xmm11 + movdqa %xmm0, %xmm12 + paddd L_aes_gcm_four(%rip), %xmm12 + pshufb %xmm1, %xmm12 + movdqa %xmm0, %xmm13 + paddd L_aes_gcm_five(%rip), %xmm13 + pshufb %xmm1, %xmm13 + movdqa %xmm0, %xmm14 + paddd L_aes_gcm_six(%rip), %xmm14 + pshufb %xmm1, %xmm14 + movdqa %xmm0, %xmm15 + paddd L_aes_gcm_seven(%rip), %xmm15 + pshufb %xmm1, %xmm15 + paddd L_aes_gcm_eight(%rip), %xmm0 + movdqa (%rdi), %xmm7 + movdqa %xmm0, (%r12) + pxor %xmm7, %xmm8 + pxor %xmm7, %xmm9 + pxor %xmm7, %xmm10 + pxor %xmm7, %xmm11 + pxor %xmm7, %xmm12 + pxor %xmm7, %xmm13 + pxor %xmm7, %xmm14 + pxor %xmm7, %xmm15 + movdqa 112(%rsp), %xmm7 + movdqu (%rcx), %xmm0 + aesenc 16(%rdi), %xmm8 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm0 + pxor %xmm2, %xmm0 + pshufd $0x4e, %xmm7, %xmm1 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm7, %xmm1 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm3 + pclmulqdq $0x11, %xmm7, %xmm3 + aesenc 16(%rdi), %xmm9 + aesenc 16(%rdi), %xmm10 + movdqa %xmm0, %xmm2 + pclmulqdq $0x00, %xmm7, %xmm2 + aesenc 16(%rdi), %xmm11 + aesenc 16(%rdi), %xmm12 + pclmulqdq $0x00, %xmm5, %xmm1 + aesenc 16(%rdi), %xmm13 + aesenc 16(%rdi), %xmm14 + aesenc 16(%rdi), %xmm15 + pxor %xmm2, %xmm1 + pxor %xmm3, %xmm1 + movdqa 96(%rsp), %xmm7 + movdqu 16(%rcx), %xmm0 + pshufd $0x4e, %xmm7, %xmm4 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm0 + aesenc 32(%rdi), %xmm8 + pxor %xmm7, %xmm4 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + aesenc 32(%rdi), %xmm9 + aesenc 32(%rdi), %xmm10 + pclmulqdq $0x00, %xmm0, %xmm7 + aesenc 32(%rdi), %xmm11 + aesenc 32(%rdi), %xmm12 + pclmulqdq $0x00, %xmm5, %xmm4 + aesenc 32(%rdi), %xmm13 + aesenc 32(%rdi), %xmm14 + aesenc 32(%rdi), %xmm15 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, %xmm3 + pxor %xmm4, %xmm1 + movdqa 80(%rsp), %xmm7 + movdqu 32(%rcx), %xmm0 + pshufd $0x4e, %xmm7, %xmm4 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm0 + aesenc 48(%rdi), %xmm8 + pxor %xmm7, %xmm4 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + aesenc 48(%rdi), %xmm9 + aesenc 48(%rdi), %xmm10 + pclmulqdq $0x00, %xmm0, %xmm7 + aesenc 48(%rdi), %xmm11 + aesenc 48(%rdi), %xmm12 + pclmulqdq $0x00, %xmm5, %xmm4 + aesenc 48(%rdi), %xmm13 + aesenc 48(%rdi), %xmm14 + aesenc 48(%rdi), %xmm15 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, 
%xmm3 + pxor %xmm4, %xmm1 + movdqa 64(%rsp), %xmm7 + movdqu 48(%rcx), %xmm0 + pshufd $0x4e, %xmm7, %xmm4 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm0 + aesenc 64(%rdi), %xmm8 + pxor %xmm7, %xmm4 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + aesenc 64(%rdi), %xmm9 + aesenc 64(%rdi), %xmm10 + pclmulqdq $0x00, %xmm0, %xmm7 + aesenc 64(%rdi), %xmm11 + aesenc 64(%rdi), %xmm12 + pclmulqdq $0x00, %xmm5, %xmm4 + aesenc 64(%rdi), %xmm13 + aesenc 64(%rdi), %xmm14 + aesenc 64(%rdi), %xmm15 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, %xmm3 + pxor %xmm4, %xmm1 + movdqa 48(%rsp), %xmm7 + movdqu 64(%rcx), %xmm0 + pshufd $0x4e, %xmm7, %xmm4 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm0 + aesenc 80(%rdi), %xmm8 + pxor %xmm7, %xmm4 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + aesenc 80(%rdi), %xmm9 + aesenc 80(%rdi), %xmm10 + pclmulqdq $0x00, %xmm0, %xmm7 + aesenc 80(%rdi), %xmm11 + aesenc 80(%rdi), %xmm12 + pclmulqdq $0x00, %xmm5, %xmm4 + aesenc 80(%rdi), %xmm13 + aesenc 80(%rdi), %xmm14 + aesenc 80(%rdi), %xmm15 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, %xmm3 + pxor %xmm4, %xmm1 + movdqa 32(%rsp), %xmm7 + movdqu 80(%rcx), %xmm0 + pshufd $0x4e, %xmm7, %xmm4 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm0 + aesenc 96(%rdi), %xmm8 + pxor %xmm7, %xmm4 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + aesenc 96(%rdi), %xmm9 + aesenc 96(%rdi), %xmm10 + pclmulqdq $0x00, %xmm0, %xmm7 + aesenc 96(%rdi), %xmm11 + aesenc 96(%rdi), %xmm12 + pclmulqdq $0x00, %xmm5, %xmm4 + aesenc 96(%rdi), %xmm13 + aesenc 96(%rdi), %xmm14 + aesenc 96(%rdi), %xmm15 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, %xmm3 + pxor %xmm4, %xmm1 + movdqa 16(%rsp), %xmm7 + movdqu 96(%rcx), %xmm0 + pshufd $0x4e, %xmm7, %xmm4 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm0 + aesenc 112(%rdi), %xmm8 + pxor %xmm7, %xmm4 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + aesenc 112(%rdi), %xmm9 + aesenc 112(%rdi), %xmm10 + pclmulqdq $0x00, %xmm0, %xmm7 + aesenc 112(%rdi), %xmm11 + aesenc 112(%rdi), %xmm12 + pclmulqdq $0x00, %xmm5, %xmm4 + aesenc 112(%rdi), %xmm13 + aesenc 112(%rdi), %xmm14 + aesenc 112(%rdi), %xmm15 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, %xmm3 + pxor %xmm4, %xmm1 + movdqa (%rsp), %xmm7 + movdqu 112(%rcx), %xmm0 + pshufd $0x4e, %xmm7, %xmm4 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm0 + aesenc 128(%rdi), %xmm8 + pxor %xmm7, %xmm4 + pshufd $0x4e, %xmm0, %xmm5 + pxor %xmm0, %xmm5 + movdqa %xmm0, %xmm6 + pclmulqdq $0x11, %xmm7, %xmm6 + aesenc 128(%rdi), %xmm9 + aesenc 128(%rdi), %xmm10 + pclmulqdq $0x00, %xmm0, %xmm7 + aesenc 128(%rdi), %xmm11 + aesenc 128(%rdi), %xmm12 + pclmulqdq $0x00, %xmm5, %xmm4 + aesenc 128(%rdi), %xmm13 + aesenc 128(%rdi), %xmm14 + aesenc 128(%rdi), %xmm15 + pxor %xmm7, %xmm1 + pxor %xmm7, %xmm2 + pxor %xmm6, %xmm1 + pxor %xmm6, %xmm3 + pxor %xmm4, %xmm1 + movdqa %xmm1, %xmm5 + psrldq $8, %xmm1 + pslldq $8, %xmm5 + aesenc 144(%rdi), %xmm8 + pxor %xmm5, %xmm2 + pxor %xmm1, %xmm3 + movdqa %xmm2, %xmm7 + movdqa %xmm2, %xmm4 + movdqa %xmm2, %xmm5 + aesenc 144(%rdi), %xmm9 + pslld $31, %xmm7 + pslld $30, %xmm4 + pslld $25, %xmm5 + aesenc 144(%rdi), %xmm10 + pxor %xmm4, %xmm7 + pxor %xmm5, %xmm7 + aesenc 144(%rdi), %xmm11 + movdqa %xmm7, %xmm4 + pslldq $12, %xmm7 + psrldq $4, %xmm4 + aesenc 
144(%rdi), %xmm12 + pxor %xmm7, %xmm2 + movdqa %xmm2, %xmm5 + movdqa %xmm2, %xmm1 + movdqa %xmm2, %xmm0 + aesenc 144(%rdi), %xmm13 + psrld $0x01, %xmm5 + psrld $2, %xmm1 + psrld $7, %xmm0 + aesenc 144(%rdi), %xmm14 + pxor %xmm1, %xmm5 + pxor %xmm0, %xmm5 + aesenc 144(%rdi), %xmm15 + pxor %xmm4, %xmm5 + pxor %xmm5, %xmm2 + pxor %xmm3, %xmm2 + cmpl $11, %esi + movdqa 160(%rdi), %xmm7 + jl L_AES_GCM_decrypt_update_aesni_aesenc_128_ghash_avx_done + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 176(%rdi), %xmm7 + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + cmpl $13, %esi + movdqa 192(%rdi), %xmm7 + jl L_AES_GCM_decrypt_update_aesni_aesenc_128_ghash_avx_done + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 208(%rdi), %xmm7 + aesenc %xmm7, %xmm8 + aesenc %xmm7, %xmm9 + aesenc %xmm7, %xmm10 + aesenc %xmm7, %xmm11 + aesenc %xmm7, %xmm12 + aesenc %xmm7, %xmm13 + aesenc %xmm7, %xmm14 + aesenc %xmm7, %xmm15 + movdqa 224(%rdi), %xmm7 +L_AES_GCM_decrypt_update_aesni_aesenc_128_ghash_avx_done: + aesenclast %xmm7, %xmm8 + aesenclast %xmm7, %xmm9 + movdqu (%rcx), %xmm0 + movdqu 16(%rcx), %xmm1 + pxor %xmm0, %xmm8 + pxor %xmm1, %xmm9 + movdqu %xmm8, (%rdx) + movdqu %xmm9, 16(%rdx) + aesenclast %xmm7, %xmm10 + aesenclast %xmm7, %xmm11 + movdqu 32(%rcx), %xmm0 + movdqu 48(%rcx), %xmm1 + pxor %xmm0, %xmm10 + pxor %xmm1, %xmm11 + movdqu %xmm10, 32(%rdx) + movdqu %xmm11, 48(%rdx) + aesenclast %xmm7, %xmm12 + aesenclast %xmm7, %xmm13 + movdqu 64(%rcx), %xmm0 + movdqu 80(%rcx), %xmm1 + pxor %xmm0, %xmm12 + pxor %xmm1, %xmm13 + movdqu %xmm12, 64(%rdx) + movdqu %xmm13, 80(%rdx) + aesenclast %xmm7, %xmm14 + aesenclast %xmm7, %xmm15 + movdqu 96(%rcx), %xmm0 + movdqu 112(%rcx), %xmm1 + pxor %xmm0, %xmm14 + pxor %xmm1, %xmm15 + movdqu %xmm14, 96(%rdx) + movdqu %xmm15, 112(%rdx) + addl $0x80, %r14d + cmpl %r13d, %r14d + jl L_AES_GCM_decrypt_update_aesni_ghash_128 + movdqa %xmm2, %xmm6 + movdqa (%rsp), %xmm5 +L_AES_GCM_decrypt_update_aesni_done_128: + movl %r8d, %edx + cmpl %edx, %r14d + jge L_AES_GCM_decrypt_update_aesni_done_dec + movl %r8d, %r13d + andl $0xfffffff0, %r13d + cmpl %r13d, %r14d + jge L_AES_GCM_decrypt_update_aesni_last_block_done +L_AES_GCM_decrypt_update_aesni_last_block_start: + leaq (%r11,%r14,1), %rcx + leaq (%r10,%r14,1), %rdx + movdqu (%rcx), %xmm1 + movdqa %xmm5, %xmm0 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm1 + pxor %xmm6, %xmm1 + movdqa (%r12), %xmm8 + movdqa %xmm8, %xmm9 + pshufb L_aes_gcm_bswap_epi64(%rip), %xmm8 + paddd L_aes_gcm_one(%rip), %xmm9 + pxor (%rdi), %xmm8 + movdqa %xmm9, (%r12) + movdqa %xmm1, %xmm10 + pclmulqdq $16, %xmm0, %xmm10 + aesenc 16(%rdi), %xmm8 + aesenc 32(%rdi), %xmm8 + movdqa %xmm1, %xmm11 + pclmulqdq $0x01, %xmm0, %xmm11 + aesenc 48(%rdi), %xmm8 + aesenc 64(%rdi), %xmm8 + movdqa %xmm1, %xmm12 + pclmulqdq $0x00, %xmm0, %xmm12 + aesenc 80(%rdi), %xmm8 + movdqa %xmm1, %xmm1 + pclmulqdq $0x11, %xmm0, %xmm1 + aesenc 96(%rdi), %xmm8 + pxor %xmm11, %xmm10 + movdqa %xmm10, %xmm2 + psrldq $8, %xmm10 + pslldq $8, %xmm2 + aesenc 112(%rdi), %xmm8 + movdqa %xmm1, %xmm3 + pxor %xmm12, %xmm2 + pxor %xmm10, %xmm3 + movdqa L_aes_gcm_mod2_128(%rip), %xmm0 + movdqa 
%xmm2, %xmm11 + pclmulqdq $16, %xmm0, %xmm11 + aesenc 128(%rdi), %xmm8 + pshufd $0x4e, %xmm2, %xmm10 + pxor %xmm11, %xmm10 + movdqa %xmm10, %xmm11 + pclmulqdq $16, %xmm0, %xmm11 + aesenc 144(%rdi), %xmm8 + pshufd $0x4e, %xmm10, %xmm6 + pxor %xmm11, %xmm6 + pxor %xmm3, %xmm6 + cmpl $11, %esi + movdqa 160(%rdi), %xmm9 + jl L_AES_GCM_decrypt_update_aesni_aesenc_gfmul_last + aesenc %xmm9, %xmm8 + aesenc 176(%rdi), %xmm8 + cmpl $13, %esi + movdqa 192(%rdi), %xmm9 + jl L_AES_GCM_decrypt_update_aesni_aesenc_gfmul_last + aesenc %xmm9, %xmm8 + aesenc 208(%rdi), %xmm8 + movdqa 224(%rdi), %xmm9 +L_AES_GCM_decrypt_update_aesni_aesenc_gfmul_last: + aesenclast %xmm9, %xmm8 + movdqu (%rcx), %xmm9 + pxor %xmm9, %xmm8 + movdqu %xmm8, (%rdx) + addl $16, %r14d + cmpl %r13d, %r14d + jl L_AES_GCM_decrypt_update_aesni_last_block_start +L_AES_GCM_decrypt_update_aesni_last_block_done: +L_AES_GCM_decrypt_update_aesni_done_dec: + movdqa %xmm6, (%r9) + addq $0xa8, %rsp + popq %r15 + popq %r14 + popq %r12 + popq %r13 + repz retq +#ifndef __APPLE__ +.size AES_GCM_decrypt_update_aesni,.-AES_GCM_decrypt_update_aesni +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl AES_GCM_decrypt_final_aesni +.type AES_GCM_decrypt_final_aesni,@function +.align 16 +AES_GCM_decrypt_final_aesni: +#else +.section __TEXT,__text +.globl _AES_GCM_decrypt_final_aesni +.p2align 4 +_AES_GCM_decrypt_final_aesni: +#endif /* __APPLE__ */ + pushq %r13 + pushq %rbp + pushq %r12 + movq %rdx, %rax + movl %ecx, %r10d + movl %r8d, %r11d + movq 32(%rsp), %r8 + movq 40(%rsp), %rbp + subq $16, %rsp + movdqa (%rdi), %xmm6 + movdqa (%r9), %xmm5 + movdqa (%r8), %xmm15 + movdqa %xmm5, %xmm9 + movdqa %xmm5, %xmm8 + psrlq $63, %xmm9 + psllq $0x01, %xmm8 + pslldq $8, %xmm9 + por %xmm9, %xmm8 + pshufd $0xff, %xmm5, %xmm5 + psrad $31, %xmm5 + pand L_aes_gcm_mod2_128(%rip), %xmm5 + pxor %xmm8, %xmm5 + movl %r10d, %edx + movl %r11d, %ecx + shlq $3, %rdx + shlq $3, %rcx + pinsrq $0x00, %rdx, %xmm0 + pinsrq $0x01, %rcx, %xmm0 + pxor %xmm0, %xmm6 + pshufd $0x4e, %xmm5, %xmm9 + pshufd $0x4e, %xmm6, %xmm10 + movdqa %xmm6, %xmm11 + movdqa %xmm6, %xmm8 + pclmulqdq $0x11, %xmm5, %xmm11 + pclmulqdq $0x00, %xmm5, %xmm8 + pxor %xmm5, %xmm9 + pxor %xmm6, %xmm10 + pclmulqdq $0x00, %xmm10, %xmm9 + pxor %xmm8, %xmm9 + pxor %xmm11, %xmm9 + movdqa %xmm9, %xmm10 + movdqa %xmm11, %xmm6 + pslldq $8, %xmm10 + psrldq $8, %xmm9 + pxor %xmm10, %xmm8 + pxor %xmm9, %xmm6 + movdqa %xmm8, %xmm12 + movdqa %xmm8, %xmm13 + movdqa %xmm8, %xmm14 + pslld $31, %xmm12 + pslld $30, %xmm13 + pslld $25, %xmm14 + pxor %xmm13, %xmm12 + pxor %xmm14, %xmm12 + movdqa %xmm12, %xmm13 + psrldq $4, %xmm13 + pslldq $12, %xmm12 + pxor %xmm12, %xmm8 + movdqa %xmm8, %xmm14 + movdqa %xmm8, %xmm10 + movdqa %xmm8, %xmm9 + psrld $0x01, %xmm14 + psrld $2, %xmm10 + psrld $7, %xmm9 + pxor %xmm10, %xmm14 + pxor %xmm9, %xmm14 + pxor %xmm13, %xmm14 + pxor %xmm8, %xmm14 + pxor %xmm14, %xmm6 + pshufb L_aes_gcm_bswap_mask(%rip), %xmm6 + movdqa %xmm15, %xmm0 + pxor %xmm6, %xmm0 + cmpl $16, %eax + je L_AES_GCM_decrypt_final_aesni_cmp_tag_16 + subq $16, %rsp + xorq %rcx, %rcx + xorq %r12, %r12 + movdqa %xmm0, (%rsp) +L_AES_GCM_decrypt_final_aesni_cmp_tag_loop: + movzbl (%rsp,%rcx,1), %r13d + xorb (%rsi,%rcx,1), %r13b + orb %r13b, %r12b + incl %ecx + cmpl %eax, %ecx + jne L_AES_GCM_decrypt_final_aesni_cmp_tag_loop + cmpb $0x00, %r12b + sete %r12b + addq $16, %rsp + xorq %rcx, %rcx + jmp L_AES_GCM_decrypt_final_aesni_cmp_tag_done +L_AES_GCM_decrypt_final_aesni_cmp_tag_16: + movdqu (%rsi), %xmm1 + pcmpeqb %xmm1, %xmm0 + pmovmskb 
%xmm0, %rdx + # %%edx == 0xFFFF then return 1 else => return 0 + xorl %r12d, %r12d + cmpl $0xffff, %edx + sete %r12b +L_AES_GCM_decrypt_final_aesni_cmp_tag_done: + movl %r12d, (%rbp) + addq $16, %rsp + popq %r12 + popq %rbp + popq %r13 + repz retq +#ifndef __APPLE__ +.size AES_GCM_decrypt_final_aesni,.-AES_GCM_decrypt_final_aesni +#endif /* __APPLE__ */ +#endif /* WOLFSSL_AESGCM_STREAM */ #ifdef HAVE_INTEL_AVX1 #ifndef __APPLE__ .data @@ -3732,7 +6504,7 @@ L_AES_GCM_encrypt_avx1_calc_iv_lt16: subq $16, %rsp vpxor %xmm8, %xmm8, %xmm8 xorl %ebx, %ebx - vmovdqa %xmm8, (%rsp) + vmovdqu %xmm8, (%rsp) L_AES_GCM_encrypt_avx1_calc_iv_loop: movzbl (%rax,%rcx,1), %r13d movb %r13b, (%rsp,%rbx,1) @@ -3740,7 +6512,7 @@ L_AES_GCM_encrypt_avx1_calc_iv_loop: incl %ebx cmpl %edx, %ecx jl L_AES_GCM_encrypt_avx1_calc_iv_loop - vmovdqa (%rsp), %xmm8 + vmovdqu (%rsp), %xmm8 addq $16, %rsp vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8 vpxor %xmm8, %xmm4, %xmm4 @@ -3929,7 +6701,7 @@ L_AES_GCM_encrypt_avx1_calc_aad_lt16: subq $16, %rsp vpxor %xmm8, %xmm8, %xmm8 xorl %ebx, %ebx - vmovdqa %xmm8, (%rsp) + vmovdqu %xmm8, (%rsp) L_AES_GCM_encrypt_avx1_calc_aad_loop: movzbl (%r12,%rcx,1), %r13d movb %r13b, (%rsp,%rbx,1) @@ -3937,7 +6709,7 @@ L_AES_GCM_encrypt_avx1_calc_aad_loop: incl %ebx cmpl %edx, %ecx jl L_AES_GCM_encrypt_avx1_calc_aad_loop - vmovdqa (%rsp), %xmm8 + vmovdqu (%rsp), %xmm8 addq $16, %rsp vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8 vpxor %xmm8, %xmm6, %xmm6 @@ -5012,7 +7784,7 @@ L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_aesenc_avx_last: vaesenclast %xmm9, %xmm4, %xmm4 subq $16, %rsp xorl %ecx, %ecx - vmovdqa %xmm4, (%rsp) + vmovdqu %xmm4, (%rsp) L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_loop: movzbl (%rdi,%rbx,1), %r13d xorb (%rsp,%rcx,1), %r13b @@ -5031,7 +7803,7 @@ L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_byte_loop: cmpl $16, %ecx jl L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_byte_loop L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_finish_enc: - vmovdqa (%rsp), %xmm4 + vmovdqu (%rsp), %xmm4 addq $16, %rsp vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm4, %xmm4 vpxor %xmm4, %xmm6, %xmm6 @@ -5109,7 +7881,7 @@ L_AES_GCM_encrypt_avx1_done_enc: cmpl $16, %r14d je L_AES_GCM_encrypt_avx1_store_tag_16 xorq %rcx, %rcx - vmovdqa %xmm0, (%rsp) + vmovdqu %xmm0, (%rsp) L_AES_GCM_encrypt_avx1_store_tag_loop: movzbl (%rsp,%rcx,1), %r13d movb %r13b, (%r8,%rcx,1) @@ -5314,7 +8086,7 @@ L_AES_GCM_decrypt_avx1_calc_iv_lt16: subq $16, %rsp vpxor %xmm8, %xmm8, %xmm8 xorl %ebx, %ebx - vmovdqa %xmm8, (%rsp) + vmovdqu %xmm8, (%rsp) L_AES_GCM_decrypt_avx1_calc_iv_loop: movzbl (%rax,%rcx,1), %r13d movb %r13b, (%rsp,%rbx,1) @@ -5322,7 +8094,7 @@ L_AES_GCM_decrypt_avx1_calc_iv_loop: incl %ebx cmpl %edx, %ecx jl L_AES_GCM_decrypt_avx1_calc_iv_loop - vmovdqa (%rsp), %xmm8 + vmovdqu (%rsp), %xmm8 addq $16, %rsp vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8 vpxor %xmm8, %xmm4, %xmm4 @@ -5511,7 +8283,7 @@ L_AES_GCM_decrypt_avx1_calc_aad_lt16: subq $16, %rsp vpxor %xmm8, %xmm8, %xmm8 xorl %ebx, %ebx - vmovdqa %xmm8, (%rsp) + vmovdqu %xmm8, (%rsp) L_AES_GCM_decrypt_avx1_calc_aad_loop: movzbl (%r12,%rcx,1), %r13d movb %r13b, (%rsp,%rbx,1) @@ -5519,7 +8291,7 @@ L_AES_GCM_decrypt_avx1_calc_aad_loop: incl %ebx cmpl %edx, %ecx jl L_AES_GCM_decrypt_avx1_calc_aad_loop - vmovdqa (%rsp), %xmm8 + vmovdqu (%rsp), %xmm8 addq $16, %rsp vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8 vpxor %xmm8, %xmm6, %xmm6 @@ -6178,9 +8950,9 @@ L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_aesenc_avx_last: vaesenclast %xmm9, 
%xmm4, %xmm4 subq $32, %rsp xorl %ecx, %ecx - vmovdqa %xmm4, (%rsp) + vmovdqu %xmm4, (%rsp) vpxor %xmm0, %xmm0, %xmm0 - vmovdqa %xmm0, 16(%rsp) + vmovdqu %xmm0, 16(%rsp) L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_loop: movzbl (%rdi,%rbx,1), %r13d movb %r13b, 16(%rsp,%rcx,1) @@ -6190,7 +8962,7 @@ L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_loop: incl %ecx cmpl %edx, %ebx jl L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_loop - vmovdqa 16(%rsp), %xmm4 + vmovdqu 16(%rsp), %xmm4 addq $32, %rsp vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm4, %xmm4 vpxor %xmm4, %xmm6, %xmm6 @@ -6270,7 +9042,7 @@ L_AES_GCM_decrypt_avx1_done_dec: subq $16, %rsp xorq %rcx, %rcx xorq %rbx, %rbx - vmovdqa %xmm0, (%rsp) + vmovdqu %xmm0, (%rsp) L_AES_GCM_decrypt_avx1_cmp_tag_loop: movzbl (%rsp,%rcx,1), %r13d xorb (%r8,%rcx,1), %r13b @@ -6305,6 +9077,2345 @@ L_AES_GCM_decrypt_avx1_cmp_tag_done: #ifndef __APPLE__ .size AES_GCM_decrypt_avx1,.-AES_GCM_decrypt_avx1 #endif /* __APPLE__ */ +#ifdef WOLFSSL_AESGCM_STREAM +#ifndef __APPLE__ +.text +.globl AES_GCM_init_avx1 +.type AES_GCM_init_avx1,@function +.align 16 +AES_GCM_init_avx1: +#else +.section __TEXT,__text +.globl _AES_GCM_init_avx1 +.p2align 4 +_AES_GCM_init_avx1: +#endif /* __APPLE__ */ + pushq %r12 + pushq %r13 + movq %rdx, %r10 + movl %ecx, %r11d + movq 24(%rsp), %rax + subq $16, %rsp + vpxor %xmm4, %xmm4, %xmm4 + movl %r11d, %edx + cmpl $12, %edx + jne L_AES_GCM_init_avx1_iv_not_12 + # # Calculate values when IV is 12 bytes + # Set counter based on IV + movl $0x1000000, %ecx + vpinsrq $0x00, (%r10), %xmm4, %xmm4 + vpinsrd $2, 8(%r10), %xmm4, %xmm4 + vpinsrd $3, %ecx, %xmm4, %xmm4 + # H = Encrypt X(=0) and T = Encrypt counter + vmovdqa (%rdi), %xmm5 + vpxor %xmm5, %xmm4, %xmm1 + vmovdqa 16(%rdi), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 32(%rdi), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 48(%rdi), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 64(%rdi), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 80(%rdi), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 96(%rdi), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 112(%rdi), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 128(%rdi), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 144(%rdi), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + cmpl $11, %esi + vmovdqa 160(%rdi), %xmm7 + jl L_AES_GCM_init_avx1_calc_iv_12_last + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 176(%rdi), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + cmpl $13, %esi + vmovdqa 192(%rdi), %xmm7 + jl L_AES_GCM_init_avx1_calc_iv_12_last + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 208(%rdi), %xmm7 + vaesenc %xmm7, %xmm5, %xmm5 + vaesenc %xmm7, %xmm1, %xmm1 + vmovdqa 224(%rdi), %xmm7 +L_AES_GCM_init_avx1_calc_iv_12_last: + vaesenclast %xmm7, %xmm5, %xmm5 + vaesenclast %xmm7, %xmm1, %xmm1 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm5, %xmm5 + vmovdqa %xmm1, %xmm15 + jmp L_AES_GCM_init_avx1_iv_done +L_AES_GCM_init_avx1_iv_not_12: + # Calculate values when IV is not 12 bytes + # H = Encrypt X(=0) + vmovdqa (%rdi), %xmm5 + vaesenc 16(%rdi), %xmm5, %xmm5 + vaesenc 32(%rdi), %xmm5, %xmm5 + vaesenc 48(%rdi), %xmm5, %xmm5 + vaesenc 64(%rdi), %xmm5, %xmm5 + vaesenc 80(%rdi), %xmm5, %xmm5 + vaesenc 96(%rdi), %xmm5, %xmm5 + vaesenc 
112(%rdi), %xmm5, %xmm5 + vaesenc 128(%rdi), %xmm5, %xmm5 + vaesenc 144(%rdi), %xmm5, %xmm5 + cmpl $11, %esi + vmovdqa 160(%rdi), %xmm9 + jl L_AES_GCM_init_avx1_calc_iv_1_aesenc_avx_last + vaesenc %xmm9, %xmm5, %xmm5 + vaesenc 176(%rdi), %xmm5, %xmm5 + cmpl $13, %esi + vmovdqa 192(%rdi), %xmm9 + jl L_AES_GCM_init_avx1_calc_iv_1_aesenc_avx_last + vaesenc %xmm9, %xmm5, %xmm5 + vaesenc 208(%rdi), %xmm5, %xmm5 + vmovdqa 224(%rdi), %xmm9 +L_AES_GCM_init_avx1_calc_iv_1_aesenc_avx_last: + vaesenclast %xmm9, %xmm5, %xmm5 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm5, %xmm5 + # Calc counter + # Initialization vector + cmpl $0x00, %edx + movq $0x00, %rcx + je L_AES_GCM_init_avx1_calc_iv_done + cmpl $16, %edx + jl L_AES_GCM_init_avx1_calc_iv_lt16 + andl $0xfffffff0, %edx +L_AES_GCM_init_avx1_calc_iv_16_loop: + vmovdqu (%r10,%rcx,1), %xmm8 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8 + vpxor %xmm8, %xmm4, %xmm4 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm4, %xmm1 + vpshufd $0x4e, %xmm5, %xmm2 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpxor %xmm4, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqa %xmm0, %xmm7 + vmovdqa %xmm3, %xmm4 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm7, %xmm7 + vpxor %xmm1, %xmm4, %xmm4 + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm4, %xmm4 + vpslld $31, %xmm7, %xmm0 + vpslld $30, %xmm7, %xmm1 + vpslld $25, %xmm7, %xmm2 + vpxor %xmm1, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vmovdqa %xmm0, %xmm1 + vpsrldq $4, %xmm1, %xmm1 + vpslldq $12, %xmm0, %xmm0 + vpxor %xmm0, %xmm7, %xmm7 + vpsrld $0x01, %xmm7, %xmm2 + vpsrld $2, %xmm7, %xmm3 + vpsrld $7, %xmm7, %xmm0 + vpxor %xmm3, %xmm2, %xmm2 + vpxor %xmm0, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm2, %xmm4, %xmm4 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_init_avx1_calc_iv_16_loop + movl %r11d, %edx + cmpl %edx, %ecx + je L_AES_GCM_init_avx1_calc_iv_done +L_AES_GCM_init_avx1_calc_iv_lt16: + subq $16, %rsp + vpxor %xmm8, %xmm8, %xmm8 + xorl %r13d, %r13d + vmovdqu %xmm8, (%rsp) +L_AES_GCM_init_avx1_calc_iv_loop: + movzbl (%r10,%rcx,1), %r12d + movb %r12b, (%rsp,%r13,1) + incl %ecx + incl %r13d + cmpl %edx, %ecx + jl L_AES_GCM_init_avx1_calc_iv_loop + vmovdqu (%rsp), %xmm8 + addq $16, %rsp + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8 + vpxor %xmm8, %xmm4, %xmm4 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm4, %xmm1 + vpshufd $0x4e, %xmm5, %xmm2 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpxor %xmm4, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqa %xmm0, %xmm7 + vmovdqa %xmm3, %xmm4 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm7, %xmm7 + vpxor %xmm1, %xmm4, %xmm4 + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm4, %xmm4 + vpslld $31, %xmm7, %xmm0 + vpslld $30, %xmm7, %xmm1 + vpslld $25, %xmm7, %xmm2 + vpxor %xmm1, %xmm0, 
%xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vmovdqa %xmm0, %xmm1 + vpsrldq $4, %xmm1, %xmm1 + vpslldq $12, %xmm0, %xmm0 + vpxor %xmm0, %xmm7, %xmm7 + vpsrld $0x01, %xmm7, %xmm2 + vpsrld $2, %xmm7, %xmm3 + vpsrld $7, %xmm7, %xmm0 + vpxor %xmm3, %xmm2, %xmm2 + vpxor %xmm0, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm2, %xmm4, %xmm4 +L_AES_GCM_init_avx1_calc_iv_done: + # T = Encrypt counter + vpxor %xmm0, %xmm0, %xmm0 + shll $3, %edx + vpinsrq $0x00, %rdx, %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm4, %xmm1 + vpshufd $0x4e, %xmm5, %xmm2 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpxor %xmm4, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqa %xmm0, %xmm7 + vmovdqa %xmm3, %xmm4 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm7, %xmm7 + vpxor %xmm1, %xmm4, %xmm4 + vpsrld $31, %xmm7, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm7, %xmm7 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm7, %xmm7 + vpor %xmm1, %xmm4, %xmm4 + vpslld $31, %xmm7, %xmm0 + vpslld $30, %xmm7, %xmm1 + vpslld $25, %xmm7, %xmm2 + vpxor %xmm1, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vmovdqa %xmm0, %xmm1 + vpsrldq $4, %xmm1, %xmm1 + vpslldq $12, %xmm0, %xmm0 + vpxor %xmm0, %xmm7, %xmm7 + vpsrld $0x01, %xmm7, %xmm2 + vpsrld $2, %xmm7, %xmm3 + vpsrld $7, %xmm7, %xmm0 + vpxor %xmm3, %xmm2, %xmm2 + vpxor %xmm0, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm2, %xmm4, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm4, %xmm4 + # Encrypt counter + vmovdqa (%rdi), %xmm8 + vpxor %xmm4, %xmm8, %xmm8 + vaesenc 16(%rdi), %xmm8, %xmm8 + vaesenc 32(%rdi), %xmm8, %xmm8 + vaesenc 48(%rdi), %xmm8, %xmm8 + vaesenc 64(%rdi), %xmm8, %xmm8 + vaesenc 80(%rdi), %xmm8, %xmm8 + vaesenc 96(%rdi), %xmm8, %xmm8 + vaesenc 112(%rdi), %xmm8, %xmm8 + vaesenc 128(%rdi), %xmm8, %xmm8 + vaesenc 144(%rdi), %xmm8, %xmm8 + cmpl $11, %esi + vmovdqa 160(%rdi), %xmm9 + jl L_AES_GCM_init_avx1_calc_iv_2_aesenc_avx_last + vaesenc %xmm9, %xmm8, %xmm8 + vaesenc 176(%rdi), %xmm8, %xmm8 + cmpl $13, %esi + vmovdqa 192(%rdi), %xmm9 + jl L_AES_GCM_init_avx1_calc_iv_2_aesenc_avx_last + vaesenc %xmm9, %xmm8, %xmm8 + vaesenc 208(%rdi), %xmm8, %xmm8 + vmovdqa 224(%rdi), %xmm9 +L_AES_GCM_init_avx1_calc_iv_2_aesenc_avx_last: + vaesenclast %xmm9, %xmm8, %xmm8 + vmovdqa %xmm8, %xmm15 +L_AES_GCM_init_avx1_iv_done: + vmovdqa %xmm15, (%rax) + vpshufb L_avx1_aes_gcm_bswap_epi64(%rip), %xmm4, %xmm4 + vpaddd L_avx1_aes_gcm_one(%rip), %xmm4, %xmm4 + vmovdqa %xmm5, (%r8) + vmovdqa %xmm4, (%r9) + vzeroupper + addq $16, %rsp + popq %r13 + popq %r12 + repz retq +#ifndef __APPLE__ +.size AES_GCM_init_avx1,.-AES_GCM_init_avx1 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl AES_GCM_aad_update_avx1 +.type AES_GCM_aad_update_avx1,@function +.align 16 +AES_GCM_aad_update_avx1: +#else +.section __TEXT,__text +.globl _AES_GCM_aad_update_avx1 +.p2align 4 +_AES_GCM_aad_update_avx1: +#endif /* __APPLE__ */ + movq %rcx, %rax + vmovdqa (%rdx), %xmm5 + vmovdqa (%rax), %xmm6 + xorl %ecx, %ecx +L_AES_GCM_aad_update_avx1_16_loop: + vmovdqu (%rdi,%rcx,1), %xmm8 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8 + vpxor %xmm8, %xmm5, %xmm5 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm5, %xmm1 + vpshufd $0x4e, %xmm6, %xmm2 + 
vpclmulqdq $0x11, %xmm5, %xmm6, %xmm3 + vpclmulqdq $0x00, %xmm5, %xmm6, %xmm0 + vpxor %xmm5, %xmm1, %xmm1 + vpxor %xmm6, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqa %xmm0, %xmm4 + vmovdqa %xmm3, %xmm5 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm4, %xmm4 + vpxor %xmm1, %xmm5, %xmm5 + vpsrld $31, %xmm4, %xmm0 + vpsrld $31, %xmm5, %xmm1 + vpslld $0x01, %xmm4, %xmm4 + vpslld $0x01, %xmm5, %xmm5 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm5, %xmm5 + vpor %xmm0, %xmm4, %xmm4 + vpor %xmm1, %xmm5, %xmm5 + vpslld $31, %xmm4, %xmm0 + vpslld $30, %xmm4, %xmm1 + vpslld $25, %xmm4, %xmm2 + vpxor %xmm1, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vmovdqa %xmm0, %xmm1 + vpsrldq $4, %xmm1, %xmm1 + vpslldq $12, %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + vpsrld $0x01, %xmm4, %xmm2 + vpsrld $2, %xmm4, %xmm3 + vpsrld $7, %xmm4, %xmm0 + vpxor %xmm3, %xmm2, %xmm2 + vpxor %xmm0, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm4, %xmm2, %xmm2 + vpxor %xmm2, %xmm5, %xmm5 + addl $16, %ecx + cmpl %esi, %ecx + jl L_AES_GCM_aad_update_avx1_16_loop + vmovdqa %xmm5, (%rdx) + vzeroupper + repz retq +#ifndef __APPLE__ +.size AES_GCM_aad_update_avx1,.-AES_GCM_aad_update_avx1 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl AES_GCM_encrypt_block_avx1 +.type AES_GCM_encrypt_block_avx1,@function +.align 16 +AES_GCM_encrypt_block_avx1: +#else +.section __TEXT,__text +.globl _AES_GCM_encrypt_block_avx1 +.p2align 4 +_AES_GCM_encrypt_block_avx1: +#endif /* __APPLE__ */ + movq %rdx, %r10 + movq %rcx, %r11 + vmovdqa (%r8), %xmm9 + vpshufb L_avx1_aes_gcm_bswap_epi64(%rip), %xmm9, %xmm8 + vpaddd L_avx1_aes_gcm_one(%rip), %xmm9, %xmm9 + vmovdqa %xmm9, (%r8) + vpxor (%rdi), %xmm8, %xmm8 + vaesenc 16(%rdi), %xmm8, %xmm8 + vaesenc 32(%rdi), %xmm8, %xmm8 + vaesenc 48(%rdi), %xmm8, %xmm8 + vaesenc 64(%rdi), %xmm8, %xmm8 + vaesenc 80(%rdi), %xmm8, %xmm8 + vaesenc 96(%rdi), %xmm8, %xmm8 + vaesenc 112(%rdi), %xmm8, %xmm8 + vaesenc 128(%rdi), %xmm8, %xmm8 + vaesenc 144(%rdi), %xmm8, %xmm8 + cmpl $11, %esi + vmovdqa 160(%rdi), %xmm9 + jl L_AES_GCM_encrypt_block_avx1_aesenc_block_last + vaesenc %xmm9, %xmm8, %xmm8 + vaesenc 176(%rdi), %xmm8, %xmm8 + cmpl $13, %esi + vmovdqa 192(%rdi), %xmm9 + jl L_AES_GCM_encrypt_block_avx1_aesenc_block_last + vaesenc %xmm9, %xmm8, %xmm8 + vaesenc 208(%rdi), %xmm8, %xmm8 + vmovdqa 224(%rdi), %xmm9 +L_AES_GCM_encrypt_block_avx1_aesenc_block_last: + vaesenclast %xmm9, %xmm8, %xmm8 + vmovdqu (%r11), %xmm9 + vpxor %xmm9, %xmm8, %xmm8 + vmovdqu %xmm8, (%r10) + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8 + vzeroupper + repz retq +#ifndef __APPLE__ +.size AES_GCM_encrypt_block_avx1,.-AES_GCM_encrypt_block_avx1 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl AES_GCM_ghash_block_avx1 +.type AES_GCM_ghash_block_avx1,@function +.align 16 +AES_GCM_ghash_block_avx1: +#else +.section __TEXT,__text +.globl _AES_GCM_ghash_block_avx1 +.p2align 4 +_AES_GCM_ghash_block_avx1: +#endif /* __APPLE__ */ + vmovdqa (%rsi), %xmm4 + vmovdqa (%rdx), %xmm5 + vmovdqu (%rdi), %xmm8 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8 + vpxor %xmm8, %xmm4, %xmm4 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm4, %xmm1 + vpshufd $0x4e, %xmm5, %xmm2 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpxor %xmm4, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor 
%xmm3, %xmm1, %xmm1 + vmovdqa %xmm0, %xmm6 + vmovdqa %xmm3, %xmm4 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm6, %xmm6 + vpxor %xmm1, %xmm4, %xmm4 + vpsrld $31, %xmm6, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm6, %xmm6 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm6, %xmm6 + vpor %xmm1, %xmm4, %xmm4 + vpslld $31, %xmm6, %xmm0 + vpslld $30, %xmm6, %xmm1 + vpslld $25, %xmm6, %xmm2 + vpxor %xmm1, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vmovdqa %xmm0, %xmm1 + vpsrldq $4, %xmm1, %xmm1 + vpslldq $12, %xmm0, %xmm0 + vpxor %xmm0, %xmm6, %xmm6 + vpsrld $0x01, %xmm6, %xmm2 + vpsrld $2, %xmm6, %xmm3 + vpsrld $7, %xmm6, %xmm0 + vpxor %xmm3, %xmm2, %xmm2 + vpxor %xmm0, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm6, %xmm2, %xmm2 + vpxor %xmm2, %xmm4, %xmm4 + vmovdqa %xmm4, (%rsi) + vzeroupper + repz retq +#ifndef __APPLE__ +.size AES_GCM_ghash_block_avx1,.-AES_GCM_ghash_block_avx1 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl AES_GCM_encrypt_update_avx1 +.type AES_GCM_encrypt_update_avx1,@function +.align 16 +AES_GCM_encrypt_update_avx1: +#else +.section __TEXT,__text +.globl _AES_GCM_encrypt_update_avx1 +.p2align 4 +_AES_GCM_encrypt_update_avx1: +#endif /* __APPLE__ */ + pushq %r13 + pushq %r12 + pushq %r14 + movq %rdx, %r10 + movq %rcx, %r11 + movq 32(%rsp), %rax + movq 40(%rsp), %r12 + subq $0xa0, %rsp + vmovdqa (%r9), %xmm6 + vmovdqa (%rax), %xmm5 + vpsrlq $63, %xmm5, %xmm9 + vpsllq $0x01, %xmm5, %xmm8 + vpslldq $8, %xmm9, %xmm9 + vpor %xmm9, %xmm8, %xmm8 + vpshufd $0xff, %xmm5, %xmm5 + vpsrad $31, %xmm5, %xmm5 + vpand L_avx1_aes_gcm_mod2_128(%rip), %xmm5, %xmm5 + vpxor %xmm8, %xmm5, %xmm5 + xorl %r14d, %r14d + cmpl $0x80, %r8d + movl %r8d, %r13d + jl L_AES_GCM_encrypt_update_avx1_done_128 + andl $0xffffff80, %r13d + vmovdqa %xmm6, %xmm2 + # H ^ 1 + vmovdqa %xmm5, (%rsp) + # H ^ 2 + vpclmulqdq $0x00, %xmm5, %xmm5, %xmm8 + vpclmulqdq $0x11, %xmm5, %xmm5, %xmm0 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm0, %xmm0 + vmovdqa %xmm0, 16(%rsp) + # H ^ 3 + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm5, %xmm9 + vpshufd $0x4e, %xmm0, %xmm10 + vpclmulqdq $0x11, %xmm5, %xmm0, %xmm11 + vpclmulqdq $0x00, %xmm5, %xmm0, %xmm8 + vpxor %xmm5, %xmm9, %xmm9 + vpxor %xmm0, %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpxor %xmm11, %xmm9, %xmm9 + vpslldq $8, %xmm9, %xmm10 + vpsrldq $8, %xmm9, %xmm9 + vpxor %xmm10, %xmm8, %xmm8 + vpxor %xmm9, %xmm11, %xmm1 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm1, %xmm1 + vmovdqa %xmm1, 32(%rsp) + # H ^ 4 + vpclmulqdq $0x00, %xmm0, %xmm0, %xmm8 + vpclmulqdq 
$0x11, %xmm0, %xmm0, %xmm3 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm3, %xmm3 + vmovdqa %xmm3, 48(%rsp) + # H ^ 5 + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm0, %xmm9 + vpshufd $0x4e, %xmm1, %xmm10 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm11 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm8 + vpxor %xmm0, %xmm9, %xmm9 + vpxor %xmm1, %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpxor %xmm11, %xmm9, %xmm9 + vpslldq $8, %xmm9, %xmm10 + vpsrldq $8, %xmm9, %xmm9 + vpxor %xmm10, %xmm8, %xmm8 + vpxor %xmm9, %xmm11, %xmm7 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm7, %xmm7 + vmovdqa %xmm7, 64(%rsp) + # H ^ 6 + vpclmulqdq $0x00, %xmm1, %xmm1, %xmm8 + vpclmulqdq $0x11, %xmm1, %xmm1, %xmm7 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm7, %xmm7 + vmovdqa %xmm7, 80(%rsp) + # H ^ 7 + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm1, %xmm9 + vpshufd $0x4e, %xmm3, %xmm10 + vpclmulqdq $0x11, %xmm1, %xmm3, %xmm11 + vpclmulqdq $0x00, %xmm1, %xmm3, %xmm8 + vpxor %xmm1, %xmm9, %xmm9 + vpxor %xmm3, %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpxor %xmm11, %xmm9, %xmm9 + vpslldq $8, %xmm9, %xmm10 + vpsrldq $8, %xmm9, %xmm9 + vpxor %xmm10, %xmm8, %xmm8 + vpxor %xmm9, %xmm11, %xmm7 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm7, %xmm7 + vmovdqa %xmm7, 96(%rsp) + # H ^ 8 + vpclmulqdq $0x00, %xmm3, %xmm3, %xmm8 + vpclmulqdq $0x11, %xmm3, %xmm3, %xmm7 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm7, %xmm7 + vmovdqa 
%xmm7, 112(%rsp) + # First 128 bytes of input + vmovdqa (%r12), %xmm0 + vmovdqa L_avx1_aes_gcm_bswap_epi64(%rip), %xmm1 + vpshufb %xmm1, %xmm0, %xmm8 + vpaddd L_avx1_aes_gcm_one(%rip), %xmm0, %xmm9 + vpshufb %xmm1, %xmm9, %xmm9 + vpaddd L_avx1_aes_gcm_two(%rip), %xmm0, %xmm10 + vpshufb %xmm1, %xmm10, %xmm10 + vpaddd L_avx1_aes_gcm_three(%rip), %xmm0, %xmm11 + vpshufb %xmm1, %xmm11, %xmm11 + vpaddd L_avx1_aes_gcm_four(%rip), %xmm0, %xmm12 + vpshufb %xmm1, %xmm12, %xmm12 + vpaddd L_avx1_aes_gcm_five(%rip), %xmm0, %xmm13 + vpshufb %xmm1, %xmm13, %xmm13 + vpaddd L_avx1_aes_gcm_six(%rip), %xmm0, %xmm14 + vpshufb %xmm1, %xmm14, %xmm14 + vpaddd L_avx1_aes_gcm_seven(%rip), %xmm0, %xmm15 + vpshufb %xmm1, %xmm15, %xmm15 + vpaddd L_avx1_aes_gcm_eight(%rip), %xmm0, %xmm0 + vmovdqa (%rdi), %xmm7 + vmovdqa %xmm0, (%r12) + vpxor %xmm7, %xmm8, %xmm8 + vpxor %xmm7, %xmm9, %xmm9 + vpxor %xmm7, %xmm10, %xmm10 + vpxor %xmm7, %xmm11, %xmm11 + vpxor %xmm7, %xmm12, %xmm12 + vpxor %xmm7, %xmm13, %xmm13 + vpxor %xmm7, %xmm14, %xmm14 + vpxor %xmm7, %xmm15, %xmm15 + vmovdqa 16(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 32(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 48(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 64(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 80(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 96(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 112(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 128(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 144(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, 
%xmm15 + cmpl $11, %esi + vmovdqa 160(%rdi), %xmm7 + jl L_AES_GCM_encrypt_update_avx1_aesenc_128_enc_done + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 176(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + cmpl $13, %esi + vmovdqa 192(%rdi), %xmm7 + jl L_AES_GCM_encrypt_update_avx1_aesenc_128_enc_done + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 208(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 224(%rdi), %xmm7 +L_AES_GCM_encrypt_update_avx1_aesenc_128_enc_done: + vaesenclast %xmm7, %xmm8, %xmm8 + vaesenclast %xmm7, %xmm9, %xmm9 + vmovdqu (%r11), %xmm0 + vmovdqu 16(%r11), %xmm1 + vpxor %xmm0, %xmm8, %xmm8 + vpxor %xmm1, %xmm9, %xmm9 + vmovdqu %xmm8, (%r10) + vmovdqu %xmm9, 16(%r10) + vaesenclast %xmm7, %xmm10, %xmm10 + vaesenclast %xmm7, %xmm11, %xmm11 + vmovdqu 32(%r11), %xmm0 + vmovdqu 48(%r11), %xmm1 + vpxor %xmm0, %xmm10, %xmm10 + vpxor %xmm1, %xmm11, %xmm11 + vmovdqu %xmm10, 32(%r10) + vmovdqu %xmm11, 48(%r10) + vaesenclast %xmm7, %xmm12, %xmm12 + vaesenclast %xmm7, %xmm13, %xmm13 + vmovdqu 64(%r11), %xmm0 + vmovdqu 80(%r11), %xmm1 + vpxor %xmm0, %xmm12, %xmm12 + vpxor %xmm1, %xmm13, %xmm13 + vmovdqu %xmm12, 64(%r10) + vmovdqu %xmm13, 80(%r10) + vaesenclast %xmm7, %xmm14, %xmm14 + vaesenclast %xmm7, %xmm15, %xmm15 + vmovdqu 96(%r11), %xmm0 + vmovdqu 112(%r11), %xmm1 + vpxor %xmm0, %xmm14, %xmm14 + vpxor %xmm1, %xmm15, %xmm15 + vmovdqu %xmm14, 96(%r10) + vmovdqu %xmm15, 112(%r10) + cmpl $0x80, %r13d + movl $0x80, %r14d + jle L_AES_GCM_encrypt_update_avx1_end_128 + # More 128 bytes of input +L_AES_GCM_encrypt_update_avx1_ghash_128: + leaq (%r11,%r14,1), %rcx + leaq (%r10,%r14,1), %rdx + vmovdqa (%r12), %xmm0 + vmovdqa L_avx1_aes_gcm_bswap_epi64(%rip), %xmm1 + vpshufb %xmm1, %xmm0, %xmm8 + vpaddd L_avx1_aes_gcm_one(%rip), %xmm0, %xmm9 + vpshufb %xmm1, %xmm9, %xmm9 + vpaddd L_avx1_aes_gcm_two(%rip), %xmm0, %xmm10 + vpshufb %xmm1, %xmm10, %xmm10 + vpaddd L_avx1_aes_gcm_three(%rip), %xmm0, %xmm11 + vpshufb %xmm1, %xmm11, %xmm11 + vpaddd L_avx1_aes_gcm_four(%rip), %xmm0, %xmm12 + vpshufb %xmm1, %xmm12, %xmm12 + vpaddd L_avx1_aes_gcm_five(%rip), %xmm0, %xmm13 + vpshufb %xmm1, %xmm13, %xmm13 + vpaddd L_avx1_aes_gcm_six(%rip), %xmm0, %xmm14 + vpshufb %xmm1, %xmm14, %xmm14 + vpaddd L_avx1_aes_gcm_seven(%rip), %xmm0, %xmm15 + vpshufb %xmm1, %xmm15, %xmm15 + vpaddd L_avx1_aes_gcm_eight(%rip), %xmm0, %xmm0 + vmovdqa (%rdi), %xmm7 + vmovdqa %xmm0, (%r12) + vpxor %xmm7, %xmm8, %xmm8 + vpxor %xmm7, %xmm9, %xmm9 + vpxor %xmm7, %xmm10, %xmm10 + vpxor %xmm7, %xmm11, %xmm11 + vpxor %xmm7, %xmm12, %xmm12 + vpxor %xmm7, %xmm13, %xmm13 + vpxor %xmm7, %xmm14, %xmm14 + vpxor %xmm7, %xmm15, %xmm15 + vmovdqa 112(%rsp), %xmm7 + vmovdqu -128(%rdx), 
%xmm0 + vaesenc 16(%rdi), %xmm8, %xmm8 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vpshufd $0x4e, %xmm7, %xmm1 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm3 + vaesenc 16(%rdi), %xmm9, %xmm9 + vaesenc 16(%rdi), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm2 + vaesenc 16(%rdi), %xmm11, %xmm11 + vaesenc 16(%rdi), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm1, %xmm1 + vaesenc 16(%rdi), %xmm13, %xmm13 + vaesenc 16(%rdi), %xmm14, %xmm14 + vaesenc 16(%rdi), %xmm15, %xmm15 + vpxor %xmm2, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqa 96(%rsp), %xmm7 + vmovdqu -112(%rdx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vaesenc 32(%rdi), %xmm8, %xmm8 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vaesenc 32(%rdi), %xmm9, %xmm9 + vaesenc 32(%rdi), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vaesenc 32(%rdi), %xmm11, %xmm11 + vaesenc 32(%rdi), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vaesenc 32(%rdi), %xmm13, %xmm13 + vaesenc 32(%rdi), %xmm14, %xmm14 + vaesenc 32(%rdi), %xmm15, %xmm15 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqa 80(%rsp), %xmm7 + vmovdqu -96(%rdx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vaesenc 48(%rdi), %xmm8, %xmm8 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vaesenc 48(%rdi), %xmm9, %xmm9 + vaesenc 48(%rdi), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vaesenc 48(%rdi), %xmm11, %xmm11 + vaesenc 48(%rdi), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vaesenc 48(%rdi), %xmm13, %xmm13 + vaesenc 48(%rdi), %xmm14, %xmm14 + vaesenc 48(%rdi), %xmm15, %xmm15 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqa 64(%rsp), %xmm7 + vmovdqu -80(%rdx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vaesenc 64(%rdi), %xmm8, %xmm8 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vaesenc 64(%rdi), %xmm9, %xmm9 + vaesenc 64(%rdi), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vaesenc 64(%rdi), %xmm11, %xmm11 + vaesenc 64(%rdi), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vaesenc 64(%rdi), %xmm13, %xmm13 + vaesenc 64(%rdi), %xmm14, %xmm14 + vaesenc 64(%rdi), %xmm15, %xmm15 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqa 48(%rsp), %xmm7 + vmovdqu -64(%rdx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vaesenc 80(%rdi), %xmm8, %xmm8 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vaesenc 80(%rdi), %xmm9, %xmm9 + vaesenc 80(%rdi), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vaesenc 80(%rdi), %xmm11, %xmm11 + vaesenc 80(%rdi), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vaesenc 80(%rdi), %xmm13, %xmm13 + vaesenc 80(%rdi), %xmm14, %xmm14 + vaesenc 80(%rdi), 
%xmm15, %xmm15 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqa 32(%rsp), %xmm7 + vmovdqu -48(%rdx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vaesenc 96(%rdi), %xmm8, %xmm8 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vaesenc 96(%rdi), %xmm9, %xmm9 + vaesenc 96(%rdi), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vaesenc 96(%rdi), %xmm11, %xmm11 + vaesenc 96(%rdi), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vaesenc 96(%rdi), %xmm13, %xmm13 + vaesenc 96(%rdi), %xmm14, %xmm14 + vaesenc 96(%rdi), %xmm15, %xmm15 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqa 16(%rsp), %xmm7 + vmovdqu -32(%rdx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vaesenc 112(%rdi), %xmm8, %xmm8 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vaesenc 112(%rdi), %xmm9, %xmm9 + vaesenc 112(%rdi), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vaesenc 112(%rdi), %xmm11, %xmm11 + vaesenc 112(%rdi), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vaesenc 112(%rdi), %xmm13, %xmm13 + vaesenc 112(%rdi), %xmm14, %xmm14 + vaesenc 112(%rdi), %xmm15, %xmm15 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqa (%rsp), %xmm7 + vmovdqu -16(%rdx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vaesenc 128(%rdi), %xmm8, %xmm8 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vaesenc 128(%rdi), %xmm9, %xmm9 + vaesenc 128(%rdi), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vaesenc 128(%rdi), %xmm11, %xmm11 + vaesenc 128(%rdi), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vaesenc 128(%rdi), %xmm13, %xmm13 + vaesenc 128(%rdi), %xmm14, %xmm14 + vaesenc 128(%rdi), %xmm15, %xmm15 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vpslldq $8, %xmm1, %xmm5 + vpsrldq $8, %xmm1, %xmm1 + vaesenc 144(%rdi), %xmm8, %xmm8 + vpxor %xmm5, %xmm2, %xmm2 + vpxor %xmm1, %xmm3, %xmm3 + vaesenc 144(%rdi), %xmm9, %xmm9 + vpslld $31, %xmm2, %xmm7 + vpslld $30, %xmm2, %xmm4 + vpslld $25, %xmm2, %xmm5 + vaesenc 144(%rdi), %xmm10, %xmm10 + vpxor %xmm4, %xmm7, %xmm7 + vpxor %xmm5, %xmm7, %xmm7 + vaesenc 144(%rdi), %xmm11, %xmm11 + vpsrldq $4, %xmm7, %xmm4 + vpslldq $12, %xmm7, %xmm7 + vaesenc 144(%rdi), %xmm12, %xmm12 + vpxor %xmm7, %xmm2, %xmm2 + vpsrld $0x01, %xmm2, %xmm5 + vaesenc 144(%rdi), %xmm13, %xmm13 + vpsrld $2, %xmm2, %xmm1 + vpsrld $7, %xmm2, %xmm0 + vaesenc 144(%rdi), %xmm14, %xmm14 + vpxor %xmm1, %xmm5, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vaesenc 144(%rdi), %xmm15, %xmm15 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm5, %xmm2, %xmm2 + vpxor %xmm3, %xmm2, %xmm2 + cmpl $11, %esi + vmovdqa 160(%rdi), %xmm7 + jl L_AES_GCM_encrypt_update_avx1_aesenc_128_ghash_avx_done + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + 
vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 176(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + cmpl $13, %esi + vmovdqa 192(%rdi), %xmm7 + jl L_AES_GCM_encrypt_update_avx1_aesenc_128_ghash_avx_done + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 208(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 224(%rdi), %xmm7 +L_AES_GCM_encrypt_update_avx1_aesenc_128_ghash_avx_done: + vaesenclast %xmm7, %xmm8, %xmm8 + vaesenclast %xmm7, %xmm9, %xmm9 + vmovdqu (%rcx), %xmm0 + vmovdqu 16(%rcx), %xmm1 + vpxor %xmm0, %xmm8, %xmm8 + vpxor %xmm1, %xmm9, %xmm9 + vmovdqu %xmm8, (%rdx) + vmovdqu %xmm9, 16(%rdx) + vaesenclast %xmm7, %xmm10, %xmm10 + vaesenclast %xmm7, %xmm11, %xmm11 + vmovdqu 32(%rcx), %xmm0 + vmovdqu 48(%rcx), %xmm1 + vpxor %xmm0, %xmm10, %xmm10 + vpxor %xmm1, %xmm11, %xmm11 + vmovdqu %xmm10, 32(%rdx) + vmovdqu %xmm11, 48(%rdx) + vaesenclast %xmm7, %xmm12, %xmm12 + vaesenclast %xmm7, %xmm13, %xmm13 + vmovdqu 64(%rcx), %xmm0 + vmovdqu 80(%rcx), %xmm1 + vpxor %xmm0, %xmm12, %xmm12 + vpxor %xmm1, %xmm13, %xmm13 + vmovdqu %xmm12, 64(%rdx) + vmovdqu %xmm13, 80(%rdx) + vaesenclast %xmm7, %xmm14, %xmm14 + vaesenclast %xmm7, %xmm15, %xmm15 + vmovdqu 96(%rcx), %xmm0 + vmovdqu 112(%rcx), %xmm1 + vpxor %xmm0, %xmm14, %xmm14 + vpxor %xmm1, %xmm15, %xmm15 + vmovdqu %xmm14, 96(%rdx) + vmovdqu %xmm15, 112(%rdx) + addl $0x80, %r14d + cmpl %r13d, %r14d + jl L_AES_GCM_encrypt_update_avx1_ghash_128 +L_AES_GCM_encrypt_update_avx1_end_128: + vmovdqa L_avx1_aes_gcm_bswap_mask(%rip), %xmm4 + vpshufb %xmm4, %xmm8, %xmm8 + vpshufb %xmm4, %xmm9, %xmm9 + vpshufb %xmm4, %xmm10, %xmm10 + vpshufb %xmm4, %xmm11, %xmm11 + vpxor %xmm2, %xmm8, %xmm8 + vpshufb %xmm4, %xmm12, %xmm12 + vpshufb %xmm4, %xmm13, %xmm13 + vpshufb %xmm4, %xmm14, %xmm14 + vpshufb %xmm4, %xmm15, %xmm15 + vmovdqa (%rsp), %xmm7 + vmovdqa 16(%rsp), %xmm5 + # ghash_gfmul_avx + vpshufd $0x4e, %xmm15, %xmm1 + vpshufd $0x4e, %xmm7, %xmm2 + vpclmulqdq $0x11, %xmm15, %xmm7, %xmm3 + vpclmulqdq $0x00, %xmm15, %xmm7, %xmm0 + vpxor %xmm15, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqa %xmm0, %xmm4 + vmovdqa %xmm3, %xmm6 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm4, %xmm4 + vpxor %xmm1, %xmm6, %xmm6 + # ghash_gfmul_xor_avx + vpshufd $0x4e, %xmm14, %xmm1 + vpshufd $0x4e, %xmm5, %xmm2 + vpclmulqdq $0x11, %xmm14, %xmm5, %xmm3 + vpclmulqdq $0x00, %xmm14, %xmm5, %xmm0 + vpxor %xmm14, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpxor %xmm0, %xmm4, %xmm4 + vpxor %xmm3, %xmm6, %xmm6 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm4, %xmm4 + vpxor %xmm1, %xmm6, %xmm6 + vmovdqa 32(%rsp), %xmm7 + vmovdqa 48(%rsp), %xmm5 + # 
ghash_gfmul_xor_avx + vpshufd $0x4e, %xmm13, %xmm1 + vpshufd $0x4e, %xmm7, %xmm2 + vpclmulqdq $0x11, %xmm13, %xmm7, %xmm3 + vpclmulqdq $0x00, %xmm13, %xmm7, %xmm0 + vpxor %xmm13, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpxor %xmm0, %xmm4, %xmm4 + vpxor %xmm3, %xmm6, %xmm6 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm4, %xmm4 + vpxor %xmm1, %xmm6, %xmm6 + # ghash_gfmul_xor_avx + vpshufd $0x4e, %xmm12, %xmm1 + vpshufd $0x4e, %xmm5, %xmm2 + vpclmulqdq $0x11, %xmm12, %xmm5, %xmm3 + vpclmulqdq $0x00, %xmm12, %xmm5, %xmm0 + vpxor %xmm12, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpxor %xmm0, %xmm4, %xmm4 + vpxor %xmm3, %xmm6, %xmm6 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm4, %xmm4 + vpxor %xmm1, %xmm6, %xmm6 + vmovdqa 64(%rsp), %xmm7 + vmovdqa 80(%rsp), %xmm5 + # ghash_gfmul_xor_avx + vpshufd $0x4e, %xmm11, %xmm1 + vpshufd $0x4e, %xmm7, %xmm2 + vpclmulqdq $0x11, %xmm11, %xmm7, %xmm3 + vpclmulqdq $0x00, %xmm11, %xmm7, %xmm0 + vpxor %xmm11, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpxor %xmm0, %xmm4, %xmm4 + vpxor %xmm3, %xmm6, %xmm6 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm4, %xmm4 + vpxor %xmm1, %xmm6, %xmm6 + # ghash_gfmul_xor_avx + vpshufd $0x4e, %xmm10, %xmm1 + vpshufd $0x4e, %xmm5, %xmm2 + vpclmulqdq $0x11, %xmm10, %xmm5, %xmm3 + vpclmulqdq $0x00, %xmm10, %xmm5, %xmm0 + vpxor %xmm10, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpxor %xmm0, %xmm4, %xmm4 + vpxor %xmm3, %xmm6, %xmm6 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm4, %xmm4 + vpxor %xmm1, %xmm6, %xmm6 + vmovdqa 96(%rsp), %xmm7 + vmovdqa 112(%rsp), %xmm5 + # ghash_gfmul_xor_avx + vpshufd $0x4e, %xmm9, %xmm1 + vpshufd $0x4e, %xmm7, %xmm2 + vpclmulqdq $0x11, %xmm9, %xmm7, %xmm3 + vpclmulqdq $0x00, %xmm9, %xmm7, %xmm0 + vpxor %xmm9, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpxor %xmm0, %xmm4, %xmm4 + vpxor %xmm3, %xmm6, %xmm6 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm4, %xmm4 + vpxor %xmm1, %xmm6, %xmm6 + # ghash_gfmul_xor_avx + vpshufd $0x4e, %xmm8, %xmm1 + vpshufd $0x4e, %xmm5, %xmm2 + vpclmulqdq $0x11, %xmm8, %xmm5, %xmm3 + vpclmulqdq $0x00, %xmm8, %xmm5, %xmm0 + vpxor %xmm8, %xmm1, %xmm1 + vpxor %xmm5, %xmm2, %xmm2 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vpxor %xmm0, %xmm4, %xmm4 + vpxor %xmm3, %xmm6, %xmm6 + vpslldq $8, %xmm1, %xmm2 + vpsrldq $8, %xmm1, %xmm1 + vpxor %xmm2, %xmm4, %xmm4 + vpxor %xmm1, %xmm6, %xmm6 + vpslld $31, %xmm4, %xmm0 + vpslld $30, %xmm4, %xmm1 + vpslld $25, %xmm4, %xmm2 + vpxor %xmm1, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vmovdqa %xmm0, %xmm1 + vpsrldq $4, %xmm1, %xmm1 + vpslldq $12, %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + vpsrld $0x01, %xmm4, %xmm2 + vpsrld $2, %xmm4, %xmm3 + vpsrld $7, %xmm4, %xmm0 + vpxor %xmm3, %xmm2, %xmm2 + vpxor %xmm0, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm4, %xmm2, %xmm2 + vpxor %xmm2, %xmm6, %xmm6 + vmovdqa (%rsp), %xmm5 +L_AES_GCM_encrypt_update_avx1_done_128: + movl %r8d, 
%edx + cmpl %edx, %r14d + jge L_AES_GCM_encrypt_update_avx1_done_enc + movl %r8d, %r13d + andl $0xfffffff0, %r13d + cmpl %r13d, %r14d + jge L_AES_GCM_encrypt_update_avx1_last_block_done + vmovdqa (%r12), %xmm9 + vpshufb L_avx1_aes_gcm_bswap_epi64(%rip), %xmm9, %xmm8 + vpaddd L_avx1_aes_gcm_one(%rip), %xmm9, %xmm9 + vmovdqa %xmm9, (%r12) + vpxor (%rdi), %xmm8, %xmm8 + vaesenc 16(%rdi), %xmm8, %xmm8 + vaesenc 32(%rdi), %xmm8, %xmm8 + vaesenc 48(%rdi), %xmm8, %xmm8 + vaesenc 64(%rdi), %xmm8, %xmm8 + vaesenc 80(%rdi), %xmm8, %xmm8 + vaesenc 96(%rdi), %xmm8, %xmm8 + vaesenc 112(%rdi), %xmm8, %xmm8 + vaesenc 128(%rdi), %xmm8, %xmm8 + vaesenc 144(%rdi), %xmm8, %xmm8 + cmpl $11, %esi + vmovdqa 160(%rdi), %xmm9 + jl L_AES_GCM_encrypt_update_avx1_aesenc_block_last + vaesenc %xmm9, %xmm8, %xmm8 + vaesenc 176(%rdi), %xmm8, %xmm8 + cmpl $13, %esi + vmovdqa 192(%rdi), %xmm9 + jl L_AES_GCM_encrypt_update_avx1_aesenc_block_last + vaesenc %xmm9, %xmm8, %xmm8 + vaesenc 208(%rdi), %xmm8, %xmm8 + vmovdqa 224(%rdi), %xmm9 +L_AES_GCM_encrypt_update_avx1_aesenc_block_last: + vaesenclast %xmm9, %xmm8, %xmm8 + vmovdqu (%r11,%r14,1), %xmm9 + vpxor %xmm9, %xmm8, %xmm8 + vmovdqu %xmm8, (%r10,%r14,1) + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8 + vpxor %xmm8, %xmm6, %xmm6 + addl $16, %r14d + cmpl %r13d, %r14d + jge L_AES_GCM_encrypt_update_avx1_last_block_ghash +L_AES_GCM_encrypt_update_avx1_last_block_start: + vmovdqu (%r11,%r14,1), %xmm13 + vmovdqa (%r12), %xmm9 + vpshufb L_avx1_aes_gcm_bswap_epi64(%rip), %xmm9, %xmm8 + vpaddd L_avx1_aes_gcm_one(%rip), %xmm9, %xmm9 + vmovdqa %xmm9, (%r12) + vpxor (%rdi), %xmm8, %xmm8 + vpclmulqdq $16, %xmm5, %xmm6, %xmm10 + vaesenc 16(%rdi), %xmm8, %xmm8 + vaesenc 32(%rdi), %xmm8, %xmm8 + vpclmulqdq $0x01, %xmm5, %xmm6, %xmm11 + vaesenc 48(%rdi), %xmm8, %xmm8 + vaesenc 64(%rdi), %xmm8, %xmm8 + vpclmulqdq $0x00, %xmm5, %xmm6, %xmm12 + vaesenc 80(%rdi), %xmm8, %xmm8 + vpclmulqdq $0x11, %xmm5, %xmm6, %xmm1 + vaesenc 96(%rdi), %xmm8, %xmm8 + vpxor %xmm11, %xmm10, %xmm10 + vpslldq $8, %xmm10, %xmm2 + vpsrldq $8, %xmm10, %xmm10 + vaesenc 112(%rdi), %xmm8, %xmm8 + vpxor %xmm12, %xmm2, %xmm2 + vpxor %xmm10, %xmm1, %xmm3 + vmovdqa L_avx1_aes_gcm_mod2_128(%rip), %xmm0 + vpclmulqdq $16, %xmm0, %xmm2, %xmm11 + vaesenc 128(%rdi), %xmm8, %xmm8 + vpshufd $0x4e, %xmm2, %xmm10 + vpxor %xmm11, %xmm10, %xmm10 + vpclmulqdq $16, %xmm0, %xmm10, %xmm11 + vaesenc 144(%rdi), %xmm8, %xmm8 + vpshufd $0x4e, %xmm10, %xmm10 + vpxor %xmm11, %xmm10, %xmm10 + vpxor %xmm3, %xmm10, %xmm6 + cmpl $11, %esi + vmovdqa 160(%rdi), %xmm9 + jl L_AES_GCM_encrypt_update_avx1_aesenc_gfmul_last + vaesenc %xmm9, %xmm8, %xmm8 + vaesenc 176(%rdi), %xmm8, %xmm8 + cmpl $13, %esi + vmovdqa 192(%rdi), %xmm9 + jl L_AES_GCM_encrypt_update_avx1_aesenc_gfmul_last + vaesenc %xmm9, %xmm8, %xmm8 + vaesenc 208(%rdi), %xmm8, %xmm8 + vmovdqa 224(%rdi), %xmm9 +L_AES_GCM_encrypt_update_avx1_aesenc_gfmul_last: + vaesenclast %xmm9, %xmm8, %xmm8 + vmovdqa %xmm13, %xmm0 + vpxor %xmm0, %xmm8, %xmm8 + vmovdqu %xmm8, (%r10,%r14,1) + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm8, %xmm8 + addl $16, %r14d + vpxor %xmm8, %xmm6, %xmm6 + cmpl %r13d, %r14d + jl L_AES_GCM_encrypt_update_avx1_last_block_start +L_AES_GCM_encrypt_update_avx1_last_block_ghash: + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm5, %xmm9 + vpshufd $0x4e, %xmm6, %xmm10 + vpclmulqdq $0x11, %xmm5, %xmm6, %xmm11 + vpclmulqdq $0x00, %xmm5, %xmm6, %xmm8 + vpxor %xmm5, %xmm9, %xmm9 + vpxor %xmm6, %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + 
vpxor %xmm11, %xmm9, %xmm9 + vpslldq $8, %xmm9, %xmm10 + vpsrldq $8, %xmm9, %xmm9 + vpxor %xmm10, %xmm8, %xmm8 + vpxor %xmm9, %xmm11, %xmm6 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm6, %xmm6 +L_AES_GCM_encrypt_update_avx1_last_block_done: +L_AES_GCM_encrypt_update_avx1_done_enc: + vmovdqa %xmm6, (%r9) + vzeroupper + addq $0xa0, %rsp + popq %r14 + popq %r12 + popq %r13 + repz retq +#ifndef __APPLE__ +.size AES_GCM_encrypt_update_avx1,.-AES_GCM_encrypt_update_avx1 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl AES_GCM_encrypt_final_avx1 +.type AES_GCM_encrypt_final_avx1,@function +.align 16 +AES_GCM_encrypt_final_avx1: +#else +.section __TEXT,__text +.globl _AES_GCM_encrypt_final_avx1 +.p2align 4 +_AES_GCM_encrypt_final_avx1: +#endif /* __APPLE__ */ + pushq %r13 + movq %rdx, %rax + movl %ecx, %r10d + movl %r8d, %r11d + movq 16(%rsp), %r8 + subq $16, %rsp + vmovdqa (%rdi), %xmm4 + vmovdqa (%r9), %xmm5 + vmovdqa (%r8), %xmm6 + vpsrlq $63, %xmm5, %xmm9 + vpsllq $0x01, %xmm5, %xmm8 + vpslldq $8, %xmm9, %xmm9 + vpor %xmm9, %xmm8, %xmm8 + vpshufd $0xff, %xmm5, %xmm5 + vpsrad $31, %xmm5, %xmm5 + vpand L_avx1_aes_gcm_mod2_128(%rip), %xmm5, %xmm5 + vpxor %xmm8, %xmm5, %xmm5 + movl %r10d, %edx + movl %r11d, %ecx + shlq $3, %rdx + shlq $3, %rcx + vpinsrq $0x00, %rdx, %xmm0, %xmm0 + vpinsrq $0x01, %rcx, %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm5, %xmm9 + vpshufd $0x4e, %xmm4, %xmm10 + vpclmulqdq $0x11, %xmm5, %xmm4, %xmm11 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm8 + vpxor %xmm5, %xmm9, %xmm9 + vpxor %xmm4, %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpxor %xmm11, %xmm9, %xmm9 + vpslldq $8, %xmm9, %xmm10 + vpsrldq $8, %xmm9, %xmm9 + vpxor %xmm10, %xmm8, %xmm8 + vpxor %xmm9, %xmm11, %xmm4 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm4, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm4, %xmm4 + vpxor %xmm6, %xmm4, %xmm0 + cmpl $16, %eax + je L_AES_GCM_encrypt_final_avx1_store_tag_16 + xorq %rcx, %rcx + vmovdqu %xmm0, (%rsp) +L_AES_GCM_encrypt_final_avx1_store_tag_loop: + movzbl (%rsp,%rcx,1), %r13d + movb %r13b, (%rsi,%rcx,1) + incl %ecx + cmpl %eax, %ecx + jne L_AES_GCM_encrypt_final_avx1_store_tag_loop + jmp L_AES_GCM_encrypt_final_avx1_store_tag_done +L_AES_GCM_encrypt_final_avx1_store_tag_16: + vmovdqu %xmm0, (%rsi) +L_AES_GCM_encrypt_final_avx1_store_tag_done: + vzeroupper + addq $16, %rsp + popq %r13 + repz retq +#ifndef __APPLE__ +.size AES_GCM_encrypt_final_avx1,.-AES_GCM_encrypt_final_avx1 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl AES_GCM_decrypt_update_avx1 +.type AES_GCM_decrypt_update_avx1,@function +.align 16 +AES_GCM_decrypt_update_avx1: +#else +.section 
__TEXT,__text +.globl _AES_GCM_decrypt_update_avx1 +.p2align 4 +_AES_GCM_decrypt_update_avx1: +#endif /* __APPLE__ */ + pushq %r13 + pushq %r12 + pushq %r14 + movq %rdx, %r10 + movq %rcx, %r11 + movq 32(%rsp), %rax + movq 40(%rsp), %r12 + subq $0xa8, %rsp + vmovdqa (%r9), %xmm6 + vmovdqa (%rax), %xmm5 + vpsrlq $63, %xmm5, %xmm9 + vpsllq $0x01, %xmm5, %xmm8 + vpslldq $8, %xmm9, %xmm9 + vpor %xmm9, %xmm8, %xmm8 + vpshufd $0xff, %xmm5, %xmm5 + vpsrad $31, %xmm5, %xmm5 + vpand L_avx1_aes_gcm_mod2_128(%rip), %xmm5, %xmm5 + vpxor %xmm8, %xmm5, %xmm5 + xorl %r14d, %r14d + cmpl $0x80, %r8d + movl %r8d, %r13d + jl L_AES_GCM_decrypt_update_avx1_done_128 + andl $0xffffff80, %r13d + vmovdqa %xmm6, %xmm2 + # H ^ 1 + vmovdqa %xmm5, (%rsp) + # H ^ 2 + vpclmulqdq $0x00, %xmm5, %xmm5, %xmm8 + vpclmulqdq $0x11, %xmm5, %xmm5, %xmm0 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm0, %xmm0 + vmovdqa %xmm0, 16(%rsp) + # H ^ 3 + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm5, %xmm9 + vpshufd $0x4e, %xmm0, %xmm10 + vpclmulqdq $0x11, %xmm5, %xmm0, %xmm11 + vpclmulqdq $0x00, %xmm5, %xmm0, %xmm8 + vpxor %xmm5, %xmm9, %xmm9 + vpxor %xmm0, %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpxor %xmm11, %xmm9, %xmm9 + vpslldq $8, %xmm9, %xmm10 + vpsrldq $8, %xmm9, %xmm9 + vpxor %xmm10, %xmm8, %xmm8 + vpxor %xmm9, %xmm11, %xmm1 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm1, %xmm1 + vmovdqa %xmm1, 32(%rsp) + # H ^ 4 + vpclmulqdq $0x00, %xmm0, %xmm0, %xmm8 + vpclmulqdq $0x11, %xmm0, %xmm0, %xmm3 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm3, %xmm3 + vmovdqa %xmm3, 48(%rsp) + # H ^ 5 + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm0, %xmm9 + vpshufd $0x4e, %xmm1, %xmm10 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm11 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm8 + vpxor %xmm0, %xmm9, %xmm9 + vpxor %xmm1, %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpxor %xmm11, %xmm9, %xmm9 + vpslldq $8, %xmm9, %xmm10 + vpsrldq $8, %xmm9, %xmm9 + vpxor %xmm10, %xmm8, %xmm8 + vpxor %xmm9, %xmm11, %xmm7 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + 
vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm7, %xmm7 + vmovdqa %xmm7, 64(%rsp) + # H ^ 6 + vpclmulqdq $0x00, %xmm1, %xmm1, %xmm8 + vpclmulqdq $0x11, %xmm1, %xmm1, %xmm7 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm7, %xmm7 + vmovdqa %xmm7, 80(%rsp) + # H ^ 7 + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm1, %xmm9 + vpshufd $0x4e, %xmm3, %xmm10 + vpclmulqdq $0x11, %xmm1, %xmm3, %xmm11 + vpclmulqdq $0x00, %xmm1, %xmm3, %xmm8 + vpxor %xmm1, %xmm9, %xmm9 + vpxor %xmm3, %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpxor %xmm11, %xmm9, %xmm9 + vpslldq $8, %xmm9, %xmm10 + vpsrldq $8, %xmm9, %xmm9 + vpxor %xmm10, %xmm8, %xmm8 + vpxor %xmm9, %xmm11, %xmm7 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm7, %xmm7 + vmovdqa %xmm7, 96(%rsp) + # H ^ 8 + vpclmulqdq $0x00, %xmm3, %xmm3, %xmm8 + vpclmulqdq $0x11, %xmm3, %xmm3, %xmm7 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm7, %xmm7 + vmovdqa %xmm7, 112(%rsp) +L_AES_GCM_decrypt_update_avx1_ghash_128: + leaq (%r11,%r14,1), %rcx + leaq (%r10,%r14,1), %rdx + vmovdqa (%r12), %xmm0 + vmovdqa L_avx1_aes_gcm_bswap_epi64(%rip), %xmm1 + vpshufb %xmm1, %xmm0, %xmm8 + vpaddd L_avx1_aes_gcm_one(%rip), %xmm0, %xmm9 + vpshufb %xmm1, %xmm9, %xmm9 + vpaddd L_avx1_aes_gcm_two(%rip), %xmm0, %xmm10 + vpshufb %xmm1, %xmm10, %xmm10 + vpaddd L_avx1_aes_gcm_three(%rip), %xmm0, %xmm11 + vpshufb %xmm1, %xmm11, %xmm11 + vpaddd L_avx1_aes_gcm_four(%rip), %xmm0, %xmm12 + vpshufb %xmm1, %xmm12, %xmm12 + vpaddd L_avx1_aes_gcm_five(%rip), %xmm0, %xmm13 + vpshufb %xmm1, %xmm13, %xmm13 + vpaddd L_avx1_aes_gcm_six(%rip), %xmm0, %xmm14 + vpshufb %xmm1, %xmm14, %xmm14 + vpaddd L_avx1_aes_gcm_seven(%rip), %xmm0, %xmm15 + vpshufb %xmm1, %xmm15, %xmm15 + vpaddd L_avx1_aes_gcm_eight(%rip), %xmm0, %xmm0 + vmovdqa (%rdi), %xmm7 + vmovdqa %xmm0, (%r12) + vpxor %xmm7, %xmm8, %xmm8 + vpxor %xmm7, %xmm9, %xmm9 + vpxor %xmm7, %xmm10, %xmm10 + vpxor %xmm7, %xmm11, %xmm11 + vpxor %xmm7, %xmm12, %xmm12 + vpxor %xmm7, %xmm13, %xmm13 + vpxor %xmm7, %xmm14, %xmm14 + vpxor %xmm7, %xmm15, %xmm15 + vmovdqa 112(%rsp), %xmm7 + vmovdqu (%rcx), %xmm0 + vaesenc 16(%rdi), %xmm8, %xmm8 + vpshufb 
L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vpshufd $0x4e, %xmm7, %xmm1 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm3 + vaesenc 16(%rdi), %xmm9, %xmm9 + vaesenc 16(%rdi), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm2 + vaesenc 16(%rdi), %xmm11, %xmm11 + vaesenc 16(%rdi), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm1, %xmm1 + vaesenc 16(%rdi), %xmm13, %xmm13 + vaesenc 16(%rdi), %xmm14, %xmm14 + vaesenc 16(%rdi), %xmm15, %xmm15 + vpxor %xmm2, %xmm1, %xmm1 + vpxor %xmm3, %xmm1, %xmm1 + vmovdqa 96(%rsp), %xmm7 + vmovdqu 16(%rcx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vaesenc 32(%rdi), %xmm8, %xmm8 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vaesenc 32(%rdi), %xmm9, %xmm9 + vaesenc 32(%rdi), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vaesenc 32(%rdi), %xmm11, %xmm11 + vaesenc 32(%rdi), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vaesenc 32(%rdi), %xmm13, %xmm13 + vaesenc 32(%rdi), %xmm14, %xmm14 + vaesenc 32(%rdi), %xmm15, %xmm15 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqa 80(%rsp), %xmm7 + vmovdqu 32(%rcx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vaesenc 48(%rdi), %xmm8, %xmm8 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vaesenc 48(%rdi), %xmm9, %xmm9 + vaesenc 48(%rdi), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vaesenc 48(%rdi), %xmm11, %xmm11 + vaesenc 48(%rdi), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vaesenc 48(%rdi), %xmm13, %xmm13 + vaesenc 48(%rdi), %xmm14, %xmm14 + vaesenc 48(%rdi), %xmm15, %xmm15 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqa 64(%rsp), %xmm7 + vmovdqu 48(%rcx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vaesenc 64(%rdi), %xmm8, %xmm8 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vaesenc 64(%rdi), %xmm9, %xmm9 + vaesenc 64(%rdi), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vaesenc 64(%rdi), %xmm11, %xmm11 + vaesenc 64(%rdi), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vaesenc 64(%rdi), %xmm13, %xmm13 + vaesenc 64(%rdi), %xmm14, %xmm14 + vaesenc 64(%rdi), %xmm15, %xmm15 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqa 48(%rsp), %xmm7 + vmovdqu 64(%rcx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vaesenc 80(%rdi), %xmm8, %xmm8 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vaesenc 80(%rdi), %xmm9, %xmm9 + vaesenc 80(%rdi), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vaesenc 80(%rdi), %xmm11, %xmm11 + vaesenc 80(%rdi), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vaesenc 80(%rdi), %xmm13, %xmm13 + vaesenc 80(%rdi), %xmm14, %xmm14 + vaesenc 80(%rdi), %xmm15, %xmm15 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, 
%xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqa 32(%rsp), %xmm7 + vmovdqu 80(%rcx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vaesenc 96(%rdi), %xmm8, %xmm8 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vaesenc 96(%rdi), %xmm9, %xmm9 + vaesenc 96(%rdi), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vaesenc 96(%rdi), %xmm11, %xmm11 + vaesenc 96(%rdi), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vaesenc 96(%rdi), %xmm13, %xmm13 + vaesenc 96(%rdi), %xmm14, %xmm14 + vaesenc 96(%rdi), %xmm15, %xmm15 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqa 16(%rsp), %xmm7 + vmovdqu 96(%rcx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vaesenc 112(%rdi), %xmm8, %xmm8 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vaesenc 112(%rdi), %xmm9, %xmm9 + vaesenc 112(%rdi), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vaesenc 112(%rdi), %xmm11, %xmm11 + vaesenc 112(%rdi), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vaesenc 112(%rdi), %xmm13, %xmm13 + vaesenc 112(%rdi), %xmm14, %xmm14 + vaesenc 112(%rdi), %xmm15, %xmm15 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vmovdqa (%rsp), %xmm7 + vmovdqu 112(%rcx), %xmm0 + vpshufd $0x4e, %xmm7, %xmm4 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vaesenc 128(%rdi), %xmm8, %xmm8 + vpxor %xmm7, %xmm4, %xmm4 + vpshufd $0x4e, %xmm0, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vpclmulqdq $0x11, %xmm7, %xmm0, %xmm6 + vaesenc 128(%rdi), %xmm9, %xmm9 + vaesenc 128(%rdi), %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm7, %xmm0, %xmm7 + vaesenc 128(%rdi), %xmm11, %xmm11 + vaesenc 128(%rdi), %xmm12, %xmm12 + vpclmulqdq $0x00, %xmm5, %xmm4, %xmm4 + vaesenc 128(%rdi), %xmm13, %xmm13 + vaesenc 128(%rdi), %xmm14, %xmm14 + vaesenc 128(%rdi), %xmm15, %xmm15 + vpxor %xmm7, %xmm1, %xmm1 + vpxor %xmm7, %xmm2, %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpxor %xmm6, %xmm3, %xmm3 + vpxor %xmm4, %xmm1, %xmm1 + vpslldq $8, %xmm1, %xmm5 + vpsrldq $8, %xmm1, %xmm1 + vaesenc 144(%rdi), %xmm8, %xmm8 + vpxor %xmm5, %xmm2, %xmm2 + vpxor %xmm1, %xmm3, %xmm3 + vaesenc 144(%rdi), %xmm9, %xmm9 + vpslld $31, %xmm2, %xmm7 + vpslld $30, %xmm2, %xmm4 + vpslld $25, %xmm2, %xmm5 + vaesenc 144(%rdi), %xmm10, %xmm10 + vpxor %xmm4, %xmm7, %xmm7 + vpxor %xmm5, %xmm7, %xmm7 + vaesenc 144(%rdi), %xmm11, %xmm11 + vpsrldq $4, %xmm7, %xmm4 + vpslldq $12, %xmm7, %xmm7 + vaesenc 144(%rdi), %xmm12, %xmm12 + vpxor %xmm7, %xmm2, %xmm2 + vpsrld $0x01, %xmm2, %xmm5 + vaesenc 144(%rdi), %xmm13, %xmm13 + vpsrld $2, %xmm2, %xmm1 + vpsrld $7, %xmm2, %xmm0 + vaesenc 144(%rdi), %xmm14, %xmm14 + vpxor %xmm1, %xmm5, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + vaesenc 144(%rdi), %xmm15, %xmm15 + vpxor %xmm4, %xmm5, %xmm5 + vpxor %xmm5, %xmm2, %xmm2 + vpxor %xmm3, %xmm2, %xmm2 + cmpl $11, %esi + vmovdqa 160(%rdi), %xmm7 + jl L_AES_GCM_decrypt_update_avx1_aesenc_128_ghash_avx_done + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + 
vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 176(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + cmpl $13, %esi + vmovdqa 192(%rdi), %xmm7 + jl L_AES_GCM_decrypt_update_avx1_aesenc_128_ghash_avx_done + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 208(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 224(%rdi), %xmm7 +L_AES_GCM_decrypt_update_avx1_aesenc_128_ghash_avx_done: + vaesenclast %xmm7, %xmm8, %xmm8 + vaesenclast %xmm7, %xmm9, %xmm9 + vmovdqu (%rcx), %xmm0 + vmovdqu 16(%rcx), %xmm1 + vpxor %xmm0, %xmm8, %xmm8 + vpxor %xmm1, %xmm9, %xmm9 + vmovdqu %xmm8, (%rdx) + vmovdqu %xmm9, 16(%rdx) + vaesenclast %xmm7, %xmm10, %xmm10 + vaesenclast %xmm7, %xmm11, %xmm11 + vmovdqu 32(%rcx), %xmm0 + vmovdqu 48(%rcx), %xmm1 + vpxor %xmm0, %xmm10, %xmm10 + vpxor %xmm1, %xmm11, %xmm11 + vmovdqu %xmm10, 32(%rdx) + vmovdqu %xmm11, 48(%rdx) + vaesenclast %xmm7, %xmm12, %xmm12 + vaesenclast %xmm7, %xmm13, %xmm13 + vmovdqu 64(%rcx), %xmm0 + vmovdqu 80(%rcx), %xmm1 + vpxor %xmm0, %xmm12, %xmm12 + vpxor %xmm1, %xmm13, %xmm13 + vmovdqu %xmm12, 64(%rdx) + vmovdqu %xmm13, 80(%rdx) + vaesenclast %xmm7, %xmm14, %xmm14 + vaesenclast %xmm7, %xmm15, %xmm15 + vmovdqu 96(%rcx), %xmm0 + vmovdqu 112(%rcx), %xmm1 + vpxor %xmm0, %xmm14, %xmm14 + vpxor %xmm1, %xmm15, %xmm15 + vmovdqu %xmm14, 96(%rdx) + vmovdqu %xmm15, 112(%rdx) + addl $0x80, %r14d + cmpl %r13d, %r14d + jl L_AES_GCM_decrypt_update_avx1_ghash_128 + vmovdqa %xmm2, %xmm6 + vmovdqa (%rsp), %xmm5 +L_AES_GCM_decrypt_update_avx1_done_128: + movl %r8d, %edx + cmpl %edx, %r14d + jge L_AES_GCM_decrypt_update_avx1_done_dec + movl %r8d, %r13d + andl $0xfffffff0, %r13d + cmpl %r13d, %r14d + jge L_AES_GCM_decrypt_update_avx1_last_block_done +L_AES_GCM_decrypt_update_avx1_last_block_start: + vmovdqu (%r11,%r14,1), %xmm13 + vmovdqa %xmm5, %xmm0 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm13, %xmm1 + vpxor %xmm6, %xmm1, %xmm1 + vmovdqa (%r12), %xmm9 + vpshufb L_avx1_aes_gcm_bswap_epi64(%rip), %xmm9, %xmm8 + vpaddd L_avx1_aes_gcm_one(%rip), %xmm9, %xmm9 + vmovdqa %xmm9, (%r12) + vpxor (%rdi), %xmm8, %xmm8 + vpclmulqdq $16, %xmm0, %xmm1, %xmm10 + vaesenc 16(%rdi), %xmm8, %xmm8 + vaesenc 32(%rdi), %xmm8, %xmm8 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm11 + vaesenc 48(%rdi), %xmm8, %xmm8 + vaesenc 64(%rdi), %xmm8, %xmm8 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm12 + vaesenc 80(%rdi), %xmm8, %xmm8 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vaesenc 96(%rdi), %xmm8, %xmm8 + vpxor %xmm11, %xmm10, %xmm10 + vpslldq $8, %xmm10, %xmm2 + vpsrldq $8, %xmm10, %xmm10 + vaesenc 112(%rdi), %xmm8, %xmm8 + vpxor %xmm12, %xmm2, %xmm2 + vpxor %xmm10, %xmm1, %xmm3 + vmovdqa L_avx1_aes_gcm_mod2_128(%rip), %xmm0 + vpclmulqdq $16, %xmm0, %xmm2, %xmm11 + vaesenc 128(%rdi), %xmm8, %xmm8 + vpshufd $0x4e, %xmm2, %xmm10 + vpxor %xmm11, %xmm10, %xmm10 + vpclmulqdq $16, %xmm0, %xmm10, %xmm11 + vaesenc 144(%rdi), %xmm8, %xmm8 + vpshufd $0x4e, %xmm10, %xmm10 + vpxor 
%xmm11, %xmm10, %xmm10 + vpxor %xmm3, %xmm10, %xmm6 + cmpl $11, %esi + vmovdqa 160(%rdi), %xmm9 + jl L_AES_GCM_decrypt_update_avx1_aesenc_gfmul_last + vaesenc %xmm9, %xmm8, %xmm8 + vaesenc 176(%rdi), %xmm8, %xmm8 + cmpl $13, %esi + vmovdqa 192(%rdi), %xmm9 + jl L_AES_GCM_decrypt_update_avx1_aesenc_gfmul_last + vaesenc %xmm9, %xmm8, %xmm8 + vaesenc 208(%rdi), %xmm8, %xmm8 + vmovdqa 224(%rdi), %xmm9 +L_AES_GCM_decrypt_update_avx1_aesenc_gfmul_last: + vaesenclast %xmm9, %xmm8, %xmm8 + vmovdqa %xmm13, %xmm0 + vpxor %xmm0, %xmm8, %xmm8 + vmovdqu %xmm8, (%r10,%r14,1) + addl $16, %r14d + cmpl %r13d, %r14d + jl L_AES_GCM_decrypt_update_avx1_last_block_start +L_AES_GCM_decrypt_update_avx1_last_block_done: +L_AES_GCM_decrypt_update_avx1_done_dec: + vmovdqa %xmm6, (%r9) + vzeroupper + addq $0xa8, %rsp + popq %r14 + popq %r12 + popq %r13 + repz retq +#ifndef __APPLE__ +.size AES_GCM_decrypt_update_avx1,.-AES_GCM_decrypt_update_avx1 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl AES_GCM_decrypt_final_avx1 +.type AES_GCM_decrypt_final_avx1,@function +.align 16 +AES_GCM_decrypt_final_avx1: +#else +.section __TEXT,__text +.globl _AES_GCM_decrypt_final_avx1 +.p2align 4 +_AES_GCM_decrypt_final_avx1: +#endif /* __APPLE__ */ + pushq %r13 + pushq %rbp + pushq %r12 + movq %rdx, %rax + movl %ecx, %r10d + movl %r8d, %r11d + movq 32(%rsp), %r8 + movq 40(%rsp), %rbp + subq $16, %rsp + vmovdqa (%rdi), %xmm6 + vmovdqa (%r9), %xmm5 + vmovdqa (%r8), %xmm15 + vpsrlq $63, %xmm5, %xmm9 + vpsllq $0x01, %xmm5, %xmm8 + vpslldq $8, %xmm9, %xmm9 + vpor %xmm9, %xmm8, %xmm8 + vpshufd $0xff, %xmm5, %xmm5 + vpsrad $31, %xmm5, %xmm5 + vpand L_avx1_aes_gcm_mod2_128(%rip), %xmm5, %xmm5 + vpxor %xmm8, %xmm5, %xmm5 + movl %r10d, %edx + movl %r11d, %ecx + shlq $3, %rdx + shlq $3, %rcx + vpinsrq $0x00, %rdx, %xmm0, %xmm0 + vpinsrq $0x01, %rcx, %xmm0, %xmm0 + vpxor %xmm0, %xmm6, %xmm6 + # ghash_gfmul_red_avx + vpshufd $0x4e, %xmm5, %xmm9 + vpshufd $0x4e, %xmm6, %xmm10 + vpclmulqdq $0x11, %xmm5, %xmm6, %xmm11 + vpclmulqdq $0x00, %xmm5, %xmm6, %xmm8 + vpxor %xmm5, %xmm9, %xmm9 + vpxor %xmm6, %xmm10, %xmm10 + vpclmulqdq $0x00, %xmm10, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpxor %xmm11, %xmm9, %xmm9 + vpslldq $8, %xmm9, %xmm10 + vpsrldq $8, %xmm9, %xmm9 + vpxor %xmm10, %xmm8, %xmm8 + vpxor %xmm9, %xmm11, %xmm6 + vpslld $31, %xmm8, %xmm12 + vpslld $30, %xmm8, %xmm13 + vpslld $25, %xmm8, %xmm14 + vpxor %xmm13, %xmm12, %xmm12 + vpxor %xmm14, %xmm12, %xmm12 + vpsrldq $4, %xmm12, %xmm13 + vpslldq $12, %xmm12, %xmm12 + vpxor %xmm12, %xmm8, %xmm8 + vpsrld $0x01, %xmm8, %xmm14 + vpsrld $2, %xmm8, %xmm10 + vpsrld $7, %xmm8, %xmm9 + vpxor %xmm10, %xmm14, %xmm14 + vpxor %xmm9, %xmm14, %xmm14 + vpxor %xmm13, %xmm14, %xmm14 + vpxor %xmm8, %xmm14, %xmm14 + vpxor %xmm14, %xmm6, %xmm6 + vpshufb L_avx1_aes_gcm_bswap_mask(%rip), %xmm6, %xmm6 + vpxor %xmm15, %xmm6, %xmm0 + cmpl $16, %eax + je L_AES_GCM_decrypt_final_avx1_cmp_tag_16 + subq $16, %rsp + xorq %rcx, %rcx + xorq %r12, %r12 + vmovdqu %xmm0, (%rsp) +L_AES_GCM_decrypt_final_avx1_cmp_tag_loop: + movzbl (%rsp,%rcx,1), %r13d + xorb (%rsi,%rcx,1), %r13b + orb %r13b, %r12b + incl %ecx + cmpl %eax, %ecx + jne L_AES_GCM_decrypt_final_avx1_cmp_tag_loop + cmpb $0x00, %r12b + sete %r12b + addq $16, %rsp + xorq %rcx, %rcx + jmp L_AES_GCM_decrypt_final_avx1_cmp_tag_done +L_AES_GCM_decrypt_final_avx1_cmp_tag_16: + vmovdqu (%rsi), %xmm1 + vpcmpeqb %xmm1, %xmm0, %xmm0 + vpmovmskb %xmm0, %rdx + # %%edx == 0xFFFF then return 1 else => return 0 + xorl %r12d, %r12d + cmpl $0xffff, %edx + sete %r12b 
+L_AES_GCM_decrypt_final_avx1_cmp_tag_done: + movl %r12d, (%rbp) + vzeroupper + addq $16, %rsp + popq %r12 + popq %rbp + popq %r13 + repz retq +#ifndef __APPLE__ +.size AES_GCM_decrypt_final_avx1,.-AES_GCM_decrypt_final_avx1 +#endif /* __APPLE__ */ +#endif /* WOLFSSL_AESGCM_STREAM */ #endif /* HAVE_INTEL_AVX1 */ #ifdef HAVE_INTEL_AVX2 #ifndef __APPLE__ @@ -6561,7 +11672,7 @@ L_AES_GCM_encrypt_avx2_calc_iv_16_loop: L_AES_GCM_encrypt_avx2_calc_iv_lt16: vpxor %xmm0, %xmm0, %xmm0 xorl %ebx, %ebx - vmovdqa %xmm0, (%rsp) + vmovdqu %xmm0, (%rsp) L_AES_GCM_encrypt_avx2_calc_iv_loop: movzbl (%rax,%rcx,1), %r13d movb %r13b, (%rsp,%rbx,1) @@ -6569,7 +11680,7 @@ L_AES_GCM_encrypt_avx2_calc_iv_loop: incl %ebx cmpl %edx, %ecx jl L_AES_GCM_encrypt_avx2_calc_iv_loop - vmovdqa (%rsp), %xmm0 + vmovdqu (%rsp), %xmm0 vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 vpxor %xmm0, %xmm4, %xmm4 # ghash_gfmul_avx @@ -6773,7 +11884,7 @@ L_AES_GCM_encrypt_avx2_calc_aad_16_loop: L_AES_GCM_encrypt_avx2_calc_aad_lt16: vpxor %xmm0, %xmm0, %xmm0 xorl %ebx, %ebx - vmovdqa %xmm0, (%rsp) + vmovdqu %xmm0, (%rsp) L_AES_GCM_encrypt_avx2_calc_aad_loop: movzbl (%r12,%rcx,1), %r13d movb %r13b, (%rsp,%rbx,1) @@ -6781,7 +11892,7 @@ L_AES_GCM_encrypt_avx2_calc_aad_loop: incl %ebx cmpl %edx, %ecx jl L_AES_GCM_encrypt_avx2_calc_aad_loop - vmovdqa (%rsp), %xmm0 + vmovdqu (%rsp), %xmm0 vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 vpxor %xmm0, %xmm6, %xmm6 # ghash_gfmul_avx @@ -7674,8 +12785,8 @@ L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_aesenc_avx_last: vaesenclast %xmm0, %xmm4, %xmm4 xorl %ecx, %ecx vpxor %xmm0, %xmm0, %xmm0 - vmovdqa %xmm4, (%rsp) - vmovdqa %xmm0, 16(%rsp) + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm0, 16(%rsp) L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_loop: movzbl (%rdi,%rbx,1), %r13d xorb (%rsp,%rcx,1), %r13b @@ -7686,7 +12797,7 @@ L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_loop: cmpl %edx, %ebx jl L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_loop L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_finish_enc: - vmovdqa 16(%rsp), %xmm4 + vmovdqu 16(%rsp), %xmm4 vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm4, %xmm4 vpxor %xmm4, %xmm6, %xmm6 # ghash_gfmul_red @@ -7737,7 +12848,7 @@ L_AES_GCM_encrypt_avx2_done_enc: cmpl $16, %r14d je L_AES_GCM_encrypt_avx2_store_tag_16 xorq %rcx, %rcx - vmovdqa %xmm0, (%rsp) + vmovdqu %xmm0, (%rsp) L_AES_GCM_encrypt_avx2_store_tag_loop: movzbl (%rsp,%rcx,1), %r13d movb %r13b, (%r15,%rcx,1) @@ -7871,7 +12982,7 @@ L_AES_GCM_decrypt_avx2_calc_iv_16_loop: L_AES_GCM_decrypt_avx2_calc_iv_lt16: vpxor %xmm0, %xmm0, %xmm0 xorl %ebx, %ebx - vmovdqa %xmm0, (%rsp) + vmovdqu %xmm0, (%rsp) L_AES_GCM_decrypt_avx2_calc_iv_loop: movzbl (%rax,%rcx,1), %r13d movb %r13b, (%rsp,%rbx,1) @@ -7879,7 +12990,7 @@ L_AES_GCM_decrypt_avx2_calc_iv_loop: incl %ebx cmpl %edx, %ecx jl L_AES_GCM_decrypt_avx2_calc_iv_loop - vmovdqa (%rsp), %xmm0 + vmovdqu (%rsp), %xmm0 vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 vpxor %xmm0, %xmm4, %xmm4 # ghash_gfmul_avx @@ -8083,7 +13194,7 @@ L_AES_GCM_decrypt_avx2_calc_aad_16_loop: L_AES_GCM_decrypt_avx2_calc_aad_lt16: vpxor %xmm0, %xmm0, %xmm0 xorl %ebx, %ebx - vmovdqa %xmm0, (%rsp) + vmovdqu %xmm0, (%rsp) L_AES_GCM_decrypt_avx2_calc_aad_loop: movzbl (%r12,%rcx,1), %r13d movb %r13b, (%rsp,%rbx,1) @@ -8091,7 +13202,7 @@ L_AES_GCM_decrypt_avx2_calc_aad_loop: incl %ebx cmpl %edx, %ecx jl L_AES_GCM_decrypt_avx2_calc_aad_loop - vmovdqa (%rsp), %xmm0 + vmovdqu (%rsp), %xmm0 vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 vpxor %xmm0, %xmm6, %xmm6 # ghash_gfmul_avx 
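[Editor's note, not part of the patch] The ghash_gfmul_avx / ghash_red instruction sequences in the hunks above, and in the AVX2 streaming routines added below, are vectorized GHASH steps: a carry-less multiplication of the running hash value by a power of the hash key H in GF(2^128), followed by reduction modulo x^128 + x^7 + x^2 + x + 1. For orientation only, here is a minimal bitwise C reference model of that field multiplication (NIST SP 800-38D, Algorithm 1); the function name gf128_mul is illustrative and does not appear in wolfSSL:

/* Reference GF(2^128) multiply for GHASH: z = x * y, reduced by
 * x^128 + x^7 + x^2 + x + 1.  GCM bit order: bit 0 of a block is the
 * most significant bit of its first byte. */
#include <stdint.h>
#include <string.h>

static void gf128_mul(const uint8_t x[16], const uint8_t y[16], uint8_t z[16])
{
    uint8_t acc[16] = {0};  /* Z accumulator, starts at zero */
    uint8_t v[16];          /* V starts as Y and is repeatedly multiplied by x */
    memcpy(v, y, 16);

    for (int i = 0; i < 128; i++) {
        /* If bit i of X is set, add (XOR) the current V into Z. */
        if (x[i / 8] & (0x80 >> (i % 8))) {
            for (int j = 0; j < 16; j++)
                acc[j] ^= v[j];
        }
        /* Multiply V by x: shift right one bit in GCM order and, if a bit
         * fell off the low end, reduce by XORing 0xE1 into the top byte
         * (0xE1 encodes x^7 + x^2 + x + 1). */
        int carry = v[15] & 1;
        for (int j = 15; j > 0; j--)
            v[j] = (uint8_t)((v[j] >> 1) | (v[j - 1] << 7));
        v[0] >>= 1;
        if (carry)
            v[0] ^= 0xE1;
    }
    memcpy(z, acc, 16);
}

The patched assembly reaches the same result without a bit loop by using vpclmulqdq for 64x64-bit carry-less multiplies and folding the reduction through the L_avx1/L_avx2 aes_gcm_mod2_128 constants seen in the hunks.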
@@ -8634,8 +13745,8 @@ L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_aesenc_avx_last: vaesenclast %xmm1, %xmm4, %xmm4 xorl %ecx, %ecx vpxor %xmm0, %xmm0, %xmm0 - vmovdqa %xmm4, (%rsp) - vmovdqa %xmm0, 16(%rsp) + vmovdqu %xmm4, (%rsp) + vmovdqu %xmm0, 16(%rsp) L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_loop: movzbl (%rdi,%rbx,1), %r13d movb %r13b, 16(%rsp,%rcx,1) @@ -8645,7 +13756,7 @@ L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_loop: incl %ecx cmpl %edx, %ebx jl L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_loop - vmovdqa 16(%rsp), %xmm4 + vmovdqu 16(%rsp), %xmm4 vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm4, %xmm4 vpxor %xmm4, %xmm6, %xmm6 # ghash_gfmul_red @@ -8697,7 +13808,7 @@ L_AES_GCM_decrypt_avx2_done_dec: je L_AES_GCM_decrypt_avx2_cmp_tag_16 xorq %rdx, %rdx xorq %rax, %rax - vmovdqa %xmm0, (%rsp) + vmovdqu %xmm0, (%rsp) L_AES_GCM_decrypt_avx2_cmp_tag_loop: movzbl (%rsp,%rdx,1), %r13d xorb (%r14,%rdx,1), %r13b @@ -8730,6 +13841,1993 @@ L_AES_GCM_decrypt_avx2_cmp_tag_done: #ifndef __APPLE__ .size AES_GCM_decrypt_avx2,.-AES_GCM_decrypt_avx2 #endif /* __APPLE__ */ +#ifdef WOLFSSL_AESGCM_STREAM +#ifndef __APPLE__ +.text +.globl AES_GCM_init_avx2 +.type AES_GCM_init_avx2,@function +.align 16 +AES_GCM_init_avx2: +#else +.section __TEXT,__text +.globl _AES_GCM_init_avx2 +.p2align 4 +_AES_GCM_init_avx2: +#endif /* __APPLE__ */ + pushq %rbx + pushq %r12 + movq %rdx, %r10 + movl %ecx, %r11d + movq 24(%rsp), %rax + subq $16, %rsp + vpxor %xmm4, %xmm4, %xmm4 + movl %r11d, %edx + cmpl $12, %edx + je L_AES_GCM_init_avx2_iv_12 + # Calculate values when IV is not 12 bytes + # H = Encrypt X(=0) + vmovdqa (%rdi), %xmm5 + vaesenc 16(%rdi), %xmm5, %xmm5 + vaesenc 32(%rdi), %xmm5, %xmm5 + vaesenc 48(%rdi), %xmm5, %xmm5 + vaesenc 64(%rdi), %xmm5, %xmm5 + vaesenc 80(%rdi), %xmm5, %xmm5 + vaesenc 96(%rdi), %xmm5, %xmm5 + vaesenc 112(%rdi), %xmm5, %xmm5 + vaesenc 128(%rdi), %xmm5, %xmm5 + vaesenc 144(%rdi), %xmm5, %xmm5 + cmpl $11, %esi + vmovdqa 160(%rdi), %xmm0 + jl L_AES_GCM_init_avx2_calc_iv_1_aesenc_avx_last + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc 176(%rdi), %xmm5, %xmm5 + cmpl $13, %esi + vmovdqa 192(%rdi), %xmm0 + jl L_AES_GCM_init_avx2_calc_iv_1_aesenc_avx_last + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc 208(%rdi), %xmm5, %xmm5 + vmovdqa 224(%rdi), %xmm0 +L_AES_GCM_init_avx2_calc_iv_1_aesenc_avx_last: + vaesenclast %xmm0, %xmm5, %xmm5 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm5, %xmm5 + # Calc counter + # Initialization vector + cmpl $0x00, %edx + movq $0x00, %rcx + je L_AES_GCM_init_avx2_calc_iv_done + cmpl $16, %edx + jl L_AES_GCM_init_avx2_calc_iv_lt16 + andl $0xfffffff0, %edx +L_AES_GCM_init_avx2_calc_iv_16_loop: + vmovdqu (%r10,%rcx,1), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm4, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm4, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm6 + vpxor %xmm2, %xmm3, %xmm4 + # ghash_mid + vpsrld $31, %xmm6, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm6, %xmm6 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm6, %xmm6 + vpor %xmm1, %xmm4, %xmm4 + # ghash_red + vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm2 + vpclmulqdq $16, %xmm2, %xmm6, %xmm0 + vpshufd $0x4e, %xmm6, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, 
%xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm4 + addl $16, %ecx + cmpl %edx, %ecx + jl L_AES_GCM_init_avx2_calc_iv_16_loop + movl %r11d, %edx + cmpl %edx, %ecx + je L_AES_GCM_init_avx2_calc_iv_done +L_AES_GCM_init_avx2_calc_iv_lt16: + vpxor %xmm0, %xmm0, %xmm0 + xorl %ebx, %ebx + vmovdqu %xmm0, (%rsp) +L_AES_GCM_init_avx2_calc_iv_loop: + movzbl (%r10,%rcx,1), %r12d + movb %r12b, (%rsp,%rbx,1) + incl %ecx + incl %ebx + cmpl %edx, %ecx + jl L_AES_GCM_init_avx2_calc_iv_loop + vmovdqu (%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm4, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm4, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm6 + vpxor %xmm2, %xmm3, %xmm4 + # ghash_mid + vpsrld $31, %xmm6, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm6, %xmm6 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm6, %xmm6 + vpor %xmm1, %xmm4, %xmm4 + # ghash_red + vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm2 + vpclmulqdq $16, %xmm2, %xmm6, %xmm0 + vpshufd $0x4e, %xmm6, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm4 +L_AES_GCM_init_avx2_calc_iv_done: + # T = Encrypt counter + vpxor %xmm0, %xmm0, %xmm0 + shll $3, %edx + vpinsrq $0x00, %rdx, %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm4, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm4, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm6 + vpxor %xmm2, %xmm3, %xmm4 + # ghash_mid + vpsrld $31, %xmm6, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm6, %xmm6 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm6, %xmm6 + vpor %xmm1, %xmm4, %xmm4 + # ghash_red + vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm2 + vpclmulqdq $16, %xmm2, %xmm6, %xmm0 + vpshufd $0x4e, %xmm6, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm4 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm4, %xmm4 + # Encrypt counter + vmovdqa (%rdi), %xmm7 + vpxor %xmm4, %xmm7, %xmm7 + vaesenc 16(%rdi), %xmm7, %xmm7 + vaesenc 32(%rdi), %xmm7, %xmm7 + vaesenc 48(%rdi), %xmm7, %xmm7 + vaesenc 64(%rdi), %xmm7, %xmm7 + vaesenc 80(%rdi), %xmm7, %xmm7 + vaesenc 96(%rdi), %xmm7, %xmm7 + vaesenc 112(%rdi), %xmm7, %xmm7 + vaesenc 128(%rdi), %xmm7, %xmm7 + vaesenc 144(%rdi), %xmm7, %xmm7 + cmpl $11, %esi + vmovdqa 160(%rdi), %xmm0 + jl L_AES_GCM_init_avx2_calc_iv_2_aesenc_avx_last + vaesenc %xmm0, %xmm7, %xmm7 + vaesenc 176(%rdi), %xmm7, %xmm7 + cmpl $13, %esi + vmovdqa 192(%rdi), %xmm0 + jl L_AES_GCM_init_avx2_calc_iv_2_aesenc_avx_last + vaesenc %xmm0, %xmm7, %xmm7 + vaesenc 208(%rdi), %xmm7, %xmm7 + vmovdqa 224(%rdi), %xmm0 +L_AES_GCM_init_avx2_calc_iv_2_aesenc_avx_last: + vaesenclast %xmm0, %xmm7, %xmm7 + jmp L_AES_GCM_init_avx2_iv_done +L_AES_GCM_init_avx2_iv_12: + # # Calculate values when IV is 12 bytes + # Set 
counter based on IV + vmovdqa L_avx2_aes_gcm_bswap_one(%rip), %xmm4 + vmovdqa (%rdi), %xmm5 + vpblendd $7, (%r10), %xmm4, %xmm4 + # H = Encrypt X(=0) and T = Encrypt counter + vmovdqa 16(%rdi), %xmm6 + vpxor %xmm5, %xmm4, %xmm7 + vaesenc %xmm6, %xmm5, %xmm5 + vaesenc %xmm6, %xmm7, %xmm7 + vmovdqa 32(%rdi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm7, %xmm7 + vmovdqa 48(%rdi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm7, %xmm7 + vmovdqa 64(%rdi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm7, %xmm7 + vmovdqa 80(%rdi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm7, %xmm7 + vmovdqa 96(%rdi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm7, %xmm7 + vmovdqa 112(%rdi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm7, %xmm7 + vmovdqa 128(%rdi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm7, %xmm7 + vmovdqa 144(%rdi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm7, %xmm7 + cmpl $11, %esi + vmovdqa 160(%rdi), %xmm0 + jl L_AES_GCM_init_avx2_calc_iv_12_last + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm7, %xmm7 + vmovdqa 176(%rdi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm7, %xmm7 + cmpl $13, %esi + vmovdqa 192(%rdi), %xmm0 + jl L_AES_GCM_init_avx2_calc_iv_12_last + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm7, %xmm7 + vmovdqa 208(%rdi), %xmm0 + vaesenc %xmm0, %xmm5, %xmm5 + vaesenc %xmm0, %xmm7, %xmm7 + vmovdqa 224(%rdi), %xmm0 +L_AES_GCM_init_avx2_calc_iv_12_last: + vaesenclast %xmm0, %xmm5, %xmm5 + vaesenclast %xmm0, %xmm7, %xmm7 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm5, %xmm5 +L_AES_GCM_init_avx2_iv_done: + vmovdqa %xmm7, (%rax) + vpshufb L_avx2_aes_gcm_bswap_epi64(%rip), %xmm4, %xmm4 + vpaddd L_avx2_aes_gcm_one(%rip), %xmm4, %xmm4 + vmovdqa %xmm5, (%r8) + vmovdqa %xmm4, (%r9) + vzeroupper + addq $16, %rsp + popq %r12 + popq %rbx + repz retq +#ifndef __APPLE__ +.size AES_GCM_init_avx2,.-AES_GCM_init_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl AES_GCM_aad_update_avx2 +.type AES_GCM_aad_update_avx2,@function +.align 16 +AES_GCM_aad_update_avx2: +#else +.section __TEXT,__text +.globl _AES_GCM_aad_update_avx2 +.p2align 4 +_AES_GCM_aad_update_avx2: +#endif /* __APPLE__ */ + movq %rcx, %rax + vmovdqa (%rdx), %xmm4 + vmovdqa (%rax), %xmm5 + xorl %ecx, %ecx +L_AES_GCM_aad_update_avx2_16_loop: + vmovdqu (%rdi,%rcx,1), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm4, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm4, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm6 + vpxor %xmm2, %xmm3, %xmm4 + # ghash_mid + vpsrld $31, %xmm6, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm6, %xmm6 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm6, %xmm6 + vpor %xmm1, %xmm4, %xmm4 + # ghash_red + vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm2 + vpclmulqdq $16, %xmm2, %xmm6, %xmm0 + vpshufd $0x4e, %xmm6, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm4 + addl $16, %ecx + cmpl %esi, %ecx + jl L_AES_GCM_aad_update_avx2_16_loop + vmovdqa %xmm4, (%rdx) + vzeroupper + repz retq +#ifndef __APPLE__ +.size 
AES_GCM_aad_update_avx2,.-AES_GCM_aad_update_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl AES_GCM_encrypt_block_avx2 +.type AES_GCM_encrypt_block_avx2,@function +.align 16 +AES_GCM_encrypt_block_avx2: +#else +.section __TEXT,__text +.globl _AES_GCM_encrypt_block_avx2 +.p2align 4 +_AES_GCM_encrypt_block_avx2: +#endif /* __APPLE__ */ + movq %rdx, %r10 + movq %rcx, %r11 + subq $0x98, %rsp + vmovdqa (%r8), %xmm3 + # aesenc_block + vmovdqa %xmm3, %xmm1 + vpshufb L_avx2_aes_gcm_bswap_epi64(%rip), %xmm1, %xmm0 + vpaddd L_avx2_aes_gcm_one(%rip), %xmm1, %xmm1 + vpxor (%rdi), %xmm0, %xmm0 + vmovdqa 16(%rdi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa 32(%rdi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa 48(%rdi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa 64(%rdi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa 80(%rdi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa 96(%rdi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa 112(%rdi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa 128(%rdi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa 144(%rdi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa %xmm1, %xmm3 + cmpl $11, %esi + vmovdqa 160(%rdi), %xmm1 + jl L_AES_GCM_encrypt_block_avx2_aesenc_block_last + vaesenc %xmm1, %xmm0, %xmm0 + vmovdqa 176(%rdi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + cmpl $13, %esi + vmovdqa 192(%rdi), %xmm1 + jl L_AES_GCM_encrypt_block_avx2_aesenc_block_last + vaesenc %xmm1, %xmm0, %xmm0 + vmovdqa 208(%rdi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa 224(%rdi), %xmm1 +L_AES_GCM_encrypt_block_avx2_aesenc_block_last: + vaesenclast %xmm1, %xmm0, %xmm0 + vmovdqu (%r11), %xmm1 + vpxor %xmm1, %xmm0, %xmm0 + vmovdqu %xmm0, (%r10) + vmovdqa %xmm3, (%r8) + vzeroupper + addq $0x98, %rsp + repz retq +#ifndef __APPLE__ +.size AES_GCM_encrypt_block_avx2,.-AES_GCM_encrypt_block_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl AES_GCM_ghash_block_avx2 +.type AES_GCM_ghash_block_avx2,@function +.align 16 +AES_GCM_ghash_block_avx2: +#else +.section __TEXT,__text +.globl _AES_GCM_ghash_block_avx2 +.p2align 4 +_AES_GCM_ghash_block_avx2: +#endif /* __APPLE__ */ + vmovdqa (%rsi), %xmm4 + vmovdqa (%rdx), %xmm5 + vmovdqu (%rdi), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vpxor %xmm0, %xmm4, %xmm4 + # ghash_gfmul_avx + vpclmulqdq $16, %xmm4, %xmm5, %xmm2 + vpclmulqdq $0x01, %xmm4, %xmm5, %xmm1 + vpclmulqdq $0x00, %xmm4, %xmm5, %xmm0 + vpclmulqdq $0x11, %xmm4, %xmm5, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpslldq $8, %xmm2, %xmm1 + vpsrldq $8, %xmm2, %xmm2 + vpxor %xmm1, %xmm0, %xmm6 + vpxor %xmm2, %xmm3, %xmm4 + # ghash_mid + vpsrld $31, %xmm6, %xmm0 + vpsrld $31, %xmm4, %xmm1 + vpslld $0x01, %xmm6, %xmm6 + vpslld $0x01, %xmm4, %xmm4 + vpsrldq $12, %xmm0, %xmm2 + vpslldq $4, %xmm0, %xmm0 + vpslldq $4, %xmm1, %xmm1 + vpor %xmm2, %xmm4, %xmm4 + vpor %xmm0, %xmm6, %xmm6 + vpor %xmm1, %xmm4, %xmm4 + # ghash_red + vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm2 + vpclmulqdq $16, %xmm2, %xmm6, %xmm0 + vpshufd $0x4e, %xmm6, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm4, %xmm4 + vmovdqa %xmm4, (%rsi) + vzeroupper + repz retq +#ifndef __APPLE__ +.size AES_GCM_ghash_block_avx2,.-AES_GCM_ghash_block_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl AES_GCM_encrypt_update_avx2 +.type AES_GCM_encrypt_update_avx2,@function +.align 16 +AES_GCM_encrypt_update_avx2: +#else +.section __TEXT,__text +.globl _AES_GCM_encrypt_update_avx2 +.p2align 4 
+_AES_GCM_encrypt_update_avx2: +#endif /* __APPLE__ */ + pushq %r12 + pushq %r13 + pushq %r14 + movq %rdx, %r10 + movq %rcx, %r11 + movq 32(%rsp), %rax + movq 40(%rsp), %r12 + subq $0x98, %rsp + vmovdqa (%r9), %xmm6 + vmovdqa (%rax), %xmm5 + vmovdqa (%r12), %xmm4 + vpsrlq $63, %xmm5, %xmm1 + vpsllq $0x01, %xmm5, %xmm0 + vpslldq $8, %xmm1, %xmm1 + vpor %xmm1, %xmm0, %xmm0 + vpshufd $0xff, %xmm5, %xmm5 + vpsrad $31, %xmm5, %xmm5 + vpand L_avx2_aes_gcm_mod2_128(%rip), %xmm5, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + xorl %r14d, %r14d + cmpl $0x80, %r8d + movl %r8d, %r13d + jl L_AES_GCM_encrypt_update_avx2_done_128 + andl $0xffffff80, %r13d + vmovdqa %xmm4, 128(%rsp) + vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm3 + # H ^ 1 and H ^ 2 + vpclmulqdq $0x00, %xmm5, %xmm5, %xmm9 + vpclmulqdq $0x11, %xmm5, %xmm5, %xmm10 + vpclmulqdq $16, %xmm3, %xmm9, %xmm8 + vpshufd $0x4e, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpclmulqdq $16, %xmm3, %xmm9, %xmm8 + vpshufd $0x4e, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpxor %xmm9, %xmm10, %xmm0 + vmovdqa %xmm5, (%rsp) + vmovdqa %xmm0, 16(%rsp) + # H ^ 3 and H ^ 4 + vpclmulqdq $16, %xmm5, %xmm0, %xmm11 + vpclmulqdq $0x01, %xmm5, %xmm0, %xmm10 + vpclmulqdq $0x00, %xmm5, %xmm0, %xmm9 + vpclmulqdq $0x11, %xmm5, %xmm0, %xmm12 + vpclmulqdq $0x00, %xmm0, %xmm0, %xmm13 + vpclmulqdq $0x11, %xmm0, %xmm0, %xmm14 + vpxor %xmm10, %xmm11, %xmm11 + vpslldq $8, %xmm11, %xmm10 + vpsrldq $8, %xmm11, %xmm11 + vpxor %xmm9, %xmm10, %xmm10 + vpclmulqdq $16, %xmm3, %xmm13, %xmm8 + vpclmulqdq $16, %xmm3, %xmm10, %xmm9 + vpshufd $0x4e, %xmm10, %xmm10 + vpshufd $0x4e, %xmm13, %xmm13 + vpxor %xmm9, %xmm10, %xmm10 + vpxor %xmm8, %xmm13, %xmm13 + vpclmulqdq $16, %xmm3, %xmm10, %xmm9 + vpclmulqdq $16, %xmm3, %xmm13, %xmm8 + vpshufd $0x4e, %xmm10, %xmm10 + vpshufd $0x4e, %xmm13, %xmm13 + vpxor %xmm11, %xmm12, %xmm12 + vpxor %xmm8, %xmm13, %xmm13 + vpxor %xmm12, %xmm10, %xmm10 + vpxor %xmm14, %xmm13, %xmm2 + vpxor %xmm9, %xmm10, %xmm1 + vmovdqa %xmm1, 32(%rsp) + vmovdqa %xmm2, 48(%rsp) + # H ^ 5 and H ^ 6 + vpclmulqdq $16, %xmm0, %xmm1, %xmm11 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm10 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm9 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm12 + vpclmulqdq $0x00, %xmm1, %xmm1, %xmm13 + vpclmulqdq $0x11, %xmm1, %xmm1, %xmm14 + vpxor %xmm10, %xmm11, %xmm11 + vpslldq $8, %xmm11, %xmm10 + vpsrldq $8, %xmm11, %xmm11 + vpxor %xmm9, %xmm10, %xmm10 + vpclmulqdq $16, %xmm3, %xmm13, %xmm8 + vpclmulqdq $16, %xmm3, %xmm10, %xmm9 + vpshufd $0x4e, %xmm10, %xmm10 + vpshufd $0x4e, %xmm13, %xmm13 + vpxor %xmm9, %xmm10, %xmm10 + vpxor %xmm8, %xmm13, %xmm13 + vpclmulqdq $16, %xmm3, %xmm10, %xmm9 + vpclmulqdq $16, %xmm3, %xmm13, %xmm8 + vpshufd $0x4e, %xmm10, %xmm10 + vpshufd $0x4e, %xmm13, %xmm13 + vpxor %xmm11, %xmm12, %xmm12 + vpxor %xmm8, %xmm13, %xmm13 + vpxor %xmm12, %xmm10, %xmm10 + vpxor %xmm14, %xmm13, %xmm0 + vpxor %xmm9, %xmm10, %xmm7 + vmovdqa %xmm7, 64(%rsp) + vmovdqa %xmm0, 80(%rsp) + # H ^ 7 and H ^ 8 + vpclmulqdq $16, %xmm1, %xmm2, %xmm11 + vpclmulqdq $0x01, %xmm1, %xmm2, %xmm10 + vpclmulqdq $0x00, %xmm1, %xmm2, %xmm9 + vpclmulqdq $0x11, %xmm1, %xmm2, %xmm12 + vpclmulqdq $0x00, %xmm2, %xmm2, %xmm13 + vpclmulqdq $0x11, %xmm2, %xmm2, %xmm14 + vpxor %xmm10, %xmm11, %xmm11 + vpslldq $8, %xmm11, %xmm10 + vpsrldq $8, %xmm11, %xmm11 + vpxor %xmm9, %xmm10, %xmm10 + vpclmulqdq $16, %xmm3, %xmm13, %xmm8 + vpclmulqdq $16, %xmm3, %xmm10, %xmm9 + vpshufd $0x4e, %xmm10, %xmm10 + vpshufd $0x4e, %xmm13, %xmm13 + vpxor %xmm9, %xmm10, %xmm10 + vpxor %xmm8, %xmm13, %xmm13 + vpclmulqdq $16, %xmm3, 
%xmm10, %xmm9 + vpclmulqdq $16, %xmm3, %xmm13, %xmm8 + vpshufd $0x4e, %xmm10, %xmm10 + vpshufd $0x4e, %xmm13, %xmm13 + vpxor %xmm11, %xmm12, %xmm12 + vpxor %xmm8, %xmm13, %xmm13 + vpxor %xmm12, %xmm10, %xmm10 + vpxor %xmm14, %xmm13, %xmm0 + vpxor %xmm9, %xmm10, %xmm7 + vmovdqa %xmm7, 96(%rsp) + vmovdqa %xmm0, 112(%rsp) + # First 128 bytes of input + # aesenc_128 + # aesenc_ctr + vmovdqa 128(%rsp), %xmm0 + vmovdqa L_avx2_aes_gcm_bswap_epi64(%rip), %xmm1 + vpaddd L_avx2_aes_gcm_one(%rip), %xmm0, %xmm9 + vpshufb %xmm1, %xmm0, %xmm8 + vpaddd L_avx2_aes_gcm_two(%rip), %xmm0, %xmm10 + vpshufb %xmm1, %xmm9, %xmm9 + vpaddd L_avx2_aes_gcm_three(%rip), %xmm0, %xmm11 + vpshufb %xmm1, %xmm10, %xmm10 + vpaddd L_avx2_aes_gcm_four(%rip), %xmm0, %xmm12 + vpshufb %xmm1, %xmm11, %xmm11 + vpaddd L_avx2_aes_gcm_five(%rip), %xmm0, %xmm13 + vpshufb %xmm1, %xmm12, %xmm12 + vpaddd L_avx2_aes_gcm_six(%rip), %xmm0, %xmm14 + vpshufb %xmm1, %xmm13, %xmm13 + vpaddd L_avx2_aes_gcm_seven(%rip), %xmm0, %xmm15 + vpshufb %xmm1, %xmm14, %xmm14 + vpaddd L_avx2_aes_gcm_eight(%rip), %xmm0, %xmm0 + vpshufb %xmm1, %xmm15, %xmm15 + # aesenc_xor + vmovdqa (%rdi), %xmm7 + vmovdqa %xmm0, 128(%rsp) + vpxor %xmm7, %xmm8, %xmm8 + vpxor %xmm7, %xmm9, %xmm9 + vpxor %xmm7, %xmm10, %xmm10 + vpxor %xmm7, %xmm11, %xmm11 + vpxor %xmm7, %xmm12, %xmm12 + vpxor %xmm7, %xmm13, %xmm13 + vpxor %xmm7, %xmm14, %xmm14 + vpxor %xmm7, %xmm15, %xmm15 + vmovdqa 16(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 32(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 48(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 64(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 80(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 96(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 112(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 128(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc 
%xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 144(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + cmpl $11, %esi + vmovdqa 160(%rdi), %xmm7 + jl L_AES_GCM_encrypt_update_avx2_aesenc_128_enc_done + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 176(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + cmpl $13, %esi + vmovdqa 192(%rdi), %xmm7 + jl L_AES_GCM_encrypt_update_avx2_aesenc_128_enc_done + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 208(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 224(%rdi), %xmm7 +L_AES_GCM_encrypt_update_avx2_aesenc_128_enc_done: + # aesenc_last + vaesenclast %xmm7, %xmm8, %xmm8 + vaesenclast %xmm7, %xmm9, %xmm9 + vaesenclast %xmm7, %xmm10, %xmm10 + vaesenclast %xmm7, %xmm11, %xmm11 + vmovdqu (%r11), %xmm0 + vmovdqu 16(%r11), %xmm1 + vmovdqu 32(%r11), %xmm2 + vmovdqu 48(%r11), %xmm3 + vpxor %xmm0, %xmm8, %xmm8 + vpxor %xmm1, %xmm9, %xmm9 + vpxor %xmm2, %xmm10, %xmm10 + vpxor %xmm3, %xmm11, %xmm11 + vmovdqu %xmm8, (%r10) + vmovdqu %xmm9, 16(%r10) + vmovdqu %xmm10, 32(%r10) + vmovdqu %xmm11, 48(%r10) + vaesenclast %xmm7, %xmm12, %xmm12 + vaesenclast %xmm7, %xmm13, %xmm13 + vaesenclast %xmm7, %xmm14, %xmm14 + vaesenclast %xmm7, %xmm15, %xmm15 + vmovdqu 64(%r11), %xmm0 + vmovdqu 80(%r11), %xmm1 + vmovdqu 96(%r11), %xmm2 + vmovdqu 112(%r11), %xmm3 + vpxor %xmm0, %xmm12, %xmm12 + vpxor %xmm1, %xmm13, %xmm13 + vpxor %xmm2, %xmm14, %xmm14 + vpxor %xmm3, %xmm15, %xmm15 + vmovdqu %xmm12, 64(%r10) + vmovdqu %xmm13, 80(%r10) + vmovdqu %xmm14, 96(%r10) + vmovdqu %xmm15, 112(%r10) + cmpl $0x80, %r13d + movl $0x80, %r14d + jle L_AES_GCM_encrypt_update_avx2_end_128 + # More 128 bytes of input +L_AES_GCM_encrypt_update_avx2_ghash_128: + # aesenc_128_ghash + leaq (%r11,%r14,1), %rcx + leaq (%r10,%r14,1), %rdx + # aesenc_ctr + vmovdqa 128(%rsp), %xmm0 + vmovdqa L_avx2_aes_gcm_bswap_epi64(%rip), %xmm1 + vpaddd L_avx2_aes_gcm_one(%rip), %xmm0, %xmm9 + vpshufb %xmm1, %xmm0, %xmm8 + vpaddd L_avx2_aes_gcm_two(%rip), %xmm0, %xmm10 + vpshufb %xmm1, %xmm9, %xmm9 + vpaddd L_avx2_aes_gcm_three(%rip), %xmm0, %xmm11 + vpshufb %xmm1, %xmm10, %xmm10 + vpaddd L_avx2_aes_gcm_four(%rip), %xmm0, %xmm12 + vpshufb %xmm1, %xmm11, %xmm11 + vpaddd L_avx2_aes_gcm_five(%rip), %xmm0, %xmm13 + vpshufb %xmm1, %xmm12, %xmm12 + vpaddd L_avx2_aes_gcm_six(%rip), %xmm0, %xmm14 + vpshufb %xmm1, %xmm13, %xmm13 + vpaddd L_avx2_aes_gcm_seven(%rip), %xmm0, %xmm15 + vpshufb %xmm1, %xmm14, %xmm14 + 
vpaddd L_avx2_aes_gcm_eight(%rip), %xmm0, %xmm0 + vpshufb %xmm1, %xmm15, %xmm15 + # aesenc_xor + vmovdqa (%rdi), %xmm7 + vmovdqa %xmm0, 128(%rsp) + vpxor %xmm7, %xmm8, %xmm8 + vpxor %xmm7, %xmm9, %xmm9 + vpxor %xmm7, %xmm10, %xmm10 + vpxor %xmm7, %xmm11, %xmm11 + vpxor %xmm7, %xmm12, %xmm12 + vpxor %xmm7, %xmm13, %xmm13 + vpxor %xmm7, %xmm14, %xmm14 + vpxor %xmm7, %xmm15, %xmm15 + # aesenc_pclmul_1 + vmovdqu -128(%rdx), %xmm1 + vmovdqu 16(%rdi), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vmovdqa 112(%rsp), %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm5 + vpclmulqdq $0x01, %xmm2, %xmm1, %xmm3 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm6 + vpclmulqdq $0x11, %xmm2, %xmm1, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_2 + vmovdqu -112(%rdx), %xmm1 + vmovdqa 96(%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vmovdqu 32(%rdi), %xmm0 + vpxor %xmm1, %xmm7, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_n + vmovdqu -96(%rdx), %xmm1 + vmovdqa 80(%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vmovdqu 48(%rdi), %xmm0 + vpxor %xmm1, %xmm7, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_n + vmovdqu -80(%rdx), %xmm1 + vmovdqa 64(%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vmovdqu 64(%rdi), %xmm0 + vpxor %xmm1, %xmm7, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_n + vmovdqu -64(%rdx), %xmm1 + vmovdqa 48(%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vmovdqu 80(%rdi), %xmm0 + vpxor %xmm1, %xmm7, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, 
%xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_n + vmovdqu -48(%rdx), %xmm1 + vmovdqa 32(%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vmovdqu 96(%rdi), %xmm0 + vpxor %xmm1, %xmm7, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_n + vmovdqu -32(%rdx), %xmm1 + vmovdqa 16(%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vmovdqu 112(%rdi), %xmm0 + vpxor %xmm1, %xmm7, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_n + vmovdqu -16(%rdx), %xmm1 + vmovdqa (%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vmovdqu 128(%rdi), %xmm0 + vpxor %xmm1, %xmm7, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_l + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm4, %xmm6, %xmm6 + vpxor %xmm3, %xmm5, %xmm5 + vpslldq $8, %xmm5, %xmm1 + vpsrldq $8, %xmm5, %xmm5 + vmovdqa 144(%rdi), %xmm4 + vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm0 + vaesenc %xmm4, %xmm8, %xmm8 + vpxor %xmm1, %xmm6, %xmm6 + vpxor %xmm5, %xmm7, %xmm7 + vpclmulqdq $16, %xmm0, %xmm6, %xmm3 + vaesenc %xmm4, %xmm9, %xmm9 + vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + vpshufd $0x4e, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpclmulqdq $16, %xmm0, %xmm6, %xmm3 + vaesenc %xmm4, %xmm12, %xmm12 + vaesenc %xmm4, %xmm13, %xmm13 + vaesenc %xmm4, %xmm14, %xmm14 + vpshufd $0x4e, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm7, %xmm6, %xmm6 + vaesenc %xmm4, %xmm15, %xmm15 + cmpl $11, %esi + vmovdqa 160(%rdi), %xmm7 + jl L_AES_GCM_encrypt_update_avx2_aesenc_128_ghash_avx_done + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 176(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + cmpl $13, %esi + vmovdqa 192(%rdi), %xmm7 + jl 
L_AES_GCM_encrypt_update_avx2_aesenc_128_ghash_avx_done + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 208(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 224(%rdi), %xmm7 +L_AES_GCM_encrypt_update_avx2_aesenc_128_ghash_avx_done: + # aesenc_last + vaesenclast %xmm7, %xmm8, %xmm8 + vaesenclast %xmm7, %xmm9, %xmm9 + vaesenclast %xmm7, %xmm10, %xmm10 + vaesenclast %xmm7, %xmm11, %xmm11 + vmovdqu (%rcx), %xmm0 + vmovdqu 16(%rcx), %xmm1 + vmovdqu 32(%rcx), %xmm2 + vmovdqu 48(%rcx), %xmm3 + vpxor %xmm0, %xmm8, %xmm8 + vpxor %xmm1, %xmm9, %xmm9 + vpxor %xmm2, %xmm10, %xmm10 + vpxor %xmm3, %xmm11, %xmm11 + vmovdqu %xmm8, (%rdx) + vmovdqu %xmm9, 16(%rdx) + vmovdqu %xmm10, 32(%rdx) + vmovdqu %xmm11, 48(%rdx) + vaesenclast %xmm7, %xmm12, %xmm12 + vaesenclast %xmm7, %xmm13, %xmm13 + vaesenclast %xmm7, %xmm14, %xmm14 + vaesenclast %xmm7, %xmm15, %xmm15 + vmovdqu 64(%rcx), %xmm0 + vmovdqu 80(%rcx), %xmm1 + vmovdqu 96(%rcx), %xmm2 + vmovdqu 112(%rcx), %xmm3 + vpxor %xmm0, %xmm12, %xmm12 + vpxor %xmm1, %xmm13, %xmm13 + vpxor %xmm2, %xmm14, %xmm14 + vpxor %xmm3, %xmm15, %xmm15 + vmovdqu %xmm12, 64(%rdx) + vmovdqu %xmm13, 80(%rdx) + vmovdqu %xmm14, 96(%rdx) + vmovdqu %xmm15, 112(%rdx) + # aesenc_128_ghash - end + addl $0x80, %r14d + cmpl %r13d, %r14d + jl L_AES_GCM_encrypt_update_avx2_ghash_128 +L_AES_GCM_encrypt_update_avx2_end_128: + vmovdqa L_avx2_aes_gcm_bswap_mask(%rip), %xmm4 + vpshufb %xmm4, %xmm8, %xmm8 + vpshufb %xmm4, %xmm9, %xmm9 + vpshufb %xmm4, %xmm10, %xmm10 + vpshufb %xmm4, %xmm11, %xmm11 + vpshufb %xmm4, %xmm12, %xmm12 + vpshufb %xmm4, %xmm13, %xmm13 + vpshufb %xmm4, %xmm14, %xmm14 + vpshufb %xmm4, %xmm15, %xmm15 + vpxor %xmm6, %xmm8, %xmm8 + vmovdqu (%rsp), %xmm7 + vpclmulqdq $16, %xmm15, %xmm7, %xmm5 + vpclmulqdq $0x01, %xmm15, %xmm7, %xmm1 + vpclmulqdq $0x00, %xmm15, %xmm7, %xmm4 + vpclmulqdq $0x11, %xmm15, %xmm7, %xmm6 + vpxor %xmm1, %xmm5, %xmm5 + vmovdqu 16(%rsp), %xmm7 + vpclmulqdq $16, %xmm14, %xmm7, %xmm2 + vpclmulqdq $0x01, %xmm14, %xmm7, %xmm1 + vpclmulqdq $0x00, %xmm14, %xmm7, %xmm0 + vpclmulqdq $0x11, %xmm14, %xmm7, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm0, %xmm4, %xmm4 + vmovdqu 32(%rsp), %xmm15 + vmovdqu 48(%rsp), %xmm7 + vpclmulqdq $16, %xmm13, %xmm15, %xmm2 + vpclmulqdq $0x01, %xmm13, %xmm15, %xmm1 + vpclmulqdq $0x00, %xmm13, %xmm15, %xmm0 + vpclmulqdq $0x11, %xmm13, %xmm15, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm0, %xmm4, %xmm4 + vpclmulqdq $16, %xmm12, %xmm7, %xmm2 + vpclmulqdq $0x01, %xmm12, %xmm7, %xmm1 + vpclmulqdq $0x00, %xmm12, %xmm7, %xmm0 + vpclmulqdq $0x11, %xmm12, %xmm7, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm0, %xmm4, %xmm4 + vmovdqu 64(%rsp), %xmm15 + vmovdqu 80(%rsp), %xmm7 + vpclmulqdq $16, %xmm11, %xmm15, %xmm2 + vpclmulqdq $0x01, %xmm11, %xmm15, %xmm1 + vpclmulqdq $0x00, %xmm11, %xmm15, %xmm0 + vpclmulqdq $0x11, %xmm11, %xmm15, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm0, %xmm4, 
%xmm4 + vpclmulqdq $16, %xmm10, %xmm7, %xmm2 + vpclmulqdq $0x01, %xmm10, %xmm7, %xmm1 + vpclmulqdq $0x00, %xmm10, %xmm7, %xmm0 + vpclmulqdq $0x11, %xmm10, %xmm7, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm0, %xmm4, %xmm4 + vmovdqu 96(%rsp), %xmm15 + vmovdqu 112(%rsp), %xmm7 + vpclmulqdq $16, %xmm9, %xmm15, %xmm2 + vpclmulqdq $0x01, %xmm9, %xmm15, %xmm1 + vpclmulqdq $0x00, %xmm9, %xmm15, %xmm0 + vpclmulqdq $0x11, %xmm9, %xmm15, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm0, %xmm4, %xmm4 + vpclmulqdq $16, %xmm8, %xmm7, %xmm2 + vpclmulqdq $0x01, %xmm8, %xmm7, %xmm1 + vpclmulqdq $0x00, %xmm8, %xmm7, %xmm0 + vpclmulqdq $0x11, %xmm8, %xmm7, %xmm3 + vpxor %xmm1, %xmm2, %xmm2 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm0, %xmm4, %xmm4 + vpslldq $8, %xmm5, %xmm7 + vpsrldq $8, %xmm5, %xmm5 + vpxor %xmm7, %xmm4, %xmm4 + vpxor %xmm5, %xmm6, %xmm6 + # ghash_red + vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm2 + vpclmulqdq $16, %xmm2, %xmm4, %xmm0 + vpshufd $0x4e, %xmm4, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm0 + vpshufd $0x4e, %xmm1, %xmm1 + vpxor %xmm0, %xmm1, %xmm1 + vpxor %xmm1, %xmm6, %xmm6 + vmovdqa (%rsp), %xmm5 + vmovdqu 128(%rsp), %xmm4 +L_AES_GCM_encrypt_update_avx2_done_128: + cmpl %r8d, %r14d + je L_AES_GCM_encrypt_update_avx2_done_enc + movl %r8d, %r13d + andl $0xfffffff0, %r13d + cmpl %r13d, %r14d + jge L_AES_GCM_encrypt_update_avx2_last_block_done + # aesenc_block + vmovdqa %xmm4, %xmm1 + vpshufb L_avx2_aes_gcm_bswap_epi64(%rip), %xmm1, %xmm0 + vpaddd L_avx2_aes_gcm_one(%rip), %xmm1, %xmm1 + vpxor (%rdi), %xmm0, %xmm0 + vmovdqa 16(%rdi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa 32(%rdi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa 48(%rdi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa 64(%rdi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa 80(%rdi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa 96(%rdi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa 112(%rdi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa 128(%rdi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa 144(%rdi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa %xmm1, %xmm4 + cmpl $11, %esi + vmovdqa 160(%rdi), %xmm1 + jl L_AES_GCM_encrypt_update_avx2_aesenc_block_last + vaesenc %xmm1, %xmm0, %xmm0 + vmovdqa 176(%rdi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + cmpl $13, %esi + vmovdqa 192(%rdi), %xmm1 + jl L_AES_GCM_encrypt_update_avx2_aesenc_block_last + vaesenc %xmm1, %xmm0, %xmm0 + vmovdqa 208(%rdi), %xmm2 + vaesenc %xmm2, %xmm0, %xmm0 + vmovdqa 224(%rdi), %xmm1 +L_AES_GCM_encrypt_update_avx2_aesenc_block_last: + vaesenclast %xmm1, %xmm0, %xmm0 + vmovdqu (%r11,%r14,1), %xmm1 + vpxor %xmm1, %xmm0, %xmm0 + vmovdqu %xmm0, (%r10,%r14,1) + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vpxor %xmm0, %xmm6, %xmm6 + addl $16, %r14d + cmpl %r13d, %r14d + jge L_AES_GCM_encrypt_update_avx2_last_block_ghash +L_AES_GCM_encrypt_update_avx2_last_block_start: + vmovdqu (%r11,%r14,1), %xmm12 + vpshufb L_avx2_aes_gcm_bswap_epi64(%rip), %xmm4, %xmm11 + vpaddd L_avx2_aes_gcm_one(%rip), %xmm4, %xmm4 + # aesenc_gfmul_sb + vpclmulqdq $0x01, %xmm5, %xmm6, %xmm2 + vpclmulqdq $16, %xmm5, %xmm6, %xmm3 + vpclmulqdq $0x00, %xmm5, %xmm6, %xmm1 + vpclmulqdq $0x11, %xmm5, %xmm6, %xmm8 + vpxor (%rdi), %xmm11, %xmm11 + vaesenc 16(%rdi), %xmm11, %xmm11 + vpxor %xmm2, %xmm3, %xmm3 + vpslldq $8, %xmm3, %xmm2 + vpsrldq $8, %xmm3, %xmm3 + vaesenc 32(%rdi), %xmm11, %xmm11 + vpxor %xmm1, 
%xmm2, %xmm2 + vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm2, %xmm1 + vaesenc 48(%rdi), %xmm11, %xmm11 + vaesenc 64(%rdi), %xmm11, %xmm11 + vaesenc 80(%rdi), %xmm11, %xmm11 + vpshufd $0x4e, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm2, %xmm1 + vaesenc 96(%rdi), %xmm11, %xmm11 + vaesenc 112(%rdi), %xmm11, %xmm11 + vaesenc 128(%rdi), %xmm11, %xmm11 + vpshufd $0x4e, %xmm2, %xmm2 + vaesenc 144(%rdi), %xmm11, %xmm11 + vpxor %xmm3, %xmm8, %xmm8 + vpxor %xmm8, %xmm2, %xmm2 + vmovdqa 160(%rdi), %xmm0 + cmpl $11, %esi + jl L_AES_GCM_encrypt_update_avx2_aesenc_gfmul_sb_last + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc 176(%rdi), %xmm11, %xmm11 + vmovdqa 192(%rdi), %xmm0 + cmpl $13, %esi + jl L_AES_GCM_encrypt_update_avx2_aesenc_gfmul_sb_last + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc 208(%rdi), %xmm11, %xmm11 + vmovdqa 224(%rdi), %xmm0 +L_AES_GCM_encrypt_update_avx2_aesenc_gfmul_sb_last: + vaesenclast %xmm0, %xmm11, %xmm11 + vpxor %xmm1, %xmm2, %xmm6 + vpxor %xmm12, %xmm11, %xmm11 + vmovdqu %xmm11, (%r10,%r14,1) + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm11, %xmm11 + vpxor %xmm11, %xmm6, %xmm6 + addl $16, %r14d + cmpl %r13d, %r14d + jl L_AES_GCM_encrypt_update_avx2_last_block_start +L_AES_GCM_encrypt_update_avx2_last_block_ghash: + # ghash_gfmul_red + vpclmulqdq $16, %xmm5, %xmm6, %xmm10 + vpclmulqdq $0x01, %xmm5, %xmm6, %xmm9 + vpclmulqdq $0x00, %xmm5, %xmm6, %xmm8 + vpxor %xmm9, %xmm10, %xmm10 + vpslldq $8, %xmm10, %xmm9 + vpsrldq $8, %xmm10, %xmm10 + vpxor %xmm8, %xmm9, %xmm9 + vpclmulqdq $0x11, %xmm5, %xmm6, %xmm6 + vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm9, %xmm8 + vpshufd $0x4e, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm9, %xmm8 + vpshufd $0x4e, %xmm9, %xmm9 + vpxor %xmm10, %xmm6, %xmm6 + vpxor %xmm9, %xmm6, %xmm6 + vpxor %xmm8, %xmm6, %xmm6 +L_AES_GCM_encrypt_update_avx2_last_block_done: +L_AES_GCM_encrypt_update_avx2_done_enc: + vmovdqa %xmm6, (%r9) + vmovdqa %xmm4, (%r12) + vzeroupper + addq $0x98, %rsp + popq %r14 + popq %r13 + popq %r12 + repz retq +#ifndef __APPLE__ +.size AES_GCM_encrypt_update_avx2,.-AES_GCM_encrypt_update_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl AES_GCM_encrypt_final_avx2 +.type AES_GCM_encrypt_final_avx2,@function +.align 16 +AES_GCM_encrypt_final_avx2: +#else +.section __TEXT,__text +.globl _AES_GCM_encrypt_final_avx2 +.p2align 4 +_AES_GCM_encrypt_final_avx2: +#endif /* __APPLE__ */ + pushq %r13 + movl %ecx, %r10d + movl %r8d, %r11d + movq 16(%rsp), %rax + subq $16, %rsp + vmovdqa (%rdi), %xmm4 + vmovdqa (%r9), %xmm5 + vmovdqa (%rax), %xmm6 + vpsrlq $63, %xmm5, %xmm1 + vpsllq $0x01, %xmm5, %xmm0 + vpslldq $8, %xmm1, %xmm1 + vpor %xmm1, %xmm0, %xmm0 + vpshufd $0xff, %xmm5, %xmm5 + vpsrad $31, %xmm5, %xmm5 + vpand L_avx2_aes_gcm_mod2_128(%rip), %xmm5, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + # calc_tag + shlq $3, %r10 + vpinsrq $0x00, %r10, %xmm0, %xmm0 + shlq $3, %r11 + vpinsrq $0x01, %r11, %xmm1, %xmm1 + vpblendd $12, %xmm1, %xmm0, %xmm0 + vpxor %xmm4, %xmm0, %xmm0 + # ghash_gfmul_red + vpclmulqdq $16, %xmm5, %xmm0, %xmm7 + vpclmulqdq $0x01, %xmm5, %xmm0, %xmm3 + vpclmulqdq $0x00, %xmm5, %xmm0, %xmm2 + vpxor %xmm3, %xmm7, %xmm7 + vpslldq $8, %xmm7, %xmm3 + vpsrldq $8, %xmm7, %xmm7 + vpxor %xmm2, %xmm3, %xmm3 + vpclmulqdq $0x11, %xmm5, %xmm0, %xmm0 + vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm3, %xmm2 + vpshufd $0x4e, %xmm3, %xmm3 + vpxor %xmm2, %xmm3, %xmm3 + vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm3, %xmm2 + 
vpshufd $0x4e, %xmm3, %xmm3 + vpxor %xmm7, %xmm0, %xmm0 + vpxor %xmm3, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vpxor %xmm6, %xmm0, %xmm0 + # store_tag + cmpl $16, %edx + je L_AES_GCM_encrypt_final_avx2_store_tag_16 + xorq %rcx, %rcx + vmovdqu %xmm0, (%rsp) +L_AES_GCM_encrypt_final_avx2_store_tag_loop: + movzbl (%rsp,%rcx,1), %r13d + movb %r13b, (%rsi,%rcx,1) + incl %ecx + cmpl %edx, %ecx + jne L_AES_GCM_encrypt_final_avx2_store_tag_loop + jmp L_AES_GCM_encrypt_final_avx2_store_tag_done +L_AES_GCM_encrypt_final_avx2_store_tag_16: + vmovdqu %xmm0, (%rsi) +L_AES_GCM_encrypt_final_avx2_store_tag_done: + vzeroupper + addq $16, %rsp + popq %r13 + repz retq +#ifndef __APPLE__ +.size AES_GCM_encrypt_final_avx2,.-AES_GCM_encrypt_final_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl AES_GCM_decrypt_update_avx2 +.type AES_GCM_decrypt_update_avx2,@function +.align 16 +AES_GCM_decrypt_update_avx2: +#else +.section __TEXT,__text +.globl _AES_GCM_decrypt_update_avx2 +.p2align 4 +_AES_GCM_decrypt_update_avx2: +#endif /* __APPLE__ */ + pushq %r13 + pushq %r12 + pushq %r14 + movq %rdx, %r10 + movq %rcx, %r11 + movq 32(%rsp), %rax + movq 40(%rsp), %r12 + subq $0xa8, %rsp + vmovdqa (%r9), %xmm6 + vmovdqa (%rax), %xmm5 + vmovdqa (%r12), %xmm4 + # Calculate H + vpsrlq $63, %xmm5, %xmm1 + vpsllq $0x01, %xmm5, %xmm0 + vpslldq $8, %xmm1, %xmm1 + vpor %xmm1, %xmm0, %xmm0 + vpshufd $0xff, %xmm5, %xmm5 + vpsrad $31, %xmm5, %xmm5 + vpand L_avx2_aes_gcm_mod2_128(%rip), %xmm5, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + xorl %r14d, %r14d + cmpl $0x80, %r8d + movl %r8d, %r13d + jl L_AES_GCM_decrypt_update_avx2_done_128 + andl $0xffffff80, %r13d + vmovdqa %xmm4, 128(%rsp) + vmovdqa %xmm15, 144(%rsp) + vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm3 + # H ^ 1 and H ^ 2 + vpclmulqdq $0x00, %xmm5, %xmm5, %xmm9 + vpclmulqdq $0x11, %xmm5, %xmm5, %xmm10 + vpclmulqdq $16, %xmm3, %xmm9, %xmm8 + vpshufd $0x4e, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpclmulqdq $16, %xmm3, %xmm9, %xmm8 + vpshufd $0x4e, %xmm9, %xmm9 + vpxor %xmm8, %xmm9, %xmm9 + vpxor %xmm9, %xmm10, %xmm0 + vmovdqa %xmm5, (%rsp) + vmovdqa %xmm0, 16(%rsp) + # H ^ 3 and H ^ 4 + vpclmulqdq $16, %xmm5, %xmm0, %xmm11 + vpclmulqdq $0x01, %xmm5, %xmm0, %xmm10 + vpclmulqdq $0x00, %xmm5, %xmm0, %xmm9 + vpclmulqdq $0x11, %xmm5, %xmm0, %xmm12 + vpclmulqdq $0x00, %xmm0, %xmm0, %xmm13 + vpclmulqdq $0x11, %xmm0, %xmm0, %xmm14 + vpxor %xmm10, %xmm11, %xmm11 + vpslldq $8, %xmm11, %xmm10 + vpsrldq $8, %xmm11, %xmm11 + vpxor %xmm9, %xmm10, %xmm10 + vpclmulqdq $16, %xmm3, %xmm13, %xmm8 + vpclmulqdq $16, %xmm3, %xmm10, %xmm9 + vpshufd $0x4e, %xmm10, %xmm10 + vpshufd $0x4e, %xmm13, %xmm13 + vpxor %xmm9, %xmm10, %xmm10 + vpxor %xmm8, %xmm13, %xmm13 + vpclmulqdq $16, %xmm3, %xmm10, %xmm9 + vpclmulqdq $16, %xmm3, %xmm13, %xmm8 + vpshufd $0x4e, %xmm10, %xmm10 + vpshufd $0x4e, %xmm13, %xmm13 + vpxor %xmm11, %xmm12, %xmm12 + vpxor %xmm8, %xmm13, %xmm13 + vpxor %xmm12, %xmm10, %xmm10 + vpxor %xmm14, %xmm13, %xmm2 + vpxor %xmm9, %xmm10, %xmm1 + vmovdqa %xmm1, 32(%rsp) + vmovdqa %xmm2, 48(%rsp) + # H ^ 5 and H ^ 6 + vpclmulqdq $16, %xmm0, %xmm1, %xmm11 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm10 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm9 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm12 + vpclmulqdq $0x00, %xmm1, %xmm1, %xmm13 + vpclmulqdq $0x11, %xmm1, %xmm1, %xmm14 + vpxor %xmm10, %xmm11, %xmm11 + vpslldq $8, %xmm11, %xmm10 + vpsrldq $8, %xmm11, %xmm11 + vpxor %xmm9, %xmm10, %xmm10 + vpclmulqdq $16, %xmm3, %xmm13, %xmm8 + vpclmulqdq $16, 
%xmm3, %xmm10, %xmm9 + vpshufd $0x4e, %xmm10, %xmm10 + vpshufd $0x4e, %xmm13, %xmm13 + vpxor %xmm9, %xmm10, %xmm10 + vpxor %xmm8, %xmm13, %xmm13 + vpclmulqdq $16, %xmm3, %xmm10, %xmm9 + vpclmulqdq $16, %xmm3, %xmm13, %xmm8 + vpshufd $0x4e, %xmm10, %xmm10 + vpshufd $0x4e, %xmm13, %xmm13 + vpxor %xmm11, %xmm12, %xmm12 + vpxor %xmm8, %xmm13, %xmm13 + vpxor %xmm12, %xmm10, %xmm10 + vpxor %xmm14, %xmm13, %xmm0 + vpxor %xmm9, %xmm10, %xmm7 + vmovdqa %xmm7, 64(%rsp) + vmovdqa %xmm0, 80(%rsp) + # H ^ 7 and H ^ 8 + vpclmulqdq $16, %xmm1, %xmm2, %xmm11 + vpclmulqdq $0x01, %xmm1, %xmm2, %xmm10 + vpclmulqdq $0x00, %xmm1, %xmm2, %xmm9 + vpclmulqdq $0x11, %xmm1, %xmm2, %xmm12 + vpclmulqdq $0x00, %xmm2, %xmm2, %xmm13 + vpclmulqdq $0x11, %xmm2, %xmm2, %xmm14 + vpxor %xmm10, %xmm11, %xmm11 + vpslldq $8, %xmm11, %xmm10 + vpsrldq $8, %xmm11, %xmm11 + vpxor %xmm9, %xmm10, %xmm10 + vpclmulqdq $16, %xmm3, %xmm13, %xmm8 + vpclmulqdq $16, %xmm3, %xmm10, %xmm9 + vpshufd $0x4e, %xmm10, %xmm10 + vpshufd $0x4e, %xmm13, %xmm13 + vpxor %xmm9, %xmm10, %xmm10 + vpxor %xmm8, %xmm13, %xmm13 + vpclmulqdq $16, %xmm3, %xmm10, %xmm9 + vpclmulqdq $16, %xmm3, %xmm13, %xmm8 + vpshufd $0x4e, %xmm10, %xmm10 + vpshufd $0x4e, %xmm13, %xmm13 + vpxor %xmm11, %xmm12, %xmm12 + vpxor %xmm8, %xmm13, %xmm13 + vpxor %xmm12, %xmm10, %xmm10 + vpxor %xmm14, %xmm13, %xmm0 + vpxor %xmm9, %xmm10, %xmm7 + vmovdqa %xmm7, 96(%rsp) + vmovdqa %xmm0, 112(%rsp) +L_AES_GCM_decrypt_update_avx2_ghash_128: + # aesenc_128_ghash + leaq (%r11,%r14,1), %rcx + leaq (%r10,%r14,1), %rdx + # aesenc_ctr + vmovdqa 128(%rsp), %xmm0 + vmovdqa L_avx2_aes_gcm_bswap_epi64(%rip), %xmm1 + vpaddd L_avx2_aes_gcm_one(%rip), %xmm0, %xmm9 + vpshufb %xmm1, %xmm0, %xmm8 + vpaddd L_avx2_aes_gcm_two(%rip), %xmm0, %xmm10 + vpshufb %xmm1, %xmm9, %xmm9 + vpaddd L_avx2_aes_gcm_three(%rip), %xmm0, %xmm11 + vpshufb %xmm1, %xmm10, %xmm10 + vpaddd L_avx2_aes_gcm_four(%rip), %xmm0, %xmm12 + vpshufb %xmm1, %xmm11, %xmm11 + vpaddd L_avx2_aes_gcm_five(%rip), %xmm0, %xmm13 + vpshufb %xmm1, %xmm12, %xmm12 + vpaddd L_avx2_aes_gcm_six(%rip), %xmm0, %xmm14 + vpshufb %xmm1, %xmm13, %xmm13 + vpaddd L_avx2_aes_gcm_seven(%rip), %xmm0, %xmm15 + vpshufb %xmm1, %xmm14, %xmm14 + vpaddd L_avx2_aes_gcm_eight(%rip), %xmm0, %xmm0 + vpshufb %xmm1, %xmm15, %xmm15 + # aesenc_xor + vmovdqa (%rdi), %xmm7 + vmovdqa %xmm0, 128(%rsp) + vpxor %xmm7, %xmm8, %xmm8 + vpxor %xmm7, %xmm9, %xmm9 + vpxor %xmm7, %xmm10, %xmm10 + vpxor %xmm7, %xmm11, %xmm11 + vpxor %xmm7, %xmm12, %xmm12 + vpxor %xmm7, %xmm13, %xmm13 + vpxor %xmm7, %xmm14, %xmm14 + vpxor %xmm7, %xmm15, %xmm15 + # aesenc_pclmul_1 + vmovdqu (%rcx), %xmm1 + vmovdqu 16(%rdi), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vmovdqa 112(%rsp), %xmm2 + vpxor %xmm6, %xmm1, %xmm1 + vpclmulqdq $16, %xmm2, %xmm1, %xmm5 + vpclmulqdq $0x01, %xmm2, %xmm1, %xmm3 + vpclmulqdq $0x00, %xmm2, %xmm1, %xmm6 + vpclmulqdq $0x11, %xmm2, %xmm1, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_2 + vmovdqu 16(%rcx), %xmm1 + vmovdqa 96(%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vmovdqu 32(%rdi), %xmm0 + vpxor %xmm1, %xmm7, %xmm7 + vaesenc 
%xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_n + vmovdqu 32(%rcx), %xmm1 + vmovdqa 80(%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vmovdqu 48(%rdi), %xmm0 + vpxor %xmm1, %xmm7, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_n + vmovdqu 48(%rcx), %xmm1 + vmovdqa 64(%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vmovdqu 64(%rdi), %xmm0 + vpxor %xmm1, %xmm7, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_n + vmovdqu 64(%rcx), %xmm1 + vmovdqa 48(%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vmovdqu 80(%rdi), %xmm0 + vpxor %xmm1, %xmm7, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_n + vmovdqu 80(%rcx), %xmm1 + vmovdqa 32(%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vmovdqu 96(%rdi), %xmm0 + vpxor %xmm1, %xmm7, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_n + vmovdqu 96(%rcx), %xmm1 + vmovdqa 16(%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vmovdqu 112(%rdi), %xmm0 + vpxor %xmm1, %xmm7, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, 
%xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_n + vmovdqu 112(%rcx), %xmm1 + vmovdqa (%rsp), %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm1, %xmm1 + vpxor %xmm2, %xmm5, %xmm5 + vpclmulqdq $16, %xmm0, %xmm1, %xmm2 + vpxor %xmm3, %xmm5, %xmm5 + vpclmulqdq $0x01, %xmm0, %xmm1, %xmm3 + vpxor %xmm4, %xmm6, %xmm6 + vpclmulqdq $0x00, %xmm0, %xmm1, %xmm4 + vpclmulqdq $0x11, %xmm0, %xmm1, %xmm1 + vmovdqu 128(%rdi), %xmm0 + vpxor %xmm1, %xmm7, %xmm7 + vaesenc %xmm0, %xmm8, %xmm8 + vaesenc %xmm0, %xmm9, %xmm9 + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc %xmm0, %xmm11, %xmm11 + vaesenc %xmm0, %xmm12, %xmm12 + vaesenc %xmm0, %xmm13, %xmm13 + vaesenc %xmm0, %xmm14, %xmm14 + vaesenc %xmm0, %xmm15, %xmm15 + # aesenc_pclmul_l + vpxor %xmm2, %xmm5, %xmm5 + vpxor %xmm4, %xmm6, %xmm6 + vpxor %xmm3, %xmm5, %xmm5 + vpslldq $8, %xmm5, %xmm1 + vpsrldq $8, %xmm5, %xmm5 + vmovdqa 144(%rdi), %xmm4 + vmovdqa L_avx2_aes_gcm_mod2_128(%rip), %xmm0 + vaesenc %xmm4, %xmm8, %xmm8 + vpxor %xmm1, %xmm6, %xmm6 + vpxor %xmm5, %xmm7, %xmm7 + vpclmulqdq $16, %xmm0, %xmm6, %xmm3 + vaesenc %xmm4, %xmm9, %xmm9 + vaesenc %xmm4, %xmm10, %xmm10 + vaesenc %xmm4, %xmm11, %xmm11 + vpshufd $0x4e, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpclmulqdq $16, %xmm0, %xmm6, %xmm3 + vaesenc %xmm4, %xmm12, %xmm12 + vaesenc %xmm4, %xmm13, %xmm13 + vaesenc %xmm4, %xmm14, %xmm14 + vpshufd $0x4e, %xmm6, %xmm6 + vpxor %xmm3, %xmm6, %xmm6 + vpxor %xmm7, %xmm6, %xmm6 + vaesenc %xmm4, %xmm15, %xmm15 + cmpl $11, %esi + vmovdqa 160(%rdi), %xmm7 + jl L_AES_GCM_decrypt_update_avx2_aesenc_128_ghash_avx_done + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 176(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + cmpl $13, %esi + vmovdqa 192(%rdi), %xmm7 + jl L_AES_GCM_decrypt_update_avx2_aesenc_128_ghash_avx_done + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 208(%rdi), %xmm7 + vaesenc %xmm7, %xmm8, %xmm8 + vaesenc %xmm7, %xmm9, %xmm9 + vaesenc %xmm7, %xmm10, %xmm10 + vaesenc %xmm7, %xmm11, %xmm11 + vaesenc %xmm7, %xmm12, %xmm12 + vaesenc %xmm7, %xmm13, %xmm13 + vaesenc %xmm7, %xmm14, %xmm14 + vaesenc %xmm7, %xmm15, %xmm15 + vmovdqa 224(%rdi), %xmm7 +L_AES_GCM_decrypt_update_avx2_aesenc_128_ghash_avx_done: + # aesenc_last + vaesenclast %xmm7, %xmm8, %xmm8 + vaesenclast %xmm7, %xmm9, %xmm9 + vaesenclast %xmm7, %xmm10, %xmm10 + vaesenclast %xmm7, %xmm11, %xmm11 + vmovdqu (%rcx), %xmm0 + vmovdqu 16(%rcx), %xmm1 + vmovdqu 32(%rcx), %xmm2 + vmovdqu 48(%rcx), %xmm3 + vpxor %xmm0, %xmm8, %xmm8 + vpxor %xmm1, %xmm9, %xmm9 + vpxor %xmm2, %xmm10, %xmm10 + vpxor %xmm3, %xmm11, %xmm11 + vmovdqu %xmm8, (%rdx) + vmovdqu %xmm9, 16(%rdx) + vmovdqu %xmm10, 32(%rdx) + vmovdqu %xmm11, 48(%rdx) + vaesenclast %xmm7, %xmm12, %xmm12 + vaesenclast %xmm7, %xmm13, %xmm13 + vaesenclast %xmm7, %xmm14, %xmm14 + vaesenclast %xmm7, %xmm15, %xmm15 + vmovdqu 64(%rcx), %xmm0 + vmovdqu 80(%rcx), %xmm1 + vmovdqu 
96(%rcx), %xmm2 + vmovdqu 112(%rcx), %xmm3 + vpxor %xmm0, %xmm12, %xmm12 + vpxor %xmm1, %xmm13, %xmm13 + vpxor %xmm2, %xmm14, %xmm14 + vpxor %xmm3, %xmm15, %xmm15 + vmovdqu %xmm12, 64(%rdx) + vmovdqu %xmm13, 80(%rdx) + vmovdqu %xmm14, 96(%rdx) + vmovdqu %xmm15, 112(%rdx) + # aesenc_128_ghash - end + addl $0x80, %r14d + cmpl %r13d, %r14d + jl L_AES_GCM_decrypt_update_avx2_ghash_128 + vmovdqa (%rsp), %xmm5 + vmovdqa 128(%rsp), %xmm4 + vmovdqa 144(%rsp), %xmm15 +L_AES_GCM_decrypt_update_avx2_done_128: + cmpl %r8d, %r14d + jge L_AES_GCM_decrypt_update_avx2_done_dec + movl %r8d, %r13d + andl $0xfffffff0, %r13d + cmpl %r13d, %r14d + jge L_AES_GCM_decrypt_update_avx2_last_block_done +L_AES_GCM_decrypt_update_avx2_last_block_start: + vmovdqu (%r11,%r14,1), %xmm11 + vpshufb L_avx2_aes_gcm_bswap_epi64(%rip), %xmm4, %xmm10 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm11, %xmm12 + vpaddd L_avx2_aes_gcm_one(%rip), %xmm4, %xmm4 + vpxor %xmm6, %xmm12, %xmm12 + # aesenc_gfmul_sb + vpclmulqdq $0x01, %xmm5, %xmm12, %xmm2 + vpclmulqdq $16, %xmm5, %xmm12, %xmm3 + vpclmulqdq $0x00, %xmm5, %xmm12, %xmm1 + vpclmulqdq $0x11, %xmm5, %xmm12, %xmm8 + vpxor (%rdi), %xmm10, %xmm10 + vaesenc 16(%rdi), %xmm10, %xmm10 + vpxor %xmm2, %xmm3, %xmm3 + vpslldq $8, %xmm3, %xmm2 + vpsrldq $8, %xmm3, %xmm3 + vaesenc 32(%rdi), %xmm10, %xmm10 + vpxor %xmm1, %xmm2, %xmm2 + vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm2, %xmm1 + vaesenc 48(%rdi), %xmm10, %xmm10 + vaesenc 64(%rdi), %xmm10, %xmm10 + vaesenc 80(%rdi), %xmm10, %xmm10 + vpshufd $0x4e, %xmm2, %xmm2 + vpxor %xmm1, %xmm2, %xmm2 + vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm2, %xmm1 + vaesenc 96(%rdi), %xmm10, %xmm10 + vaesenc 112(%rdi), %xmm10, %xmm10 + vaesenc 128(%rdi), %xmm10, %xmm10 + vpshufd $0x4e, %xmm2, %xmm2 + vaesenc 144(%rdi), %xmm10, %xmm10 + vpxor %xmm3, %xmm8, %xmm8 + vpxor %xmm8, %xmm2, %xmm2 + vmovdqa 160(%rdi), %xmm0 + cmpl $11, %esi + jl L_AES_GCM_decrypt_update_avx2_aesenc_gfmul_sb_last + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc 176(%rdi), %xmm10, %xmm10 + vmovdqa 192(%rdi), %xmm0 + cmpl $13, %esi + jl L_AES_GCM_decrypt_update_avx2_aesenc_gfmul_sb_last + vaesenc %xmm0, %xmm10, %xmm10 + vaesenc 208(%rdi), %xmm10, %xmm10 + vmovdqa 224(%rdi), %xmm0 +L_AES_GCM_decrypt_update_avx2_aesenc_gfmul_sb_last: + vaesenclast %xmm0, %xmm10, %xmm10 + vpxor %xmm1, %xmm2, %xmm6 + vpxor %xmm11, %xmm10, %xmm10 + vmovdqu %xmm10, (%r10,%r14,1) + addl $16, %r14d + cmpl %r13d, %r14d + jl L_AES_GCM_decrypt_update_avx2_last_block_start +L_AES_GCM_decrypt_update_avx2_last_block_done: +L_AES_GCM_decrypt_update_avx2_done_dec: + vmovdqa %xmm6, (%r9) + vmovdqa %xmm4, (%r12) + vzeroupper + addq $0xa8, %rsp + popq %r14 + popq %r12 + popq %r13 + repz retq +#ifndef __APPLE__ +.size AES_GCM_decrypt_update_avx2,.-AES_GCM_decrypt_update_avx2 +#endif /* __APPLE__ */ +#ifndef __APPLE__ +.text +.globl AES_GCM_decrypt_final_avx2 +.type AES_GCM_decrypt_final_avx2,@function +.align 16 +AES_GCM_decrypt_final_avx2: +#else +.section __TEXT,__text +.globl _AES_GCM_decrypt_final_avx2 +.p2align 4 +_AES_GCM_decrypt_final_avx2: +#endif /* __APPLE__ */ + pushq %r13 + pushq %rbp + movl %ecx, %r10d + movl %r8d, %r11d + movq 24(%rsp), %rax + movq 32(%rsp), %rbp + subq $16, %rsp + vmovdqa (%rdi), %xmm4 + vmovdqa (%r9), %xmm5 + vmovdqa (%rax), %xmm6 + vpsrlq $63, %xmm5, %xmm1 + vpsllq $0x01, %xmm5, %xmm0 + vpslldq $8, %xmm1, %xmm1 + vpor %xmm1, %xmm0, %xmm0 + vpshufd $0xff, %xmm5, %xmm5 + vpsrad $31, %xmm5, %xmm5 + vpand L_avx2_aes_gcm_mod2_128(%rip), %xmm5, %xmm5 + vpxor %xmm0, %xmm5, %xmm5 + # 
calc_tag + shlq $3, %r10 + vpinsrq $0x00, %r10, %xmm0, %xmm0 + shlq $3, %r11 + vpinsrq $0x01, %r11, %xmm1, %xmm1 + vpblendd $12, %xmm1, %xmm0, %xmm0 + vpxor %xmm4, %xmm0, %xmm0 + # ghash_gfmul_red + vpclmulqdq $16, %xmm5, %xmm0, %xmm7 + vpclmulqdq $0x01, %xmm5, %xmm0, %xmm3 + vpclmulqdq $0x00, %xmm5, %xmm0, %xmm2 + vpxor %xmm3, %xmm7, %xmm7 + vpslldq $8, %xmm7, %xmm3 + vpsrldq $8, %xmm7, %xmm7 + vpxor %xmm2, %xmm3, %xmm3 + vpclmulqdq $0x11, %xmm5, %xmm0, %xmm0 + vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm3, %xmm2 + vpshufd $0x4e, %xmm3, %xmm3 + vpxor %xmm2, %xmm3, %xmm3 + vpclmulqdq $16, L_avx2_aes_gcm_mod2_128(%rip), %xmm3, %xmm2 + vpshufd $0x4e, %xmm3, %xmm3 + vpxor %xmm7, %xmm0, %xmm0 + vpxor %xmm3, %xmm0, %xmm0 + vpxor %xmm2, %xmm0, %xmm0 + vpshufb L_avx2_aes_gcm_bswap_mask(%rip), %xmm0, %xmm0 + vpxor %xmm6, %xmm0, %xmm0 + # cmp_tag + cmpl $16, %edx + je L_AES_GCM_decrypt_final_avx2_cmp_tag_16 + xorq %rcx, %rcx + xorq %r9, %r9 + vmovdqu %xmm0, (%rsp) +L_AES_GCM_decrypt_final_avx2_cmp_tag_loop: + movzbl (%rsp,%rcx,1), %r13d + xorb (%rsi,%rcx,1), %r13b + orb %r13b, %r9b + incl %ecx + cmpl %edx, %ecx + jne L_AES_GCM_decrypt_final_avx2_cmp_tag_loop + cmpb $0x00, %r9b + sete %r9b + jmp L_AES_GCM_decrypt_final_avx2_cmp_tag_done +L_AES_GCM_decrypt_final_avx2_cmp_tag_16: + vmovdqu (%rsi), %xmm1 + vpcmpeqb %xmm1, %xmm0, %xmm0 + vpmovmskb %xmm0, %rcx + # %%edx == 0xFFFF then return 1 else => return 0 + xorl %r9d, %r9d + cmpl $0xffff, %ecx + sete %r9b +L_AES_GCM_decrypt_final_avx2_cmp_tag_done: + movl %r9d, (%rbp) + vzeroupper + addq $16, %rsp + popq %rbp + popq %r13 + repz retq +#ifndef __APPLE__ +.size AES_GCM_decrypt_final_avx2,.-AES_GCM_decrypt_final_avx2 +#endif /* __APPLE__ */ +#endif /* WOLFSSL_AESGCM_STREAM */ #endif /* HAVE_INTEL_AVX2 */ #if defined(__linux__) && defined(__ELF__) diff --git a/wolfcrypt/src/error.c b/wolfcrypt/src/error.c index 24eb04449..3680eac86 100644 --- a/wolfcrypt/src/error.c +++ b/wolfcrypt/src/error.c @@ -521,6 +521,12 @@ const char* wc_GetErrorString(int error) case SAKKE_VERIFY_FAIL_E: return "SAKKE derivation verification error"; + case MISSING_IV: + return "Required IV not set"; + + case MISSING_KEY: + return "Required key not set"; + default: return "unknown error number"; diff --git a/wolfcrypt/src/evp.c b/wolfcrypt/src/evp.c index 75c165c42..ff0569854 100644 --- a/wolfcrypt/src/evp.c +++ b/wolfcrypt/src/evp.c @@ -553,6 +553,7 @@ static int evpCipherBlock(WOLFSSL_EVP_CIPHER_CTX *ctx, } #if defined(HAVE_AESGCM) +#ifndef WOLFSSL_AESGCM_STREAM static int wolfSSL_EVP_CipherUpdate_GCM_AAD(WOLFSSL_EVP_CIPHER_CTX *ctx, const unsigned char *in, int inl) { if (in && inl > 0) { @@ -570,11 +571,13 @@ static int wolfSSL_EVP_CipherUpdate_GCM_AAD(WOLFSSL_EVP_CIPHER_CTX *ctx, } return 0; } +#endif /* WOLFSSL_AESGCM_STREAM */ static int wolfSSL_EVP_CipherUpdate_GCM(WOLFSSL_EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl, const unsigned char *in, int inl) { +#ifndef WOLFSSL_AESGCM_STREAM int ret = 0; *outl = inl; @@ -606,8 +609,42 @@ static int wolfSSL_EVP_CipherUpdate_GCM(WOLFSSL_EVP_CIPHER_CTX *ctx, } return WOLFSSL_SUCCESS; +#else + int ret; + + /* When out is NULL then this is AAD. */ + if (out == NULL) { + if (ctx->enc) { + ret = wc_AesGcmEncryptUpdate(&ctx->cipher.aes, NULL, NULL, 0, in, + inl); + } + else { + ret = wc_AesGcmDecryptUpdate(&ctx->cipher.aes, NULL, NULL, 0, in, + inl); + } + } + /* When out is not NULL then this is plaintext/cipher text. 
*/ + else { + if (ctx->enc) { + ret = wc_AesGcmEncryptUpdate(&ctx->cipher.aes, out, in, inl, NULL, + 0); + } + else { + ret = wc_AesGcmDecryptUpdate(&ctx->cipher.aes, out, in, inl, NULL, + 0); + } + } + *outl = inl; + if (ret == 0) { + ret = WOLFSSL_SUCCESS; + } + else { + ret = WOLFSSL_FAILURE; + } + return ret; +#endif /* WOLFSSL_AESGCM_STREAM */ } -#endif +#endif /* HAVE_AESGCM */ /* returns WOLFSSL_SUCCESS on success and WOLFSSL_FAILURE on failure */ WOLFSSL_API int wolfSSL_EVP_CipherUpdate(WOLFSSL_EVP_CIPHER_CTX *ctx, @@ -626,16 +663,16 @@ WOLFSSL_API int wolfSSL_EVP_CipherUpdate(WOLFSSL_EVP_CIPHER_CTX *ctx, *outl = 0; #if !defined(NO_AES) && defined(HAVE_AESGCM) - switch (ctx->cipherType) { - case AES_128_GCM_TYPE: - case AES_192_GCM_TYPE: - case AES_256_GCM_TYPE: -/* if out == NULL, in/inl contains the additional authenticated data for GCM */ - return wolfSSL_EVP_CipherUpdate_GCM(ctx, out, outl, in, inl); - default: - /* fall-through */ - break; - } + switch (ctx->cipherType) { + case AES_128_GCM_TYPE: + case AES_192_GCM_TYPE: + case AES_256_GCM_TYPE: + /* if out == NULL, in/inl contains the additional authenticated data * for GCM */ + return wolfSSL_EVP_CipherUpdate_GCM(ctx, out, outl, in, inl); + default: + /* fall-through */ + break; + } #endif /* !defined(NO_AES) && defined(HAVE_AESGCM) */ if (out == NULL) { @@ -770,6 +807,7 @@ int wolfSSL_EVP_CipherFinal(WOLFSSL_EVP_CIPHER_CTX *ctx, case AES_128_GCM_TYPE: case AES_192_GCM_TYPE: case AES_256_GCM_TYPE: +#ifndef WOLFSSL_AESGCM_STREAM if ((ctx->gcmBuffer && ctx->gcmBufferLen > 0) || (ctx->gcmBufferLen == 0)) { if (ctx->enc) @@ -799,6 +837,24 @@ int wolfSSL_EVP_CipherFinal(WOLFSSL_EVP_CIPHER_CTX *ctx, else { *outl = 0; } +#else + /* No data to return - all handled in Update. */ + *outl = 0; + if (ctx->enc) { + ret = wc_AesGcmEncryptFinal(&ctx->cipher.aes, ctx->authTag, + ctx->authTagSz); + } + else { + ret = wc_AesGcmDecryptFinal(&ctx->cipher.aes, ctx->authTag, + ctx->authTagSz); + } + if (ret == 0) { + ret = WOLFSSL_SUCCESS; + } + else { + ret = WOLFSSL_FAILURE; + } +#endif /* WOLFSSL_AESGCM_STREAM */ /* Clear IV, since IV reuse is not recommended for AES GCM. */ XMEMSET(ctx->iv, 0, AES_BLOCK_SIZE); break; @@ -4073,10 +4129,18 @@ int wolfSSL_EVP_MD_type(const WOLFSSL_EVP_MD *md) WOLFSSL_MSG("Key or IV not set"); break; } - if ((ret = wc_AesGcmSetExtIV(&ctx->cipher.aes, ctx->iv, ctx->ivSz)) != 0) { + if ((ret = wc_AesGcmSetExtIV(&ctx->cipher.aes, ctx->iv, + ctx->ivSz)) != 0) { WOLFSSL_MSG("wc_AesGcmSetIV failed"); ret = WOLFSSL_FAILURE; } +#ifdef WOLFSSL_AESGCM_STREAM + /* Initialize using IV cached in Aes object. */ + if (wc_AesGcmInit(&ctx->cipher.aes, NULL, 0, NULL, 0) != 0) { + WOLFSSL_MSG("wc_AesGcmInit failed"); + ret = WOLFSSL_FAILURE; + } +#endif /* WOLFSSL_AESGCM_STREAM */ /* OpenSSL increments the IV. Not sure why */ IncCtr(ctx->iv, ctx->ivSz); break; @@ -4373,14 +4437,25 @@ int wolfSSL_EVP_MD_type(const WOLFSSL_EVP_MD *md) ctx->authTagSz = AES_BLOCK_SIZE; ctx->ivSz = GCM_NONCE_MID_SZ; +#ifndef WOLFSSL_AESGCM_STREAM if (key && wc_AesGcmSetKey(&ctx->cipher.aes, key, ctx->keyLen)) { WOLFSSL_MSG("wc_AesGcmSetKey() failed"); return WOLFSSL_FAILURE; } +#endif /* !WOLFSSL_AESGCM_STREAM */ if (iv && wc_AesGcmSetExtIV(&ctx->cipher.aes, iv, GCM_NONCE_MID_SZ)) { WOLFSSL_MSG("wc_AesGcmSetExtIV() failed"); return WOLFSSL_FAILURE; } +#ifdef WOLFSSL_AESGCM_STREAM + /* Initialize with key and IV if available. */ + if (wc_AesGcmInit(&ctx->cipher.aes, key, + (key == NULL) ? 0 : ctx->keyLen, iv, + (iv == NULL) ? 
0 : GCM_NONCE_MID_SZ) != 0) { + WOLFSSL_MSG("wc_AesGcmInit() failed"); + return WOLFSSL_FAILURE; + } +#endif /* WOLFSSL_AESGCM_STREAM */ if (enc == 0 || enc == 1) ctx->enc = enc ? 1 : 0; } @@ -4398,14 +4473,25 @@ int wolfSSL_EVP_MD_type(const WOLFSSL_EVP_MD *md) ctx->authTagSz = AES_BLOCK_SIZE; ctx->ivSz = GCM_NONCE_MID_SZ; +#ifndef WOLFSSL_AESGCM_STREAM if (key && wc_AesGcmSetKey(&ctx->cipher.aes, key, ctx->keyLen)) { WOLFSSL_MSG("wc_AesGcmSetKey() failed"); return WOLFSSL_FAILURE; } +#endif /* !WOLFSSL_AESGCM_STREAM */ if (iv && wc_AesGcmSetExtIV(&ctx->cipher.aes, iv, GCM_NONCE_MID_SZ)) { WOLFSSL_MSG("wc_AesGcmSetExtIV() failed"); return WOLFSSL_FAILURE; } +#ifdef WOLFSSL_AESGCM_STREAM + /* Initialize with key and IV if available. */ + if (wc_AesGcmInit(&ctx->cipher.aes, key, + (key == NULL) ? 0 : ctx->keyLen, iv, + (iv == NULL) ? 0 : GCM_NONCE_MID_SZ) != 0) { + WOLFSSL_MSG("wc_AesGcmInit() failed"); + return WOLFSSL_FAILURE; + } +#endif /* WOLFSSL_AESGCM_STREAM */ if (enc == 0 || enc == 1) ctx->enc = enc ? 1 : 0; } @@ -4423,14 +4509,25 @@ int wolfSSL_EVP_MD_type(const WOLFSSL_EVP_MD *md) ctx->authTagSz = AES_BLOCK_SIZE; ctx->ivSz = GCM_NONCE_MID_SZ; +#ifndef WOLFSSL_AESGCM_STREAM if (key && wc_AesGcmSetKey(&ctx->cipher.aes, key, ctx->keyLen)) { WOLFSSL_MSG("wc_AesGcmSetKey() failed"); return WOLFSSL_FAILURE; } +#endif /* !WOLFSSL_AESGCM_STREAM */ if (iv && wc_AesGcmSetExtIV(&ctx->cipher.aes, iv, GCM_NONCE_MID_SZ)) { WOLFSSL_MSG("wc_AesGcmSetExtIV() failed"); return WOLFSSL_FAILURE; } +#ifdef WOLFSSL_AESGCM_STREAM + /* Initialize with key and IV if available. */ + if (wc_AesGcmInit(&ctx->cipher.aes, + key, (key == NULL) ? 0 : ctx->keyLen, + iv, (iv == NULL) ? 0 : GCM_NONCE_MID_SZ) != 0) { + WOLFSSL_MSG("wc_AesGcmInit() failed"); + return WOLFSSL_FAILURE; + } +#endif /* WOLFSSL_AESGCM_STREAM */ if (enc == 0 || enc == 1) ctx->enc = enc ? 1 : 0; } @@ -4482,7 +4579,7 @@ int wolfSSL_EVP_MD_type(const WOLFSSL_EVP_MD *md) if (enc == 0 || enc == 1) ctx->enc = enc ? 1 : 0; if (key) { - ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv, + ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv, AES_ENCRYPTION, 1); if (ret != 0) return WOLFSSL_FAILURE; @@ -4510,7 +4607,7 @@ int wolfSSL_EVP_MD_type(const WOLFSSL_EVP_MD *md) if (enc == 0 || enc == 1) ctx->enc = enc ? 1 : 0; if (key) { - ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv, + ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, iv, AES_ENCRYPTION, 1); if (ret != 0) return WOLFSSL_FAILURE; @@ -4536,7 +4633,7 @@ int wolfSSL_EVP_MD_type(const WOLFSSL_EVP_MD *md) if (enc == 0 || enc == 1) ctx->enc = enc ? 1 : 0; if (key) { - ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, NULL, + ret = AesSetKey_ex(&ctx->cipher.aes, key, ctx->keyLen, NULL, ctx->enc ? AES_ENCRYPTION : AES_DECRYPTION, 1); } if (ret != 0) @@ -5262,6 +5359,7 @@ int wolfSSL_EVP_MD_type(const WOLFSSL_EVP_MD *md) case AES_192_GCM_TYPE : case AES_256_GCM_TYPE : WOLFSSL_MSG("AES GCM"); +#ifndef WOLFSSL_AESGCM_STREAM if (!dst) { ret = wolfSSL_EVP_CipherUpdate_GCM_AAD(ctx, src, len); } @@ -5275,6 +5373,50 @@ int wolfSSL_EVP_MD_type(const WOLFSSL_EVP_MD *md) len, ctx->iv, ctx->ivSz, ctx->authTag, ctx->authTagSz, ctx->gcmAuthIn, ctx->gcmAuthInSz); } +#else + /* Do one shot operation with streaming API as other + * initialization set up for streaming. */ + ret = wc_AesGcmInit(&ctx->cipher.aes, NULL, 0, ctx->iv, + ctx->ivSz); + /* No destination means only AAD. 
*/ + if ((ret == 0) && (dst == NULL)) { + if (ctx->enc) { + ret = wc_AesGcmEncryptUpdate(&ctx->cipher.aes, NULL, + NULL, 0, src, len); + } + else { + ret = wc_AesGcmDecryptUpdate(&ctx->cipher.aes, NULL, + NULL, 0, src, len); + } + } + /* Only plaintext/cipher text. */ + else if (ret == 0) { + if (ctx->enc) { + ret = wc_AesGcmEncryptUpdate(&ctx->cipher.aes, dst, src, + len, NULL, 0); + } + else { + ret = wc_AesGcmDecryptUpdate(&ctx->cipher.aes, dst, src, + len, NULL, 0); + if (ret == 0) { + ret = wc_AesGcmDecryptFinal(&ctx->cipher.aes, + ctx->authTag, ctx->authTagSz); + } + } + } + if (ret == 0) { + if (ctx->enc) { + /* Calculate authentication tag. */ + ret = wc_AesGcmEncryptFinal(&ctx->cipher.aes, + ctx->authTag, ctx->authTagSz); + } + else { + /* Calculate authentication tag and compare. */ + ret = wc_AesGcmDecryptFinal(&ctx->cipher.aes, + ctx->authTag, ctx->authTagSz); + } + } +#endif /* WOLFSSL_AESGCM_STREAM */ if (ret == 0) ret = len; break; diff --git a/wolfcrypt/test/test.c b/wolfcrypt/test/test.c index ea36ed9f6..40cc061f3 100644 --- a/wolfcrypt/test/test.c +++ b/wolfcrypt/test/test.c @@ -10045,6 +10045,103 @@ WOLFSSL_TEST_SUBROUTINE int aesgcm_test(void) #endif /* WOLFSSL_AES_256 && !(WC_NO_RNG || HAVE_SELFTEST) */ #endif /* HAVE_FIPS_VERSION >= 2 */ +#if !defined(WOLFSSL_AFALG_XILINX_AES) && !defined(WOLFSSL_XILINX_CRYPT) +#ifdef WOLFSSL_AES_256 +#ifdef WOLFSSL_AESGCM_STREAM + result = wc_AesGcmEncryptInit(enc, k1, sizeof(k1), iv1, sizeof(iv1)); + if (result != 0) + ERROR_OUT(-6360, out); + result = wc_AesGcmEncryptUpdate(enc, resultC, p, sizeof(p), a, sizeof(a)); + if (result != 0) + ERROR_OUT(-6361, out); + result = wc_AesGcmEncryptFinal(enc, resultT, sizeof(resultT)); + if (result != 0) + ERROR_OUT(-6362, out); + if (XMEMCMP(resultC, c1, sizeof(c1)) != 0) + ERROR_OUT(-6363, out); + if (XMEMCMP(resultT, t1, sizeof(t1)) != 0) + ERROR_OUT(-6364, out); + +#ifdef HAVE_AES_DECRYPT + result = wc_AesGcmDecryptInit(enc, k1, sizeof(k1), iv1, sizeof(iv1)); + if (result != 0) + ERROR_OUT(-6370, out); + result = wc_AesGcmDecryptUpdate(enc, resultP, c1, sizeof(c1), a, sizeof(a)); + if (result != 0) + ERROR_OUT(-6371, out); + result = wc_AesGcmDecryptFinal(enc, t1, sizeof(t1)); + if (result != 0) + ERROR_OUT(-6372, out); + if (XMEMCMP(resultP, p, sizeof(p)) != 0) + ERROR_OUT(-6373, out); +#endif + + /* alen is the size to pass in with each update. */ + for (alen = 1; alen < AES_BLOCK_SIZE + 1; alen++) { + result = wc_AesGcmEncryptInit(enc, k1, sizeof(k1), iv1, sizeof(iv1)); + if (result != 0) + ERROR_OUT(-6380, out); + + /* plen is the offset into AAD to update with. */ + for (plen = 0; plen < (int)sizeof(a); plen += alen) { + int len = sizeof(a) - plen; + if (len > alen) len = alen; + result = wc_AesGcmEncryptUpdate(enc, NULL, NULL, 0, a + plen, len); + if (result != 0) + ERROR_OUT(-6381, out); + } + /* plen is the offset into plaintext to update with. 
*/ + for (plen = 0; plen < (int)sizeof(p); plen += alen) { + int len = sizeof(p) - plen; + if (len > alen) len = alen; + result = wc_AesGcmEncryptUpdate(enc, resultC + plen, p + plen, len, + NULL, 0); + if (result != 0) + ERROR_OUT(-6382, out); + } + result = wc_AesGcmEncryptFinal(enc, resultT, sizeof(resultT)); + if (result != 0) + ERROR_OUT(-6383, out); + if (XMEMCMP(resultC, c1, sizeof(c1)) != 0) + ERROR_OUT(-6384, out); + if (XMEMCMP(resultT, t1, sizeof(t1)) != 0) + ERROR_OUT(-6385, out); + } + +#ifdef HAVE_AES_DECRYPT + for (alen = 1; alen < AES_BLOCK_SIZE + 1; alen++) { + result = wc_AesGcmDecryptInit(enc, k1, sizeof(k1), iv1, sizeof(iv1)); + if (result != 0) + ERROR_OUT(-6390, out); + + /* plen is the offset into AAD to update with. */ + for (plen = 0; plen < (int)sizeof(a); plen += alen) { + int len = sizeof(a) - plen; + if (len > alen) len = alen; + result = wc_AesGcmDecryptUpdate(enc, NULL, NULL, 0, a + plen, len); + if (result != 0) + ERROR_OUT(-6391, out); + } + /* plen is the offset into cipher text to update with. */ + for (plen = 0; plen < (int)sizeof(c1); plen += alen) { + int len = sizeof(c1) - plen; + if (len > alen) len = alen; + result = wc_AesGcmDecryptUpdate(enc, resultP + plen, c1 + plen, len, + NULL, 0); + if (result != 0) + ERROR_OUT(-6392, out); + } + result = wc_AesGcmDecryptFinal(enc, t1, sizeof(t1)); + if (result != 0) + ERROR_OUT(-6393, out); + if (XMEMCMP(resultP, p, sizeof(p)) != 0) + ERROR_OUT(-6394, out); + } +#endif /* HAVE_AES_DECRYPT */ +#endif /* WOLFSSL_AESGCM_STREAM */ +#endif /* WOLFSSL_AES_256 */ +#endif /* !WOLFSSL_AFALG_XILINX_AES && !WOLFSSL_XILINX_CRYPT */ + wc_AesFree(enc); wc_AesFree(dec); diff --git a/wolfssl/wolfcrypt/aes.h b/wolfssl/wolfcrypt/aes.h index 6ef8352c6..21908ac3c 100644 --- a/wolfssl/wolfcrypt/aes.h +++ b/wolfssl/wolfcrypt/aes.h @@ -250,6 +250,21 @@ struct Aes { silabs_aes_t ctx; #endif void* heap; /* memory hint to use */ +#ifdef WOLFSSL_AESGCM_STREAM +#if !defined(WOLFSSL_SMALL_STACK) || defined(WOLFSSL_AESNI) + ALIGN16 byte streamData[5 * AES_BLOCK_SIZE]; +#else + byte* streamData; +#endif + word32 aSz; + word32 cSz; + byte over; + byte aOver; + byte cOver; + byte gcmKeySet:1; + byte nonceSet:1; + byte ctrSet:1; +#endif }; #ifndef WC_AES_TYPE_DEFINED @@ -361,6 +376,26 @@ WOLFSSL_API int wc_AesEcbDecrypt(Aes* aes, byte* out, const byte* iv, word32 ivSz, const byte* authTag, word32 authTagSz, const byte* authIn, word32 authInSz); +#ifdef WOLFSSL_AESGCM_STREAM +WOLFSSL_API int wc_AesGcmInit(Aes* aes, const byte* key, word32 len, + const byte* iv, word32 ivSz); + +WOLFSSL_API int wc_AesGcmEncryptInit(Aes* aes, const byte* key, word32 len, + const byte* iv, word32 ivSz); +WOLFSSL_API int wc_AesGcmEncryptInit_ex(Aes* aes, const byte* key, word32 len, + byte* ivOut, word32 ivOutSz); +WOLFSSL_API int wc_AesGcmEncryptUpdate(Aes* aes, byte* out, const byte* in, + word32 sz, const byte* authIn, word32 authInSz); +WOLFSSL_API int wc_AesGcmEncryptFinal(Aes* aes, byte* authTag, + word32 authTagSz); + +WOLFSSL_API int wc_AesGcmDecryptInit(Aes* aes, const byte* key, word32 len, + const byte* iv, word32 ivSz); +WOLFSSL_API int wc_AesGcmDecryptUpdate(Aes* aes, byte* out, const byte* in, + word32 sz, const byte* authIn, word32 authInSz); +WOLFSSL_API int wc_AesGcmDecryptFinal(Aes* aes, const byte* authTag, + word32 authTagSz); +#endif #ifndef WC_NO_RNG WOLFSSL_API int wc_AesGcmSetExtIV(Aes* aes, const byte* iv, word32 ivSz); diff --git a/wolfssl/wolfcrypt/error-crypt.h b/wolfssl/wolfcrypt/error-crypt.h index 138e3cbe4..73da12f6e 100644 --- 
a/wolfssl/wolfcrypt/error-crypt.h +++ b/wolfssl/wolfcrypt/error-crypt.h @@ -235,8 +235,10 @@ enum { CHACHA_POLY_OVERFLOW =-274, /* ChaCha20Poly1305 limit overflow */ ASN_SELF_SIGNED_E = -275, /* ASN self-signed certificate error */ SAKKE_VERIFY_FAIL_E = -276, /* SAKKE derivation verification error */ + MISSING_IV = -277, /* IV was not set */ + MISSING_KEY = -278, /* Key was not set */ - WC_LAST_E = -276, /* Update this to indicate last error */ + WC_LAST_E = -278, /* Update this to indicate last error */ MIN_CODE_E = -300 /* errors -101 - -299 */ /* add new companion error id strings for any new error codes
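For reference, the intended call pattern for the new wc_AesGcmEncryptInit/Update/Final functions declared in aes.h above is roughly as follows. This is a minimal sketch, not part of the patch: the helper and buffer names are illustrative, error handling is collapsed, and the AAD and message may just as well be supplied across any number of Update calls.

#include <wolfssl/wolfcrypt/aes.h>

/* Illustrative helper: streaming AES-GCM encryption in one pass. */
static int stream_gcm_encrypt(const byte* key, word32 keySz,
                              const byte* iv, word32 ivSz,
                              const byte* aad, word32 aadSz,
                              const byte* msg, word32 msgSz,
                              byte* cipher, byte* tag, word32 tagSz)
{
    Aes aes;
    int ret = wc_AesInit(&aes, NULL, INVALID_DEVID);

    if (ret == 0)   /* set key and nonce (either may also be deferred) */
        ret = wc_AesGcmEncryptInit(&aes, key, keySz, iv, ivSz);
    if (ret == 0)   /* AAD only: no output buffer, no input data */
        ret = wc_AesGcmEncryptUpdate(&aes, NULL, NULL, 0, aad, aadSz);
    if (ret == 0)   /* plaintext in, ciphertext out */
        ret = wc_AesGcmEncryptUpdate(&aes, cipher, msg, msgSz, NULL, 0);
    if (ret == 0)   /* produce the authentication tag */
        ret = wc_AesGcmEncryptFinal(&aes, tag, tagSz);

    wc_AesFree(&aes);
    return ret;
}

Decryption mirrors this with wc_AesGcmDecryptInit/Update/Final, where DecryptFinal is given the received tag and fails when authentication does not verify; the new MISSING_KEY and MISSING_IV error codes report a required key or nonce that has not been supplied before Update/Final.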
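At the EVP layer, the evp.c changes above preserve the existing interface on top of the streaming implementation: passing a NULL output pointer to EVP_CipherUpdate feeds AAD, a non-NULL pointer processes data in place of the old buffered one-shot call, and EVP_CipherFinal now returns no data and only computes (encrypt) or verifies (decrypt) the tag held in ctx->authTag. A hedged sketch of that flow, with illustrative names:

#include <wolfssl/openssl/evp.h>

/* Illustrative EVP-level AES-128-GCM encryption over the streaming backend. */
static int evp_gcm_encrypt(const unsigned char* key, const unsigned char* iv,
                           const unsigned char* aad, int aadSz,
                           const unsigned char* msg, int msgSz,
                           unsigned char* out)
{
    WOLFSSL_EVP_CIPHER_CTX* ctx = wolfSSL_EVP_CIPHER_CTX_new();
    int len = 0;
    int ret = (ctx != NULL) ? WOLFSSL_SUCCESS : WOLFSSL_FAILURE;

    if (ret == WOLFSSL_SUCCESS)   /* enc == 1 selects encryption */
        ret = wolfSSL_EVP_CipherInit(ctx, wolfSSL_EVP_aes_128_gcm(), key, iv, 1);
    if (ret == WOLFSSL_SUCCESS)   /* out == NULL: in/inl carries the AAD */
        ret = wolfSSL_EVP_CipherUpdate(ctx, NULL, &len, aad, aadSz);
    if (ret == WOLFSSL_SUCCESS)   /* ciphertext is written during Update */
        ret = wolfSSL_EVP_CipherUpdate(ctx, out, &len, msg, msgSz);
    if (ret == WOLFSSL_SUCCESS)   /* no output here; tag computed internally */
        ret = wolfSSL_EVP_CipherFinal(ctx, out, &len);

    wolfSSL_EVP_CIPHER_CTX_free(ctx);
    return ret;
}

The caller then reads the tag from the context as before, typically through the EVP GCM tag control.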
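As an aid to reviewing the generated AVX2 code above: each aesenc_ctr fragment in the update routines expands the saved counter into the eight counter blocks for a 128-byte chunk. The counter is kept pre-byte-swapped so vpaddd can add small constants directly; the current value through current+7 are encrypted, and current+8 is written back for the next chunk. A hypothetical scalar equivalent (not wolfSSL API, illustrative only):

#include <string.h>
#include <stdint.h>

/* Derive the eight counter blocks for one 128-byte chunk and advance the
 * saved counter by eight.  Only the low 32 bits increment, as in GCM. */
static void gcm_ctr_blocks8(uint8_t ctr[16], uint8_t blocks[8][16])
{
    uint32_t c = ((uint32_t)ctr[12] << 24) | ((uint32_t)ctr[13] << 16) |
                 ((uint32_t)ctr[14] <<  8) |  (uint32_t)ctr[15];
    int i;

    for (i = 0; i < 8; i++) {
        uint32_t ci = c + (uint32_t)i;          /* ctr+0 .. ctr+7 */
        memcpy(blocks[i], ctr, 16);
        blocks[i][12] = (uint8_t)(ci >> 24);
        blocks[i][13] = (uint8_t)(ci >> 16);
        blocks[i][14] = (uint8_t)(ci >>  8);
        blocks[i][15] = (uint8_t)ci;
    }

    c += 8;                                     /* saved counter for next chunk */
    ctr[12] = (uint8_t)(c >> 24);
    ctr[13] = (uint8_t)(c >> 16);
    ctr[14] = (uint8_t)(c >>  8);
    ctr[15] = (uint8_t)c;
}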
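The calc_tag sequences in AES_GCM_encrypt_final_avx2 and AES_GCM_decrypt_final_avx2 implement the standard GCM tag: the AAD and ciphertext byte counts are shifted left by three to become bit counts, packed into one last GHASH block, XORed into the running hash state, multiplied by H and reduced modulo the GHASH polynomial, byte-swapped, and XORed with the stored encryption of the initial counter block; the decrypt variant then compares the result against the supplied tag and writes 1 or 0 to the result rather than branching on the comparison. The logical layout of that final block, as a hypothetical helper (the assembly keeps it in byte-reversed register order):

#include <stdint.h>

/* Illustrative only: the last GHASH block is [len(AAD)]64 || [len(C)]64,
 * both as big-endian bit counts (hence the shlq $3 in the assembly). */
static void gcm_len_block(uint64_t aadBytes, uint64_t cipherBytes,
                          uint8_t lenBlock[16])
{
    uint64_t aBits = aadBytes << 3;
    uint64_t cBits = cipherBytes << 3;
    int i;

    for (i = 0; i < 8; i++) {
        lenBlock[i]     = (uint8_t)(aBits >> (56 - 8 * i));
        lenBlock[8 + i] = (uint8_t)(cBits >> (56 - 8 * i));
    }
}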