mirror of
https://github.com/wolfSSL/wolfssl.git
synced 2026-07-05 10:40:52 +02:00
Merge pull request #10728 from SparkiDev/intel_asm_fixup
Intel x86/x64 assembly fixes
This commit is contained in:
+6
-2
@@ -1885,7 +1885,9 @@ src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/ge_operations.c
|
||||
if BUILD_CURVE25519_INTELASM
|
||||
if !BUILD_X86_ASM
|
||||
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/fe_x25519_asm.S
|
||||
endif !BUILD_X86_ASM
|
||||
else
|
||||
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/fe_operations.c
|
||||
endif BUILD_X86_ASM
|
||||
else
|
||||
if BUILD_ARMASM
|
||||
if !BUILD_FIPS_V6_PLUS
|
||||
@@ -1946,7 +1948,9 @@ src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/ge_operations.c
|
||||
if BUILD_CURVE25519_INTELASM
|
||||
if !BUILD_X86_ASM
|
||||
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/fe_x25519_asm.S
|
||||
endif !BUILD_X86_ASM
|
||||
else
|
||||
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/fe_operations.c
|
||||
endif BUILD_X86_ASM
|
||||
else
|
||||
if !BUILD_FIPS_V6_PLUS
|
||||
if BUILD_ARMASM
|
||||
|
||||
+16
-16
@@ -15778,7 +15778,7 @@ int wc_AesXtsDecryptSector(XtsAes* aes, byte* out, const byte* in, word32 sz,
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef WOLFSSL_AESNI
|
||||
#if defined(WOLFSSL_AESNI) && !defined(WOLFSSL_X86_BUILD)
|
||||
|
||||
#if defined(USE_INTEL_SPEEDUP_FOR_AES) && !defined(USE_INTEL_SPEEDUP)
|
||||
#define USE_INTEL_SPEEDUP
|
||||
@@ -15841,7 +15841,7 @@ void AES_XTS_decrypt_update_avx1(const unsigned char *in, unsigned char *out, wo
|
||||
#endif /* HAVE_INTEL_AVX1 */
|
||||
#endif /* HAVE_AES_DECRYPT */
|
||||
|
||||
#endif /* WOLFSSL_AESNI */
|
||||
#endif /* WOLFSSL_AESNI && !WOLFSSL_X86_BUILD */
|
||||
|
||||
#ifdef HAVE_AES_ECB
|
||||
#if (!defined(WOLFSSL_ARMASM) || (!defined(__aarch64__) && \
|
||||
@@ -16094,7 +16094,7 @@ int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz,
|
||||
AES_XTS_encrypt_AARCH32(in, out, sz, i, (byte*)xaes->aes.key,
|
||||
(byte*)xaes->tweak.key, (byte*)xaes->aes.tmp, xaes->aes.rounds);
|
||||
ret = 0;
|
||||
#elif defined(WOLFSSL_AESNI)
|
||||
#elif defined(WOLFSSL_AESNI) && !defined(WOLFSSL_X86_BUILD)
|
||||
if (aes->use_aesni) {
|
||||
SAVE_VECTOR_REGISTERS(return _svr_ret;);
|
||||
#if defined(HAVE_INTEL_AVX1)
|
||||
@@ -16196,7 +16196,7 @@ int wc_AesXtsEncryptInit(XtsAes* xaes, const byte* i, word32 iSz,
|
||||
stream->bytes_crypted_with_this_tweak = 0;
|
||||
|
||||
{
|
||||
#ifdef WOLFSSL_AESNI
|
||||
#if defined(WOLFSSL_AESNI) && !defined(WOLFSSL_X86_BUILD)
|
||||
if (aes->use_aesni) {
|
||||
SAVE_VECTOR_REGISTERS(return _svr_ret;);
|
||||
#if defined(HAVE_INTEL_AVX1)
|
||||
@@ -16217,7 +16217,7 @@ int wc_AesXtsEncryptInit(XtsAes* xaes, const byte* i, word32 iSz,
|
||||
RESTORE_VECTOR_REGISTERS();
|
||||
}
|
||||
else
|
||||
#endif /* WOLFSSL_AESNI */
|
||||
#endif /* WOLFSSL_AESNI && !WOLFSSL_X86_BUILD */
|
||||
{
|
||||
ret = AesXtsInitTweak_sw(xaes, stream->tweak_block);
|
||||
}
|
||||
@@ -16247,7 +16247,7 @@ static int AesXtsEncryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 s
|
||||
{
|
||||
int ret;
|
||||
|
||||
#ifdef WOLFSSL_AESNI
|
||||
#if defined(WOLFSSL_AESNI) && !defined(WOLFSSL_X86_BUILD)
|
||||
Aes *aes;
|
||||
#endif
|
||||
|
||||
@@ -16255,7 +16255,7 @@ static int AesXtsEncryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 s
|
||||
return BAD_FUNC_ARG;
|
||||
}
|
||||
|
||||
#ifdef WOLFSSL_AESNI
|
||||
#if defined(WOLFSSL_AESNI) && !defined(WOLFSSL_X86_BUILD)
|
||||
aes = &xaes->aes;
|
||||
#endif
|
||||
|
||||
@@ -16291,7 +16291,7 @@ static int AesXtsEncryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 s
|
||||
}
|
||||
#endif
|
||||
{
|
||||
#ifdef WOLFSSL_AESNI
|
||||
#if defined(WOLFSSL_AESNI) && !defined(WOLFSSL_X86_BUILD)
|
||||
if (aes->use_aesni) {
|
||||
SAVE_VECTOR_REGISTERS(return _svr_ret;);
|
||||
#if defined(HAVE_INTEL_AVX1)
|
||||
@@ -16314,7 +16314,7 @@ static int AesXtsEncryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 s
|
||||
RESTORE_VECTOR_REGISTERS();
|
||||
}
|
||||
else
|
||||
#endif /* WOLFSSL_AESNI */
|
||||
#endif /* WOLFSSL_AESNI && !WOLFSSL_X86_BUILD */
|
||||
{
|
||||
ret = AesXtsEncryptUpdate_sw(xaes, out, in, sz, stream->tweak_block);
|
||||
}
|
||||
@@ -16575,7 +16575,7 @@ int wc_AesXtsDecrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz,
|
||||
AES_XTS_decrypt_AARCH32(in, out, sz, i, (byte*)xaes->aes.key,
|
||||
(byte*)xaes->tweak.key, (byte*)xaes->aes.tmp, xaes->aes.rounds);
|
||||
ret = 0;
|
||||
#elif defined(WOLFSSL_AESNI)
|
||||
#elif defined(WOLFSSL_AESNI) && !defined(WOLFSSL_X86_BUILD)
|
||||
if (aes->use_aesni) {
|
||||
SAVE_VECTOR_REGISTERS(return _svr_ret;);
|
||||
#if defined(HAVE_INTEL_AVX1)
|
||||
@@ -16680,7 +16680,7 @@ int wc_AesXtsDecryptInit(XtsAes* xaes, const byte* i, word32 iSz,
|
||||
stream->bytes_crypted_with_this_tweak = 0;
|
||||
|
||||
{
|
||||
#ifdef WOLFSSL_AESNI
|
||||
#if defined(WOLFSSL_AESNI) && !defined(WOLFSSL_X86_BUILD)
|
||||
if (aes->use_aesni) {
|
||||
SAVE_VECTOR_REGISTERS(return _svr_ret;);
|
||||
#if defined(HAVE_INTEL_AVX1)
|
||||
@@ -16701,7 +16701,7 @@ int wc_AesXtsDecryptInit(XtsAes* xaes, const byte* i, word32 iSz,
|
||||
RESTORE_VECTOR_REGISTERS();
|
||||
}
|
||||
else
|
||||
#endif /* WOLFSSL_AESNI */
|
||||
#endif /* WOLFSSL_AESNI && !WOLFSSL_X86_BUILD */
|
||||
{
|
||||
ret = AesXtsInitTweak_sw(xaes, stream->tweak_block);
|
||||
}
|
||||
@@ -16729,7 +16729,7 @@ static int AesXtsDecryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 s
|
||||
struct XtsAesStreamData *stream)
|
||||
{
|
||||
int ret;
|
||||
#ifdef WOLFSSL_AESNI
|
||||
#if defined(WOLFSSL_AESNI) && !defined(WOLFSSL_X86_BUILD)
|
||||
Aes *aes;
|
||||
#endif
|
||||
|
||||
@@ -16737,7 +16737,7 @@ static int AesXtsDecryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 s
|
||||
return BAD_FUNC_ARG;
|
||||
}
|
||||
|
||||
#ifdef WOLFSSL_AESNI
|
||||
#if defined(WOLFSSL_AESNI) && !defined(WOLFSSL_X86_BUILD)
|
||||
#ifdef WC_AES_XTS_SUPPORT_SIMULTANEOUS_ENC_AND_DEC_KEYS
|
||||
aes = &xaes->aes_decrypt;
|
||||
#else
|
||||
@@ -16767,7 +16767,7 @@ static int AesXtsDecryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 s
|
||||
#endif
|
||||
|
||||
{
|
||||
#ifdef WOLFSSL_AESNI
|
||||
#if defined(WOLFSSL_AESNI) && !defined(WOLFSSL_X86_BUILD)
|
||||
if (aes->use_aesni) {
|
||||
SAVE_VECTOR_REGISTERS(return _svr_ret;);
|
||||
#if defined(HAVE_INTEL_AVX1)
|
||||
@@ -16790,7 +16790,7 @@ static int AesXtsDecryptUpdate(XtsAes* xaes, byte* out, const byte* in, word32 s
|
||||
RESTORE_VECTOR_REGISTERS();
|
||||
}
|
||||
else
|
||||
#endif /* WOLFSSL_AESNI */
|
||||
#endif /* WOLFSSL_AESNI && !WOLFSSL_X86_BUILD */
|
||||
{
|
||||
ret = AesXtsDecryptUpdate_sw(xaes, out, in, sz,
|
||||
stream->tweak_block);
|
||||
|
||||
@@ -1831,11 +1831,11 @@ _AES_ECB_decrypt_AESNI:
|
||||
push %edi
|
||||
push %esi
|
||||
push %ebx
|
||||
movl 20(%esp), %edi
|
||||
movl 24(%esp), %esi
|
||||
movl 28(%esp), %edx
|
||||
movl 32(%esp), %ecx
|
||||
movl 36(%esp), %eax
|
||||
movl 16(%esp), %edi
|
||||
movl 20(%esp), %esi
|
||||
movl 24(%esp), %edx
|
||||
movl 28(%esp), %ecx
|
||||
movl 32(%esp), %eax
|
||||
|
||||
|
||||
movl %edx, %ebx
|
||||
|
||||
@@ -3485,7 +3485,6 @@ L_AES_GCM_decrypt_aesni_last_block_start:
|
||||
movdqa %xmm1, %xmm12
|
||||
pclmulqdq $0x00, %xmm0, %xmm12
|
||||
aesenc 80(%r15), %xmm8
|
||||
movdqa %xmm1, %xmm1
|
||||
pclmulqdq $0x11, %xmm0, %xmm1
|
||||
aesenc 96(%r15), %xmm8
|
||||
pxor %xmm11, %xmm10
|
||||
@@ -6303,7 +6302,6 @@ L_AES_GCM_decrypt_update_aesni_last_block_start:
|
||||
movdqa %xmm1, %xmm12
|
||||
pclmulqdq $0x00, %xmm0, %xmm12
|
||||
aesenc 80(%rdi), %xmm8
|
||||
movdqa %xmm1, %xmm1
|
||||
pclmulqdq $0x11, %xmm0, %xmm1
|
||||
aesenc 96(%rdi), %xmm8
|
||||
pxor %xmm11, %xmm10
|
||||
|
||||
@@ -750,6 +750,9 @@ L_AES_GCM_encrypt_aesni_calc_aad_done:
|
||||
# First 64 bytes of input
|
||||
# Encrypt 64 bytes of counter
|
||||
movdqu 64(%esp), %xmm4
|
||||
movdqu %xmm4, %xmm3
|
||||
paddd L_aes_gcm_four, %xmm3
|
||||
movdqu %xmm3, 64(%esp)
|
||||
movdqa L_aes_gcm_bswap_epi64, %xmm3
|
||||
movdqa %xmm4, %xmm5
|
||||
movdqa %xmm4, %xmm6
|
||||
@@ -761,9 +764,6 @@ L_AES_GCM_encrypt_aesni_calc_aad_done:
|
||||
pshufb %xmm3, %xmm6
|
||||
paddd L_aes_gcm_three, %xmm7
|
||||
pshufb %xmm3, %xmm7
|
||||
movdqu 64(%esp), %xmm3
|
||||
paddd L_aes_gcm_four, %xmm3
|
||||
movdqu %xmm3, 64(%esp)
|
||||
movdqa (%ebp), %xmm3
|
||||
pxor %xmm3, %xmm4
|
||||
pxor %xmm3, %xmm5
|
||||
@@ -867,6 +867,9 @@ L_AES_GCM_encrypt_aesni_ghash_64:
|
||||
leal (%edi,%ebx,1), %edx
|
||||
# Encrypt 64 bytes of counter
|
||||
movdqu 64(%esp), %xmm4
|
||||
movdqu %xmm4, %xmm3
|
||||
paddd L_aes_gcm_four, %xmm3
|
||||
movdqu %xmm3, 64(%esp)
|
||||
movdqa L_aes_gcm_bswap_epi64, %xmm3
|
||||
movdqa %xmm4, %xmm5
|
||||
movdqa %xmm4, %xmm6
|
||||
@@ -878,9 +881,6 @@ L_AES_GCM_encrypt_aesni_ghash_64:
|
||||
pshufb %xmm3, %xmm6
|
||||
paddd L_aes_gcm_three, %xmm7
|
||||
pshufb %xmm3, %xmm7
|
||||
movdqu 64(%esp), %xmm3
|
||||
paddd L_aes_gcm_four, %xmm3
|
||||
movdqu %xmm3, 64(%esp)
|
||||
movdqa (%ebp), %xmm3
|
||||
pxor %xmm3, %xmm4
|
||||
pxor %xmm3, %xmm5
|
||||
@@ -2146,6 +2146,9 @@ L_AES_GCM_decrypt_aesni_ghash_64_inplace:
|
||||
leal (%edi,%ebx,1), %edx
|
||||
# Encrypt 64 bytes of counter
|
||||
movdqu 64(%esp), %xmm4
|
||||
movdqu %xmm4, %xmm3
|
||||
paddd L_aes_gcm_four, %xmm3
|
||||
movdqu %xmm3, 64(%esp)
|
||||
movdqa L_aes_gcm_bswap_epi64, %xmm3
|
||||
movdqa %xmm4, %xmm5
|
||||
movdqa %xmm4, %xmm6
|
||||
@@ -2157,9 +2160,6 @@ L_AES_GCM_decrypt_aesni_ghash_64_inplace:
|
||||
pshufb %xmm3, %xmm6
|
||||
paddd L_aes_gcm_three, %xmm7
|
||||
pshufb %xmm3, %xmm7
|
||||
movdqu 64(%esp), %xmm3
|
||||
paddd L_aes_gcm_four, %xmm3
|
||||
movdqu %xmm3, 64(%esp)
|
||||
movdqa (%ebp), %xmm3
|
||||
pxor %xmm3, %xmm4
|
||||
pxor %xmm3, %xmm5
|
||||
@@ -2359,6 +2359,9 @@ L_AES_GCM_decrypt_aesni_ghash_64:
|
||||
leal (%edi,%ebx,1), %edx
|
||||
# Encrypt 64 bytes of counter
|
||||
movdqu 64(%esp), %xmm4
|
||||
movdqu %xmm4, %xmm3
|
||||
paddd L_aes_gcm_four, %xmm3
|
||||
movdqu %xmm3, 64(%esp)
|
||||
movdqa L_aes_gcm_bswap_epi64, %xmm3
|
||||
movdqa %xmm4, %xmm5
|
||||
movdqa %xmm4, %xmm6
|
||||
@@ -2370,9 +2373,6 @@ L_AES_GCM_decrypt_aesni_ghash_64:
|
||||
pshufb %xmm3, %xmm6
|
||||
paddd L_aes_gcm_three, %xmm7
|
||||
pshufb %xmm3, %xmm7
|
||||
movdqu 64(%esp), %xmm3
|
||||
paddd L_aes_gcm_four, %xmm3
|
||||
movdqu %xmm3, 64(%esp)
|
||||
movdqa (%ebp), %xmm3
|
||||
pxor %xmm3, %xmm4
|
||||
pxor %xmm3, %xmm5
|
||||
@@ -2455,8 +2455,6 @@ L_AES_GCM_decrypt_aesni_aesenc_64_ghash_avx_done:
|
||||
movdqu 16(%ecx), %xmm1
|
||||
pxor %xmm0, %xmm4
|
||||
pxor %xmm1, %xmm5
|
||||
movdqu %xmm0, (%ecx)
|
||||
movdqu %xmm1, 16(%ecx)
|
||||
movdqu %xmm4, (%edx)
|
||||
movdqu %xmm5, 16(%edx)
|
||||
aesenclast %xmm3, %xmm6
|
||||
@@ -2465,8 +2463,6 @@ L_AES_GCM_decrypt_aesni_aesenc_64_ghash_avx_done:
|
||||
movdqu 48(%ecx), %xmm1
|
||||
pxor %xmm0, %xmm6
|
||||
pxor %xmm1, %xmm7
|
||||
movdqu %xmm0, 32(%ecx)
|
||||
movdqu %xmm1, 48(%ecx)
|
||||
movdqu %xmm6, 32(%edx)
|
||||
movdqu %xmm7, 48(%edx)
|
||||
# ghash encrypted counter
|
||||
@@ -3536,6 +3532,9 @@ AES_GCM_encrypt_update_aesni:
|
||||
# First 64 bytes of input
|
||||
# Encrypt 64 bytes of counter
|
||||
movdqu 64(%esp), %xmm0
|
||||
movdqu %xmm0, %xmm7
|
||||
paddd L_aes_gcm_four, %xmm7
|
||||
movdqu %xmm7, 64(%esp)
|
||||
movdqa L_aes_gcm_bswap_epi64, %xmm7
|
||||
movdqa %xmm0, %xmm1
|
||||
movdqa %xmm0, %xmm2
|
||||
@@ -3547,9 +3546,6 @@ AES_GCM_encrypt_update_aesni:
|
||||
pshufb %xmm7, %xmm2
|
||||
paddd L_aes_gcm_three, %xmm3
|
||||
pshufb %xmm7, %xmm3
|
||||
movdqu 64(%esp), %xmm7
|
||||
paddd L_aes_gcm_four, %xmm7
|
||||
movdqu %xmm7, 64(%esp)
|
||||
movdqa (%ebp), %xmm7
|
||||
pxor %xmm7, %xmm0
|
||||
pxor %xmm7, %xmm1
|
||||
@@ -3644,6 +3640,8 @@ L_AES_GCM_encrypt_update_aesni_enc_done:
|
||||
movdqu %xmm3, 48(%edi)
|
||||
cmpl $0x40, %eax
|
||||
movl $0x40, %ebx
|
||||
movl %esi, %ecx
|
||||
movl %edi, %edx
|
||||
jle L_AES_GCM_encrypt_update_aesni_end_64
|
||||
# More 64 bytes of input
|
||||
L_AES_GCM_encrypt_update_aesni_ghash_64:
|
||||
@@ -3651,6 +3649,9 @@ L_AES_GCM_encrypt_update_aesni_ghash_64:
|
||||
leal (%edi,%ebx,1), %edx
|
||||
# Encrypt 64 bytes of counter
|
||||
movdqu 64(%esp), %xmm0
|
||||
movdqu %xmm0, %xmm7
|
||||
paddd L_aes_gcm_four, %xmm7
|
||||
movdqu %xmm7, 64(%esp)
|
||||
movdqa L_aes_gcm_bswap_epi64, %xmm7
|
||||
movdqa %xmm0, %xmm1
|
||||
movdqa %xmm0, %xmm2
|
||||
@@ -3662,9 +3663,6 @@ L_AES_GCM_encrypt_update_aesni_ghash_64:
|
||||
pshufb %xmm7, %xmm2
|
||||
paddd L_aes_gcm_three, %xmm3
|
||||
pshufb %xmm7, %xmm3
|
||||
movdqu 64(%esp), %xmm7
|
||||
paddd L_aes_gcm_four, %xmm7
|
||||
movdqu %xmm7, 64(%esp)
|
||||
movdqa (%ebp), %xmm7
|
||||
pxor %xmm7, %xmm0
|
||||
pxor %xmm7, %xmm1
|
||||
@@ -4406,6 +4404,9 @@ L_AES_GCM_decrypt_update_aesni_ghash_64_inplace:
|
||||
leal (%edi,%ebx,1), %edx
|
||||
# Encrypt 64 bytes of counter
|
||||
movdqu 64(%esp), %xmm0
|
||||
movdqu %xmm0, %xmm7
|
||||
paddd L_aes_gcm_four, %xmm7
|
||||
movdqu %xmm7, 64(%esp)
|
||||
movdqa L_aes_gcm_bswap_epi64, %xmm7
|
||||
movdqa %xmm0, %xmm1
|
||||
movdqa %xmm0, %xmm2
|
||||
@@ -4417,9 +4418,6 @@ L_AES_GCM_decrypt_update_aesni_ghash_64_inplace:
|
||||
pshufb %xmm7, %xmm2
|
||||
paddd L_aes_gcm_three, %xmm3
|
||||
pshufb %xmm7, %xmm3
|
||||
movdqu 64(%esp), %xmm7
|
||||
paddd L_aes_gcm_four, %xmm7
|
||||
movdqu %xmm7, 64(%esp)
|
||||
movdqa (%ebp), %xmm7
|
||||
pxor %xmm7, %xmm0
|
||||
pxor %xmm7, %xmm1
|
||||
@@ -4619,6 +4617,9 @@ L_AES_GCM_decrypt_update_aesni_ghash_64:
|
||||
leal (%edi,%ebx,1), %edx
|
||||
# Encrypt 64 bytes of counter
|
||||
movdqu 64(%esp), %xmm0
|
||||
movdqu %xmm0, %xmm7
|
||||
paddd L_aes_gcm_four, %xmm7
|
||||
movdqu %xmm7, 64(%esp)
|
||||
movdqa L_aes_gcm_bswap_epi64, %xmm7
|
||||
movdqa %xmm0, %xmm1
|
||||
movdqa %xmm0, %xmm2
|
||||
@@ -4630,9 +4631,6 @@ L_AES_GCM_decrypt_update_aesni_ghash_64:
|
||||
pshufb %xmm7, %xmm2
|
||||
paddd L_aes_gcm_three, %xmm3
|
||||
pshufb %xmm7, %xmm3
|
||||
movdqu 64(%esp), %xmm7
|
||||
paddd L_aes_gcm_four, %xmm7
|
||||
movdqu %xmm7, 64(%esp)
|
||||
movdqa (%ebp), %xmm7
|
||||
pxor %xmm7, %xmm0
|
||||
pxor %xmm7, %xmm1
|
||||
@@ -4715,8 +4713,6 @@ L_AES_GCM_decrypt_update_aesni_aesenc_64_ghash_avx_done:
|
||||
movdqu 16(%ecx), %xmm5
|
||||
pxor %xmm4, %xmm0
|
||||
pxor %xmm5, %xmm1
|
||||
movdqu %xmm4, (%ecx)
|
||||
movdqu %xmm5, 16(%ecx)
|
||||
movdqu %xmm0, (%edx)
|
||||
movdqu %xmm1, 16(%edx)
|
||||
aesenclast %xmm7, %xmm2
|
||||
@@ -4725,8 +4721,6 @@ L_AES_GCM_decrypt_update_aesni_aesenc_64_ghash_avx_done:
|
||||
movdqu 48(%ecx), %xmm5
|
||||
pxor %xmm4, %xmm2
|
||||
pxor %xmm5, %xmm3
|
||||
movdqu %xmm4, 32(%ecx)
|
||||
movdqu %xmm5, 48(%ecx)
|
||||
movdqu %xmm2, 32(%edx)
|
||||
movdqu %xmm3, 48(%edx)
|
||||
# ghash encrypted counter
|
||||
@@ -5556,6 +5550,8 @@ L_AES_GCM_encrypt_avx1_calc_aad_done:
|
||||
vmovdqu %xmm3, 48(%esp)
|
||||
# First 64 bytes of input
|
||||
vmovdqu 64(%esp), %xmm4
|
||||
vpaddd L_aes_gcm_avx1_four, %xmm4, %xmm3
|
||||
vmovdqu %xmm3, 64(%esp)
|
||||
vmovdqa L_aes_gcm_avx1_bswap_epi64, %xmm3
|
||||
vpaddd L_aes_gcm_avx1_one, %xmm4, %xmm5
|
||||
vpshufb %xmm3, %xmm5, %xmm5
|
||||
@@ -5564,9 +5560,6 @@ L_AES_GCM_encrypt_avx1_calc_aad_done:
|
||||
vpaddd L_aes_gcm_avx1_three, %xmm4, %xmm7
|
||||
vpshufb %xmm3, %xmm7, %xmm7
|
||||
vpshufb %xmm3, %xmm4, %xmm4
|
||||
vmovdqu 64(%esp), %xmm3
|
||||
vpaddd L_aes_gcm_avx1_four, %xmm3, %xmm3
|
||||
vmovdqu %xmm3, 64(%esp)
|
||||
vmovdqa (%ebp), %xmm3
|
||||
vpxor %xmm3, %xmm4, %xmm4
|
||||
vpxor %xmm3, %xmm5, %xmm5
|
||||
@@ -5649,8 +5642,6 @@ L_AES_GCM_encrypt_avx1_aesenc_64_enc_done:
|
||||
vmovdqu 16(%esi), %xmm1
|
||||
vpxor %xmm0, %xmm4, %xmm4
|
||||
vpxor %xmm1, %xmm5, %xmm5
|
||||
vmovdqu %xmm0, (%esi)
|
||||
vmovdqu %xmm1, 16(%esi)
|
||||
vmovdqu %xmm4, (%edi)
|
||||
vmovdqu %xmm5, 16(%edi)
|
||||
vaesenclast %xmm3, %xmm6, %xmm6
|
||||
@@ -5659,8 +5650,6 @@ L_AES_GCM_encrypt_avx1_aesenc_64_enc_done:
|
||||
vmovdqu 48(%esi), %xmm1
|
||||
vpxor %xmm0, %xmm6, %xmm6
|
||||
vpxor %xmm1, %xmm7, %xmm7
|
||||
vmovdqu %xmm0, 32(%esi)
|
||||
vmovdqu %xmm1, 48(%esi)
|
||||
vmovdqu %xmm6, 32(%edi)
|
||||
vmovdqu %xmm7, 48(%edi)
|
||||
cmpl $0x40, %eax
|
||||
@@ -5673,6 +5662,8 @@ L_AES_GCM_encrypt_avx1_ghash_64:
|
||||
leal (%esi,%ebx,1), %ecx
|
||||
leal (%edi,%ebx,1), %edx
|
||||
vmovdqu 64(%esp), %xmm4
|
||||
vpaddd L_aes_gcm_avx1_four, %xmm4, %xmm3
|
||||
vmovdqu %xmm3, 64(%esp)
|
||||
vmovdqa L_aes_gcm_avx1_bswap_epi64, %xmm3
|
||||
vpaddd L_aes_gcm_avx1_one, %xmm4, %xmm5
|
||||
vpshufb %xmm3, %xmm5, %xmm5
|
||||
@@ -5681,9 +5672,6 @@ L_AES_GCM_encrypt_avx1_ghash_64:
|
||||
vpaddd L_aes_gcm_avx1_three, %xmm4, %xmm7
|
||||
vpshufb %xmm3, %xmm7, %xmm7
|
||||
vpshufb %xmm3, %xmm4, %xmm4
|
||||
vmovdqu 64(%esp), %xmm3
|
||||
vpaddd L_aes_gcm_avx1_four, %xmm3, %xmm3
|
||||
vmovdqu %xmm3, 64(%esp)
|
||||
vmovdqa (%ebp), %xmm3
|
||||
vpxor %xmm3, %xmm4, %xmm4
|
||||
vpxor %xmm3, %xmm5, %xmm5
|
||||
@@ -5864,7 +5852,7 @@ L_AES_GCM_encrypt_avx1_end_64:
|
||||
vmovdqu 96(%esp), %xmm2
|
||||
# Block 1
|
||||
vmovdqa L_aes_gcm_avx1_bswap_mask, %xmm4
|
||||
vmovdqa (%edx), %xmm1
|
||||
vmovdqu (%edx), %xmm1
|
||||
vpshufb %xmm4, %xmm1, %xmm1
|
||||
vmovdqu 48(%esp), %xmm3
|
||||
vpxor %xmm2, %xmm1, %xmm1
|
||||
@@ -5886,7 +5874,7 @@ L_AES_GCM_encrypt_avx1_end_64:
|
||||
vpxor %xmm5, %xmm2, %xmm2
|
||||
# Block 2
|
||||
vmovdqa L_aes_gcm_avx1_bswap_mask, %xmm4
|
||||
vmovdqa 16(%edx), %xmm1
|
||||
vmovdqu 16(%edx), %xmm1
|
||||
vpshufb %xmm4, %xmm1, %xmm1
|
||||
vmovdqu 32(%esp), %xmm3
|
||||
# ghash_gfmul_xor_avx
|
||||
@@ -5907,7 +5895,7 @@ L_AES_GCM_encrypt_avx1_end_64:
|
||||
vpxor %xmm5, %xmm2, %xmm2
|
||||
# Block 3
|
||||
vmovdqa L_aes_gcm_avx1_bswap_mask, %xmm4
|
||||
vmovdqa 32(%edx), %xmm1
|
||||
vmovdqu 32(%edx), %xmm1
|
||||
vpshufb %xmm4, %xmm1, %xmm1
|
||||
vmovdqu 16(%esp), %xmm3
|
||||
# ghash_gfmul_xor_avx
|
||||
@@ -5928,7 +5916,7 @@ L_AES_GCM_encrypt_avx1_end_64:
|
||||
vpxor %xmm5, %xmm2, %xmm2
|
||||
# Block 4
|
||||
vmovdqa L_aes_gcm_avx1_bswap_mask, %xmm4
|
||||
vmovdqa 48(%edx), %xmm1
|
||||
vmovdqu 48(%edx), %xmm1
|
||||
vpshufb %xmm4, %xmm1, %xmm1
|
||||
vmovdqu (%esp), %xmm3
|
||||
# ghash_gfmul_xor_avx
|
||||
@@ -6776,6 +6764,8 @@ L_AES_GCM_decrypt_avx1_ghash_64_inplace:
|
||||
leal (%esi,%ebx,1), %ecx
|
||||
leal (%edi,%ebx,1), %edx
|
||||
vmovdqu 64(%esp), %xmm4
|
||||
vpaddd L_aes_gcm_avx1_four, %xmm4, %xmm3
|
||||
vmovdqu %xmm3, 64(%esp)
|
||||
vmovdqa L_aes_gcm_avx1_bswap_epi64, %xmm3
|
||||
vpaddd L_aes_gcm_avx1_one, %xmm4, %xmm5
|
||||
vpshufb %xmm3, %xmm5, %xmm5
|
||||
@@ -6784,9 +6774,6 @@ L_AES_GCM_decrypt_avx1_ghash_64_inplace:
|
||||
vpaddd L_aes_gcm_avx1_three, %xmm4, %xmm7
|
||||
vpshufb %xmm3, %xmm7, %xmm7
|
||||
vpshufb %xmm3, %xmm4, %xmm4
|
||||
vmovdqu 64(%esp), %xmm3
|
||||
vpaddd L_aes_gcm_avx1_four, %xmm3, %xmm3
|
||||
vmovdqu %xmm3, 64(%esp)
|
||||
vmovdqa (%ebp), %xmm3
|
||||
vpxor %xmm3, %xmm4, %xmm4
|
||||
vpxor %xmm3, %xmm5, %xmm5
|
||||
@@ -6972,6 +6959,8 @@ L_AES_GCM_decrypt_avx1_ghash_64:
|
||||
leal (%esi,%ebx,1), %ecx
|
||||
leal (%edi,%ebx,1), %edx
|
||||
vmovdqu 64(%esp), %xmm4
|
||||
vpaddd L_aes_gcm_avx1_four, %xmm4, %xmm3
|
||||
vmovdqu %xmm3, 64(%esp)
|
||||
vmovdqa L_aes_gcm_avx1_bswap_epi64, %xmm3
|
||||
vpaddd L_aes_gcm_avx1_one, %xmm4, %xmm5
|
||||
vpshufb %xmm3, %xmm5, %xmm5
|
||||
@@ -6980,9 +6969,6 @@ L_AES_GCM_decrypt_avx1_ghash_64:
|
||||
vpaddd L_aes_gcm_avx1_three, %xmm4, %xmm7
|
||||
vpshufb %xmm3, %xmm7, %xmm7
|
||||
vpshufb %xmm3, %xmm4, %xmm4
|
||||
vmovdqu 64(%esp), %xmm3
|
||||
vpaddd L_aes_gcm_avx1_four, %xmm3, %xmm3
|
||||
vmovdqu %xmm3, 64(%esp)
|
||||
vmovdqa (%ebp), %xmm3
|
||||
vpxor %xmm3, %xmm4, %xmm4
|
||||
vpxor %xmm3, %xmm5, %xmm5
|
||||
@@ -7065,8 +7051,6 @@ L_AES_GCM_decrypt_avx1_aesenc_64_ghash_avx_aesenc_64_enc_done:
|
||||
vmovdqu 16(%ecx), %xmm1
|
||||
vpxor %xmm0, %xmm4, %xmm4
|
||||
vpxor %xmm1, %xmm5, %xmm5
|
||||
vmovdqu %xmm0, (%ecx)
|
||||
vmovdqu %xmm1, 16(%ecx)
|
||||
vmovdqu %xmm4, (%edx)
|
||||
vmovdqu %xmm5, 16(%edx)
|
||||
vaesenclast %xmm3, %xmm6, %xmm6
|
||||
@@ -7075,8 +7059,6 @@ L_AES_GCM_decrypt_avx1_aesenc_64_ghash_avx_aesenc_64_enc_done:
|
||||
vmovdqu 48(%ecx), %xmm1
|
||||
vpxor %xmm0, %xmm6, %xmm6
|
||||
vpxor %xmm1, %xmm7, %xmm7
|
||||
vmovdqu %xmm0, 32(%ecx)
|
||||
vmovdqu %xmm1, 48(%ecx)
|
||||
vmovdqu %xmm6, 32(%edx)
|
||||
vmovdqu %xmm7, 48(%edx)
|
||||
# ghash encrypted counter
|
||||
@@ -7181,7 +7163,6 @@ L_AES_GCM_decrypt_avx1_last_block_start:
|
||||
pshufb L_aes_gcm_avx1_bswap_mask, %xmm7
|
||||
pxor %xmm2, %xmm7
|
||||
vmovdqu 64(%esp), %xmm5
|
||||
vmovdqu %xmm7, %xmm7
|
||||
vpshufb L_aes_gcm_avx1_bswap_epi64, %xmm5, %xmm4
|
||||
vpaddd L_aes_gcm_avx1_one, %xmm5, %xmm5
|
||||
vmovdqu %xmm5, 64(%esp)
|
||||
@@ -7995,6 +7976,8 @@ AES_GCM_encrypt_update_avx1:
|
||||
vmovdqu %xmm7, 48(%esp)
|
||||
# First 64 bytes of input
|
||||
vmovdqu 64(%esp), %xmm0
|
||||
vpaddd L_aes_gcm_avx1_four, %xmm0, %xmm7
|
||||
vmovdqu %xmm7, 64(%esp)
|
||||
vmovdqa L_aes_gcm_avx1_bswap_epi64, %xmm7
|
||||
vpaddd L_aes_gcm_avx1_one, %xmm0, %xmm1
|
||||
vpshufb %xmm7, %xmm1, %xmm1
|
||||
@@ -8003,9 +7986,6 @@ AES_GCM_encrypt_update_avx1:
|
||||
vpaddd L_aes_gcm_avx1_three, %xmm0, %xmm3
|
||||
vpshufb %xmm7, %xmm3, %xmm3
|
||||
vpshufb %xmm7, %xmm0, %xmm0
|
||||
vmovdqu 64(%esp), %xmm7
|
||||
vpaddd L_aes_gcm_avx1_four, %xmm7, %xmm7
|
||||
vmovdqu %xmm7, 64(%esp)
|
||||
vmovdqa (%ebp), %xmm7
|
||||
vpxor %xmm7, %xmm0, %xmm0
|
||||
vpxor %xmm7, %xmm1, %xmm1
|
||||
@@ -8088,8 +8068,6 @@ L_AES_GCM_encrypt_update_avx1_aesenc_64_enc_done:
|
||||
vmovdqu 16(%esi), %xmm5
|
||||
vpxor %xmm4, %xmm0, %xmm0
|
||||
vpxor %xmm5, %xmm1, %xmm1
|
||||
vmovdqu %xmm4, (%esi)
|
||||
vmovdqu %xmm5, 16(%esi)
|
||||
vmovdqu %xmm0, (%edi)
|
||||
vmovdqu %xmm1, 16(%edi)
|
||||
vaesenclast %xmm7, %xmm2, %xmm2
|
||||
@@ -8098,8 +8076,6 @@ L_AES_GCM_encrypt_update_avx1_aesenc_64_enc_done:
|
||||
vmovdqu 48(%esi), %xmm5
|
||||
vpxor %xmm4, %xmm2, %xmm2
|
||||
vpxor %xmm5, %xmm3, %xmm3
|
||||
vmovdqu %xmm4, 32(%esi)
|
||||
vmovdqu %xmm5, 48(%esi)
|
||||
vmovdqu %xmm2, 32(%edi)
|
||||
vmovdqu %xmm3, 48(%edi)
|
||||
cmpl $0x40, %eax
|
||||
@@ -8112,6 +8088,8 @@ L_AES_GCM_encrypt_update_avx1_ghash_64:
|
||||
leal (%esi,%ebx,1), %ecx
|
||||
leal (%edi,%ebx,1), %edx
|
||||
vmovdqu 64(%esp), %xmm0
|
||||
vpaddd L_aes_gcm_avx1_four, %xmm0, %xmm7
|
||||
vmovdqu %xmm7, 64(%esp)
|
||||
vmovdqa L_aes_gcm_avx1_bswap_epi64, %xmm7
|
||||
vpaddd L_aes_gcm_avx1_one, %xmm0, %xmm1
|
||||
vpshufb %xmm7, %xmm1, %xmm1
|
||||
@@ -8120,9 +8098,6 @@ L_AES_GCM_encrypt_update_avx1_ghash_64:
|
||||
vpaddd L_aes_gcm_avx1_three, %xmm0, %xmm3
|
||||
vpshufb %xmm7, %xmm3, %xmm3
|
||||
vpshufb %xmm7, %xmm0, %xmm0
|
||||
vmovdqu 64(%esp), %xmm7
|
||||
vpaddd L_aes_gcm_avx1_four, %xmm7, %xmm7
|
||||
vmovdqu %xmm7, 64(%esp)
|
||||
vmovdqa (%ebp), %xmm7
|
||||
vpxor %xmm7, %xmm0, %xmm0
|
||||
vpxor %xmm7, %xmm1, %xmm1
|
||||
@@ -8754,6 +8729,8 @@ L_AES_GCM_decrypt_update_avx1_ghash_64_inplace:
|
||||
leal (%esi,%ebx,1), %ecx
|
||||
leal (%edi,%ebx,1), %edx
|
||||
vmovdqu 64(%esp), %xmm0
|
||||
vpaddd L_aes_gcm_avx1_four, %xmm0, %xmm7
|
||||
vmovdqu %xmm7, 64(%esp)
|
||||
vmovdqa L_aes_gcm_avx1_bswap_epi64, %xmm7
|
||||
vpaddd L_aes_gcm_avx1_one, %xmm0, %xmm1
|
||||
vpshufb %xmm7, %xmm1, %xmm1
|
||||
@@ -8762,9 +8739,6 @@ L_AES_GCM_decrypt_update_avx1_ghash_64_inplace:
|
||||
vpaddd L_aes_gcm_avx1_three, %xmm0, %xmm3
|
||||
vpshufb %xmm7, %xmm3, %xmm3
|
||||
vpshufb %xmm7, %xmm0, %xmm0
|
||||
vmovdqu 64(%esp), %xmm7
|
||||
vpaddd L_aes_gcm_avx1_four, %xmm7, %xmm7
|
||||
vmovdqu %xmm7, 64(%esp)
|
||||
vmovdqa (%ebp), %xmm7
|
||||
vpxor %xmm7, %xmm0, %xmm0
|
||||
vpxor %xmm7, %xmm1, %xmm1
|
||||
@@ -8950,6 +8924,8 @@ L_AES_GCM_decrypt_update_avx1_ghash_64:
|
||||
leal (%esi,%ebx,1), %ecx
|
||||
leal (%edi,%ebx,1), %edx
|
||||
vmovdqu 64(%esp), %xmm0
|
||||
vpaddd L_aes_gcm_avx1_four, %xmm0, %xmm7
|
||||
vmovdqu %xmm7, 64(%esp)
|
||||
vmovdqa L_aes_gcm_avx1_bswap_epi64, %xmm7
|
||||
vpaddd L_aes_gcm_avx1_one, %xmm0, %xmm1
|
||||
vpshufb %xmm7, %xmm1, %xmm1
|
||||
@@ -8958,9 +8934,6 @@ L_AES_GCM_decrypt_update_avx1_ghash_64:
|
||||
vpaddd L_aes_gcm_avx1_three, %xmm0, %xmm3
|
||||
vpshufb %xmm7, %xmm3, %xmm3
|
||||
vpshufb %xmm7, %xmm0, %xmm0
|
||||
vmovdqu 64(%esp), %xmm7
|
||||
vpaddd L_aes_gcm_avx1_four, %xmm7, %xmm7
|
||||
vmovdqu %xmm7, 64(%esp)
|
||||
vmovdqa (%ebp), %xmm7
|
||||
vpxor %xmm7, %xmm0, %xmm0
|
||||
vpxor %xmm7, %xmm1, %xmm1
|
||||
@@ -9043,8 +9016,6 @@ L_AES_GCM_decrypt_update_avx1_aesenc_64_ghash_avx_aesenc_64_enc_done:
|
||||
vmovdqu 16(%ecx), %xmm5
|
||||
vpxor %xmm4, %xmm0, %xmm0
|
||||
vpxor %xmm5, %xmm1, %xmm1
|
||||
vmovdqu %xmm4, (%ecx)
|
||||
vmovdqu %xmm5, 16(%ecx)
|
||||
vmovdqu %xmm0, (%edx)
|
||||
vmovdqu %xmm1, 16(%edx)
|
||||
vaesenclast %xmm7, %xmm2, %xmm2
|
||||
@@ -9053,8 +9024,6 @@ L_AES_GCM_decrypt_update_avx1_aesenc_64_ghash_avx_aesenc_64_enc_done:
|
||||
vmovdqu 48(%ecx), %xmm5
|
||||
vpxor %xmm4, %xmm2, %xmm2
|
||||
vpxor %xmm5, %xmm3, %xmm3
|
||||
vmovdqu %xmm4, 32(%ecx)
|
||||
vmovdqu %xmm5, 48(%ecx)
|
||||
vmovdqu %xmm2, 32(%edx)
|
||||
vmovdqu %xmm3, 48(%edx)
|
||||
# ghash encrypted counter
|
||||
@@ -9155,12 +9124,10 @@ L_AES_GCM_decrypt_update_avx1_done_64:
|
||||
L_AES_GCM_decrypt_update_avx1_last_block_start:
|
||||
leal (%esi,%ebx,1), %ecx
|
||||
leal (%edi,%ebx,1), %edx
|
||||
vmovdqu (%ecx), %xmm1
|
||||
vpshufb L_aes_gcm_avx1_bswap_mask, %xmm1, %xmm1
|
||||
vpxor %xmm6, %xmm1, %xmm1
|
||||
vmovdqu %xmm1, (%esp)
|
||||
vmovdqu (%ecx), %xmm3
|
||||
vpshufb L_aes_gcm_avx1_bswap_mask, %xmm3, %xmm3
|
||||
vpxor %xmm6, %xmm3, %xmm3
|
||||
vmovdqu 64(%esp), %xmm1
|
||||
vmovdqu (%esp), %xmm3
|
||||
vpshufb L_aes_gcm_avx1_bswap_epi64, %xmm1, %xmm0
|
||||
vpaddd L_aes_gcm_avx1_one, %xmm1, %xmm1
|
||||
vmovdqu %xmm1, 64(%esp)
|
||||
@@ -11036,8 +11003,6 @@ L_AES_GCM_decrypt_avx2_aesenc_64_ghash_aesenc_64_enc_done:
|
||||
vmovdqu 16(%ecx), %xmm4
|
||||
vpxor %xmm7, %xmm0, %xmm0
|
||||
vpxor %xmm4, %xmm1, %xmm1
|
||||
vmovdqu %xmm7, (%ecx)
|
||||
vmovdqu %xmm4, 16(%ecx)
|
||||
vmovdqu %xmm0, (%edx)
|
||||
vmovdqu %xmm1, 16(%edx)
|
||||
vmovdqu 32(%ecx), %xmm7
|
||||
@@ -12733,8 +12698,6 @@ L_AES_GCM_decrypt_update_avx2_aesenc_64_ghash_aesenc_64_enc_done:
|
||||
vmovdqu 16(%ecx), %xmm4
|
||||
vpxor %xmm7, %xmm0, %xmm0
|
||||
vpxor %xmm4, %xmm1, %xmm1
|
||||
vmovdqu %xmm7, (%ecx)
|
||||
vmovdqu %xmm4, 16(%ecx)
|
||||
vmovdqu %xmm0, (%edx)
|
||||
vmovdqu %xmm1, 16(%edx)
|
||||
vmovdqu 32(%ecx), %xmm7
|
||||
|
||||
@@ -504,7 +504,6 @@ _poly1305_calc_powers_avx2:
|
||||
# Reduce 260-bit to 130-bit
|
||||
movq %r15, %rax
|
||||
movq %rsi, %rdx
|
||||
movq %rbx, %rbx
|
||||
andq $-4, %rax
|
||||
andq $3, %r15
|
||||
addq %rax, %r13
|
||||
|
||||
@@ -454,7 +454,6 @@ poly1305_calc_powers_avx2 PROC
|
||||
; Reduce 260-bit to 130-bit
|
||||
mov rax, rdi
|
||||
mov rdx, rsi
|
||||
mov rbx, rbx
|
||||
and rax, -4
|
||||
and rdi, 3
|
||||
add r14, rax
|
||||
|
||||
@@ -45,6 +45,9 @@
|
||||
#undef WOLFSSL_ARMASM
|
||||
#undef WOLFSSL_RISCV_ASM
|
||||
#endif
|
||||
#ifdef WOLFSSL_X86_BUILD
|
||||
#undef USE_INTEL_SPEEDUP
|
||||
#endif
|
||||
|
||||
#if defined(WOLFSSL_PSOC6_CRYPTO)
|
||||
#include <wolfssl/wolfcrypt/port/cypress/psoc6_crypto.h>
|
||||
|
||||
@@ -770,7 +770,7 @@ static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
|
||||
"mull %[a] \n\t" \
|
||||
"movl %%eax, %[l] \n\t" \
|
||||
"movl %%edx, %[h] \n\t" \
|
||||
: [h] "+r" (vh), [l] "+r" (vl) \
|
||||
: [h] "+rm" (vh), [l] "+rm" (vl) \
|
||||
: [a] "rm" (va), [b] "rm" (vb) \
|
||||
: "eax", "edx", "cc" \
|
||||
)
|
||||
@@ -794,7 +794,7 @@ static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
|
||||
"addl %%eax, %[l] \n\t" \
|
||||
"adcl %%edx, %[h] \n\t" \
|
||||
"adcl $0 , %[o] \n\t" \
|
||||
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
|
||||
: [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
|
||||
: [a] "rm" (va), [b] "rm" (vb) \
|
||||
: "eax", "edx", "cc" \
|
||||
)
|
||||
@@ -820,7 +820,7 @@ static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
|
||||
"addl %%eax, %[l] \n\t" \
|
||||
"adcl %%edx, %[h] \n\t" \
|
||||
"adcl $0 , %[o] \n\t" \
|
||||
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
|
||||
: [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
|
||||
: [a] "rm" (va), [b] "rm" (vb) \
|
||||
: "eax", "edx", "cc" \
|
||||
)
|
||||
@@ -859,7 +859,7 @@ static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
|
||||
"addl %%eax, %[l] \n\t" \
|
||||
"adcl %%edx, %[h] \n\t" \
|
||||
"adcl $0 , %[o] \n\t" \
|
||||
: [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
|
||||
: [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
|
||||
: [a] "rm" (va) \
|
||||
: "eax", "edx", "cc" \
|
||||
)
|
||||
|
||||
@@ -7656,7 +7656,7 @@ _sp_2048_sqr_32:
|
||||
subq $0x110, %rsp
|
||||
movq %rdi, 256(%rsp)
|
||||
movq %rsi, 264(%rsp)
|
||||
movq $0x00, %rcx
|
||||
xorq %rcx, %rcx
|
||||
movq %rsp, %r8
|
||||
leaq 128(%rsi), %r9
|
||||
movq (%rsi), %rdx
|
||||
@@ -7820,7 +7820,7 @@ _sp_2048_sqr_32:
|
||||
movq 256(%rsp), %rsi
|
||||
leaq 128(%rsp), %r8
|
||||
addq $0x180, %rsi
|
||||
movq $0x00, %rcx
|
||||
xorq %rcx, %rcx
|
||||
movq -128(%r8), %rax
|
||||
subq -128(%rsi), %rax
|
||||
movq -120(%r8), %rdx
|
||||
@@ -8197,7 +8197,7 @@ _sp_2048_sqr_avx2_32:
|
||||
subq $0x110, %rsp
|
||||
movq %rdi, 256(%rsp)
|
||||
movq %rsi, 264(%rsp)
|
||||
movq $0x00, %rcx
|
||||
xorq %rcx, %rcx
|
||||
movq %rsp, %r8
|
||||
leaq 128(%rsi), %r9
|
||||
movq (%rsi), %rdx
|
||||
@@ -8361,7 +8361,7 @@ _sp_2048_sqr_avx2_32:
|
||||
movq 256(%rsp), %rsi
|
||||
leaq 128(%rsp), %r8
|
||||
addq $0x180, %rsi
|
||||
movq $0x00, %rcx
|
||||
xorq %rcx, %rcx
|
||||
movq -128(%r8), %rax
|
||||
subq -128(%rsi), %rax
|
||||
movq -120(%r8), %rdx
|
||||
@@ -9405,7 +9405,6 @@ L_2048_mont_reduce_16_loop:
|
||||
movq %rsi, %rdx
|
||||
#endif /* _WIN64 */
|
||||
movq %rdi, %rsi
|
||||
movq %rdi, %rdi
|
||||
subq $0x80, %rdi
|
||||
#ifndef __APPLE__
|
||||
callq sp_2048_cond_sub_16@plt
|
||||
@@ -10017,7 +10016,6 @@ _sp_2048_mont_reduce_avx2_16:
|
||||
movq 16(%rdi), %r14
|
||||
movq 24(%rdi), %r15
|
||||
addq $0x40, %rdi
|
||||
xorq %rbp, %rbp
|
||||
L_2048_mont_reduce_avx2_16_loop:
|
||||
# mu = a[i] * mp
|
||||
movq %r12, %rdx
|
||||
@@ -11482,7 +11480,6 @@ L_2048_mont_reduce_32_loop:
|
||||
movq %rsi, %rdx
|
||||
#endif /* _WIN64 */
|
||||
movq %rdi, %rsi
|
||||
movq %rdi, %rdi
|
||||
subq $0x100, %rdi
|
||||
#ifndef __APPLE__
|
||||
callq sp_2048_cond_sub_32@plt
|
||||
@@ -12368,7 +12365,6 @@ _sp_2048_mont_reduce_avx2_32:
|
||||
movq 16(%rdi), %r14
|
||||
movq 24(%rdi), %r15
|
||||
addq $0x80, %rdi
|
||||
xorq %rbp, %rbp
|
||||
L_2048_mont_reduce_avx2_32_loop:
|
||||
# mu = a[i] * mp
|
||||
movq %r12, %rdx
|
||||
@@ -15173,7 +15169,7 @@ sp_2048_lshift_32:
|
||||
_sp_2048_lshift_32:
|
||||
#endif /* __APPLE__ */
|
||||
movb %dl, %cl
|
||||
movq $0x00, %r10
|
||||
xorq %r10, %r10
|
||||
movq 216(%rsi), %r11
|
||||
movq 224(%rsi), %rdx
|
||||
movq 232(%rsi), %rax
|
||||
@@ -22716,7 +22712,7 @@ _sp_3072_sqr_24:
|
||||
subq $0xd0, %rsp
|
||||
movq %rdi, 192(%rsp)
|
||||
movq %rsi, 200(%rsp)
|
||||
movq $0x00, %rcx
|
||||
xorq %rcx, %rcx
|
||||
movq %rsp, %r8
|
||||
leaq 96(%rsi), %r9
|
||||
movq (%rsi), %rdx
|
||||
@@ -22848,7 +22844,7 @@ _sp_3072_sqr_24:
|
||||
movq 192(%rsp), %rsi
|
||||
leaq 96(%rsp), %r8
|
||||
addq $0x120, %rsi
|
||||
movq $0x00, %rcx
|
||||
xorq %rcx, %rcx
|
||||
movq -96(%r8), %rax
|
||||
subq -96(%rsi), %rax
|
||||
movq -88(%r8), %rdx
|
||||
@@ -23141,7 +23137,7 @@ _sp_3072_sqr_avx2_24:
|
||||
subq $0xd0, %rsp
|
||||
movq %rdi, 192(%rsp)
|
||||
movq %rsi, 200(%rsp)
|
||||
movq $0x00, %rcx
|
||||
xorq %rcx, %rcx
|
||||
movq %rsp, %r8
|
||||
leaq 96(%rsi), %r9
|
||||
movq (%rsi), %rdx
|
||||
@@ -23273,7 +23269,7 @@ _sp_3072_sqr_avx2_24:
|
||||
movq 192(%rsp), %rsi
|
||||
leaq 96(%rsp), %r8
|
||||
addq $0x120, %rsi
|
||||
movq $0x00, %rcx
|
||||
xorq %rcx, %rcx
|
||||
movq -96(%r8), %rax
|
||||
subq -96(%rsi), %rax
|
||||
movq -88(%r8), %rdx
|
||||
@@ -23566,7 +23562,7 @@ _sp_3072_sqr_48:
|
||||
subq $0x190, %rsp
|
||||
movq %rdi, 384(%rsp)
|
||||
movq %rsi, 392(%rsp)
|
||||
movq $0x00, %rcx
|
||||
xorq %rcx, %rcx
|
||||
movq %rsp, %r8
|
||||
leaq 192(%rsi), %r9
|
||||
movq (%rsi), %rdx
|
||||
@@ -23794,7 +23790,7 @@ _sp_3072_sqr_48:
|
||||
movq 384(%rsp), %rsi
|
||||
leaq 192(%rsp), %r8
|
||||
addq $0x240, %rsi
|
||||
movq $0x00, %rcx
|
||||
xorq %rcx, %rcx
|
||||
movq -192(%r8), %rax
|
||||
subq -192(%rsi), %rax
|
||||
movq -184(%r8), %rdx
|
||||
@@ -24339,7 +24335,7 @@ _sp_3072_sqr_avx2_48:
|
||||
subq $0x190, %rsp
|
||||
movq %rdi, 384(%rsp)
|
||||
movq %rsi, 392(%rsp)
|
||||
movq $0x00, %rcx
|
||||
xorq %rcx, %rcx
|
||||
movq %rsp, %r8
|
||||
leaq 192(%rsi), %r9
|
||||
movq (%rsi), %rdx
|
||||
@@ -24567,7 +24563,7 @@ _sp_3072_sqr_avx2_48:
|
||||
movq 384(%rsp), %rsi
|
||||
leaq 192(%rsp), %r8
|
||||
addq $0x240, %rsi
|
||||
movq $0x00, %rcx
|
||||
xorq %rcx, %rcx
|
||||
movq -192(%r8), %rax
|
||||
subq -192(%rsi), %rax
|
||||
movq -184(%r8), %rdx
|
||||
@@ -25973,7 +25969,6 @@ L_3072_mont_reduce_24_loop:
|
||||
movq %rsi, %rdx
|
||||
#endif /* _WIN64 */
|
||||
movq %rdi, %rsi
|
||||
movq %rdi, %rdi
|
||||
subq $0xc0, %rdi
|
||||
#ifndef __APPLE__
|
||||
callq sp_3072_cond_sub_24@plt
|
||||
@@ -26801,7 +26796,6 @@ _sp_3072_mont_reduce_avx2_24:
|
||||
movq 16(%rdi), %r14
|
||||
movq 24(%rdi), %r15
|
||||
addq $0x60, %rdi
|
||||
xorq %rbp, %rbp
|
||||
L_3072_mont_reduce_avx2_24_loop:
|
||||
# mu = a[i] * mp
|
||||
movq %r12, %rdx
|
||||
@@ -28885,7 +28879,6 @@ L_3072_mont_reduce_48_loop:
|
||||
movq %rsi, %rdx
|
||||
#endif /* _WIN64 */
|
||||
movq %rdi, %rsi
|
||||
movq %rdi, %rdi
|
||||
subq $0x180, %rdi
|
||||
#ifndef __APPLE__
|
||||
callq sp_3072_cond_sub_48@plt
|
||||
@@ -30123,7 +30116,6 @@ _sp_3072_mont_reduce_avx2_48:
|
||||
movq 16(%rdi), %r14
|
||||
movq 24(%rdi), %r15
|
||||
addq $0xc0, %rdi
|
||||
xorq %rbp, %rbp
|
||||
L_3072_mont_reduce_avx2_48_loop:
|
||||
# mu = a[i] * mp
|
||||
movq %r12, %rdx
|
||||
@@ -31900,7 +31892,7 @@ sp_3072_lshift_48:
|
||||
_sp_3072_lshift_48:
|
||||
#endif /* __APPLE__ */
|
||||
movb %dl, %cl
|
||||
movq $0x00, %r10
|
||||
xorq %r10, %r10
|
||||
movq 344(%rsi), %r11
|
||||
movq 352(%rsi), %rdx
|
||||
movq 360(%rsi), %rax
|
||||
@@ -35658,7 +35650,7 @@ _sp_4096_sqr_64:
|
||||
subq $0x210, %rsp
|
||||
movq %rdi, 512(%rsp)
|
||||
movq %rsi, 520(%rsp)
|
||||
movq $0x00, %rcx
|
||||
xorq %rcx, %rcx
|
||||
movq %rsp, %r8
|
||||
leaq 256(%rsi), %r9
|
||||
movq (%rsi), %rdx
|
||||
@@ -35950,7 +35942,7 @@ _sp_4096_sqr_64:
|
||||
movq 512(%rsp), %rsi
|
||||
leaq 256(%rsp), %r8
|
||||
addq $0x300, %rsi
|
||||
movq $0x00, %rcx
|
||||
xorq %rcx, %rcx
|
||||
movq -256(%r8), %rax
|
||||
subq -256(%rsi), %rax
|
||||
movq -248(%r8), %rdx
|
||||
@@ -36663,7 +36655,7 @@ _sp_4096_sqr_avx2_64:
|
||||
subq $0x210, %rsp
|
||||
movq %rdi, 512(%rsp)
|
||||
movq %rsi, 520(%rsp)
|
||||
movq $0x00, %rcx
|
||||
xorq %rcx, %rcx
|
||||
movq %rsp, %r8
|
||||
leaq 256(%rsi), %r9
|
||||
movq (%rsi), %rdx
|
||||
@@ -36955,7 +36947,7 @@ _sp_4096_sqr_avx2_64:
|
||||
movq 512(%rsp), %rsi
|
||||
leaq 256(%rsp), %r8
|
||||
addq $0x300, %rsi
|
||||
movq $0x00, %rcx
|
||||
xorq %rcx, %rcx
|
||||
movq -256(%r8), %rax
|
||||
subq -256(%rsi), %rax
|
||||
movq -248(%r8), %rdx
|
||||
@@ -39337,7 +39329,6 @@ L_4096_mont_reduce_64_loop:
|
||||
movq %rsi, %rdx
|
||||
#endif /* _WIN64 */
|
||||
movq %rdi, %rsi
|
||||
movq %rdi, %rdi
|
||||
subq $0x200, %rdi
|
||||
#ifndef __APPLE__
|
||||
callq sp_4096_cond_sub_64@plt
|
||||
@@ -40927,7 +40918,6 @@ _sp_4096_mont_reduce_avx2_64:
|
||||
movq 16(%rdi), %r14
|
||||
movq 24(%rdi), %r15
|
||||
addq $0x100, %rdi
|
||||
xorq %rbp, %rbp
|
||||
L_4096_mont_reduce_avx2_64_loop:
|
||||
# mu = a[i] * mp
|
||||
movq %r12, %rdx
|
||||
@@ -43260,7 +43250,7 @@ sp_4096_lshift_64:
|
||||
_sp_4096_lshift_64:
|
||||
#endif /* __APPLE__ */
|
||||
movb %dl, %cl
|
||||
movq $0x00, %r10
|
||||
xorq %r10, %r10
|
||||
movq 472(%rsi), %r11
|
||||
movq 480(%rsi), %rdx
|
||||
movq 488(%rsi), %rax
|
||||
@@ -44326,15 +44316,11 @@ _sp_256_mont_sqr_4:
|
||||
# A[0] * A[0]
|
||||
movq (%rsi), %rax
|
||||
mulq %rax
|
||||
movq %rax, %rax
|
||||
movq %rdx, %rdx
|
||||
movq %rax, %r8
|
||||
movq %rdx, %rbx
|
||||
# A[1] * A[1]
|
||||
movq 8(%rsi), %rax
|
||||
mulq %rax
|
||||
movq %rax, %rax
|
||||
movq %rdx, %rdx
|
||||
addq %rbx, %r9
|
||||
adcq %rax, %r10
|
||||
adcq $0x00, %rdx
|
||||
@@ -44342,8 +44328,6 @@ _sp_256_mont_sqr_4:
|
||||
# A[2] * A[2]
|
||||
movq 16(%rsi), %rax
|
||||
mulq %rax
|
||||
movq %rax, %rax
|
||||
movq %rdx, %rdx
|
||||
addq %rbx, %r11
|
||||
adcq %rax, %r12
|
||||
adcq $0x00, %rdx
|
||||
@@ -44351,8 +44335,6 @@ _sp_256_mont_sqr_4:
|
||||
# A[3] * A[3]
|
||||
movq 24(%rsi), %rax
|
||||
mulq %rax
|
||||
movq %rax, %rax
|
||||
movq %rdx, %rdx
|
||||
addq %rbx, %r13
|
||||
adcq %rax, %r14
|
||||
adcq %rdx, %r15
|
||||
@@ -48981,7 +48963,6 @@ L_384_mont_reduce_order_6_loop:
|
||||
movq %rsi, %rdx
|
||||
#endif /* _WIN64 */
|
||||
movq %rdi, %rsi
|
||||
movq %rdi, %rdi
|
||||
subq $48, %rdi
|
||||
#ifndef __APPLE__
|
||||
callq sp_384_cond_sub_6@plt
|
||||
@@ -56409,7 +56390,6 @@ _sp_521_mont_reduce_order_avx2_9:
|
||||
movq 16(%rdi), %r14
|
||||
movq 24(%rdi), %r15
|
||||
addq $32, %rdi
|
||||
xorq %rbp, %rbp
|
||||
L_521_mont_reduce_order_avx2_9_loop:
|
||||
# mu = a[i] * mp
|
||||
movq %r12, %rdx
|
||||
@@ -57531,7 +57511,7 @@ sp_521_lshift_9:
|
||||
_sp_521_lshift_9:
|
||||
#endif /* __APPLE__ */
|
||||
movb %dl, %cl
|
||||
movq $0x00, %r10
|
||||
xorq %r10, %r10
|
||||
movq 32(%rsi), %r11
|
||||
movq 40(%rsi), %rdx
|
||||
movq 48(%rsi), %rax
|
||||
@@ -57584,7 +57564,7 @@ sp_521_lshift_18:
|
||||
_sp_521_lshift_18:
|
||||
#endif /* __APPLE__ */
|
||||
movb %dl, %cl
|
||||
movq $0x00, %r10
|
||||
xorq %r10, %r10
|
||||
movq 104(%rsi), %r11
|
||||
movq 112(%rsi), %rdx
|
||||
movq 120(%rsi), %rax
|
||||
@@ -64747,7 +64727,6 @@ L_1024_mont_reduce_16_loop:
|
||||
movq %rsi, %rdx
|
||||
#endif /* _WIN64 */
|
||||
movq %rdi, %rsi
|
||||
movq %rdi, %rdi
|
||||
subq $0x80, %rdi
|
||||
#ifndef __APPLE__
|
||||
callq sp_1024_cond_sub_16@plt
|
||||
@@ -65797,7 +65776,6 @@ _sp_1024_mont_reduce_avx2_16:
|
||||
movq 16(%rdi), %r14
|
||||
movq 24(%rdi), %r15
|
||||
addq $0x40, %rdi
|
||||
xorq %rbp, %rbp
|
||||
L_1024_mont_reduce_avx2_16_loop:
|
||||
# mu = a[i] * mp
|
||||
movq %r12, %rdx
|
||||
|
||||
@@ -7505,7 +7505,7 @@ sp_2048_sqr_32 PROC
|
||||
sub rsp, 272
|
||||
mov QWORD PTR [rsp+256], rcx
|
||||
mov QWORD PTR [rsp+264], rdx
|
||||
mov r9, 0
|
||||
xor r9, r9
|
||||
mov r10, rsp
|
||||
lea r11, QWORD PTR [rdx+128]
|
||||
mov rax, QWORD PTR [rdx]
|
||||
@@ -7657,7 +7657,7 @@ ENDIF
|
||||
mov rdx, QWORD PTR [rsp+256]
|
||||
lea r10, QWORD PTR [rsp+128]
|
||||
add rdx, 384
|
||||
mov r9, 0
|
||||
xor r9, r9
|
||||
mov r8, QWORD PTR [r10+-128]
|
||||
sub r8, QWORD PTR [rdx+-128]
|
||||
mov rax, QWORD PTR [r10+-120]
|
||||
@@ -8023,7 +8023,7 @@ sp_2048_sqr_avx2_32 PROC
|
||||
sub rsp, 272
|
||||
mov QWORD PTR [rsp+256], rcx
|
||||
mov QWORD PTR [rsp+264], rdx
|
||||
mov r9, 0
|
||||
xor r9, r9
|
||||
mov r10, rsp
|
||||
lea r11, QWORD PTR [rdx+128]
|
||||
mov rax, QWORD PTR [rdx]
|
||||
@@ -8175,7 +8175,7 @@ ENDIF
|
||||
mov rdx, QWORD PTR [rsp+256]
|
||||
lea r10, QWORD PTR [rsp+128]
|
||||
add rdx, 384
|
||||
mov r9, 0
|
||||
xor r9, r9
|
||||
mov r8, QWORD PTR [r10+-128]
|
||||
sub r8, QWORD PTR [rdx+-128]
|
||||
mov rax, QWORD PTR [r10+-120]
|
||||
@@ -9179,7 +9179,6 @@ ELSE
|
||||
mov r8, r9
|
||||
ENDIF
|
||||
mov rdx, rcx
|
||||
mov rcx, rcx
|
||||
sub rcx, 128
|
||||
call sp_2048_cond_sub_16
|
||||
pop rsi
|
||||
@@ -9736,7 +9735,6 @@ sp_2048_mont_reduce_avx2_16 PROC
|
||||
mov rdi, QWORD PTR [r9+16]
|
||||
mov rsi, QWORD PTR [r9+24]
|
||||
add r9, 64
|
||||
xor rbp, rbp
|
||||
L_2048_mont_reduce_avx2_16_loop:
|
||||
; mu = a[i] * mp
|
||||
mov rdx, r14
|
||||
@@ -11190,7 +11188,6 @@ ELSE
|
||||
mov r8, r9
|
||||
ENDIF
|
||||
mov rdx, rcx
|
||||
mov rcx, rcx
|
||||
sub rcx, 256
|
||||
call sp_2048_cond_sub_32
|
||||
pop rsi
|
||||
@@ -12019,7 +12016,6 @@ sp_2048_mont_reduce_avx2_32 PROC
|
||||
mov rdi, QWORD PTR [r9+16]
|
||||
mov rsi, QWORD PTR [r9+24]
|
||||
add r9, 128
|
||||
xor rbp, rbp
|
||||
L_2048_mont_reduce_avx2_32_loop:
|
||||
; mu = a[i] * mp
|
||||
mov rdx, r14
|
||||
@@ -14805,7 +14801,7 @@ sp_2048_lshift_32 PROC
|
||||
push r13
|
||||
mov rax, rcx
|
||||
mov cl, r8b
|
||||
mov r12, 0
|
||||
xor r12, r12
|
||||
mov r13, QWORD PTR [rdx+216]
|
||||
mov r8, QWORD PTR [rdx+224]
|
||||
mov r9, QWORD PTR [rdx+232]
|
||||
@@ -22145,7 +22141,7 @@ sp_3072_sqr_24 PROC
|
||||
sub rsp, 208
|
||||
mov QWORD PTR [rsp+192], rcx
|
||||
mov QWORD PTR [rsp+200], rdx
|
||||
mov r9, 0
|
||||
xor r9, r9
|
||||
mov r10, rsp
|
||||
lea r11, QWORD PTR [rdx+96]
|
||||
mov rax, QWORD PTR [rdx]
|
||||
@@ -22265,7 +22261,7 @@ ENDIF
|
||||
mov rdx, QWORD PTR [rsp+192]
|
||||
lea r10, QWORD PTR [rsp+96]
|
||||
add rdx, 288
|
||||
mov r9, 0
|
||||
xor r9, r9
|
||||
mov r8, QWORD PTR [r10+-96]
|
||||
sub r8, QWORD PTR [rdx+-96]
|
||||
mov rax, QWORD PTR [r10+-88]
|
||||
@@ -22547,7 +22543,7 @@ sp_3072_sqr_avx2_24 PROC
|
||||
sub rsp, 208
|
||||
mov QWORD PTR [rsp+192], rcx
|
||||
mov QWORD PTR [rsp+200], rdx
|
||||
mov r9, 0
|
||||
xor r9, r9
|
||||
mov r10, rsp
|
||||
lea r11, QWORD PTR [rdx+96]
|
||||
mov rax, QWORD PTR [rdx]
|
||||
@@ -22667,7 +22663,7 @@ ENDIF
|
||||
mov rdx, QWORD PTR [rsp+192]
|
||||
lea r10, QWORD PTR [rsp+96]
|
||||
add rdx, 288
|
||||
mov r9, 0
|
||||
xor r9, r9
|
||||
mov r8, QWORD PTR [r10+-96]
|
||||
sub r8, QWORD PTR [rdx+-96]
|
||||
mov rax, QWORD PTR [r10+-88]
|
||||
@@ -22949,7 +22945,7 @@ sp_3072_sqr_48 PROC
|
||||
sub rsp, 400
|
||||
mov QWORD PTR [rsp+384], rcx
|
||||
mov QWORD PTR [rsp+392], rdx
|
||||
mov r9, 0
|
||||
xor r9, r9
|
||||
mov r10, rsp
|
||||
lea r11, QWORD PTR [rdx+192]
|
||||
mov rax, QWORD PTR [rdx]
|
||||
@@ -23165,7 +23161,7 @@ ENDIF
|
||||
mov rdx, QWORD PTR [rsp+384]
|
||||
lea r10, QWORD PTR [rsp+192]
|
||||
add rdx, 576
|
||||
mov r9, 0
|
||||
xor r9, r9
|
||||
mov r8, QWORD PTR [r10+-192]
|
||||
sub r8, QWORD PTR [rdx+-192]
|
||||
mov rax, QWORD PTR [r10+-184]
|
||||
@@ -23699,7 +23695,7 @@ sp_3072_sqr_avx2_48 PROC
|
||||
sub rsp, 400
|
||||
mov QWORD PTR [rsp+384], rcx
|
||||
mov QWORD PTR [rsp+392], rdx
|
||||
mov r9, 0
|
||||
xor r9, r9
|
||||
mov r10, rsp
|
||||
lea r11, QWORD PTR [rdx+192]
|
||||
mov rax, QWORD PTR [rdx]
|
||||
@@ -23915,7 +23911,7 @@ ENDIF
|
||||
mov rdx, QWORD PTR [rsp+384]
|
||||
lea r10, QWORD PTR [rsp+192]
|
||||
add rdx, 576
|
||||
mov r9, 0
|
||||
xor r9, r9
|
||||
mov r8, QWORD PTR [r10+-192]
|
||||
sub r8, QWORD PTR [rdx+-192]
|
||||
mov rax, QWORD PTR [r10+-184]
|
||||
@@ -25292,7 +25288,6 @@ ELSE
|
||||
mov r8, r9
|
||||
ENDIF
|
||||
mov rdx, rcx
|
||||
mov rcx, rcx
|
||||
sub rcx, 192
|
||||
call sp_3072_cond_sub_24
|
||||
pop rsi
|
||||
@@ -26065,7 +26060,6 @@ sp_3072_mont_reduce_avx2_24 PROC
|
||||
mov rdi, QWORD PTR [r9+16]
|
||||
mov rsi, QWORD PTR [r9+24]
|
||||
add r9, 96
|
||||
xor rbp, rbp
|
||||
L_3072_mont_reduce_avx2_24_loop:
|
||||
; mu = a[i] * mp
|
||||
mov rdx, r14
|
||||
@@ -28138,7 +28132,6 @@ ELSE
|
||||
mov r8, r9
|
||||
ENDIF
|
||||
mov rdx, rcx
|
||||
mov rcx, rcx
|
||||
sub rcx, 384
|
||||
call sp_3072_cond_sub_48
|
||||
pop rsi
|
||||
@@ -29319,7 +29312,6 @@ sp_3072_mont_reduce_avx2_48 PROC
|
||||
mov rdi, QWORD PTR [r9+16]
|
||||
mov rsi, QWORD PTR [r9+24]
|
||||
add r9, 192
|
||||
xor rbp, rbp
|
||||
L_3072_mont_reduce_avx2_48_loop:
|
||||
; mu = a[i] * mp
|
||||
mov rdx, r14
|
||||
@@ -31077,7 +31069,7 @@ sp_3072_lshift_48 PROC
|
||||
push r13
|
||||
mov rax, rcx
|
||||
mov cl, r8b
|
||||
mov r12, 0
|
||||
xor r12, r12
|
||||
mov r13, QWORD PTR [rdx+344]
|
||||
mov r8, QWORD PTR [rdx+352]
|
||||
mov r9, QWORD PTR [rdx+360]
|
||||
@@ -34728,7 +34720,7 @@ sp_4096_sqr_64 PROC
|
||||
sub rsp, 528
|
||||
mov QWORD PTR [rsp+512], rcx
|
||||
mov QWORD PTR [rsp+520], rdx
|
||||
mov r9, 0
|
||||
xor r9, r9
|
||||
mov r10, rsp
|
||||
lea r11, QWORD PTR [rdx+256]
|
||||
mov rax, QWORD PTR [rdx]
|
||||
@@ -35008,7 +35000,7 @@ ENDIF
|
||||
mov rdx, QWORD PTR [rsp+512]
|
||||
lea r10, QWORD PTR [rsp+256]
|
||||
add rdx, 768
|
||||
mov r9, 0
|
||||
xor r9, r9
|
||||
mov r8, QWORD PTR [r10+-256]
|
||||
sub r8, QWORD PTR [rdx+-256]
|
||||
mov rax, QWORD PTR [r10+-248]
|
||||
@@ -35710,7 +35702,7 @@ sp_4096_sqr_avx2_64 PROC
|
||||
sub rsp, 528
|
||||
mov QWORD PTR [rsp+512], rcx
|
||||
mov QWORD PTR [rsp+520], rdx
|
||||
mov r9, 0
|
||||
xor r9, r9
|
||||
mov r10, rsp
|
||||
lea r11, QWORD PTR [rdx+256]
|
||||
mov rax, QWORD PTR [rdx]
|
||||
@@ -35990,7 +35982,7 @@ ENDIF
|
||||
mov rdx, QWORD PTR [rsp+512]
|
||||
lea r10, QWORD PTR [rsp+256]
|
||||
add rdx, 768
|
||||
mov r9, 0
|
||||
xor r9, r9
|
||||
mov r8, QWORD PTR [r10+-256]
|
||||
sub r8, QWORD PTR [rdx+-256]
|
||||
mov rax, QWORD PTR [r10+-248]
|
||||
@@ -38343,7 +38335,6 @@ ELSE
|
||||
mov r8, r9
|
||||
ENDIF
|
||||
mov rdx, rcx
|
||||
mov rcx, rcx
|
||||
sub rcx, 512
|
||||
call sp_4096_cond_sub_64
|
||||
pop rsi
|
||||
@@ -39876,7 +39867,6 @@ sp_4096_mont_reduce_avx2_64 PROC
|
||||
mov rdi, QWORD PTR [r9+16]
|
||||
mov rsi, QWORD PTR [r9+24]
|
||||
add r9, 256
|
||||
xor rbp, rbp
|
||||
L_4096_mont_reduce_avx2_64_loop:
|
||||
; mu = a[i] * mp
|
||||
mov rdx, r14
|
||||
@@ -42190,7 +42180,7 @@ sp_4096_lshift_64 PROC
|
||||
push r13
|
||||
mov rax, rcx
|
||||
mov cl, r8b
|
||||
mov r12, 0
|
||||
xor r12, r12
|
||||
mov r13, QWORD PTR [rdx+472]
|
||||
mov r8, QWORD PTR [rdx+480]
|
||||
mov r9, QWORD PTR [rdx+488]
|
||||
@@ -43187,15 +43177,11 @@ sp_256_mont_sqr_4 PROC
|
||||
; A[0] * A[0]
|
||||
mov rax, QWORD PTR [r8]
|
||||
mul rax
|
||||
mov rax, rax
|
||||
mov rdx, rdx
|
||||
mov r10, rax
|
||||
mov rbx, rdx
|
||||
; A[1] * A[1]
|
||||
mov rax, QWORD PTR [r8+8]
|
||||
mul rax
|
||||
mov rax, rax
|
||||
mov rdx, rdx
|
||||
add r11, rbx
|
||||
adc r12, rax
|
||||
adc rdx, 0
|
||||
@@ -43203,8 +43189,6 @@ sp_256_mont_sqr_4 PROC
|
||||
; A[2] * A[2]
|
||||
mov rax, QWORD PTR [r8+16]
|
||||
mul rax
|
||||
mov rax, rax
|
||||
mov rdx, rdx
|
||||
add r13, rbx
|
||||
adc r14, rax
|
||||
adc rdx, 0
|
||||
@@ -43212,8 +43196,6 @@ sp_256_mont_sqr_4 PROC
|
||||
; A[3] * A[3]
|
||||
mov rax, QWORD PTR [r8+24]
|
||||
mul rax
|
||||
mov rax, rax
|
||||
mov rdx, rdx
|
||||
add r15, rbx
|
||||
adc rdi, rax
|
||||
adc rsi, rdx
|
||||
@@ -47531,7 +47513,6 @@ ELSE
|
||||
mov r8, r9
|
||||
ENDIF
|
||||
mov rdx, rcx
|
||||
mov rcx, rcx
|
||||
sub rcx, 48
|
||||
call sp_384_cond_sub_6
|
||||
pop rsi
|
||||
@@ -54689,7 +54670,6 @@ sp_521_mont_reduce_order_avx2_9 PROC
|
||||
mov rdi, QWORD PTR [r9+16]
|
||||
mov rsi, QWORD PTR [r9+24]
|
||||
add r9, 32
|
||||
xor rbp, rbp
|
||||
L_521_mont_reduce_order_avx2_9_loop:
|
||||
; mu = a[i] * mp
|
||||
mov rdx, r14
|
||||
@@ -55781,7 +55761,7 @@ sp_521_lshift_9 PROC
|
||||
push r13
|
||||
mov rax, rcx
|
||||
mov cl, r8b
|
||||
mov r12, 0
|
||||
xor r12, r12
|
||||
mov r13, QWORD PTR [rdx+32]
|
||||
mov r8, QWORD PTR [rdx+40]
|
||||
mov r9, QWORD PTR [rdx+48]
|
||||
@@ -55828,7 +55808,7 @@ sp_521_lshift_18 PROC
|
||||
push r13
|
||||
mov rax, rcx
|
||||
mov cl, r8b
|
||||
mov r12, 0
|
||||
xor r12, r12
|
||||
mov r13, QWORD PTR [rdx+104]
|
||||
mov r8, QWORD PTR [rdx+112]
|
||||
mov r9, QWORD PTR [rdx+120]
|
||||
@@ -62803,7 +62783,6 @@ ELSE
|
||||
mov r8, r9
|
||||
ENDIF
|
||||
mov rdx, rcx
|
||||
mov rcx, rcx
|
||||
sub rcx, 128
|
||||
call sp_1024_cond_sub_16
|
||||
pop rsi
|
||||
@@ -63804,7 +63783,6 @@ sp_1024_mont_reduce_avx2_16 PROC
|
||||
mov rdi, QWORD PTR [r9+16]
|
||||
mov rsi, QWORD PTR [r9+24]
|
||||
add r9, 64
|
||||
xor rbp, rbp
|
||||
L_1024_mont_reduce_avx2_16_loop:
|
||||
; mu = a[i] * mp
|
||||
mov rdx, r14
|
||||
|
||||
@@ -166,6 +166,10 @@
|
||||
#include <wolfcrypt/src/misc.c>
|
||||
#endif
|
||||
|
||||
#ifdef WOLFSSL_X86_BUILD
|
||||
#undef USE_INTEL_SPEEDUP
|
||||
#endif
|
||||
|
||||
#if defined(WOLFSSL_MLDSA_SIGN_SMALL_MEM_PRECALC) && \
|
||||
!defined(WOLFSSL_MLDSA_SIGN_SMALL_MEM)
|
||||
#define WOLFSSL_MLDSA_SIGN_SMALL_MEM
|
||||
|
||||
@@ -74,6 +74,9 @@
|
||||
#undef WOLFSSL_ARMASM
|
||||
#undef WOLFSSL_RISCV_ASM
|
||||
#endif
|
||||
#ifdef WOLFSSL_X86_BUILD
|
||||
#undef USE_INTEL_SPEEDUP
|
||||
#endif
|
||||
|
||||
#include <wolfssl/wolfcrypt/wc_mlkem.h>
|
||||
#include <wolfssl/wolfcrypt/sha3.h>
|
||||
|
||||
@@ -52,6 +52,9 @@
|
||||
#undef WOLFSSL_ARMASM
|
||||
#undef WOLFSSL_RISCV_ASM
|
||||
#endif
|
||||
#ifdef WOLFSSL_X86_BUILD
|
||||
#undef USE_INTEL_SPEEDUP
|
||||
#endif
|
||||
|
||||
#if defined(USE_INTEL_SPEEDUP)
|
||||
/* CPU information for Intel. */
|
||||
|
||||
@@ -29,7 +29,8 @@
|
||||
|
||||
#include <wolfssl/wolfcrypt/types.h>
|
||||
|
||||
#if defined(USE_INTEL_SPEEDUP) && !defined(NO_CURVED25519_X64)
|
||||
#if defined(USE_INTEL_SPEEDUP) && defined(WOLFSSL_X86_64_BUILD) && \
|
||||
!defined(NO_CURVED25519_X64)
|
||||
#define CURVED25519_X64
|
||||
#elif defined(HAVE___UINT128_T) && !defined(NO_CURVED25519_128BIT)
|
||||
#define CURVED25519_128BIT
|
||||
|
||||
Reference in New Issue
Block a user