Merge pull request #3850 from SparkiDev/sp_x64_asm_fixes

SP ECC: Fix P-256 modinv for AVX2
This commit is contained in:
toddouska
2021-03-10 14:57:17 -08:00
committed by GitHub
2 changed files with 56 additions and 4 deletions

View File

@@ -5261,8 +5261,7 @@ int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng,
}
#endif
-#if (defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_SP_MATH_ALL)) && \
-defined(WOLFSSL_HAVE_SP_ECC)
+#if defined(WOLFSSL_HAVE_SP_ECC)
if (key->idx != ECC_CUSTOM_IDX
#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
&& key->asyncDev.marker != WOLFSSL_ASYNC_MARKER_ECC
@@ -6537,8 +6536,7 @@ int wc_ecc_verify_hash_ex(mp_int *r, mp_int *s, const byte* hash,
}
#endif
-#if (defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_SP_MATH_ALL)) && \
-defined(WOLFSSL_HAVE_SP_ECC)
+#if defined(WOLFSSL_HAVE_SP_ECC)
if (key->idx != ECC_CUSTOM_IDX
#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_ECC)
&& key->asyncDev.marker != WOLFSSL_ASYNC_MARKER_ECC

View File

@@ -39021,6 +39021,13 @@ _sp_256_mont_add_4:
andq %rsi, %r11
subq %rsi, %rax
sbbq %r10, %rcx
+sbbq $0x00, %r8
+sbbq %r11, %r9
+adcq $0x00, %rsi
+andq %rsi, %r10
+andq %rsi, %r11
+subq %rsi, %rax
+sbbq %r10, %rcx
movq %rax, (%rdi)
sbbq $0x00, %r8
movq %rcx, 8(%rdi)
@@ -39065,6 +39072,13 @@ _sp_256_mont_dbl_4:
andq %r11, %r10
subq %r11, %rdx
sbbq %r9, %rax
+sbbq $0x00, %rcx
+sbbq %r10, %r8
+adcq $0x00, %r11
+andq %r11, %r9
+andq %r11, %r10
+subq %r11, %rdx
+sbbq %r9, %rax
movq %rdx, (%rdi)
sbbq $0x00, %rcx
movq %rax, 8(%rdi)
@@ -39111,6 +39125,13 @@ _sp_256_mont_tpl_4:
sbbq %r9, %rax
sbbq $0x00, %rcx
sbbq %r10, %r8
+adcq $0x00, %r11
+andq %r11, %r9
+andq %r11, %r10
+subq %r11, %rdx
+sbbq %r9, %rax
+sbbq $0x00, %rcx
+sbbq %r10, %r8
movq $0xffffffff, %r9
movq $0xffffffff00000001, %r10
addq (%rsi), %rdx
@@ -39123,6 +39144,13 @@ _sp_256_mont_tpl_4:
andq %r11, %r10
subq %r11, %rdx
sbbq %r9, %rax
+sbbq $0x00, %rcx
+sbbq %r10, %r8
+adcq $0x00, %r11
+andq %r11, %r9
+andq %r11, %r10
+subq %r11, %rdx
+sbbq %r9, %rax
movq %rdx, (%rdi)
sbbq $0x00, %rcx
movq %rax, 8(%rdi)
@@ -39168,6 +39196,13 @@ _sp_256_mont_sub_4:
andq %rsi, %r11
addq %rsi, %rax
adcq %r10, %rcx
+adcq $0x00, %r8
+adcq %r11, %r9
+adcq $0x00, %rsi
+andq %rsi, %r10
+andq %rsi, %r11
+addq %rsi, %rax
+adcq %r10, %rcx
movq %rax, (%rdi)
adcq $0x00, %r8
movq %rcx, 8(%rdi)
@@ -41630,6 +41665,13 @@ L_256_mod_inv_avx2_4_usubv_sub_shr1:
vpand %ymm14, %ymm1, %ymm1
vpaddd %ymm5, %ymm0, %ymm0
vpaddd %ymm4, %ymm1, %ymm1
+vpsrad $26, %ymm1, %ymm5
+vpsrad $26, %ymm0, %ymm4
+vpermd %ymm5, %ymm13, %ymm5
+vpand %ymm14, %ymm0, %ymm0
+vpand %ymm14, %ymm1, %ymm1
+vpaddd %ymm5, %ymm0, %ymm0
+vpaddd %ymm4, %ymm1, %ymm1
vpextrd $0x00, %xmm0, %eax
vpextrd $0x01, %xmm0, %r8d
vpextrd $2, %xmm0, %r10d
@@ -41690,6 +41732,13 @@ L_256_mod_inv_avx2_4_vsubu_sub_shr1:
vpand %ymm14, %ymm3, %ymm3
vpaddd %ymm5, %ymm2, %ymm2
vpaddd %ymm4, %ymm3, %ymm3
+vpsrad $26, %ymm3, %ymm5
+vpsrad $26, %ymm2, %ymm4
+vpermd %ymm5, %ymm13, %ymm5
+vpand %ymm14, %ymm2, %ymm2
+vpand %ymm14, %ymm3, %ymm3
+vpaddd %ymm5, %ymm2, %ymm2
+vpaddd %ymm4, %ymm3, %ymm3
vpextrd $0x00, %xmm2, %eax
vpextrd $0x01, %xmm2, %r8d
vpextrd $2, %xmm2, %r10d
@@ -41703,14 +41752,19 @@ L_256_mod_inv_avx2_4_vsubu_sub_shr1:
vpextrd $0x00, %xmm2, %r14d
vpextrd $0x00, %xmm3, %r15d
L_256_mod_inv_avx2_4_store_done:
+movslq %eax, %rax
shlq $26, %rcx
addq %rcx, %rax
+movslq %r8d, %r8
shlq $26, %r9
addq %r9, %r8
+movslq %r10d, %r10
shlq $26, %r11
addq %r11, %r10
+movslq %r12d, %r12
shlq $26, %r13
addq %r13, %r12
+movslq %r14d, %r14
shlq $26, %r15
addq %r15, %r14
movq %r8, %rcx