forked from wolfSSL/wolfssl
Merge pull request #3877 from SparkiDev/sakke_eccsi_fixup
This commit is contained in:
@@ -39668,6 +39668,13 @@ _sp_256_mont_add_4:
|
|||||||
andq %rsi, %r11
|
andq %rsi, %r11
|
||||||
subq %rsi, %rax
|
subq %rsi, %rax
|
||||||
sbbq %r10, %rcx
|
sbbq %r10, %rcx
|
||||||
|
sbbq $0x00, %r8
|
||||||
|
sbbq %r11, %r9
|
||||||
|
adcq $0x00, %rsi
|
||||||
|
andq %rsi, %r10
|
||||||
|
andq %rsi, %r11
|
||||||
|
subq %rsi, %rax
|
||||||
|
sbbq %r10, %rcx
|
||||||
movq %rax, (%rdi)
|
movq %rax, (%rdi)
|
||||||
sbbq $0x00, %r8
|
sbbq $0x00, %r8
|
||||||
movq %rcx, 8(%rdi)
|
movq %rcx, 8(%rdi)
|
||||||
@@ -39712,6 +39719,13 @@ _sp_256_mont_dbl_4:
|
|||||||
andq %r11, %r10
|
andq %r11, %r10
|
||||||
subq %r11, %rdx
|
subq %r11, %rdx
|
||||||
sbbq %r9, %rax
|
sbbq %r9, %rax
|
||||||
|
sbbq $0x00, %rcx
|
||||||
|
sbbq %r10, %r8
|
||||||
|
adcq $0x00, %r11
|
||||||
|
andq %r11, %r9
|
||||||
|
andq %r11, %r10
|
||||||
|
subq %r11, %rdx
|
||||||
|
sbbq %r9, %rax
|
||||||
movq %rdx, (%rdi)
|
movq %rdx, (%rdi)
|
||||||
sbbq $0x00, %rcx
|
sbbq $0x00, %rcx
|
||||||
movq %rax, 8(%rdi)
|
movq %rax, 8(%rdi)
|
||||||
@@ -39758,6 +39772,13 @@ _sp_256_mont_tpl_4:
|
|||||||
sbbq %r9, %rax
|
sbbq %r9, %rax
|
||||||
sbbq $0x00, %rcx
|
sbbq $0x00, %rcx
|
||||||
sbbq %r10, %r8
|
sbbq %r10, %r8
|
||||||
|
adcq $0x00, %r11
|
||||||
|
andq %r11, %r9
|
||||||
|
andq %r11, %r10
|
||||||
|
subq %r11, %rdx
|
||||||
|
sbbq %r9, %rax
|
||||||
|
sbbq $0x00, %rcx
|
||||||
|
sbbq %r10, %r8
|
||||||
movq $0xffffffff, %r9
|
movq $0xffffffff, %r9
|
||||||
movq $0xffffffff00000001, %r10
|
movq $0xffffffff00000001, %r10
|
||||||
addq (%rsi), %rdx
|
addq (%rsi), %rdx
|
||||||
@@ -39770,6 +39791,13 @@ _sp_256_mont_tpl_4:
|
|||||||
andq %r11, %r10
|
andq %r11, %r10
|
||||||
subq %r11, %rdx
|
subq %r11, %rdx
|
||||||
sbbq %r9, %rax
|
sbbq %r9, %rax
|
||||||
|
sbbq $0x00, %rcx
|
||||||
|
sbbq %r10, %r8
|
||||||
|
adcq $0x00, %r11
|
||||||
|
andq %r11, %r9
|
||||||
|
andq %r11, %r10
|
||||||
|
subq %r11, %rdx
|
||||||
|
sbbq %r9, %rax
|
||||||
movq %rdx, (%rdi)
|
movq %rdx, (%rdi)
|
||||||
sbbq $0x00, %rcx
|
sbbq $0x00, %rcx
|
||||||
movq %rax, 8(%rdi)
|
movq %rax, 8(%rdi)
|
||||||
@@ -39815,6 +39843,13 @@ _sp_256_mont_sub_4:
|
|||||||
andq %rsi, %r11
|
andq %rsi, %r11
|
||||||
addq %rsi, %rax
|
addq %rsi, %rax
|
||||||
adcq %r10, %rcx
|
adcq %r10, %rcx
|
||||||
|
adcq $0x00, %r8
|
||||||
|
adcq %r11, %r9
|
||||||
|
adcq $0x00, %rsi
|
||||||
|
andq %rsi, %r10
|
||||||
|
andq %rsi, %r11
|
||||||
|
addq %rsi, %rax
|
||||||
|
adcq %r10, %rcx
|
||||||
movq %rax, (%rdi)
|
movq %rax, (%rdi)
|
||||||
adcq $0x00, %r8
|
adcq $0x00, %r8
|
||||||
movq %rcx, 8(%rdi)
|
movq %rcx, 8(%rdi)
|
||||||
@@ -42251,6 +42286,13 @@ L_256_mod_inv_avx2_4_usubv_sub_shr1:
|
|||||||
vpand %ymm14, %ymm1, %ymm1
|
vpand %ymm14, %ymm1, %ymm1
|
||||||
vpaddd %ymm5, %ymm0, %ymm0
|
vpaddd %ymm5, %ymm0, %ymm0
|
||||||
vpaddd %ymm4, %ymm1, %ymm1
|
vpaddd %ymm4, %ymm1, %ymm1
|
||||||
|
vpsrad $26, %ymm1, %ymm5
|
||||||
|
vpsrad $26, %ymm0, %ymm4
|
||||||
|
vpermd %ymm5, %ymm13, %ymm5
|
||||||
|
vpand %ymm14, %ymm0, %ymm0
|
||||||
|
vpand %ymm14, %ymm1, %ymm1
|
||||||
|
vpaddd %ymm5, %ymm0, %ymm0
|
||||||
|
vpaddd %ymm4, %ymm1, %ymm1
|
||||||
vpextrd $0x00, %xmm0, %eax
|
vpextrd $0x00, %xmm0, %eax
|
||||||
vpextrd $0x01, %xmm0, %r8d
|
vpextrd $0x01, %xmm0, %r8d
|
||||||
vpextrd $2, %xmm0, %r10d
|
vpextrd $2, %xmm0, %r10d
|
||||||
@@ -42311,6 +42353,13 @@ L_256_mod_inv_avx2_4_vsubu_sub_shr1:
|
|||||||
vpand %ymm14, %ymm3, %ymm3
|
vpand %ymm14, %ymm3, %ymm3
|
||||||
vpaddd %ymm5, %ymm2, %ymm2
|
vpaddd %ymm5, %ymm2, %ymm2
|
||||||
vpaddd %ymm4, %ymm3, %ymm3
|
vpaddd %ymm4, %ymm3, %ymm3
|
||||||
|
vpsrad $26, %ymm3, %ymm5
|
||||||
|
vpsrad $26, %ymm2, %ymm4
|
||||||
|
vpermd %ymm5, %ymm13, %ymm5
|
||||||
|
vpand %ymm14, %ymm2, %ymm2
|
||||||
|
vpand %ymm14, %ymm3, %ymm3
|
||||||
|
vpaddd %ymm5, %ymm2, %ymm2
|
||||||
|
vpaddd %ymm4, %ymm3, %ymm3
|
||||||
vpextrd $0x00, %xmm2, %eax
|
vpextrd $0x00, %xmm2, %eax
|
||||||
vpextrd $0x01, %xmm2, %r8d
|
vpextrd $0x01, %xmm2, %r8d
|
||||||
vpextrd $2, %xmm2, %r10d
|
vpextrd $2, %xmm2, %r10d
|
||||||
@@ -42324,14 +42373,19 @@ L_256_mod_inv_avx2_4_vsubu_sub_shr1:
|
|||||||
vpextrd $0x00, %xmm2, %r14d
|
vpextrd $0x00, %xmm2, %r14d
|
||||||
vpextrd $0x00, %xmm3, %r15d
|
vpextrd $0x00, %xmm3, %r15d
|
||||||
L_256_mod_inv_avx2_4_store_done:
|
L_256_mod_inv_avx2_4_store_done:
|
||||||
|
movslq %eax, %rax
|
||||||
shlq $26, %rcx
|
shlq $26, %rcx
|
||||||
addq %rcx, %rax
|
addq %rcx, %rax
|
||||||
|
movslq %r8d, %r8
|
||||||
shlq $26, %r9
|
shlq $26, %r9
|
||||||
addq %r9, %r8
|
addq %r9, %r8
|
||||||
|
movslq %r10d, %r10
|
||||||
shlq $26, %r11
|
shlq $26, %r11
|
||||||
addq %r11, %r10
|
addq %r11, %r10
|
||||||
|
movslq %r12d, %r12
|
||||||
shlq $26, %r13
|
shlq $26, %r13
|
||||||
addq %r13, %r12
|
addq %r13, %r12
|
||||||
|
movslq %r14d, %r14
|
||||||
shlq $26, %r15
|
shlq $26, %r15
|
||||||
addq %r15, %r14
|
addq %r15, %r14
|
||||||
movq %r8, %rcx
|
movq %r8, %rcx
|
||||||
@@ -44817,12 +44871,12 @@ L_384_get_entry_65_6_start:
|
|||||||
movdqa %xmm14, %xmm12
|
movdqa %xmm14, %xmm12
|
||||||
paddd %xmm15, %xmm14
|
paddd %xmm15, %xmm14
|
||||||
pcmpeqd %xmm13, %xmm12
|
pcmpeqd %xmm13, %xmm12
|
||||||
movdqa (%rsi), %xmm6
|
movdqu (%rsi), %xmm6
|
||||||
movdqa 16(%rsi), %xmm7
|
movdqu 16(%rsi), %xmm7
|
||||||
movdqa 32(%rsi), %xmm8
|
movdqu 32(%rsi), %xmm8
|
||||||
movdqa 48(%rsi), %xmm9
|
movdqu 48(%rsi), %xmm9
|
||||||
movdqa 64(%rsi), %xmm10
|
movdqu 64(%rsi), %xmm10
|
||||||
movdqa 80(%rsi), %xmm11
|
movdqu 80(%rsi), %xmm11
|
||||||
addq $0x60, %rsi
|
addq $0x60, %rsi
|
||||||
pand %xmm12, %xmm6
|
pand %xmm12, %xmm6
|
||||||
pand %xmm12, %xmm7
|
pand %xmm12, %xmm7
|
||||||
|
@@ -38455,6 +38455,13 @@ sp_256_mont_add_4 PROC
|
|||||||
and r13, rdx
|
and r13, rdx
|
||||||
sub rax, rdx
|
sub rax, rdx
|
||||||
sbb r9, r12
|
sbb r9, r12
|
||||||
|
sbb r10, 0
|
||||||
|
sbb r11, r13
|
||||||
|
adc rdx, 0
|
||||||
|
and r12, rdx
|
||||||
|
and r13, rdx
|
||||||
|
sub rax, rdx
|
||||||
|
sbb r9, r12
|
||||||
mov QWORD PTR [rcx], rax
|
mov QWORD PTR [rcx], rax
|
||||||
sbb r10, 0
|
sbb r10, 0
|
||||||
mov QWORD PTR [rcx+8], r9
|
mov QWORD PTR [rcx+8], r9
|
||||||
@@ -38492,6 +38499,13 @@ sp_256_mont_dbl_4 PROC
|
|||||||
and r12, r13
|
and r12, r13
|
||||||
sub rax, r13
|
sub rax, r13
|
||||||
sbb r8, r11
|
sbb r8, r11
|
||||||
|
sbb r9, 0
|
||||||
|
sbb r10, r12
|
||||||
|
adc r13, 0
|
||||||
|
and r11, r13
|
||||||
|
and r12, r13
|
||||||
|
sub rax, r13
|
||||||
|
sbb r8, r11
|
||||||
mov QWORD PTR [rcx], rax
|
mov QWORD PTR [rcx], rax
|
||||||
sbb r9, 0
|
sbb r9, 0
|
||||||
mov QWORD PTR [rcx+8], r8
|
mov QWORD PTR [rcx+8], r8
|
||||||
@@ -38531,6 +38545,13 @@ sp_256_mont_tpl_4 PROC
|
|||||||
sbb r8, r11
|
sbb r8, r11
|
||||||
sbb r9, 0
|
sbb r9, 0
|
||||||
sbb r10, r12
|
sbb r10, r12
|
||||||
|
adc r13, 0
|
||||||
|
and r11, r13
|
||||||
|
and r12, r13
|
||||||
|
sub rax, r13
|
||||||
|
sbb r8, r11
|
||||||
|
sbb r9, 0
|
||||||
|
sbb r10, r12
|
||||||
mov r11, 4294967295
|
mov r11, 4294967295
|
||||||
mov r12, 18446744069414584321
|
mov r12, 18446744069414584321
|
||||||
add rax, QWORD PTR [rdx]
|
add rax, QWORD PTR [rdx]
|
||||||
@@ -38543,6 +38564,13 @@ sp_256_mont_tpl_4 PROC
|
|||||||
and r12, r13
|
and r12, r13
|
||||||
sub rax, r13
|
sub rax, r13
|
||||||
sbb r8, r11
|
sbb r8, r11
|
||||||
|
sbb r9, 0
|
||||||
|
sbb r10, r12
|
||||||
|
adc r13, 0
|
||||||
|
and r11, r13
|
||||||
|
and r12, r13
|
||||||
|
sub rax, r13
|
||||||
|
sbb r8, r11
|
||||||
mov QWORD PTR [rcx], rax
|
mov QWORD PTR [rcx], rax
|
||||||
sbb r9, 0
|
sbb r9, 0
|
||||||
mov QWORD PTR [rcx+8], r8
|
mov QWORD PTR [rcx+8], r8
|
||||||
@@ -38581,6 +38609,13 @@ sp_256_mont_sub_4 PROC
|
|||||||
and r13, rdx
|
and r13, rdx
|
||||||
add rax, rdx
|
add rax, rdx
|
||||||
adc r9, r12
|
adc r9, r12
|
||||||
|
adc r10, 0
|
||||||
|
adc r11, r13
|
||||||
|
adc rdx, 0
|
||||||
|
and r12, rdx
|
||||||
|
and r13, rdx
|
||||||
|
add rax, rdx
|
||||||
|
adc r9, r12
|
||||||
mov QWORD PTR [rcx], rax
|
mov QWORD PTR [rcx], rax
|
||||||
adc r10, 0
|
adc r10, 0
|
||||||
mov QWORD PTR [rcx+8], r9
|
mov QWORD PTR [rcx+8], r9
|
||||||
@@ -39345,10 +39380,10 @@ L_256_get_entry_64_4_start:
|
|||||||
movdqa xmm8, xmm10
|
movdqa xmm8, xmm10
|
||||||
paddd xmm10, xmm11
|
paddd xmm10, xmm11
|
||||||
pcmpeqd xmm8, xmm9
|
pcmpeqd xmm8, xmm9
|
||||||
movdqa xmm4, [rdx]
|
movdqu xmm4, [rdx]
|
||||||
movdqa xmm5, [rdx+16]
|
movdqu xmm5, [rdx+16]
|
||||||
movdqa xmm6, [rdx+32]
|
movdqu xmm6, [rdx+32]
|
||||||
movdqa xmm7, [rdx+48]
|
movdqu xmm7, [rdx+48]
|
||||||
add rdx, 64
|
add rdx, 64
|
||||||
pand xmm4, xmm8
|
pand xmm4, xmm8
|
||||||
pand xmm5, xmm8
|
pand xmm5, xmm8
|
||||||
@@ -39432,10 +39467,10 @@ L_256_get_entry_65_4_start:
|
|||||||
movdqa xmm8, xmm10
|
movdqa xmm8, xmm10
|
||||||
paddd xmm10, xmm11
|
paddd xmm10, xmm11
|
||||||
pcmpeqd xmm8, xmm9
|
pcmpeqd xmm8, xmm9
|
||||||
movdqa xmm4, [rdx]
|
movdqu xmm4, [rdx]
|
||||||
movdqa xmm5, [rdx+16]
|
movdqu xmm5, [rdx+16]
|
||||||
movdqa xmm6, [rdx+32]
|
movdqu xmm6, [rdx+32]
|
||||||
movdqa xmm7, [rdx+48]
|
movdqu xmm7, [rdx+48]
|
||||||
add rdx, 64
|
add rdx, 64
|
||||||
pand xmm4, xmm8
|
pand xmm4, xmm8
|
||||||
pand xmm5, xmm8
|
pand xmm5, xmm8
|
||||||
@@ -40774,6 +40809,13 @@ L_256_mod_inv_avx2_4_usubv_sub_shr1:
|
|||||||
vpand ymm1, ymm1, ymm14
|
vpand ymm1, ymm1, ymm14
|
||||||
vpaddd ymm0, ymm0, ymm5
|
vpaddd ymm0, ymm0, ymm5
|
||||||
vpaddd ymm1, ymm1, ymm4
|
vpaddd ymm1, ymm1, ymm4
|
||||||
|
vpsrad ymm5, ymm1, 26
|
||||||
|
vpsrad ymm4, ymm0, 26
|
||||||
|
vpermd ymm5, ymm13, ymm5
|
||||||
|
vpand ymm0, ymm0, ymm14
|
||||||
|
vpand ymm1, ymm1, ymm14
|
||||||
|
vpaddd ymm0, ymm0, ymm5
|
||||||
|
vpaddd ymm1, ymm1, ymm4
|
||||||
vpextrd eax, xmm0, 0
|
vpextrd eax, xmm0, 0
|
||||||
vpextrd r10d, xmm0, 1
|
vpextrd r10d, xmm0, 1
|
||||||
vpextrd r12d, xmm0, 2
|
vpextrd r12d, xmm0, 2
|
||||||
@@ -40834,6 +40876,13 @@ L_256_mod_inv_avx2_4_vsubu_sub_shr1:
|
|||||||
vpand ymm3, ymm3, ymm14
|
vpand ymm3, ymm3, ymm14
|
||||||
vpaddd ymm2, ymm2, ymm5
|
vpaddd ymm2, ymm2, ymm5
|
||||||
vpaddd ymm3, ymm3, ymm4
|
vpaddd ymm3, ymm3, ymm4
|
||||||
|
vpsrad ymm5, ymm3, 26
|
||||||
|
vpsrad ymm4, ymm2, 26
|
||||||
|
vpermd ymm5, ymm13, ymm5
|
||||||
|
vpand ymm2, ymm2, ymm14
|
||||||
|
vpand ymm3, ymm3, ymm14
|
||||||
|
vpaddd ymm2, ymm2, ymm5
|
||||||
|
vpaddd ymm3, ymm3, ymm4
|
||||||
vpextrd eax, xmm2, 0
|
vpextrd eax, xmm2, 0
|
||||||
vpextrd r10d, xmm2, 1
|
vpextrd r10d, xmm2, 1
|
||||||
vpextrd r12d, xmm2, 2
|
vpextrd r12d, xmm2, 2
|
||||||
@@ -40847,14 +40896,19 @@ L_256_mod_inv_avx2_4_vsubu_sub_shr1:
|
|||||||
vpextrd edi, xmm2, 0
|
vpextrd edi, xmm2, 0
|
||||||
vpextrd esi, xmm3, 0
|
vpextrd esi, xmm3, 0
|
||||||
L_256_mod_inv_avx2_4_store_done:
|
L_256_mod_inv_avx2_4_store_done:
|
||||||
|
movslq rax, eax
|
||||||
shl r9, 26
|
shl r9, 26
|
||||||
add rax, r9
|
add rax, r9
|
||||||
|
movslq r10, r10d
|
||||||
shl r11, 26
|
shl r11, 26
|
||||||
add r10, r11
|
add r10, r11
|
||||||
|
movslq r12, r12d
|
||||||
shl r13, 26
|
shl r13, 26
|
||||||
add r12, r13
|
add r12, r13
|
||||||
|
movslq r14, r14d
|
||||||
shl r15, 26
|
shl r15, 26
|
||||||
add r14, r15
|
add r14, r15
|
||||||
|
movslq rdi, edi
|
||||||
shl rsi, 26
|
shl rsi, 26
|
||||||
add rdi, rsi
|
add rdi, rsi
|
||||||
mov r9, r10
|
mov r9, r10
|
||||||
@@ -43037,12 +43091,12 @@ L_384_get_entry_64_6_start:
|
|||||||
movdqa xmm12, xmm14
|
movdqa xmm12, xmm14
|
||||||
paddd xmm14, xmm15
|
paddd xmm14, xmm15
|
||||||
pcmpeqd xmm12, xmm13
|
pcmpeqd xmm12, xmm13
|
||||||
movdqa xmm6, [rdx]
|
movdqu xmm6, [rdx]
|
||||||
movdqa xmm7, [rdx+16]
|
movdqu xmm7, [rdx+16]
|
||||||
movdqa xmm8, [rdx+32]
|
movdqu xmm8, [rdx+32]
|
||||||
movdqa xmm9, [rdx+48]
|
movdqu xmm9, [rdx+48]
|
||||||
movdqa xmm10, [rdx+64]
|
movdqu xmm10, [rdx+64]
|
||||||
movdqa xmm11, [rdx+80]
|
movdqu xmm11, [rdx+80]
|
||||||
add rdx, 96
|
add rdx, 96
|
||||||
pand xmm6, xmm12
|
pand xmm6, xmm12
|
||||||
pand xmm7, xmm12
|
pand xmm7, xmm12
|
||||||
@@ -43144,12 +43198,12 @@ L_384_get_entry_65_6_start:
|
|||||||
movdqa xmm12, xmm14
|
movdqa xmm12, xmm14
|
||||||
paddd xmm14, xmm15
|
paddd xmm14, xmm15
|
||||||
pcmpeqd xmm12, xmm13
|
pcmpeqd xmm12, xmm13
|
||||||
movdqa xmm6, [rdx]
|
movdqu xmm6, [rdx]
|
||||||
movdqa xmm7, [rdx+16]
|
movdqu xmm7, [rdx+16]
|
||||||
movdqa xmm8, [rdx+32]
|
movdqu xmm8, [rdx+32]
|
||||||
movdqa xmm9, [rdx+48]
|
movdqu xmm9, [rdx+48]
|
||||||
movdqa xmm10, [rdx+64]
|
movdqu xmm10, [rdx+64]
|
||||||
movdqa xmm11, [rdx+80]
|
movdqu xmm11, [rdx+80]
|
||||||
add rdx, 96
|
add rdx, 96
|
||||||
pand xmm6, xmm12
|
pand xmm6, xmm12
|
||||||
pand xmm7, xmm12
|
pand xmm7, xmm12
|
||||||
|
Reference in New Issue
Block a user