Files
wolfssl/wolfcrypt/src/sp_x86_64_asm.asm
Sean Parkinson dbb03cb5a3 SP RSA verify only: fix to compile
Configurations:
./configure --disable-asn --disable-filesystem --enable-cryptonly
--disable-dh --disable-sha224 --disable-ecc CFLAGS=-DWOLFSSL_PUBLIC_MP
--enable-rsavfy --enable-sp=small2048 --enable-sp-math

./configure --disable-asn --disable-filesystem --enable-cryptonly
--disable-dh --disable-sha224 --disable-ecc CFLAGS=-DWOLFSSL_PUBLIC_MP
--enable-rsavfy --enable-sp=2048 --enable-sp-math

./configure --disable-asn --disable-filesystem --enable-cryptonly
--disable-dh --disable-sha224 --disable-ecc CFLAGS=-DWOLFSSL_PUBLIC_MP
--enable-rsavfy --enable-sp=small2048 --enable-sp-math-all

./configure --disable-asn --disable-filesystem --enable-cryptonly
--disable-dh --disable-sha224 --disable-ecc CFLAGS=-DWOLFSSL_PUBLIC_MP
--enable-rsavfy --enable-sp=small2048 --enable-sp-math --enable-sp-asm

./configure --disable-asn --disable-filesystem --enable-cryptonly
--disable-dh --disable-sha224 --disable-ecc CFLAGS=-DWOLFSSL_PUBLIC_MP
--enable-rsavfy --enable-sp=2048 --enable-sp-math --enable-sp-asm
2021-08-20 13:16:58 +10:00

52720 lines
1.5 MiB

; /* sp_x86_64_asm
; *
; * Copyright (C) 2006-2021 wolfSSL Inc.
; *
; * This file is part of wolfSSL.
; *
; * wolfSSL is free software; you can redistribute it and/or modify
; * it under the terms of the GNU General Public License as published by
; * the Free Software Foundation; either version 2 of the License, or
; * (at your option) any later version.
; *
; * wolfSSL is distributed in the hope that it will be useful,
; * but WITHOUT ANY WARRANTY; without even the implied warranty of
; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; * GNU General Public License for more details.
; *
; * You should have received a copy of the GNU General Public License
; * along with this program; if not, write to the Free Software
; * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
; */
; Assembly-time feature selection. Every symbol below is only assigned when
; it has not already been defined, so an earlier definition takes precedence.
;
; @Version is the assembler's built-in version symbol. When it is below 1200
; both NO_AVX2_SUPPORT and NO_MOVBE_SUPPORT are defined, which removes the
; code guarded by "IFNDEF NO_AVX2_SUPPORT" / "IFNDEF NO_MOVBE_SUPPORT".
IF @Version LT 1200
; AVX2 instructions not recognized by old versions of MASM
IFNDEF NO_AVX2_SUPPORT
NO_AVX2_SUPPORT = 1
ENDIF
; MOVBE instruction not recognized by old versions of MASM
IFNDEF NO_MOVBE_SUPPORT
NO_MOVBE_SUPPORT = 1
ENDIF
ENDIF
; Define HAVE_INTEL_AVX1 unless it was already defined.
IFNDEF HAVE_INTEL_AVX1
HAVE_INTEL_AVX1 = 1
ENDIF
; Define HAVE_INTEL_AVX2 unless NO_AVX2_SUPPORT is defined (either by the
; version check above or before this point).
IFNDEF NO_AVX2_SUPPORT
HAVE_INTEL_AVX2 = 1
ENDIF
; Define _WIN64 unless it was already defined.
IFNDEF _WIN64
_WIN64 = 1
ENDIF
IFNDEF WOLFSSL_SP_NO_2048
IFNDEF WOLFSSL_SP_NO_2048
; /* Read big endian unsigned byte array into r.
; * Uses the bswap instruction.
; *
; * Register arguments as used by the code below:
; * r     (rcx)  A single precision integer. 256 bytes (32 64-bit words)
; *              are always written, least significant word first.
; * size  (rdx)  Maximum number of bytes to convert. Never read here: the
; *              output length is fixed at 256 bytes, so rdx is reused as
; *              the end-of-output pointer.
; * a     (r8)   Byte array.
; * n     (r9)   Number of bytes in array to read. No bounds check against
; *              the 256 byte output is performed.
; *
; * Modifies only rax, rcx, rdx, r8, r9, r10, r11 and the flags. The stack
; * and all non-volatile registers are left untouched, so this is a leaf
; * function that needs no unwind (PROC FRAME) information.
; */
_text SEGMENT READONLY PARA
sp_2048_from_bin_bswap PROC
        mov     r11, r8
        mov     rdx, rcx
        add     r11, r9                         ; r11 = a + n, one past the last input byte
        add     rdx, 256                        ; rdx = r + 256, one past the last output word
        jmp     L_2048_from_bin_bswap_64_end
        ; Convert 64 input bytes (8 words) per iteration, walking the input
        ; backwards from its end while the output advances forwards.
L_2048_from_bin_bswap_64_start:
        sub     r11, 64
        mov     rax, QWORD PTR [r11+56]
        mov     r10, QWORD PTR [r11+48]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r10
        mov     rax, QWORD PTR [r11+40]
        mov     r10, QWORD PTR [r11+32]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+16], rax
        mov     QWORD PTR [rcx+24], r10
        mov     rax, QWORD PTR [r11+24]
        mov     r10, QWORD PTR [r11+16]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+32], rax
        mov     QWORD PTR [rcx+40], r10
        mov     rax, QWORD PTR [r11+8]
        mov     r10, QWORD PTR [r11]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+48], rax
        mov     QWORD PTR [rcx+56], r10
        add     rcx, 64
        sub     r9, 64
L_2048_from_bin_bswap_64_end:
        cmp     r9, 63
        jg      L_2048_from_bin_bswap_64_start
        jmp     L_2048_from_bin_bswap_8_end
        ; Convert one whole word (8 input bytes) per iteration.
L_2048_from_bin_bswap_8_start:
        sub     r11, 8
        mov     rax, QWORD PTR [r11]
        bswap   rax
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
L_2048_from_bin_bswap_8_end:
        cmp     r9, 7
        jg      L_2048_from_bin_bswap_8_start
        ; r9 = leftover byte count. These are the most significant bytes and
        ; sit at the start of a, which r8 still points to.
        test    r9, r9
        je      L_2048_from_bin_bswap_hi_end
        xor     r10, r10                        ; r10 accumulates the partial top word
        xor     rax, rax                        ; upper bytes of rax stay zero for the byte loads
L_2048_from_bin_bswap_hi_start:
        mov     al, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        add     r10, rax
        dec     r9
        jg      L_2048_from_bin_bswap_hi_start
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
L_2048_from_bin_bswap_hi_end:
        ; Zero every output word that received no input data.
        cmp     rcx, rdx
        je      L_2048_from_bin_bswap_zero_end
L_2048_from_bin_bswap_zero_start:
        mov     QWORD PTR [rcx], 0
        add     rcx, 8
        cmp     rcx, rdx
        jb      L_2048_from_bin_bswap_zero_start ; pointers: unsigned comparison
L_2048_from_bin_bswap_zero_end:
        ret
sp_2048_from_bin_bswap ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Read big endian unsigned byte array into r.
; * Uses the movbe instruction which is an optional instruction.
; *
; * Register arguments as used by the code below:
; * r     (rcx)  A single precision integer. 256 bytes (32 64-bit words)
; *              are always written, least significant word first.
; * size  (rdx)  Maximum number of bytes to convert. Never read here: the
; *              output length is fixed at 256 bytes, so rdx is reused as
; *              the end-of-output pointer.
; * a     (r8)   Byte array.
; * n     (r9)   Number of bytes in array to read. No bounds check against
; *              the 256 byte output is performed.
; *
; * Modifies only rax, rcx, rdx, r8, r9, r10, r11 and the flags. The stack
; * and all non-volatile registers are left untouched, so this is a leaf
; * function that needs no unwind (PROC FRAME) information.
; */
_text SEGMENT READONLY PARA
sp_2048_from_bin_movbe PROC
        mov     r11, r8
        mov     rdx, rcx
        add     r11, r9                         ; r11 = a + n, one past the last input byte
        add     rdx, 256                        ; rdx = r + 256, one past the last output word
        jmp     L_2048_from_bin_movbe_64_end
        ; Convert 64 input bytes (8 words) per iteration, walking the input
        ; backwards from its end while the output advances forwards.
L_2048_from_bin_movbe_64_start:
        sub     r11, 64
        movbe   rax, QWORD PTR [r11+56]
        movbe   r10, QWORD PTR [r11+48]
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r10
        movbe   rax, QWORD PTR [r11+40]
        movbe   r10, QWORD PTR [r11+32]
        mov     QWORD PTR [rcx+16], rax
        mov     QWORD PTR [rcx+24], r10
        movbe   rax, QWORD PTR [r11+24]
        movbe   r10, QWORD PTR [r11+16]
        mov     QWORD PTR [rcx+32], rax
        mov     QWORD PTR [rcx+40], r10
        movbe   rax, QWORD PTR [r11+8]
        movbe   r10, QWORD PTR [r11]
        mov     QWORD PTR [rcx+48], rax
        mov     QWORD PTR [rcx+56], r10
        add     rcx, 64
        sub     r9, 64
L_2048_from_bin_movbe_64_end:
        cmp     r9, 63
        jg      L_2048_from_bin_movbe_64_start
        jmp     L_2048_from_bin_movbe_8_end
        ; Convert one whole word (8 input bytes) per iteration.
L_2048_from_bin_movbe_8_start:
        sub     r11, 8
        movbe   rax, QWORD PTR [r11]
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
L_2048_from_bin_movbe_8_end:
        cmp     r9, 7
        jg      L_2048_from_bin_movbe_8_start
        ; r9 = leftover byte count. These are the most significant bytes and
        ; sit at the start of a, which r8 still points to.
        test    r9, r9
        je      L_2048_from_bin_movbe_hi_end
        xor     r10, r10                        ; r10 accumulates the partial top word
        xor     rax, rax                        ; upper bytes of rax stay zero for the byte loads
L_2048_from_bin_movbe_hi_start:
        mov     al, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        add     r10, rax
        dec     r9
        jg      L_2048_from_bin_movbe_hi_start
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
L_2048_from_bin_movbe_hi_end:
        ; Zero every output word that received no input data.
        cmp     rcx, rdx
        je      L_2048_from_bin_movbe_zero_end
L_2048_from_bin_movbe_zero_start:
        mov     QWORD PTR [rcx], 0
        add     rcx, 8
        cmp     rcx, rdx
        jb      L_2048_from_bin_movbe_zero_start ; pointers: unsigned comparison
L_2048_from_bin_movbe_zero_end:
        ret
sp_2048_from_bin_movbe ENDP
_text ENDS
ENDIF
; /* Serialize r into a as a big endian byte string.
; * Always writes exactly 256 bytes.
; * Byte order is reversed with the bswap instruction.
; *
; * r  (rcx)  A single precision integer; 32 64-bit words are read.
; * a  (rdx)  Byte array receiving the 256 bytes.
; *
; * Scratch registers: rax, r8.
; */
_text SEGMENT READONLY PARA
sp_2048_to_bin_bswap_32 PROC
; The body is fully unrolled at assembly time: 16 expansions, each moving
; two words. Source words are taken from the top of r downwards while the
; destination offset climbs, so the most significant word lands first.
SP_2048_TO_BIN_BSWAP_32_SRC = 248
SP_2048_TO_BIN_BSWAP_32_DST = 0
REPEAT 16
        mov     rax, QWORD PTR [rcx+SP_2048_TO_BIN_BSWAP_32_SRC]
        mov     r8, QWORD PTR [rcx+SP_2048_TO_BIN_BSWAP_32_SRC-8]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+SP_2048_TO_BIN_BSWAP_32_DST], rax
        mov     QWORD PTR [rdx+SP_2048_TO_BIN_BSWAP_32_DST+8], r8
SP_2048_TO_BIN_BSWAP_32_SRC = SP_2048_TO_BIN_BSWAP_32_SRC - 16
SP_2048_TO_BIN_BSWAP_32_DST = SP_2048_TO_BIN_BSWAP_32_DST + 16
ENDM
        ret
sp_2048_to_bin_bswap_32 ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Serialize r into a as a big endian byte string.
; * Always writes exactly 256 bytes.
; * Byte order is reversed while loading, using the optional movbe
; * instruction.
; *
; * r  (rcx)  A single precision integer; 32 64-bit words are read.
; * a  (rdx)  Byte array receiving the 256 bytes.
; *
; * Scratch registers: rax, r8.
; */
_text SEGMENT READONLY PARA
sp_2048_to_bin_movbe_32 PROC
; The body is fully unrolled at assembly time: 16 expansions, each moving
; two words. Source words are taken from the top of r downwards while the
; destination offset climbs, so the most significant word lands first.
SP_2048_TO_BIN_MOVBE_32_SRC = 248
SP_2048_TO_BIN_MOVBE_32_DST = 0
REPEAT 16
        movbe   rax, QWORD PTR [rcx+SP_2048_TO_BIN_MOVBE_32_SRC]
        movbe   r8, QWORD PTR [rcx+SP_2048_TO_BIN_MOVBE_32_SRC-8]
        mov     QWORD PTR [rdx+SP_2048_TO_BIN_MOVBE_32_DST], rax
        mov     QWORD PTR [rdx+SP_2048_TO_BIN_MOVBE_32_DST+8], r8
SP_2048_TO_BIN_MOVBE_32_SRC = SP_2048_TO_BIN_MOVBE_32_SRC - 16
SP_2048_TO_BIN_MOVBE_32_DST = SP_2048_TO_BIN_MOVBE_32_DST + 16
ENDM
        ret
sp_2048_to_bin_movbe_32 ENDP
_text ENDS
ENDIF
; /* Multiply a and b into r. (r = a * b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_2048_mul_16 PROC
push r12
mov r9, rdx
sub rsp, 128
; A[0] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9]
xor r12, r12
mov QWORD PTR [rsp], rax
mov r11, rdx
; A[0] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+8]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+8], r11
; A[0] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+8]
add r12, rax
adc r10, rdx
adc r11, 0
; A[2] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+16]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+16], r12
; A[0] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[1] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+8]
add r10, rax
adc r11, rdx
adc r12, 0
; A[2] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+16]
add r10, rax
adc r11, rdx
adc r12, 0
; A[3] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rsp+24], r10
; A[0] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+8]
add r11, rax
adc r12, rdx
adc r10, 0
; A[2] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+16]
add r11, rax
adc r12, rdx
adc r10, 0
; A[3] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+24]
add r11, rax
adc r12, rdx
adc r10, 0
; A[4] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+32]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+32], r11
; A[0] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+8]
add r12, rax
adc r10, rdx
adc r11, 0
; A[2] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+16]
add r12, rax
adc r10, rdx
adc r11, 0
; A[3] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+24]
add r12, rax
adc r10, rdx
adc r11, 0
; A[4] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+32]
add r12, rax
adc r10, rdx
adc r11, 0
; A[5] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+40]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+40], r12
; A[0] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[1] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+8]
add r10, rax
adc r11, rdx
adc r12, 0
; A[2] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+16]
add r10, rax
adc r11, rdx
adc r12, 0
; A[3] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
adc r12, 0
; A[4] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+32]
add r10, rax
adc r11, rdx
adc r12, 0
; A[5] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+40]
add r10, rax
adc r11, rdx
adc r12, 0
; A[6] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+48]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rsp+48], r10
; A[0] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+8]
add r11, rax
adc r12, rdx
adc r10, 0
; A[2] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+16]
add r11, rax
adc r12, rdx
adc r10, 0
; A[3] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+24]
add r11, rax
adc r12, rdx
adc r10, 0
; A[4] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+32]
add r11, rax
adc r12, rdx
adc r10, 0
; A[5] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+40]
add r11, rax
adc r12, rdx
adc r10, 0
; A[6] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+48]
add r11, rax
adc r12, rdx
adc r10, 0
; A[7] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+56]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+56], r11
; A[0] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+8]
add r12, rax
adc r10, rdx
adc r11, 0
; A[2] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+16]
add r12, rax
adc r10, rdx
adc r11, 0
; A[3] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+24]
add r12, rax
adc r10, rdx
adc r11, 0
; A[4] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+32]
add r12, rax
adc r10, rdx
adc r11, 0
; A[5] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+40]
add r12, rax
adc r10, rdx
adc r11, 0
; A[6] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+48]
add r12, rax
adc r10, rdx
adc r11, 0
; A[7] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+56]
add r12, rax
adc r10, rdx
adc r11, 0
; A[8] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+64]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+64], r12
; A[0] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[1] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+8]
add r10, rax
adc r11, rdx
adc r12, 0
; A[2] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+16]
add r10, rax
adc r11, rdx
adc r12, 0
; A[3] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
adc r12, 0
; A[4] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+32]
add r10, rax
adc r11, rdx
adc r12, 0
; A[5] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+40]
add r10, rax
adc r11, rdx
adc r12, 0
; A[6] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+48]
add r10, rax
adc r11, rdx
adc r12, 0
; A[7] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+56]
add r10, rax
adc r11, rdx
adc r12, 0
; A[8] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+64]
add r10, rax
adc r11, rdx
adc r12, 0
; A[9] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+72]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rsp+72], r10
; A[0] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+8]
add r11, rax
adc r12, rdx
adc r10, 0
; A[2] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+16]
add r11, rax
adc r12, rdx
adc r10, 0
; A[3] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+24]
add r11, rax
adc r12, rdx
adc r10, 0
; A[4] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+32]
add r11, rax
adc r12, rdx
adc r10, 0
; A[5] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+40]
add r11, rax
adc r12, rdx
adc r10, 0
; A[6] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+48]
add r11, rax
adc r12, rdx
adc r10, 0
; A[7] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+56]
add r11, rax
adc r12, rdx
adc r10, 0
; A[8] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+64]
add r11, rax
adc r12, rdx
adc r10, 0
; A[9] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+72]
add r11, rax
adc r12, rdx
adc r10, 0
; A[10] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+80]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+80], r11
; A[0] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+8]
add r12, rax
adc r10, rdx
adc r11, 0
; A[2] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+16]
add r12, rax
adc r10, rdx
adc r11, 0
; A[3] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+24]
add r12, rax
adc r10, rdx
adc r11, 0
; A[4] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+32]
add r12, rax
adc r10, rdx
adc r11, 0
; A[5] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+40]
add r12, rax
adc r10, rdx
adc r11, 0
; A[6] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+48]
add r12, rax
adc r10, rdx
adc r11, 0
; A[7] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+56]
add r12, rax
adc r10, rdx
adc r11, 0
; A[8] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+64]
add r12, rax
adc r10, rdx
adc r11, 0
; A[9] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+72]
add r12, rax
adc r10, rdx
adc r11, 0
; A[10] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+80]
add r12, rax
adc r10, rdx
adc r11, 0
; A[11] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+88]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+88], r12
; A[0] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[1] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+8]
add r10, rax
adc r11, rdx
adc r12, 0
; A[2] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+16]
add r10, rax
adc r11, rdx
adc r12, 0
; A[3] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
adc r12, 0
; A[4] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+32]
add r10, rax
adc r11, rdx
adc r12, 0
; A[5] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+40]
add r10, rax
adc r11, rdx
adc r12, 0
; A[6] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+48]
add r10, rax
adc r11, rdx
adc r12, 0
; A[7] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+56]
add r10, rax
adc r11, rdx
adc r12, 0
; A[8] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+64]
add r10, rax
adc r11, rdx
adc r12, 0
; A[9] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+72]
add r10, rax
adc r11, rdx
adc r12, 0
; A[10] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+80]
add r10, rax
adc r11, rdx
adc r12, 0
; A[11] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+88]
add r10, rax
adc r11, rdx
adc r12, 0
; A[12] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+96]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rsp+96], r10
; A[0] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+8]
add r11, rax
adc r12, rdx
adc r10, 0
; A[2] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+16]
add r11, rax
adc r12, rdx
adc r10, 0
; A[3] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+24]
add r11, rax
adc r12, rdx
adc r10, 0
; A[4] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+32]
add r11, rax
adc r12, rdx
adc r10, 0
; A[5] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+40]
add r11, rax
adc r12, rdx
adc r10, 0
; A[6] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+48]
add r11, rax
adc r12, rdx
adc r10, 0
; A[7] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+56]
add r11, rax
adc r12, rdx
adc r10, 0
; A[8] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+64]
add r11, rax
adc r12, rdx
adc r10, 0
; A[9] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+72]
add r11, rax
adc r12, rdx
adc r10, 0
; A[10] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+80]
add r11, rax
adc r12, rdx
adc r10, 0
; A[11] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+88]
add r11, rax
adc r12, rdx
adc r10, 0
; A[12] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+96]
add r11, rax
adc r12, rdx
adc r10, 0
; A[13] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+104]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+104], r11
; A[0] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+8]
add r12, rax
adc r10, rdx
adc r11, 0
; A[2] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+16]
add r12, rax
adc r10, rdx
adc r11, 0
; A[3] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+24]
add r12, rax
adc r10, rdx
adc r11, 0
; A[4] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+32]
add r12, rax
adc r10, rdx
adc r11, 0
; A[5] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+40]
add r12, rax
adc r10, rdx
adc r11, 0
; A[6] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+48]
add r12, rax
adc r10, rdx
adc r11, 0
; A[7] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+56]
add r12, rax
adc r10, rdx
adc r11, 0
; A[8] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+64]
add r12, rax
adc r10, rdx
adc r11, 0
; A[9] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+72]
add r12, rax
adc r10, rdx
adc r11, 0
; A[10] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+80]
add r12, rax
adc r10, rdx
adc r11, 0
; A[11] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+88]
add r12, rax
adc r10, rdx
adc r11, 0
; A[12] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+96]
add r12, rax
adc r10, rdx
adc r11, 0
; A[13] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+104]
add r12, rax
adc r10, rdx
adc r11, 0
; A[14] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+112]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+112], r12
; A[0] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[1] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+8]
add r10, rax
adc r11, rdx
adc r12, 0
; A[2] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+16]
add r10, rax
adc r11, rdx
adc r12, 0
; A[3] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
adc r12, 0
; A[4] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+32]
add r10, rax
adc r11, rdx
adc r12, 0
; A[5] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+40]
add r10, rax
adc r11, rdx
adc r12, 0
; A[6] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+48]
add r10, rax
adc r11, rdx
adc r12, 0
; A[7] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+56]
add r10, rax
adc r11, rdx
adc r12, 0
; A[8] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+64]
add r10, rax
adc r11, rdx
adc r12, 0
; A[9] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+72]
add r10, rax
adc r11, rdx
adc r12, 0
; A[10] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+80]
add r10, rax
adc r11, rdx
adc r12, 0
; A[11] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+88]
add r10, rax
adc r11, rdx
adc r12, 0
; A[12] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+96]
add r10, rax
adc r11, rdx
adc r12, 0
; A[13] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+104]
add r10, rax
adc r11, rdx
adc r12, 0
; A[14] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+112]
add r10, rax
adc r11, rdx
adc r12, 0
; A[15] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+120]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rsp+120], r10
; A[1] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+8]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[2] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+16]
add r11, rax
adc r12, rdx
adc r10, 0
; A[3] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+24]
add r11, rax
adc r12, rdx
adc r10, 0
; A[4] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+32]
add r11, rax
adc r12, rdx
adc r10, 0
; A[5] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+40]
add r11, rax
adc r12, rdx
adc r10, 0
; A[6] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+48]
add r11, rax
adc r12, rdx
adc r10, 0
; A[7] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+56]
add r11, rax
adc r12, rdx
adc r10, 0
; A[8] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+64]
add r11, rax
adc r12, rdx
adc r10, 0
; A[9] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+72]
add r11, rax
adc r12, rdx
adc r10, 0
; A[10] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+80]
add r11, rax
adc r12, rdx
adc r10, 0
; A[11] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+88]
add r11, rax
adc r12, rdx
adc r10, 0
; A[12] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+96]
add r11, rax
adc r12, rdx
adc r10, 0
; A[13] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+104]
add r11, rax
adc r12, rdx
adc r10, 0
; A[14] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+112]
add r11, rax
adc r12, rdx
adc r10, 0
; A[15] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+120]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rcx+128], r11
; A[2] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+16]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[3] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+24]
add r12, rax
adc r10, rdx
adc r11, 0
; A[4] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+32]
add r12, rax
adc r10, rdx
adc r11, 0
; A[5] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+40]
add r12, rax
adc r10, rdx
adc r11, 0
; A[6] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+48]
add r12, rax
adc r10, rdx
adc r11, 0
; A[7] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+56]
add r12, rax
adc r10, rdx
adc r11, 0
; A[8] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+64]
add r12, rax
adc r10, rdx
adc r11, 0
; A[9] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+72]
add r12, rax
adc r10, rdx
adc r11, 0
; A[10] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+80]
add r12, rax
adc r10, rdx
adc r11, 0
; A[11] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+88]
add r12, rax
adc r10, rdx
adc r11, 0
; A[12] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+96]
add r12, rax
adc r10, rdx
adc r11, 0
; A[13] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+104]
add r12, rax
adc r10, rdx
adc r11, 0
; A[14] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+112]
add r12, rax
adc r10, rdx
adc r11, 0
; A[15] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+120]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+136], r12
; A[3] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+24]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[4] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+32]
add r10, rax
adc r11, rdx
adc r12, 0
; A[5] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+40]
add r10, rax
adc r11, rdx
adc r12, 0
; A[6] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+48]
add r10, rax
adc r11, rdx
adc r12, 0
; A[7] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+56]
add r10, rax
adc r11, rdx
adc r12, 0
; A[8] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+64]
add r10, rax
adc r11, rdx
adc r12, 0
; A[9] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+72]
add r10, rax
adc r11, rdx
adc r12, 0
; A[10] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+80]
add r10, rax
adc r11, rdx
adc r12, 0
; A[11] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+88]
add r10, rax
adc r11, rdx
adc r12, 0
; A[12] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+96]
add r10, rax
adc r11, rdx
adc r12, 0
; A[13] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+104]
add r10, rax
adc r11, rdx
adc r12, 0
; A[14] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+112]
add r10, rax
adc r11, rdx
adc r12, 0
; A[15] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+120]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rcx+144], r10
; A[4] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+32]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[5] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+40]
add r11, rax
adc r12, rdx
adc r10, 0
; A[6] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+48]
add r11, rax
adc r12, rdx
adc r10, 0
; A[7] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+56]
add r11, rax
adc r12, rdx
adc r10, 0
; A[8] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+64]
add r11, rax
adc r12, rdx
adc r10, 0
; A[9] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+72]
add r11, rax
adc r12, rdx
adc r10, 0
; A[10] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+80]
add r11, rax
adc r12, rdx
adc r10, 0
; A[11] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+88]
add r11, rax
adc r12, rdx
adc r10, 0
; A[12] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+96]
add r11, rax
adc r12, rdx
adc r10, 0
; A[13] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+104]
add r11, rax
adc r12, rdx
adc r10, 0
; A[14] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+112]
add r11, rax
adc r12, rdx
adc r10, 0
; A[15] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+120]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rcx+152], r11
; A[5] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+40]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[6] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+48]
add r12, rax
adc r10, rdx
adc r11, 0
; A[7] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+56]
add r12, rax
adc r10, rdx
adc r11, 0
; A[8] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+64]
add r12, rax
adc r10, rdx
adc r11, 0
; A[9] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+72]
add r12, rax
adc r10, rdx
adc r11, 0
; A[10] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+80]
add r12, rax
adc r10, rdx
adc r11, 0
; A[11] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+88]
add r12, rax
adc r10, rdx
adc r11, 0
; A[12] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+96]
add r12, rax
adc r10, rdx
adc r11, 0
; A[13] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+104]
add r12, rax
adc r10, rdx
adc r11, 0
; A[14] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+112]
add r12, rax
adc r10, rdx
adc r11, 0
; A[15] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+120]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+160], r12
; A[6] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+48]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[7] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+56]
add r10, rax
adc r11, rdx
adc r12, 0
; A[8] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+64]
add r10, rax
adc r11, rdx
adc r12, 0
; A[9] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+72]
add r10, rax
adc r11, rdx
adc r12, 0
; A[10] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+80]
add r10, rax
adc r11, rdx
adc r12, 0
; A[11] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+88]
add r10, rax
adc r11, rdx
adc r12, 0
; A[12] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+96]
add r10, rax
adc r11, rdx
adc r12, 0
; A[13] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+104]
add r10, rax
adc r11, rdx
adc r12, 0
; A[14] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+112]
add r10, rax
adc r11, rdx
adc r12, 0
; A[15] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+120]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rcx+168], r10
; A[7] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+56]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[8] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+64]
add r11, rax
adc r12, rdx
adc r10, 0
; A[9] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+72]
add r11, rax
adc r12, rdx
adc r10, 0
; A[10] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+80]
add r11, rax
adc r12, rdx
adc r10, 0
; A[11] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+88]
add r11, rax
adc r12, rdx
adc r10, 0
; A[12] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+96]
add r11, rax
adc r12, rdx
adc r10, 0
; A[13] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+104]
add r11, rax
adc r12, rdx
adc r10, 0
; A[14] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+112]
add r11, rax
adc r12, rdx
adc r10, 0
; A[15] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+120]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rcx+176], r11
; A[8] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+64]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[9] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+72]
add r12, rax
adc r10, rdx
adc r11, 0
; A[10] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+80]
add r12, rax
adc r10, rdx
adc r11, 0
; A[11] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+88]
add r12, rax
adc r10, rdx
adc r11, 0
; A[12] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+96]
add r12, rax
adc r10, rdx
adc r11, 0
; A[13] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+104]
add r12, rax
adc r10, rdx
adc r11, 0
; A[14] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+112]
add r12, rax
adc r10, rdx
adc r11, 0
; A[15] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+120]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+184], r12
; A[9] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+72]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[10] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+80]
add r10, rax
adc r11, rdx
adc r12, 0
; A[11] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+88]
add r10, rax
adc r11, rdx
adc r12, 0
; A[12] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+96]
add r10, rax
adc r11, rdx
adc r12, 0
; A[13] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+104]
add r10, rax
adc r11, rdx
adc r12, 0
; A[14] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+112]
add r10, rax
adc r11, rdx
adc r12, 0
; A[15] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+120]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rcx+192], r10
; A[10] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+80]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[11] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+88]
add r11, rax
adc r12, rdx
adc r10, 0
; A[12] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+96]
add r11, rax
adc r12, rdx
adc r10, 0
; A[13] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+104]
add r11, rax
adc r12, rdx
adc r10, 0
; A[14] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+112]
add r11, rax
adc r12, rdx
adc r10, 0
; A[15] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+120]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rcx+200], r11
; A[11] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+88]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[12] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+96]
add r12, rax
adc r10, rdx
adc r11, 0
; A[13] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+104]
add r12, rax
adc r10, rdx
adc r11, 0
; A[14] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+112]
add r12, rax
adc r10, rdx
adc r11, 0
; A[15] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+120]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+208], r12
; A[12] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+96]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[13] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+104]
add r10, rax
adc r11, rdx
adc r12, 0
; A[14] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+112]
add r10, rax
adc r11, rdx
adc r12, 0
; A[15] * B[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r9+120]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rcx+216], r10
; A[13] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+104]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[14] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+112]
add r11, rax
adc r12, rdx
adc r10, 0
; A[15] * B[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r9+120]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rcx+224], r11
; A[14] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+112]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[15] * B[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r9+120]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+232], r12
; A[15] * B[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r9+120]
add r10, rax
adc r11, rdx
mov QWORD PTR [rcx+240], r10
mov QWORD PTR [rcx+248], r11
mov rax, QWORD PTR [rsp]
mov rdx, QWORD PTR [rsp+8]
mov r10, QWORD PTR [rsp+16]
mov r11, QWORD PTR [rsp+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], rdx
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
mov rax, QWORD PTR [rsp+32]
mov rdx, QWORD PTR [rsp+40]
mov r10, QWORD PTR [rsp+48]
mov r11, QWORD PTR [rsp+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], rdx
mov QWORD PTR [rcx+48], r10
mov QWORD PTR [rcx+56], r11
mov rax, QWORD PTR [rsp+64]
mov rdx, QWORD PTR [rsp+72]
mov r10, QWORD PTR [rsp+80]
mov r11, QWORD PTR [rsp+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], rdx
mov QWORD PTR [rcx+80], r10
mov QWORD PTR [rcx+88], r11
mov rax, QWORD PTR [rsp+96]
mov rdx, QWORD PTR [rsp+104]
mov r10, QWORD PTR [rsp+112]
mov r11, QWORD PTR [rsp+120]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], rdx
mov QWORD PTR [rcx+112], r10
mov QWORD PTR [rcx+120], r11
add rsp, 128
pop r12
ret
sp_2048_mul_16 ENDP
_text ENDS
; /* Square a and put result in r. (r = a * a)
; *
; * r A single precision integer. Written through rcx: 32 64-bit words
; *   (byte offsets 0..248).
; * a A single precision integer. Read through rdx (copied to r8): 16
; *   64-bit words (byte offsets 0..120).
; *
; * The product is built one result word (column) at a time. r9, r10 and
; * r11 rotate as a three-word column accumulator: low word, next word,
; * carry word. For the wider columns r12:r13:r14 first collect the cross
; * products A[i] * A[j] (i < j), are doubled once, and then receive the
; * square term A[k] * A[k] when the column has one.
; *
; * Result words 0..15 are staged in a 128-byte stack buffer and copied to
; * r only after every input word has been read; words 16..31 are stored
; * straight to r. Presumably this is what makes r == a safe - confirm
; * against callers.
; *
; * rax and rdx are overwritten by every mul. r12, r13 and r14 are saved
; * on entry and restored before returning.
; *
; * NOTE(review): the procedure moves rsp and pushes registers but is
; * declared with a plain PROC (no FRAME/unwind directives) - confirm
; * whether unwind data is needed for this build.
; */
_text SEGMENT READONLY PARA
sp_2048_sqr_16 PROC
push r12
push r13
push r14
; mul overwrites rdx, so keep the pointer to a in r8
mov r8, rdx
; 128-byte staging buffer for result words 0..15
sub rsp, 128
; A[0] * A[0]
mov rax, QWORD PTR [r8]
mul rax
xor r11, r11
mov QWORD PTR [rsp], rax
mov r10, rdx
; A[0] * A[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r8]
xor r9, r9
add r10, rax
adc r11, rdx
adc r9, 0
; a cross product counts twice in a square: add it a second time
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rsp+8], r10
; A[0] * A[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r8]
xor r10, r10
add r11, rax
adc r9, rdx
adc r10, 0
add r11, rax
adc r9, rdx
adc r10, 0
; A[1] * A[1]
mov rax, QWORD PTR [r8+8]
mul rax
add r11, rax
adc r9, rdx
adc r10, 0
mov QWORD PTR [rsp+16], r11
; A[0] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8]
xor r11, r11
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
; A[1] * A[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r8+8]
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+24], r9
; A[0] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8]
xor r9, r9
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
; A[1] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8+8]
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
; A[2] * A[2]
mov rax, QWORD PTR [r8+16]
mul rax
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rsp+32], r10
; Result words 5..25: the cross products of a column are summed in
; r12:r13:r14 and doubled once instead of being added twice each.
; A[0] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; double the summed cross products
add r12, r12
adc r13, r13
adc r14, r14
; fold the column sum into the rotating accumulator
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rsp+40], r11
; A[0] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[3]
mov rax, QWORD PTR [r8+24]
mul rax
; double the cross products first, then add the square term (still in
; rdx:rax) once
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rsp+48], r9
; A[0] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rsp+56], r10
; A[0] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[4]
mov rax, QWORD PTR [r8+32]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rsp+64], r11
; A[0] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rsp+72], r9
; A[0] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[5]
mov rax, QWORD PTR [r8+40]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rsp+80], r10
; A[0] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rsp+88], r11
; A[0] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[6]
mov rax, QWORD PTR [r8+48]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rsp+96], r9
; A[0] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rsp+104], r10
; A[0] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[7]
mov rax, QWORD PTR [r8+56]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rsp+112], r11
; A[0] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rsp+120], r9
; A[1] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+8]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[2] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
; A[8] * A[8]
mov rax, QWORD PTR [r8+64]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r10, r12
adc r11, r13
adc r9, r14
; from result word 16 on, words are stored straight to r (no staging)
mov QWORD PTR [rcx+128], r10
; A[2] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+16]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[3] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
; A[8] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+64]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rcx+136], r11
; A[3] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+24]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[4] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
; A[8] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+64]
add r12, rax
adc r13, rdx
adc r14, 0
; A[9] * A[9]
mov rax, QWORD PTR [r8+72]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rcx+144], r9
; A[4] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+32]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[5] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
; A[8] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+64]
add r12, rax
adc r13, rdx
adc r14, 0
; A[9] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+72]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rcx+152], r10
; A[5] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+40]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[6] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
; A[8] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+64]
add r12, rax
adc r13, rdx
adc r14, 0
; A[9] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+72]
add r12, rax
adc r13, rdx
adc r14, 0
; A[10] * A[10]
mov rax, QWORD PTR [r8+80]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rcx+160], r11
; A[6] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+48]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[7] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
; A[8] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+64]
add r12, rax
adc r13, rdx
adc r14, 0
; A[9] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+72]
add r12, rax
adc r13, rdx
adc r14, 0
; A[10] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+80]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rcx+168], r9
; A[7] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+56]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[8] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+64]
add r12, rax
adc r13, rdx
adc r14, 0
; A[9] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+72]
add r12, rax
adc r13, rdx
adc r14, 0
; A[10] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+80]
add r12, rax
adc r13, rdx
adc r14, 0
; A[11] * A[11]
mov rax, QWORD PTR [r8+88]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rcx+176], r10
; A[8] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+64]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[9] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+72]
add r12, rax
adc r13, rdx
adc r14, 0
; A[10] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+80]
add r12, rax
adc r13, rdx
adc r14, 0
; A[11] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+88]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rcx+184], r11
; A[9] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+72]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[10] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+80]
add r12, rax
adc r13, rdx
adc r14, 0
; A[11] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+88]
add r12, rax
adc r13, rdx
adc r14, 0
; A[12] * A[12]
mov rax, QWORD PTR [r8+96]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rcx+192], r9
; A[10] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+80]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[11] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+88]
add r12, rax
adc r13, rdx
adc r14, 0
; A[12] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+96]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rcx+200], r10
; Result words 26..30 have at most two cross products each: every cross
; product is again added twice directly into the accumulator.
; A[11] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+88]
xor r10, r10
add r11, rax
adc r9, rdx
adc r10, 0
add r11, rax
adc r9, rdx
adc r10, 0
; A[12] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+96]
add r11, rax
adc r9, rdx
adc r10, 0
add r11, rax
adc r9, rdx
adc r10, 0
; A[13] * A[13]
mov rax, QWORD PTR [r8+104]
mul rax
add r11, rax
adc r9, rdx
adc r10, 0
mov QWORD PTR [rcx+208], r11
; A[12] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+96]
xor r11, r11
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
; A[13] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+104]
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+216], r9
; A[13] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+104]
xor r9, r9
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
; A[14] * A[14]
mov rax, QWORD PTR [r8+112]
mul rax
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rcx+224], r10
; A[14] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+112]
xor r10, r10
add r11, rax
adc r9, rdx
adc r10, 0
add r11, rax
adc r9, rdx
adc r10, 0
mov QWORD PTR [rcx+232], r11
; A[15] * A[15]
; last term: the two remaining accumulator words become result words 30
; and 31
mov rax, QWORD PTR [r8+120]
mul rax
add r9, rax
adc r10, rdx
mov QWORD PTR [rcx+240], r9
mov QWORD PTR [rcx+248], r10
; copy the staged result words 0..15 from the stack buffer to r, four
; words at a time
mov rax, QWORD PTR [rsp]
mov rdx, QWORD PTR [rsp+8]
mov r12, QWORD PTR [rsp+16]
mov r13, QWORD PTR [rsp+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], rdx
mov QWORD PTR [rcx+16], r12
mov QWORD PTR [rcx+24], r13
mov rax, QWORD PTR [rsp+32]
mov rdx, QWORD PTR [rsp+40]
mov r12, QWORD PTR [rsp+48]
mov r13, QWORD PTR [rsp+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], rdx
mov QWORD PTR [rcx+48], r12
mov QWORD PTR [rcx+56], r13
mov rax, QWORD PTR [rsp+64]
mov rdx, QWORD PTR [rsp+72]
mov r12, QWORD PTR [rsp+80]
mov r13, QWORD PTR [rsp+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], rdx
mov QWORD PTR [rcx+80], r12
mov QWORD PTR [rcx+88], r13
mov rax, QWORD PTR [rsp+96]
mov rdx, QWORD PTR [rsp+104]
mov r12, QWORD PTR [rsp+112]
mov r13, QWORD PTR [rsp+120]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], rdx
mov QWORD PTR [rcx+112], r12
mov QWORD PTR [rcx+120], r13
; release the staging buffer and restore the saved registers in reverse
; push order
add rsp, 128
pop r14
pop r13
pop r12
ret
sp_2048_sqr_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Multiply a and b into r. (r = a * b)
; *
; * r Result of multiplication.
; * a First number to multiply.
; * b Second number to multiply.
; */
_text SEGMENT READONLY PARA
sp_2048_mul_avx2_16 PROC
push rbx
push rbp
push r12
push r13
push r14
push r15
push rdi
mov rbp, r8
mov r8, rcx
mov r9, rdx
sub rsp, 128
cmp r9, r8
mov rbx, rsp
cmovne rbx, r8
cmp rbp, r8
cmove rbx, rsp
add r8, 128
xor rdi, rdi
mov rdx, QWORD PTR [r9]
; A[0] * B[0]
mulx r11, r10, QWORD PTR [rbp]
; A[0] * B[1]
mulx r12, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx], r10
adcx r11, rax
; A[0] * B[2]
mulx r13, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+8], r11
adcx r12, rax
; A[0] * B[3]
mulx r14, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+16], r12
adcx r13, rax
mov QWORD PTR [rbx+24], r13
; A[0] * B[4]
mulx r10, rax, QWORD PTR [rbp+32]
adcx r14, rax
; A[0] * B[5]
mulx r11, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+32], r14
adcx r10, rax
; A[0] * B[6]
mulx r12, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+40], r10
adcx r11, rax
; A[0] * B[7]
mulx r13, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+48], r11
adcx r12, rax
mov QWORD PTR [rbx+56], r12
; A[0] * B[8]
mulx r14, rax, QWORD PTR [rbp+64]
adcx r13, rax
; A[0] * B[9]
mulx r10, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+64], r13
adcx r14, rax
; A[0] * B[10]
mulx r11, rax, QWORD PTR [rbp+80]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
; A[0] * B[11]
mulx r12, rax, QWORD PTR [rbp+88]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
mov QWORD PTR [rbx+88], r11
; A[0] * B[12]
mulx r13, rax, QWORD PTR [rbp+96]
adcx r12, rax
; A[0] * B[13]
mulx r14, rax, QWORD PTR [rbp+104]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
; A[0] * B[14]
mulx r10, rax, QWORD PTR [rbp+112]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
; A[0] * B[15]
mulx r11, rax, QWORD PTR [rbp+120]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adcx r11, rdi
mov r15, rdi
adcx r15, rdi
mov QWORD PTR [rbx+120], r10
mov QWORD PTR [r8], r11
mov rdx, QWORD PTR [r9+8]
mov r11, QWORD PTR [rbx+8]
mov r12, QWORD PTR [rbx+16]
mov r13, QWORD PTR [rbx+24]
mov r14, QWORD PTR [rbx+32]
mov r10, QWORD PTR [rbx+40]
; A[1] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r11, rax
adox r12, rcx
; A[1] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+8], r11
adcx r12, rax
adox r13, rcx
; A[1] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+16], r12
adcx r13, rax
adox r14, rcx
; A[1] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+24], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [rbx+32], r14
mov r11, QWORD PTR [rbx+48]
mov r12, QWORD PTR [rbx+56]
mov r13, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+72]
; A[1] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r10, rax
adox r11, rcx
; A[1] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+40], r10
adcx r11, rax
adox r12, rcx
; A[1] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+48], r11
adcx r12, rax
adox r13, rcx
; A[1] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+56], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [rbx+64], r13
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
; A[1] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r14, rax
adox r10, rcx
; A[1] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
adox r11, rcx
; A[1] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
; A[1] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [rbx+96], r12
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
; A[1] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r13, rax
adox r14, rcx
; A[1] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
; A[1] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[1] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [rbx+120], r10
mov r12, rdi
adcx r11, rax
adox r12, rcx
adcx r12, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8], r11
mov QWORD PTR [r8+8], r12
mov rdx, QWORD PTR [r9+16]
mov r12, QWORD PTR [rbx+16]
mov r13, QWORD PTR [rbx+24]
mov r14, QWORD PTR [rbx+32]
mov r10, QWORD PTR [rbx+40]
mov r11, QWORD PTR [rbx+48]
; A[2] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r12, rax
adox r13, rcx
; A[2] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+16], r12
adcx r13, rax
adox r14, rcx
; A[2] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+24], r13
adcx r14, rax
adox r10, rcx
; A[2] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+32], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+40], r10
mov r12, QWORD PTR [rbx+56]
mov r13, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+72]
mov r10, QWORD PTR [rbx+80]
; A[2] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r11, rax
adox r12, rcx
; A[2] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+48], r11
adcx r12, rax
adox r13, rcx
; A[2] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+56], r12
adcx r13, rax
adox r14, rcx
; A[2] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+64], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [rbx+72], r14
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
; A[2] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r10, rax
adox r11, rcx
; A[2] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
; A[2] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
; A[2] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [rbx+104], r13
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
; A[2] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r14, rax
adox r10, rcx
; A[2] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[2] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[2] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8], r11
mov r13, rdi
adcx r12, rax
adox r13, rcx
adcx r13, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+8], r12
mov QWORD PTR [r8+16], r13
mov rdx, QWORD PTR [r9+24]
mov r13, QWORD PTR [rbx+24]
mov r14, QWORD PTR [rbx+32]
mov r10, QWORD PTR [rbx+40]
mov r11, QWORD PTR [rbx+48]
mov r12, QWORD PTR [rbx+56]
; A[3] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r13, rax
adox r14, rcx
; A[3] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+24], r13
adcx r14, rax
adox r10, rcx
; A[3] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+32], r14
adcx r10, rax
adox r11, rcx
; A[3] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+40], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+48], r11
mov r13, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+72]
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
; A[3] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r12, rax
adox r13, rcx
; A[3] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+56], r12
adcx r13, rax
adox r14, rcx
; A[3] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+64], r13
adcx r14, rax
adox r10, rcx
; A[3] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+80], r10
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
; A[3] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r11, rax
adox r12, rcx
; A[3] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
; A[3] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
; A[3] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [rbx+112], r14
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
; A[3] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r10, rax
adox r11, rcx
; A[3] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[3] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[3] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+8], r12
mov r14, rdi
adcx r13, rax
adox r14, rcx
adcx r14, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+16], r13
mov QWORD PTR [r8+24], r14
mov rdx, QWORD PTR [r9+32]
mov r14, QWORD PTR [rbx+32]
mov r10, QWORD PTR [rbx+40]
mov r11, QWORD PTR [rbx+48]
mov r12, QWORD PTR [rbx+56]
mov r13, QWORD PTR [rbx+64]
; A[4] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r14, rax
adox r10, rcx
; A[4] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+32], r14
adcx r10, rax
adox r11, rcx
; A[4] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+40], r10
adcx r11, rax
adox r12, rcx
; A[4] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+48], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [rbx+56], r12
mov r14, QWORD PTR [rbx+72]
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
; A[4] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r13, rax
adox r14, rcx
; A[4] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+64], r13
adcx r14, rax
adox r10, rcx
; A[4] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
adox r11, rcx
; A[4] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+88], r11
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
; A[4] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r12, rax
adox r13, rcx
; A[4] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
; A[4] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
; A[4] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+120], r10
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
; A[4] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r11, rax
adox r12, rcx
; A[4] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[4] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
; A[4] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+16], r13
mov r10, rdi
adcx r14, rax
adox r10, rcx
adcx r10, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+24], r14
mov QWORD PTR [r8+32], r10
mov rdx, QWORD PTR [r9+40]
mov r10, QWORD PTR [rbx+40]
mov r11, QWORD PTR [rbx+48]
mov r12, QWORD PTR [rbx+56]
mov r13, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+72]
; A[5] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r10, rax
adox r11, rcx
; A[5] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+40], r10
adcx r11, rax
adox r12, rcx
; A[5] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+48], r11
adcx r12, rax
adox r13, rcx
; A[5] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+56], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [rbx+64], r13
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
; A[5] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r14, rax
adox r10, rcx
; A[5] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
adox r11, rcx
; A[5] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
; A[5] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [rbx+96], r12
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
; A[5] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r13, rax
adox r14, rcx
; A[5] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
; A[5] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[5] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8], r11
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
; A[5] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r12, rax
adox r13, rcx
; A[5] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
; A[5] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
; A[5] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+24], r14
mov r11, rdi
adcx r10, rax
adox r11, rcx
adcx r11, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+32], r10
mov QWORD PTR [r8+40], r11
mov rdx, QWORD PTR [r9+48]
mov r11, QWORD PTR [rbx+48]
mov r12, QWORD PTR [rbx+56]
mov r13, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+72]
mov r10, QWORD PTR [rbx+80]
; A[6] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r11, rax
adox r12, rcx
; A[6] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+48], r11
adcx r12, rax
adox r13, rcx
; A[6] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+56], r12
adcx r13, rax
adox r14, rcx
; A[6] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+64], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [rbx+72], r14
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
; A[6] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r10, rax
adox r11, rcx
; A[6] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
; A[6] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
; A[6] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [rbx+104], r13
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
; A[6] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r14, rax
adox r10, rcx
; A[6] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[6] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[6] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r8+8], r12
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
; A[6] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r13, rax
adox r14, rcx
; A[6] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
; A[6] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
; A[6] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+32], r10
mov r12, rdi
adcx r11, rax
adox r12, rcx
adcx r12, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+40], r11
mov QWORD PTR [r8+48], r12
mov rdx, QWORD PTR [r9+56]
mov r12, QWORD PTR [rbx+56]
mov r13, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+72]
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
; A[7] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r12, rax
adox r13, rcx
; A[7] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+56], r12
adcx r13, rax
adox r14, rcx
; A[7] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+64], r13
adcx r14, rax
adox r10, rcx
; A[7] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+80], r10
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
; A[7] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r11, rax
adox r12, rcx
; A[7] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
; A[7] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
; A[7] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [rbx+112], r14
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
; A[7] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r10, rax
adox r11, rcx
; A[7] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[7] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[7] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [r8+16], r13
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
mov r12, QWORD PTR [r8+48]
; A[7] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r14, rax
adox r10, rcx
; A[7] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
; A[7] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+32], r10
adcx r11, rax
adox r12, rcx
; A[7] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+40], r11
mov r13, rdi
adcx r12, rax
adox r13, rcx
adcx r13, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+48], r12
mov QWORD PTR [r8+56], r13
mov rdx, QWORD PTR [r9+64]
mov r13, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+72]
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
; A[8] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r13, rax
adox r14, rcx
; A[8] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+64], r13
adcx r14, rax
adox r10, rcx
; A[8] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
adox r11, rcx
; A[8] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+88], r11
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
; A[8] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r12, rax
adox r13, rcx
; A[8] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
; A[8] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
; A[8] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+120], r10
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
; A[8] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r11, rax
adox r12, rcx
; A[8] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[8] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
; A[8] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [r8+24], r14
mov r11, QWORD PTR [r8+40]
mov r12, QWORD PTR [r8+48]
mov r13, QWORD PTR [r8+56]
; A[8] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r10, rax
adox r11, rcx
; A[8] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+32], r10
adcx r11, rax
adox r12, rcx
; A[8] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+40], r11
adcx r12, rax
adox r13, rcx
; A[8] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+48], r12
mov r14, rdi
adcx r13, rax
adox r14, rcx
adcx r14, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+56], r13
mov QWORD PTR [r8+64], r14
mov rdx, QWORD PTR [r9+72]
mov r14, QWORD PTR [rbx+72]
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
; A[9] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r14, rax
adox r10, rcx
; A[9] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
adox r11, rcx
; A[9] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
; A[9] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [rbx+96], r12
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
; A[9] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r13, rax
adox r14, rcx
; A[9] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
; A[9] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[9] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8], r11
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
; A[9] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r12, rax
adox r13, rcx
; A[9] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
; A[9] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
; A[9] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+32], r10
mov r12, QWORD PTR [r8+48]
mov r13, QWORD PTR [r8+56]
mov r14, QWORD PTR [r8+64]
; A[9] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r11, rax
adox r12, rcx
; A[9] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+40], r11
adcx r12, rax
adox r13, rcx
; A[9] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+48], r12
adcx r13, rax
adox r14, rcx
; A[9] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+56], r13
mov r10, rdi
adcx r14, rax
adox r10, rcx
adcx r10, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+64], r14
mov QWORD PTR [r8+72], r10
mov rdx, QWORD PTR [r9+80]
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
; A[10] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r10, rax
adox r11, rcx
; A[10] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
; A[10] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
; A[10] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [rbx+104], r13
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
; A[10] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r14, rax
adox r10, rcx
; A[10] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[10] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[10] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r8+8], r12
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
mov r12, QWORD PTR [r8+48]
; A[10] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r13, rax
adox r14, rcx
; A[10] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
; A[10] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
; A[10] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+32], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+40], r11
mov r13, QWORD PTR [r8+56]
mov r14, QWORD PTR [r8+64]
mov r10, QWORD PTR [r8+72]
; A[10] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r12, rax
adox r13, rcx
; A[10] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+48], r12
adcx r13, rax
adox r14, rcx
; A[10] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+56], r13
adcx r14, rax
adox r10, rcx
; A[10] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+64], r14
mov r11, rdi
adcx r10, rax
adox r11, rcx
adcx r11, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+72], r10
mov QWORD PTR [r8+80], r11
mov rdx, QWORD PTR [r9+88]
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
; A[11] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r11, rax
adox r12, rcx
; A[11] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
; A[11] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
; A[11] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [rbx+112], r14
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
; A[11] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r10, rax
adox r11, rcx
; A[11] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[11] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[11] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [r8+16], r13
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
mov r12, QWORD PTR [r8+48]
mov r13, QWORD PTR [r8+56]
; A[11] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r14, rax
adox r10, rcx
; A[11] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
; A[11] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+32], r10
adcx r11, rax
adox r12, rcx
; A[11] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+40], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r8+48], r12
mov r14, QWORD PTR [r8+64]
mov r10, QWORD PTR [r8+72]
mov r11, QWORD PTR [r8+80]
; A[11] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r13, rax
adox r14, rcx
; A[11] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+56], r13
adcx r14, rax
adox r10, rcx
; A[11] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+64], r14
adcx r10, rax
adox r11, rcx
; A[11] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+72], r10
mov r12, rdi
adcx r11, rax
adox r12, rcx
adcx r12, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+80], r11
mov QWORD PTR [r8+88], r12
mov rdx, QWORD PTR [r9+96]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
; A[12] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r12, rax
adox r13, rcx
; A[12] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
; A[12] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
; A[12] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+120], r10
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
; A[12] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r11, rax
adox r12, rcx
; A[12] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[12] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
; A[12] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [r8+24], r14
mov r11, QWORD PTR [r8+40]
mov r12, QWORD PTR [r8+48]
mov r13, QWORD PTR [r8+56]
mov r14, QWORD PTR [r8+64]
; A[12] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r10, rax
adox r11, rcx
; A[12] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+32], r10
adcx r11, rax
adox r12, rcx
; A[12] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+40], r11
adcx r12, rax
adox r13, rcx
; A[12] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+48], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [r8+56], r13
mov r10, QWORD PTR [r8+72]
mov r11, QWORD PTR [r8+80]
mov r12, QWORD PTR [r8+88]
; A[12] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r14, rax
adox r10, rcx
; A[12] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+64], r14
adcx r10, rax
adox r11, rcx
; A[12] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+72], r10
adcx r11, rax
adox r12, rcx
; A[12] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+80], r11
mov r13, rdi
adcx r12, rax
adox r13, rcx
adcx r13, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+88], r12
mov QWORD PTR [r8+96], r13
mov rdx, QWORD PTR [r9+104]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
; A[13] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r13, rax
adox r14, rcx
; A[13] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
; A[13] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[13] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8], r11
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
; A[13] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r12, rax
adox r13, rcx
; A[13] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
; A[13] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
; A[13] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+32], r10
mov r12, QWORD PTR [r8+48]
mov r13, QWORD PTR [r8+56]
mov r14, QWORD PTR [r8+64]
mov r10, QWORD PTR [r8+72]
; A[13] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r11, rax
adox r12, rcx
; A[13] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+40], r11
adcx r12, rax
adox r13, rcx
; A[13] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+48], r12
adcx r13, rax
adox r14, rcx
; A[13] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+56], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [r8+64], r14
mov r11, QWORD PTR [r8+80]
mov r12, QWORD PTR [r8+88]
mov r13, QWORD PTR [r8+96]
; A[13] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r10, rax
adox r11, rcx
; A[13] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+72], r10
adcx r11, rax
adox r12, rcx
; A[13] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+80], r11
adcx r12, rax
adox r13, rcx
; A[13] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+88], r12
mov r14, rdi
adcx r13, rax
adox r14, rcx
adcx r14, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+96], r13
mov QWORD PTR [r8+104], r14
mov rdx, QWORD PTR [r9+112]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
; A[14] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r14, rax
adox r10, rcx
; A[14] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[14] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[14] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r8+8], r12
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
mov r12, QWORD PTR [r8+48]
; A[14] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r13, rax
adox r14, rcx
; A[14] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
; A[14] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
; A[14] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+32], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+40], r11
mov r13, QWORD PTR [r8+56]
mov r14, QWORD PTR [r8+64]
mov r10, QWORD PTR [r8+72]
mov r11, QWORD PTR [r8+80]
; A[14] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r12, rax
adox r13, rcx
; A[14] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+48], r12
adcx r13, rax
adox r14, rcx
; A[14] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+56], r13
adcx r14, rax
adox r10, rcx
; A[14] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+64], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+72], r10
mov r12, QWORD PTR [r8+88]
mov r13, QWORD PTR [r8+96]
mov r14, QWORD PTR [r8+104]
; A[14] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r11, rax
adox r12, rcx
; A[14] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+80], r11
adcx r12, rax
adox r13, rcx
; A[14] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+88], r12
adcx r13, rax
adox r14, rcx
; A[14] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+96], r13
mov r10, rdi
adcx r14, rax
adox r10, rcx
adcx r10, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+104], r14
mov QWORD PTR [r8+112], r10
mov rdx, QWORD PTR [r9+120]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
; A[15] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r10, rax
adox r11, rcx
; A[15] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[15] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[15] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [r8+16], r13
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
mov r12, QWORD PTR [r8+48]
mov r13, QWORD PTR [r8+56]
; A[15] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r14, rax
adox r10, rcx
; A[15] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
; A[15] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [r8+32], r10
adcx r11, rax
adox r12, rcx
; A[15] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+40], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r8+48], r12
mov r14, QWORD PTR [r8+64]
mov r10, QWORD PTR [r8+72]
mov r11, QWORD PTR [r8+80]
mov r12, QWORD PTR [r8+88]
; A[15] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r13, rax
adox r14, rcx
; A[15] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+56], r13
adcx r14, rax
adox r10, rcx
; A[15] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+64], r14
adcx r10, rax
adox r11, rcx
; A[15] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+72], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+80], r11
mov r13, QWORD PTR [r8+96]
mov r14, QWORD PTR [r8+104]
mov r10, QWORD PTR [r8+112]
; A[15] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r12, rax
adox r13, rcx
; A[15] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+88], r12
adcx r13, rax
adox r14, rcx
; A[15] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+96], r13
adcx r14, rax
adox r10, rcx
; A[15] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+104], r14
mov r11, rdi
adcx r10, rax
adox r11, rcx
adcx r11, r15
mov QWORD PTR [r8+112], r10
mov QWORD PTR [r8+120], r11
sub r8, 128
cmp r9, r8
je L_start_2048_mul_avx2_16
cmp rbp, r8
jne L_end_2048_mul_avx2_16
L_start_2048_mul_avx2_16:
vmovdqu xmm0, OWORD PTR [rbx]
vmovups OWORD PTR [r8], xmm0
vmovdqu xmm0, OWORD PTR [rbx+16]
vmovups OWORD PTR [r8+16], xmm0
vmovdqu xmm0, OWORD PTR [rbx+32]
vmovups OWORD PTR [r8+32], xmm0
vmovdqu xmm0, OWORD PTR [rbx+48]
vmovups OWORD PTR [r8+48], xmm0
vmovdqu xmm0, OWORD PTR [rbx+64]
vmovups OWORD PTR [r8+64], xmm0
vmovdqu xmm0, OWORD PTR [rbx+80]
vmovups OWORD PTR [r8+80], xmm0
vmovdqu xmm0, OWORD PTR [rbx+96]
vmovups OWORD PTR [r8+96], xmm0
vmovdqu xmm0, OWORD PTR [rbx+112]
vmovups OWORD PTR [r8+112], xmm0
L_end_2048_mul_avx2_16:
add rsp, 128
pop rdi
pop r15
pop r14
pop r13
pop r12
pop rbp
pop rbx
ret
sp_2048_mul_avx2_16 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
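; /* Square a and put result in r. (r = a * a)
; *
; * r  A single precision integer that receives the result (pointer passed in rcx).
; * a  A single precision integer to square: 16 64-bit words, A[0]..A[15] (pointer passed in rdx).
; */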
_text SEGMENT READONLY PARA
sp_2048_sqr_avx2_16 PROC
push rbp
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
mov r8, rcx
mov r9, rdx
sub rsp, 128
cmp r9, r8
mov rbp, rsp
cmovne rbp, r8
add r8, 128
xor r13, r13
; Diagonal 1
xor r12, r12
; A[1] x A[0]
mov rdx, QWORD PTR [r9]
mulx r11, r10, QWORD PTR [r9+8]
; A[2] x A[0]
mulx rcx, rax, QWORD PTR [r9+16]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbp+8], r10
mov QWORD PTR [rbp+16], r11
mov r10, r13
mov r11, r13
; A[3] x A[0]
mulx rcx, rax, QWORD PTR [r9+24]
adcx r12, rax
adox r10, rcx
; A[4] x A[0]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbp+24], r12
mov QWORD PTR [rbp+32], r10
mov r12, r13
mov r10, r13
; A[5] x A[0]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r11, rax
adox r12, rcx
; A[6] x A[0]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbp+40], r11
mov QWORD PTR [rbp+48], r12
mov r11, r13
mov r12, r13
; A[7] x A[0]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r10, rax
adox r11, rcx
; A[8] x A[0]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbp+56], r10
mov QWORD PTR [rbp+64], r11
mov r10, r13
mov r11, r13
; A[9] x A[0]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r12, rax
adox r10, rcx
; A[10] x A[0]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbp+72], r12
mov QWORD PTR [rbp+80], r10
mov r12, r13
mov r10, r13
; A[11] x A[0]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r11, rax
adox r12, rcx
; A[12] x A[0]
mulx rcx, rax, QWORD PTR [r9+96]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbp+88], r11
mov r15, r12
mov r11, r13
mov r12, r13
; A[13] x A[0]
mulx rcx, rax, QWORD PTR [r9+104]
adcx r10, rax
adox r11, rcx
; A[14] x A[0]
mulx rcx, rax, QWORD PTR [r9+112]
adcx r11, rax
adox r12, rcx
mov rdi, r10
mov rsi, r11
mov r10, r13
; A[15] x A[0]
mulx rcx, rax, QWORD PTR [r9+120]
adcx r12, rax
adox r10, rcx
mov rbx, r12
; Carry
adcx r10, r13
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8], r10
; Diagonal 2
mov r10, QWORD PTR [rbp+24]
mov r11, QWORD PTR [rbp+32]
mov r12, QWORD PTR [rbp+40]
; A[2] x A[1]
mov rdx, QWORD PTR [r9+8]
mulx rcx, rax, QWORD PTR [r9+16]
adcx r10, rax
adox r11, rcx
; A[3] x A[1]
mulx rcx, rax, QWORD PTR [r9+24]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbp+24], r10
mov QWORD PTR [rbp+32], r11
mov r10, QWORD PTR [rbp+48]
mov r11, QWORD PTR [rbp+56]
; A[4] x A[1]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r12, rax
adox r10, rcx
; A[5] x A[1]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbp+40], r12
mov QWORD PTR [rbp+48], r10
mov r12, QWORD PTR [rbp+64]
mov r10, QWORD PTR [rbp+72]
; A[6] x A[1]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r11, rax
adox r12, rcx
; A[7] x A[1]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbp+56], r11
mov QWORD PTR [rbp+64], r12
mov r11, QWORD PTR [rbp+80]
mov r12, QWORD PTR [rbp+88]
; A[8] x A[1]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r10, rax
adox r11, rcx
; A[9] x A[1]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbp+72], r10
mov QWORD PTR [rbp+80], r11
; No load %r13 - %r8
; No load %r14 - %r9
; A[10] x A[1]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r12, rax
adox r15, rcx
; A[11] x A[1]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r15, rax
adox rdi, rcx
mov QWORD PTR [rbp+88], r12
; No store %r13
; No load %r15 - %r10
; No load %rbx - %r8
; A[12] x A[1]
mulx rcx, rax, QWORD PTR [r9+96]
adcx rdi, rax
adox rsi, rcx
; A[13] x A[1]
mulx rcx, rax, QWORD PTR [r9+104]
adcx rsi, rax
adox rbx, rcx
; No store %r14
; No store %r15
mov r11, QWORD PTR [r8]
mov r12, r13
; A[14] x A[1]
mulx rcx, rax, QWORD PTR [r9+112]
adcx rbx, rax
adox r11, rcx
; A[15] x A[1]
mulx rcx, rax, QWORD PTR [r9+120]
adcx r11, rax
adox r12, rcx
; No store %rbx
mov QWORD PTR [r8], r11
mov r10, r13
; A[15] x A[2]
mov rdx, QWORD PTR [r9+16]
mulx rcx, rax, QWORD PTR [r9+120]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+8], r12
; Carry
adcx r10, r14
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8+16], r10
; Diagonal 3
mov r10, QWORD PTR [rbp+40]
mov r11, QWORD PTR [rbp+48]
mov r12, QWORD PTR [rbp+56]
; A[3] x A[2]
mulx rcx, rax, QWORD PTR [r9+24]
adcx r10, rax
adox r11, rcx
; A[4] x A[2]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbp+40], r10
mov QWORD PTR [rbp+48], r11
mov r10, QWORD PTR [rbp+64]
mov r11, QWORD PTR [rbp+72]
; A[5] x A[2]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r12, rax
adox r10, rcx
; A[6] x A[2]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbp+56], r12
mov QWORD PTR [rbp+64], r10
mov r12, QWORD PTR [rbp+80]
mov r10, QWORD PTR [rbp+88]
; A[7] x A[2]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r11, rax
adox r12, rcx
; A[8] x A[2]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbp+72], r11
mov QWORD PTR [rbp+80], r12
; No load %r13 - %r9
; No load %r14 - %r10
; A[9] x A[2]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r10, rax
adox r15, rcx
; A[10] x A[2]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r15, rax
adox rdi, rcx
mov QWORD PTR [rbp+88], r10
; No store %r13
; No load %r15 - %r8
; No load %rbx - %r9
; A[11] x A[2]
mulx rcx, rax, QWORD PTR [r9+88]
adcx rdi, rax
adox rsi, rcx
; A[12] x A[2]
mulx rcx, rax, QWORD PTR [r9+96]
adcx rsi, rax
adox rbx, rcx
; No store %r14
; No store %r15
mov r12, QWORD PTR [r8]
mov r10, QWORD PTR [r8+8]
; A[13] x A[2]
mulx rcx, rax, QWORD PTR [r9+104]
adcx rbx, rax
adox r12, rcx
; A[14] x A[2]
mulx rcx, rax, QWORD PTR [r9+112]
adcx r12, rax
adox r10, rcx
; No store %rbx
mov QWORD PTR [r8], r12
mov r11, QWORD PTR [r8+16]
mov r12, r13
; A[14] x A[3]
mov rdx, QWORD PTR [r9+112]
mulx rcx, rax, QWORD PTR [r9+24]
adcx r10, rax
adox r11, rcx
; A[14] x A[4]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+8], r10
mov QWORD PTR [r8+16], r11
mov r10, r13
; A[14] x A[5]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+24], r12
; Carry
adcx r10, r14
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8+32], r10
; Diagonal 4
mov r10, QWORD PTR [rbp+56]
mov r11, QWORD PTR [rbp+64]
mov r12, QWORD PTR [rbp+72]
; A[4] x A[3]
mov rdx, QWORD PTR [r9+24]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r10, rax
adox r11, rcx
; A[5] x A[3]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbp+56], r10
mov QWORD PTR [rbp+64], r11
mov r10, QWORD PTR [rbp+80]
mov r11, QWORD PTR [rbp+88]
; A[6] x A[3]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r12, rax
adox r10, rcx
; A[7] x A[3]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbp+72], r12
mov QWORD PTR [rbp+80], r10
; No load %r13 - %r10
; No load %r14 - %r8
; A[8] x A[3]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r11, rax
adox r15, rcx
; A[9] x A[3]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r15, rax
adox rdi, rcx
mov QWORD PTR [rbp+88], r11
; No store %r13
; No load %r15 - %r9
; No load %rbx - %r10
; A[10] x A[3]
mulx rcx, rax, QWORD PTR [r9+80]
adcx rdi, rax
adox rsi, rcx
; A[11] x A[3]
mulx rcx, rax, QWORD PTR [r9+88]
adcx rsi, rax
adox rbx, rcx
; No store %r14
; No store %r15
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
; A[12] x A[3]
mulx rcx, rax, QWORD PTR [r9+96]
adcx rbx, rax
adox r10, rcx
; A[13] x A[3]
mulx rcx, rax, QWORD PTR [r9+104]
adcx r10, rax
adox r11, rcx
; No store %rbx
mov QWORD PTR [r8], r10
mov r12, QWORD PTR [r8+16]
mov r10, QWORD PTR [r8+24]
; A[13] x A[4]
mov rdx, QWORD PTR [r9+104]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r11, rax
adox r12, rcx
; A[13] x A[5]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+8], r11
mov QWORD PTR [r8+16], r12
mov r11, QWORD PTR [r8+32]
mov r12, r13
; A[13] x A[6]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r10, rax
adox r11, rcx
; A[13] x A[7]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+24], r10
mov QWORD PTR [r8+32], r11
mov r10, r13
; A[13] x A[8]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+40], r12
; Carry
adcx r10, r14
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8+48], r10
; Diagonal 5
mov r10, QWORD PTR [rbp+72]
mov r11, QWORD PTR [rbp+80]
mov r12, QWORD PTR [rbp+88]
; A[5] x A[4]
mov rdx, QWORD PTR [r9+32]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r10, rax
adox r11, rcx
; A[6] x A[4]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbp+72], r10
mov QWORD PTR [rbp+80], r11
; No load %r13 - %r8
; No load %r14 - %r9
; A[7] x A[4]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r12, rax
adox r15, rcx
; A[8] x A[4]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r15, rax
adox rdi, rcx
mov QWORD PTR [rbp+88], r12
; No store %r13
; No load %r15 - %r10
; No load %rbx - %r8
; A[9] x A[4]
mulx rcx, rax, QWORD PTR [r9+72]
adcx rdi, rax
adox rsi, rcx
; A[10] x A[4]
mulx rcx, rax, QWORD PTR [r9+80]
adcx rsi, rax
adox rbx, rcx
; No store %r14
; No store %r15
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
; A[11] x A[4]
mulx rcx, rax, QWORD PTR [r9+88]
adcx rbx, rax
adox r11, rcx
; A[12] x A[4]
mulx rcx, rax, QWORD PTR [r9+96]
adcx r11, rax
adox r12, rcx
; No store %rbx
mov QWORD PTR [r8], r11
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
; A[12] x A[5]
mov rdx, QWORD PTR [r9+96]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r12, rax
adox r10, rcx
; A[12] x A[6]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+8], r12
mov QWORD PTR [r8+16], r10
mov r12, QWORD PTR [r8+32]
mov r10, QWORD PTR [r8+40]
; A[12] x A[7]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r11, rax
adox r12, rcx
; A[12] x A[8]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+24], r11
mov QWORD PTR [r8+32], r12
mov r11, QWORD PTR [r8+48]
mov r12, r13
; A[12] x A[9]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r10, rax
adox r11, rcx
; A[12] x A[10]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+40], r10
mov QWORD PTR [r8+48], r11
mov r10, r13
; A[12] x A[11]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+56], r12
; Carry
adcx r10, r14
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8+64], r10
; Diagonal 6
mov r10, QWORD PTR [rbp+88]
; No load %r13 - %r9
; No load %r14 - %r10
; A[6] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r10, rax
adox r15, rcx
; A[7] x A[5]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r15, rax
adox rdi, rcx
mov QWORD PTR [rbp+88], r10
; No store %r13
; No load %r15 - %r8
; No load %rbx - %r9
; A[8] x A[5]
mulx rcx, rax, QWORD PTR [r9+64]
adcx rdi, rax
adox rsi, rcx
; A[9] x A[5]
mulx rcx, rax, QWORD PTR [r9+72]
adcx rsi, rax
adox rbx, rcx
; No store %r14
; No store %r15
mov r12, QWORD PTR [r8]
mov r10, QWORD PTR [r8+8]
; A[10] x A[5]
mulx rcx, rax, QWORD PTR [r9+80]
adcx rbx, rax
adox r12, rcx
; A[11] x A[5]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r12, rax
adox r10, rcx
; No store %rbx
mov QWORD PTR [r8], r12
mov r11, QWORD PTR [r8+16]
mov r12, QWORD PTR [r8+24]
; A[11] x A[6]
mov rdx, QWORD PTR [r9+88]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r10, rax
adox r11, rcx
; A[11] x A[7]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+8], r10
mov QWORD PTR [r8+16], r11
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
; A[11] x A[8]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r12, rax
adox r10, rcx
; A[11] x A[9]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+24], r12
mov QWORD PTR [r8+32], r10
mov r12, QWORD PTR [r8+48]
mov r10, QWORD PTR [r8+56]
; A[11] x A[10]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r11, rax
adox r12, rcx
; A[13] x A[9]
mov rdx, QWORD PTR [r9+104]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+40], r11
mov QWORD PTR [r8+48], r12
mov r11, QWORD PTR [r8+64]
mov r12, r13
; A[13] x A[10]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r10, rax
adox r11, rcx
; A[13] x A[11]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+56], r10
mov QWORD PTR [r8+64], r11
mov r10, r13
; A[13] x A[12]
mulx rcx, rax, QWORD PTR [r9+96]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+72], r12
; Carry
adcx r10, r14
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8+80], r10
; Diagonal 7
; No load %r14 - %r8
; No load %r15 - %r9
; No load %rbx - %r10
; A[7] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, QWORD PTR [r9+56]
adcx rdi, rax
adox rsi, rcx
; A[8] x A[6]
mulx rcx, rax, QWORD PTR [r9+64]
adcx rsi, rax
adox rbx, rcx
; No store %r14
; No store %r15
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
; A[9] x A[6]
mulx rcx, rax, QWORD PTR [r9+72]
adcx rbx, rax
adox r10, rcx
; A[10] x A[6]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r10, rax
adox r11, rcx
; No store %rbx
mov QWORD PTR [r8], r10
mov r12, QWORD PTR [r8+16]
mov r10, QWORD PTR [r8+24]
; A[10] x A[7]
mov rdx, QWORD PTR [r9+80]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r11, rax
adox r12, rcx
; A[10] x A[8]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+8], r11
mov QWORD PTR [r8+16], r12
mov r11, QWORD PTR [r8+32]
mov r12, QWORD PTR [r8+40]
; A[10] x A[9]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r10, rax
adox r11, rcx
; A[14] x A[6]
mov rdx, QWORD PTR [r9+112]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+24], r10
mov QWORD PTR [r8+32], r11
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
; A[14] x A[7]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r12, rax
adox r10, rcx
; A[14] x A[8]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+40], r12
mov QWORD PTR [r8+48], r10
mov r12, QWORD PTR [r8+64]
mov r10, QWORD PTR [r8+72]
; A[14] x A[9]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r11, rax
adox r12, rcx
; A[14] x A[10]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+56], r11
mov QWORD PTR [r8+64], r12
mov r11, QWORD PTR [r8+80]
mov r12, r13
; A[14] x A[11]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r10, rax
adox r11, rcx
; A[14] x A[12]
mulx rcx, rax, QWORD PTR [r9+96]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+72], r10
mov QWORD PTR [r8+80], r11
mov r10, r13
; A[14] x A[13]
mulx rcx, rax, QWORD PTR [r9+104]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+88], r12
; Carry
adcx r10, r14
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8+96], r10
; Diagonal 8
; No load %rbx - %r8
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
; A[8] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx rcx, rax, QWORD PTR [r9+64]
adcx rbx, rax
adox r11, rcx
; A[9] x A[7]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r11, rax
adox r12, rcx
; No store %rbx
mov QWORD PTR [r8], r11
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
; A[9] x A[8]
mov rdx, QWORD PTR [r9+64]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r12, rax
adox r10, rcx
; A[15] x A[3]
mov rdx, QWORD PTR [r9+120]
mulx rcx, rax, QWORD PTR [r9+24]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+8], r12
mov QWORD PTR [r8+16], r10
mov r12, QWORD PTR [r8+32]
mov r10, QWORD PTR [r8+40]
; A[15] x A[4]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r11, rax
adox r12, rcx
; A[15] x A[5]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+24], r11
mov QWORD PTR [r8+32], r12
mov r11, QWORD PTR [r8+48]
mov r12, QWORD PTR [r8+56]
; A[15] x A[6]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r10, rax
adox r11, rcx
; A[15] x A[7]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+40], r10
mov QWORD PTR [r8+48], r11
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
; A[15] x A[8]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r12, rax
adox r10, rcx
; A[15] x A[9]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+56], r12
mov QWORD PTR [r8+64], r10
mov r12, QWORD PTR [r8+80]
mov r10, QWORD PTR [r8+88]
; A[15] x A[10]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r11, rax
adox r12, rcx
; A[15] x A[11]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+72], r11
mov QWORD PTR [r8+80], r12
mov r11, QWORD PTR [r8+96]
mov r12, r13
; A[15] x A[12]
mulx rcx, rax, QWORD PTR [r9+96]
adcx r10, rax
adox r11, rcx
; A[15] x A[13]
mulx rcx, rax, QWORD PTR [r9+104]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+88], r10
mov QWORD PTR [r8+96], r11
mov r10, r13
; A[15] x A[14]
mulx rcx, rax, QWORD PTR [r9+112]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+104], r12
; Carry
adcx r10, r14
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8+112], r10
mov QWORD PTR [r8+120], r14
; Double and Add in A[i] x A[i]
mov r11, QWORD PTR [rbp+8]
; A[0] x A[0]
mov rdx, QWORD PTR [r9]
mulx rcx, rax, rdx
mov QWORD PTR [rbp], rax
adox r11, r11
adcx r11, rcx
mov QWORD PTR [rbp+8], r11
mov r10, QWORD PTR [rbp+16]
mov r11, QWORD PTR [rbp+24]
; A[1] x A[1]
mov rdx, QWORD PTR [r9+8]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rbp+16], r10
mov QWORD PTR [rbp+24], r11
mov r10, QWORD PTR [rbp+32]
mov r11, QWORD PTR [rbp+40]
; A[2] x A[2]
mov rdx, QWORD PTR [r9+16]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rbp+32], r10
mov QWORD PTR [rbp+40], r11
mov r10, QWORD PTR [rbp+48]
mov r11, QWORD PTR [rbp+56]
; A[3] x A[3]
mov rdx, QWORD PTR [r9+24]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rbp+48], r10
mov QWORD PTR [rbp+56], r11
mov r10, QWORD PTR [rbp+64]
mov r11, QWORD PTR [rbp+72]
; A[4] x A[4]
mov rdx, QWORD PTR [r9+32]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rbp+64], r10
mov QWORD PTR [rbp+72], r11
mov r10, QWORD PTR [rbp+80]
mov r11, QWORD PTR [rbp+88]
; A[5] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rbp+80], r10
mov QWORD PTR [rbp+88], r11
; A[6] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, rdx
adox r15, r15
adox rdi, rdi
adcx r15, rax
adcx rdi, rcx
; A[7] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx rcx, rax, rdx
adox rsi, rsi
adox rbx, rbx
adcx rsi, rax
adcx rbx, rcx
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
; A[8] x A[8]
mov rdx, QWORD PTR [r9+64]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8], r10
mov QWORD PTR [r8+8], r11
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
; A[9] x A[9]
mov rdx, QWORD PTR [r9+72]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+16], r10
mov QWORD PTR [r8+24], r11
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
; A[10] x A[10]
mov rdx, QWORD PTR [r9+80]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+32], r10
mov QWORD PTR [r8+40], r11
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
; A[11] x A[11]
mov rdx, QWORD PTR [r9+88]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+48], r10
mov QWORD PTR [r8+56], r11
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
; A[12] x A[12]
mov rdx, QWORD PTR [r9+96]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+64], r10
mov QWORD PTR [r8+72], r11
mov r10, QWORD PTR [r8+80]
mov r11, QWORD PTR [r8+88]
; A[13] x A[13]
mov rdx, QWORD PTR [r9+104]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+80], r10
mov QWORD PTR [r8+88], r11
mov r10, QWORD PTR [r8+96]
mov r11, QWORD PTR [r8+104]
; A[14] x A[14]
mov rdx, QWORD PTR [r9+112]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+96], r10
mov QWORD PTR [r8+104], r11
mov r10, QWORD PTR [r8+112]
mov r11, QWORD PTR [r8+120]
; A[15] x A[15]
mov rdx, QWORD PTR [r9+120]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+112], r10
mov QWORD PTR [r8+120], r11
mov QWORD PTR [r8+-32], r15
mov QWORD PTR [r8+-24], rdi
mov QWORD PTR [r8+-16], rsi
mov QWORD PTR [r8+-8], rbx
sub r8, 128
cmp r9, r8
jne L_end_2048_sqr_avx2_16
vmovdqu xmm0, OWORD PTR [rbp]
vmovups OWORD PTR [r8], xmm0
vmovdqu xmm0, OWORD PTR [rbp+16]
vmovups OWORD PTR [r8+16], xmm0
vmovdqu xmm0, OWORD PTR [rbp+32]
vmovups OWORD PTR [r8+32], xmm0
vmovdqu xmm0, OWORD PTR [rbp+48]
vmovups OWORD PTR [r8+48], xmm0
vmovdqu xmm0, OWORD PTR [rbp+64]
vmovups OWORD PTR [r8+64], xmm0
vmovdqu xmm0, OWORD PTR [rbp+80]
vmovups OWORD PTR [r8+80], xmm0
L_end_2048_sqr_avx2_16:
add rsp, 128
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
pop rbp
ret
sp_2048_sqr_avx2_16 ENDP
_text ENDS
ENDIF
; /* Add two 16-word numbers: r = a + b, and report the carry out.
; *
; * Argument registers as read by the code below:
; *   rcx  r  Result, 16 64-bit words, least significant word at offset 0.
; *   rdx  a  First operand, 16 64-bit words.
; *   r8   b  Second operand, 16 64-bit words.
; * Returns in rax the carry out of the top word (0 or 1).
; * Scratch registers: r9, r10. Flags are modified.
; */
_text SEGMENT READONLY PARA
sp_2048_add_16 PROC
        ; Word 0 opens the carry chain. rax is zeroed between the load and
        ; the ADD because XOR rewrites the flags, whereas the MOVs that
        ; follow leave CF untouched for the ADC chain.
        mov     r9, QWORD PTR [rdx]
        xor     rax, rax
        add     r9, QWORD PTR [r8]
        ; Word 1: the store of word 0 is slotted in after the next load.
        mov     r10, QWORD PTR [rdx+8]
        mov     QWORD PTR [rcx], r9
        adc     r10, QWORD PTR [r8+8]
        ; Words 2..15, two per expansion, alternating r9 (even word) and
        ; r10 (odd word). Each sum is stored one step after it is formed.
sp_2048_add_16_off = 16
REPT 7
        mov     r9, QWORD PTR [rdx+sp_2048_add_16_off]
        mov     QWORD PTR [rcx+sp_2048_add_16_off-8], r10
        adc     r9, QWORD PTR [r8+sp_2048_add_16_off]
        mov     r10, QWORD PTR [rdx+sp_2048_add_16_off+8]
        mov     QWORD PTR [rcx+sp_2048_add_16_off], r9
        adc     r10, QWORD PTR [r8+sp_2048_add_16_off+8]
sp_2048_add_16_off = sp_2048_add_16_off + 16
ENDM
        ; Flush the last sum, then turn the final CF into the return value.
        mov     QWORD PTR [rcx+120], r10
        adc     rax, 0
        ret
sp_2048_add_16 ENDP
_text ENDS
; /* Subtract in place over 32 words: a = a - b, and report the borrow.
; *
; * Argument registers as read by the code below:
; *   rcx  a  Minuend and result, 32 64-bit words, least significant word
; *           at offset 0.
; *   rdx  b  Subtrahend, 32 64-bit words.
; * Returns in rax 0 when there is no borrow out of the top word, or all
; * ones (-1) when there is.
; * Scratch registers: r8, r9. Flags are modified.
; */
_text SEGMENT READONLY PARA
sp_2048_sub_in_place_32 PROC
        ; Word 0 opens the borrow chain. rax is zeroed between the load and
        ; the SUB because XOR rewrites the flags, whereas the MOVs that
        ; follow leave CF untouched for the SBB chain.
        mov     r8, QWORD PTR [rcx]
        xor     rax, rax
        sub     r8, QWORD PTR [rdx]
        ; Word 1: the store of word 0 is slotted in after the next load.
        mov     r9, QWORD PTR [rcx+8]
        mov     QWORD PTR [rcx], r8
        sbb     r9, QWORD PTR [rdx+8]
        ; Words 2..31, two per expansion, alternating r8 (even word) and
        ; r9 (odd word). Each difference is stored one step after it is
        ; formed, after the next word of a has already been loaded.
sp_2048_sub_in_place_32_off = 16
REPT 15
        mov     r8, QWORD PTR [rcx+sp_2048_sub_in_place_32_off]
        mov     QWORD PTR [rcx+sp_2048_sub_in_place_32_off-8], r9
        sbb     r8, QWORD PTR [rdx+sp_2048_sub_in_place_32_off]
        mov     r9, QWORD PTR [rcx+sp_2048_sub_in_place_32_off+8]
        mov     QWORD PTR [rcx+sp_2048_sub_in_place_32_off], r8
        sbb     r9, QWORD PTR [rdx+sp_2048_sub_in_place_32_off+8]
sp_2048_sub_in_place_32_off = sp_2048_sub_in_place_32_off + 16
ENDM
        ; Flush the last difference; 0 - 0 - CF leaves 0 or -1 in rax.
        mov     QWORD PTR [rcx+248], r9
        sbb     rax, 0
        ret
sp_2048_sub_in_place_32 ENDP
_text ENDS
; /* Add two 32-word numbers: r = a + b, and report the carry out.
; *
; * Argument registers as read by the code below:
; *   rcx  r  Result, 32 64-bit words, least significant word at offset 0.
; *   rdx  a  First operand, 32 64-bit words.
; *   r8   b  Second operand, 32 64-bit words.
; * Returns in rax the carry out of the top word (0 or 1).
; * Scratch registers: r9, r10. Flags are modified.
; */
_text SEGMENT READONLY PARA
sp_2048_add_32 PROC
        ; Word 0 opens the carry chain. rax is zeroed between the load and
        ; the ADD because XOR rewrites the flags, whereas the MOVs that
        ; follow leave CF untouched for the ADC chain.
        mov     r9, QWORD PTR [rdx]
        xor     rax, rax
        add     r9, QWORD PTR [r8]
        ; Word 1: the store of word 0 is slotted in after the next load.
        mov     r10, QWORD PTR [rdx+8]
        mov     QWORD PTR [rcx], r9
        adc     r10, QWORD PTR [r8+8]
        ; Words 2..31, two per expansion, alternating r9 (even word) and
        ; r10 (odd word). Each sum is stored one step after it is formed.
sp_2048_add_32_off = 16
REPT 15
        mov     r9, QWORD PTR [rdx+sp_2048_add_32_off]
        mov     QWORD PTR [rcx+sp_2048_add_32_off-8], r10
        adc     r9, QWORD PTR [r8+sp_2048_add_32_off]
        mov     r10, QWORD PTR [rdx+sp_2048_add_32_off+8]
        mov     QWORD PTR [rcx+sp_2048_add_32_off], r9
        adc     r10, QWORD PTR [r8+sp_2048_add_32_off+8]
sp_2048_add_32_off = sp_2048_add_32_off + 16
ENDM
        ; Flush the last sum, then turn the final CF into the return value.
        mov     QWORD PTR [rcx+248], r10
        adc     rax, 0
        ret
sp_2048_add_32 ENDP
_text ENDS
; /* Multiply a and b into r. (r = a * b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_2048_mul_32 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
sub rsp, 808
mov QWORD PTR [rsp+768], rcx
mov QWORD PTR [rsp+776], rdx
mov QWORD PTR [rsp+784], r8
lea r12, QWORD PTR [rsp+512]
lea r14, QWORD PTR [rdx+128]
; Add
mov rax, QWORD PTR [rdx]
xor r15, r15
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [rdx+8]
mov QWORD PTR [r12], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [rdx+16]
mov QWORD PTR [r12+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [rdx+24]
mov QWORD PTR [r12+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [rdx+32]
mov QWORD PTR [r12+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [rdx+40]
mov QWORD PTR [r12+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r12+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [rdx+56]
mov QWORD PTR [r12+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [rdx+64]
mov QWORD PTR [r12+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [rdx+72]
mov QWORD PTR [r12+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [rdx+80]
mov QWORD PTR [r12+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [rdx+88]
mov QWORD PTR [r12+80], r9
adc r10, QWORD PTR [r14+88]
mov rax, QWORD PTR [rdx+96]
mov QWORD PTR [r12+88], r10
adc rax, QWORD PTR [r14+96]
mov r9, QWORD PTR [rdx+104]
mov QWORD PTR [r12+96], rax
adc r9, QWORD PTR [r14+104]
mov r10, QWORD PTR [rdx+112]
mov QWORD PTR [r12+104], r9
adc r10, QWORD PTR [r14+112]
mov rax, QWORD PTR [rdx+120]
mov QWORD PTR [r12+112], r10
adc rax, QWORD PTR [r14+120]
mov QWORD PTR [r12+120], rax
adc r15, 0
mov QWORD PTR [rsp+792], r15
lea r13, QWORD PTR [rsp+640]
lea r14, QWORD PTR [r8+128]
; Add
mov rax, QWORD PTR [r8]
xor rdi, rdi
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [r8+8]
mov QWORD PTR [r13], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [r8+16]
mov QWORD PTR [r13+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [r8+24]
mov QWORD PTR [r13+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [r8+32]
mov QWORD PTR [r13+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [r8+40]
mov QWORD PTR [r13+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [r8+48]
mov QWORD PTR [r13+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [r8+56]
mov QWORD PTR [r13+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [r8+64]
mov QWORD PTR [r13+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [r8+72]
mov QWORD PTR [r13+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [r8+80]
mov QWORD PTR [r13+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [r8+88]
mov QWORD PTR [r13+80], r9
adc r10, QWORD PTR [r14+88]
mov rax, QWORD PTR [r8+96]
mov QWORD PTR [r13+88], r10
adc rax, QWORD PTR [r14+96]
mov r9, QWORD PTR [r8+104]
mov QWORD PTR [r13+96], rax
adc r9, QWORD PTR [r14+104]
mov r10, QWORD PTR [r8+112]
mov QWORD PTR [r13+104], r9
adc r10, QWORD PTR [r14+112]
mov rax, QWORD PTR [r8+120]
mov QWORD PTR [r13+112], r10
adc rax, QWORD PTR [r14+120]
mov QWORD PTR [r13+120], rax
adc rdi, 0
mov QWORD PTR [rsp+800], rdi
mov r8, r13
mov rdx, r12
mov rcx, rsp
call sp_2048_mul_16
mov r8, QWORD PTR [rsp+784]
mov rdx, QWORD PTR [rsp+776]
lea rcx, QWORD PTR [rsp+256]
add r8, 128
add rdx, 128
call sp_2048_mul_16
mov r8, QWORD PTR [rsp+784]
mov rdx, QWORD PTR [rsp+776]
mov rcx, QWORD PTR [rsp+768]
call sp_2048_mul_16
IFDEF _WIN64
mov r8, QWORD PTR [rsp+784]
mov rdx, QWORD PTR [rsp+776]
mov rcx, QWORD PTR [rsp+768]
ENDIF
mov r15, QWORD PTR [rsp+792]
mov rdi, QWORD PTR [rsp+800]
mov rsi, QWORD PTR [rsp+768]
mov r11, r15
lea r12, QWORD PTR [rsp+512]
lea r13, QWORD PTR [rsp+640]
and r11, rdi
neg r15
neg rdi
add rsi, 256
mov rax, QWORD PTR [r12]
mov r9, QWORD PTR [r13]
and rax, rdi
and r9, r15
mov QWORD PTR [r12], rax
mov QWORD PTR [r13], r9
mov rax, QWORD PTR [r12+8]
mov r9, QWORD PTR [r13+8]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+8], rax
mov QWORD PTR [r13+8], r9
mov rax, QWORD PTR [r12+16]
mov r9, QWORD PTR [r13+16]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+16], rax
mov QWORD PTR [r13+16], r9
mov rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [r13+24]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+24], rax
mov QWORD PTR [r13+24], r9
mov rax, QWORD PTR [r12+32]
mov r9, QWORD PTR [r13+32]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+32], rax
mov QWORD PTR [r13+32], r9
mov rax, QWORD PTR [r12+40]
mov r9, QWORD PTR [r13+40]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+40], rax
mov QWORD PTR [r13+40], r9
mov rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [r13+48]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+48], rax
mov QWORD PTR [r13+48], r9
mov rax, QWORD PTR [r12+56]
mov r9, QWORD PTR [r13+56]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+56], rax
mov QWORD PTR [r13+56], r9
mov rax, QWORD PTR [r12+64]
mov r9, QWORD PTR [r13+64]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+64], rax
mov QWORD PTR [r13+64], r9
mov rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [r13+72]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+72], rax
mov QWORD PTR [r13+72], r9
mov rax, QWORD PTR [r12+80]
mov r9, QWORD PTR [r13+80]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+80], rax
mov QWORD PTR [r13+80], r9
mov rax, QWORD PTR [r12+88]
mov r9, QWORD PTR [r13+88]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+88], rax
mov QWORD PTR [r13+88], r9
mov rax, QWORD PTR [r12+96]
mov r9, QWORD PTR [r13+96]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+96], rax
mov QWORD PTR [r13+96], r9
mov rax, QWORD PTR [r12+104]
mov r9, QWORD PTR [r13+104]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+104], rax
mov QWORD PTR [r13+104], r9
mov rax, QWORD PTR [r12+112]
mov r9, QWORD PTR [r13+112]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+112], rax
mov QWORD PTR [r13+112], r9
mov rax, QWORD PTR [r12+120]
mov r9, QWORD PTR [r13+120]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+120], rax
mov QWORD PTR [r13+120], r9
mov rax, QWORD PTR [r12]
add rax, QWORD PTR [r13]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r13+120]
mov QWORD PTR [rsi+120], rax
adc r11, 0
lea r13, QWORD PTR [rsp+256]
mov r12, rsp
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [r13]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [r13+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [r13+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [r13+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [r13+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [r13+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [r13+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [r13+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [r13+184]
mov rax, QWORD PTR [r12+192]
mov QWORD PTR [r12+184], r10
sbb rax, QWORD PTR [r13+192]
mov r9, QWORD PTR [r12+200]
mov QWORD PTR [r12+192], rax
sbb r9, QWORD PTR [r13+200]
mov r10, QWORD PTR [r12+208]
mov QWORD PTR [r12+200], r9
sbb r10, QWORD PTR [r13+208]
mov rax, QWORD PTR [r12+216]
mov QWORD PTR [r12+208], r10
sbb rax, QWORD PTR [r13+216]
mov r9, QWORD PTR [r12+224]
mov QWORD PTR [r12+216], rax
sbb r9, QWORD PTR [r13+224]
mov r10, QWORD PTR [r12+232]
mov QWORD PTR [r12+224], r9
sbb r10, QWORD PTR [r13+232]
mov rax, QWORD PTR [r12+240]
mov QWORD PTR [r12+232], r10
sbb rax, QWORD PTR [r13+240]
mov r9, QWORD PTR [r12+248]
mov QWORD PTR [r12+240], rax
sbb r9, QWORD PTR [r13+248]
mov QWORD PTR [r12+248], r9
sbb r11, 0
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [rcx]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [rcx+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [rcx+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [rcx+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [rcx+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [rcx+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [rcx+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [rcx+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [rcx+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [rcx+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [rcx+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [rcx+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [rcx+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [rcx+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [rcx+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [rcx+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [rcx+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [rcx+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [rcx+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [rcx+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [rcx+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [rcx+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [rcx+184]
mov rax, QWORD PTR [r12+192]
mov QWORD PTR [r12+184], r10
sbb rax, QWORD PTR [rcx+192]
mov r9, QWORD PTR [r12+200]
mov QWORD PTR [r12+192], rax
sbb r9, QWORD PTR [rcx+200]
mov r10, QWORD PTR [r12+208]
mov QWORD PTR [r12+200], r9
sbb r10, QWORD PTR [rcx+208]
mov rax, QWORD PTR [r12+216]
mov QWORD PTR [r12+208], r10
sbb rax, QWORD PTR [rcx+216]
mov r9, QWORD PTR [r12+224]
mov QWORD PTR [r12+216], rax
sbb r9, QWORD PTR [rcx+224]
mov r10, QWORD PTR [r12+232]
mov QWORD PTR [r12+224], r9
sbb r10, QWORD PTR [rcx+232]
mov rax, QWORD PTR [r12+240]
mov QWORD PTR [r12+232], r10
sbb rax, QWORD PTR [rcx+240]
mov r9, QWORD PTR [r12+248]
mov QWORD PTR [r12+240], rax
sbb r9, QWORD PTR [rcx+248]
mov QWORD PTR [r12+248], r9
sbb r11, 0
sub rsi, 128
; Add
mov rax, QWORD PTR [rsi]
add rax, QWORD PTR [r12]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r12+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r12+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r12+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r12+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r12+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r12+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r12+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r12+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r12+96]
mov r9, QWORD PTR [rsi+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r12+104]
mov r10, QWORD PTR [rsi+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r12+112]
mov rax, QWORD PTR [rsi+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r12+120]
mov r9, QWORD PTR [rsi+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r12+128]
mov r10, QWORD PTR [rsi+136]
mov QWORD PTR [rsi+128], r9
adc r10, QWORD PTR [r12+136]
mov rax, QWORD PTR [rsi+144]
mov QWORD PTR [rsi+136], r10
adc rax, QWORD PTR [r12+144]
mov r9, QWORD PTR [rsi+152]
mov QWORD PTR [rsi+144], rax
adc r9, QWORD PTR [r12+152]
mov r10, QWORD PTR [rsi+160]
mov QWORD PTR [rsi+152], r9
adc r10, QWORD PTR [r12+160]
mov rax, QWORD PTR [rsi+168]
mov QWORD PTR [rsi+160], r10
adc rax, QWORD PTR [r12+168]
mov r9, QWORD PTR [rsi+176]
mov QWORD PTR [rsi+168], rax
adc r9, QWORD PTR [r12+176]
mov r10, QWORD PTR [rsi+184]
mov QWORD PTR [rsi+176], r9
adc r10, QWORD PTR [r12+184]
mov rax, QWORD PTR [rsi+192]
mov QWORD PTR [rsi+184], r10
adc rax, QWORD PTR [r12+192]
mov r9, QWORD PTR [rsi+200]
mov QWORD PTR [rsi+192], rax
adc r9, QWORD PTR [r12+200]
mov r10, QWORD PTR [rsi+208]
mov QWORD PTR [rsi+200], r9
adc r10, QWORD PTR [r12+208]
mov rax, QWORD PTR [rsi+216]
mov QWORD PTR [rsi+208], r10
adc rax, QWORD PTR [r12+216]
mov r9, QWORD PTR [rsi+224]
mov QWORD PTR [rsi+216], rax
adc r9, QWORD PTR [r12+224]
mov r10, QWORD PTR [rsi+232]
mov QWORD PTR [rsi+224], r9
adc r10, QWORD PTR [r12+232]
mov rax, QWORD PTR [rsi+240]
mov QWORD PTR [rsi+232], r10
adc rax, QWORD PTR [r12+240]
mov r9, QWORD PTR [rsi+248]
mov QWORD PTR [rsi+240], rax
adc r9, QWORD PTR [r12+248]
mov QWORD PTR [rsi+248], r9
adc r11, 0
mov QWORD PTR [rcx+384], r11
add rsi, 128
; Add
mov rax, QWORD PTR [rsi]
xor r11, r11
add rax, QWORD PTR [r13]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [rsi+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [rsi+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [rsi+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [rsi+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r13+128]
mov QWORD PTR [rsi+128], r9
adc r11, 0
; Add to zero
mov rax, QWORD PTR [r13+136]
adc rax, 0
mov r9, QWORD PTR [r13+144]
mov QWORD PTR [rsi+136], rax
adc r9, 0
mov r10, QWORD PTR [r13+152]
mov QWORD PTR [rsi+144], r9
adc r10, 0
mov rax, QWORD PTR [r13+160]
mov QWORD PTR [rsi+152], r10
adc rax, 0
mov r9, QWORD PTR [r13+168]
mov QWORD PTR [rsi+160], rax
adc r9, 0
mov r10, QWORD PTR [r13+176]
mov QWORD PTR [rsi+168], r9
adc r10, 0
mov rax, QWORD PTR [r13+184]
mov QWORD PTR [rsi+176], r10
adc rax, 0
mov r9, QWORD PTR [r13+192]
mov QWORD PTR [rsi+184], rax
adc r9, 0
mov r10, QWORD PTR [r13+200]
mov QWORD PTR [rsi+192], r9
adc r10, 0
mov rax, QWORD PTR [r13+208]
mov QWORD PTR [rsi+200], r10
adc rax, 0
mov r9, QWORD PTR [r13+216]
mov QWORD PTR [rsi+208], rax
adc r9, 0
mov r10, QWORD PTR [r13+224]
mov QWORD PTR [rsi+216], r9
adc r10, 0
mov rax, QWORD PTR [r13+232]
mov QWORD PTR [rsi+224], r10
adc rax, 0
mov r9, QWORD PTR [r13+240]
mov QWORD PTR [rsi+232], rax
adc r9, 0
mov r10, QWORD PTR [r13+248]
mov QWORD PTR [rsi+240], r9
adc r10, 0
mov QWORD PTR [rsi+248], r10
add rsp, 808
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_2048_mul_32 ENDP
_text ENDS
; /* Add a to a into r. (r = a + a)
; *
; * Operates on 16 64-bit words (byte offsets 0..120), least significant
; * word first, as one add/adc carry chain.
; *
; * r A single precision integer. Passed in rcx; 16 words are written.
; * a A single precision integer. Passed in rdx; 16 words are read.
; *
; * On return rax holds the carry out of the top word (0 or 1).
; * Scratch registers used: r8, r9 and the flags. No stack is used.
; */
_text SEGMENT READONLY PARA
sp_2048_dbl_16 PROC
mov r8, QWORD PTR [rdx]
; Clear the return value before the chain starts: xor rewrites the flags,
; so it has to come ahead of the first add.
xor rax, rax
add r8, r8
; From here on each step loads the next word of a, stores the previous
; result word, then doubles the loaded word with carry-in. mov leaves the
; flags alone, so the carry survives the interleaved loads and stores.
mov r9, QWORD PTR [rdx+8]
mov QWORD PTR [rcx], r8
adc r9, r9
mov r8, QWORD PTR [rdx+16]
mov QWORD PTR [rcx+8], r9
adc r8, r8
mov r9, QWORD PTR [rdx+24]
mov QWORD PTR [rcx+16], r8
adc r9, r9
mov r8, QWORD PTR [rdx+32]
mov QWORD PTR [rcx+24], r9
adc r8, r8
mov r9, QWORD PTR [rdx+40]
mov QWORD PTR [rcx+32], r8
adc r9, r9
mov r8, QWORD PTR [rdx+48]
mov QWORD PTR [rcx+40], r9
adc r8, r8
mov r9, QWORD PTR [rdx+56]
mov QWORD PTR [rcx+48], r8
adc r9, r9
mov r8, QWORD PTR [rdx+64]
mov QWORD PTR [rcx+56], r9
adc r8, r8
mov r9, QWORD PTR [rdx+72]
mov QWORD PTR [rcx+64], r8
adc r9, r9
mov r8, QWORD PTR [rdx+80]
mov QWORD PTR [rcx+72], r9
adc r8, r8
mov r9, QWORD PTR [rdx+88]
mov QWORD PTR [rcx+80], r8
adc r9, r9
mov r8, QWORD PTR [rdx+96]
mov QWORD PTR [rcx+88], r9
adc r8, r8
mov r9, QWORD PTR [rdx+104]
mov QWORD PTR [rcx+96], r8
adc r9, r9
mov r8, QWORD PTR [rdx+112]
mov QWORD PTR [rcx+104], r9
adc r8, r8
mov r9, QWORD PTR [rdx+120]
mov QWORD PTR [rcx+112], r8
adc r9, r9
mov QWORD PTR [rcx+120], r9
; rax = 0 + carry out of the top word.
adc rax, 0
ret
sp_2048_dbl_16 ENDP
_text ENDS
; /* Square a and put result in r. (r = a * a)
; *
; * One level of Karatsuba built on three calls to sp_2048_sqr_16 (defined
; * elsewhere in this file; assumed to square 16 words into 32 words).
; * With a = a0 + a1 * 2^1024 (a0 = low 16 words, a1 = high 16 words) and
; * s = a0 + a1 = sl + c * 2^1024 (sl = low 16 words, c = carry, 0 or 1):
; *   z0 = a0^2, z2 = a1^2, z1 = sl^2
; *   r  = z0 + (z1 + 2*c*sl*2^1024 + c*2^2048 - z0 - z2) * 2^1024
; *           + z2 * 2^2048
; *
; * r A single precision integer. Passed in rcx; 64 words are written.
; * a A single precision integer. Passed in rdx; 32 words are read.
; *
; * Stack frame (664 bytes):
; *   [rsp+0..255]    z1, later z1 - z2 - z0
; *   [rsp+256..511]  z2
; *   [rsp+512..639]  sl
; *   [rsp+640]       saved r
; *   [rsp+648]       saved a
; *   [rsp+656]       c
; *
; * r12 is saved and restored; rax, rcx, rdx, r8-r11 and flags are scratch.
; *
; * NOTE(review): with one push and a 664-byte frame, rsp is 8 mod 16 at the
; * calls below and no shadow space is reserved. This relies on
; * sp_2048_sqr_16 needing neither - confirm against its definition.
; */
_text SEGMENT READONLY PARA
sp_2048_sqr_32 PROC
push r12
sub rsp, 664
mov QWORD PTR [rsp+640], rcx
mov QWORD PTR [rsp+648], rdx
lea r10, QWORD PTR [rsp+512]
lea r11, QWORD PTR [rdx+128]
; Add
; sl = low 16 words of a0 + a1; r9 collects the carry c.
mov rax, QWORD PTR [rdx]
xor r9, r9
add rax, QWORD PTR [r11]
mov r8, QWORD PTR [rdx+8]
mov QWORD PTR [r10], rax
adc r8, QWORD PTR [r11+8]
mov rax, QWORD PTR [rdx+16]
mov QWORD PTR [r10+8], r8
adc rax, QWORD PTR [r11+16]
mov r8, QWORD PTR [rdx+24]
mov QWORD PTR [r10+16], rax
adc r8, QWORD PTR [r11+24]
mov rax, QWORD PTR [rdx+32]
mov QWORD PTR [r10+24], r8
adc rax, QWORD PTR [r11+32]
mov r8, QWORD PTR [rdx+40]
mov QWORD PTR [r10+32], rax
adc r8, QWORD PTR [r11+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r10+40], r8
adc rax, QWORD PTR [r11+48]
mov r8, QWORD PTR [rdx+56]
mov QWORD PTR [r10+48], rax
adc r8, QWORD PTR [r11+56]
mov rax, QWORD PTR [rdx+64]
mov QWORD PTR [r10+56], r8
adc rax, QWORD PTR [r11+64]
mov r8, QWORD PTR [rdx+72]
mov QWORD PTR [r10+64], rax
adc r8, QWORD PTR [r11+72]
mov rax, QWORD PTR [rdx+80]
mov QWORD PTR [r10+72], r8
adc rax, QWORD PTR [r11+80]
mov r8, QWORD PTR [rdx+88]
mov QWORD PTR [r10+80], rax
adc r8, QWORD PTR [r11+88]
mov rax, QWORD PTR [rdx+96]
mov QWORD PTR [r10+88], r8
adc rax, QWORD PTR [r11+96]
mov r8, QWORD PTR [rdx+104]
mov QWORD PTR [r10+96], rax
adc r8, QWORD PTR [r11+104]
mov rax, QWORD PTR [rdx+112]
mov QWORD PTR [r10+104], r8
adc rax, QWORD PTR [r11+112]
mov r8, QWORD PTR [rdx+120]
mov QWORD PTR [r10+112], rax
adc r8, QWORD PTR [r11+120]
mov QWORD PTR [r10+120], r8
adc r9, 0
mov QWORD PTR [rsp+656], r9
; z1 = sl^2 into [rsp+0..255].
mov rdx, r10
mov rcx, rsp
call sp_2048_sqr_16
; z2 = a1^2 into [rsp+256..511].
mov rdx, QWORD PTR [rsp+648]
lea rcx, QWORD PTR [rsp+256]
add rdx, 128
call sp_2048_sqr_16
; z0 = a0^2 into r[0..31].
mov rdx, QWORD PTR [rsp+648]
mov rcx, QWORD PTR [rsp+640]
call sp_2048_sqr_16
IFDEF _WIN64
; rcx and rdx are volatile across the call; reload them.
mov rdx, QWORD PTR [rsp+648]
mov rcx, QWORD PTR [rsp+640]
ENDIF
; r9 = c, r12 = -c (all ones when c = 1, zero otherwise).
mov r12, QWORD PTR [rsp+656]
lea r10, QWORD PTR [rsp+512]
mov r9, r12
neg r12
; r[32..47] = sl & mask, selected without branching on c.
mov rax, QWORD PTR [r10]
mov r8, QWORD PTR [r10+8]
and rax, r12
and r8, r12
mov QWORD PTR [rcx+256], rax
mov QWORD PTR [rcx+264], r8
mov rax, QWORD PTR [r10+16]
mov r8, QWORD PTR [r10+24]
and rax, r12
and r8, r12
mov QWORD PTR [rcx+272], rax
mov QWORD PTR [rcx+280], r8
mov rax, QWORD PTR [r10+32]
mov r8, QWORD PTR [r10+40]
and rax, r12
and r8, r12
mov QWORD PTR [rcx+288], rax
mov QWORD PTR [rcx+296], r8
mov rax, QWORD PTR [r10+48]
mov r8, QWORD PTR [r10+56]
and rax, r12
and r8, r12
mov QWORD PTR [rcx+304], rax
mov QWORD PTR [rcx+312], r8
mov rax, QWORD PTR [r10+64]
mov r8, QWORD PTR [r10+72]
and rax, r12
and r8, r12
mov QWORD PTR [rcx+320], rax
mov QWORD PTR [rcx+328], r8
mov rax, QWORD PTR [r10+80]
mov r8, QWORD PTR [r10+88]
and rax, r12
and r8, r12
mov QWORD PTR [rcx+336], rax
mov QWORD PTR [rcx+344], r8
mov rax, QWORD PTR [r10+96]
mov r8, QWORD PTR [r10+104]
and rax, r12
and r8, r12
mov QWORD PTR [rcx+352], rax
mov QWORD PTR [rcx+360], r8
mov rax, QWORD PTR [r10+112]
mov r8, QWORD PTR [r10+120]
and rax, r12
and r8, r12
mov QWORD PTR [rcx+368], rax
mov QWORD PTR [rcx+376], r8
; Double r[32..47] in place, giving 2*c*sl. r9 (= c, standing in for the
; c*2^2048 term) also picks up the carry out of the doubling.
mov rax, QWORD PTR [rcx+256]
add rax, rax
mov r8, QWORD PTR [rcx+264]
mov QWORD PTR [rcx+256], rax
adc r8, r8
mov rax, QWORD PTR [rcx+272]
mov QWORD PTR [rcx+264], r8
adc rax, rax
mov r8, QWORD PTR [rcx+280]
mov QWORD PTR [rcx+272], rax
adc r8, r8
mov rax, QWORD PTR [rcx+288]
mov QWORD PTR [rcx+280], r8
adc rax, rax
mov r8, QWORD PTR [rcx+296]
mov QWORD PTR [rcx+288], rax
adc r8, r8
mov rax, QWORD PTR [rcx+304]
mov QWORD PTR [rcx+296], r8
adc rax, rax
mov r8, QWORD PTR [rcx+312]
mov QWORD PTR [rcx+304], rax
adc r8, r8
mov rax, QWORD PTR [rcx+320]
mov QWORD PTR [rcx+312], r8
adc rax, rax
mov r8, QWORD PTR [rcx+328]
mov QWORD PTR [rcx+320], rax
adc r8, r8
mov rax, QWORD PTR [rcx+336]
mov QWORD PTR [rcx+328], r8
adc rax, rax
mov r8, QWORD PTR [rcx+344]
mov QWORD PTR [rcx+336], rax
adc r8, r8
mov rax, QWORD PTR [rcx+352]
mov QWORD PTR [rcx+344], r8
adc rax, rax
mov r8, QWORD PTR [rcx+360]
mov QWORD PTR [rcx+352], rax
adc r8, r8
mov rax, QWORD PTR [rcx+368]
mov QWORD PTR [rcx+360], r8
adc rax, rax
mov r8, QWORD PTR [rcx+376]
mov QWORD PTR [rcx+368], rax
adc r8, r8
mov QWORD PTR [rcx+376], r8
adc r9, 0
; z1 -= z2 (32 words, in place on the stack); the borrow comes off r9.
lea rdx, QWORD PTR [rsp+256]
mov r10, rsp
mov rax, QWORD PTR [r10]
sub rax, QWORD PTR [rdx]
mov r8, QWORD PTR [r10+8]
mov QWORD PTR [r10], rax
sbb r8, QWORD PTR [rdx+8]
mov rax, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], r8
sbb rax, QWORD PTR [rdx+16]
mov r8, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], rax
sbb r8, QWORD PTR [rdx+24]
mov rax, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], r8
sbb rax, QWORD PTR [rdx+32]
mov r8, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], rax
sbb r8, QWORD PTR [rdx+40]
mov rax, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], r8
sbb rax, QWORD PTR [rdx+48]
mov r8, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], rax
sbb r8, QWORD PTR [rdx+56]
mov rax, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], r8
sbb rax, QWORD PTR [rdx+64]
mov r8, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], rax
sbb r8, QWORD PTR [rdx+72]
mov rax, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], r8
sbb rax, QWORD PTR [rdx+80]
mov r8, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], rax
sbb r8, QWORD PTR [rdx+88]
mov rax, QWORD PTR [r10+96]
mov QWORD PTR [r10+88], r8
sbb rax, QWORD PTR [rdx+96]
mov r8, QWORD PTR [r10+104]
mov QWORD PTR [r10+96], rax
sbb r8, QWORD PTR [rdx+104]
mov rax, QWORD PTR [r10+112]
mov QWORD PTR [r10+104], r8
sbb rax, QWORD PTR [rdx+112]
mov r8, QWORD PTR [r10+120]
mov QWORD PTR [r10+112], rax
sbb r8, QWORD PTR [rdx+120]
mov rax, QWORD PTR [r10+128]
mov QWORD PTR [r10+120], r8
sbb rax, QWORD PTR [rdx+128]
mov r8, QWORD PTR [r10+136]
mov QWORD PTR [r10+128], rax
sbb r8, QWORD PTR [rdx+136]
mov rax, QWORD PTR [r10+144]
mov QWORD PTR [r10+136], r8
sbb rax, QWORD PTR [rdx+144]
mov r8, QWORD PTR [r10+152]
mov QWORD PTR [r10+144], rax
sbb r8, QWORD PTR [rdx+152]
mov rax, QWORD PTR [r10+160]
mov QWORD PTR [r10+152], r8
sbb rax, QWORD PTR [rdx+160]
mov r8, QWORD PTR [r10+168]
mov QWORD PTR [r10+160], rax
sbb r8, QWORD PTR [rdx+168]
mov rax, QWORD PTR [r10+176]
mov QWORD PTR [r10+168], r8
sbb rax, QWORD PTR [rdx+176]
mov r8, QWORD PTR [r10+184]
mov QWORD PTR [r10+176], rax
sbb r8, QWORD PTR [rdx+184]
mov rax, QWORD PTR [r10+192]
mov QWORD PTR [r10+184], r8
sbb rax, QWORD PTR [rdx+192]
mov r8, QWORD PTR [r10+200]
mov QWORD PTR [r10+192], rax
sbb r8, QWORD PTR [rdx+200]
mov rax, QWORD PTR [r10+208]
mov QWORD PTR [r10+200], r8
sbb rax, QWORD PTR [rdx+208]
mov r8, QWORD PTR [r10+216]
mov QWORD PTR [r10+208], rax
sbb r8, QWORD PTR [rdx+216]
mov rax, QWORD PTR [r10+224]
mov QWORD PTR [r10+216], r8
sbb rax, QWORD PTR [rdx+224]
mov r8, QWORD PTR [r10+232]
mov QWORD PTR [r10+224], rax
sbb r8, QWORD PTR [rdx+232]
mov rax, QWORD PTR [r10+240]
mov QWORD PTR [r10+232], r8
sbb rax, QWORD PTR [rdx+240]
mov r8, QWORD PTR [r10+248]
mov QWORD PTR [r10+240], rax
sbb r8, QWORD PTR [rdx+248]
mov QWORD PTR [r10+248], r8
sbb r9, 0
; z1 -= z0 (z0 is r[0..31]); the borrow comes off r9.
mov rax, QWORD PTR [r10]
sub rax, QWORD PTR [rcx]
mov r8, QWORD PTR [r10+8]
mov QWORD PTR [r10], rax
sbb r8, QWORD PTR [rcx+8]
mov rax, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], r8
sbb rax, QWORD PTR [rcx+16]
mov r8, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], rax
sbb r8, QWORD PTR [rcx+24]
mov rax, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], r8
sbb rax, QWORD PTR [rcx+32]
mov r8, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], rax
sbb r8, QWORD PTR [rcx+40]
mov rax, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], r8
sbb rax, QWORD PTR [rcx+48]
mov r8, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], rax
sbb r8, QWORD PTR [rcx+56]
mov rax, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], r8
sbb rax, QWORD PTR [rcx+64]
mov r8, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], rax
sbb r8, QWORD PTR [rcx+72]
mov rax, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], r8
sbb rax, QWORD PTR [rcx+80]
mov r8, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], rax
sbb r8, QWORD PTR [rcx+88]
mov rax, QWORD PTR [r10+96]
mov QWORD PTR [r10+88], r8
sbb rax, QWORD PTR [rcx+96]
mov r8, QWORD PTR [r10+104]
mov QWORD PTR [r10+96], rax
sbb r8, QWORD PTR [rcx+104]
mov rax, QWORD PTR [r10+112]
mov QWORD PTR [r10+104], r8
sbb rax, QWORD PTR [rcx+112]
mov r8, QWORD PTR [r10+120]
mov QWORD PTR [r10+112], rax
sbb r8, QWORD PTR [rcx+120]
mov rax, QWORD PTR [r10+128]
mov QWORD PTR [r10+120], r8
sbb rax, QWORD PTR [rcx+128]
mov r8, QWORD PTR [r10+136]
mov QWORD PTR [r10+128], rax
sbb r8, QWORD PTR [rcx+136]
mov rax, QWORD PTR [r10+144]
mov QWORD PTR [r10+136], r8
sbb rax, QWORD PTR [rcx+144]
mov r8, QWORD PTR [r10+152]
mov QWORD PTR [r10+144], rax
sbb r8, QWORD PTR [rcx+152]
mov rax, QWORD PTR [r10+160]
mov QWORD PTR [r10+152], r8
sbb rax, QWORD PTR [rcx+160]
mov r8, QWORD PTR [r10+168]
mov QWORD PTR [r10+160], rax
sbb r8, QWORD PTR [rcx+168]
mov rax, QWORD PTR [r10+176]
mov QWORD PTR [r10+168], r8
sbb rax, QWORD PTR [rcx+176]
mov r8, QWORD PTR [r10+184]
mov QWORD PTR [r10+176], rax
sbb r8, QWORD PTR [rcx+184]
mov rax, QWORD PTR [r10+192]
mov QWORD PTR [r10+184], r8
sbb rax, QWORD PTR [rcx+192]
mov r8, QWORD PTR [r10+200]
mov QWORD PTR [r10+192], rax
sbb r8, QWORD PTR [rcx+200]
mov rax, QWORD PTR [r10+208]
mov QWORD PTR [r10+200], r8
sbb rax, QWORD PTR [rcx+208]
mov r8, QWORD PTR [r10+216]
mov QWORD PTR [r10+208], rax
sbb r8, QWORD PTR [rcx+216]
mov rax, QWORD PTR [r10+224]
mov QWORD PTR [r10+216], r8
sbb rax, QWORD PTR [rcx+224]
mov r8, QWORD PTR [r10+232]
mov QWORD PTR [r10+224], rax
sbb r8, QWORD PTR [rcx+232]
mov rax, QWORD PTR [r10+240]
mov QWORD PTR [r10+232], r8
sbb rax, QWORD PTR [rcx+240]
mov r8, QWORD PTR [r10+248]
mov QWORD PTR [r10+240], rax
sbb r8, QWORD PTR [rcx+248]
mov QWORD PTR [r10+248], r8
sbb r9, 0
; Add in place
; r[16..47] += (z1 - z2 - z0); the running top word in r9 becomes r[48].
mov rax, QWORD PTR [rcx+128]
add rax, QWORD PTR [r10]
mov r8, QWORD PTR [rcx+136]
mov QWORD PTR [rcx+128], rax
adc r8, QWORD PTR [r10+8]
mov rax, QWORD PTR [rcx+144]
mov QWORD PTR [rcx+136], r8
adc rax, QWORD PTR [r10+16]
mov r8, QWORD PTR [rcx+152]
mov QWORD PTR [rcx+144], rax
adc r8, QWORD PTR [r10+24]
mov rax, QWORD PTR [rcx+160]
mov QWORD PTR [rcx+152], r8
adc rax, QWORD PTR [r10+32]
mov r8, QWORD PTR [rcx+168]
mov QWORD PTR [rcx+160], rax
adc r8, QWORD PTR [r10+40]
mov rax, QWORD PTR [rcx+176]
mov QWORD PTR [rcx+168], r8
adc rax, QWORD PTR [r10+48]
mov r8, QWORD PTR [rcx+184]
mov QWORD PTR [rcx+176], rax
adc r8, QWORD PTR [r10+56]
mov rax, QWORD PTR [rcx+192]
mov QWORD PTR [rcx+184], r8
adc rax, QWORD PTR [r10+64]
mov r8, QWORD PTR [rcx+200]
mov QWORD PTR [rcx+192], rax
adc r8, QWORD PTR [r10+72]
mov rax, QWORD PTR [rcx+208]
mov QWORD PTR [rcx+200], r8
adc rax, QWORD PTR [r10+80]
mov r8, QWORD PTR [rcx+216]
mov QWORD PTR [rcx+208], rax
adc r8, QWORD PTR [r10+88]
mov rax, QWORD PTR [rcx+224]
mov QWORD PTR [rcx+216], r8
adc rax, QWORD PTR [r10+96]
mov r8, QWORD PTR [rcx+232]
mov QWORD PTR [rcx+224], rax
adc r8, QWORD PTR [r10+104]
mov rax, QWORD PTR [rcx+240]
mov QWORD PTR [rcx+232], r8
adc rax, QWORD PTR [r10+112]
mov r8, QWORD PTR [rcx+248]
mov QWORD PTR [rcx+240], rax
adc r8, QWORD PTR [r10+120]
mov rax, QWORD PTR [rcx+256]
mov QWORD PTR [rcx+248], r8
adc rax, QWORD PTR [r10+128]
mov r8, QWORD PTR [rcx+264]
mov QWORD PTR [rcx+256], rax
adc r8, QWORD PTR [r10+136]
mov rax, QWORD PTR [rcx+272]
mov QWORD PTR [rcx+264], r8
adc rax, QWORD PTR [r10+144]
mov r8, QWORD PTR [rcx+280]
mov QWORD PTR [rcx+272], rax
adc r8, QWORD PTR [r10+152]
mov rax, QWORD PTR [rcx+288]
mov QWORD PTR [rcx+280], r8
adc rax, QWORD PTR [r10+160]
mov r8, QWORD PTR [rcx+296]
mov QWORD PTR [rcx+288], rax
adc r8, QWORD PTR [r10+168]
mov rax, QWORD PTR [rcx+304]
mov QWORD PTR [rcx+296], r8
adc rax, QWORD PTR [r10+176]
mov r8, QWORD PTR [rcx+312]
mov QWORD PTR [rcx+304], rax
adc r8, QWORD PTR [r10+184]
mov rax, QWORD PTR [rcx+320]
mov QWORD PTR [rcx+312], r8
adc rax, QWORD PTR [r10+192]
mov r8, QWORD PTR [rcx+328]
mov QWORD PTR [rcx+320], rax
adc r8, QWORD PTR [r10+200]
mov rax, QWORD PTR [rcx+336]
mov QWORD PTR [rcx+328], r8
adc rax, QWORD PTR [r10+208]
mov r8, QWORD PTR [rcx+344]
mov QWORD PTR [rcx+336], rax
adc r8, QWORD PTR [r10+216]
mov rax, QWORD PTR [rcx+352]
mov QWORD PTR [rcx+344], r8
adc rax, QWORD PTR [r10+224]
mov r8, QWORD PTR [rcx+360]
mov QWORD PTR [rcx+352], rax
adc r8, QWORD PTR [r10+232]
mov rax, QWORD PTR [rcx+368]
mov QWORD PTR [rcx+360], r8
adc rax, QWORD PTR [r10+240]
mov r8, QWORD PTR [rcx+376]
mov QWORD PTR [rcx+368], rax
adc r8, QWORD PTR [r10+248]
mov QWORD PTR [rcx+376], r8
adc r9, 0
mov QWORD PTR [rcx+384], r9
; Add in place
; r[32..48] += low 17 words of z2 (rdx still points at [rsp+256]).
mov rax, QWORD PTR [rcx+256]
add rax, QWORD PTR [rdx]
mov r8, QWORD PTR [rcx+264]
mov QWORD PTR [rcx+256], rax
adc r8, QWORD PTR [rdx+8]
mov rax, QWORD PTR [rcx+272]
mov QWORD PTR [rcx+264], r8
adc rax, QWORD PTR [rdx+16]
mov r8, QWORD PTR [rcx+280]
mov QWORD PTR [rcx+272], rax
adc r8, QWORD PTR [rdx+24]
mov rax, QWORD PTR [rcx+288]
mov QWORD PTR [rcx+280], r8
adc rax, QWORD PTR [rdx+32]
mov r8, QWORD PTR [rcx+296]
mov QWORD PTR [rcx+288], rax
adc r8, QWORD PTR [rdx+40]
mov rax, QWORD PTR [rcx+304]
mov QWORD PTR [rcx+296], r8
adc rax, QWORD PTR [rdx+48]
mov r8, QWORD PTR [rcx+312]
mov QWORD PTR [rcx+304], rax
adc r8, QWORD PTR [rdx+56]
mov rax, QWORD PTR [rcx+320]
mov QWORD PTR [rcx+312], r8
adc rax, QWORD PTR [rdx+64]
mov r8, QWORD PTR [rcx+328]
mov QWORD PTR [rcx+320], rax
adc r8, QWORD PTR [rdx+72]
mov rax, QWORD PTR [rcx+336]
mov QWORD PTR [rcx+328], r8
adc rax, QWORD PTR [rdx+80]
mov r8, QWORD PTR [rcx+344]
mov QWORD PTR [rcx+336], rax
adc r8, QWORD PTR [rdx+88]
mov rax, QWORD PTR [rcx+352]
mov QWORD PTR [rcx+344], r8
adc rax, QWORD PTR [rdx+96]
mov r8, QWORD PTR [rcx+360]
mov QWORD PTR [rcx+352], rax
adc r8, QWORD PTR [rdx+104]
mov rax, QWORD PTR [rcx+368]
mov QWORD PTR [rcx+360], r8
adc rax, QWORD PTR [rdx+112]
mov r8, QWORD PTR [rcx+376]
mov QWORD PTR [rcx+368], rax
adc r8, QWORD PTR [rdx+120]
mov rax, QWORD PTR [rcx+384]
mov QWORD PTR [rcx+376], r8
adc rax, QWORD PTR [rdx+128]
mov QWORD PTR [rcx+384], rax
; Add to zero
; r[49..63] = top 15 words of z2 plus the carry out of r[48].
; The carry flag left by the adc above feeds the first adc below, so no
; flag-writing instruction may sit between the two chains. Previously an
; "adc r9, 0" here consumed the carry into an unused register and cleared
; CF, so a carry out of r[48] was dropped and r[49..63] could come out one
; too small.
mov rax, QWORD PTR [rdx+136]
adc rax, 0
mov r8, QWORD PTR [rdx+144]
mov QWORD PTR [rcx+392], rax
adc r8, 0
mov rax, QWORD PTR [rdx+152]
mov QWORD PTR [rcx+400], r8
adc rax, 0
mov r8, QWORD PTR [rdx+160]
mov QWORD PTR [rcx+408], rax
adc r8, 0
mov rax, QWORD PTR [rdx+168]
mov QWORD PTR [rcx+416], r8
adc rax, 0
mov r8, QWORD PTR [rdx+176]
mov QWORD PTR [rcx+424], rax
adc r8, 0
mov rax, QWORD PTR [rdx+184]
mov QWORD PTR [rcx+432], r8
adc rax, 0
mov r8, QWORD PTR [rdx+192]
mov QWORD PTR [rcx+440], rax
adc r8, 0
mov rax, QWORD PTR [rdx+200]
mov QWORD PTR [rcx+448], r8
adc rax, 0
mov r8, QWORD PTR [rdx+208]
mov QWORD PTR [rcx+456], rax
adc r8, 0
mov rax, QWORD PTR [rdx+216]
mov QWORD PTR [rcx+464], r8
adc rax, 0
mov r8, QWORD PTR [rdx+224]
mov QWORD PTR [rcx+472], rax
adc r8, 0
mov rax, QWORD PTR [rdx+232]
mov QWORD PTR [rcx+480], r8
adc rax, 0
mov r8, QWORD PTR [rdx+240]
mov QWORD PTR [rcx+488], rax
adc r8, 0
mov rax, QWORD PTR [rdx+248]
mov QWORD PTR [rcx+496], r8
adc rax, 0
mov QWORD PTR [rcx+504], rax
add rsp, 664
pop r12
ret
sp_2048_sqr_32 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Multiply a and b into r. (r = a * b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_2048_mul_avx2_32 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
sub rsp, 808
mov QWORD PTR [rsp+768], rcx
mov QWORD PTR [rsp+776], rdx
mov QWORD PTR [rsp+784], r8
lea r12, QWORD PTR [rsp+512]
lea r14, QWORD PTR [rdx+128]
; Add
mov rax, QWORD PTR [rdx]
xor r15, r15
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [rdx+8]
mov QWORD PTR [r12], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [rdx+16]
mov QWORD PTR [r12+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [rdx+24]
mov QWORD PTR [r12+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [rdx+32]
mov QWORD PTR [r12+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [rdx+40]
mov QWORD PTR [r12+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r12+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [rdx+56]
mov QWORD PTR [r12+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [rdx+64]
mov QWORD PTR [r12+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [rdx+72]
mov QWORD PTR [r12+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [rdx+80]
mov QWORD PTR [r12+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [rdx+88]
mov QWORD PTR [r12+80], r9
adc r10, QWORD PTR [r14+88]
mov rax, QWORD PTR [rdx+96]
mov QWORD PTR [r12+88], r10
adc rax, QWORD PTR [r14+96]
mov r9, QWORD PTR [rdx+104]
mov QWORD PTR [r12+96], rax
adc r9, QWORD PTR [r14+104]
mov r10, QWORD PTR [rdx+112]
mov QWORD PTR [r12+104], r9
adc r10, QWORD PTR [r14+112]
mov rax, QWORD PTR [rdx+120]
mov QWORD PTR [r12+112], r10
adc rax, QWORD PTR [r14+120]
mov QWORD PTR [r12+120], rax
adc r15, 0
mov QWORD PTR [rsp+792], r15
lea r13, QWORD PTR [rsp+640]
lea r14, QWORD PTR [r8+128]
; Add
mov rax, QWORD PTR [r8]
xor rdi, rdi
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [r8+8]
mov QWORD PTR [r13], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [r8+16]
mov QWORD PTR [r13+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [r8+24]
mov QWORD PTR [r13+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [r8+32]
mov QWORD PTR [r13+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [r8+40]
mov QWORD PTR [r13+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [r8+48]
mov QWORD PTR [r13+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [r8+56]
mov QWORD PTR [r13+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [r8+64]
mov QWORD PTR [r13+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [r8+72]
mov QWORD PTR [r13+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [r8+80]
mov QWORD PTR [r13+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [r8+88]
mov QWORD PTR [r13+80], r9
adc r10, QWORD PTR [r14+88]
mov rax, QWORD PTR [r8+96]
mov QWORD PTR [r13+88], r10
adc rax, QWORD PTR [r14+96]
mov r9, QWORD PTR [r8+104]
mov QWORD PTR [r13+96], rax
adc r9, QWORD PTR [r14+104]
mov r10, QWORD PTR [r8+112]
mov QWORD PTR [r13+104], r9
adc r10, QWORD PTR [r14+112]
mov rax, QWORD PTR [r8+120]
mov QWORD PTR [r13+112], r10
adc rax, QWORD PTR [r14+120]
mov QWORD PTR [r13+120], rax
adc rdi, 0
mov QWORD PTR [rsp+800], rdi
mov r8, r13
mov rdx, r12
mov rcx, rsp
call sp_2048_mul_avx2_16
mov r8, QWORD PTR [rsp+784]
mov rdx, QWORD PTR [rsp+776]
lea rcx, QWORD PTR [rsp+256]
add r8, 128
add rdx, 128
call sp_2048_mul_avx2_16
mov r8, QWORD PTR [rsp+784]
mov rdx, QWORD PTR [rsp+776]
mov rcx, QWORD PTR [rsp+768]
call sp_2048_mul_avx2_16
IFDEF _WIN64
mov r8, QWORD PTR [rsp+784]
mov rdx, QWORD PTR [rsp+776]
mov rcx, QWORD PTR [rsp+768]
ENDIF
mov r15, QWORD PTR [rsp+792]
mov rdi, QWORD PTR [rsp+800]
mov rsi, QWORD PTR [rsp+768]
mov r11, r15
lea r12, QWORD PTR [rsp+512]
lea r13, QWORD PTR [rsp+640]
and r11, rdi
neg r15
neg rdi
add rsi, 256
mov rax, QWORD PTR [r12]
mov r9, QWORD PTR [r13]
pext rax, rax, rdi
pext r9, r9, r15
add rax, r9
mov r9, QWORD PTR [r12+8]
mov r10, QWORD PTR [r13+8]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi], rax
adc r9, r10
mov r10, QWORD PTR [r12+16]
mov rax, QWORD PTR [r13+16]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+8], r9
adc r10, rax
mov rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [r13+24]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+16], r10
adc rax, r9
mov r9, QWORD PTR [r12+32]
mov r10, QWORD PTR [r13+32]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+24], rax
adc r9, r10
mov r10, QWORD PTR [r12+40]
mov rax, QWORD PTR [r13+40]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+32], r9
adc r10, rax
mov rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [r13+48]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+40], r10
adc rax, r9
mov r9, QWORD PTR [r12+56]
mov r10, QWORD PTR [r13+56]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+48], rax
adc r9, r10
mov r10, QWORD PTR [r12+64]
mov rax, QWORD PTR [r13+64]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+56], r9
adc r10, rax
mov rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [r13+72]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+64], r10
adc rax, r9
mov r9, QWORD PTR [r12+80]
mov r10, QWORD PTR [r13+80]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+72], rax
adc r9, r10
mov r10, QWORD PTR [r12+88]
mov rax, QWORD PTR [r13+88]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+80], r9
adc r10, rax
mov rax, QWORD PTR [r12+96]
mov r9, QWORD PTR [r13+96]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+88], r10
adc rax, r9
mov r9, QWORD PTR [r12+104]
mov r10, QWORD PTR [r13+104]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+96], rax
adc r9, r10
mov r10, QWORD PTR [r12+112]
mov rax, QWORD PTR [r13+112]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+104], r9
adc r10, rax
mov rax, QWORD PTR [r12+120]
mov r9, QWORD PTR [r13+120]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+112], r10
adc rax, r9
mov QWORD PTR [rsi+120], rax
adc r11, 0
lea r13, QWORD PTR [rsp+256]
mov r12, rsp
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [r13]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [r13+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [r13+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [r13+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [r13+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [r13+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [r13+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [r13+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [r13+184]
mov rax, QWORD PTR [r12+192]
mov QWORD PTR [r12+184], r10
sbb rax, QWORD PTR [r13+192]
mov r9, QWORD PTR [r12+200]
mov QWORD PTR [r12+192], rax
sbb r9, QWORD PTR [r13+200]
mov r10, QWORD PTR [r12+208]
mov QWORD PTR [r12+200], r9
sbb r10, QWORD PTR [r13+208]
mov rax, QWORD PTR [r12+216]
mov QWORD PTR [r12+208], r10
sbb rax, QWORD PTR [r13+216]
mov r9, QWORD PTR [r12+224]
mov QWORD PTR [r12+216], rax
sbb r9, QWORD PTR [r13+224]
mov r10, QWORD PTR [r12+232]
mov QWORD PTR [r12+224], r9
sbb r10, QWORD PTR [r13+232]
mov rax, QWORD PTR [r12+240]
mov QWORD PTR [r12+232], r10
sbb rax, QWORD PTR [r13+240]
mov r9, QWORD PTR [r12+248]
mov QWORD PTR [r12+240], rax
sbb r9, QWORD PTR [r13+248]
mov QWORD PTR [r12+248], r9
sbb r11, 0
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [rcx]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [rcx+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [rcx+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [rcx+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [rcx+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [rcx+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [rcx+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [rcx+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [rcx+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [rcx+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [rcx+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [rcx+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [rcx+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [rcx+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [rcx+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [rcx+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [rcx+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [rcx+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [rcx+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [rcx+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [rcx+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [rcx+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [rcx+184]
mov rax, QWORD PTR [r12+192]
mov QWORD PTR [r12+184], r10
sbb rax, QWORD PTR [rcx+192]
mov r9, QWORD PTR [r12+200]
mov QWORD PTR [r12+192], rax
sbb r9, QWORD PTR [rcx+200]
mov r10, QWORD PTR [r12+208]
mov QWORD PTR [r12+200], r9
sbb r10, QWORD PTR [rcx+208]
mov rax, QWORD PTR [r12+216]
mov QWORD PTR [r12+208], r10
sbb rax, QWORD PTR [rcx+216]
mov r9, QWORD PTR [r12+224]
mov QWORD PTR [r12+216], rax
sbb r9, QWORD PTR [rcx+224]
mov r10, QWORD PTR [r12+232]
mov QWORD PTR [r12+224], r9
sbb r10, QWORD PTR [rcx+232]
mov rax, QWORD PTR [r12+240]
mov QWORD PTR [r12+232], r10
sbb rax, QWORD PTR [rcx+240]
mov r9, QWORD PTR [r12+248]
mov QWORD PTR [r12+240], rax
sbb r9, QWORD PTR [rcx+248]
mov QWORD PTR [r12+248], r9
sbb r11, 0
sub rsi, 128
; Add
mov rax, QWORD PTR [rsi]
add rax, QWORD PTR [r12]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r12+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r12+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r12+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r12+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r12+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r12+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r12+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r12+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r12+96]
mov r9, QWORD PTR [rsi+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r12+104]
mov r10, QWORD PTR [rsi+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r12+112]
mov rax, QWORD PTR [rsi+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r12+120]
mov r9, QWORD PTR [rsi+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r12+128]
mov r10, QWORD PTR [rsi+136]
mov QWORD PTR [rsi+128], r9
adc r10, QWORD PTR [r12+136]
mov rax, QWORD PTR [rsi+144]
mov QWORD PTR [rsi+136], r10
adc rax, QWORD PTR [r12+144]
mov r9, QWORD PTR [rsi+152]
mov QWORD PTR [rsi+144], rax
adc r9, QWORD PTR [r12+152]
mov r10, QWORD PTR [rsi+160]
mov QWORD PTR [rsi+152], r9
adc r10, QWORD PTR [r12+160]
mov rax, QWORD PTR [rsi+168]
mov QWORD PTR [rsi+160], r10
adc rax, QWORD PTR [r12+168]
mov r9, QWORD PTR [rsi+176]
mov QWORD PTR [rsi+168], rax
adc r9, QWORD PTR [r12+176]
mov r10, QWORD PTR [rsi+184]
mov QWORD PTR [rsi+176], r9
adc r10, QWORD PTR [r12+184]
mov rax, QWORD PTR [rsi+192]
mov QWORD PTR [rsi+184], r10
adc rax, QWORD PTR [r12+192]
mov r9, QWORD PTR [rsi+200]
mov QWORD PTR [rsi+192], rax
adc r9, QWORD PTR [r12+200]
mov r10, QWORD PTR [rsi+208]
mov QWORD PTR [rsi+200], r9
adc r10, QWORD PTR [r12+208]
mov rax, QWORD PTR [rsi+216]
mov QWORD PTR [rsi+208], r10
adc rax, QWORD PTR [r12+216]
mov r9, QWORD PTR [rsi+224]
mov QWORD PTR [rsi+216], rax
adc r9, QWORD PTR [r12+224]
mov r10, QWORD PTR [rsi+232]
mov QWORD PTR [rsi+224], r9
adc r10, QWORD PTR [r12+232]
mov rax, QWORD PTR [rsi+240]
mov QWORD PTR [rsi+232], r10
adc rax, QWORD PTR [r12+240]
mov r9, QWORD PTR [rsi+248]
mov QWORD PTR [rsi+240], rax
adc r9, QWORD PTR [r12+248]
mov QWORD PTR [rsi+248], r9
adc r11, 0
mov QWORD PTR [rcx+384], r11
add rsi, 128
; Add
mov rax, QWORD PTR [rsi]
xor r11, r11
add rax, QWORD PTR [r13]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [rsi+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [rsi+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [rsi+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [rsi+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r13+128]
mov QWORD PTR [rsi+128], r9
adc r11, 0
; Add to zero
mov rax, QWORD PTR [r13+136]
adc rax, 0
mov r9, QWORD PTR [r13+144]
mov QWORD PTR [rsi+136], rax
adc r9, 0
mov r10, QWORD PTR [r13+152]
mov QWORD PTR [rsi+144], r9
adc r10, 0
mov rax, QWORD PTR [r13+160]
mov QWORD PTR [rsi+152], r10
adc rax, 0
mov r9, QWORD PTR [r13+168]
mov QWORD PTR [rsi+160], rax
adc r9, 0
mov r10, QWORD PTR [r13+176]
mov QWORD PTR [rsi+168], r9
adc r10, 0
mov rax, QWORD PTR [r13+184]
mov QWORD PTR [rsi+176], r10
adc rax, 0
mov r9, QWORD PTR [r13+192]
mov QWORD PTR [rsi+184], rax
adc r9, 0
mov r10, QWORD PTR [r13+200]
mov QWORD PTR [rsi+192], r9
adc r10, 0
mov rax, QWORD PTR [r13+208]
mov QWORD PTR [rsi+200], r10
adc rax, 0
mov r9, QWORD PTR [r13+216]
mov QWORD PTR [rsi+208], rax
adc r9, 0
mov r10, QWORD PTR [r13+224]
mov QWORD PTR [rsi+216], r9
adc r10, 0
mov rax, QWORD PTR [r13+232]
mov QWORD PTR [rsi+224], r10
adc rax, 0
mov r9, QWORD PTR [r13+240]
mov QWORD PTR [rsi+232], rax
adc r9, 0
mov r10, QWORD PTR [r13+248]
mov QWORD PTR [rsi+240], r9
adc r10, 0
mov QWORD PTR [rsi+248], r10
add rsp, 808
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_2048_mul_avx2_32 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Square a and put result in r. (r = a * a)
; *
; * r  A single precision integer. (rcx on entry; words 0..63 are written)
; * a  A single precision integer. (rdx on entry; words 0..31 are read)
; *
; * Method: one Karatsuba level built on three calls to
; * sp_2048_sqr_avx2_16 (defined elsewhere in this file; assumed to write the
; * 32-word square of a 16-word input).  With a = a_lo + a_hi * 2^1024:
; *   s  = (a_lo + a_hi) mod 2^1024, c = carry out of that sum (0 or 1)
; *   z1 = s^2, z2 = a_hi^2, z0 = a_lo^2
; *   r  = z0 + (z1 + 2*c*s*2^1024 + c*2^2048 - z0 - z2) * 2^1024 + z2 * 2^2048
; *
; * Stack frame (664 bytes):
; *   [rsp+0   .. rsp+255]  z1, later the middle term z1 - z2 - z0
; *   [rsp+256 .. rsp+511]  z2
; *   [rsp+512 .. rsp+639]  s (16 words)
; *   [rsp+640]             saved r pointer
; *   [rsp+648]             saved a pointer
; *   [rsp+656]             c
; *
; * Uses rax, rcx, rdx, r8-r11 as scratch; r12 is saved and restored.
; */
_text SEGMENT READONLY PARA
sp_2048_sqr_avx2_32 PROC
        push    r12
        sub     rsp, 664
        mov     QWORD PTR [rsp+640], rcx
        mov     QWORD PTR [rsp+648], rdx
        lea     r10, QWORD PTR [rsp+512]
        lea     r11, QWORD PTR [rdx+128]
        ; Add: s = a_lo + a_hi (16 words), carry out collected in r9
        mov     rax, QWORD PTR [rdx]
        xor     r9, r9
        add     rax, QWORD PTR [r11]
        mov     r8, QWORD PTR [rdx+8]
        mov     QWORD PTR [r10], rax
        adc     r8, QWORD PTR [r11+8]
        mov     rax, QWORD PTR [rdx+16]
        mov     QWORD PTR [r10+8], r8
        adc     rax, QWORD PTR [r11+16]
        mov     r8, QWORD PTR [rdx+24]
        mov     QWORD PTR [r10+16], rax
        adc     r8, QWORD PTR [r11+24]
        mov     rax, QWORD PTR [rdx+32]
        mov     QWORD PTR [r10+24], r8
        adc     rax, QWORD PTR [r11+32]
        mov     r8, QWORD PTR [rdx+40]
        mov     QWORD PTR [r10+32], rax
        adc     r8, QWORD PTR [r11+40]
        mov     rax, QWORD PTR [rdx+48]
        mov     QWORD PTR [r10+40], r8
        adc     rax, QWORD PTR [r11+48]
        mov     r8, QWORD PTR [rdx+56]
        mov     QWORD PTR [r10+48], rax
        adc     r8, QWORD PTR [r11+56]
        mov     rax, QWORD PTR [rdx+64]
        mov     QWORD PTR [r10+56], r8
        adc     rax, QWORD PTR [r11+64]
        mov     r8, QWORD PTR [rdx+72]
        mov     QWORD PTR [r10+64], rax
        adc     r8, QWORD PTR [r11+72]
        mov     rax, QWORD PTR [rdx+80]
        mov     QWORD PTR [r10+72], r8
        adc     rax, QWORD PTR [r11+80]
        mov     r8, QWORD PTR [rdx+88]
        mov     QWORD PTR [r10+80], rax
        adc     r8, QWORD PTR [r11+88]
        mov     rax, QWORD PTR [rdx+96]
        mov     QWORD PTR [r10+88], r8
        adc     rax, QWORD PTR [r11+96]
        mov     r8, QWORD PTR [rdx+104]
        mov     QWORD PTR [r10+96], rax
        adc     r8, QWORD PTR [r11+104]
        mov     rax, QWORD PTR [rdx+112]
        mov     QWORD PTR [r10+104], r8
        adc     rax, QWORD PTR [r11+112]
        mov     r8, QWORD PTR [rdx+120]
        mov     QWORD PTR [r10+112], rax
        adc     r8, QWORD PTR [r11+120]
        mov     QWORD PTR [r10+120], r8
        adc     r9, 0
        mov     QWORD PTR [rsp+656], r9
        ; z1 = s^2 -> [rsp+0]
        mov     rdx, r10
        mov     rcx, rsp
        call    sp_2048_sqr_avx2_16
        ; z2 = a_hi^2 -> [rsp+256]
        mov     rdx, QWORD PTR [rsp+648]
        lea     rcx, QWORD PTR [rsp+256]
        add     rdx, 128
        call    sp_2048_sqr_avx2_16
        ; z0 = a_lo^2 -> r[0..31]
        mov     rdx, QWORD PTR [rsp+648]
        mov     rcx, QWORD PTR [rsp+640]
        call    sp_2048_sqr_avx2_16
IFDEF _WIN64
        ; rcx/rdx are volatile across a call in the Microsoft x64 ABI: reload.
        mov     rdx, QWORD PTR [rsp+648]
        mov     rcx, QWORD PTR [rsp+640]
ENDIF
        ; r[32..47] = 2 * (c ? s : 0); r9 = c + carry out of the doubling.
        ; r12 = -c is an all-ones/zero mask; pext with it selects s or 0
        ; without branching and without touching the flags.
        mov     r12, QWORD PTR [rsp+656]
        lea     r10, QWORD PTR [rsp+512]
        mov     r9, r12
        neg     r12
        mov     rax, QWORD PTR [r10]
        pext    rax, rax, r12
        add     rax, rax
        mov     r8, QWORD PTR [r10+8]
        mov     QWORD PTR [rcx+256], rax
        pext    r8, r8, r12
        adc     r8, r8
        mov     rax, QWORD PTR [r10+16]
        mov     QWORD PTR [rcx+264], r8
        pext    rax, rax, r12
        adc     rax, rax
        mov     r8, QWORD PTR [r10+24]
        mov     QWORD PTR [rcx+272], rax
        pext    r8, r8, r12
        adc     r8, r8
        mov     rax, QWORD PTR [r10+32]
        mov     QWORD PTR [rcx+280], r8
        pext    rax, rax, r12
        adc     rax, rax
        mov     r8, QWORD PTR [r10+40]
        mov     QWORD PTR [rcx+288], rax
        pext    r8, r8, r12
        adc     r8, r8
        mov     rax, QWORD PTR [r10+48]
        mov     QWORD PTR [rcx+296], r8
        pext    rax, rax, r12
        adc     rax, rax
        mov     r8, QWORD PTR [r10+56]
        mov     QWORD PTR [rcx+304], rax
        pext    r8, r8, r12
        adc     r8, r8
        mov     rax, QWORD PTR [r10+64]
        mov     QWORD PTR [rcx+312], r8
        pext    rax, rax, r12
        adc     rax, rax
        mov     r8, QWORD PTR [r10+72]
        mov     QWORD PTR [rcx+320], rax
        pext    r8, r8, r12
        adc     r8, r8
        mov     rax, QWORD PTR [r10+80]
        mov     QWORD PTR [rcx+328], r8
        pext    rax, rax, r12
        adc     rax, rax
        mov     r8, QWORD PTR [r10+88]
        mov     QWORD PTR [rcx+336], rax
        pext    r8, r8, r12
        adc     r8, r8
        mov     rax, QWORD PTR [r10+96]
        mov     QWORD PTR [rcx+344], r8
        pext    rax, rax, r12
        adc     rax, rax
        mov     r8, QWORD PTR [r10+104]
        mov     QWORD PTR [rcx+352], rax
        pext    r8, r8, r12
        adc     r8, r8
        mov     rax, QWORD PTR [r10+112]
        mov     QWORD PTR [rcx+360], r8
        pext    rax, rax, r12
        adc     rax, rax
        mov     r8, QWORD PTR [r10+120]
        mov     QWORD PTR [rcx+368], rax
        pext    r8, r8, r12
        adc     r8, r8
        mov     QWORD PTR [rcx+376], r8
        adc     r9, 0
        ; z1 -= z2 (32 words); the borrow is taken from r9
        lea     rdx, QWORD PTR [rsp+256]
        mov     r10, rsp
        mov     rax, QWORD PTR [r10]
        sub     rax, QWORD PTR [rdx]
        mov     r8, QWORD PTR [r10+8]
        mov     QWORD PTR [r10], rax
        sbb     r8, QWORD PTR [rdx+8]
        mov     rax, QWORD PTR [r10+16]
        mov     QWORD PTR [r10+8], r8
        sbb     rax, QWORD PTR [rdx+16]
        mov     r8, QWORD PTR [r10+24]
        mov     QWORD PTR [r10+16], rax
        sbb     r8, QWORD PTR [rdx+24]
        mov     rax, QWORD PTR [r10+32]
        mov     QWORD PTR [r10+24], r8
        sbb     rax, QWORD PTR [rdx+32]
        mov     r8, QWORD PTR [r10+40]
        mov     QWORD PTR [r10+32], rax
        sbb     r8, QWORD PTR [rdx+40]
        mov     rax, QWORD PTR [r10+48]
        mov     QWORD PTR [r10+40], r8
        sbb     rax, QWORD PTR [rdx+48]
        mov     r8, QWORD PTR [r10+56]
        mov     QWORD PTR [r10+48], rax
        sbb     r8, QWORD PTR [rdx+56]
        mov     rax, QWORD PTR [r10+64]
        mov     QWORD PTR [r10+56], r8
        sbb     rax, QWORD PTR [rdx+64]
        mov     r8, QWORD PTR [r10+72]
        mov     QWORD PTR [r10+64], rax
        sbb     r8, QWORD PTR [rdx+72]
        mov     rax, QWORD PTR [r10+80]
        mov     QWORD PTR [r10+72], r8
        sbb     rax, QWORD PTR [rdx+80]
        mov     r8, QWORD PTR [r10+88]
        mov     QWORD PTR [r10+80], rax
        sbb     r8, QWORD PTR [rdx+88]
        mov     rax, QWORD PTR [r10+96]
        mov     QWORD PTR [r10+88], r8
        sbb     rax, QWORD PTR [rdx+96]
        mov     r8, QWORD PTR [r10+104]
        mov     QWORD PTR [r10+96], rax
        sbb     r8, QWORD PTR [rdx+104]
        mov     rax, QWORD PTR [r10+112]
        mov     QWORD PTR [r10+104], r8
        sbb     rax, QWORD PTR [rdx+112]
        mov     r8, QWORD PTR [r10+120]
        mov     QWORD PTR [r10+112], rax
        sbb     r8, QWORD PTR [rdx+120]
        mov     rax, QWORD PTR [r10+128]
        mov     QWORD PTR [r10+120], r8
        sbb     rax, QWORD PTR [rdx+128]
        mov     r8, QWORD PTR [r10+136]
        mov     QWORD PTR [r10+128], rax
        sbb     r8, QWORD PTR [rdx+136]
        mov     rax, QWORD PTR [r10+144]
        mov     QWORD PTR [r10+136], r8
        sbb     rax, QWORD PTR [rdx+144]
        mov     r8, QWORD PTR [r10+152]
        mov     QWORD PTR [r10+144], rax
        sbb     r8, QWORD PTR [rdx+152]
        mov     rax, QWORD PTR [r10+160]
        mov     QWORD PTR [r10+152], r8
        sbb     rax, QWORD PTR [rdx+160]
        mov     r8, QWORD PTR [r10+168]
        mov     QWORD PTR [r10+160], rax
        sbb     r8, QWORD PTR [rdx+168]
        mov     rax, QWORD PTR [r10+176]
        mov     QWORD PTR [r10+168], r8
        sbb     rax, QWORD PTR [rdx+176]
        mov     r8, QWORD PTR [r10+184]
        mov     QWORD PTR [r10+176], rax
        sbb     r8, QWORD PTR [rdx+184]
        mov     rax, QWORD PTR [r10+192]
        mov     QWORD PTR [r10+184], r8
        sbb     rax, QWORD PTR [rdx+192]
        mov     r8, QWORD PTR [r10+200]
        mov     QWORD PTR [r10+192], rax
        sbb     r8, QWORD PTR [rdx+200]
        mov     rax, QWORD PTR [r10+208]
        mov     QWORD PTR [r10+200], r8
        sbb     rax, QWORD PTR [rdx+208]
        mov     r8, QWORD PTR [r10+216]
        mov     QWORD PTR [r10+208], rax
        sbb     r8, QWORD PTR [rdx+216]
        mov     rax, QWORD PTR [r10+224]
        mov     QWORD PTR [r10+216], r8
        sbb     rax, QWORD PTR [rdx+224]
        mov     r8, QWORD PTR [r10+232]
        mov     QWORD PTR [r10+224], rax
        sbb     r8, QWORD PTR [rdx+232]
        mov     rax, QWORD PTR [r10+240]
        mov     QWORD PTR [r10+232], r8
        sbb     rax, QWORD PTR [rdx+240]
        mov     r8, QWORD PTR [r10+248]
        mov     QWORD PTR [r10+240], rax
        sbb     r8, QWORD PTR [rdx+248]
        mov     QWORD PTR [r10+248], r8
        sbb     r9, 0
        ; z1 -= z0 (z0 currently sits in r[0..31]); borrow again taken from r9
        mov     rax, QWORD PTR [r10]
        sub     rax, QWORD PTR [rcx]
        mov     r8, QWORD PTR [r10+8]
        mov     QWORD PTR [r10], rax
        sbb     r8, QWORD PTR [rcx+8]
        mov     rax, QWORD PTR [r10+16]
        mov     QWORD PTR [r10+8], r8
        sbb     rax, QWORD PTR [rcx+16]
        mov     r8, QWORD PTR [r10+24]
        mov     QWORD PTR [r10+16], rax
        sbb     r8, QWORD PTR [rcx+24]
        mov     rax, QWORD PTR [r10+32]
        mov     QWORD PTR [r10+24], r8
        sbb     rax, QWORD PTR [rcx+32]
        mov     r8, QWORD PTR [r10+40]
        mov     QWORD PTR [r10+32], rax
        sbb     r8, QWORD PTR [rcx+40]
        mov     rax, QWORD PTR [r10+48]
        mov     QWORD PTR [r10+40], r8
        sbb     rax, QWORD PTR [rcx+48]
        mov     r8, QWORD PTR [r10+56]
        mov     QWORD PTR [r10+48], rax
        sbb     r8, QWORD PTR [rcx+56]
        mov     rax, QWORD PTR [r10+64]
        mov     QWORD PTR [r10+56], r8
        sbb     rax, QWORD PTR [rcx+64]
        mov     r8, QWORD PTR [r10+72]
        mov     QWORD PTR [r10+64], rax
        sbb     r8, QWORD PTR [rcx+72]
        mov     rax, QWORD PTR [r10+80]
        mov     QWORD PTR [r10+72], r8
        sbb     rax, QWORD PTR [rcx+80]
        mov     r8, QWORD PTR [r10+88]
        mov     QWORD PTR [r10+80], rax
        sbb     r8, QWORD PTR [rcx+88]
        mov     rax, QWORD PTR [r10+96]
        mov     QWORD PTR [r10+88], r8
        sbb     rax, QWORD PTR [rcx+96]
        mov     r8, QWORD PTR [r10+104]
        mov     QWORD PTR [r10+96], rax
        sbb     r8, QWORD PTR [rcx+104]
        mov     rax, QWORD PTR [r10+112]
        mov     QWORD PTR [r10+104], r8
        sbb     rax, QWORD PTR [rcx+112]
        mov     r8, QWORD PTR [r10+120]
        mov     QWORD PTR [r10+112], rax
        sbb     r8, QWORD PTR [rcx+120]
        mov     rax, QWORD PTR [r10+128]
        mov     QWORD PTR [r10+120], r8
        sbb     rax, QWORD PTR [rcx+128]
        mov     r8, QWORD PTR [r10+136]
        mov     QWORD PTR [r10+128], rax
        sbb     r8, QWORD PTR [rcx+136]
        mov     rax, QWORD PTR [r10+144]
        mov     QWORD PTR [r10+136], r8
        sbb     rax, QWORD PTR [rcx+144]
        mov     r8, QWORD PTR [r10+152]
        mov     QWORD PTR [r10+144], rax
        sbb     r8, QWORD PTR [rcx+152]
        mov     rax, QWORD PTR [r10+160]
        mov     QWORD PTR [r10+152], r8
        sbb     rax, QWORD PTR [rcx+160]
        mov     r8, QWORD PTR [r10+168]
        mov     QWORD PTR [r10+160], rax
        sbb     r8, QWORD PTR [rcx+168]
        mov     rax, QWORD PTR [r10+176]
        mov     QWORD PTR [r10+168], r8
        sbb     rax, QWORD PTR [rcx+176]
        mov     r8, QWORD PTR [r10+184]
        mov     QWORD PTR [r10+176], rax
        sbb     r8, QWORD PTR [rcx+184]
        mov     rax, QWORD PTR [r10+192]
        mov     QWORD PTR [r10+184], r8
        sbb     rax, QWORD PTR [rcx+192]
        mov     r8, QWORD PTR [r10+200]
        mov     QWORD PTR [r10+192], rax
        sbb     r8, QWORD PTR [rcx+200]
        mov     rax, QWORD PTR [r10+208]
        mov     QWORD PTR [r10+200], r8
        sbb     rax, QWORD PTR [rcx+208]
        mov     r8, QWORD PTR [r10+216]
        mov     QWORD PTR [r10+208], rax
        sbb     r8, QWORD PTR [rcx+216]
        mov     rax, QWORD PTR [r10+224]
        mov     QWORD PTR [r10+216], r8
        sbb     rax, QWORD PTR [rcx+224]
        mov     r8, QWORD PTR [r10+232]
        mov     QWORD PTR [r10+224], rax
        sbb     r8, QWORD PTR [rcx+232]
        mov     rax, QWORD PTR [r10+240]
        mov     QWORD PTR [r10+232], r8
        sbb     rax, QWORD PTR [rcx+240]
        mov     r8, QWORD PTR [r10+248]
        mov     QWORD PTR [r10+240], rax
        sbb     r8, QWORD PTR [rcx+248]
        mov     QWORD PTR [r10+248], r8
        sbb     r9, 0
        ; Add in place: r[16..47] += middle term; r[48] = r9 + carry
        mov     rax, QWORD PTR [rcx+128]
        add     rax, QWORD PTR [r10]
        mov     r8, QWORD PTR [rcx+136]
        mov     QWORD PTR [rcx+128], rax
        adc     r8, QWORD PTR [r10+8]
        mov     rax, QWORD PTR [rcx+144]
        mov     QWORD PTR [rcx+136], r8
        adc     rax, QWORD PTR [r10+16]
        mov     r8, QWORD PTR [rcx+152]
        mov     QWORD PTR [rcx+144], rax
        adc     r8, QWORD PTR [r10+24]
        mov     rax, QWORD PTR [rcx+160]
        mov     QWORD PTR [rcx+152], r8
        adc     rax, QWORD PTR [r10+32]
        mov     r8, QWORD PTR [rcx+168]
        mov     QWORD PTR [rcx+160], rax
        adc     r8, QWORD PTR [r10+40]
        mov     rax, QWORD PTR [rcx+176]
        mov     QWORD PTR [rcx+168], r8
        adc     rax, QWORD PTR [r10+48]
        mov     r8, QWORD PTR [rcx+184]
        mov     QWORD PTR [rcx+176], rax
        adc     r8, QWORD PTR [r10+56]
        mov     rax, QWORD PTR [rcx+192]
        mov     QWORD PTR [rcx+184], r8
        adc     rax, QWORD PTR [r10+64]
        mov     r8, QWORD PTR [rcx+200]
        mov     QWORD PTR [rcx+192], rax
        adc     r8, QWORD PTR [r10+72]
        mov     rax, QWORD PTR [rcx+208]
        mov     QWORD PTR [rcx+200], r8
        adc     rax, QWORD PTR [r10+80]
        mov     r8, QWORD PTR [rcx+216]
        mov     QWORD PTR [rcx+208], rax
        adc     r8, QWORD PTR [r10+88]
        mov     rax, QWORD PTR [rcx+224]
        mov     QWORD PTR [rcx+216], r8
        adc     rax, QWORD PTR [r10+96]
        mov     r8, QWORD PTR [rcx+232]
        mov     QWORD PTR [rcx+224], rax
        adc     r8, QWORD PTR [r10+104]
        mov     rax, QWORD PTR [rcx+240]
        mov     QWORD PTR [rcx+232], r8
        adc     rax, QWORD PTR [r10+112]
        mov     r8, QWORD PTR [rcx+248]
        mov     QWORD PTR [rcx+240], rax
        adc     r8, QWORD PTR [r10+120]
        mov     rax, QWORD PTR [rcx+256]
        mov     QWORD PTR [rcx+248], r8
        adc     rax, QWORD PTR [r10+128]
        mov     r8, QWORD PTR [rcx+264]
        mov     QWORD PTR [rcx+256], rax
        adc     r8, QWORD PTR [r10+136]
        mov     rax, QWORD PTR [rcx+272]
        mov     QWORD PTR [rcx+264], r8
        adc     rax, QWORD PTR [r10+144]
        mov     r8, QWORD PTR [rcx+280]
        mov     QWORD PTR [rcx+272], rax
        adc     r8, QWORD PTR [r10+152]
        mov     rax, QWORD PTR [rcx+288]
        mov     QWORD PTR [rcx+280], r8
        adc     rax, QWORD PTR [r10+160]
        mov     r8, QWORD PTR [rcx+296]
        mov     QWORD PTR [rcx+288], rax
        adc     r8, QWORD PTR [r10+168]
        mov     rax, QWORD PTR [rcx+304]
        mov     QWORD PTR [rcx+296], r8
        adc     rax, QWORD PTR [r10+176]
        mov     r8, QWORD PTR [rcx+312]
        mov     QWORD PTR [rcx+304], rax
        adc     r8, QWORD PTR [r10+184]
        mov     rax, QWORD PTR [rcx+320]
        mov     QWORD PTR [rcx+312], r8
        adc     rax, QWORD PTR [r10+192]
        mov     r8, QWORD PTR [rcx+328]
        mov     QWORD PTR [rcx+320], rax
        adc     r8, QWORD PTR [r10+200]
        mov     rax, QWORD PTR [rcx+336]
        mov     QWORD PTR [rcx+328], r8
        adc     rax, QWORD PTR [r10+208]
        mov     r8, QWORD PTR [rcx+344]
        mov     QWORD PTR [rcx+336], rax
        adc     r8, QWORD PTR [r10+216]
        mov     rax, QWORD PTR [rcx+352]
        mov     QWORD PTR [rcx+344], r8
        adc     rax, QWORD PTR [r10+224]
        mov     r8, QWORD PTR [rcx+360]
        mov     QWORD PTR [rcx+352], rax
        adc     r8, QWORD PTR [r10+232]
        mov     rax, QWORD PTR [rcx+368]
        mov     QWORD PTR [rcx+360], r8
        adc     rax, QWORD PTR [r10+240]
        mov     r8, QWORD PTR [rcx+376]
        mov     QWORD PTR [rcx+368], rax
        adc     r8, QWORD PTR [r10+248]
        mov     QWORD PTR [rcx+376], r8
        adc     r9, 0
        mov     QWORD PTR [rcx+384], r9
        ; Add in place: r[32..48] += z2[0..16].
        ; The carry out of word 48 must stay in CF so that the "Add to zero"
        ; chain below can ripple it into r[49..63].  Only mov instructions
        ; (which leave the flags alone) may sit between the two chains; in
        ; particular no "adc reg, 0" on a zeroed register, which would absorb
        ; the carry and clear CF.
        mov     rax, QWORD PTR [rcx+256]
        add     rax, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rcx+264]
        mov     QWORD PTR [rcx+256], rax
        adc     r8, QWORD PTR [rdx+8]
        mov     rax, QWORD PTR [rcx+272]
        mov     QWORD PTR [rcx+264], r8
        adc     rax, QWORD PTR [rdx+16]
        mov     r8, QWORD PTR [rcx+280]
        mov     QWORD PTR [rcx+272], rax
        adc     r8, QWORD PTR [rdx+24]
        mov     rax, QWORD PTR [rcx+288]
        mov     QWORD PTR [rcx+280], r8
        adc     rax, QWORD PTR [rdx+32]
        mov     r8, QWORD PTR [rcx+296]
        mov     QWORD PTR [rcx+288], rax
        adc     r8, QWORD PTR [rdx+40]
        mov     rax, QWORD PTR [rcx+304]
        mov     QWORD PTR [rcx+296], r8
        adc     rax, QWORD PTR [rdx+48]
        mov     r8, QWORD PTR [rcx+312]
        mov     QWORD PTR [rcx+304], rax
        adc     r8, QWORD PTR [rdx+56]
        mov     rax, QWORD PTR [rcx+320]
        mov     QWORD PTR [rcx+312], r8
        adc     rax, QWORD PTR [rdx+64]
        mov     r8, QWORD PTR [rcx+328]
        mov     QWORD PTR [rcx+320], rax
        adc     r8, QWORD PTR [rdx+72]
        mov     rax, QWORD PTR [rcx+336]
        mov     QWORD PTR [rcx+328], r8
        adc     rax, QWORD PTR [rdx+80]
        mov     r8, QWORD PTR [rcx+344]
        mov     QWORD PTR [rcx+336], rax
        adc     r8, QWORD PTR [rdx+88]
        mov     rax, QWORD PTR [rcx+352]
        mov     QWORD PTR [rcx+344], r8
        adc     rax, QWORD PTR [rdx+96]
        mov     r8, QWORD PTR [rcx+360]
        mov     QWORD PTR [rcx+352], rax
        adc     r8, QWORD PTR [rdx+104]
        mov     rax, QWORD PTR [rcx+368]
        mov     QWORD PTR [rcx+360], r8
        adc     rax, QWORD PTR [rdx+112]
        mov     r8, QWORD PTR [rcx+376]
        mov     QWORD PTR [rcx+368], rax
        adc     r8, QWORD PTR [rdx+120]
        mov     rax, QWORD PTR [rcx+384]
        mov     QWORD PTR [rcx+376], r8
        adc     rax, QWORD PTR [rdx+128]
        mov     QWORD PTR [rcx+384], rax
        ; Add to zero: r[49..63] = z2[17..31] + carry from the chain above
        mov     rax, QWORD PTR [rdx+136]
        adc     rax, 0
        mov     r8, QWORD PTR [rdx+144]
        mov     QWORD PTR [rcx+392], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rdx+152]
        mov     QWORD PTR [rcx+400], r8
        adc     rax, 0
        mov     r8, QWORD PTR [rdx+160]
        mov     QWORD PTR [rcx+408], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rdx+168]
        mov     QWORD PTR [rcx+416], r8
        adc     rax, 0
        mov     r8, QWORD PTR [rdx+176]
        mov     QWORD PTR [rcx+424], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rdx+184]
        mov     QWORD PTR [rcx+432], r8
        adc     rax, 0
        mov     r8, QWORD PTR [rdx+192]
        mov     QWORD PTR [rcx+440], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rdx+200]
        mov     QWORD PTR [rcx+448], r8
        adc     rax, 0
        mov     r8, QWORD PTR [rdx+208]
        mov     QWORD PTR [rcx+456], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rdx+216]
        mov     QWORD PTR [rcx+464], r8
        adc     rax, 0
        mov     r8, QWORD PTR [rdx+224]
        mov     QWORD PTR [rcx+472], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rdx+232]
        mov     QWORD PTR [rcx+480], r8
        adc     rax, 0
        mov     r8, QWORD PTR [rdx+240]
        mov     QWORD PTR [rcx+488], rax
        adc     r8, 0
        mov     rax, QWORD PTR [rdx+248]
        mov     QWORD PTR [rcx+496], r8
        adc     rax, 0
        mov     QWORD PTR [rcx+504], rax
        add     rsp, 664
        pop     r12
        ret
sp_2048_sqr_avx2_32 ENDP
_text ENDS
ENDIF
; /* Subtract b from a, storing the difference back into a. (a -= b)
; *
; * a  A single precision integer and result. (rcx; 16 words, read and written)
; * b  A single precision integer.            (rdx; 16 words, read only)
; *
; * Returns in rax: 0 when no borrow left the top word, all ones (-1) when
; * one did.  Only rax and r8 are used as scratch.
; */
_text SEGMENT READONLY PARA
sp_2048_sub_in_place_16 PROC
        ; Clear the return value first: xor rewrites the flags, so it has to
        ; happen before the borrow chain starts.
        xor     eax, eax
        ; Word 0 opens the borrow chain with sub; words 1..15 continue it
        ; with sbb.  mov leaves the flags untouched between the steps.
        mov     r8, QWORD PTR [rcx]
        sub     r8, QWORD PTR [rdx]
        mov     QWORD PTR [rcx], r8
        mov     r8, QWORD PTR [rcx+8]
        sbb     r8, QWORD PTR [rdx+8]
        mov     QWORD PTR [rcx+8], r8
        mov     r8, QWORD PTR [rcx+16]
        sbb     r8, QWORD PTR [rdx+16]
        mov     QWORD PTR [rcx+16], r8
        mov     r8, QWORD PTR [rcx+24]
        sbb     r8, QWORD PTR [rdx+24]
        mov     QWORD PTR [rcx+24], r8
        mov     r8, QWORD PTR [rcx+32]
        sbb     r8, QWORD PTR [rdx+32]
        mov     QWORD PTR [rcx+32], r8
        mov     r8, QWORD PTR [rcx+40]
        sbb     r8, QWORD PTR [rdx+40]
        mov     QWORD PTR [rcx+40], r8
        mov     r8, QWORD PTR [rcx+48]
        sbb     r8, QWORD PTR [rdx+48]
        mov     QWORD PTR [rcx+48], r8
        mov     r8, QWORD PTR [rcx+56]
        sbb     r8, QWORD PTR [rdx+56]
        mov     QWORD PTR [rcx+56], r8
        mov     r8, QWORD PTR [rcx+64]
        sbb     r8, QWORD PTR [rdx+64]
        mov     QWORD PTR [rcx+64], r8
        mov     r8, QWORD PTR [rcx+72]
        sbb     r8, QWORD PTR [rdx+72]
        mov     QWORD PTR [rcx+72], r8
        mov     r8, QWORD PTR [rcx+80]
        sbb     r8, QWORD PTR [rdx+80]
        mov     QWORD PTR [rcx+80], r8
        mov     r8, QWORD PTR [rcx+88]
        sbb     r8, QWORD PTR [rdx+88]
        mov     QWORD PTR [rcx+88], r8
        mov     r8, QWORD PTR [rcx+96]
        sbb     r8, QWORD PTR [rdx+96]
        mov     QWORD PTR [rcx+96], r8
        mov     r8, QWORD PTR [rcx+104]
        sbb     r8, QWORD PTR [rdx+104]
        mov     QWORD PTR [rcx+104], r8
        mov     r8, QWORD PTR [rcx+112]
        sbb     r8, QWORD PTR [rdx+112]
        mov     QWORD PTR [rcx+112], r8
        mov     r8, QWORD PTR [rcx+120]
        sbb     r8, QWORD PTR [rdx+120]
        mov     QWORD PTR [rcx+120], r8
        ; rax = 0 - 0 - borrow: 0 or -1
        sbb     rax, 0
        ret
sp_2048_sub_in_place_16 ENDP
_text ENDS
; /* Multiply a by the single digit b and store the product in r. (r = a * b)
; *
; * r  A single precision integer. (rcx; words 0..32 are written)
; * a  A single precision integer. (rdx; words 0..31 are read)
; * b  A single precision digit.   (r8)
; *
; * Scratch registers: rax, rdx, r9, r10.
; */
_text SEGMENT READONLY PARA
sp_2048_mul_d_32 PROC
        ; mul writes its high half to rdx, so keep the pointer to a in r9.
        mov     r9, rdx
        ; A[0] * B: low word goes straight out, high word becomes the carry.
        mov     rax, r8
        mul     QWORD PTR [r9]
        mov     QWORD PTR [rcx], rax
        mov     r10, rdx
        ; A[1..31] * B, expanded (unrolled) at assembly time.
        ; r10 carries the high word of the previous step.  The sum
        ; product + carry is at most (2^64-1)^2 + (2^64-1) < 2^128, so the
        ; "adc rdx, 0" can never itself carry out.
sp_2048_mul_d_32_off = 8
        REPEAT 31
        mov     rax, r8
        mul     QWORD PTR [r9+sp_2048_mul_d_32_off]
        add     rax, r10
        adc     rdx, 0
        mov     QWORD PTR [rcx+sp_2048_mul_d_32_off], rax
        mov     r10, rdx
sp_2048_mul_d_32_off = sp_2048_mul_d_32_off + 8
        ENDM
        ; The final carry is the 33rd result word.
        mov     QWORD PTR [rcx+256], r10
        ret
sp_2048_mul_d_32 ENDP
_text ENDS
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract b and 0 to leave a unchanged (r = a).
; *
; * r  Result of the conditional subtraction (16 64-bit words).
; * a  A single precision number to subtract from.
; * b  A single precision number to subtract.
; * m  Mask value ANDed with every word of b before it is subtracted.
; * returns 0 when no borrow leaves the top word, otherwise -1.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b, r9 = m.
; */
_text SEGMENT READONLY PARA
sp_2048_cond_sub_16 PROC
        sub     rsp, 128                        ; scratch for the 16 masked words of b
        xor     eax, eax                        ; rax = 0; the final sbb makes it 0 or -1
        ; Pass 1: scratch[i] = b[i] & m.  AND rewrites the carry flag, so all
        ; masking is finished before the borrow chain below is started.
csub16_pos = 0
        REPEAT 8
        mov     r10, QWORD PTR [r8+csub16_pos]
        mov     r11, QWORD PTR [r8+csub16_pos+8]
        and     r10, r9
        and     r11, r9
        mov     QWORD PTR [rsp+csub16_pos], r10
        mov     QWORD PTR [rsp+csub16_pos+8], r11
csub16_pos = csub16_pos + 16
        ENDM
        ; Pass 2: r = a - scratch.  Only mov and sbb sit between the first sub
        ; and the last sbb, so the borrow stays live in CF throughout.
        mov     r10, QWORD PTR [rdx]
        sub     r10, QWORD PTR [rsp]
        ; Words 1..14: each result word is stored one step after it is computed.
csub16_pos = 8
        REPEAT 7
        mov     r11, QWORD PTR [rdx+csub16_pos]
        sbb     r11, QWORD PTR [rsp+csub16_pos]
        mov     QWORD PTR [rcx+csub16_pos-8], r10
        mov     r10, QWORD PTR [rdx+csub16_pos+8]
        sbb     r10, QWORD PTR [rsp+csub16_pos+8]
        mov     QWORD PTR [rcx+csub16_pos], r11
csub16_pos = csub16_pos + 16
        ENDM
        ; Word 15 and the two pending stores.
        mov     r11, QWORD PTR [rdx+120]
        sbb     r11, QWORD PTR [rsp+120]
        mov     QWORD PTR [rcx+112], r10
        mov     QWORD PTR [rcx+120], r11
        sbb     rax, 0                          ; rax = 0 - borrow
        add     rsp, 128
        ret
sp_2048_cond_sub_16 ENDP
_text ENDS
; /* Reduce the number back to 2048 bits using Montgomery reduction.
; *
; * a   A single precision number to reduce in place.
; * m   The single precision number representing the modulus.
; * mp  The digit representing the negative inverse of m mod 2^n.
; *
; * Registers on entry: rcx = a, rdx = m, r8 = mp.
; * Each of the 16 rounds adds mu * m to a[i..i+15] with mu = a[i] * mp and
; * then advances the window by one word.  Afterwards the upper 16 words,
; * minus m when a carry left the top word, are written to the low 16 words
; * of a by sp_2048_cond_sub_16.
; */
; One inner word of a round: a[i+j] += m[j] * mu + cin, carry out in cout.
; woff = 8 * j, rcx = &a[i], r9 = m, r13 = mu; r14, rax and rdx are scratch.
MONT_RED16_WORD MACRO cout, cin, woff
        mov     rax, r13
        xor     cout, cout
        mul     QWORD PTR [r9+woff]
        mov     r14, QWORD PTR [rcx+woff]
        add     r14, rax
        adc     cout, rdx
        add     r14, cin
        mov     QWORD PTR [rcx+woff], r14
        adc     cout, 0
        ENDM
_text SEGMENT READONLY PARA
sp_2048_mont_reduce_16 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        mov     r9, rdx                         ; r9 = m, because mul overwrites rdx
        xor     rsi, rsi                        ; rsi = carry out of the previous round
        ; i = 16
        mov     r10, 16
        mov     r15, QWORD PTR [rcx]            ; r15/rdi cache the two lowest live words
        mov     rdi, QWORD PTR [rcx+8]
L_2048_mont_loop_16:
        ; mu = a[i] * mp
        mov     r13, r15
        imul    r13, r8
        ; a[i+0] += m[0] * mu  (only the carry is kept)
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9]
        add     r15, rax
        adc     r12, rdx
        ; a[i+1] += m[1] * mu  (becomes the cached a[i] of the next round)
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+8]
        mov     r15, rdi
        add     r15, rax
        adc     r11, rdx
        add     r15, r12
        adc     r11, 0
        ; a[i+2] += m[2] * mu  (becomes the cached a[i+1] of the next round)
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+16]
        mov     rdi, QWORD PTR [rcx+16]
        add     rdi, rax
        adc     r12, rdx
        add     rdi, r11
        adc     r12, 0
        ; a[i+3] .. a[i+14] += m[3..14] * mu, carry alternating r12 <-> r11
mred16_pos = 24
        REPEAT 6
        MONT_RED16_WORD r11, r12, mred16_pos
        MONT_RED16_WORD r12, r11, mred16_pos+8
mred16_pos = mred16_pos + 16
        ENDM
        ; a[i+15] += m[15] * mu, plus the carry saved from the previous round
        mov     rax, r13
        mul     QWORD PTR [r9+120]
        mov     r14, QWORD PTR [rcx+120]
        add     r12, rax
        adc     rdx, rsi
        mov     rsi, 0                          ; mov, not xor: CF of the adc above is still needed
        adc     rsi, 0
        add     r14, r12
        mov     QWORD PTR [rcx+120], r14
        adc     QWORD PTR [rcx+128], rdx
        adc     rsi, 0
        ; i -= 1
        add     rcx, 8
        dec     r10
        jnz     L_2048_mont_loop_16
        ; rcx has advanced by 128 bytes: write back the two cached words.
        mov     QWORD PTR [rcx], r15
        mov     QWORD PTR [rcx+8], rdi
        neg     rsi                             ; carry 0/1 -> mask 0/-1
        ; sp_2048_cond_sub_16(r = a, a = a + 16 words, b = m, m = mask).
        ; Order matters: r9 still holds the modulus pointer, so it is copied
        ; to r8 before the mask overwrites r9.
        mov     r8, r9
        mov     r9, rsi
        mov     rdx, rcx
        sub     rcx, 128
        ; NOTE(review): no shadow space is reserved and rsp is not re-aligned
        ; for this call; the sp_2048_cond_sub_16 in this file only touches
        ; its own 128-byte frame, so this works for that callee.
        call    sp_2048_cond_sub_16
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_2048_mont_reduce_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract b and 0 to leave a unchanged (r = a).
; *
; * r  Result of the conditional subtraction (16 64-bit words).
; * a  A single precision number to subtract from.
; * b  A single precision number to subtract.
; * m  Mask value applied to every word of b with PEXT.
; * returns 0 when no borrow leaves the top word, otherwise -1.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b, r9 = m.
; */
; One word of the borrow chain: aw = a[k] - pext(b[k], m) - CF, with the
; previous result word (done) stored in between.  woff = 8 * k.  None of
; mov/pext changes CF, so the borrow survives from step to step.
CSUB_AVX2_16_WORD MACRO bw, aw, done, woff
        mov     bw, QWORD PTR [r8+woff]
        mov     aw, QWORD PTR [rdx+woff]
        pext    bw, bw, r9
        mov     QWORD PTR [rcx+woff-8], done
        sbb     aw, bw
        ENDM
_text SEGMENT READONLY PARA
sp_2048_cond_sub_avx2_16 PROC
        push    r12
        xor     eax, eax                        ; rax = 0; the final sbb makes it 0 or -1
        ; Word 0 opens the borrow chain with a plain sub.
        mov     r12, QWORD PTR [r8]
        mov     r10, QWORD PTR [rdx]
        pext    r12, r12, r9
        sub     r10, r12
        ; Words 1..15: the three registers rotate roles every step.
csa16_pos = 8
        REPEAT 5
        CSUB_AVX2_16_WORD r12, r11, r10, csa16_pos
        CSUB_AVX2_16_WORD r10, r12, r11, csa16_pos+8
        CSUB_AVX2_16_WORD r11, r10, r12, csa16_pos+16
csa16_pos = csa16_pos + 24
        ENDM
        mov     QWORD PTR [rcx+120], r10
        sbb     rax, 0                          ; rax = 0 - borrow
        pop     r12
        ret
sp_2048_cond_sub_avx2_16 ENDP
_text ENDS
ENDIF
; /* Mul a by digit b into r. (r = a * b)
; *
; * r  A single precision integer; 17 words are written.
; * a  A single precision integer (16 words).
; * b  A single precision digit.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b.
; */
; One middle word: acc += low(A[k] * B) and is stored to r[k]; the high half
; plus carry goes into top, and any further carry into the freshly zeroed
; nxt.  woff = 8 * k, r9 = a, r8 = b.
MUL_D16_WORD MACRO acc, top, nxt, woff
        mov     rax, r8
        xor     nxt, nxt
        mul     QWORD PTR [r9+woff]
        add     acc, rax
        mov     QWORD PTR [rcx+woff], acc
        adc     top, rdx
        adc     nxt, 0
        ENDM
_text SEGMENT READONLY PARA
sp_2048_mul_d_16 PROC
        push    r12
        mov     r9, rdx                         ; r9 = a, because mul overwrites rdx
        ; A[0] * B
        mov     rax, r8
        xor     r12, r12
        mul     QWORD PTR [r9]
        mov     QWORD PTR [rcx], rax
        mov     r11, rdx
        ; A[1] .. A[12] * B, accumulators rotating r11 -> r12 -> r10
muld16_pos = 8
        REPEAT 4
        MUL_D16_WORD r11, r12, r10, muld16_pos
        MUL_D16_WORD r12, r10, r11, muld16_pos+8
        MUL_D16_WORD r10, r11, r12, muld16_pos+16
muld16_pos = muld16_pos + 24
        ENDM
        ; A[13] * B and A[14] * B
        MUL_D16_WORD r11, r12, r10, 104
        MUL_D16_WORD r12, r10, r11, 112
        ; A[15] * B: the last product supplies both top words.
        mov     rax, r8
        mul     QWORD PTR [r9+120]
        add     r10, rax
        adc     r11, rdx
        mov     QWORD PTR [rcx+120], r10
        mov     QWORD PTR [rcx+128], r11
        pop     r12
        ret
sp_2048_mul_d_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Mul a by digit b into r. (r = a * b)
; *
; * r  A single precision integer; 17 words are written.
; * a  A single precision integer (16 words).
; * b  A single precision digit.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b.
; */
; One middle word: r10:r9 = A[k] * B; cur += r9 on the adcx chain and is
; stored to r[k]; nxt = r10 on the adox chain.  woff = 8 * k, rax = a,
; rdx = b, r13 = 0.
MUL_D_AVX2_16_WORD MACRO cur, nxt, woff
        mulx    r10, r9, QWORD PTR [rax+woff]
        mov     nxt, r13
        adcx    cur, r9
        adox    nxt, r10
        mov     QWORD PTR [rcx+woff], cur
        ENDM
_text SEGMENT READONLY PARA
sp_2048_mul_d_avx2_16 PROC
        push    r12
        push    r13
        mov     rax, rdx                        ; rax = a; rdx becomes mulx's implicit operand
        ; A[0] * B
        mov     rdx, r8
        xor     r13, r13                        ; r13 = 0, and CF/OF start out clear
        mulx    r12, r11, QWORD PTR [rax]
        mov     QWORD PTR [rcx], r11
        ; A[1] .. A[14] * B, accumulators alternating r12 <-> r11
mulda16_pos = 8
        REPEAT 7
        MUL_D_AVX2_16_WORD r12, r11, mulda16_pos
        MUL_D_AVX2_16_WORD r11, r12, mulda16_pos+8
mulda16_pos = mulda16_pos + 16
        ENDM
        ; A[15] * B: the last carry is folded into the top word.
        mulx    r10, r9, QWORD PTR [rax+120]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        adcx    r11, r13
        mov     QWORD PTR [rcx+120], r12
        mov     QWORD PTR [rcx+128], r11
        pop     r13
        pop     r12
        ret
sp_2048_mul_d_avx2_16 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
; *
; * d1   The high order half of the number to divide.
; * d0   The low order half of the number to divide.
; * div  The divisor.
; * returns the quotient of the division.
; *
; * Registers on entry: rcx = d1, rdx = d0, r8 = div.
; * NOTE(review): the div instruction raises a divide error when the quotient
; * does not fit in 64 bits; presumably callers guarantee d1 < div - confirm.
; */
_text SEGMENT READONLY PARA
div_2048_word_asm_16 PROC
        mov     rax, rdx                        ; rax = d0 (low half of the dividend)
        mov     rdx, rcx                        ; rdx = d1 (high half of the dividend)
        div     r8                              ; rax = quotient, rdx = remainder
        ret
div_2048_word_asm_16 ENDP
_text ENDS
ENDIF
; /* Compare a with b in constant time.
; *
; * a  A single precision integer (16 words).
; * b  A single precision integer (16 words).
; * return -ve, 0 or +ve if a is less than, equal to or greater than b
; * respectively.
; *
; * Registers on entry: rcx = a, rdx = b.
; * All 16 word pairs are always read, from the most significant word down.
; * r8 is a mask that is -1 until the first differing pair has been seen and
; * 0 afterwards, so lower words cannot change the verdict held in rax.
; */
_text SEGMENT READONLY PARA
sp_2048_cmp_16 PROC
        push    r12
        xor     r9, r9                          ; r9  = 0, the "decided" value of the mask
        mov     r8, -1                          ; r8  = mask, -1 while still undecided
        mov     rax, -1                         ; rax = verdict; the final xor maps "equal" to 0
        mov     r10, 1                          ; r10 = verdict for a > b
cmp16_pos = 120
        REPEAT 16
        mov     r11, QWORD PTR [rcx+cmp16_pos]
        mov     r12, QWORD PTR [rdx+cmp16_pos]
        and     r11, r8
        and     r12, r8
        cmp     r11, r12
        cmova   rax, r10                        ; a word above b word  -> 1
        cmovc   rax, r8                         ; a word below b word  -> mask (-1 at this point)
        cmovnz  r8, r9                          ; words differ         -> clear the mask
cmp16_pos = cmp16_pos - 8
        ENDM
        xor     rax, r8                         ; mask still -1 (all equal): -1 ^ -1 = 0
        pop     r12
        ret
sp_2048_cmp_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Reduce the number back to 2048 bits using Montgomery reduction.
; *
; * a   A single precision number to reduce in place.
; * m   The single precision number representing the modulus.
; * mp  The digit representing the negative inverse of m mod 2^n.
; *
; * Registers on entry: rcx = a, rdx = m, r8 = mp.
; * r15, rdi, rsi and rbx cache the four lowest live words of a, r9 points 64
; * bytes past the current window and rbp carries the overflow of the previous
; * round.  The main loop runs two rounds per pass.
; */
; Inner word j (5..15) of a round: cur += low(m[j] * mu) on the adcx chain and
; is stored back; nxt = next word of a + high(m[j] * mu) on the adox chain.
; moff = 8 * j; aoff = offset from r9 of the word held in cur.
MRED_AVX2_16_WORD MACRO cur, nxt, moff, aoff
        mulx    rcx, rax, QWORD PTR [r10+moff]
        mov     nxt, QWORD PTR [r9+aoff+8]
        adcx    cur, rax
        adox    nxt, rcx
        mov     QWORD PTR [r9+aoff], cur
        ENDM
; One complete round.  rnd_off is 0 for the first round of a loop pass and 8
; for the second, because r9 is only advanced once per pass.
MRED_AVX2_16_ROUND MACRO rnd_off
        ; mu = a[i] * mp
        mov     rdx, r15
        mov     r12, r15
        imul    rdx, r8
        xor     r14, r14                        ; r14 = 0, and CF/OF start out clear
        ; a[i+0] += m[0] * mu
        mulx    rcx, rax, QWORD PTR [r10]
        mov     r15, rdi
        adcx    r12, rax
        adox    r15, rcx
        ; a[i+1] += m[1] * mu
        mulx    rcx, rax, QWORD PTR [r10+8]
        mov     rdi, rsi
        adcx    r15, rax
        adox    rdi, rcx
        ; a[i+2] += m[2] * mu
        mulx    rcx, rax, QWORD PTR [r10+16]
        mov     rsi, rbx
        adcx    rdi, rax
        adox    rsi, rcx
        ; a[i+3] += m[3] * mu
        mulx    rcx, rax, QWORD PTR [r10+24]
        mov     rbx, QWORD PTR [r9+rnd_off-32]
        adcx    rsi, rax
        adox    rbx, rcx
        ; a[i+4] += m[4] * mu
        mulx    rcx, rax, QWORD PTR [r10+32]
        mov     r13, QWORD PTR [r9+rnd_off-24]
        adcx    rbx, rax
        adox    r13, rcx
        ; a[i+5] .. a[i+14] += m[5..14] * mu, accumulators alternating r13 <-> r12
mra16_m = 40
mra16_a = rnd_off - 24
        REPEAT 5
        MRED_AVX2_16_WORD r13, r12, mra16_m, mra16_a
        MRED_AVX2_16_WORD r12, r13, mra16_m+8, mra16_a+8
mra16_m = mra16_m + 16
mra16_a = mra16_a + 16
        ENDM
        ; a[i+15] += m[15] * mu
        MRED_AVX2_16_WORD r13, r12, 120, rnd_off+56
        ; a[i+16] += carry of the previous round; collect the new carry in rbp
        adcx    r12, rbp
        mov     rbp, r14
        mov     QWORD PTR [r9+rnd_off+64], r12
        adox    rbp, r14
        adcx    rbp, r14
        ENDM
; One word (4..15) of the closing conditional subtract: aw = upper word k of
; a minus pext(m[k], mask) minus CF, with the previous result word (done)
; stored in between.  woff = 8 * k.
MRED_AVX2_16_CSUB MACRO bw, aw, done, woff
        mov     bw, QWORD PTR [r10+woff]
        mov     aw, QWORD PTR [r8+woff]
        pext    bw, bw, rbp
        mov     QWORD PTR [r9+woff-8], done
        sbb     aw, bw
        ENDM
_text SEGMENT READONLY PARA
sp_2048_mont_reduce_avx2_16 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        push    rbx
        push    rbp
        mov     r9, rcx                         ; r9  = a
        mov     r10, rdx                        ; r10 = m; rdx becomes mulx's implicit operand
        ; i = 16
        mov     r11, 16
        mov     r15, QWORD PTR [r9]
        mov     rdi, QWORD PTR [r9+8]
        mov     rsi, QWORD PTR [r9+16]
        mov     rbx, QWORD PTR [r9+24]
        add     r9, 64
        xor     rbp, rbp                        ; no carry into the first round
L_2048_mont_loop_avx2_16:
        MRED_AVX2_16_ROUND 0
        MRED_AVX2_16_ROUND 8
        ; a += 2
        add     r9, 16
        ; i -= 2
        sub     r11, 2
        jnz     L_2048_mont_loop_avx2_16
        sub     r9, 64
        neg     rbp                             ; carry 0/1 -> mask 0/-1
        mov     r8, r9                          ; r8 = upper half of a (source)
        sub     r9, 128                         ; r9 = lower half of a (destination)
        ; Conditional subtract of m.  Upper words 0..3 still live in
        ; r15, rdi, rsi and rbx; words 4..15 are read back from memory.
        mov     rcx, QWORD PTR [r10]
        mov     rdx, r15
        pext    rcx, rcx, rbp
        sub     rdx, rcx
        mov     rcx, QWORD PTR [r10+8]
        mov     rax, rdi
        pext    rcx, rcx, rbp
        mov     QWORD PTR [r9], rdx
        sbb     rax, rcx
        mov     rdx, QWORD PTR [r10+16]
        mov     rcx, rsi
        pext    rdx, rdx, rbp
        mov     QWORD PTR [r9+8], rax
        sbb     rcx, rdx
        mov     rax, QWORD PTR [r10+24]
        mov     rdx, rbx
        pext    rax, rax, rbp
        mov     QWORD PTR [r9+16], rcx
        sbb     rdx, rax
mra16_s = 32
        REPEAT 4
        MRED_AVX2_16_CSUB rcx, rax, rdx, mra16_s
        MRED_AVX2_16_CSUB rdx, rcx, rax, mra16_s+8
        MRED_AVX2_16_CSUB rax, rdx, rcx, mra16_s+16
mra16_s = mra16_s + 24
        ENDM
        mov     QWORD PTR [r9+120], rdx
        pop     rbp
        pop     rbx
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_2048_mont_reduce_avx2_16 ENDP
_text ENDS
ENDIF
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract b and 0 to leave a unchanged (r = a).
; *
; * r  Result of the conditional subtraction (32 64-bit words).
; * a  A single precision number to subtract from.
; * b  A single precision number to subtract.
; * m  Mask value ANDed with every word of b before it is subtracted.
; * returns 0 when no borrow leaves the top word, otherwise -1.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b, r9 = m.
; */
_text SEGMENT READONLY PARA
sp_2048_cond_sub_32 PROC
        sub     rsp, 256                        ; scratch for the 32 masked words of b
        xor     eax, eax                        ; rax = 0; the final sbb makes it 0 or -1
        ; Pass 1: scratch[i] = b[i] & m.  AND rewrites the carry flag, so all
        ; masking is finished before the borrow chain below is started.
csub32_pos = 0
        REPEAT 16
        mov     r10, QWORD PTR [r8+csub32_pos]
        mov     r11, QWORD PTR [r8+csub32_pos+8]
        and     r10, r9
        and     r11, r9
        mov     QWORD PTR [rsp+csub32_pos], r10
        mov     QWORD PTR [rsp+csub32_pos+8], r11
csub32_pos = csub32_pos + 16
        ENDM
        ; Pass 2: r = a - scratch.  Only mov and sbb sit between the first sub
        ; and the last sbb, so the borrow stays live in CF throughout.
        mov     r10, QWORD PTR [rdx]
        sub     r10, QWORD PTR [rsp]
        ; Words 1..30: each result word is stored one step after it is computed.
csub32_pos = 8
        REPEAT 15
        mov     r11, QWORD PTR [rdx+csub32_pos]
        sbb     r11, QWORD PTR [rsp+csub32_pos]
        mov     QWORD PTR [rcx+csub32_pos-8], r10
        mov     r10, QWORD PTR [rdx+csub32_pos+8]
        sbb     r10, QWORD PTR [rsp+csub32_pos+8]
        mov     QWORD PTR [rcx+csub32_pos], r11
csub32_pos = csub32_pos + 16
        ENDM
        ; Word 31 and the two pending stores.
        mov     r11, QWORD PTR [rdx+248]
        sbb     r11, QWORD PTR [rsp+248]
        mov     QWORD PTR [rcx+240], r10
        mov     QWORD PTR [rcx+248], r11
        sbb     rax, 0                          ; rax = 0 - borrow
        add     rsp, 256
        ret
sp_2048_cond_sub_32 ENDP
_text ENDS
; /* Reduce the number back to 2048 bits using Montgomery reduction.
; *
; * a A single precision number to reduce in place.
; * m The single precision number representing the modulus.
; * mp The digit representing the negative inverse of m mod 2^n.
; */
_text SEGMENT READONLY PARA
sp_2048_mont_reduce_32 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
mov r9, rdx
xor rsi, rsi
; i = 32
mov r10, 32
mov r15, QWORD PTR [rcx]
mov rdi, QWORD PTR [rcx+8]
L_2048_mont_loop_32:
; mu = a[i] * mp
mov r13, r15
imul r13, r8
; a[i+0] += m[0] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9]
add r15, rax
adc r12, rdx
; a[i+1] += m[1] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+8]
mov r15, rdi
add r15, rax
adc r11, rdx
add r15, r12
adc r11, 0
; a[i+2] += m[2] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+16]
mov rdi, QWORD PTR [rcx+16]
add rdi, rax
adc r12, rdx
add rdi, r11
adc r12, 0
; a[i+3] += m[3] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+24]
mov r14, QWORD PTR [rcx+24]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+24], r14
adc r11, 0
; a[i+4] += m[4] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+32]
mov r14, QWORD PTR [rcx+32]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+32], r14
adc r12, 0
; a[i+5] += m[5] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+40]
mov r14, QWORD PTR [rcx+40]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+40], r14
adc r11, 0
; a[i+6] += m[6] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+48]
mov r14, QWORD PTR [rcx+48]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+48], r14
adc r12, 0
; a[i+7] += m[7] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+56]
mov r14, QWORD PTR [rcx+56]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+56], r14
adc r11, 0
; a[i+8] += m[8] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+64]
mov r14, QWORD PTR [rcx+64]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+64], r14
adc r12, 0
; a[i+9] += m[9] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+72]
mov r14, QWORD PTR [rcx+72]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+72], r14
adc r11, 0
; a[i+10] += m[10] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+80]
mov r14, QWORD PTR [rcx+80]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+80], r14
adc r12, 0
; a[i+11] += m[11] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+88]
mov r14, QWORD PTR [rcx+88]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+88], r14
adc r11, 0
; a[i+12] += m[12] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+96]
mov r14, QWORD PTR [rcx+96]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+96], r14
adc r12, 0
; a[i+13] += m[13] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+104]
mov r14, QWORD PTR [rcx+104]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+104], r14
adc r11, 0
; a[i+14] += m[14] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+112]
mov r14, QWORD PTR [rcx+112]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+112], r14
adc r12, 0
; a[i+15] += m[15] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+120]
mov r14, QWORD PTR [rcx+120]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+120], r14
adc r11, 0
; a[i+16] += m[16] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+128]
mov r14, QWORD PTR [rcx+128]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+128], r14
adc r12, 0
; a[i+17] += m[17] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+136]
mov r14, QWORD PTR [rcx+136]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+136], r14
adc r11, 0
; a[i+18] += m[18] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+144]
mov r14, QWORD PTR [rcx+144]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+144], r14
adc r12, 0
; a[i+19] += m[19] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+152]
mov r14, QWORD PTR [rcx+152]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+152], r14
adc r11, 0
; a[i+20] += m[20] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+160]
mov r14, QWORD PTR [rcx+160]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+160], r14
adc r12, 0
; a[i+21] += m[21] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+168]
mov r14, QWORD PTR [rcx+168]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+168], r14
adc r11, 0
; a[i+22] += m[22] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+176]
mov r14, QWORD PTR [rcx+176]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+176], r14
adc r12, 0
; a[i+23] += m[23] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+184]
mov r14, QWORD PTR [rcx+184]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+184], r14
adc r11, 0
; a[i+24] += m[24] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+192]
mov r14, QWORD PTR [rcx+192]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+192], r14
adc r12, 0
; a[i+25] += m[25] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+200]
mov r14, QWORD PTR [rcx+200]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+200], r14
adc r11, 0
; a[i+26] += m[26] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+208]
mov r14, QWORD PTR [rcx+208]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+208], r14
adc r12, 0
; a[i+27] += m[27] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+216]
mov r14, QWORD PTR [rcx+216]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+216], r14
adc r11, 0
; a[i+28] += m[28] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+224]
mov r14, QWORD PTR [rcx+224]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+224], r14
adc r12, 0
; a[i+29] += m[29] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+232]
mov r14, QWORD PTR [rcx+232]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+232], r14
adc r11, 0
; a[i+30] += m[30] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+240]
mov r14, QWORD PTR [rcx+240]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+240], r14
adc r12, 0
; a[i+31] += m[31] * mu
mov rax, r13
mul QWORD PTR [r9+248]
mov r14, QWORD PTR [rcx+248]
add r12, rax
adc rdx, rsi
mov rsi, 0
adc rsi, 0
add r14, r12
mov QWORD PTR [rcx+248], r14
adc QWORD PTR [rcx+256], rdx
adc rsi, 0
; i -= 1
add rcx, 8
dec r10
jnz L_2048_mont_loop_32
mov QWORD PTR [rcx], r15
mov QWORD PTR [rcx+8], rdi
neg rsi
IFDEF _WIN64
mov r8, r9
mov r9, rsi
ELSE
mov r9, rsi
mov r8, r9
ENDIF
mov rdx, rcx
mov rcx, rcx
sub rcx, 256
call sp_2048_cond_sub_32
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_2048_mont_reduce_32 ENDP
_text ENDS
; /* Sub b from a into r. (r = a - b)
; *
; * Works on 32 64-bit words (offsets 0..248); the borrow propagates from
; * offset 0 upwards, so offset 0 is the least significant word.
; *
; * r A single precision integer. (rcx, written)
; * a A single precision integer. (rdx, read)
; * b A single precision integer. (r8, read)
; * returns in rax: 0 when there is no borrow out of the top word, -1 otherwise.
; */
_text SEGMENT READONLY PARA
sp_2048_sub_32 PROC
; r9 and r10 alternate as the working word: each result is stored only after
; the next word of a has been loaded. Only mov sits between the subtractions,
; so the borrow flag survives from one word to the next.
mov r9, QWORD PTR [rdx]
; rax = 0 here (before the chain starts); it becomes the return value below.
xor rax, rax
; Word 0 starts the borrow chain with sub; every later word uses sbb.
sub r9, QWORD PTR [r8]
mov r10, QWORD PTR [rdx+8]
mov QWORD PTR [rcx], r9
sbb r10, QWORD PTR [r8+8]
mov r9, QWORD PTR [rdx+16]
mov QWORD PTR [rcx+8], r10
sbb r9, QWORD PTR [r8+16]
mov r10, QWORD PTR [rdx+24]
mov QWORD PTR [rcx+16], r9
sbb r10, QWORD PTR [r8+24]
mov r9, QWORD PTR [rdx+32]
mov QWORD PTR [rcx+24], r10
sbb r9, QWORD PTR [r8+32]
mov r10, QWORD PTR [rdx+40]
mov QWORD PTR [rcx+32], r9
sbb r10, QWORD PTR [r8+40]
mov r9, QWORD PTR [rdx+48]
mov QWORD PTR [rcx+40], r10
sbb r9, QWORD PTR [r8+48]
mov r10, QWORD PTR [rdx+56]
mov QWORD PTR [rcx+48], r9
sbb r10, QWORD PTR [r8+56]
mov r9, QWORD PTR [rdx+64]
mov QWORD PTR [rcx+56], r10
sbb r9, QWORD PTR [r8+64]
mov r10, QWORD PTR [rdx+72]
mov QWORD PTR [rcx+64], r9
sbb r10, QWORD PTR [r8+72]
mov r9, QWORD PTR [rdx+80]
mov QWORD PTR [rcx+72], r10
sbb r9, QWORD PTR [r8+80]
mov r10, QWORD PTR [rdx+88]
mov QWORD PTR [rcx+80], r9
sbb r10, QWORD PTR [r8+88]
mov r9, QWORD PTR [rdx+96]
mov QWORD PTR [rcx+88], r10
sbb r9, QWORD PTR [r8+96]
mov r10, QWORD PTR [rdx+104]
mov QWORD PTR [rcx+96], r9
sbb r10, QWORD PTR [r8+104]
mov r9, QWORD PTR [rdx+112]
mov QWORD PTR [rcx+104], r10
sbb r9, QWORD PTR [r8+112]
mov r10, QWORD PTR [rdx+120]
mov QWORD PTR [rcx+112], r9
sbb r10, QWORD PTR [r8+120]
mov r9, QWORD PTR [rdx+128]
mov QWORD PTR [rcx+120], r10
sbb r9, QWORD PTR [r8+128]
mov r10, QWORD PTR [rdx+136]
mov QWORD PTR [rcx+128], r9
sbb r10, QWORD PTR [r8+136]
mov r9, QWORD PTR [rdx+144]
mov QWORD PTR [rcx+136], r10
sbb r9, QWORD PTR [r8+144]
mov r10, QWORD PTR [rdx+152]
mov QWORD PTR [rcx+144], r9
sbb r10, QWORD PTR [r8+152]
mov r9, QWORD PTR [rdx+160]
mov QWORD PTR [rcx+152], r10
sbb r9, QWORD PTR [r8+160]
mov r10, QWORD PTR [rdx+168]
mov QWORD PTR [rcx+160], r9
sbb r10, QWORD PTR [r8+168]
mov r9, QWORD PTR [rdx+176]
mov QWORD PTR [rcx+168], r10
sbb r9, QWORD PTR [r8+176]
mov r10, QWORD PTR [rdx+184]
mov QWORD PTR [rcx+176], r9
sbb r10, QWORD PTR [r8+184]
mov r9, QWORD PTR [rdx+192]
mov QWORD PTR [rcx+184], r10
sbb r9, QWORD PTR [r8+192]
mov r10, QWORD PTR [rdx+200]
mov QWORD PTR [rcx+192], r9
sbb r10, QWORD PTR [r8+200]
mov r9, QWORD PTR [rdx+208]
mov QWORD PTR [rcx+200], r10
sbb r9, QWORD PTR [r8+208]
mov r10, QWORD PTR [rdx+216]
mov QWORD PTR [rcx+208], r9
sbb r10, QWORD PTR [r8+216]
mov r9, QWORD PTR [rdx+224]
mov QWORD PTR [rcx+216], r10
sbb r9, QWORD PTR [r8+224]
mov r10, QWORD PTR [rdx+232]
mov QWORD PTR [rcx+224], r9
sbb r10, QWORD PTR [r8+232]
mov r9, QWORD PTR [rdx+240]
mov QWORD PTR [rcx+232], r10
sbb r9, QWORD PTR [r8+240]
mov r10, QWORD PTR [rdx+248]
mov QWORD PTR [rcx+240], r9
sbb r10, QWORD PTR [r8+248]
mov QWORD PTR [rcx+248], r10
; rax = 0 - 0 - borrow: stays 0 without a final borrow, becomes -1 with one.
sbb rax, 0
ret
sp_2048_sub_32 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Mul a by digit b into r. (r = a * b)
; *
; * a is read as 32 64-bit words; r receives 33 words (offsets 0..256).
; *
; * r A single precision integer. (rcx, written)
; * a A single precision integer. (rdx on entry, moved to rax)
; * b A single precision digit. (r8 on entry, moved to rdx for mulx)
; */
_text SEGMENT READONLY PARA
sp_2048_mul_d_avx2_32 PROC
; r12 and r13 are used below, so they are saved and restored.
push r12
push r13
; mulx takes one multiplicand implicitly from rdx, so a's pointer moves to rax.
mov rax, rdx
; A[0] * B
mov rdx, r8
; r13 = 0 for the whole routine; the xor also clears CF and OF, which start
; the adcx (CF) and adox (OF) chains below. mulx and mov leave flags alone.
xor r13, r13
; mulx writes the high half to its first operand and the low half to the second.
mulx r12, r11, QWORD PTR [rax]
mov QWORD PTR [rcx], r11
; From here on r11 and r12 alternate: one holds the high half carried over
; from the previous product, the other is zeroed (from r13) to receive the
; new high half. adcx adds the new low half into the carried-over word.
; A[1] * B
mulx r10, r9, QWORD PTR [rax+8]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+8], r12
; A[2] * B
mulx r10, r9, QWORD PTR [rax+16]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+16], r11
; A[3] * B
mulx r10, r9, QWORD PTR [rax+24]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+24], r12
; A[4] * B
mulx r10, r9, QWORD PTR [rax+32]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+32], r11
; A[5] * B
mulx r10, r9, QWORD PTR [rax+40]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+40], r12
; A[6] * B
mulx r10, r9, QWORD PTR [rax+48]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+48], r11
; A[7] * B
mulx r10, r9, QWORD PTR [rax+56]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+56], r12
; A[8] * B
mulx r10, r9, QWORD PTR [rax+64]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+64], r11
; A[9] * B
mulx r10, r9, QWORD PTR [rax+72]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+72], r12
; A[10] * B
mulx r10, r9, QWORD PTR [rax+80]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+80], r11
; A[11] * B
mulx r10, r9, QWORD PTR [rax+88]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+88], r12
; A[12] * B
mulx r10, r9, QWORD PTR [rax+96]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+96], r11
; A[13] * B
mulx r10, r9, QWORD PTR [rax+104]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+104], r12
; A[14] * B
mulx r10, r9, QWORD PTR [rax+112]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+112], r11
; A[15] * B
mulx r10, r9, QWORD PTR [rax+120]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+120], r12
; A[16] * B
mulx r10, r9, QWORD PTR [rax+128]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+128], r11
; A[17] * B
mulx r10, r9, QWORD PTR [rax+136]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+136], r12
; A[18] * B
mulx r10, r9, QWORD PTR [rax+144]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+144], r11
; A[19] * B
mulx r10, r9, QWORD PTR [rax+152]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+152], r12
; A[20] * B
mulx r10, r9, QWORD PTR [rax+160]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+160], r11
; A[21] * B
mulx r10, r9, QWORD PTR [rax+168]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+168], r12
; A[22] * B
mulx r10, r9, QWORD PTR [rax+176]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+176], r11
; A[23] * B
mulx r10, r9, QWORD PTR [rax+184]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+184], r12
; A[24] * B
mulx r10, r9, QWORD PTR [rax+192]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+192], r11
; A[25] * B
mulx r10, r9, QWORD PTR [rax+200]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+200], r12
; A[26] * B
mulx r10, r9, QWORD PTR [rax+208]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+208], r11
; A[27] * B
mulx r10, r9, QWORD PTR [rax+216]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+216], r12
; A[28] * B
mulx r10, r9, QWORD PTR [rax+224]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+224], r11
; A[29] * B
mulx r10, r9, QWORD PTR [rax+232]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+232], r12
; A[30] * B
mulx r10, r9, QWORD PTR [rax+240]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+240], r11
; A[31] * B
mulx r10, r9, QWORD PTR [rax+248]
mov r11, r13
adcx r12, r9
adox r11, r10
; Fold the last CF carry into the top word (r13 is zero).
adcx r11, r13
mov QWORD PTR [rcx+248], r12
; 33rd result word: the high half of the final product plus carry.
mov QWORD PTR [rcx+256], r11
pop r13
pop r12
ret
sp_2048_mul_d_avx2_32 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
; *
; * d1 The high order half of the number to divide. (rcx)
; * d0 The low order half of the number to divide. (rdx)
; * div The divisor. (r8)
; * returns the quotient of the division in rax.
; *
; * The quotient must fit in 64 bits (d1 < div); otherwise the div
; * instruction raises a divide error.
; */
_text SEGMENT READONLY PARA
div_2048_word_asm_32 PROC
; div takes its dividend in rdx:rax. Move the low half first, while rdx
; still holds d0, then overwrite rdx with the high half.
mov rax, rdx
mov rdx, rcx
; rax = quotient (returned), rdx = remainder (ignored by this routine).
div r8
ret
div_2048_word_asm_32 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract b and 0 to subtract nothing (r becomes a copy of a).
; *
; * Works on 32 64-bit words, offset 0 being the least significant.
; *
; * r A single precision number representing condition subtract result. (rcx)
; * a A single precision number to subtract from. (rdx)
; * b A single precision number to subtract. (r8)
; * m Mask value to apply. (r9)
; * returns in rax: 0 when there is no borrow out of the top word, -1 otherwise.
; */
_text SEGMENT READONLY PARA
sp_2048_cond_sub_avx2_32 PROC
; r12 is used as a working register below, so it is saved and restored.
push r12
; rax = 0 now; the final sbb turns it into the borrow indicator.
mov rax, 0
; pext applies the mask: with m = -1 it returns the word of b unchanged, with
; m = 0 it returns 0. It does not modify flags, so it can sit inside the
; sub/sbb borrow chain. r10, r11 and r12 rotate roles (word of a, masked word
; of b, result waiting to be stored).
mov r12, QWORD PTR [r8]
mov r10, QWORD PTR [rdx]
pext r12, r12, r9
; Word 0 starts the borrow chain with sub; every later word uses sbb.
sub r10, r12
mov r12, QWORD PTR [r8+8]
mov r11, QWORD PTR [rdx+8]
pext r12, r12, r9
mov QWORD PTR [rcx], r10
sbb r11, r12
mov r10, QWORD PTR [r8+16]
mov r12, QWORD PTR [rdx+16]
pext r10, r10, r9
mov QWORD PTR [rcx+8], r11
sbb r12, r10
mov r11, QWORD PTR [r8+24]
mov r10, QWORD PTR [rdx+24]
pext r11, r11, r9
mov QWORD PTR [rcx+16], r12
sbb r10, r11
mov r12, QWORD PTR [r8+32]
mov r11, QWORD PTR [rdx+32]
pext r12, r12, r9
mov QWORD PTR [rcx+24], r10
sbb r11, r12
mov r10, QWORD PTR [r8+40]
mov r12, QWORD PTR [rdx+40]
pext r10, r10, r9
mov QWORD PTR [rcx+32], r11
sbb r12, r10
mov r11, QWORD PTR [r8+48]
mov r10, QWORD PTR [rdx+48]
pext r11, r11, r9
mov QWORD PTR [rcx+40], r12
sbb r10, r11
mov r12, QWORD PTR [r8+56]
mov r11, QWORD PTR [rdx+56]
pext r12, r12, r9
mov QWORD PTR [rcx+48], r10
sbb r11, r12
mov r10, QWORD PTR [r8+64]
mov r12, QWORD PTR [rdx+64]
pext r10, r10, r9
mov QWORD PTR [rcx+56], r11
sbb r12, r10
mov r11, QWORD PTR [r8+72]
mov r10, QWORD PTR [rdx+72]
pext r11, r11, r9
mov QWORD PTR [rcx+64], r12
sbb r10, r11
mov r12, QWORD PTR [r8+80]
mov r11, QWORD PTR [rdx+80]
pext r12, r12, r9
mov QWORD PTR [rcx+72], r10
sbb r11, r12
mov r10, QWORD PTR [r8+88]
mov r12, QWORD PTR [rdx+88]
pext r10, r10, r9
mov QWORD PTR [rcx+80], r11
sbb r12, r10
mov r11, QWORD PTR [r8+96]
mov r10, QWORD PTR [rdx+96]
pext r11, r11, r9
mov QWORD PTR [rcx+88], r12
sbb r10, r11
mov r12, QWORD PTR [r8+104]
mov r11, QWORD PTR [rdx+104]
pext r12, r12, r9
mov QWORD PTR [rcx+96], r10
sbb r11, r12
mov r10, QWORD PTR [r8+112]
mov r12, QWORD PTR [rdx+112]
pext r10, r10, r9
mov QWORD PTR [rcx+104], r11
sbb r12, r10
mov r11, QWORD PTR [r8+120]
mov r10, QWORD PTR [rdx+120]
pext r11, r11, r9
mov QWORD PTR [rcx+112], r12
sbb r10, r11
mov r12, QWORD PTR [r8+128]
mov r11, QWORD PTR [rdx+128]
pext r12, r12, r9
mov QWORD PTR [rcx+120], r10
sbb r11, r12
mov r10, QWORD PTR [r8+136]
mov r12, QWORD PTR [rdx+136]
pext r10, r10, r9
mov QWORD PTR [rcx+128], r11
sbb r12, r10
mov r11, QWORD PTR [r8+144]
mov r10, QWORD PTR [rdx+144]
pext r11, r11, r9
mov QWORD PTR [rcx+136], r12
sbb r10, r11
mov r12, QWORD PTR [r8+152]
mov r11, QWORD PTR [rdx+152]
pext r12, r12, r9
mov QWORD PTR [rcx+144], r10
sbb r11, r12
mov r10, QWORD PTR [r8+160]
mov r12, QWORD PTR [rdx+160]
pext r10, r10, r9
mov QWORD PTR [rcx+152], r11
sbb r12, r10
mov r11, QWORD PTR [r8+168]
mov r10, QWORD PTR [rdx+168]
pext r11, r11, r9
mov QWORD PTR [rcx+160], r12
sbb r10, r11
mov r12, QWORD PTR [r8+176]
mov r11, QWORD PTR [rdx+176]
pext r12, r12, r9
mov QWORD PTR [rcx+168], r10
sbb r11, r12
mov r10, QWORD PTR [r8+184]
mov r12, QWORD PTR [rdx+184]
pext r10, r10, r9
mov QWORD PTR [rcx+176], r11
sbb r12, r10
mov r11, QWORD PTR [r8+192]
mov r10, QWORD PTR [rdx+192]
pext r11, r11, r9
mov QWORD PTR [rcx+184], r12
sbb r10, r11
mov r12, QWORD PTR [r8+200]
mov r11, QWORD PTR [rdx+200]
pext r12, r12, r9
mov QWORD PTR [rcx+192], r10
sbb r11, r12
mov r10, QWORD PTR [r8+208]
mov r12, QWORD PTR [rdx+208]
pext r10, r10, r9
mov QWORD PTR [rcx+200], r11
sbb r12, r10
mov r11, QWORD PTR [r8+216]
mov r10, QWORD PTR [rdx+216]
pext r11, r11, r9
mov QWORD PTR [rcx+208], r12
sbb r10, r11
mov r12, QWORD PTR [r8+224]
mov r11, QWORD PTR [rdx+224]
pext r12, r12, r9
mov QWORD PTR [rcx+216], r10
sbb r11, r12
mov r10, QWORD PTR [r8+232]
mov r12, QWORD PTR [rdx+232]
pext r10, r10, r9
mov QWORD PTR [rcx+224], r11
sbb r12, r10
mov r11, QWORD PTR [r8+240]
mov r10, QWORD PTR [rdx+240]
pext r11, r11, r9
mov QWORD PTR [rcx+232], r12
sbb r10, r11
mov r12, QWORD PTR [r8+248]
mov r11, QWORD PTR [rdx+248]
pext r12, r12, r9
mov QWORD PTR [rcx+240], r10
sbb r11, r12
mov QWORD PTR [rcx+248], r11
; rax = 0 - 0 - borrow: stays 0 without a final borrow, becomes -1 with one.
sbb rax, 0
pop r12
ret
sp_2048_cond_sub_avx2_32 ENDP
_text ENDS
ENDIF
; /* Compare a with b in constant time.
; *
; * All 32 words are always visited, from offset 248 down to offset 0, and
; * the result is selected with cmov rather than branches.
; *
; * a A single precision integer. (rcx)
; * b A single precision integer. (rdx)
; * return -ve, 0 or +ve if a is less than, equal to or greater than b
; * respectively. (rax is -1, 0 or 1)
; */
_text SEGMENT READONLY PARA
sp_2048_cmp_32 PROC
; r12 is used as a working register below, so it is saved and restored.
push r12
; Register roles for the whole routine:
;   r9  = 0, the value the mask collapses to
;   r8  = mask, all ones until the first differing word has been seen
;   rax = result so far, -1 meaning "no difference seen yet" or "a < b"
;   r10 = 1, the "a > b" result
xor r9, r9
mov r8, -1
mov rax, -1
mov r10, 1
; Per-word step: mask both words, subtract, then
;   cmova  - a word above b word: result becomes 1
;   cmovc  - a word below b word: result becomes the mask (-1 at that point)
;   cmovnz - words differ: mask becomes 0, so every lower word compares as
;            0 against 0 and can no longer change the result.
mov r11, QWORD PTR [rcx+248]
mov r12, QWORD PTR [rdx+248]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+240]
mov r12, QWORD PTR [rdx+240]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+232]
mov r12, QWORD PTR [rdx+232]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+224]
mov r12, QWORD PTR [rdx+224]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+216]
mov r12, QWORD PTR [rdx+216]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+208]
mov r12, QWORD PTR [rdx+208]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+200]
mov r12, QWORD PTR [rdx+200]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+192]
mov r12, QWORD PTR [rdx+192]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+184]
mov r12, QWORD PTR [rdx+184]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+176]
mov r12, QWORD PTR [rdx+176]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+168]
mov r12, QWORD PTR [rdx+168]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+160]
mov r12, QWORD PTR [rdx+160]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+152]
mov r12, QWORD PTR [rdx+152]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+144]
mov r12, QWORD PTR [rdx+144]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+136]
mov r12, QWORD PTR [rdx+136]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+128]
mov r12, QWORD PTR [rdx+128]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+120]
mov r12, QWORD PTR [rdx+120]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+112]
mov r12, QWORD PTR [rdx+112]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+104]
mov r12, QWORD PTR [rdx+104]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+96]
mov r12, QWORD PTR [rdx+96]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+88]
mov r12, QWORD PTR [rdx+88]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+80]
mov r12, QWORD PTR [rdx+80]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+72]
mov r12, QWORD PTR [rdx+72]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+64]
mov r12, QWORD PTR [rdx+64]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+56]
mov r12, QWORD PTR [rdx+56]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+48]
mov r12, QWORD PTR [rdx+48]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+40]
mov r12, QWORD PTR [rdx+40]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+32]
mov r12, QWORD PTR [rdx+32]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+24]
mov r12, QWORD PTR [rdx+24]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+16]
mov r12, QWORD PTR [rdx+16]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+8]
mov r12, QWORD PTR [rdx+8]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx]
mov r12, QWORD PTR [rdx]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
; All words equal: rax and r8 are both still -1, so the xor yields 0.
; Otherwise r8 is 0 and rax keeps its 1 or -1.
xor rax, r8
pop r12
ret
sp_2048_cmp_32 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Reduce the number back to 2048 bits using Montgomery reduction.
; *
; * Runs 32 rounds; round i adds m * mu to words a[i..i+31] and propagates the
; * carry into a[i+32], so words a[0..63] are accessed. Afterwards m is
; * conditionally subtracted from a[32..63] and the 32-word result is written
; * to a[0..31].
; *
; * a A single precision number to reduce in place. (rcx)
; * m The single precision number representing the modulus. (rdx)
; * mp The digit representing the negative inverse of m mod 2^n. (r8)
; */
_text SEGMENT READONLY PARA
sp_2048_mont_reduce_avx2_32 PROC
; All eight registers pushed here are used as working registers below.
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
push rbp
; rcx and rdx are overwritten inside the loop (mulx output and mulx implicit
; input), so the pointers are kept in r9 (a) and r10 (m).
mov r9, rcx
mov r10, rdx
xor rbp, rbp
; i = 32
mov r11, 32
; r15, rdi, rsi, rbx hold a sliding window of the four lowest live words,
; a[i..i+3]; the remaining words stay in memory.
mov r15, QWORD PTR [r9]
mov rdi, QWORD PTR [r9+8]
mov rsi, QWORD PTR [r9+16]
mov rbx, QWORD PTR [r9+24]
; Bias the pointer by 128 bytes: [r9+-96] is a[i+4] and [r9+128] is a[i+32].
add r9, 128
; rbp = carry out of the previous round's top word; none before round 0.
xor rbp, rbp
L_2048_mont_loop_avx2_32:
; mu = a[i] * mp
mov rdx, r15
mov r12, r15
; Only the low 64 bits of the product are kept as mu.
imul rdx, r8
; r14 = 0 for this round; the xor also clears CF and OF to start the
; adcx (low halves) and adox (high halves) chains. mulx and mov keep flags.
xor r14, r14
; a[i+0] += m[0] * mu
mulx rcx, rax, QWORD PTR [r10]
; Slide the window: r15 takes over a[i+1] (and rdi, rsi, rbx follow below).
mov r15, rdi
; The sum left in r12 is never stored (r12 is reloaded at a[i+5]); only the
; carry it produces continues along the chain.
adcx r12, rax
adox r15, rcx
; a[i+1] += m[1] * mu
mulx rcx, rax, QWORD PTR [r10+8]
mov rdi, rsi
adcx r15, rax
adox rdi, rcx
; a[i+2] += m[2] * mu
mulx rcx, rax, QWORD PTR [r10+16]
mov rsi, rbx
adcx rdi, rax
adox rsi, rcx
; a[i+3] += m[3] * mu
mulx rcx, rax, QWORD PTR [r10+24]
mov rbx, QWORD PTR [r9+-96]
adcx rsi, rax
adox rbx, rcx
; a[i+4] += m[4] * mu
mulx rcx, rax, QWORD PTR [r10+32]
mov r13, QWORD PTR [r9+-88]
adcx rbx, rax
adox r13, rcx
; From a[i+5] on, r12 and r13 alternate: one is completed with the low half
; and stored, the other is loaded with the next word and gets the high half.
; a[i+5] += m[5] * mu
mulx rcx, rax, QWORD PTR [r10+40]
mov r12, QWORD PTR [r9+-80]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-88], r13
; a[i+6] += m[6] * mu
mulx rcx, rax, QWORD PTR [r10+48]
mov r13, QWORD PTR [r9+-72]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-80], r12
; a[i+7] += m[7] * mu
mulx rcx, rax, QWORD PTR [r10+56]
mov r12, QWORD PTR [r9+-64]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-72], r13
; a[i+8] += m[8] * mu
mulx rcx, rax, QWORD PTR [r10+64]
mov r13, QWORD PTR [r9+-56]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-64], r12
; a[i+9] += m[9] * mu
mulx rcx, rax, QWORD PTR [r10+72]
mov r12, QWORD PTR [r9+-48]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-56], r13
; a[i+10] += m[10] * mu
mulx rcx, rax, QWORD PTR [r10+80]
mov r13, QWORD PTR [r9+-40]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-48], r12
; a[i+11] += m[11] * mu
mulx rcx, rax, QWORD PTR [r10+88]
mov r12, QWORD PTR [r9+-32]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-40], r13
; a[i+12] += m[12] * mu
mulx rcx, rax, QWORD PTR [r10+96]
mov r13, QWORD PTR [r9+-24]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-32], r12
; a[i+13] += m[13] * mu
mulx rcx, rax, QWORD PTR [r10+104]
mov r12, QWORD PTR [r9+-16]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-24], r13
; a[i+14] += m[14] * mu
mulx rcx, rax, QWORD PTR [r10+112]
mov r13, QWORD PTR [r9+-8]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-16], r12
; a[i+15] += m[15] * mu
mulx rcx, rax, QWORD PTR [r10+120]
mov r12, QWORD PTR [r9]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-8], r13
; a[i+16] += m[16] * mu
mulx rcx, rax, QWORD PTR [r10+128]
mov r13, QWORD PTR [r9+8]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9], r12
; a[i+17] += m[17] * mu
mulx rcx, rax, QWORD PTR [r10+136]
mov r12, QWORD PTR [r9+16]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+8], r13
; a[i+18] += m[18] * mu
mulx rcx, rax, QWORD PTR [r10+144]
mov r13, QWORD PTR [r9+24]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+16], r12
; a[i+19] += m[19] * mu
mulx rcx, rax, QWORD PTR [r10+152]
mov r12, QWORD PTR [r9+32]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+24], r13
; a[i+20] += m[20] * mu
mulx rcx, rax, QWORD PTR [r10+160]
mov r13, QWORD PTR [r9+40]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+32], r12
; a[i+21] += m[21] * mu
mulx rcx, rax, QWORD PTR [r10+168]
mov r12, QWORD PTR [r9+48]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+40], r13
; a[i+22] += m[22] * mu
mulx rcx, rax, QWORD PTR [r10+176]
mov r13, QWORD PTR [r9+56]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+48], r12
; a[i+23] += m[23] * mu
mulx rcx, rax, QWORD PTR [r10+184]
mov r12, QWORD PTR [r9+64]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+56], r13
; a[i+24] += m[24] * mu
mulx rcx, rax, QWORD PTR [r10+192]
mov r13, QWORD PTR [r9+72]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+64], r12
; a[i+25] += m[25] * mu
mulx rcx, rax, QWORD PTR [r10+200]
mov r12, QWORD PTR [r9+80]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+72], r13
; a[i+26] += m[26] * mu
mulx rcx, rax, QWORD PTR [r10+208]
mov r13, QWORD PTR [r9+88]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+80], r12
; a[i+27] += m[27] * mu
mulx rcx, rax, QWORD PTR [r10+216]
mov r12, QWORD PTR [r9+96]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+88], r13
; a[i+28] += m[28] * mu
mulx rcx, rax, QWORD PTR [r10+224]
mov r13, QWORD PTR [r9+104]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+96], r12
; a[i+29] += m[29] * mu
mulx rcx, rax, QWORD PTR [r10+232]
mov r12, QWORD PTR [r9+112]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+104], r13
; a[i+30] += m[30] * mu
mulx rcx, rax, QWORD PTR [r10+240]
mov r13, QWORD PTR [r9+120]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+112], r12
; a[i+31] += m[31] * mu
mulx rcx, rax, QWORD PTR [r10+248]
mov r12, QWORD PTR [r9+128]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+120], r13
; a[i+32] also receives the carry saved in rbp by the previous round.
adcx r12, rbp
mov rbp, r14
mov QWORD PTR [r9+128], r12
; rbp = OF + CF: the carry out of a[i+32], kept for the next round.
adox rbp, r14
adcx rbp, r14
; a += 1
add r9, 8
; i -= 1
sub r11, 1
jnz L_2048_mont_loop_avx2_32
; r9 was advanced by 32 words in the loop; removing the 128-byte bias leaves
; it pointing at a[32].
sub r9, 128
; Turn the final carry into the subtraction mask (a carry of 1 becomes -1).
neg rbp
; r8 = source (a[32..63]), r9 = destination (a[0..31]).
mov r8, r9
sub r9, 256
; Conditional subtract: dest = upper half - (m masked by rbp). pext with a
; mask of -1 passes the word of m through, with 0 it gives 0, and it leaves
; the sub/sbb borrow chain's flags untouched. rcx, rdx and rax rotate roles.
; The first four source words are taken from the register window
; (r15, rdi, rsi, rbx); the rest are read from memory through r8.
mov rcx, QWORD PTR [r10]
mov rdx, r15
pext rcx, rcx, rbp
sub rdx, rcx
mov rcx, QWORD PTR [r10+8]
mov rax, rdi
pext rcx, rcx, rbp
mov QWORD PTR [r9], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+16]
mov rcx, rsi
pext rdx, rdx, rbp
mov QWORD PTR [r9+8], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+24]
mov rdx, rbx
pext rax, rax, rbp
mov QWORD PTR [r9+16], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+32]
mov rax, QWORD PTR [r8+32]
pext rcx, rcx, rbp
mov QWORD PTR [r9+24], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+40]
mov rcx, QWORD PTR [r8+40]
pext rdx, rdx, rbp
mov QWORD PTR [r9+32], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+48]
mov rdx, QWORD PTR [r8+48]
pext rax, rax, rbp
mov QWORD PTR [r9+40], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+56]
mov rax, QWORD PTR [r8+56]
pext rcx, rcx, rbp
mov QWORD PTR [r9+48], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+64]
mov rcx, QWORD PTR [r8+64]
pext rdx, rdx, rbp
mov QWORD PTR [r9+56], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+72]
mov rdx, QWORD PTR [r8+72]
pext rax, rax, rbp
mov QWORD PTR [r9+64], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+80]
mov rax, QWORD PTR [r8+80]
pext rcx, rcx, rbp
mov QWORD PTR [r9+72], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+88]
mov rcx, QWORD PTR [r8+88]
pext rdx, rdx, rbp
mov QWORD PTR [r9+80], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+96]
mov rdx, QWORD PTR [r8+96]
pext rax, rax, rbp
mov QWORD PTR [r9+88], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+104]
mov rax, QWORD PTR [r8+104]
pext rcx, rcx, rbp
mov QWORD PTR [r9+96], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+112]
mov rcx, QWORD PTR [r8+112]
pext rdx, rdx, rbp
mov QWORD PTR [r9+104], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+120]
mov rdx, QWORD PTR [r8+120]
pext rax, rax, rbp
mov QWORD PTR [r9+112], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+128]
mov rax, QWORD PTR [r8+128]
pext rcx, rcx, rbp
mov QWORD PTR [r9+120], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+136]
mov rcx, QWORD PTR [r8+136]
pext rdx, rdx, rbp
mov QWORD PTR [r9+128], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+144]
mov rdx, QWORD PTR [r8+144]
pext rax, rax, rbp
mov QWORD PTR [r9+136], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+152]
mov rax, QWORD PTR [r8+152]
pext rcx, rcx, rbp
mov QWORD PTR [r9+144], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+160]
mov rcx, QWORD PTR [r8+160]
pext rdx, rdx, rbp
mov QWORD PTR [r9+152], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+168]
mov rdx, QWORD PTR [r8+168]
pext rax, rax, rbp
mov QWORD PTR [r9+160], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+176]
mov rax, QWORD PTR [r8+176]
pext rcx, rcx, rbp
mov QWORD PTR [r9+168], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+184]
mov rcx, QWORD PTR [r8+184]
pext rdx, rdx, rbp
mov QWORD PTR [r9+176], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+192]
mov rdx, QWORD PTR [r8+192]
pext rax, rax, rbp
mov QWORD PTR [r9+184], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+200]
mov rax, QWORD PTR [r8+200]
pext rcx, rcx, rbp
mov QWORD PTR [r9+192], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+208]
mov rcx, QWORD PTR [r8+208]
pext rdx, rdx, rbp
mov QWORD PTR [r9+200], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+216]
mov rdx, QWORD PTR [r8+216]
pext rax, rax, rbp
mov QWORD PTR [r9+208], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+224]
mov rax, QWORD PTR [r8+224]
pext rcx, rcx, rbp
mov QWORD PTR [r9+216], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+232]
mov rcx, QWORD PTR [r8+232]
pext rdx, rdx, rbp
mov QWORD PTR [r9+224], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+240]
mov rdx, QWORD PTR [r8+240]
pext rax, rax, rbp
mov QWORD PTR [r9+232], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+248]
mov rax, QWORD PTR [r8+248]
pext rcx, rcx, rbp
mov QWORD PTR [r9+240], rdx
sbb rax, rcx
mov QWORD PTR [r9+248], rax
; Restore the saved registers in reverse push order.
pop rbp
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_2048_mont_reduce_avx2_32 ENDP
_text ENDS
ENDIF
; /* Conditionally add a and b using the mask m.
; * m is -1 to add and 0 when not.
; *
; * Works on 16 64-bit words, offset 0 being the least significant.
; *
; * r A single precision number representing conditional add result. (rcx)
; * a A single precision number to add with. (rdx)
; * b A single precision number to add. (r8)
; * m Mask value to apply. (r9)
; * returns in rax: the carry out of the top word (0 or 1).
; */
_text SEGMENT READONLY PARA
sp_2048_cond_add_16 PROC
; 128 bytes of stack hold the 16 masked words of b.
sub rsp, 128
mov rax, 0
; Pass 1: store (b AND m) on the stack, two words at a time. The masking is
; done up front because and rewrites the carry flag and so cannot be mixed
; into the add/adc chain of pass 2.
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
and r10, r9
and r11, r9
mov QWORD PTR [rsp], r10
mov QWORD PTR [rsp+8], r11
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+16], r10
mov QWORD PTR [rsp+24], r11
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+32], r10
mov QWORD PTR [rsp+40], r11
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+48], r10
mov QWORD PTR [rsp+56], r11
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+64], r10
mov QWORD PTR [rsp+72], r11
mov r10, QWORD PTR [r8+80]
mov r11, QWORD PTR [r8+88]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+80], r10
mov QWORD PTR [rsp+88], r11
mov r10, QWORD PTR [r8+96]
mov r11, QWORD PTR [r8+104]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+96], r10
mov QWORD PTR [rsp+104], r11
mov r10, QWORD PTR [r8+112]
mov r11, QWORD PTR [r8+120]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+112], r10
mov QWORD PTR [rsp+120], r11
; Pass 2: r = a + masked b. All of b has been read, so r8 is reused as the
; scratch register for the masked words. Word 0 starts the carry chain with
; add; every later word uses adc, with only mov in between.
mov r10, QWORD PTR [rdx]
mov r8, QWORD PTR [rsp]
add r10, r8
mov r11, QWORD PTR [rdx+8]
mov r8, QWORD PTR [rsp+8]
adc r11, r8
mov QWORD PTR [rcx], r10
mov r10, QWORD PTR [rdx+16]
mov r8, QWORD PTR [rsp+16]
adc r10, r8
mov QWORD PTR [rcx+8], r11
mov r11, QWORD PTR [rdx+24]
mov r8, QWORD PTR [rsp+24]
adc r11, r8
mov QWORD PTR [rcx+16], r10
mov r10, QWORD PTR [rdx+32]
mov r8, QWORD PTR [rsp+32]
adc r10, r8
mov QWORD PTR [rcx+24], r11
mov r11, QWORD PTR [rdx+40]
mov r8, QWORD PTR [rsp+40]
adc r11, r8
mov QWORD PTR [rcx+32], r10
mov r10, QWORD PTR [rdx+48]
mov r8, QWORD PTR [rsp+48]
adc r10, r8
mov QWORD PTR [rcx+40], r11
mov r11, QWORD PTR [rdx+56]
mov r8, QWORD PTR [rsp+56]
adc r11, r8
mov QWORD PTR [rcx+48], r10
mov r10, QWORD PTR [rdx+64]
mov r8, QWORD PTR [rsp+64]
adc r10, r8
mov QWORD PTR [rcx+56], r11
mov r11, QWORD PTR [rdx+72]
mov r8, QWORD PTR [rsp+72]
adc r11, r8
mov QWORD PTR [rcx+64], r10
mov r10, QWORD PTR [rdx+80]
mov r8, QWORD PTR [rsp+80]
adc r10, r8
mov QWORD PTR [rcx+72], r11
mov r11, QWORD PTR [rdx+88]
mov r8, QWORD PTR [rsp+88]
adc r11, r8
mov QWORD PTR [rcx+80], r10
mov r10, QWORD PTR [rdx+96]
mov r8, QWORD PTR [rsp+96]
adc r10, r8
mov QWORD PTR [rcx+88], r11
mov r11, QWORD PTR [rdx+104]
mov r8, QWORD PTR [rsp+104]
adc r11, r8
mov QWORD PTR [rcx+96], r10
mov r10, QWORD PTR [rdx+112]
mov r8, QWORD PTR [rsp+112]
adc r10, r8
mov QWORD PTR [rcx+104], r11
mov r11, QWORD PTR [rdx+120]
mov r8, QWORD PTR [rsp+120]
adc r11, r8
mov QWORD PTR [rcx+112], r10
mov QWORD PTR [rcx+120], r11
; rax = 0 + 0 + carry: the carry out of the top word.
adc rax, 0
add rsp, 128
ret
sp_2048_cond_add_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Conditionally add a and b using the mask m.
; * m is -1 to add and 0 when not.
; *
; * Works on 16 64-bit words, offset 0 being the least significant.
; *
; * r A single precision number representing conditional add result. (rcx)
; * a A single precision number to add with. (rdx)
; * b A single precision number to add. (r8)
; * m Mask value to apply. (r9)
; * returns in rax: the carry out of the top word (0 or 1).
; */
_text SEGMENT READONLY PARA
sp_2048_cond_add_avx2_16 PROC
; r12 is used as a working register below, so it is saved and restored.
push r12
mov rax, 0
; pext applies the mask: with m = -1 it returns the word of b unchanged, with
; m = 0 it returns 0. It does not modify flags, so it can sit inside the
; add/adc carry chain. r10, r11 and r12 rotate roles (word of a, masked word
; of b, result waiting to be stored).
mov r12, QWORD PTR [r8]
mov r10, QWORD PTR [rdx]
pext r12, r12, r9
; Word 0 starts the carry chain with add; every later word uses adc.
add r10, r12
mov r12, QWORD PTR [r8+8]
mov r11, QWORD PTR [rdx+8]
pext r12, r12, r9
mov QWORD PTR [rcx], r10
adc r11, r12
mov r10, QWORD PTR [r8+16]
mov r12, QWORD PTR [rdx+16]
pext r10, r10, r9
mov QWORD PTR [rcx+8], r11
adc r12, r10
mov r11, QWORD PTR [r8+24]
mov r10, QWORD PTR [rdx+24]
pext r11, r11, r9
mov QWORD PTR [rcx+16], r12
adc r10, r11
mov r12, QWORD PTR [r8+32]
mov r11, QWORD PTR [rdx+32]
pext r12, r12, r9
mov QWORD PTR [rcx+24], r10
adc r11, r12
mov r10, QWORD PTR [r8+40]
mov r12, QWORD PTR [rdx+40]
pext r10, r10, r9
mov QWORD PTR [rcx+32], r11
adc r12, r10
mov r11, QWORD PTR [r8+48]
mov r10, QWORD PTR [rdx+48]
pext r11, r11, r9
mov QWORD PTR [rcx+40], r12
adc r10, r11
mov r12, QWORD PTR [r8+56]
mov r11, QWORD PTR [rdx+56]
pext r12, r12, r9
mov QWORD PTR [rcx+48], r10
adc r11, r12
mov r10, QWORD PTR [r8+64]
mov r12, QWORD PTR [rdx+64]
pext r10, r10, r9
mov QWORD PTR [rcx+56], r11
adc r12, r10
mov r11, QWORD PTR [r8+72]
mov r10, QWORD PTR [rdx+72]
pext r11, r11, r9
mov QWORD PTR [rcx+64], r12
adc r10, r11
mov r12, QWORD PTR [r8+80]
mov r11, QWORD PTR [rdx+80]
pext r12, r12, r9
mov QWORD PTR [rcx+72], r10
adc r11, r12
mov r10, QWORD PTR [r8+88]
mov r12, QWORD PTR [rdx+88]
pext r10, r10, r9
mov QWORD PTR [rcx+80], r11
adc r12, r10
mov r11, QWORD PTR [r8+96]
mov r10, QWORD PTR [rdx+96]
pext r11, r11, r9
mov QWORD PTR [rcx+88], r12
adc r10, r11
mov r12, QWORD PTR [r8+104]
mov r11, QWORD PTR [rdx+104]
pext r12, r12, r9
mov QWORD PTR [rcx+96], r10
adc r11, r12
mov r10, QWORD PTR [r8+112]
mov r12, QWORD PTR [rdx+112]
pext r10, r10, r9
mov QWORD PTR [rcx+104], r11
adc r12, r10
mov r11, QWORD PTR [r8+120]
mov r10, QWORD PTR [rdx+120]
pext r11, r11, r9
mov QWORD PTR [rcx+112], r12
adc r10, r11
mov QWORD PTR [rcx+120], r10
; rax = 0 + 0 + carry: the carry out of the top word.
adc rax, 0
pop r12
ret
sp_2048_cond_add_avx2_16 ENDP
_text ENDS
ENDIF
; /* Shift number left by n bit. (r = a << n)
; *
; * r Result of left shift by n.
; * a Number to shift.
; * n Amount to shift.
; *
; * Microsoft x64 registers on entry: rcx = r, rdx = a, r8 = n.
; * Reads 32 limbs of a (offsets 0..248) and writes 33 limbs of r
; * (offsets 0..256); the extra top limb receives the bits shifted out of
; * a[31].
; * The shift count is taken from cl by shld/shl.
; * NOTE(review): assumes 0 <= n < 64 - confirm against the callers.
; * Limbs are processed from most significant to least significant, and each
; * source limb is loaded before the result limb at the same offset is stored.
; * Scratch: rax, r8, r10, r11; r12 and r13 are saved and restored.
; * NOTE(review): r12/r13 are pushed without PROC FRAME unwind directives -
; * confirm this is acceptable for the Windows x64 build.
; */
_text SEGMENT READONLY PARA
sp_2048_lshift_32 PROC
push r12
push r13
; r9 = r, so that rcx is free to hold the shift count in cl.
mov r9, rcx
mov rcx, r8
; r12 starts at zero and collects the bits shifted out of the top limb.
mov r12, 0
; Limbs a[27..31] -> r[28..32]; a[27] stays in r13 for the next group.
mov r13, QWORD PTR [rdx+216]
mov rax, QWORD PTR [rdx+224]
mov r8, QWORD PTR [rdx+232]
mov r10, QWORD PTR [rdx+240]
mov r11, QWORD PTR [rdx+248]
shld r12, r11, cl
shld r11, r10, cl
shld r10, r8, cl
shld r8, rax, cl
shld rax, r13, cl
mov QWORD PTR [r9+224], rax
mov QWORD PTR [r9+232], r8
mov QWORD PTR [r9+240], r10
mov QWORD PTR [r9+248], r11
mov QWORD PTR [r9+256], r12
; Limbs a[23..26] -> r[24..27]; a[23] stays in r11 for the next group.
mov r11, QWORD PTR [rdx+184]
mov rax, QWORD PTR [rdx+192]
mov r8, QWORD PTR [rdx+200]
mov r10, QWORD PTR [rdx+208]
shld r13, r10, cl
shld r10, r8, cl
shld r8, rax, cl
shld rax, r11, cl
mov QWORD PTR [r9+192], rax
mov QWORD PTR [r9+200], r8
mov QWORD PTR [r9+208], r10
mov QWORD PTR [r9+216], r13
; Limbs a[19..22] -> r[20..23]; a[19] stays in r13 for the next group.
mov r13, QWORD PTR [rdx+152]
mov rax, QWORD PTR [rdx+160]
mov r8, QWORD PTR [rdx+168]
mov r10, QWORD PTR [rdx+176]
shld r11, r10, cl
shld r10, r8, cl
shld r8, rax, cl
shld rax, r13, cl
mov QWORD PTR [r9+160], rax
mov QWORD PTR [r9+168], r8
mov QWORD PTR [r9+176], r10
mov QWORD PTR [r9+184], r11
; Limbs a[15..18] -> r[16..19]; a[15] stays in r11 for the next group.
mov r11, QWORD PTR [rdx+120]
mov rax, QWORD PTR [rdx+128]
mov r8, QWORD PTR [rdx+136]
mov r10, QWORD PTR [rdx+144]
shld r13, r10, cl
shld r10, r8, cl
shld r8, rax, cl
shld rax, r11, cl
mov QWORD PTR [r9+128], rax
mov QWORD PTR [r9+136], r8
mov QWORD PTR [r9+144], r10
mov QWORD PTR [r9+152], r13
; Limbs a[11..14] -> r[12..15]; a[11] stays in r13 for the next group.
mov r13, QWORD PTR [rdx+88]
mov rax, QWORD PTR [rdx+96]
mov r8, QWORD PTR [rdx+104]
mov r10, QWORD PTR [rdx+112]
shld r11, r10, cl
shld r10, r8, cl
shld r8, rax, cl
shld rax, r13, cl
mov QWORD PTR [r9+96], rax
mov QWORD PTR [r9+104], r8
mov QWORD PTR [r9+112], r10
mov QWORD PTR [r9+120], r11
; Limbs a[7..10] -> r[8..11]; a[7] stays in r11 for the next group.
mov r11, QWORD PTR [rdx+56]
mov rax, QWORD PTR [rdx+64]
mov r8, QWORD PTR [rdx+72]
mov r10, QWORD PTR [rdx+80]
shld r13, r10, cl
shld r10, r8, cl
shld r8, rax, cl
shld rax, r11, cl
mov QWORD PTR [r9+64], rax
mov QWORD PTR [r9+72], r8
mov QWORD PTR [r9+80], r10
mov QWORD PTR [r9+88], r13
; Limbs a[3..6] -> r[4..7]; a[3] stays in r13 for the final group.
mov r13, QWORD PTR [rdx+24]
mov rax, QWORD PTR [rdx+32]
mov r8, QWORD PTR [rdx+40]
mov r10, QWORD PTR [rdx+48]
shld r11, r10, cl
shld r10, r8, cl
shld r8, rax, cl
shld rax, r13, cl
mov QWORD PTR [r9+32], rax
mov QWORD PTR [r9+40], r8
mov QWORD PTR [r9+48], r10
mov QWORD PTR [r9+56], r11
; Limbs a[0..2] -> r[0..3]; the lowest limb uses a plain shl (zeros shift in).
mov rax, QWORD PTR [rdx]
mov r8, QWORD PTR [rdx+8]
mov r10, QWORD PTR [rdx+16]
shld r13, r10, cl
shld r10, r8, cl
shld r8, rax, cl
shl rax, cl
mov QWORD PTR [r9], rax
mov QWORD PTR [r9+8], r8
mov QWORD PTR [r9+16], r10
mov QWORD PTR [r9+24], r13
pop r13
pop r12
ret
sp_2048_lshift_32 ENDP
_text ENDS
ENDIF
ENDIF
IFNDEF WOLFSSL_SP_NO_3072
IFNDEF WOLFSSL_SP_NO_3072
; /* Read big endian unsigned byte array into r.
; * Uses the bswap instruction.
; *
; * r A single precision integer.
; * size Maximum number of bytes to convert
; * a Byte array.
; * n Number of bytes in array to read.
; *
; * Microsoft x64 registers on entry: rcx = r, rdx = size, r8 = a, r9 = n.
; * The size argument is not consulted: the routine always fills exactly
; * 384 bytes (48 limbs) of r, zero padding above the converted bytes, and
; * rdx is reused as the end-of-output pointer.
; * The routine does not check n; the caller must ensure n <= 384.
; * n is compared as a signed 64-bit value, matching the earlier code.
; *
; * Only volatile registers (rax, rdx, r10, r11 plus the argument registers)
; * are used, so this is a leaf function: it does not touch rsp or any
; * non-volatile register and therefore needs no unwind data.
; */
_text SEGMENT READONLY PARA
sp_3072_from_bin_bswap PROC
        lea     r11, [r8+r9]                    ; r11 = a + n, walks down
        lea     rdx, [rcx+384]                  ; rdx = one past last limb of r
        jmp     L_3072_from_bin_bswap_64_end
L_3072_from_bin_bswap_64_start:
        ; Convert the 64 least significant remaining bytes into 8 limbs.
        sub     r11, 64
        mov     rax, QWORD PTR [r11+56]
        mov     r10, QWORD PTR [r11+48]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r10
        mov     rax, QWORD PTR [r11+40]
        mov     r10, QWORD PTR [r11+32]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+16], rax
        mov     QWORD PTR [rcx+24], r10
        mov     rax, QWORD PTR [r11+24]
        mov     r10, QWORD PTR [r11+16]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+32], rax
        mov     QWORD PTR [rcx+40], r10
        mov     rax, QWORD PTR [r11+8]
        mov     r10, QWORD PTR [r11]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+48], rax
        mov     QWORD PTR [rcx+56], r10
        add     rcx, 64
        sub     r9, 64
L_3072_from_bin_bswap_64_end:
        cmp     r9, 63
        jg      L_3072_from_bin_bswap_64_start
        jmp     L_3072_from_bin_bswap_8_end
L_3072_from_bin_bswap_8_start:
        ; One limb at a time while at least 8 bytes remain.
        sub     r11, 8
        mov     rax, QWORD PTR [r11]
        bswap   rax
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
L_3072_from_bin_bswap_8_end:
        cmp     r9, 7
        jg      L_3072_from_bin_bswap_8_start
        test    r9, r9
        jz      L_3072_from_bin_bswap_hi_end
        ; 1..7 leading bytes remain at the start of a: build the top limb
        ; most significant byte first.
        xor     r10d, r10d
L_3072_from_bin_bswap_hi_start:
        movzx   eax, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        add     r10, rax
        dec     r9
        jg      L_3072_from_bin_bswap_hi_start
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
L_3072_from_bin_bswap_hi_end:
        ; Zero the remaining limbs. Pointers are compared unsigned, and
        ; nothing is written if the cursor is already at or past the end.
        cmp     rcx, rdx
        jae     L_3072_from_bin_bswap_zero_end
L_3072_from_bin_bswap_zero_start:
        mov     QWORD PTR [rcx], 0
        add     rcx, 8
        cmp     rcx, rdx
        jb      L_3072_from_bin_bswap_zero_start
L_3072_from_bin_bswap_zero_end:
        ret
sp_3072_from_bin_bswap ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Read big endian unsigned byte array into r.
; * Uses the movbe instruction which is an optional instruction.
; *
; * r A single precision integer.
; * size Maximum number of bytes to convert
; * a Byte array.
; * n Number of bytes in array to read.
; *
; * Microsoft x64 registers on entry: rcx = r, rdx = size, r8 = a, r9 = n.
; * The size argument is not consulted: the routine always fills exactly
; * 384 bytes (48 limbs) of r, zero padding above the converted bytes, and
; * rdx is reused as the end-of-output pointer.
; * The routine does not check n; the caller must ensure n <= 384.
; * n is compared as a signed 64-bit value, matching the earlier code.
; *
; * Only volatile registers (rax, rdx, r10, r11 plus the argument registers)
; * are used, so this is a leaf function: it does not touch rsp or any
; * non-volatile register and therefore needs no unwind data.
; */
_text SEGMENT READONLY PARA
sp_3072_from_bin_movbe PROC
        lea     r11, [r8+r9]                    ; r11 = a + n, walks down
        lea     rdx, [rcx+384]                  ; rdx = one past last limb of r
        jmp     L_3072_from_bin_movbe_64_end
L_3072_from_bin_movbe_64_start:
        ; Convert the 64 least significant remaining bytes into 8 limbs.
        sub     r11, 64
        movbe   rax, QWORD PTR [r11+56]
        movbe   r10, QWORD PTR [r11+48]
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r10
        movbe   rax, QWORD PTR [r11+40]
        movbe   r10, QWORD PTR [r11+32]
        mov     QWORD PTR [rcx+16], rax
        mov     QWORD PTR [rcx+24], r10
        movbe   rax, QWORD PTR [r11+24]
        movbe   r10, QWORD PTR [r11+16]
        mov     QWORD PTR [rcx+32], rax
        mov     QWORD PTR [rcx+40], r10
        movbe   rax, QWORD PTR [r11+8]
        movbe   r10, QWORD PTR [r11]
        mov     QWORD PTR [rcx+48], rax
        mov     QWORD PTR [rcx+56], r10
        add     rcx, 64
        sub     r9, 64
L_3072_from_bin_movbe_64_end:
        cmp     r9, 63
        jg      L_3072_from_bin_movbe_64_start
        jmp     L_3072_from_bin_movbe_8_end
L_3072_from_bin_movbe_8_start:
        ; One limb at a time while at least 8 bytes remain.
        sub     r11, 8
        movbe   rax, QWORD PTR [r11]
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
L_3072_from_bin_movbe_8_end:
        cmp     r9, 7
        jg      L_3072_from_bin_movbe_8_start
        test    r9, r9
        jz      L_3072_from_bin_movbe_hi_end
        ; 1..7 leading bytes remain at the start of a: build the top limb
        ; most significant byte first.
        xor     r10d, r10d
L_3072_from_bin_movbe_hi_start:
        movzx   eax, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        add     r10, rax
        dec     r9
        jg      L_3072_from_bin_movbe_hi_start
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
L_3072_from_bin_movbe_hi_end:
        ; Zero the remaining limbs. Pointers are compared unsigned, and
        ; nothing is written if the cursor is already at or past the end.
        cmp     rcx, rdx
        jae     L_3072_from_bin_movbe_zero_end
L_3072_from_bin_movbe_zero_start:
        mov     QWORD PTR [rcx], 0
        add     rcx, 8
        cmp     rcx, rdx
        jb      L_3072_from_bin_movbe_zero_start
L_3072_from_bin_movbe_zero_end:
        ret
sp_3072_from_bin_movbe ENDP
_text ENDS
ENDIF
; /* Write r as big endian to byte array.
; * Fixed length number of bytes written: 384
; * Uses the bswap instruction.
; *
; * r A single precision integer.
; * a Byte array.
; *
; * Microsoft x64 registers: rcx = r (48 limbs), rdx = a (384 bytes).
; * Limbs are taken two at a time from the most significant end of r,
; * byte reversed, and stored at ascending offsets in a.
; * Scratch: rax, r8. Leaf function, no stack use.
; */
_text SEGMENT READONLY PARA
sp_3072_to_bin_bswap_48 PROC
; Assembly-time counter: pair k covers limbs 47-2k and 46-2k of r and
; bytes 16k..16k+15 of a. The REPEAT block expands to 24 straight-line
; copies, so no loop exists at run time.
sp_3072_to_bin_bswap_48_pair = 0
        REPEAT  24
        mov     rax, QWORD PTR [rcx+(376-16*sp_3072_to_bin_bswap_48_pair)]
        mov     r8, QWORD PTR [rcx+(368-16*sp_3072_to_bin_bswap_48_pair)]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+(16*sp_3072_to_bin_bswap_48_pair)], rax
        mov     QWORD PTR [rdx+(16*sp_3072_to_bin_bswap_48_pair+8)], r8
sp_3072_to_bin_bswap_48_pair = sp_3072_to_bin_bswap_48_pair + 1
        ENDM
        ret
sp_3072_to_bin_bswap_48 ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Write r as big endian to byte array.
; * Fixed length number of bytes written: 384
; * Uses the movbe instruction which is optional.
; *
; * r A single precision integer.
; * a Byte array.
; *
; * Microsoft x64 registers: rcx = r (48 limbs), rdx = a (384 bytes).
; * Limbs are loaded byte reversed two at a time from the most significant
; * end of r and stored at ascending offsets in a.
; * Scratch: rax, r8. Leaf function, no stack use.
; */
_text SEGMENT READONLY PARA
sp_3072_to_bin_movbe_48 PROC
; Assembly-time counter: pair k covers limbs 47-2k and 46-2k of r and
; bytes 16k..16k+15 of a. The REPEAT block expands to 24 straight-line
; copies, so no loop exists at run time.
sp_3072_to_bin_movbe_48_pair = 0
        REPEAT  24
        movbe   rax, QWORD PTR [rcx+(376-16*sp_3072_to_bin_movbe_48_pair)]
        movbe   r8, QWORD PTR [rcx+(368-16*sp_3072_to_bin_movbe_48_pair)]
        mov     QWORD PTR [rdx+(16*sp_3072_to_bin_movbe_48_pair)], rax
        mov     QWORD PTR [rdx+(16*sp_3072_to_bin_movbe_48_pair+8)], r8
sp_3072_to_bin_movbe_48_pair = sp_3072_to_bin_movbe_48_pair + 1
        ENDM
        ret
sp_3072_to_bin_movbe_48 ENDP
_text ENDS
ENDIF
; /* Multiply a and b into r. (r = a * b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; *
; * Microsoft x64 registers on entry: rcx = r, rdx = a, r8 = b.
; * a and b are 12 limbs each (offsets 0..88); r receives 24 limbs
; * (offsets 0..184).
; * The product is built one result limb (column) at a time: every partial
; * product A[i] * B[j] with i + j equal to the column index is added into a
; * three-register accumulator that rotates through r10, r11 and r12.
; * Result limbs 0..11 are staged in a 96-byte stack buffer and copied to r
; * only after all products are done; limbs 12..23 are stored to r directly.
; * Presumably this lets r alias a or b - confirm against the callers.
; * Scratch: rax, rdx, r9, r10, r11; r12 is saved and restored.
; * NOTE(review): r12 is pushed and rsp adjusted without PROC FRAME unwind
; * directives - confirm this is acceptable for the Windows x64 build.
; */
_text SEGMENT READONLY PARA
sp_3072_mul_12 PROC
push r12
; r9 = a, because mul overwrites rdx with the high half of each product.
mov r9, rdx
; 96 bytes of stack hold result limbs 0..11 until the end.
sub rsp, 96
; A[0] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9]
xor r12, r12
mov QWORD PTR [rsp], rax
mov r11, rdx
; A[0] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+8]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+8], r11
; A[0] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+8]
add r12, rax
adc r10, rdx
adc r11, 0
; A[2] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+16]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+16], r12
; A[0] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[1] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+8]
add r10, rax
adc r11, rdx
adc r12, 0
; A[2] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+16]
add r10, rax
adc r11, rdx
adc r12, 0
; A[3] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rsp+24], r10
; A[0] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+8]
add r11, rax
adc r12, rdx
adc r10, 0
; A[2] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+16]
add r11, rax
adc r12, rdx
adc r10, 0
; A[3] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+24]
add r11, rax
adc r12, rdx
adc r10, 0
; A[4] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+32]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+32], r11
; A[0] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+8]
add r12, rax
adc r10, rdx
adc r11, 0
; A[2] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+16]
add r12, rax
adc r10, rdx
adc r11, 0
; A[3] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+24]
add r12, rax
adc r10, rdx
adc r11, 0
; A[4] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+32]
add r12, rax
adc r10, rdx
adc r11, 0
; A[5] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+40]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+40], r12
; A[0] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[1] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+8]
add r10, rax
adc r11, rdx
adc r12, 0
; A[2] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+16]
add r10, rax
adc r11, rdx
adc r12, 0
; A[3] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
adc r12, 0
; A[4] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+32]
add r10, rax
adc r11, rdx
adc r12, 0
; A[5] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+40]
add r10, rax
adc r11, rdx
adc r12, 0
; A[6] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+48]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rsp+48], r10
; A[0] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+8]
add r11, rax
adc r12, rdx
adc r10, 0
; A[2] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+16]
add r11, rax
adc r12, rdx
adc r10, 0
; A[3] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+24]
add r11, rax
adc r12, rdx
adc r10, 0
; A[4] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+32]
add r11, rax
adc r12, rdx
adc r10, 0
; A[5] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+40]
add r11, rax
adc r12, rdx
adc r10, 0
; A[6] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+48]
add r11, rax
adc r12, rdx
adc r10, 0
; A[7] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+56]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+56], r11
; A[0] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+8]
add r12, rax
adc r10, rdx
adc r11, 0
; A[2] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+16]
add r12, rax
adc r10, rdx
adc r11, 0
; A[3] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+24]
add r12, rax
adc r10, rdx
adc r11, 0
; A[4] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+32]
add r12, rax
adc r10, rdx
adc r11, 0
; A[5] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+40]
add r12, rax
adc r10, rdx
adc r11, 0
; A[6] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+48]
add r12, rax
adc r10, rdx
adc r11, 0
; A[7] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+56]
add r12, rax
adc r10, rdx
adc r11, 0
; A[8] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+64]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+64], r12
; A[0] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[1] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+8]
add r10, rax
adc r11, rdx
adc r12, 0
; A[2] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+16]
add r10, rax
adc r11, rdx
adc r12, 0
; A[3] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
adc r12, 0
; A[4] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+32]
add r10, rax
adc r11, rdx
adc r12, 0
; A[5] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+40]
add r10, rax
adc r11, rdx
adc r12, 0
; A[6] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+48]
add r10, rax
adc r11, rdx
adc r12, 0
; A[7] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+56]
add r10, rax
adc r11, rdx
adc r12, 0
; A[8] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+64]
add r10, rax
adc r11, rdx
adc r12, 0
; A[9] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+72]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rsp+72], r10
; A[0] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+8]
add r11, rax
adc r12, rdx
adc r10, 0
; A[2] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+16]
add r11, rax
adc r12, rdx
adc r10, 0
; A[3] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+24]
add r11, rax
adc r12, rdx
adc r10, 0
; A[4] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+32]
add r11, rax
adc r12, rdx
adc r10, 0
; A[5] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+40]
add r11, rax
adc r12, rdx
adc r10, 0
; A[6] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+48]
add r11, rax
adc r12, rdx
adc r10, 0
; A[7] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+56]
add r11, rax
adc r12, rdx
adc r10, 0
; A[8] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+64]
add r11, rax
adc r12, rdx
adc r10, 0
; A[9] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+72]
add r11, rax
adc r12, rdx
adc r10, 0
; A[10] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+80]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+80], r11
; A[0] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+8]
add r12, rax
adc r10, rdx
adc r11, 0
; A[2] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+16]
add r12, rax
adc r10, rdx
adc r11, 0
; A[3] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+24]
add r12, rax
adc r10, rdx
adc r11, 0
; A[4] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+32]
add r12, rax
adc r10, rdx
adc r11, 0
; A[5] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+40]
add r12, rax
adc r10, rdx
adc r11, 0
; A[6] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+48]
add r12, rax
adc r10, rdx
adc r11, 0
; A[7] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+56]
add r12, rax
adc r10, rdx
adc r11, 0
; A[8] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+64]
add r12, rax
adc r10, rdx
adc r11, 0
; A[9] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+72]
add r12, rax
adc r10, rdx
adc r11, 0
; A[10] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+80]
add r12, rax
adc r10, rdx
adc r11, 0
; A[11] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+88]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+88], r12
; From here on result limbs are stored straight into r (offsets 96..184).
; A[1] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+8]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[2] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+16]
add r10, rax
adc r11, rdx
adc r12, 0
; A[3] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
adc r12, 0
; A[4] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+32]
add r10, rax
adc r11, rdx
adc r12, 0
; A[5] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+40]
add r10, rax
adc r11, rdx
adc r12, 0
; A[6] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+48]
add r10, rax
adc r11, rdx
adc r12, 0
; A[7] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+56]
add r10, rax
adc r11, rdx
adc r12, 0
; A[8] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+64]
add r10, rax
adc r11, rdx
adc r12, 0
; A[9] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+72]
add r10, rax
adc r11, rdx
adc r12, 0
; A[10] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+80]
add r10, rax
adc r11, rdx
adc r12, 0
; A[11] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+88]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rcx+96], r10
; A[2] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+16]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[3] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+24]
add r11, rax
adc r12, rdx
adc r10, 0
; A[4] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+32]
add r11, rax
adc r12, rdx
adc r10, 0
; A[5] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+40]
add r11, rax
adc r12, rdx
adc r10, 0
; A[6] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+48]
add r11, rax
adc r12, rdx
adc r10, 0
; A[7] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+56]
add r11, rax
adc r12, rdx
adc r10, 0
; A[8] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+64]
add r11, rax
adc r12, rdx
adc r10, 0
; A[9] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+72]
add r11, rax
adc r12, rdx
adc r10, 0
; A[10] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+80]
add r11, rax
adc r12, rdx
adc r10, 0
; A[11] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+88]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rcx+104], r11
; A[3] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+24]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[4] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+32]
add r12, rax
adc r10, rdx
adc r11, 0
; A[5] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+40]
add r12, rax
adc r10, rdx
adc r11, 0
; A[6] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+48]
add r12, rax
adc r10, rdx
adc r11, 0
; A[7] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+56]
add r12, rax
adc r10, rdx
adc r11, 0
; A[8] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+64]
add r12, rax
adc r10, rdx
adc r11, 0
; A[9] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+72]
add r12, rax
adc r10, rdx
adc r11, 0
; A[10] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+80]
add r12, rax
adc r10, rdx
adc r11, 0
; A[11] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+88]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+112], r12
; A[4] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+32]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[5] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+40]
add r10, rax
adc r11, rdx
adc r12, 0
; A[6] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+48]
add r10, rax
adc r11, rdx
adc r12, 0
; A[7] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+56]
add r10, rax
adc r11, rdx
adc r12, 0
; A[8] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+64]
add r10, rax
adc r11, rdx
adc r12, 0
; A[9] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+72]
add r10, rax
adc r11, rdx
adc r12, 0
; A[10] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+80]
add r10, rax
adc r11, rdx
adc r12, 0
; A[11] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+88]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rcx+120], r10
; A[5] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+40]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[6] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+48]
add r11, rax
adc r12, rdx
adc r10, 0
; A[7] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+56]
add r11, rax
adc r12, rdx
adc r10, 0
; A[8] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+64]
add r11, rax
adc r12, rdx
adc r10, 0
; A[9] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+72]
add r11, rax
adc r12, rdx
adc r10, 0
; A[10] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+80]
add r11, rax
adc r12, rdx
adc r10, 0
; A[11] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+88]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rcx+128], r11
; A[6] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+48]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[7] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+56]
add r12, rax
adc r10, rdx
adc r11, 0
; A[8] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+64]
add r12, rax
adc r10, rdx
adc r11, 0
; A[9] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+72]
add r12, rax
adc r10, rdx
adc r11, 0
; A[10] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+80]
add r12, rax
adc r10, rdx
adc r11, 0
; A[11] * B[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r9+88]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+136], r12
; A[7] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+56]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[8] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+64]
add r10, rax
adc r11, rdx
adc r12, 0
; A[9] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+72]
add r10, rax
adc r11, rdx
adc r12, 0
; A[10] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+80]
add r10, rax
adc r11, rdx
adc r12, 0
; A[11] * B[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r9+88]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rcx+144], r10
; A[8] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+64]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[9] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+72]
add r11, rax
adc r12, rdx
adc r10, 0
; A[10] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+80]
add r11, rax
adc r12, rdx
adc r10, 0
; A[11] * B[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r9+88]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rcx+152], r11
; A[9] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+72]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[10] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+80]
add r12, rax
adc r10, rdx
adc r11, 0
; A[11] * B[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r9+88]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+160], r12
; A[10] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+80]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[11] * B[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r9+88]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rcx+168], r10
; A[11] * B[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r9+88]
add r11, rax
adc r12, rdx
mov QWORD PTR [rcx+176], r11
mov QWORD PTR [rcx+184], r12
; All products are done: copy the staged low 12 limbs from the stack to r.
mov rax, QWORD PTR [rsp]
mov rdx, QWORD PTR [rsp+8]
mov r10, QWORD PTR [rsp+16]
mov r11, QWORD PTR [rsp+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], rdx
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
mov rax, QWORD PTR [rsp+32]
mov rdx, QWORD PTR [rsp+40]
mov r10, QWORD PTR [rsp+48]
mov r11, QWORD PTR [rsp+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], rdx
mov QWORD PTR [rcx+48], r10
mov QWORD PTR [rcx+56], r11
mov rax, QWORD PTR [rsp+64]
mov rdx, QWORD PTR [rsp+72]
mov r10, QWORD PTR [rsp+80]
mov r11, QWORD PTR [rsp+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], rdx
mov QWORD PTR [rcx+80], r10
mov QWORD PTR [rcx+88], r11
add rsp, 96
pop r12
ret
sp_3072_mul_12 ENDP
_text ENDS
; /* Square a and put result in r. (r = a * a)
; *
; * Reads the twelve 64-bit words a[0..11] and writes the twenty-four 64-bit
; * words r[0..23]. The code takes r from rcx and a from rdx.
; *
; * The product is formed one result word (column) at a time. Result words
; * 0..11 are built in a 96-byte stack buffer and copied to r only after the
; * last read of a; result words 12..23 are stored to r directly.
; * NOTE(review): presumably this ordering is what allows r to alias a -
; * confirm against callers.
; *
; * Register use: r8 = a, rcx = r, r9/r10/r11 = rotating three-word column
; * accumulator, r14:r13:r12 = per-column sum of cross products.
; * r12, r13 and r14 are pushed on entry and popped before returning.
; *
; * r A single precision integer.
; * a A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_3072_sqr_12 PROC
push r12
push r13
push r14
; mul writes its result to rdx:rax, so the a pointer is kept in r8.
mov r8, rdx
; Scratch buffer for result words 0..11.
sub rsp, 96
; Columns 0..4: each cross product A[i] * A[j] (i != j) is added to the
; column accumulator twice; each square A[i] * A[i] is added once.
; A[0] * A[0]
mov rax, QWORD PTR [r8]
mul rax
xor r11, r11
mov QWORD PTR [rsp], rax
mov r10, rdx
; A[0] * A[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r8]
xor r9, r9
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rsp+8], r10
; A[0] * A[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r8]
xor r10, r10
add r11, rax
adc r9, rdx
adc r10, 0
add r11, rax
adc r9, rdx
adc r10, 0
; A[1] * A[1]
mov rax, QWORD PTR [r8+8]
mul rax
add r11, rax
adc r9, rdx
adc r10, 0
mov QWORD PTR [rsp+16], r11
; A[0] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8]
xor r11, r11
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
; A[1] * A[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r8+8]
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+24], r9
; A[0] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8]
xor r9, r9
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
; A[1] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8+8]
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
; A[2] * A[2]
mov rax, QWORD PTR [r8+16]
mul rax
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rsp+32], r10
; Columns 5..17: the cross products of a column are first summed in
; r14:r13:r12, that sum is doubled once, the square term (if the column
; has one) is added, and the total is added to the column accumulator.
; A[0] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rsp+40], r11
; A[0] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[3]
mov rax, QWORD PTR [r8+24]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rsp+48], r9
; A[0] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rsp+56], r10
; A[0] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[4]
mov rax, QWORD PTR [r8+32]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rsp+64], r11
; A[0] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rsp+72], r9
; A[0] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[5]
mov rax, QWORD PTR [r8+40]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rsp+80], r10
; A[0] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rsp+88], r11
; A[1] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+8]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[2] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[6]
mov rax, QWORD PTR [r8+48]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r9, r12
adc r10, r13
adc r11, r14
; From result word 12 onwards the words are stored straight into r.
mov QWORD PTR [rcx+96], r9
; A[2] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+16]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[3] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rcx+104], r10
; A[3] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+24]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[4] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[7]
mov rax, QWORD PTR [r8+56]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rcx+112], r11
; A[4] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+32]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[5] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rcx+120], r9
; A[5] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+40]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[6] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
; A[8] * A[8]
mov rax, QWORD PTR [r8+64]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rcx+128], r10
; A[6] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+48]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[7] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
; A[8] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+64]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rcx+136], r11
; Columns 18..22: back to adding each cross product to the column
; accumulator twice, without the r14:r13:r12 side sum.
; A[7] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+56]
xor r11, r11
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
; A[8] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+64]
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
; A[9] * A[9]
mov rax, QWORD PTR [r8+72]
mul rax
add r9, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+144], r9
; A[8] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+64]
xor r9, r9
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
; A[9] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+72]
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rcx+152], r10
; A[9] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+72]
xor r10, r10
add r11, rax
adc r9, rdx
adc r10, 0
add r11, rax
adc r9, rdx
adc r10, 0
; A[10] * A[10]
mov rax, QWORD PTR [r8+80]
mul rax
add r11, rax
adc r9, rdx
adc r10, 0
mov QWORD PTR [rcx+160], r11
; A[10] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+80]
xor r11, r11
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+168], r9
; A[11] * A[11]
mov rax, QWORD PTR [r8+88]
mul rax
add r10, rax
adc r11, rdx
mov QWORD PTR [rcx+176], r10
mov QWORD PTR [rcx+184], r11
; All reads of a are done: copy result words 0..11 from the stack
; buffer into r, four words at a time.
mov rax, QWORD PTR [rsp]
mov rdx, QWORD PTR [rsp+8]
mov r12, QWORD PTR [rsp+16]
mov r13, QWORD PTR [rsp+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], rdx
mov QWORD PTR [rcx+16], r12
mov QWORD PTR [rcx+24], r13
mov rax, QWORD PTR [rsp+32]
mov rdx, QWORD PTR [rsp+40]
mov r12, QWORD PTR [rsp+48]
mov r13, QWORD PTR [rsp+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], rdx
mov QWORD PTR [rcx+48], r12
mov QWORD PTR [rcx+56], r13
mov rax, QWORD PTR [rsp+64]
mov rdx, QWORD PTR [rsp+72]
mov r12, QWORD PTR [rsp+80]
mov r13, QWORD PTR [rsp+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], rdx
mov QWORD PTR [rcx+80], r12
mov QWORD PTR [rcx+88], r13
; Release the scratch buffer and restore the saved registers in reverse
; push order.
add rsp, 96
pop r14
pop r13
pop r12
ret
sp_3072_sqr_12 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Multiply a and b into r. (r = a * b)
; *
; * Reads twelve 64-bit words from each of a and b and writes the
; * twenty-four 64-bit words r[0..23]. The code takes r from rcx, a from rdx
; * and b from r8. Uses mulx, adcx and adox.
; *
; * The product is built row by row: row i adds A[i] * B[0..11] into result
; * words i..i+12. Within a row the low halves of the mulx products are added
; * along the CF chain (adcx) and the high halves along the OF chain (adox),
; * so the two carry chains do not disturb each other.
; *
; * Result words 0..11 go to r directly, unless r is the same pointer as a or
; * b; in that case they go to a 96-byte stack buffer and are copied to r at
; * the end. Result words 12..23 always go to r directly.
; *
; * Register use after the prologue:
; *   rbp = b, r9 = a, r8 = r + 96 (address of result word 12),
; *   rbx = where result words 0..11 are written (r or the stack buffer),
; *   r14 = constant zero, r13 = carry word passed from one row to the next,
; *   r10/r11/r12 = rotating window of result words, rdx = A[i],
; *   rax/rcx = low/high half of the current mulx product.
; * rbx, rbp, r12, r13 and r14 are pushed on entry and popped before return.
; *
; * r Result of multiplication.
; * a First number to multiply.
; * b Second number to multiply.
; */
_text SEGMENT READONLY PARA
sp_3072_mul_avx2_12 PROC
push rbx
push rbp
push r12
push r13
push r14
; rdx is the implicit mulx source and rcx is used as a product register,
; so the three pointers are moved: rbp = b, r8 = r, r9 = a.
mov rbp, r8
mov r8, rcx
mov r9, rdx
sub rsp, 96
; rbx = r when r differs from both a and b, otherwise the stack buffer.
cmp r9, r8
mov rbx, rsp
cmovne rbx, r8
cmp rbp, r8
cmove rbx, rsp
; r8 now addresses result word 12.
add r8, 96
; r14 = 0; the xor also clears CF and OF before the first carry chains.
xor r14, r14
; Row 0: result words 0..12 = A[0] * B. Only the CF chain is needed.
mov rdx, QWORD PTR [r9]
; A[0] * B[0]
mulx r11, r10, QWORD PTR [rbp]
; A[0] * B[1]
mulx r12, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx], r10
adcx r11, rax
mov QWORD PTR [rbx+8], r11
; A[0] * B[2]
mulx r10, rax, QWORD PTR [rbp+16]
adcx r12, rax
; A[0] * B[3]
mulx r11, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+16], r12
adcx r10, rax
mov QWORD PTR [rbx+24], r10
; A[0] * B[4]
mulx r12, rax, QWORD PTR [rbp+32]
adcx r11, rax
; A[0] * B[5]
mulx r10, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+32], r11
adcx r12, rax
mov QWORD PTR [rbx+40], r12
; A[0] * B[6]
mulx r11, rax, QWORD PTR [rbp+48]
adcx r10, rax
; A[0] * B[7]
mulx r12, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+48], r10
adcx r11, rax
mov QWORD PTR [rbx+56], r11
; A[0] * B[8]
mulx r10, rax, QWORD PTR [rbp+64]
adcx r12, rax
; A[0] * B[9]
mulx r11, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+64], r12
adcx r10, rax
mov QWORD PTR [rbx+72], r10
; A[0] * B[10]
mulx r12, rax, QWORD PTR [rbp+80]
adcx r11, rax
; A[0] * B[11]
mulx r10, rax, QWORD PTR [rbp+88]
mov QWORD PTR [rbx+80], r11
adcx r12, rax
; Fold the last carry into the top word and capture any carry out in r13.
adcx r10, r14
mov r13, r14
adcx r13, r14
mov QWORD PTR [rbx+88], r12
mov QWORD PTR [r8], r10
; Row 1: add A[1] * B into result words 1..13.
mov rdx, QWORD PTR [r9+8]
mov r11, QWORD PTR [rbx+8]
mov r12, QWORD PTR [rbx+16]
mov r10, QWORD PTR [rbx+24]
; A[1] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r11, rax
adox r12, rcx
; A[1] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+8], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbx+16], r12
mov r11, QWORD PTR [rbx+32]
mov r12, QWORD PTR [rbx+40]
; A[1] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
adcx r10, rax
adox r11, rcx
; A[1] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+24], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+32], r11
mov r10, QWORD PTR [rbx+48]
mov r11, QWORD PTR [rbx+56]
; A[1] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r12, rax
adox r10, rcx
; A[1] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+40], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+48], r10
mov r12, QWORD PTR [rbx+64]
mov r10, QWORD PTR [rbx+72]
; A[1] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r11, rax
adox r12, rcx
; A[1] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+56], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbx+64], r12
mov r11, QWORD PTR [rbx+80]
mov r12, QWORD PTR [rbx+88]
; A[1] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r10, rax
adox r11, rcx
; A[1] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+72], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+80], r11
mov r10, QWORD PTR [r8]
; A[1] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
adcx r12, rax
adox r10, rcx
; A[1] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [rbx+88], r12
mov r11, r14
adcx r10, rax
adox r11, rcx
; End of row: the new top word takes the previous row's carry word (r13),
; then r13 collects what is left in OF and CF. Both flags end up clear
; for the next row. The same sequence closes rows 2..10.
adcx r11, r13
mov r13, r14
adox r13, r14
adcx r13, r14
mov QWORD PTR [r8], r10
mov QWORD PTR [r8+8], r11
; Row 2: add A[2] * B into result words 2..14.
mov rdx, QWORD PTR [r9+16]
mov r12, QWORD PTR [rbx+16]
mov r10, QWORD PTR [rbx+24]
mov r11, QWORD PTR [rbx+32]
; A[2] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r12, rax
adox r10, rcx
; A[2] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+16], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+24], r10
mov r12, QWORD PTR [rbx+40]
mov r10, QWORD PTR [rbx+48]
; A[2] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
adcx r11, rax
adox r12, rcx
; A[2] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+32], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbx+40], r12
mov r11, QWORD PTR [rbx+56]
mov r12, QWORD PTR [rbx+64]
; A[2] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r10, rax
adox r11, rcx
; A[2] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+48], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+56], r11
mov r10, QWORD PTR [rbx+72]
mov r11, QWORD PTR [rbx+80]
; A[2] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r12, rax
adox r10, rcx
; A[2] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+64], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+72], r10
mov r12, QWORD PTR [rbx+88]
mov r10, QWORD PTR [r8]
; A[2] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r11, rax
adox r12, rcx
; A[2] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+80], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbx+88], r12
mov r11, QWORD PTR [r8+8]
; A[2] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
adcx r10, rax
adox r11, rcx
; A[2] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8], r10
mov r12, r14
adcx r11, rax
adox r12, rcx
adcx r12, r13
mov r13, r14
adox r13, r14
adcx r13, r14
mov QWORD PTR [r8+8], r11
mov QWORD PTR [r8+16], r12
; Row 3: add A[3] * B into result words 3..15.
mov rdx, QWORD PTR [r9+24]
mov r10, QWORD PTR [rbx+24]
mov r11, QWORD PTR [rbx+32]
mov r12, QWORD PTR [rbx+40]
; A[3] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r10, rax
adox r11, rcx
; A[3] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+24], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+32], r11
mov r10, QWORD PTR [rbx+48]
mov r11, QWORD PTR [rbx+56]
; A[3] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
adcx r12, rax
adox r10, rcx
; A[3] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+40], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+48], r10
mov r12, QWORD PTR [rbx+64]
mov r10, QWORD PTR [rbx+72]
; A[3] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r11, rax
adox r12, rcx
; A[3] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+56], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbx+64], r12
mov r11, QWORD PTR [rbx+80]
mov r12, QWORD PTR [rbx+88]
; A[3] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r10, rax
adox r11, rcx
; A[3] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+72], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+80], r11
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
; A[3] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r12, rax
adox r10, rcx
; A[3] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+88], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8], r10
mov r12, QWORD PTR [r8+16]
; A[3] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
adcx r11, rax
adox r12, rcx
; A[3] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+8], r11
mov r10, r14
adcx r12, rax
adox r10, rcx
adcx r10, r13
mov r13, r14
adox r13, r14
adcx r13, r14
mov QWORD PTR [r8+16], r12
mov QWORD PTR [r8+24], r10
; Row 4: add A[4] * B into result words 4..16.
mov rdx, QWORD PTR [r9+32]
mov r11, QWORD PTR [rbx+32]
mov r12, QWORD PTR [rbx+40]
mov r10, QWORD PTR [rbx+48]
; A[4] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r11, rax
adox r12, rcx
; A[4] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+32], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbx+40], r12
mov r11, QWORD PTR [rbx+56]
mov r12, QWORD PTR [rbx+64]
; A[4] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
adcx r10, rax
adox r11, rcx
; A[4] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+48], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+56], r11
mov r10, QWORD PTR [rbx+72]
mov r11, QWORD PTR [rbx+80]
; A[4] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r12, rax
adox r10, rcx
; A[4] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+64], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+72], r10
mov r12, QWORD PTR [rbx+88]
mov r10, QWORD PTR [r8]
; A[4] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r11, rax
adox r12, rcx
; A[4] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+80], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbx+88], r12
mov r11, QWORD PTR [r8+8]
mov r12, QWORD PTR [r8+16]
; A[4] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r10, rax
adox r11, rcx
; A[4] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+8], r11
mov r10, QWORD PTR [r8+24]
; A[4] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
adcx r12, rax
adox r10, rcx
; A[4] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+16], r12
mov r11, r14
adcx r10, rax
adox r11, rcx
adcx r11, r13
mov r13, r14
adox r13, r14
adcx r13, r14
mov QWORD PTR [r8+24], r10
mov QWORD PTR [r8+32], r11
; Row 5: add A[5] * B into result words 5..17.
mov rdx, QWORD PTR [r9+40]
mov r12, QWORD PTR [rbx+40]
mov r10, QWORD PTR [rbx+48]
mov r11, QWORD PTR [rbx+56]
; A[5] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r12, rax
adox r10, rcx
; A[5] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+40], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+48], r10
mov r12, QWORD PTR [rbx+64]
mov r10, QWORD PTR [rbx+72]
; A[5] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
adcx r11, rax
adox r12, rcx
; A[5] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+56], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbx+64], r12
mov r11, QWORD PTR [rbx+80]
mov r12, QWORD PTR [rbx+88]
; A[5] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r10, rax
adox r11, rcx
; A[5] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+72], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+80], r11
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
; A[5] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r12, rax
adox r10, rcx
; A[5] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+88], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8], r10
mov r12, QWORD PTR [r8+16]
mov r10, QWORD PTR [r8+24]
; A[5] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r11, rax
adox r12, rcx
; A[5] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+8], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+16], r12
mov r11, QWORD PTR [r8+32]
; A[5] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
adcx r10, rax
adox r11, rcx
; A[5] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+24], r10
mov r12, r14
adcx r11, rax
adox r12, rcx
adcx r12, r13
mov r13, r14
adox r13, r14
adcx r13, r14
mov QWORD PTR [r8+32], r11
mov QWORD PTR [r8+40], r12
; Row 6: add A[6] * B into result words 6..18.
mov rdx, QWORD PTR [r9+48]
mov r10, QWORD PTR [rbx+48]
mov r11, QWORD PTR [rbx+56]
mov r12, QWORD PTR [rbx+64]
; A[6] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r10, rax
adox r11, rcx
; A[6] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+48], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+56], r11
mov r10, QWORD PTR [rbx+72]
mov r11, QWORD PTR [rbx+80]
; A[6] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
adcx r12, rax
adox r10, rcx
; A[6] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+64], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+72], r10
mov r12, QWORD PTR [rbx+88]
mov r10, QWORD PTR [r8]
; A[6] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r11, rax
adox r12, rcx
; A[6] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+80], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbx+88], r12
mov r11, QWORD PTR [r8+8]
mov r12, QWORD PTR [r8+16]
; A[6] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r10, rax
adox r11, rcx
; A[6] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+8], r11
mov r10, QWORD PTR [r8+24]
mov r11, QWORD PTR [r8+32]
; A[6] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r12, rax
adox r10, rcx
; A[6] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+16], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+24], r10
mov r12, QWORD PTR [r8+40]
; A[6] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
adcx r11, rax
adox r12, rcx
; A[6] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+32], r11
mov r10, r14
adcx r12, rax
adox r10, rcx
adcx r10, r13
mov r13, r14
adox r13, r14
adcx r13, r14
mov QWORD PTR [r8+40], r12
mov QWORD PTR [r8+48], r10
; Row 7: add A[7] * B into result words 7..19.
mov rdx, QWORD PTR [r9+56]
mov r11, QWORD PTR [rbx+56]
mov r12, QWORD PTR [rbx+64]
mov r10, QWORD PTR [rbx+72]
; A[7] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r11, rax
adox r12, rcx
; A[7] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+56], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbx+64], r12
mov r11, QWORD PTR [rbx+80]
mov r12, QWORD PTR [rbx+88]
; A[7] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
adcx r10, rax
adox r11, rcx
; A[7] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+72], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+80], r11
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
; A[7] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r12, rax
adox r10, rcx
; A[7] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+88], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8], r10
mov r12, QWORD PTR [r8+16]
mov r10, QWORD PTR [r8+24]
; A[7] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r11, rax
adox r12, rcx
; A[7] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+8], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+16], r12
mov r11, QWORD PTR [r8+32]
mov r12, QWORD PTR [r8+40]
; A[7] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r10, rax
adox r11, rcx
; A[7] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+24], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+32], r11
mov r10, QWORD PTR [r8+48]
; A[7] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
adcx r12, rax
adox r10, rcx
; A[7] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+40], r12
mov r11, r14
adcx r10, rax
adox r11, rcx
adcx r11, r13
mov r13, r14
adox r13, r14
adcx r13, r14
mov QWORD PTR [r8+48], r10
mov QWORD PTR [r8+56], r11
; Row 8: add A[8] * B into result words 8..20.
mov rdx, QWORD PTR [r9+64]
mov r12, QWORD PTR [rbx+64]
mov r10, QWORD PTR [rbx+72]
mov r11, QWORD PTR [rbx+80]
; A[8] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r12, rax
adox r10, rcx
; A[8] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+64], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+72], r10
mov r12, QWORD PTR [rbx+88]
mov r10, QWORD PTR [r8]
; A[8] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
adcx r11, rax
adox r12, rcx
; A[8] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+80], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbx+88], r12
mov r11, QWORD PTR [r8+8]
mov r12, QWORD PTR [r8+16]
; A[8] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r10, rax
adox r11, rcx
; A[8] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [r8], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+8], r11
mov r10, QWORD PTR [r8+24]
mov r11, QWORD PTR [r8+32]
; A[8] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r12, rax
adox r10, rcx
; A[8] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+16], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+24], r10
mov r12, QWORD PTR [r8+40]
mov r10, QWORD PTR [r8+48]
; A[8] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r11, rax
adox r12, rcx
; A[8] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+32], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+40], r12
mov r11, QWORD PTR [r8+56]
; A[8] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
adcx r10, rax
adox r11, rcx
; A[8] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+48], r10
mov r12, r14
adcx r11, rax
adox r12, rcx
adcx r12, r13
mov r13, r14
adox r13, r14
adcx r13, r14
mov QWORD PTR [r8+56], r11
mov QWORD PTR [r8+64], r12
; Row 9: add A[9] * B into result words 9..21.
mov rdx, QWORD PTR [r9+72]
mov r10, QWORD PTR [rbx+72]
mov r11, QWORD PTR [rbx+80]
mov r12, QWORD PTR [rbx+88]
; A[9] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r10, rax
adox r11, rcx
; A[9] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+72], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+80], r11
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
; A[9] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
adcx r12, rax
adox r10, rcx
; A[9] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+88], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8], r10
mov r12, QWORD PTR [r8+16]
mov r10, QWORD PTR [r8+24]
; A[9] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r11, rax
adox r12, rcx
; A[9] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [r8+8], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+16], r12
mov r11, QWORD PTR [r8+32]
mov r12, QWORD PTR [r8+40]
; A[9] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r10, rax
adox r11, rcx
; A[9] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+24], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+32], r11
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
; A[9] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r12, rax
adox r10, rcx
; A[9] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+40], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+48], r10
mov r12, QWORD PTR [r8+64]
; A[9] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
adcx r11, rax
adox r12, rcx
; A[9] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+56], r11
mov r10, r14
adcx r12, rax
adox r10, rcx
adcx r10, r13
mov r13, r14
adox r13, r14
adcx r13, r14
mov QWORD PTR [r8+64], r12
mov QWORD PTR [r8+72], r10
; Row 10: add A[10] * B into result words 10..22.
mov rdx, QWORD PTR [r9+80]
mov r11, QWORD PTR [rbx+80]
mov r12, QWORD PTR [rbx+88]
mov r10, QWORD PTR [r8]
; A[10] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r11, rax
adox r12, rcx
; A[10] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+80], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbx+88], r12
mov r11, QWORD PTR [r8+8]
mov r12, QWORD PTR [r8+16]
; A[10] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
adcx r10, rax
adox r11, rcx
; A[10] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [r8], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+8], r11
mov r10, QWORD PTR [r8+24]
mov r11, QWORD PTR [r8+32]
; A[10] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r12, rax
adox r10, rcx
; A[10] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [r8+16], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+24], r10
mov r12, QWORD PTR [r8+40]
mov r10, QWORD PTR [r8+48]
; A[10] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r11, rax
adox r12, rcx
; A[10] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+32], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+40], r12
mov r11, QWORD PTR [r8+56]
mov r12, QWORD PTR [r8+64]
; A[10] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r10, rax
adox r11, rcx
; A[10] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+48], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+56], r11
mov r10, QWORD PTR [r8+72]
; A[10] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
adcx r12, rax
adox r10, rcx
; A[10] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+64], r12
mov r11, r14
adcx r10, rax
adox r11, rcx
adcx r11, r13
mov r13, r14
adox r13, r14
adcx r13, r14
mov QWORD PTR [r8+72], r10
mov QWORD PTR [r8+80], r11
; Row 11: add A[11] * B into result words 11..23.
mov rdx, QWORD PTR [r9+88]
mov r12, QWORD PTR [rbx+88]
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
; A[11] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r12, rax
adox r10, rcx
; A[11] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+88], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8], r10
mov r12, QWORD PTR [r8+16]
mov r10, QWORD PTR [r8+24]
; A[11] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
adcx r11, rax
adox r12, rcx
; A[11] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [r8+8], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+16], r12
mov r11, QWORD PTR [r8+32]
mov r12, QWORD PTR [r8+40]
; A[11] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r10, rax
adox r11, rcx
; A[11] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [r8+24], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+32], r11
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
; A[11] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
adcx r12, rax
adox r10, rcx
; A[11] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+40], r12
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+48], r10
mov r12, QWORD PTR [r8+64]
mov r10, QWORD PTR [r8+72]
; A[11] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r11, rax
adox r12, rcx
; A[11] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+56], r11
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+64], r12
mov r11, QWORD PTR [r8+80]
; A[11] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
adcx r10, rax
adox r11, rcx
; A[11] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+72], r10
mov r12, r14
adcx r11, rax
adox r12, rcx
; Last row: fold in r13; no further carry word is collected.
adcx r12, r13
mov QWORD PTR [r8+80], r11
mov QWORD PTR [r8+88], r12
; Point r8 back at r[0]. If r is the same pointer as a or b, result words
; 0..11 are still in the stack buffer (rbx == rsp) and must be copied out;
; otherwise they were written in place and the copy is skipped.
sub r8, 96
cmp r9, r8
je L_start_3072_mul_avx2_12
cmp rbp, r8
jne L_end_3072_mul_avx2_12
L_start_3072_mul_avx2_12:
; Copy 96 bytes from the buffer at rbx to r, 16 bytes at a time via xmm0.
vmovdqu xmm0, OWORD PTR [rbx]
vmovups OWORD PTR [r8], xmm0
vmovdqu xmm0, OWORD PTR [rbx+16]
vmovups OWORD PTR [r8+16], xmm0
vmovdqu xmm0, OWORD PTR [rbx+32]
vmovups OWORD PTR [r8+32], xmm0
vmovdqu xmm0, OWORD PTR [rbx+48]
vmovups OWORD PTR [r8+48], xmm0
vmovdqu xmm0, OWORD PTR [rbx+64]
vmovups OWORD PTR [r8+64], xmm0
vmovdqu xmm0, OWORD PTR [rbx+80]
vmovups OWORD PTR [r8+80], xmm0
L_end_3072_mul_avx2_12:
; Release the scratch buffer and restore the saved registers in reverse
; push order.
add rsp, 96
pop r14
pop r13
pop r12
pop rbp
pop rbx
ret
sp_3072_mul_avx2_12 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Square a and put result in r. (r = a * a)
; *
; * r A single precision integer.
; * a A single precision integer.
; *
; * Register arguments as used by the code below:
; *   rcx = r (24 64-bit words are written)
; *   rdx = a (12 64-bit words are read); may be the same pointer as r
; *
; * Method: every cross product A[i] x A[j] (i > j) is accumulated once, in
; * six passes of eleven MULX each. Low halves ride the ADCX (CF) carry chain
; * and high halves the ADOX (OF) carry chain. The final pass doubles the
; * accumulated sum and adds in the squares A[i] x A[i].
; *
; * Working layout:
; *   r9  = a
; *   rbp = base for result words 0..6: r itself, or the 96 byte stack scratch
; *         when r == a so that a is not overwritten while still being read
; *   r14, r15, rdi, rsi, rbx = result words 7..11, kept in registers
; *   r8  = r + 96, so [r8+8*k] is result word 12+k
; *   r12 = 0 (zero source for mov/adcx/adox)
; *   r13 = carry handed from the end of one pass to the next
; *   r10, r11, rax, rcx, rdx = scratch (rdx is the implicit MULX multiplicand)
; */
_text SEGMENT READONLY PARA
sp_3072_sqr_avx2_12 PROC
push rbp
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
mov r8, rcx
mov r9, rdx
sub rsp, 96
; rbp = stack scratch when r == a, otherwise r
cmp r9, r8
mov rbp, rsp
cmovne rbp, r8
add r8, 96
; r12 = 0; this also clears CF and OF before the first carry chains
xor r12, r12
; Diagonal 1
; A[1] x A[0]
mov rdx, QWORD PTR [r9]
mulx r11, r10, QWORD PTR [r9+8]
mov QWORD PTR [rbp+8], r10
mov r10, r12
; A[2] x A[0]
mulx rcx, rax, QWORD PTR [r9+16]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [rbp+16], r11
mov r11, r12
; A[3] x A[0]
mulx rcx, rax, QWORD PTR [r9+24]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbp+24], r10
mov r10, r12
; A[4] x A[0]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [rbp+32], r11
mov r11, r12
; A[5] x A[0]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbp+40], r10
mov r10, r12
; A[6] x A[0]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [rbp+48], r11
mov r11, r12
; A[7] x A[0]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r10, rax
adox r11, rcx
; Word 7 is kept in r14 from here on
mov r14, r10
mov r10, r12
; A[8] x A[0]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r11, rax
adox r10, rcx
; Word 8 is kept in r15 from here on
mov r15, r11
mov r11, r12
; A[9] x A[0]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r10, rax
adox r11, rcx
; Word 9 is kept in rdi from here on
mov rdi, r10
mov r10, r12
; A[10] x A[0]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r11, rax
adox r10, rcx
; Word 10 is kept in rsi from here on
mov rsi, r11
mov r11, r12
; A[11] x A[0]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r10, rax
adox r11, rcx
; Word 11 is kept in rbx from here on
mov rbx, r10
; Carry
; Fold CF into the top word, then collect the remaining CF and OF in r13
adcx r11, r12
mov r13, r12
adcx r13, r12
adox r13, r12
mov QWORD PTR [r8], r11
; Diagonal 2
mov r11, QWORD PTR [rbp+24]
mov r10, QWORD PTR [rbp+32]
; A[2] x A[1]
mov rdx, QWORD PTR [r9+8]
mulx rcx, rax, QWORD PTR [r9+16]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [rbp+24], r11
mov r11, QWORD PTR [rbp+40]
; A[3] x A[1]
mulx rcx, rax, QWORD PTR [r9+24]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbp+32], r10
mov r10, QWORD PTR [rbp+48]
; A[4] x A[1]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [rbp+40], r11
; Word 7 is already in r14 - no load
; A[5] x A[1]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r10, rax
adox r14, rcx
mov QWORD PTR [rbp+48], r10
; Word 8 is already in r15 - no load
; A[6] x A[1]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r14, rax
adox r15, rcx
; Word 7 stays in r14 - no store
; Word 9 is already in rdi - no load
; A[7] x A[1]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r15, rax
adox rdi, rcx
; Word 8 stays in r15 - no store
; Word 10 is already in rsi - no load
; A[8] x A[1]
mulx rcx, rax, QWORD PTR [r9+64]
adcx rdi, rax
adox rsi, rcx
; Word 9 stays in rdi - no store
; Word 11 is already in rbx - no load
; A[9] x A[1]
mulx rcx, rax, QWORD PTR [r9+72]
adcx rsi, rax
adox rbx, rcx
; Word 10 stays in rsi - no store
mov r10, QWORD PTR [r8]
; A[10] x A[1]
mulx rcx, rax, QWORD PTR [r9+80]
adcx rbx, rax
adox r10, rcx
; Word 11 stays in rbx - no store
mov r11, r12
; A[11] x A[1]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8], r10
mov r10, r12
; A[11] x A[2]
mov rdx, QWORD PTR [r9+16]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8+8], r11
; Carry
; Add the carry left by the previous pass, then collect CF and OF in r13
adcx r10, r13
mov r13, r12
adcx r13, r12
adox r13, r12
mov QWORD PTR [r8+16], r10
; Diagonal 3
mov r10, QWORD PTR [rbp+40]
mov r11, QWORD PTR [rbp+48]
; rdx still holds A[2] from the last product of Diagonal 2
; A[3] x A[2]
mulx rcx, rax, QWORD PTR [r9+24]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbp+40], r10
; Word 7 is already in r14 - no load
; A[4] x A[2]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r11, rax
adox r14, rcx
mov QWORD PTR [rbp+48], r11
; Word 8 is already in r15 - no load
; A[5] x A[2]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r14, rax
adox r15, rcx
; Word 7 stays in r14 - no store
; Word 9 is already in rdi - no load
; A[6] x A[2]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r15, rax
adox rdi, rcx
; Word 8 stays in r15 - no store
; Word 10 is already in rsi - no load
; A[7] x A[2]
mulx rcx, rax, QWORD PTR [r9+56]
adcx rdi, rax
adox rsi, rcx
; Word 9 stays in rdi - no store
; Word 11 is already in rbx - no load
; A[8] x A[2]
mulx rcx, rax, QWORD PTR [r9+64]
adcx rsi, rax
adox rbx, rcx
; Word 10 stays in rsi - no store
mov r11, QWORD PTR [r8]
; A[9] x A[2]
mulx rcx, rax, QWORD PTR [r9+72]
adcx rbx, rax
adox r11, rcx
; Word 11 stays in rbx - no store
mov r10, QWORD PTR [r8+8]
; A[10] x A[2]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8], r11
mov r11, QWORD PTR [r8+16]
; A[10] x A[3]
mov rdx, QWORD PTR [r9+80]
mulx rcx, rax, QWORD PTR [r9+24]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+8], r10
mov r10, r12
; A[10] x A[4]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8+16], r11
mov r11, r12
; A[10] x A[5]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+24], r10
; Carry
; Add the carry left by the previous pass, then collect CF and OF in r13
adcx r11, r13
mov r13, r12
adcx r13, r12
adox r13, r12
mov QWORD PTR [r8+32], r11
; Diagonal 4
; Word 7 is already in r14 - no load
; Word 8 is already in r15 - no load
; A[4] x A[3]
mov rdx, QWORD PTR [r9+24]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r14, rax
adox r15, rcx
; Word 7 stays in r14 - no store
; Word 9 is already in rdi - no load
; A[5] x A[3]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r15, rax
adox rdi, rcx
; Word 8 stays in r15 - no store
; Word 10 is already in rsi - no load
; A[6] x A[3]
mulx rcx, rax, QWORD PTR [r9+48]
adcx rdi, rax
adox rsi, rcx
; Word 9 stays in rdi - no store
; Word 11 is already in rbx - no load
; A[7] x A[3]
mulx rcx, rax, QWORD PTR [r9+56]
adcx rsi, rax
adox rbx, rcx
; Word 10 stays in rsi - no store
mov r10, QWORD PTR [r8]
; A[8] x A[3]
mulx rcx, rax, QWORD PTR [r9+64]
adcx rbx, rax
adox r10, rcx
; Word 11 stays in rbx - no store
mov r11, QWORD PTR [r8+8]
; A[9] x A[3]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8], r10
mov r10, QWORD PTR [r8+16]
; A[9] x A[4]
mov rdx, QWORD PTR [r9+72]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8+8], r11
mov r11, QWORD PTR [r8+24]
; A[9] x A[5]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+16], r10
mov r10, QWORD PTR [r8+32]
; A[9] x A[6]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8+24], r11
mov r11, r12
; A[9] x A[7]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+32], r10
mov r10, r12
; A[9] x A[8]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8+40], r11
; Carry
; Add the carry left by the previous pass, then collect CF and OF in r13
adcx r10, r13
mov r13, r12
adcx r13, r12
adox r13, r12
mov QWORD PTR [r8+48], r10
; Diagonal 5
; Word 9 is already in rdi - no load
; Word 10 is already in rsi - no load
; A[5] x A[4]
mov rdx, QWORD PTR [r9+32]
mulx rcx, rax, QWORD PTR [r9+40]
adcx rdi, rax
adox rsi, rcx
; Word 9 stays in rdi - no store
; Word 11 is already in rbx - no load
; A[6] x A[4]
mulx rcx, rax, QWORD PTR [r9+48]
adcx rsi, rax
adox rbx, rcx
; Word 10 stays in rsi - no store
mov r11, QWORD PTR [r8]
; A[7] x A[4]
mulx rcx, rax, QWORD PTR [r9+56]
adcx rbx, rax
adox r11, rcx
; Word 11 stays in rbx - no store
mov r10, QWORD PTR [r8+8]
; A[8] x A[4]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8], r11
mov r11, QWORD PTR [r8+16]
; A[8] x A[5]
mov rdx, QWORD PTR [r9+64]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+8], r10
mov r10, QWORD PTR [r8+24]
; A[8] x A[6]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8+16], r11
mov r11, QWORD PTR [r8+32]
; A[8] x A[7]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+24], r10
mov r10, QWORD PTR [r8+40]
; A[10] x A[6]
mov rdx, QWORD PTR [r9+80]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8+32], r11
mov r11, QWORD PTR [r8+48]
; A[10] x A[7]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+40], r10
mov r10, r12
; A[10] x A[8]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8+48], r11
mov r11, r12
; A[10] x A[9]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+56], r10
; Carry
; Add the carry left by the previous pass, then collect CF and OF in r13
adcx r11, r13
mov r13, r12
adcx r13, r12
adox r13, r12
mov QWORD PTR [r8+64], r11
; Diagonal 6
; Word 11 is already in rbx - no load
mov r10, QWORD PTR [r8]
; A[6] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx rcx, rax, QWORD PTR [r9+48]
adcx rbx, rax
adox r10, rcx
; Word 11 stays in rbx - no store
mov r11, QWORD PTR [r8+8]
; A[7] x A[5]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8], r10
mov r10, QWORD PTR [r8+16]
; A[7] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8+8], r11
mov r11, QWORD PTR [r8+24]
; A[11] x A[3]
mov rdx, QWORD PTR [r9+88]
mulx rcx, rax, QWORD PTR [r9+24]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+16], r10
mov r10, QWORD PTR [r8+32]
; A[11] x A[4]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8+24], r11
mov r11, QWORD PTR [r8+40]
; A[11] x A[5]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+32], r10
mov r10, QWORD PTR [r8+48]
; A[11] x A[6]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8+40], r11
mov r11, QWORD PTR [r8+56]
; A[11] x A[7]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+48], r10
mov r10, QWORD PTR [r8+64]
; A[11] x A[8]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8+56], r11
mov r11, r12
; A[11] x A[9]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+64], r10
mov r10, r12
; A[11] x A[10]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r11, rax
adox r10, rcx
mov QWORD PTR [r8+72], r11
; Carry
; Add the carry left by the previous pass; the final carries become word 23
adcx r10, r13
mov r13, r12
adcx r13, r12
adox r13, r12
mov QWORD PTR [r8+80], r10
mov QWORD PTR [r8+88], r13
; Double and Add in A[i] x A[i]
; adox x, x doubles a word of the cross product sum (OF chain);
; adcx then adds the matching word of the square (CF chain).
mov r11, QWORD PTR [rbp+8]
; A[0] x A[0]
mov rdx, QWORD PTR [r9]
mulx rcx, rax, rdx
mov QWORD PTR [rbp], rax
adox r11, r11
adcx r11, rcx
mov QWORD PTR [rbp+8], r11
mov r10, QWORD PTR [rbp+16]
mov r11, QWORD PTR [rbp+24]
; A[1] x A[1]
mov rdx, QWORD PTR [r9+8]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rbp+16], r10
mov QWORD PTR [rbp+24], r11
mov r10, QWORD PTR [rbp+32]
mov r11, QWORD PTR [rbp+40]
; A[2] x A[2]
mov rdx, QWORD PTR [r9+16]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rbp+32], r10
mov QWORD PTR [rbp+40], r11
mov r10, QWORD PTR [rbp+48]
; A[3] x A[3]
mov rdx, QWORD PTR [r9+24]
mulx rcx, rax, rdx
adox r10, r10
adox r14, r14
adcx r10, rax
adcx r14, rcx
mov QWORD PTR [rbp+48], r10
; A[4] x A[4]
mov rdx, QWORD PTR [r9+32]
mulx rcx, rax, rdx
adox r15, r15
adox rdi, rdi
adcx r15, rax
adcx rdi, rcx
; A[5] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx rcx, rax, rdx
adox rsi, rsi
adox rbx, rbx
adcx rsi, rax
adcx rbx, rcx
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
; A[6] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8], r10
mov QWORD PTR [r8+8], r11
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
; A[7] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+16], r10
mov QWORD PTR [r8+24], r11
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
; A[8] x A[8]
mov rdx, QWORD PTR [r9+64]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+32], r10
mov QWORD PTR [r8+40], r11
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
; A[9] x A[9]
mov rdx, QWORD PTR [r9+72]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+48], r10
mov QWORD PTR [r8+56], r11
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
; A[10] x A[10]
mov rdx, QWORD PTR [r9+80]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+64], r10
mov QWORD PTR [r8+72], r11
mov r10, QWORD PTR [r8+80]
mov r11, QWORD PTR [r8+88]
; A[11] x A[11]
mov rdx, QWORD PTR [r9+88]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+80], r10
mov QWORD PTR [r8+88], r11
; Write the register-held words 7..11 to r (r8 is r + 96 here);
; a is not read again after this point
mov QWORD PTR [r8+-40], r14
mov QWORD PTR [r8+-32], r15
mov QWORD PTR [r8+-24], rdi
mov QWORD PTR [r8+-16], rsi
mov QWORD PTR [r8+-8], rbx
sub r8, 96
; When r == a, words 0..6 were built in the stack scratch: copy the 56 bytes to r
cmp r9, r8
jne L_end_3072_sqr_avx2_12
vmovdqu xmm0, OWORD PTR [rbp]
vmovups OWORD PTR [r8], xmm0
vmovdqu xmm0, OWORD PTR [rbp+16]
vmovups OWORD PTR [r8+16], xmm0
vmovdqu xmm0, OWORD PTR [rbp+32]
vmovups OWORD PTR [r8+32], xmm0
mov rax, QWORD PTR [rbp+48]
mov QWORD PTR [r8+48], rax
L_end_3072_sqr_avx2_12:
add rsp, 96
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
pop rbp
ret
sp_3072_sqr_avx2_12 ENDP
_text ENDS
ENDIF
; /* Add b to a into r. (r = a + b)
; *
; * Operates on 12 64-bit words and returns the carry out of the top word.
; *
; * r (rcx) Result, 12 words.
; * a (rdx) First addend, 12 words.
; * b (r8)  Second addend, 12 words.
; *
; * Returns rax = 1 when the addition carried out of word 11, otherwise 0.
; * Scratch: r9, r10 and the flags.
; */
_text SEGMENT READONLY PARA
sp_3072_add_12 PROC
        ; Zero the return value first: xor clobbers CF, so it must not sit
        ; inside the add/adc chain.
        xor     eax, eax
        ; Word 0 starts the carry chain.
        mov     r10, QWORD PTR [rdx]
        add     r10, QWORD PTR [r8]
        ; Words 1..11: fetch the next word of a, store the previous sum, then
        ; add the next word of b with carry. mov leaves CF untouched.
        mov     r9, QWORD PTR [rdx+8]
        mov     QWORD PTR [rcx], r10
        adc     r9, QWORD PTR [r8+8]
        mov     r10, QWORD PTR [rdx+16]
        mov     QWORD PTR [rcx+8], r9
        adc     r10, QWORD PTR [r8+16]
        mov     r9, QWORD PTR [rdx+24]
        mov     QWORD PTR [rcx+16], r10
        adc     r9, QWORD PTR [r8+24]
        mov     r10, QWORD PTR [rdx+32]
        mov     QWORD PTR [rcx+24], r9
        adc     r10, QWORD PTR [r8+32]
        mov     r9, QWORD PTR [rdx+40]
        mov     QWORD PTR [rcx+32], r10
        adc     r9, QWORD PTR [r8+40]
        mov     r10, QWORD PTR [rdx+48]
        mov     QWORD PTR [rcx+40], r9
        adc     r10, QWORD PTR [r8+48]
        mov     r9, QWORD PTR [rdx+56]
        mov     QWORD PTR [rcx+48], r10
        adc     r9, QWORD PTR [r8+56]
        mov     r10, QWORD PTR [rdx+64]
        mov     QWORD PTR [rcx+56], r9
        adc     r10, QWORD PTR [r8+64]
        mov     r9, QWORD PTR [rdx+72]
        mov     QWORD PTR [rcx+64], r10
        adc     r9, QWORD PTR [r8+72]
        mov     r10, QWORD PTR [rdx+80]
        mov     QWORD PTR [rcx+72], r9
        adc     r10, QWORD PTR [r8+80]
        mov     r9, QWORD PTR [rdx+88]
        mov     QWORD PTR [rcx+80], r10
        adc     r9, QWORD PTR [r8+88]
        mov     QWORD PTR [rcx+88], r9
        ; rax = final carry
        adc     rax, 0
        ret
sp_3072_add_12 ENDP
_text ENDS
; /* Sub b from a into a. (a -= b)
; *
; * Operates on 24 64-bit words and returns the borrow out of the top word.
; *
; * a (rcx) Minuend and result, 24 words.
; * b (rdx) Subtrahend, 24 words.
; *
; * Returns rax = all ones (-1) when the subtraction borrowed out of word 23,
; * otherwise 0.
; * Scratch: r8, r9 and the flags.
; */
_text SEGMENT READONLY PARA
sp_3072_sub_in_place_24 PROC
        ; Zero the return value first: xor clobbers CF, so it must not sit
        ; inside the sub/sbb chain.
        xor     eax, eax
        ; Word 0 starts the borrow chain.
        mov     r9, QWORD PTR [rcx]
        sub     r9, QWORD PTR [rdx]
        ; Words 1..23: fetch the next word of a, store the previous difference,
        ; then subtract the next word of b with borrow. mov leaves CF untouched.
        mov     r8, QWORD PTR [rcx+8]
        mov     QWORD PTR [rcx], r9
        sbb     r8, QWORD PTR [rdx+8]
        mov     r9, QWORD PTR [rcx+16]
        mov     QWORD PTR [rcx+8], r8
        sbb     r9, QWORD PTR [rdx+16]
        mov     r8, QWORD PTR [rcx+24]
        mov     QWORD PTR [rcx+16], r9
        sbb     r8, QWORD PTR [rdx+24]
        mov     r9, QWORD PTR [rcx+32]
        mov     QWORD PTR [rcx+24], r8
        sbb     r9, QWORD PTR [rdx+32]
        mov     r8, QWORD PTR [rcx+40]
        mov     QWORD PTR [rcx+32], r9
        sbb     r8, QWORD PTR [rdx+40]
        mov     r9, QWORD PTR [rcx+48]
        mov     QWORD PTR [rcx+40], r8
        sbb     r9, QWORD PTR [rdx+48]
        mov     r8, QWORD PTR [rcx+56]
        mov     QWORD PTR [rcx+48], r9
        sbb     r8, QWORD PTR [rdx+56]
        mov     r9, QWORD PTR [rcx+64]
        mov     QWORD PTR [rcx+56], r8
        sbb     r9, QWORD PTR [rdx+64]
        mov     r8, QWORD PTR [rcx+72]
        mov     QWORD PTR [rcx+64], r9
        sbb     r8, QWORD PTR [rdx+72]
        mov     r9, QWORD PTR [rcx+80]
        mov     QWORD PTR [rcx+72], r8
        sbb     r9, QWORD PTR [rdx+80]
        mov     r8, QWORD PTR [rcx+88]
        mov     QWORD PTR [rcx+80], r9
        sbb     r8, QWORD PTR [rdx+88]
        mov     r9, QWORD PTR [rcx+96]
        mov     QWORD PTR [rcx+88], r8
        sbb     r9, QWORD PTR [rdx+96]
        mov     r8, QWORD PTR [rcx+104]
        mov     QWORD PTR [rcx+96], r9
        sbb     r8, QWORD PTR [rdx+104]
        mov     r9, QWORD PTR [rcx+112]
        mov     QWORD PTR [rcx+104], r8
        sbb     r9, QWORD PTR [rdx+112]
        mov     r8, QWORD PTR [rcx+120]
        mov     QWORD PTR [rcx+112], r9
        sbb     r8, QWORD PTR [rdx+120]
        mov     r9, QWORD PTR [rcx+128]
        mov     QWORD PTR [rcx+120], r8
        sbb     r9, QWORD PTR [rdx+128]
        mov     r8, QWORD PTR [rcx+136]
        mov     QWORD PTR [rcx+128], r9
        sbb     r8, QWORD PTR [rdx+136]
        mov     r9, QWORD PTR [rcx+144]
        mov     QWORD PTR [rcx+136], r8
        sbb     r9, QWORD PTR [rdx+144]
        mov     r8, QWORD PTR [rcx+152]
        mov     QWORD PTR [rcx+144], r9
        sbb     r8, QWORD PTR [rdx+152]
        mov     r9, QWORD PTR [rcx+160]
        mov     QWORD PTR [rcx+152], r8
        sbb     r9, QWORD PTR [rdx+160]
        mov     r8, QWORD PTR [rcx+168]
        mov     QWORD PTR [rcx+160], r9
        sbb     r8, QWORD PTR [rdx+168]
        mov     r9, QWORD PTR [rcx+176]
        mov     QWORD PTR [rcx+168], r8
        sbb     r9, QWORD PTR [rdx+176]
        mov     r8, QWORD PTR [rcx+184]
        mov     QWORD PTR [rcx+176], r9
        sbb     r8, QWORD PTR [rdx+184]
        mov     QWORD PTR [rcx+184], r8
        ; rax = 0 - borrow: 0 or all ones (needs the full 64-bit operand size)
        sbb     rax, 0
        ret
sp_3072_sub_in_place_24 ENDP
_text ENDS
; /* Add b to a into r. (r = a + b)
; *
; * Operates on 24 64-bit words and returns the carry out of the top word.
; *
; * r (rcx) Result, 24 words.
; * a (rdx) First addend, 24 words.
; * b (r8)  Second addend, 24 words.
; *
; * Returns rax = 1 when the addition carried out of word 23, otherwise 0.
; * Scratch: r9, r10 and the flags.
; */
_text SEGMENT READONLY PARA
sp_3072_add_24 PROC
        ; Zero the return value first: xor clobbers CF, so it must not sit
        ; inside the add/adc chain.
        xor     eax, eax
        ; Word 0 starts the carry chain.
        mov     r10, QWORD PTR [rdx]
        add     r10, QWORD PTR [r8]
        ; Words 1..23: fetch the next word of a, store the previous sum, then
        ; add the next word of b with carry. mov leaves CF untouched.
        mov     r9, QWORD PTR [rdx+8]
        mov     QWORD PTR [rcx], r10
        adc     r9, QWORD PTR [r8+8]
        mov     r10, QWORD PTR [rdx+16]
        mov     QWORD PTR [rcx+8], r9
        adc     r10, QWORD PTR [r8+16]
        mov     r9, QWORD PTR [rdx+24]
        mov     QWORD PTR [rcx+16], r10
        adc     r9, QWORD PTR [r8+24]
        mov     r10, QWORD PTR [rdx+32]
        mov     QWORD PTR [rcx+24], r9
        adc     r10, QWORD PTR [r8+32]
        mov     r9, QWORD PTR [rdx+40]
        mov     QWORD PTR [rcx+32], r10
        adc     r9, QWORD PTR [r8+40]
        mov     r10, QWORD PTR [rdx+48]
        mov     QWORD PTR [rcx+40], r9
        adc     r10, QWORD PTR [r8+48]
        mov     r9, QWORD PTR [rdx+56]
        mov     QWORD PTR [rcx+48], r10
        adc     r9, QWORD PTR [r8+56]
        mov     r10, QWORD PTR [rdx+64]
        mov     QWORD PTR [rcx+56], r9
        adc     r10, QWORD PTR [r8+64]
        mov     r9, QWORD PTR [rdx+72]
        mov     QWORD PTR [rcx+64], r10
        adc     r9, QWORD PTR [r8+72]
        mov     r10, QWORD PTR [rdx+80]
        mov     QWORD PTR [rcx+72], r9
        adc     r10, QWORD PTR [r8+80]
        mov     r9, QWORD PTR [rdx+88]
        mov     QWORD PTR [rcx+80], r10
        adc     r9, QWORD PTR [r8+88]
        mov     r10, QWORD PTR [rdx+96]
        mov     QWORD PTR [rcx+88], r9
        adc     r10, QWORD PTR [r8+96]
        mov     r9, QWORD PTR [rdx+104]
        mov     QWORD PTR [rcx+96], r10
        adc     r9, QWORD PTR [r8+104]
        mov     r10, QWORD PTR [rdx+112]
        mov     QWORD PTR [rcx+104], r9
        adc     r10, QWORD PTR [r8+112]
        mov     r9, QWORD PTR [rdx+120]
        mov     QWORD PTR [rcx+112], r10
        adc     r9, QWORD PTR [r8+120]
        mov     r10, QWORD PTR [rdx+128]
        mov     QWORD PTR [rcx+120], r9
        adc     r10, QWORD PTR [r8+128]
        mov     r9, QWORD PTR [rdx+136]
        mov     QWORD PTR [rcx+128], r10
        adc     r9, QWORD PTR [r8+136]
        mov     r10, QWORD PTR [rdx+144]
        mov     QWORD PTR [rcx+136], r9
        adc     r10, QWORD PTR [r8+144]
        mov     r9, QWORD PTR [rdx+152]
        mov     QWORD PTR [rcx+144], r10
        adc     r9, QWORD PTR [r8+152]
        mov     r10, QWORD PTR [rdx+160]
        mov     QWORD PTR [rcx+152], r9
        adc     r10, QWORD PTR [r8+160]
        mov     r9, QWORD PTR [rdx+168]
        mov     QWORD PTR [rcx+160], r10
        adc     r9, QWORD PTR [r8+168]
        mov     r10, QWORD PTR [rdx+176]
        mov     QWORD PTR [rcx+168], r9
        adc     r10, QWORD PTR [r8+176]
        mov     r9, QWORD PTR [rdx+184]
        mov     QWORD PTR [rcx+176], r10
        adc     r9, QWORD PTR [r8+184]
        mov     QWORD PTR [rcx+184], r9
        ; rax = final carry
        adc     rax, 0
        ret
sp_3072_add_24 ENDP
_text ENDS
; /* Multiply a and b into r. (r = a * b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; *
; * Register arguments as used by the code below:
; *   rcx = r (48 64-bit words are written)
; *   rdx = a (24 words), r8 = b (24 words)
; *
; * One level of Karatsuba on top of sp_3072_mul_12. With a = a1:a0 and
; * b = b1:b0 split into 12 word halves:
; *   z0 = a0 * b0                 -> r[0..23]
; *   z2 = a1 * b1                 -> [rsp+192], 24 words
; *   z1 = (a0 + a1) * (b0 + b1)   -> [rsp], 24 words
; *   r  = z0 + ((z1 - z0 - z2) << 768) + (z2 << 1536)
; * The carries out of the two 12 word sums are applied afterwards with
; * masks rather than branches.
; *
; * Stack frame (616 bytes):
; *   [rsp+  0..191]  z1
; *   [rsp+192..383]  z2
; *   [rsp+384..479]  a0 + a1 (low 12 words)
; *   [rsp+480..575]  b0 + b1 (low 12 words)
; *   [rsp+576]       saved r
; *   [rsp+584]       saved a
; *   [rsp+592]       saved b
; *   [rsp+600]       carry out of a0 + a1 (0 or 1)
; *   [rsp+608]       carry out of b0 + b1 (0 or 1)
; */
_text SEGMENT READONLY PARA
sp_3072_mul_24 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
sub rsp, 616
mov QWORD PTR [rsp+576], rcx
mov QWORD PTR [rsp+584], rdx
mov QWORD PTR [rsp+592], r8
; [rsp+384] = a0 + a1, carry out in r15
lea r12, QWORD PTR [rsp+384]
lea r14, QWORD PTR [rdx+96]
; Add
mov rax, QWORD PTR [rdx]
xor r15, r15
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [rdx+8]
mov QWORD PTR [r12], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [rdx+16]
mov QWORD PTR [r12+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [rdx+24]
mov QWORD PTR [r12+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [rdx+32]
mov QWORD PTR [r12+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [rdx+40]
mov QWORD PTR [r12+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r12+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [rdx+56]
mov QWORD PTR [r12+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [rdx+64]
mov QWORD PTR [r12+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [rdx+72]
mov QWORD PTR [r12+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [rdx+80]
mov QWORD PTR [r12+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [rdx+88]
mov QWORD PTR [r12+80], r9
adc r10, QWORD PTR [r14+88]
mov QWORD PTR [r12+88], r10
adc r15, 0
mov QWORD PTR [rsp+600], r15
; [rsp+480] = b0 + b1, carry out in rdi
lea r13, QWORD PTR [rsp+480]
lea r14, QWORD PTR [r8+96]
; Add
mov rax, QWORD PTR [r8]
xor rdi, rdi
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [r8+8]
mov QWORD PTR [r13], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [r8+16]
mov QWORD PTR [r13+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [r8+24]
mov QWORD PTR [r13+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [r8+32]
mov QWORD PTR [r13+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [r8+40]
mov QWORD PTR [r13+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [r8+48]
mov QWORD PTR [r13+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [r8+56]
mov QWORD PTR [r13+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [r8+64]
mov QWORD PTR [r13+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [r8+72]
mov QWORD PTR [r13+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [r8+80]
mov QWORD PTR [r13+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [r8+88]
mov QWORD PTR [r13+80], r9
adc r10, QWORD PTR [r14+88]
mov QWORD PTR [r13+88], r10
adc rdi, 0
mov QWORD PTR [rsp+608], rdi
; z1 = (a0 + a1) * (b0 + b1) -> [rsp]
mov r8, r13
mov rdx, r12
mov rcx, rsp
call sp_3072_mul_12
; z2 = a1 * b1 -> [rsp+192]
mov r8, QWORD PTR [rsp+592]
mov rdx, QWORD PTR [rsp+584]
lea rcx, QWORD PTR [rsp+192]
add r8, 96
add rdx, 96
call sp_3072_mul_12
; z0 = a0 * b0 -> r
mov r8, QWORD PTR [rsp+592]
mov rdx, QWORD PTR [rsp+584]
mov rcx, QWORD PTR [rsp+576]
call sp_3072_mul_12
; rcx is volatile across calls in the Microsoft x64 calling convention and
; is dereferenced below, so always reload r here. The reload must not depend
; on an assembly-time symbol or on the callee happening to preserve rcx.
; rdx and r8 are not read again in this procedure, so they are not reloaded.
mov rcx, QWORD PTR [rsp+576]
mov r15, QWORD PTR [rsp+600]
mov rdi, QWORD PTR [rsp+608]
mov rsi, QWORD PTR [rsp+576]
; r11 = product of the two carries; it accumulates the top carry word
mov r11, r15
lea r12, QWORD PTR [rsp+384]
lea r13, QWORD PTR [rsp+480]
and r11, rdi
; Turn the 0/1 carries into 0/all-ones masks
neg r15
neg rdi
add rsi, 192
; (a0 + a1) is kept only if b0 + b1 carried; (b0 + b1) only if a0 + a1 carried
mov rax, QWORD PTR [r12]
mov r9, QWORD PTR [r13]
and rax, rdi
and r9, r15
mov QWORD PTR [r12], rax
mov QWORD PTR [r13], r9
mov rax, QWORD PTR [r12+8]
mov r9, QWORD PTR [r13+8]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+8], rax
mov QWORD PTR [r13+8], r9
mov rax, QWORD PTR [r12+16]
mov r9, QWORD PTR [r13+16]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+16], rax
mov QWORD PTR [r13+16], r9
mov rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [r13+24]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+24], rax
mov QWORD PTR [r13+24], r9
mov rax, QWORD PTR [r12+32]
mov r9, QWORD PTR [r13+32]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+32], rax
mov QWORD PTR [r13+32], r9
mov rax, QWORD PTR [r12+40]
mov r9, QWORD PTR [r13+40]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+40], rax
mov QWORD PTR [r13+40], r9
mov rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [r13+48]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+48], rax
mov QWORD PTR [r13+48], r9
mov rax, QWORD PTR [r12+56]
mov r9, QWORD PTR [r13+56]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+56], rax
mov QWORD PTR [r13+56], r9
mov rax, QWORD PTR [r12+64]
mov r9, QWORD PTR [r13+64]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+64], rax
mov QWORD PTR [r13+64], r9
mov rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [r13+72]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+72], rax
mov QWORD PTR [r13+72], r9
mov rax, QWORD PTR [r12+80]
mov r9, QWORD PTR [r13+80]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+80], rax
mov QWORD PTR [r13+80], r9
mov rax, QWORD PTR [r12+88]
mov r9, QWORD PTR [r13+88]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+88], rax
mov QWORD PTR [r13+88], r9
; r[24..35] = masked (a0 + a1) + masked (b0 + b1); carry into r11
mov rax, QWORD PTR [r12]
add rax, QWORD PTR [r13]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r13+88]
mov QWORD PTR [rsi+88], r10
adc r11, 0
; z1 -= z2 (24 words, in place at [rsp]); borrow out of r11
lea r13, QWORD PTR [rsp+192]
mov r12, rsp
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [r13]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [r13+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [r13+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [r13+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [r13+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [r13+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [r13+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [r13+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [r13+184]
mov QWORD PTR [r12+184], r10
sbb r11, 0
; z1 -= z0 (z0 is r[0..23], read through rcx); borrow out of r11
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [rcx]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [rcx+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [rcx+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [rcx+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [rcx+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [rcx+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [rcx+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [rcx+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [rcx+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [rcx+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [rcx+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [rcx+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [rcx+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [rcx+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [rcx+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [rcx+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [rcx+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [rcx+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [rcx+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [rcx+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [rcx+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [rcx+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [rcx+184]
mov QWORD PTR [r12+184], r10
sbb r11, 0
; r[12..35] += z1; carry into r11, which becomes r[36]
sub rsi, 96
; Add
mov rax, QWORD PTR [rsi]
add rax, QWORD PTR [r12]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r12+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r12+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r12+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r12+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r12+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r12+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r12+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r12+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r12+96]
mov r9, QWORD PTR [rsi+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r12+104]
mov r10, QWORD PTR [rsi+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r12+112]
mov rax, QWORD PTR [rsi+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r12+120]
mov r9, QWORD PTR [rsi+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r12+128]
mov r10, QWORD PTR [rsi+136]
mov QWORD PTR [rsi+128], r9
adc r10, QWORD PTR [r12+136]
mov rax, QWORD PTR [rsi+144]
mov QWORD PTR [rsi+136], r10
adc rax, QWORD PTR [r12+144]
mov r9, QWORD PTR [rsi+152]
mov QWORD PTR [rsi+144], rax
adc r9, QWORD PTR [r12+152]
mov r10, QWORD PTR [rsi+160]
mov QWORD PTR [rsi+152], r9
adc r10, QWORD PTR [r12+160]
mov rax, QWORD PTR [rsi+168]
mov QWORD PTR [rsi+160], r10
adc rax, QWORD PTR [r12+168]
mov r9, QWORD PTR [rsi+176]
mov QWORD PTR [rsi+168], rax
adc r9, QWORD PTR [r12+176]
mov r10, QWORD PTR [rsi+184]
mov QWORD PTR [rsi+176], r9
adc r10, QWORD PTR [r12+184]
mov QWORD PTR [rsi+184], r10
adc r11, 0
mov QWORD PTR [rcx+288], r11
; r[24..36] += z2[0..12]
add rsi, 96
; Add
mov rax, QWORD PTR [rsi]
add rax, QWORD PTR [r13]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r13+96]
mov QWORD PTR [rsi+96], rax
; r[37..47] = z2[13..23] plus the carry still in flight
; Add to zero
mov rax, QWORD PTR [r13+104]
adc rax, 0
mov r9, QWORD PTR [r13+112]
mov QWORD PTR [rsi+104], rax
adc r9, 0
mov r10, QWORD PTR [r13+120]
mov QWORD PTR [rsi+112], r9
adc r10, 0
mov rax, QWORD PTR [r13+128]
mov QWORD PTR [rsi+120], r10
adc rax, 0
mov r9, QWORD PTR [r13+136]
mov QWORD PTR [rsi+128], rax
adc r9, 0
mov r10, QWORD PTR [r13+144]
mov QWORD PTR [rsi+136], r9
adc r10, 0
mov rax, QWORD PTR [r13+152]
mov QWORD PTR [rsi+144], r10
adc rax, 0
mov r9, QWORD PTR [r13+160]
mov QWORD PTR [rsi+152], rax
adc r9, 0
mov r10, QWORD PTR [r13+168]
mov QWORD PTR [rsi+160], r9
adc r10, 0
mov rax, QWORD PTR [r13+176]
mov QWORD PTR [rsi+168], r10
adc rax, 0
mov r9, QWORD PTR [r13+184]
mov QWORD PTR [rsi+176], rax
adc r9, 0
mov QWORD PTR [rsi+184], r9
add rsp, 616
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_3072_mul_24 ENDP
_text ENDS
; /* Add a to a into r. (r = a + a)
; *
; * Doubles a 12 word (64-bit words) value and returns the bit shifted out of
; * the top word.
; *
; * r (rcx) Result, 12 words.
; * a (rdx) Value to double, 12 words.
; *
; * Returns rax = 1 when the doubling carried out of word 11, otherwise 0.
; * Scratch: r8, r9 and the flags.
; */
_text SEGMENT READONLY PARA
sp_3072_dbl_12 PROC
        ; Zero the return value first: xor clobbers CF, so it must not sit
        ; inside the add/adc chain.
        xor     eax, eax
        ; Word 0 starts the carry chain.
        mov     r9, QWORD PTR [rdx]
        add     r9, r9
        ; Words 1..11: fetch the next word, store the previous result, then
        ; add the word to itself with carry. mov leaves CF untouched.
        mov     r8, QWORD PTR [rdx+8]
        mov     QWORD PTR [rcx], r9
        adc     r8, r8
        mov     r9, QWORD PTR [rdx+16]
        mov     QWORD PTR [rcx+8], r8
        adc     r9, r9
        mov     r8, QWORD PTR [rdx+24]
        mov     QWORD PTR [rcx+16], r9
        adc     r8, r8
        mov     r9, QWORD PTR [rdx+32]
        mov     QWORD PTR [rcx+24], r8
        adc     r9, r9
        mov     r8, QWORD PTR [rdx+40]
        mov     QWORD PTR [rcx+32], r9
        adc     r8, r8
        mov     r9, QWORD PTR [rdx+48]
        mov     QWORD PTR [rcx+40], r8
        adc     r9, r9
        mov     r8, QWORD PTR [rdx+56]
        mov     QWORD PTR [rcx+48], r9
        adc     r8, r8
        mov     r9, QWORD PTR [rdx+64]
        mov     QWORD PTR [rcx+56], r8
        adc     r9, r9
        mov     r8, QWORD PTR [rdx+72]
        mov     QWORD PTR [rcx+64], r9
        adc     r8, r8
        mov     r9, QWORD PTR [rdx+80]
        mov     QWORD PTR [rcx+72], r8
        adc     r9, r9
        mov     r8, QWORD PTR [rdx+88]
        mov     QWORD PTR [rcx+80], r9
        adc     r8, r8
        mov     QWORD PTR [rcx+88], r8
        ; rax = final carry
        adc     rax, 0
        ret
sp_3072_dbl_12 ENDP
_text ENDS
; /* Square a and put result in r. (r = a * a)
; *
; * One Karatsuba level on top of sp_3072_sqr_12 (defined elsewhere in this
; * file; assumed here to write the 24-word square of a 12-word input).
; * Writing a = a1 * 2^768 + a0 with a0, a1 of 12 64-bit words each:
; *   z0 = a0^2, z2 = a1^2, z1 = (a0 + a1)^2
; *   r  = z2 * 2^1536 + (z1 - z2 - z0) * 2^768 + z0
; *
; * r  A single precision integer (rcx). Stores reach byte offset 376,
; *    i.e. 48 words are written.
; * a  A single precision integer (rdx). 24 words are read.
; *
; * Stack frame, byte offsets from rsp after the prologue:
; *     0..191  z1 (square of the 12-word sum a0 + a1)
; *   192..383  z2
; *   384..479  low 12 words of a0 + a1
; *   480       saved r
; *   488       saved a
; *   496       carry out of a0 + a1 (0 or 1)
; *
; * NOTE(review): with the usual entry alignment (rsp = 8 mod 16) the frame
; * below leaves rsp = 8 mod 16 at the three calls, and no home space is
; * reserved; this is only safe while sp_3072_sqr_12 has no such
; * requirements - confirm against its definition.
; */
_text SEGMENT READONLY PARA
sp_3072_sqr_24 PROC
push r12
sub rsp, 504
mov QWORD PTR [rsp+480], rcx
mov QWORD PTR [rsp+488], rdx
lea r10, QWORD PTR [rsp+384]
lea r11, QWORD PTR [rdx+96]
; Add: [rsp+384..479] = a0 + a1, r9 = carry out.
; mov does not touch CF, so loads/stores can sit between the adc steps.
mov rax, QWORD PTR [rdx]
xor r9, r9
add rax, QWORD PTR [r11]
mov r8, QWORD PTR [rdx+8]
mov QWORD PTR [r10], rax
adc r8, QWORD PTR [r11+8]
mov rax, QWORD PTR [rdx+16]
mov QWORD PTR [r10+8], r8
adc rax, QWORD PTR [r11+16]
mov r8, QWORD PTR [rdx+24]
mov QWORD PTR [r10+16], rax
adc r8, QWORD PTR [r11+24]
mov rax, QWORD PTR [rdx+32]
mov QWORD PTR [r10+24], r8
adc rax, QWORD PTR [r11+32]
mov r8, QWORD PTR [rdx+40]
mov QWORD PTR [r10+32], rax
adc r8, QWORD PTR [r11+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r10+40], r8
adc rax, QWORD PTR [r11+48]
mov r8, QWORD PTR [rdx+56]
mov QWORD PTR [r10+48], rax
adc r8, QWORD PTR [r11+56]
mov rax, QWORD PTR [rdx+64]
mov QWORD PTR [r10+56], r8
adc rax, QWORD PTR [r11+64]
mov r8, QWORD PTR [rdx+72]
mov QWORD PTR [r10+64], rax
adc r8, QWORD PTR [r11+72]
mov rax, QWORD PTR [rdx+80]
mov QWORD PTR [r10+72], r8
adc rax, QWORD PTR [r11+80]
mov r8, QWORD PTR [rdx+88]
mov QWORD PTR [r10+80], rax
adc r8, QWORD PTR [r11+88]
mov QWORD PTR [r10+88], r8
adc r9, 0
mov QWORD PTR [rsp+496], r9
; z1 = (low 12 words of a0 + a1)^2 -> [rsp+0..191]
mov rdx, r10
mov rcx, rsp
call sp_3072_sqr_12
; z2 = a1^2 -> [rsp+192..383]
mov rdx, QWORD PTR [rsp+488]
lea rcx, QWORD PTR [rsp+192]
add rdx, 96
call sp_3072_sqr_12
; z0 = a0^2 -> r[0..23]
mov rdx, QWORD PTR [rsp+488]
mov rcx, QWORD PTR [rsp+480]
call sp_3072_sqr_12
; rcx and rdx are volatile across a call in the Microsoft x64 convention,
; so always restore them from the frame instead of depending on the
; callee (this reload used to be assembled only when _WIN64 was defined).
; rcx (r) is needed below; rdx is overwritten before its next use.
mov rdx, QWORD PTR [rsp+488]
mov rcx, QWORD PTR [rsp+480]
; r12 = 0 - carry: all ones when a0 + a1 overflowed 12 words, else zero.
; r9 keeps the carry itself; it accumulates the word that ends up in r[36].
mov r12, QWORD PTR [rsp+496]
mov r11, rcx
lea r10, QWORD PTR [rsp+384]
mov r9, r12
neg r12
add r11, 192
; r[24..35] = (a0 + a1) AND mask: the sum if it carried, otherwise zero.
mov rax, QWORD PTR [r10]
mov r8, QWORD PTR [r10+8]
and rax, r12
and r8, r12
mov QWORD PTR [r11], rax
mov QWORD PTR [r11+8], r8
mov rax, QWORD PTR [r10+16]
mov r8, QWORD PTR [r10+24]
and rax, r12
and r8, r12
mov QWORD PTR [r11+16], rax
mov QWORD PTR [r11+24], r8
mov rax, QWORD PTR [r10+32]
mov r8, QWORD PTR [r10+40]
and rax, r12
and r8, r12
mov QWORD PTR [r11+32], rax
mov QWORD PTR [r11+40], r8
mov rax, QWORD PTR [r10+48]
mov r8, QWORD PTR [r10+56]
and rax, r12
and r8, r12
mov QWORD PTR [r11+48], rax
mov QWORD PTR [r11+56], r8
mov rax, QWORD PTR [r10+64]
mov r8, QWORD PTR [r10+72]
and rax, r12
and r8, r12
mov QWORD PTR [r11+64], rax
mov QWORD PTR [r11+72], r8
mov rax, QWORD PTR [r10+80]
mov r8, QWORD PTR [r10+88]
and rax, r12
and r8, r12
mov QWORD PTR [r11+80], rax
mov QWORD PTR [r11+88], r8
; Double r[24..35] in place (the 2 * carry * sum cross term); the bit
; shifted out of the top word is added to r9.
mov rax, QWORD PTR [r11]
add rax, rax
mov r8, QWORD PTR [r11+8]
mov QWORD PTR [r11], rax
adc r8, r8
mov rax, QWORD PTR [r11+16]
mov QWORD PTR [r11+8], r8
adc rax, rax
mov r8, QWORD PTR [r11+24]
mov QWORD PTR [r11+16], rax
adc r8, r8
mov rax, QWORD PTR [r11+32]
mov QWORD PTR [r11+24], r8
adc rax, rax
mov r8, QWORD PTR [r11+40]
mov QWORD PTR [r11+32], rax
adc r8, r8
mov rax, QWORD PTR [r11+48]
mov QWORD PTR [r11+40], r8
adc rax, rax
mov r8, QWORD PTR [r11+56]
mov QWORD PTR [r11+48], rax
adc r8, r8
mov rax, QWORD PTR [r11+64]
mov QWORD PTR [r11+56], r8
adc rax, rax
mov r8, QWORD PTR [r11+72]
mov QWORD PTR [r11+64], rax
adc r8, r8
mov rax, QWORD PTR [r11+80]
mov QWORD PTR [r11+72], r8
adc rax, rax
mov r8, QWORD PTR [r11+88]
mov QWORD PTR [r11+80], rax
adc r8, r8
mov QWORD PTR [r11+88], r8
adc r9, 0
; z1 -= z2 over 24 words (r10 = z1, rdx = z2); the borrow comes out of r9.
lea rdx, QWORD PTR [rsp+192]
mov r10, rsp
mov rax, QWORD PTR [r10]
sub rax, QWORD PTR [rdx]
mov r8, QWORD PTR [r10+8]
mov QWORD PTR [r10], rax
sbb r8, QWORD PTR [rdx+8]
mov rax, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], r8
sbb rax, QWORD PTR [rdx+16]
mov r8, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], rax
sbb r8, QWORD PTR [rdx+24]
mov rax, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], r8
sbb rax, QWORD PTR [rdx+32]
mov r8, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], rax
sbb r8, QWORD PTR [rdx+40]
mov rax, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], r8
sbb rax, QWORD PTR [rdx+48]
mov r8, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], rax
sbb r8, QWORD PTR [rdx+56]
mov rax, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], r8
sbb rax, QWORD PTR [rdx+64]
mov r8, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], rax
sbb r8, QWORD PTR [rdx+72]
mov rax, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], r8
sbb rax, QWORD PTR [rdx+80]
mov r8, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], rax
sbb r8, QWORD PTR [rdx+88]
mov rax, QWORD PTR [r10+96]
mov QWORD PTR [r10+88], r8
sbb rax, QWORD PTR [rdx+96]
mov r8, QWORD PTR [r10+104]
mov QWORD PTR [r10+96], rax
sbb r8, QWORD PTR [rdx+104]
mov rax, QWORD PTR [r10+112]
mov QWORD PTR [r10+104], r8
sbb rax, QWORD PTR [rdx+112]
mov r8, QWORD PTR [r10+120]
mov QWORD PTR [r10+112], rax
sbb r8, QWORD PTR [rdx+120]
mov rax, QWORD PTR [r10+128]
mov QWORD PTR [r10+120], r8
sbb rax, QWORD PTR [rdx+128]
mov r8, QWORD PTR [r10+136]
mov QWORD PTR [r10+128], rax
sbb r8, QWORD PTR [rdx+136]
mov rax, QWORD PTR [r10+144]
mov QWORD PTR [r10+136], r8
sbb rax, QWORD PTR [rdx+144]
mov r8, QWORD PTR [r10+152]
mov QWORD PTR [r10+144], rax
sbb r8, QWORD PTR [rdx+152]
mov rax, QWORD PTR [r10+160]
mov QWORD PTR [r10+152], r8
sbb rax, QWORD PTR [rdx+160]
mov r8, QWORD PTR [r10+168]
mov QWORD PTR [r10+160], rax
sbb r8, QWORD PTR [rdx+168]
mov rax, QWORD PTR [r10+176]
mov QWORD PTR [r10+168], r8
sbb rax, QWORD PTR [rdx+176]
mov r8, QWORD PTR [r10+184]
mov QWORD PTR [r10+176], rax
sbb r8, QWORD PTR [rdx+184]
mov QWORD PTR [r10+184], r8
sbb r9, 0
; z1 -= z0 over 24 words (z0 sits in r[0..23] via rcx); borrow out of r9.
mov rax, QWORD PTR [r10]
sub rax, QWORD PTR [rcx]
mov r8, QWORD PTR [r10+8]
mov QWORD PTR [r10], rax
sbb r8, QWORD PTR [rcx+8]
mov rax, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], r8
sbb rax, QWORD PTR [rcx+16]
mov r8, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], rax
sbb r8, QWORD PTR [rcx+24]
mov rax, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], r8
sbb rax, QWORD PTR [rcx+32]
mov r8, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], rax
sbb r8, QWORD PTR [rcx+40]
mov rax, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], r8
sbb rax, QWORD PTR [rcx+48]
mov r8, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], rax
sbb r8, QWORD PTR [rcx+56]
mov rax, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], r8
sbb rax, QWORD PTR [rcx+64]
mov r8, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], rax
sbb r8, QWORD PTR [rcx+72]
mov rax, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], r8
sbb rax, QWORD PTR [rcx+80]
mov r8, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], rax
sbb r8, QWORD PTR [rcx+88]
mov rax, QWORD PTR [r10+96]
mov QWORD PTR [r10+88], r8
sbb rax, QWORD PTR [rcx+96]
mov r8, QWORD PTR [r10+104]
mov QWORD PTR [r10+96], rax
sbb r8, QWORD PTR [rcx+104]
mov rax, QWORD PTR [r10+112]
mov QWORD PTR [r10+104], r8
sbb rax, QWORD PTR [rcx+112]
mov r8, QWORD PTR [r10+120]
mov QWORD PTR [r10+112], rax
sbb r8, QWORD PTR [rcx+120]
mov rax, QWORD PTR [r10+128]
mov QWORD PTR [r10+120], r8
sbb rax, QWORD PTR [rcx+128]
mov r8, QWORD PTR [r10+136]
mov QWORD PTR [r10+128], rax
sbb r8, QWORD PTR [rcx+136]
mov rax, QWORD PTR [r10+144]
mov QWORD PTR [r10+136], r8
sbb rax, QWORD PTR [rcx+144]
mov r8, QWORD PTR [r10+152]
mov QWORD PTR [r10+144], rax
sbb r8, QWORD PTR [rcx+152]
mov rax, QWORD PTR [r10+160]
mov QWORD PTR [r10+152], r8
sbb rax, QWORD PTR [rcx+160]
mov r8, QWORD PTR [r10+168]
mov QWORD PTR [r10+160], rax
sbb r8, QWORD PTR [rcx+168]
mov rax, QWORD PTR [r10+176]
mov QWORD PTR [r10+168], r8
sbb rax, QWORD PTR [rcx+176]
mov r8, QWORD PTR [r10+184]
mov QWORD PTR [r10+176], rax
sbb r8, QWORD PTR [rcx+184]
mov QWORD PTR [r10+184], r8
sbb r9, 0
; r11 = r + 96 bytes: the middle term is added starting at word 12.
sub r11, 96
; Add in place: r[12..35] += z1 (24 words); the carry goes into r9.
mov rax, QWORD PTR [r11]
add rax, QWORD PTR [r10]
mov r8, QWORD PTR [r11+8]
mov QWORD PTR [r11], rax
adc r8, QWORD PTR [r10+8]
mov rax, QWORD PTR [r11+16]
mov QWORD PTR [r11+8], r8
adc rax, QWORD PTR [r10+16]
mov r8, QWORD PTR [r11+24]
mov QWORD PTR [r11+16], rax
adc r8, QWORD PTR [r10+24]
mov rax, QWORD PTR [r11+32]
mov QWORD PTR [r11+24], r8
adc rax, QWORD PTR [r10+32]
mov r8, QWORD PTR [r11+40]
mov QWORD PTR [r11+32], rax
adc r8, QWORD PTR [r10+40]
mov rax, QWORD PTR [r11+48]
mov QWORD PTR [r11+40], r8
adc rax, QWORD PTR [r10+48]
mov r8, QWORD PTR [r11+56]
mov QWORD PTR [r11+48], rax
adc r8, QWORD PTR [r10+56]
mov rax, QWORD PTR [r11+64]
mov QWORD PTR [r11+56], r8
adc rax, QWORD PTR [r10+64]
mov r8, QWORD PTR [r11+72]
mov QWORD PTR [r11+64], rax
adc r8, QWORD PTR [r10+72]
mov rax, QWORD PTR [r11+80]
mov QWORD PTR [r11+72], r8
adc rax, QWORD PTR [r10+80]
mov r8, QWORD PTR [r11+88]
mov QWORD PTR [r11+80], rax
adc r8, QWORD PTR [r10+88]
mov rax, QWORD PTR [r11+96]
mov QWORD PTR [r11+88], r8
adc rax, QWORD PTR [r10+96]
mov r8, QWORD PTR [r11+104]
mov QWORD PTR [r11+96], rax
adc r8, QWORD PTR [r10+104]
mov rax, QWORD PTR [r11+112]
mov QWORD PTR [r11+104], r8
adc rax, QWORD PTR [r10+112]
mov r8, QWORD PTR [r11+120]
mov QWORD PTR [r11+112], rax
adc r8, QWORD PTR [r10+120]
mov rax, QWORD PTR [r11+128]
mov QWORD PTR [r11+120], r8
adc rax, QWORD PTR [r10+128]
mov r8, QWORD PTR [r11+136]
mov QWORD PTR [r11+128], rax
adc r8, QWORD PTR [r10+136]
mov rax, QWORD PTR [r11+144]
mov QWORD PTR [r11+136], r8
adc rax, QWORD PTR [r10+144]
mov r8, QWORD PTR [r11+152]
mov QWORD PTR [r11+144], rax
adc r8, QWORD PTR [r10+152]
mov rax, QWORD PTR [r11+160]
mov QWORD PTR [r11+152], r8
adc rax, QWORD PTR [r10+160]
mov r8, QWORD PTR [r11+168]
mov QWORD PTR [r11+160], rax
adc r8, QWORD PTR [r10+168]
mov rax, QWORD PTR [r11+176]
mov QWORD PTR [r11+168], r8
adc rax, QWORD PTR [r10+176]
mov r8, QWORD PTR [r11+184]
mov QWORD PTR [r11+176], rax
adc r8, QWORD PTR [r10+184]
mov QWORD PTR [r11+184], r8
adc r9, 0
; r[36] = accumulated overflow word of the middle term.
mov QWORD PTR [rcx+288], r9
; Add in place: r[24..36] += z2[0..12] (13 words, rdx = z2).
mov rax, QWORD PTR [r11+96]
add rax, QWORD PTR [rdx]
mov r8, QWORD PTR [r11+104]
mov QWORD PTR [r11+96], rax
adc r8, QWORD PTR [rdx+8]
mov rax, QWORD PTR [r11+112]
mov QWORD PTR [r11+104], r8
adc rax, QWORD PTR [rdx+16]
mov r8, QWORD PTR [r11+120]
mov QWORD PTR [r11+112], rax
adc r8, QWORD PTR [rdx+24]
mov rax, QWORD PTR [r11+128]
mov QWORD PTR [r11+120], r8
adc rax, QWORD PTR [rdx+32]
mov r8, QWORD PTR [r11+136]
mov QWORD PTR [r11+128], rax
adc r8, QWORD PTR [rdx+40]
mov rax, QWORD PTR [r11+144]
mov QWORD PTR [r11+136], r8
adc rax, QWORD PTR [rdx+48]
mov r8, QWORD PTR [r11+152]
mov QWORD PTR [r11+144], rax
adc r8, QWORD PTR [rdx+56]
mov rax, QWORD PTR [r11+160]
mov QWORD PTR [r11+152], r8
adc rax, QWORD PTR [rdx+64]
mov r8, QWORD PTR [r11+168]
mov QWORD PTR [r11+160], rax
adc r8, QWORD PTR [rdx+72]
mov rax, QWORD PTR [r11+176]
mov QWORD PTR [r11+168], r8
adc rax, QWORD PTR [rdx+80]
mov r8, QWORD PTR [r11+184]
mov QWORD PTR [r11+176], rax
adc r8, QWORD PTR [rdx+88]
mov rax, QWORD PTR [r11+192]
mov QWORD PTR [r11+184], r8
adc rax, QWORD PTR [rdx+96]
mov QWORD PTR [r11+192], rax
; Add to zero: r[37..47] = z2[13..23] + carry. These words of r have not
; been written yet, so they are stored rather than accumulated.
mov rax, QWORD PTR [rdx+104]
adc rax, 0
mov r8, QWORD PTR [rdx+112]
mov QWORD PTR [r11+200], rax
adc r8, 0
mov rax, QWORD PTR [rdx+120]
mov QWORD PTR [r11+208], r8
adc rax, 0
mov r8, QWORD PTR [rdx+128]
mov QWORD PTR [r11+216], rax
adc r8, 0
mov rax, QWORD PTR [rdx+136]
mov QWORD PTR [r11+224], r8
adc rax, 0
mov r8, QWORD PTR [rdx+144]
mov QWORD PTR [r11+232], rax
adc r8, 0
mov rax, QWORD PTR [rdx+152]
mov QWORD PTR [r11+240], r8
adc rax, 0
mov r8, QWORD PTR [rdx+160]
mov QWORD PTR [r11+248], rax
adc r8, 0
mov rax, QWORD PTR [rdx+168]
mov QWORD PTR [r11+256], r8
adc rax, 0
mov r8, QWORD PTR [rdx+176]
mov QWORD PTR [r11+264], rax
adc r8, 0
mov rax, QWORD PTR [rdx+184]
mov QWORD PTR [r11+272], r8
adc rax, 0
mov QWORD PTR [r11+280], rax
add rsp, 504
pop r12
ret
sp_3072_sqr_24 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Multiply a and b into r. (r = a * b)
; *
; * One Karatsuba level on top of sp_3072_mul_avx2_12 (defined elsewhere in
; * this file; assumed here to write the 24-word product of two 12-word
; * inputs). Writing a = a1 * 2^768 + a0 and b = b1 * 2^768 + b0 with
; * 12 64-bit words per half:
; *   z0 = a0 * b0, z2 = a1 * b1, z1 = (a0 + a1) * (b0 + b1)
; *   r  = z2 * 2^1536 + (z1 - z2 - z0) * 2^768 + z0
; *
; * r  A single precision integer (rcx). Stores reach byte offset 376,
; *    i.e. 48 words are written.
; * a  A single precision integer (rdx). 24 words are read.
; * b  A single precision integer (r8). 24 words are read.
; *
; * Stack frame, byte offsets from rsp after the prologue:
; *     0..191  z1 (product of the two 12-word sums)
; *   192..383  z2
; *   384..479  low 12 words of a0 + a1
; *   480..575  low 12 words of b0 + b1
; *   576       saved r
; *   584       saved a
; *   592       saved b
; *   600       carry out of a0 + a1 (0 or 1)
; *   608       carry out of b0 + b1 (0 or 1)
; *
; * Uses pext, so the CPU must support BMI2.
; */
_text SEGMENT READONLY PARA
sp_3072_mul_avx2_24 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
sub rsp, 616
mov QWORD PTR [rsp+576], rcx
mov QWORD PTR [rsp+584], rdx
mov QWORD PTR [rsp+592], r8
lea r12, QWORD PTR [rsp+384]
lea r14, QWORD PTR [rdx+96]
; Add: [rsp+384..479] = a0 + a1, r15 = carry out.
; mov does not touch CF, so loads/stores can sit between the adc steps.
mov rax, QWORD PTR [rdx]
xor r15, r15
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [rdx+8]
mov QWORD PTR [r12], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [rdx+16]
mov QWORD PTR [r12+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [rdx+24]
mov QWORD PTR [r12+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [rdx+32]
mov QWORD PTR [r12+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [rdx+40]
mov QWORD PTR [r12+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r12+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [rdx+56]
mov QWORD PTR [r12+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [rdx+64]
mov QWORD PTR [r12+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [rdx+72]
mov QWORD PTR [r12+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [rdx+80]
mov QWORD PTR [r12+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [rdx+88]
mov QWORD PTR [r12+80], r9
adc r10, QWORD PTR [r14+88]
mov QWORD PTR [r12+88], r10
adc r15, 0
mov QWORD PTR [rsp+600], r15
lea r13, QWORD PTR [rsp+480]
lea r14, QWORD PTR [r8+96]
; Add: [rsp+480..575] = b0 + b1, rdi = carry out.
mov rax, QWORD PTR [r8]
xor rdi, rdi
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [r8+8]
mov QWORD PTR [r13], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [r8+16]
mov QWORD PTR [r13+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [r8+24]
mov QWORD PTR [r13+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [r8+32]
mov QWORD PTR [r13+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [r8+40]
mov QWORD PTR [r13+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [r8+48]
mov QWORD PTR [r13+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [r8+56]
mov QWORD PTR [r13+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [r8+64]
mov QWORD PTR [r13+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [r8+72]
mov QWORD PTR [r13+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [r8+80]
mov QWORD PTR [r13+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [r8+88]
mov QWORD PTR [r13+80], r9
adc r10, QWORD PTR [r14+88]
mov QWORD PTR [r13+88], r10
adc rdi, 0
mov QWORD PTR [rsp+608], rdi
; z1 = (a0 + a1) * (b0 + b1), low 12 words of each sum -> [rsp+0..191]
mov r8, r13
mov rdx, r12
mov rcx, rsp
call sp_3072_mul_avx2_12
; z2 = a1 * b1 -> [rsp+192..383]
mov r8, QWORD PTR [rsp+592]
mov rdx, QWORD PTR [rsp+584]
lea rcx, QWORD PTR [rsp+192]
add r8, 96
add rdx, 96
call sp_3072_mul_avx2_12
; z0 = a0 * b0 -> r[0..23]
mov r8, QWORD PTR [rsp+592]
mov rdx, QWORD PTR [rsp+584]
mov rcx, QWORD PTR [rsp+576]
call sp_3072_mul_avx2_12
; rcx, rdx and r8 are volatile across a call in the Microsoft x64
; convention, so always restore them from the frame instead of depending
; on the callee (this reload used to be assembled only when _WIN64 was
; defined). Only rcx (r) is read again below.
mov r8, QWORD PTR [rsp+592]
mov rdx, QWORD PTR [rsp+584]
mov rcx, QWORD PTR [rsp+576]
; r15 = carry of a0 + a1, rdi = carry of b0 + b1. r11 = r15 AND rdi is the
; carry * carry term; negating r15/rdi turns them into 0 / all-ones masks.
mov r15, QWORD PTR [rsp+600]
mov rdi, QWORD PTR [rsp+608]
mov rsi, QWORD PTR [rsp+576]
mov r11, r15
lea r12, QWORD PTR [rsp+384]
lea r13, QWORD PTR [rsp+480]
and r11, rdi
neg r15
neg rdi
add rsi, 192
; r[24..35] = (a-sum if the b-sum carried) + (b-sum if the a-sum carried).
; pext with an all-ones mask returns the source and with a zero mask
; returns 0, so it selects without touching CF between the adc steps.
mov rax, QWORD PTR [r12]
mov r9, QWORD PTR [r13]
pext rax, rax, rdi
pext r9, r9, r15
add rax, r9
mov r9, QWORD PTR [r12+8]
mov r10, QWORD PTR [r13+8]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi], rax
adc r9, r10
mov r10, QWORD PTR [r12+16]
mov rax, QWORD PTR [r13+16]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+8], r9
adc r10, rax
mov rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [r13+24]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+16], r10
adc rax, r9
mov r9, QWORD PTR [r12+32]
mov r10, QWORD PTR [r13+32]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+24], rax
adc r9, r10
mov r10, QWORD PTR [r12+40]
mov rax, QWORD PTR [r13+40]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+32], r9
adc r10, rax
mov rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [r13+48]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+40], r10
adc rax, r9
mov r9, QWORD PTR [r12+56]
mov r10, QWORD PTR [r13+56]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+48], rax
adc r9, r10
mov r10, QWORD PTR [r12+64]
mov rax, QWORD PTR [r13+64]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+56], r9
adc r10, rax
mov rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [r13+72]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+64], r10
adc rax, r9
mov r9, QWORD PTR [r12+80]
mov r10, QWORD PTR [r13+80]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+72], rax
adc r9, r10
mov r10, QWORD PTR [r12+88]
mov rax, QWORD PTR [r13+88]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+80], r9
adc r10, rax
mov QWORD PTR [rsi+88], r10
adc r11, 0
; z1 -= z2 over 24 words (r12 = z1, r13 = z2); the borrow comes out of r11.
lea r13, QWORD PTR [rsp+192]
mov r12, rsp
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [r13]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [r13+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [r13+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [r13+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [r13+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [r13+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [r13+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [r13+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [r13+184]
mov QWORD PTR [r12+184], r10
sbb r11, 0
; z1 -= z0 over 24 words (z0 sits in r[0..23] via rcx); borrow out of r11.
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [rcx]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [rcx+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [rcx+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [rcx+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [rcx+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [rcx+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [rcx+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [rcx+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [rcx+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [rcx+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [rcx+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [rcx+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [rcx+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [rcx+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [rcx+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [rcx+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [rcx+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [rcx+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [rcx+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [rcx+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [rcx+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [rcx+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [rcx+184]
mov QWORD PTR [r12+184], r10
sbb r11, 0
; rsi = r + 96 bytes: the middle term is added starting at word 12.
sub rsi, 96
; Add: r[12..35] += z1 (24 words); the carry goes into r11.
mov rax, QWORD PTR [rsi]
add rax, QWORD PTR [r12]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r12+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r12+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r12+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r12+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r12+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r12+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r12+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r12+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r12+96]
mov r9, QWORD PTR [rsi+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r12+104]
mov r10, QWORD PTR [rsi+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r12+112]
mov rax, QWORD PTR [rsi+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r12+120]
mov r9, QWORD PTR [rsi+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r12+128]
mov r10, QWORD PTR [rsi+136]
mov QWORD PTR [rsi+128], r9
adc r10, QWORD PTR [r12+136]
mov rax, QWORD PTR [rsi+144]
mov QWORD PTR [rsi+136], r10
adc rax, QWORD PTR [r12+144]
mov r9, QWORD PTR [rsi+152]
mov QWORD PTR [rsi+144], rax
adc r9, QWORD PTR [r12+152]
mov r10, QWORD PTR [rsi+160]
mov QWORD PTR [rsi+152], r9
adc r10, QWORD PTR [r12+160]
mov rax, QWORD PTR [rsi+168]
mov QWORD PTR [rsi+160], r10
adc rax, QWORD PTR [r12+168]
mov r9, QWORD PTR [rsi+176]
mov QWORD PTR [rsi+168], rax
adc r9, QWORD PTR [r12+176]
mov r10, QWORD PTR [rsi+184]
mov QWORD PTR [rsi+176], r9
adc r10, QWORD PTR [r12+184]
mov QWORD PTR [rsi+184], r10
adc r11, 0
; r[36] = accumulated overflow word of the middle term.
mov QWORD PTR [rcx+288], r11
; rsi = r + 192 bytes: z2 is added starting at word 24.
add rsi, 96
; Add: r[24..36] += z2[0..12] (13 words, r13 = z2).
mov rax, QWORD PTR [rsi]
add rax, QWORD PTR [r13]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r13+96]
mov QWORD PTR [rsi+96], rax
; Add to zero: r[37..47] = z2[13..23] + carry. These words of r have not
; been written yet, so they are stored rather than accumulated.
mov rax, QWORD PTR [r13+104]
adc rax, 0
mov r9, QWORD PTR [r13+112]
mov QWORD PTR [rsi+104], rax
adc r9, 0
mov r10, QWORD PTR [r13+120]
mov QWORD PTR [rsi+112], r9
adc r10, 0
mov rax, QWORD PTR [r13+128]
mov QWORD PTR [rsi+120], r10
adc rax, 0
mov r9, QWORD PTR [r13+136]
mov QWORD PTR [rsi+128], rax
adc r9, 0
mov r10, QWORD PTR [r13+144]
mov QWORD PTR [rsi+136], r9
adc r10, 0
mov rax, QWORD PTR [r13+152]
mov QWORD PTR [rsi+144], r10
adc rax, 0
mov r9, QWORD PTR [r13+160]
mov QWORD PTR [rsi+152], rax
adc r9, 0
mov r10, QWORD PTR [r13+168]
mov QWORD PTR [rsi+160], r9
adc r10, 0
mov rax, QWORD PTR [r13+176]
mov QWORD PTR [rsi+168], r10
adc rax, 0
mov r9, QWORD PTR [r13+184]
mov QWORD PTR [rsi+176], rax
adc r9, 0
mov QWORD PTR [rsi+184], r9
add rsp, 616
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_3072_mul_avx2_24 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Square a and put result in r. (r = a * a)
; *
; * r A single precision integer.
; * a A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_3072_sqr_avx2_24 PROC
push r12
sub rsp, 504
mov QWORD PTR [rsp+480], rcx
mov QWORD PTR [rsp+488], rdx
lea r10, QWORD PTR [rsp+384]
lea r11, QWORD PTR [rdx+96]
; Add
mov rax, QWORD PTR [rdx]
xor r9, r9
add rax, QWORD PTR [r11]
mov r8, QWORD PTR [rdx+8]
mov QWORD PTR [r10], rax
adc r8, QWORD PTR [r11+8]
mov rax, QWORD PTR [rdx+16]
mov QWORD PTR [r10+8], r8
adc rax, QWORD PTR [r11+16]
mov r8, QWORD PTR [rdx+24]
mov QWORD PTR [r10+16], rax
adc r8, QWORD PTR [r11+24]
mov rax, QWORD PTR [rdx+32]
mov QWORD PTR [r10+24], r8
adc rax, QWORD PTR [r11+32]
mov r8, QWORD PTR [rdx+40]
mov QWORD PTR [r10+32], rax
adc r8, QWORD PTR [r11+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r10+40], r8
adc rax, QWORD PTR [r11+48]
mov r8, QWORD PTR [rdx+56]
mov QWORD PTR [r10+48], rax
adc r8, QWORD PTR [r11+56]
mov rax, QWORD PTR [rdx+64]
mov QWORD PTR [r10+56], r8
adc rax, QWORD PTR [r11+64]
mov r8, QWORD PTR [rdx+72]
mov QWORD PTR [r10+64], rax
adc r8, QWORD PTR [r11+72]
mov rax, QWORD PTR [rdx+80]
mov QWORD PTR [r10+72], r8
adc rax, QWORD PTR [r11+80]
mov r8, QWORD PTR [rdx+88]
mov QWORD PTR [r10+80], rax
adc r8, QWORD PTR [r11+88]
mov QWORD PTR [r10+88], r8
adc r9, 0
mov QWORD PTR [rsp+496], r9
mov rdx, r10
mov rcx, rsp
call sp_3072_sqr_avx2_12
mov rdx, QWORD PTR [rsp+488]
lea rcx, QWORD PTR [rsp+192]
add rdx, 96
call sp_3072_sqr_avx2_12
mov rdx, QWORD PTR [rsp+488]
mov rcx, QWORD PTR [rsp+480]
call sp_3072_sqr_avx2_12
IFDEF _WIN64
mov rdx, QWORD PTR [rsp+488]
mov rcx, QWORD PTR [rsp+480]
ENDIF
mov r12, QWORD PTR [rsp+496]
mov r11, rcx
lea r10, QWORD PTR [rsp+384]
mov r9, r12
neg r12
add r11, 192
mov rax, QWORD PTR [r10]
pext rax, rax, r12
add rax, rax
mov r8, QWORD PTR [r10+8]
mov QWORD PTR [r11], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+16]
mov QWORD PTR [r11+8], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+24]
mov QWORD PTR [r11+16], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+32]
mov QWORD PTR [r11+24], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+40]
mov QWORD PTR [r11+32], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+48]
mov QWORD PTR [r11+40], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+56]
mov QWORD PTR [r11+48], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+64]
mov QWORD PTR [r11+56], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+72]
mov QWORD PTR [r11+64], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+80]
mov QWORD PTR [r11+72], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+88]
mov QWORD PTR [r11+80], rax
pext r8, r8, r12
adc r8, r8
mov QWORD PTR [r11+88], r8
adc r9, 0
lea rdx, QWORD PTR [rsp+192]
mov r10, rsp
mov rax, QWORD PTR [r10]
sub rax, QWORD PTR [rdx]
mov r8, QWORD PTR [r10+8]
mov QWORD PTR [r10], rax
sbb r8, QWORD PTR [rdx+8]
mov rax, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], r8
sbb rax, QWORD PTR [rdx+16]
mov r8, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], rax
sbb r8, QWORD PTR [rdx+24]
mov rax, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], r8
sbb rax, QWORD PTR [rdx+32]
mov r8, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], rax
sbb r8, QWORD PTR [rdx+40]
mov rax, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], r8
sbb rax, QWORD PTR [rdx+48]
mov r8, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], rax
sbb r8, QWORD PTR [rdx+56]
mov rax, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], r8
sbb rax, QWORD PTR [rdx+64]
mov r8, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], rax
sbb r8, QWORD PTR [rdx+72]
mov rax, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], r8
sbb rax, QWORD PTR [rdx+80]
mov r8, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], rax
sbb r8, QWORD PTR [rdx+88]
mov rax, QWORD PTR [r10+96]
mov QWORD PTR [r10+88], r8
sbb rax, QWORD PTR [rdx+96]
mov r8, QWORD PTR [r10+104]
mov QWORD PTR [r10+96], rax
sbb r8, QWORD PTR [rdx+104]
mov rax, QWORD PTR [r10+112]
mov QWORD PTR [r10+104], r8
sbb rax, QWORD PTR [rdx+112]
mov r8, QWORD PTR [r10+120]
mov QWORD PTR [r10+112], rax
sbb r8, QWORD PTR [rdx+120]
mov rax, QWORD PTR [r10+128]
mov QWORD PTR [r10+120], r8
sbb rax, QWORD PTR [rdx+128]
mov r8, QWORD PTR [r10+136]
mov QWORD PTR [r10+128], rax
sbb r8, QWORD PTR [rdx+136]
mov rax, QWORD PTR [r10+144]
mov QWORD PTR [r10+136], r8
sbb rax, QWORD PTR [rdx+144]
mov r8, QWORD PTR [r10+152]
mov QWORD PTR [r10+144], rax
sbb r8, QWORD PTR [rdx+152]
mov rax, QWORD PTR [r10+160]
mov QWORD PTR [r10+152], r8
sbb rax, QWORD PTR [rdx+160]
mov r8, QWORD PTR [r10+168]
mov QWORD PTR [r10+160], rax
sbb r8, QWORD PTR [rdx+168]
mov rax, QWORD PTR [r10+176]
mov QWORD PTR [r10+168], r8
sbb rax, QWORD PTR [rdx+176]
mov r8, QWORD PTR [r10+184]
mov QWORD PTR [r10+176], rax
sbb r8, QWORD PTR [rdx+184]
mov QWORD PTR [r10+184], r8
sbb r9, 0
mov rax, QWORD PTR [r10]
sub rax, QWORD PTR [rcx]
mov r8, QWORD PTR [r10+8]
mov QWORD PTR [r10], rax
sbb r8, QWORD PTR [rcx+8]
mov rax, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], r8
sbb rax, QWORD PTR [rcx+16]
mov r8, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], rax
sbb r8, QWORD PTR [rcx+24]
mov rax, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], r8
sbb rax, QWORD PTR [rcx+32]
mov r8, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], rax
sbb r8, QWORD PTR [rcx+40]
mov rax, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], r8
sbb rax, QWORD PTR [rcx+48]
mov r8, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], rax
sbb r8, QWORD PTR [rcx+56]
mov rax, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], r8
sbb rax, QWORD PTR [rcx+64]
mov r8, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], rax
sbb r8, QWORD PTR [rcx+72]
mov rax, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], r8
sbb rax, QWORD PTR [rcx+80]
mov r8, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], rax
sbb r8, QWORD PTR [rcx+88]
mov rax, QWORD PTR [r10+96]
mov QWORD PTR [r10+88], r8
sbb rax, QWORD PTR [rcx+96]
mov r8, QWORD PTR [r10+104]
mov QWORD PTR [r10+96], rax
sbb r8, QWORD PTR [rcx+104]
mov rax, QWORD PTR [r10+112]
mov QWORD PTR [r10+104], r8
sbb rax, QWORD PTR [rcx+112]
mov r8, QWORD PTR [r10+120]
mov QWORD PTR [r10+112], rax
sbb r8, QWORD PTR [rcx+120]
mov rax, QWORD PTR [r10+128]
mov QWORD PTR [r10+120], r8
sbb rax, QWORD PTR [rcx+128]
mov r8, QWORD PTR [r10+136]
mov QWORD PTR [r10+128], rax
sbb r8, QWORD PTR [rcx+136]
mov rax, QWORD PTR [r10+144]
mov QWORD PTR [r10+136], r8
sbb rax, QWORD PTR [rcx+144]
mov r8, QWORD PTR [r10+152]
mov QWORD PTR [r10+144], rax
sbb r8, QWORD PTR [rcx+152]
mov rax, QWORD PTR [r10+160]
mov QWORD PTR [r10+152], r8
sbb rax, QWORD PTR [rcx+160]
mov r8, QWORD PTR [r10+168]
mov QWORD PTR [r10+160], rax
sbb r8, QWORD PTR [rcx+168]
mov rax, QWORD PTR [r10+176]
mov QWORD PTR [r10+168], r8
sbb rax, QWORD PTR [rcx+176]
mov r8, QWORD PTR [r10+184]
mov QWORD PTR [r10+176], rax
sbb r8, QWORD PTR [rcx+184]
mov QWORD PTR [r10+184], r8
sbb r9, 0
sub r11, 96
; Add in place
mov rax, QWORD PTR [r11]
add rax, QWORD PTR [r10]
mov r8, QWORD PTR [r11+8]
mov QWORD PTR [r11], rax
adc r8, QWORD PTR [r10+8]
mov rax, QWORD PTR [r11+16]
mov QWORD PTR [r11+8], r8
adc rax, QWORD PTR [r10+16]
mov r8, QWORD PTR [r11+24]
mov QWORD PTR [r11+16], rax
adc r8, QWORD PTR [r10+24]
mov rax, QWORD PTR [r11+32]
mov QWORD PTR [r11+24], r8
adc rax, QWORD PTR [r10+32]
mov r8, QWORD PTR [r11+40]
mov QWORD PTR [r11+32], rax
adc r8, QWORD PTR [r10+40]
mov rax, QWORD PTR [r11+48]
mov QWORD PTR [r11+40], r8
adc rax, QWORD PTR [r10+48]
mov r8, QWORD PTR [r11+56]
mov QWORD PTR [r11+48], rax
adc r8, QWORD PTR [r10+56]
mov rax, QWORD PTR [r11+64]
mov QWORD PTR [r11+56], r8
adc rax, QWORD PTR [r10+64]
mov r8, QWORD PTR [r11+72]
mov QWORD PTR [r11+64], rax
adc r8, QWORD PTR [r10+72]
mov rax, QWORD PTR [r11+80]
mov QWORD PTR [r11+72], r8
adc rax, QWORD PTR [r10+80]
mov r8, QWORD PTR [r11+88]
mov QWORD PTR [r11+80], rax
adc r8, QWORD PTR [r10+88]
mov rax, QWORD PTR [r11+96]
mov QWORD PTR [r11+88], r8
adc rax, QWORD PTR [r10+96]
mov r8, QWORD PTR [r11+104]
mov QWORD PTR [r11+96], rax
adc r8, QWORD PTR [r10+104]
mov rax, QWORD PTR [r11+112]
mov QWORD PTR [r11+104], r8
adc rax, QWORD PTR [r10+112]
mov r8, QWORD PTR [r11+120]
mov QWORD PTR [r11+112], rax
adc r8, QWORD PTR [r10+120]
mov rax, QWORD PTR [r11+128]
mov QWORD PTR [r11+120], r8
adc rax, QWORD PTR [r10+128]
mov r8, QWORD PTR [r11+136]
mov QWORD PTR [r11+128], rax
adc r8, QWORD PTR [r10+136]
mov rax, QWORD PTR [r11+144]
mov QWORD PTR [r11+136], r8
adc rax, QWORD PTR [r10+144]
mov r8, QWORD PTR [r11+152]
mov QWORD PTR [r11+144], rax
adc r8, QWORD PTR [r10+152]
mov rax, QWORD PTR [r11+160]
mov QWORD PTR [r11+152], r8
adc rax, QWORD PTR [r10+160]
mov r8, QWORD PTR [r11+168]
mov QWORD PTR [r11+160], rax
adc r8, QWORD PTR [r10+168]
mov rax, QWORD PTR [r11+176]
mov QWORD PTR [r11+168], r8
adc rax, QWORD PTR [r10+176]
mov r8, QWORD PTR [r11+184]
mov QWORD PTR [r11+176], rax
adc r8, QWORD PTR [r10+184]
mov QWORD PTR [r11+184], r8
adc r9, 0
mov QWORD PTR [rcx+288], r9
; Add in place
mov rax, QWORD PTR [r11+96]
add rax, QWORD PTR [rdx]
mov r8, QWORD PTR [r11+104]
mov QWORD PTR [r11+96], rax
adc r8, QWORD PTR [rdx+8]
mov rax, QWORD PTR [r11+112]
mov QWORD PTR [r11+104], r8
adc rax, QWORD PTR [rdx+16]
mov r8, QWORD PTR [r11+120]
mov QWORD PTR [r11+112], rax
adc r8, QWORD PTR [rdx+24]
mov rax, QWORD PTR [r11+128]
mov QWORD PTR [r11+120], r8
adc rax, QWORD PTR [rdx+32]
mov r8, QWORD PTR [r11+136]
mov QWORD PTR [r11+128], rax
adc r8, QWORD PTR [rdx+40]
mov rax, QWORD PTR [r11+144]
mov QWORD PTR [r11+136], r8
adc rax, QWORD PTR [rdx+48]
mov r8, QWORD PTR [r11+152]
mov QWORD PTR [r11+144], rax
adc r8, QWORD PTR [rdx+56]
mov rax, QWORD PTR [r11+160]
mov QWORD PTR [r11+152], r8
adc rax, QWORD PTR [rdx+64]
mov r8, QWORD PTR [r11+168]
mov QWORD PTR [r11+160], rax
adc r8, QWORD PTR [rdx+72]
mov rax, QWORD PTR [r11+176]
mov QWORD PTR [r11+168], r8
adc rax, QWORD PTR [rdx+80]
mov r8, QWORD PTR [r11+184]
mov QWORD PTR [r11+176], rax
adc r8, QWORD PTR [rdx+88]
mov rax, QWORD PTR [r11+192]
mov QWORD PTR [r11+184], r8
adc rax, QWORD PTR [rdx+96]
mov QWORD PTR [r11+192], rax
; Add to zero
mov rax, QWORD PTR [rdx+104]
adc rax, 0
mov r8, QWORD PTR [rdx+112]
mov QWORD PTR [r11+200], rax
adc r8, 0
mov rax, QWORD PTR [rdx+120]
mov QWORD PTR [r11+208], r8
adc rax, 0
mov r8, QWORD PTR [rdx+128]
mov QWORD PTR [r11+216], rax
adc r8, 0
mov rax, QWORD PTR [rdx+136]
mov QWORD PTR [r11+224], r8
adc rax, 0
mov r8, QWORD PTR [rdx+144]
mov QWORD PTR [r11+232], rax
adc r8, 0
mov rax, QWORD PTR [rdx+152]
mov QWORD PTR [r11+240], r8
adc rax, 0
mov r8, QWORD PTR [rdx+160]
mov QWORD PTR [r11+248], rax
adc r8, 0
mov rax, QWORD PTR [rdx+168]
mov QWORD PTR [r11+256], r8
adc rax, 0
mov r8, QWORD PTR [rdx+176]
mov QWORD PTR [r11+264], rax
adc r8, 0
mov rax, QWORD PTR [rdx+184]
mov QWORD PTR [r11+272], r8
adc rax, 0
mov QWORD PTR [r11+280], rax
add rsp, 504
pop r12
ret
sp_3072_sqr_avx2_24 ENDP
_text ENDS
ENDIF
; /* Subtract b from a in place, 48 words wide. (a -= b)
;  *
;  * a  Minuend on entry; overwritten with the difference.
;  * b  Value to subtract; only read.
;  *
;  * a is addressed through rcx and b through rdx. 48 quadwords are
;  * processed (byte offsets 0..376). rax ends up holding the borrow out
;  * of the top word: 0 when there is none, all ones when there is one.
;  * r8 and r9 are used as scratch.
;  */
_text SEGMENT READONLY PARA
sp_3072_sub_in_place_48 PROC
        ; Clear rax first; the closing sbb turns the last borrow into
        ; the value left in rax. Only mov/sbb follow the opening sub,
        ; so the borrow flag is carried from word to word untouched.
        xor     rax, rax
        ; words 0..7 (word 0 opens the borrow chain with sub)
        mov     r8, QWORD PTR [rcx]
        sub     r8, QWORD PTR [rdx]
        mov     QWORD PTR [rcx], r8
        mov     r9, QWORD PTR [rcx+8]
        sbb     r9, QWORD PTR [rdx+8]
        mov     QWORD PTR [rcx+8], r9
        mov     r8, QWORD PTR [rcx+16]
        sbb     r8, QWORD PTR [rdx+16]
        mov     QWORD PTR [rcx+16], r8
        mov     r9, QWORD PTR [rcx+24]
        sbb     r9, QWORD PTR [rdx+24]
        mov     QWORD PTR [rcx+24], r9
        mov     r8, QWORD PTR [rcx+32]
        sbb     r8, QWORD PTR [rdx+32]
        mov     QWORD PTR [rcx+32], r8
        mov     r9, QWORD PTR [rcx+40]
        sbb     r9, QWORD PTR [rdx+40]
        mov     QWORD PTR [rcx+40], r9
        mov     r8, QWORD PTR [rcx+48]
        sbb     r8, QWORD PTR [rdx+48]
        mov     QWORD PTR [rcx+48], r8
        mov     r9, QWORD PTR [rcx+56]
        sbb     r9, QWORD PTR [rdx+56]
        mov     QWORD PTR [rcx+56], r9
        ; words 8..15
        mov     r8, QWORD PTR [rcx+64]
        sbb     r8, QWORD PTR [rdx+64]
        mov     QWORD PTR [rcx+64], r8
        mov     r9, QWORD PTR [rcx+72]
        sbb     r9, QWORD PTR [rdx+72]
        mov     QWORD PTR [rcx+72], r9
        mov     r8, QWORD PTR [rcx+80]
        sbb     r8, QWORD PTR [rdx+80]
        mov     QWORD PTR [rcx+80], r8
        mov     r9, QWORD PTR [rcx+88]
        sbb     r9, QWORD PTR [rdx+88]
        mov     QWORD PTR [rcx+88], r9
        mov     r8, QWORD PTR [rcx+96]
        sbb     r8, QWORD PTR [rdx+96]
        mov     QWORD PTR [rcx+96], r8
        mov     r9, QWORD PTR [rcx+104]
        sbb     r9, QWORD PTR [rdx+104]
        mov     QWORD PTR [rcx+104], r9
        mov     r8, QWORD PTR [rcx+112]
        sbb     r8, QWORD PTR [rdx+112]
        mov     QWORD PTR [rcx+112], r8
        mov     r9, QWORD PTR [rcx+120]
        sbb     r9, QWORD PTR [rdx+120]
        mov     QWORD PTR [rcx+120], r9
        ; words 16..23
        mov     r8, QWORD PTR [rcx+128]
        sbb     r8, QWORD PTR [rdx+128]
        mov     QWORD PTR [rcx+128], r8
        mov     r9, QWORD PTR [rcx+136]
        sbb     r9, QWORD PTR [rdx+136]
        mov     QWORD PTR [rcx+136], r9
        mov     r8, QWORD PTR [rcx+144]
        sbb     r8, QWORD PTR [rdx+144]
        mov     QWORD PTR [rcx+144], r8
        mov     r9, QWORD PTR [rcx+152]
        sbb     r9, QWORD PTR [rdx+152]
        mov     QWORD PTR [rcx+152], r9
        mov     r8, QWORD PTR [rcx+160]
        sbb     r8, QWORD PTR [rdx+160]
        mov     QWORD PTR [rcx+160], r8
        mov     r9, QWORD PTR [rcx+168]
        sbb     r9, QWORD PTR [rdx+168]
        mov     QWORD PTR [rcx+168], r9
        mov     r8, QWORD PTR [rcx+176]
        sbb     r8, QWORD PTR [rdx+176]
        mov     QWORD PTR [rcx+176], r8
        mov     r9, QWORD PTR [rcx+184]
        sbb     r9, QWORD PTR [rdx+184]
        mov     QWORD PTR [rcx+184], r9
        ; words 24..31
        mov     r8, QWORD PTR [rcx+192]
        sbb     r8, QWORD PTR [rdx+192]
        mov     QWORD PTR [rcx+192], r8
        mov     r9, QWORD PTR [rcx+200]
        sbb     r9, QWORD PTR [rdx+200]
        mov     QWORD PTR [rcx+200], r9
        mov     r8, QWORD PTR [rcx+208]
        sbb     r8, QWORD PTR [rdx+208]
        mov     QWORD PTR [rcx+208], r8
        mov     r9, QWORD PTR [rcx+216]
        sbb     r9, QWORD PTR [rdx+216]
        mov     QWORD PTR [rcx+216], r9
        mov     r8, QWORD PTR [rcx+224]
        sbb     r8, QWORD PTR [rdx+224]
        mov     QWORD PTR [rcx+224], r8
        mov     r9, QWORD PTR [rcx+232]
        sbb     r9, QWORD PTR [rdx+232]
        mov     QWORD PTR [rcx+232], r9
        mov     r8, QWORD PTR [rcx+240]
        sbb     r8, QWORD PTR [rdx+240]
        mov     QWORD PTR [rcx+240], r8
        mov     r9, QWORD PTR [rcx+248]
        sbb     r9, QWORD PTR [rdx+248]
        mov     QWORD PTR [rcx+248], r9
        ; words 32..39
        mov     r8, QWORD PTR [rcx+256]
        sbb     r8, QWORD PTR [rdx+256]
        mov     QWORD PTR [rcx+256], r8
        mov     r9, QWORD PTR [rcx+264]
        sbb     r9, QWORD PTR [rdx+264]
        mov     QWORD PTR [rcx+264], r9
        mov     r8, QWORD PTR [rcx+272]
        sbb     r8, QWORD PTR [rdx+272]
        mov     QWORD PTR [rcx+272], r8
        mov     r9, QWORD PTR [rcx+280]
        sbb     r9, QWORD PTR [rdx+280]
        mov     QWORD PTR [rcx+280], r9
        mov     r8, QWORD PTR [rcx+288]
        sbb     r8, QWORD PTR [rdx+288]
        mov     QWORD PTR [rcx+288], r8
        mov     r9, QWORD PTR [rcx+296]
        sbb     r9, QWORD PTR [rdx+296]
        mov     QWORD PTR [rcx+296], r9
        mov     r8, QWORD PTR [rcx+304]
        sbb     r8, QWORD PTR [rdx+304]
        mov     QWORD PTR [rcx+304], r8
        mov     r9, QWORD PTR [rcx+312]
        sbb     r9, QWORD PTR [rdx+312]
        mov     QWORD PTR [rcx+312], r9
        ; words 40..47
        mov     r8, QWORD PTR [rcx+320]
        sbb     r8, QWORD PTR [rdx+320]
        mov     QWORD PTR [rcx+320], r8
        mov     r9, QWORD PTR [rcx+328]
        sbb     r9, QWORD PTR [rdx+328]
        mov     QWORD PTR [rcx+328], r9
        mov     r8, QWORD PTR [rcx+336]
        sbb     r8, QWORD PTR [rdx+336]
        mov     QWORD PTR [rcx+336], r8
        mov     r9, QWORD PTR [rcx+344]
        sbb     r9, QWORD PTR [rdx+344]
        mov     QWORD PTR [rcx+344], r9
        mov     r8, QWORD PTR [rcx+352]
        sbb     r8, QWORD PTR [rdx+352]
        mov     QWORD PTR [rcx+352], r8
        mov     r9, QWORD PTR [rcx+360]
        sbb     r9, QWORD PTR [rdx+360]
        mov     QWORD PTR [rcx+360], r9
        mov     r8, QWORD PTR [rcx+368]
        sbb     r8, QWORD PTR [rdx+368]
        mov     QWORD PTR [rcx+368], r8
        mov     r9, QWORD PTR [rcx+376]
        sbb     r9, QWORD PTR [rdx+376]
        mov     QWORD PTR [rcx+376], r9
        ; rax = 0 - borrow: 0 without a final borrow, all ones with one
        sbb     rax, 0
        ret
sp_3072_sub_in_place_48 ENDP
_text ENDS
; /* Add two 48-word values and store the sum. (r = a + b)
;  *
;  * r  Receives the 48-word sum.
;  * a  First addend; only read.
;  * b  Second addend; only read.
;  *
;  * r is addressed through rcx, a through rdx and b through r8. 48
;  * quadwords are processed (byte offsets 0..376). rax ends up holding
;  * the carry out of the top word (0 or 1). r9 and r10 are scratch.
;  */
_text SEGMENT READONLY PARA
sp_3072_add_48 PROC
        ; Word 0 opens the carry chain. rax is zeroed ahead of the add;
        ; everything after the add is mov/adc only, so the carry flag
        ; travels from word to word untouched.
        mov     r9, QWORD PTR [rdx]
        xor     rax, rax
        add     r9, QWORD PTR [r8]
        ; Words 1..46, two per pass (odd word in r10, even word in r9).
        ; Each word of a is loaded before the previous sum word is
        ; stored to r, so the instruction order matches the fully
        ; unrolled form exactly.
sp_3072_add_48_disp = 8
        REPEAT  23
        mov     r10, QWORD PTR [rdx+sp_3072_add_48_disp]
        mov     QWORD PTR [rcx+sp_3072_add_48_disp-8], r9
        adc     r10, QWORD PTR [r8+sp_3072_add_48_disp]
        mov     r9, QWORD PTR [rdx+sp_3072_add_48_disp+8]
        mov     QWORD PTR [rcx+sp_3072_add_48_disp], r10
        adc     r9, QWORD PTR [r8+sp_3072_add_48_disp+8]
sp_3072_add_48_disp = sp_3072_add_48_disp + 16
        ENDM
        ; Word 47, plus the pending store of word 46
        mov     r10, QWORD PTR [rdx+376]
        mov     QWORD PTR [rcx+368], r9
        adc     r10, QWORD PTR [r8+376]
        mov     QWORD PTR [rcx+376], r10
        ; rax = 0 + carry out of the top word
        adc     rax, 0
        ret
sp_3072_add_48 ENDP
_text ENDS
; /* Multiply a and b into r. (r = a * b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_3072_mul_48 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
sub rsp, 1192
mov QWORD PTR [rsp+1152], rcx
mov QWORD PTR [rsp+1160], rdx
mov QWORD PTR [rsp+1168], r8
lea r12, QWORD PTR [rsp+768]
lea r14, QWORD PTR [rdx+192]
; Add
mov rax, QWORD PTR [rdx]
xor r15, r15
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [rdx+8]
mov QWORD PTR [r12], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [rdx+16]
mov QWORD PTR [r12+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [rdx+24]
mov QWORD PTR [r12+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [rdx+32]
mov QWORD PTR [r12+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [rdx+40]
mov QWORD PTR [r12+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r12+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [rdx+56]
mov QWORD PTR [r12+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [rdx+64]
mov QWORD PTR [r12+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [rdx+72]
mov QWORD PTR [r12+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [rdx+80]
mov QWORD PTR [r12+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [rdx+88]
mov QWORD PTR [r12+80], r9
adc r10, QWORD PTR [r14+88]
mov rax, QWORD PTR [rdx+96]
mov QWORD PTR [r12+88], r10
adc rax, QWORD PTR [r14+96]
mov r9, QWORD PTR [rdx+104]
mov QWORD PTR [r12+96], rax
adc r9, QWORD PTR [r14+104]
mov r10, QWORD PTR [rdx+112]
mov QWORD PTR [r12+104], r9
adc r10, QWORD PTR [r14+112]
mov rax, QWORD PTR [rdx+120]
mov QWORD PTR [r12+112], r10
adc rax, QWORD PTR [r14+120]
mov r9, QWORD PTR [rdx+128]
mov QWORD PTR [r12+120], rax
adc r9, QWORD PTR [r14+128]
mov r10, QWORD PTR [rdx+136]
mov QWORD PTR [r12+128], r9
adc r10, QWORD PTR [r14+136]
mov rax, QWORD PTR [rdx+144]
mov QWORD PTR [r12+136], r10
adc rax, QWORD PTR [r14+144]
mov r9, QWORD PTR [rdx+152]
mov QWORD PTR [r12+144], rax
adc r9, QWORD PTR [r14+152]
mov r10, QWORD PTR [rdx+160]
mov QWORD PTR [r12+152], r9
adc r10, QWORD PTR [r14+160]
mov rax, QWORD PTR [rdx+168]
mov QWORD PTR [r12+160], r10
adc rax, QWORD PTR [r14+168]
mov r9, QWORD PTR [rdx+176]
mov QWORD PTR [r12+168], rax
adc r9, QWORD PTR [r14+176]
mov r10, QWORD PTR [rdx+184]
mov QWORD PTR [r12+176], r9
adc r10, QWORD PTR [r14+184]
mov QWORD PTR [r12+184], r10
adc r15, 0
mov QWORD PTR [rsp+1176], r15
lea r13, QWORD PTR [rsp+960]
lea r14, QWORD PTR [r8+192]
; Add
mov rax, QWORD PTR [r8]
xor rdi, rdi
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [r8+8]
mov QWORD PTR [r13], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [r8+16]
mov QWORD PTR [r13+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [r8+24]
mov QWORD PTR [r13+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [r8+32]
mov QWORD PTR [r13+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [r8+40]
mov QWORD PTR [r13+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [r8+48]
mov QWORD PTR [r13+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [r8+56]
mov QWORD PTR [r13+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [r8+64]
mov QWORD PTR [r13+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [r8+72]
mov QWORD PTR [r13+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [r8+80]
mov QWORD PTR [r13+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [r8+88]
mov QWORD PTR [r13+80], r9
adc r10, QWORD PTR [r14+88]
mov rax, QWORD PTR [r8+96]
mov QWORD PTR [r13+88], r10
adc rax, QWORD PTR [r14+96]
mov r9, QWORD PTR [r8+104]
mov QWORD PTR [r13+96], rax
adc r9, QWORD PTR [r14+104]
mov r10, QWORD PTR [r8+112]
mov QWORD PTR [r13+104], r9
adc r10, QWORD PTR [r14+112]
mov rax, QWORD PTR [r8+120]
mov QWORD PTR [r13+112], r10
adc rax, QWORD PTR [r14+120]
mov r9, QWORD PTR [r8+128]
mov QWORD PTR [r13+120], rax
adc r9, QWORD PTR [r14+128]
mov r10, QWORD PTR [r8+136]
mov QWORD PTR [r13+128], r9
adc r10, QWORD PTR [r14+136]
mov rax, QWORD PTR [r8+144]
mov QWORD PTR [r13+136], r10
adc rax, QWORD PTR [r14+144]
mov r9, QWORD PTR [r8+152]
mov QWORD PTR [r13+144], rax
adc r9, QWORD PTR [r14+152]
mov r10, QWORD PTR [r8+160]
mov QWORD PTR [r13+152], r9
adc r10, QWORD PTR [r14+160]
mov rax, QWORD PTR [r8+168]
mov QWORD PTR [r13+160], r10
adc rax, QWORD PTR [r14+168]
mov r9, QWORD PTR [r8+176]
mov QWORD PTR [r13+168], rax
adc r9, QWORD PTR [r14+176]
mov r10, QWORD PTR [r8+184]
mov QWORD PTR [r13+176], r9
adc r10, QWORD PTR [r14+184]
mov QWORD PTR [r13+184], r10
adc rdi, 0
mov QWORD PTR [rsp+1184], rdi
mov r8, r13
mov rdx, r12
mov rcx, rsp
call sp_3072_mul_24
mov r8, QWORD PTR [rsp+1168]
mov rdx, QWORD PTR [rsp+1160]
lea rcx, QWORD PTR [rsp+384]
add r8, 192
add rdx, 192
call sp_3072_mul_24
mov r8, QWORD PTR [rsp+1168]
mov rdx, QWORD PTR [rsp+1160]
mov rcx, QWORD PTR [rsp+1152]
call sp_3072_mul_24
IFDEF _WIN64
mov r8, QWORD PTR [rsp+1168]
mov rdx, QWORD PTR [rsp+1160]
mov rcx, QWORD PTR [rsp+1152]
ENDIF
mov r15, QWORD PTR [rsp+1176]
mov rdi, QWORD PTR [rsp+1184]
mov rsi, QWORD PTR [rsp+1152]
mov r11, r15
lea r12, QWORD PTR [rsp+768]
lea r13, QWORD PTR [rsp+960]
and r11, rdi
neg r15
neg rdi
add rsi, 384
mov rax, QWORD PTR [r12]
mov r9, QWORD PTR [r13]
and rax, rdi
and r9, r15
mov QWORD PTR [r12], rax
mov QWORD PTR [r13], r9
mov rax, QWORD PTR [r12+8]
mov r9, QWORD PTR [r13+8]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+8], rax
mov QWORD PTR [r13+8], r9
mov rax, QWORD PTR [r12+16]
mov r9, QWORD PTR [r13+16]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+16], rax
mov QWORD PTR [r13+16], r9
mov rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [r13+24]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+24], rax
mov QWORD PTR [r13+24], r9
mov rax, QWORD PTR [r12+32]
mov r9, QWORD PTR [r13+32]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+32], rax
mov QWORD PTR [r13+32], r9
mov rax, QWORD PTR [r12+40]
mov r9, QWORD PTR [r13+40]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+40], rax
mov QWORD PTR [r13+40], r9
mov rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [r13+48]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+48], rax
mov QWORD PTR [r13+48], r9
mov rax, QWORD PTR [r12+56]
mov r9, QWORD PTR [r13+56]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+56], rax
mov QWORD PTR [r13+56], r9
mov rax, QWORD PTR [r12+64]
mov r9, QWORD PTR [r13+64]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+64], rax
mov QWORD PTR [r13+64], r9
mov rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [r13+72]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+72], rax
mov QWORD PTR [r13+72], r9
mov rax, QWORD PTR [r12+80]
mov r9, QWORD PTR [r13+80]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+80], rax
mov QWORD PTR [r13+80], r9
mov rax, QWORD PTR [r12+88]
mov r9, QWORD PTR [r13+88]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+88], rax
mov QWORD PTR [r13+88], r9
mov rax, QWORD PTR [r12+96]
mov r9, QWORD PTR [r13+96]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+96], rax
mov QWORD PTR [r13+96], r9
mov rax, QWORD PTR [r12+104]
mov r9, QWORD PTR [r13+104]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+104], rax
mov QWORD PTR [r13+104], r9
mov rax, QWORD PTR [r12+112]
mov r9, QWORD PTR [r13+112]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+112], rax
mov QWORD PTR [r13+112], r9
mov rax, QWORD PTR [r12+120]
mov r9, QWORD PTR [r13+120]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+120], rax
mov QWORD PTR [r13+120], r9
mov rax, QWORD PTR [r12+128]
mov r9, QWORD PTR [r13+128]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+128], rax
mov QWORD PTR [r13+128], r9
mov rax, QWORD PTR [r12+136]
mov r9, QWORD PTR [r13+136]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+136], rax
mov QWORD PTR [r13+136], r9
mov rax, QWORD PTR [r12+144]
mov r9, QWORD PTR [r13+144]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+144], rax
mov QWORD PTR [r13+144], r9
mov rax, QWORD PTR [r12+152]
mov r9, QWORD PTR [r13+152]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+152], rax
mov QWORD PTR [r13+152], r9
mov rax, QWORD PTR [r12+160]
mov r9, QWORD PTR [r13+160]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+160], rax
mov QWORD PTR [r13+160], r9
mov rax, QWORD PTR [r12+168]
mov r9, QWORD PTR [r13+168]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+168], rax
mov QWORD PTR [r13+168], r9
mov rax, QWORD PTR [r12+176]
mov r9, QWORD PTR [r13+176]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+176], rax
mov QWORD PTR [r13+176], r9
mov rax, QWORD PTR [r12+184]
mov r9, QWORD PTR [r13+184]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+184], rax
mov QWORD PTR [r13+184], r9
mov rax, QWORD PTR [r12]
add rax, QWORD PTR [r13]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r13+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [rsi+128], r9
adc r10, QWORD PTR [r13+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [rsi+136], r10
adc rax, QWORD PTR [r13+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [rsi+144], rax
adc r9, QWORD PTR [r13+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [rsi+152], r9
adc r10, QWORD PTR [r13+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [rsi+160], r10
adc rax, QWORD PTR [r13+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [rsi+168], rax
adc r9, QWORD PTR [r13+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [rsi+176], r9
adc r10, QWORD PTR [r13+184]
mov QWORD PTR [rsi+184], r10
adc r11, 0
lea r13, QWORD PTR [rsp+384]
mov r12, rsp
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [r13]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [r13+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [r13+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [r13+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [r13+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [r13+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [r13+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [r13+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [r13+184]
mov rax, QWORD PTR [r12+192]
mov QWORD PTR [r12+184], r10
sbb rax, QWORD PTR [r13+192]
mov r9, QWORD PTR [r12+200]
mov QWORD PTR [r12+192], rax
sbb r9, QWORD PTR [r13+200]
mov r10, QWORD PTR [r12+208]
mov QWORD PTR [r12+200], r9
sbb r10, QWORD PTR [r13+208]
mov rax, QWORD PTR [r12+216]
mov QWORD PTR [r12+208], r10
sbb rax, QWORD PTR [r13+216]
mov r9, QWORD PTR [r12+224]
mov QWORD PTR [r12+216], rax
sbb r9, QWORD PTR [r13+224]
mov r10, QWORD PTR [r12+232]
mov QWORD PTR [r12+224], r9
sbb r10, QWORD PTR [r13+232]
mov rax, QWORD PTR [r12+240]
mov QWORD PTR [r12+232], r10
sbb rax, QWORD PTR [r13+240]
mov r9, QWORD PTR [r12+248]
mov QWORD PTR [r12+240], rax
sbb r9, QWORD PTR [r13+248]
mov r10, QWORD PTR [r12+256]
mov QWORD PTR [r12+248], r9
sbb r10, QWORD PTR [r13+256]
mov rax, QWORD PTR [r12+264]
mov QWORD PTR [r12+256], r10
sbb rax, QWORD PTR [r13+264]
mov r9, QWORD PTR [r12+272]
mov QWORD PTR [r12+264], rax
sbb r9, QWORD PTR [r13+272]
mov r10, QWORD PTR [r12+280]
mov QWORD PTR [r12+272], r9
sbb r10, QWORD PTR [r13+280]
mov rax, QWORD PTR [r12+288]
mov QWORD PTR [r12+280], r10
sbb rax, QWORD PTR [r13+288]
mov r9, QWORD PTR [r12+296]
mov QWORD PTR [r12+288], rax
sbb r9, QWORD PTR [r13+296]
mov r10, QWORD PTR [r12+304]
mov QWORD PTR [r12+296], r9
sbb r10, QWORD PTR [r13+304]
mov rax, QWORD PTR [r12+312]
mov QWORD PTR [r12+304], r10
sbb rax, QWORD PTR [r13+312]
mov r9, QWORD PTR [r12+320]
mov QWORD PTR [r12+312], rax
sbb r9, QWORD PTR [r13+320]
mov r10, QWORD PTR [r12+328]
mov QWORD PTR [r12+320], r9
sbb r10, QWORD PTR [r13+328]
mov rax, QWORD PTR [r12+336]
mov QWORD PTR [r12+328], r10
sbb rax, QWORD PTR [r13+336]
mov r9, QWORD PTR [r12+344]
mov QWORD PTR [r12+336], rax
sbb r9, QWORD PTR [r13+344]
mov r10, QWORD PTR [r12+352]
mov QWORD PTR [r12+344], r9
sbb r10, QWORD PTR [r13+352]
mov rax, QWORD PTR [r12+360]
mov QWORD PTR [r12+352], r10
sbb rax, QWORD PTR [r13+360]
mov r9, QWORD PTR [r12+368]
mov QWORD PTR [r12+360], rax
sbb r9, QWORD PTR [r13+368]
mov r10, QWORD PTR [r12+376]
mov QWORD PTR [r12+368], r9
sbb r10, QWORD PTR [r13+376]
mov QWORD PTR [r12+376], r10
sbb r11, 0
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [rcx]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [rcx+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [rcx+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [rcx+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [rcx+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [rcx+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [rcx+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [rcx+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [rcx+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [rcx+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [rcx+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [rcx+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [rcx+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [rcx+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [rcx+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [rcx+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [rcx+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [rcx+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [rcx+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [rcx+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [rcx+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [rcx+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [rcx+184]
mov rax, QWORD PTR [r12+192]
mov QWORD PTR [r12+184], r10
sbb rax, QWORD PTR [rcx+192]
mov r9, QWORD PTR [r12+200]
mov QWORD PTR [r12+192], rax
sbb r9, QWORD PTR [rcx+200]
mov r10, QWORD PTR [r12+208]
mov QWORD PTR [r12+200], r9
sbb r10, QWORD PTR [rcx+208]
mov rax, QWORD PTR [r12+216]
mov QWORD PTR [r12+208], r10
sbb rax, QWORD PTR [rcx+216]
mov r9, QWORD PTR [r12+224]
mov QWORD PTR [r12+216], rax
sbb r9, QWORD PTR [rcx+224]
mov r10, QWORD PTR [r12+232]
mov QWORD PTR [r12+224], r9
sbb r10, QWORD PTR [rcx+232]
mov rax, QWORD PTR [r12+240]
mov QWORD PTR [r12+232], r10
sbb rax, QWORD PTR [rcx+240]
mov r9, QWORD PTR [r12+248]
mov QWORD PTR [r12+240], rax
sbb r9, QWORD PTR [rcx+248]
mov r10, QWORD PTR [r12+256]
mov QWORD PTR [r12+248], r9
sbb r10, QWORD PTR [rcx+256]
mov rax, QWORD PTR [r12+264]
mov QWORD PTR [r12+256], r10
sbb rax, QWORD PTR [rcx+264]
mov r9, QWORD PTR [r12+272]
mov QWORD PTR [r12+264], rax
sbb r9, QWORD PTR [rcx+272]
mov r10, QWORD PTR [r12+280]
mov QWORD PTR [r12+272], r9
sbb r10, QWORD PTR [rcx+280]
mov rax, QWORD PTR [r12+288]
mov QWORD PTR [r12+280], r10
sbb rax, QWORD PTR [rcx+288]
mov r9, QWORD PTR [r12+296]
mov QWORD PTR [r12+288], rax
sbb r9, QWORD PTR [rcx+296]
mov r10, QWORD PTR [r12+304]
mov QWORD PTR [r12+296], r9
sbb r10, QWORD PTR [rcx+304]
mov rax, QWORD PTR [r12+312]
mov QWORD PTR [r12+304], r10
sbb rax, QWORD PTR [rcx+312]
mov r9, QWORD PTR [r12+320]
mov QWORD PTR [r12+312], rax
sbb r9, QWORD PTR [rcx+320]
mov r10, QWORD PTR [r12+328]
mov QWORD PTR [r12+320], r9
sbb r10, QWORD PTR [rcx+328]
mov rax, QWORD PTR [r12+336]
mov QWORD PTR [r12+328], r10
sbb rax, QWORD PTR [rcx+336]
mov r9, QWORD PTR [r12+344]
mov QWORD PTR [r12+336], rax
sbb r9, QWORD PTR [rcx+344]
mov r10, QWORD PTR [r12+352]
mov QWORD PTR [r12+344], r9
sbb r10, QWORD PTR [rcx+352]
mov rax, QWORD PTR [r12+360]
mov QWORD PTR [r12+352], r10
sbb rax, QWORD PTR [rcx+360]
mov r9, QWORD PTR [r12+368]
mov QWORD PTR [r12+360], rax
sbb r9, QWORD PTR [rcx+368]
mov r10, QWORD PTR [r12+376]
mov QWORD PTR [r12+368], r9
sbb r10, QWORD PTR [rcx+376]
mov QWORD PTR [r12+376], r10
sbb r11, 0
sub rsi, 192
; Add
mov rax, QWORD PTR [rsi]
add rax, QWORD PTR [r12]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r12+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r12+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r12+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r12+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r12+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r12+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r12+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r12+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r12+96]
mov r9, QWORD PTR [rsi+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r12+104]
mov r10, QWORD PTR [rsi+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r12+112]
mov rax, QWORD PTR [rsi+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r12+120]
mov r9, QWORD PTR [rsi+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r12+128]
mov r10, QWORD PTR [rsi+136]
mov QWORD PTR [rsi+128], r9
adc r10, QWORD PTR [r12+136]
mov rax, QWORD PTR [rsi+144]
mov QWORD PTR [rsi+136], r10
adc rax, QWORD PTR [r12+144]
mov r9, QWORD PTR [rsi+152]
mov QWORD PTR [rsi+144], rax
adc r9, QWORD PTR [r12+152]
mov r10, QWORD PTR [rsi+160]
mov QWORD PTR [rsi+152], r9
adc r10, QWORD PTR [r12+160]
mov rax, QWORD PTR [rsi+168]
mov QWORD PTR [rsi+160], r10
adc rax, QWORD PTR [r12+168]
mov r9, QWORD PTR [rsi+176]
mov QWORD PTR [rsi+168], rax
adc r9, QWORD PTR [r12+176]
mov r10, QWORD PTR [rsi+184]
mov QWORD PTR [rsi+176], r9
adc r10, QWORD PTR [r12+184]
mov rax, QWORD PTR [rsi+192]
mov QWORD PTR [rsi+184], r10
adc rax, QWORD PTR [r12+192]
mov r9, QWORD PTR [rsi+200]
mov QWORD PTR [rsi+192], rax
adc r9, QWORD PTR [r12+200]
mov r10, QWORD PTR [rsi+208]
mov QWORD PTR [rsi+200], r9
adc r10, QWORD PTR [r12+208]
mov rax, QWORD PTR [rsi+216]
mov QWORD PTR [rsi+208], r10
adc rax, QWORD PTR [r12+216]
mov r9, QWORD PTR [rsi+224]
mov QWORD PTR [rsi+216], rax
adc r9, QWORD PTR [r12+224]
mov r10, QWORD PTR [rsi+232]
mov QWORD PTR [rsi+224], r9
adc r10, QWORD PTR [r12+232]
mov rax, QWORD PTR [rsi+240]
mov QWORD PTR [rsi+232], r10
adc rax, QWORD PTR [r12+240]
mov r9, QWORD PTR [rsi+248]
mov QWORD PTR [rsi+240], rax
adc r9, QWORD PTR [r12+248]
mov r10, QWORD PTR [rsi+256]
mov QWORD PTR [rsi+248], r9
adc r10, QWORD PTR [r12+256]
mov rax, QWORD PTR [rsi+264]
mov QWORD PTR [rsi+256], r10
adc rax, QWORD PTR [r12+264]
mov r9, QWORD PTR [rsi+272]
mov QWORD PTR [rsi+264], rax
adc r9, QWORD PTR [r12+272]
mov r10, QWORD PTR [rsi+280]
mov QWORD PTR [rsi+272], r9
adc r10, QWORD PTR [r12+280]
mov rax, QWORD PTR [rsi+288]
mov QWORD PTR [rsi+280], r10
adc rax, QWORD PTR [r12+288]
mov r9, QWORD PTR [rsi+296]
mov QWORD PTR [rsi+288], rax
adc r9, QWORD PTR [r12+296]
mov r10, QWORD PTR [rsi+304]
mov QWORD PTR [rsi+296], r9
adc r10, QWORD PTR [r12+304]
mov rax, QWORD PTR [rsi+312]
mov QWORD PTR [rsi+304], r10
adc rax, QWORD PTR [r12+312]
mov r9, QWORD PTR [rsi+320]
mov QWORD PTR [rsi+312], rax
adc r9, QWORD PTR [r12+320]
mov r10, QWORD PTR [rsi+328]
mov QWORD PTR [rsi+320], r9
adc r10, QWORD PTR [r12+328]
mov rax, QWORD PTR [rsi+336]
mov QWORD PTR [rsi+328], r10
adc rax, QWORD PTR [r12+336]
mov r9, QWORD PTR [rsi+344]
mov QWORD PTR [rsi+336], rax
adc r9, QWORD PTR [r12+344]
mov r10, QWORD PTR [rsi+352]
mov QWORD PTR [rsi+344], r9
adc r10, QWORD PTR [r12+352]
mov rax, QWORD PTR [rsi+360]
mov QWORD PTR [rsi+352], r10
adc rax, QWORD PTR [r12+360]
mov r9, QWORD PTR [rsi+368]
mov QWORD PTR [rsi+360], rax
adc r9, QWORD PTR [r12+368]
mov r10, QWORD PTR [rsi+376]
mov QWORD PTR [rsi+368], r9
adc r10, QWORD PTR [r12+376]
mov QWORD PTR [rsi+376], r10
adc r11, 0
mov QWORD PTR [rcx+576], r11
add rsi, 192
; Add
mov rax, QWORD PTR [rsi]
add rax, QWORD PTR [r13]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [rsi+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [rsi+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [rsi+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [rsi+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r13+128]
mov r10, QWORD PTR [rsi+136]
mov QWORD PTR [rsi+128], r9
adc r10, QWORD PTR [r13+136]
mov rax, QWORD PTR [rsi+144]
mov QWORD PTR [rsi+136], r10
adc rax, QWORD PTR [r13+144]
mov r9, QWORD PTR [rsi+152]
mov QWORD PTR [rsi+144], rax
adc r9, QWORD PTR [r13+152]
mov r10, QWORD PTR [rsi+160]
mov QWORD PTR [rsi+152], r9
adc r10, QWORD PTR [r13+160]
mov rax, QWORD PTR [rsi+168]
mov QWORD PTR [rsi+160], r10
adc rax, QWORD PTR [r13+168]
mov r9, QWORD PTR [rsi+176]
mov QWORD PTR [rsi+168], rax
adc r9, QWORD PTR [r13+176]
mov r10, QWORD PTR [rsi+184]
mov QWORD PTR [rsi+176], r9
adc r10, QWORD PTR [r13+184]
mov rax, QWORD PTR [rsi+192]
mov QWORD PTR [rsi+184], r10
adc rax, QWORD PTR [r13+192]
mov QWORD PTR [rsi+192], rax
; Add to zero
mov rax, QWORD PTR [r13+200]
adc rax, 0
mov r9, QWORD PTR [r13+208]
mov QWORD PTR [rsi+200], rax
adc r9, 0
mov r10, QWORD PTR [r13+216]
mov QWORD PTR [rsi+208], r9
adc r10, 0
mov rax, QWORD PTR [r13+224]
mov QWORD PTR [rsi+216], r10
adc rax, 0
mov r9, QWORD PTR [r13+232]
mov QWORD PTR [rsi+224], rax
adc r9, 0
mov r10, QWORD PTR [r13+240]
mov QWORD PTR [rsi+232], r9
adc r10, 0
mov rax, QWORD PTR [r13+248]
mov QWORD PTR [rsi+240], r10
adc rax, 0
mov r9, QWORD PTR [r13+256]
mov QWORD PTR [rsi+248], rax
adc r9, 0
mov r10, QWORD PTR [r13+264]
mov QWORD PTR [rsi+256], r9
adc r10, 0
mov rax, QWORD PTR [r13+272]
mov QWORD PTR [rsi+264], r10
adc rax, 0
mov r9, QWORD PTR [r13+280]
mov QWORD PTR [rsi+272], rax
adc r9, 0
mov r10, QWORD PTR [r13+288]
mov QWORD PTR [rsi+280], r9
adc r10, 0
mov rax, QWORD PTR [r13+296]
mov QWORD PTR [rsi+288], r10
adc rax, 0
mov r9, QWORD PTR [r13+304]
mov QWORD PTR [rsi+296], rax
adc r9, 0
mov r10, QWORD PTR [r13+312]
mov QWORD PTR [rsi+304], r9
adc r10, 0
mov rax, QWORD PTR [r13+320]
mov QWORD PTR [rsi+312], r10
adc rax, 0
mov r9, QWORD PTR [r13+328]
mov QWORD PTR [rsi+320], rax
adc r9, 0
mov r10, QWORD PTR [r13+336]
mov QWORD PTR [rsi+328], r9
adc r10, 0
mov rax, QWORD PTR [r13+344]
mov QWORD PTR [rsi+336], r10
adc rax, 0
mov r9, QWORD PTR [r13+352]
mov QWORD PTR [rsi+344], rax
adc r9, 0
mov r10, QWORD PTR [r13+360]
mov QWORD PTR [rsi+352], r9
adc r10, 0
mov rax, QWORD PTR [r13+368]
mov QWORD PTR [rsi+360], r10
adc rax, 0
mov r9, QWORD PTR [r13+376]
mov QWORD PTR [rsi+368], rax
adc r9, 0
mov QWORD PTR [rsi+376], r9
add rsp, 1192
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_3072_mul_48 ENDP
_text ENDS
; /* Double a and store the result in r. (r = a + a)
; *
; * Processes 24 64-bit words (byte offsets 0..184 of both buffers).
; *
; * r A single precision integer. Pointer passed in rcx.
; * a A single precision integer. Pointer passed in rdx.
; *
; * Returns the carry out of the top word (0 or 1) in rax.
; * Overwrites r8, r9 and the flags.
; */
_text SEGMENT READONLY PARA
sp_3072_dbl_24 PROC
; Words 0 and 1 open the carry chain. rax is cleared before the first add
; because xor would destroy the carry flag once the chain has started.
mov r8, QWORD PTR [rdx]
xor rax, rax
add r8, r8
mov r9, QWORD PTR [rdx+8]
mov QWORD PTR [rcx], r8
adc r9, r9
; Words 2..23, two per iteration. Each step loads the next word before the
; previous result is stored, and only mov sits between the adc instructions,
; so the carry flag flows from one word to the next.
sp_3072_dbl_24_off = 16
REPT 11
mov r8, QWORD PTR [rdx+sp_3072_dbl_24_off]
mov QWORD PTR [rcx+sp_3072_dbl_24_off-8], r9
adc r8, r8
mov r9, QWORD PTR [rdx+sp_3072_dbl_24_off+8]
mov QWORD PTR [rcx+sp_3072_dbl_24_off], r8
adc r9, r9
sp_3072_dbl_24_off = sp_3072_dbl_24_off + 16
ENDM
; Store the top word and turn the final carry into the return value.
mov QWORD PTR [rcx+184], r9
adc rax, 0
ret
sp_3072_dbl_24 ENDP
_text ENDS
; /* Square a and put result in r. (r = a * a)
; *
; * a is read as 48 64-bit words and r is written as 96 words. The square is
; * built from three 24-word squarings (Karatsuba), with al = a[0..23] and
; * ah = a[24..47]:
; *   z0 = al^2, z2 = ah^2, z1 = (al + ah)^2
; *   r  = z0 + ((z1 - z0 - z2) << 1536) + (z2 << 3072)
; * The carry bit c of al + ah is kept apart from the 24-word sum s, so
; * z1 = s^2 + ((2 * c * s) << 1536) + (c << 3072).
; *
; * r A single precision integer. Pointer passed in rcx.
; * a A single precision integer. Pointer passed in rdx.
; *
; * Stack frame (byte offsets from rsp after the prologue):
; *   0..383    s^2, later s^2 - z2 - z0
; *   384..767  z2
; *   768..959  s = low 24 words of al + ah
; *   960       saved r
; *   968       saved a
; *   976       c = carry out of al + ah
; *   984..991  padding
; *
; * Preserves r12. Overwrites rax, rcx, rdx, r8-r11 and the flags.
; */
_text SEGMENT READONLY PARA
sp_3072_sqr_48 PROC
push r12
; The frame is 992 rather than 984 bytes so that, given the ABI-standard
; alignment on entry, rsp is a multiple of 16 at the calls below. All
; rsp-relative offsets are unaffected by the extra 8 bytes of padding.
sub rsp, 992
mov QWORD PTR [rsp+960], rcx
mov QWORD PTR [rsp+968], rdx
lea r10, QWORD PTR [rsp+768]
lea r11, QWORD PTR [rdx+192]
; Add
; s = al + ah into [rsp+768]; r9 collects the carry c.
mov rax, QWORD PTR [rdx]
xor r9, r9
add rax, QWORD PTR [r11]
mov r8, QWORD PTR [rdx+8]
mov QWORD PTR [r10], rax
adc r8, QWORD PTR [r11+8]
mov rax, QWORD PTR [rdx+16]
mov QWORD PTR [r10+8], r8
adc rax, QWORD PTR [r11+16]
mov r8, QWORD PTR [rdx+24]
mov QWORD PTR [r10+16], rax
adc r8, QWORD PTR [r11+24]
mov rax, QWORD PTR [rdx+32]
mov QWORD PTR [r10+24], r8
adc rax, QWORD PTR [r11+32]
mov r8, QWORD PTR [rdx+40]
mov QWORD PTR [r10+32], rax
adc r8, QWORD PTR [r11+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r10+40], r8
adc rax, QWORD PTR [r11+48]
mov r8, QWORD PTR [rdx+56]
mov QWORD PTR [r10+48], rax
adc r8, QWORD PTR [r11+56]
mov rax, QWORD PTR [rdx+64]
mov QWORD PTR [r10+56], r8
adc rax, QWORD PTR [r11+64]
mov r8, QWORD PTR [rdx+72]
mov QWORD PTR [r10+64], rax
adc r8, QWORD PTR [r11+72]
mov rax, QWORD PTR [rdx+80]
mov QWORD PTR [r10+72], r8
adc rax, QWORD PTR [r11+80]
mov r8, QWORD PTR [rdx+88]
mov QWORD PTR [r10+80], rax
adc r8, QWORD PTR [r11+88]
mov rax, QWORD PTR [rdx+96]
mov QWORD PTR [r10+88], r8
adc rax, QWORD PTR [r11+96]
mov r8, QWORD PTR [rdx+104]
mov QWORD PTR [r10+96], rax
adc r8, QWORD PTR [r11+104]
mov rax, QWORD PTR [rdx+112]
mov QWORD PTR [r10+104], r8
adc rax, QWORD PTR [r11+112]
mov r8, QWORD PTR [rdx+120]
mov QWORD PTR [r10+112], rax
adc r8, QWORD PTR [r11+120]
mov rax, QWORD PTR [rdx+128]
mov QWORD PTR [r10+120], r8
adc rax, QWORD PTR [r11+128]
mov r8, QWORD PTR [rdx+136]
mov QWORD PTR [r10+128], rax
adc r8, QWORD PTR [r11+136]
mov rax, QWORD PTR [rdx+144]
mov QWORD PTR [r10+136], r8
adc rax, QWORD PTR [r11+144]
mov r8, QWORD PTR [rdx+152]
mov QWORD PTR [r10+144], rax
adc r8, QWORD PTR [r11+152]
mov rax, QWORD PTR [rdx+160]
mov QWORD PTR [r10+152], r8
adc rax, QWORD PTR [r11+160]
mov r8, QWORD PTR [rdx+168]
mov QWORD PTR [r10+160], rax
adc r8, QWORD PTR [r11+168]
mov rax, QWORD PTR [rdx+176]
mov QWORD PTR [r10+168], r8
adc rax, QWORD PTR [r11+176]
mov r8, QWORD PTR [rdx+184]
mov QWORD PTR [r10+176], rax
adc r8, QWORD PTR [r11+184]
mov QWORD PTR [r10+184], r8
adc r9, 0
mov QWORD PTR [rsp+976], r9
; [rsp+0] = s^2
mov rdx, r10
mov rcx, rsp
call sp_3072_sqr_24
; [rsp+384] = z2 = ah^2
mov rdx, QWORD PTR [rsp+968]
lea rcx, QWORD PTR [rsp+384]
add rdx, 192
call sp_3072_sqr_24
; r[0..47] = z0 = al^2
mov rdx, QWORD PTR [rsp+968]
mov rcx, QWORD PTR [rsp+960]
call sp_3072_sqr_24
; Reload the pointers unconditionally: rcx and rdx are volatile across a call
; in the Microsoft x64 convention and rcx is used as r for the rest of the
; routine. This reload used to sit inside IFDEF _WIN64 and was therefore
; skipped whenever that symbol was not defined for the assembler.
mov rdx, QWORD PTR [rsp+968]
mov rcx, QWORD PTR [rsp+960]
; r12 = -c, an all-ones mask when al + ah carried and zero otherwise.
; r9 keeps c and accumulates the word that ends up in r[72].
mov r12, QWORD PTR [rsp+976]
mov r11, rcx
lea r10, QWORD PTR [rsp+768]
mov r9, r12
neg r12
add r11, 384
; r[48..71] = s AND mask (selected without branching on c)
mov rax, QWORD PTR [r10]
mov r8, QWORD PTR [r10+8]
and rax, r12
and r8, r12
mov QWORD PTR [r11], rax
mov QWORD PTR [r11+8], r8
mov rax, QWORD PTR [r10+16]
mov r8, QWORD PTR [r10+24]
and rax, r12
and r8, r12
mov QWORD PTR [r11+16], rax
mov QWORD PTR [r11+24], r8
mov rax, QWORD PTR [r10+32]
mov r8, QWORD PTR [r10+40]
and rax, r12
and r8, r12
mov QWORD PTR [r11+32], rax
mov QWORD PTR [r11+40], r8
mov rax, QWORD PTR [r10+48]
mov r8, QWORD PTR [r10+56]
and rax, r12
and r8, r12
mov QWORD PTR [r11+48], rax
mov QWORD PTR [r11+56], r8
mov rax, QWORD PTR [r10+64]
mov r8, QWORD PTR [r10+72]
and rax, r12
and r8, r12
mov QWORD PTR [r11+64], rax
mov QWORD PTR [r11+72], r8
mov rax, QWORD PTR [r10+80]
mov r8, QWORD PTR [r10+88]
and rax, r12
and r8, r12
mov QWORD PTR [r11+80], rax
mov QWORD PTR [r11+88], r8
mov rax, QWORD PTR [r10+96]
mov r8, QWORD PTR [r10+104]
and rax, r12
and r8, r12
mov QWORD PTR [r11+96], rax
mov QWORD PTR [r11+104], r8
mov rax, QWORD PTR [r10+112]
mov r8, QWORD PTR [r10+120]
and rax, r12
and r8, r12
mov QWORD PTR [r11+112], rax
mov QWORD PTR [r11+120], r8
mov rax, QWORD PTR [r10+128]
mov r8, QWORD PTR [r10+136]
and rax, r12
and r8, r12
mov QWORD PTR [r11+128], rax
mov QWORD PTR [r11+136], r8
mov rax, QWORD PTR [r10+144]
mov r8, QWORD PTR [r10+152]
and rax, r12
and r8, r12
mov QWORD PTR [r11+144], rax
mov QWORD PTR [r11+152], r8
mov rax, QWORD PTR [r10+160]
mov r8, QWORD PTR [r10+168]
and rax, r12
and r8, r12
mov QWORD PTR [r11+160], rax
mov QWORD PTR [r11+168], r8
mov rax, QWORD PTR [r10+176]
mov r8, QWORD PTR [r10+184]
and rax, r12
and r8, r12
mov QWORD PTR [r11+176], rax
mov QWORD PTR [r11+184], r8
; Double r[48..71] in place to get 2 * c * s; the carry out is added to r9.
mov rax, QWORD PTR [r11]
add rax, rax
mov r8, QWORD PTR [r11+8]
mov QWORD PTR [r11], rax
adc r8, r8
mov rax, QWORD PTR [r11+16]
mov QWORD PTR [r11+8], r8
adc rax, rax
mov r8, QWORD PTR [r11+24]
mov QWORD PTR [r11+16], rax
adc r8, r8
mov rax, QWORD PTR [r11+32]
mov QWORD PTR [r11+24], r8
adc rax, rax
mov r8, QWORD PTR [r11+40]
mov QWORD PTR [r11+32], rax
adc r8, r8
mov rax, QWORD PTR [r11+48]
mov QWORD PTR [r11+40], r8
adc rax, rax
mov r8, QWORD PTR [r11+56]
mov QWORD PTR [r11+48], rax
adc r8, r8
mov rax, QWORD PTR [r11+64]
mov QWORD PTR [r11+56], r8
adc rax, rax
mov r8, QWORD PTR [r11+72]
mov QWORD PTR [r11+64], rax
adc r8, r8
mov rax, QWORD PTR [r11+80]
mov QWORD PTR [r11+72], r8
adc rax, rax
mov r8, QWORD PTR [r11+88]
mov QWORD PTR [r11+80], rax
adc r8, r8
mov rax, QWORD PTR [r11+96]
mov QWORD PTR [r11+88], r8
adc rax, rax
mov r8, QWORD PTR [r11+104]
mov QWORD PTR [r11+96], rax
adc r8, r8
mov rax, QWORD PTR [r11+112]
mov QWORD PTR [r11+104], r8
adc rax, rax
mov r8, QWORD PTR [r11+120]
mov QWORD PTR [r11+112], rax
adc r8, r8
mov rax, QWORD PTR [r11+128]
mov QWORD PTR [r11+120], r8
adc rax, rax
mov r8, QWORD PTR [r11+136]
mov QWORD PTR [r11+128], rax
adc r8, r8
mov rax, QWORD PTR [r11+144]
mov QWORD PTR [r11+136], r8
adc rax, rax
mov r8, QWORD PTR [r11+152]
mov QWORD PTR [r11+144], rax
adc r8, r8
mov rax, QWORD PTR [r11+160]
mov QWORD PTR [r11+152], r8
adc rax, rax
mov r8, QWORD PTR [r11+168]
mov QWORD PTR [r11+160], rax
adc r8, r8
mov rax, QWORD PTR [r11+176]
mov QWORD PTR [r11+168], r8
adc rax, rax
mov r8, QWORD PTR [r11+184]
mov QWORD PTR [r11+176], rax
adc r8, r8
mov QWORD PTR [r11+184], r8
adc r9, 0
; [rsp+0] -= z2 (48 words); the borrow is taken from r9.
lea rdx, QWORD PTR [rsp+384]
mov r10, rsp
mov rax, QWORD PTR [r10]
sub rax, QWORD PTR [rdx]
mov r8, QWORD PTR [r10+8]
mov QWORD PTR [r10], rax
sbb r8, QWORD PTR [rdx+8]
mov rax, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], r8
sbb rax, QWORD PTR [rdx+16]
mov r8, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], rax
sbb r8, QWORD PTR [rdx+24]
mov rax, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], r8
sbb rax, QWORD PTR [rdx+32]
mov r8, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], rax
sbb r8, QWORD PTR [rdx+40]
mov rax, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], r8
sbb rax, QWORD PTR [rdx+48]
mov r8, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], rax
sbb r8, QWORD PTR [rdx+56]
mov rax, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], r8
sbb rax, QWORD PTR [rdx+64]
mov r8, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], rax
sbb r8, QWORD PTR [rdx+72]
mov rax, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], r8
sbb rax, QWORD PTR [rdx+80]
mov r8, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], rax
sbb r8, QWORD PTR [rdx+88]
mov rax, QWORD PTR [r10+96]
mov QWORD PTR [r10+88], r8
sbb rax, QWORD PTR [rdx+96]
mov r8, QWORD PTR [r10+104]
mov QWORD PTR [r10+96], rax
sbb r8, QWORD PTR [rdx+104]
mov rax, QWORD PTR [r10+112]
mov QWORD PTR [r10+104], r8
sbb rax, QWORD PTR [rdx+112]
mov r8, QWORD PTR [r10+120]
mov QWORD PTR [r10+112], rax
sbb r8, QWORD PTR [rdx+120]
mov rax, QWORD PTR [r10+128]
mov QWORD PTR [r10+120], r8
sbb rax, QWORD PTR [rdx+128]
mov r8, QWORD PTR [r10+136]
mov QWORD PTR [r10+128], rax
sbb r8, QWORD PTR [rdx+136]
mov rax, QWORD PTR [r10+144]
mov QWORD PTR [r10+136], r8
sbb rax, QWORD PTR [rdx+144]
mov r8, QWORD PTR [r10+152]
mov QWORD PTR [r10+144], rax
sbb r8, QWORD PTR [rdx+152]
mov rax, QWORD PTR [r10+160]
mov QWORD PTR [r10+152], r8
sbb rax, QWORD PTR [rdx+160]
mov r8, QWORD PTR [r10+168]
mov QWORD PTR [r10+160], rax
sbb r8, QWORD PTR [rdx+168]
mov rax, QWORD PTR [r10+176]
mov QWORD PTR [r10+168], r8
sbb rax, QWORD PTR [rdx+176]
mov r8, QWORD PTR [r10+184]
mov QWORD PTR [r10+176], rax
sbb r8, QWORD PTR [rdx+184]
mov rax, QWORD PTR [r10+192]
mov QWORD PTR [r10+184], r8
sbb rax, QWORD PTR [rdx+192]
mov r8, QWORD PTR [r10+200]
mov QWORD PTR [r10+192], rax
sbb r8, QWORD PTR [rdx+200]
mov rax, QWORD PTR [r10+208]
mov QWORD PTR [r10+200], r8
sbb rax, QWORD PTR [rdx+208]
mov r8, QWORD PTR [r10+216]
mov QWORD PTR [r10+208], rax
sbb r8, QWORD PTR [rdx+216]
mov rax, QWORD PTR [r10+224]
mov QWORD PTR [r10+216], r8
sbb rax, QWORD PTR [rdx+224]
mov r8, QWORD PTR [r10+232]
mov QWORD PTR [r10+224], rax
sbb r8, QWORD PTR [rdx+232]
mov rax, QWORD PTR [r10+240]
mov QWORD PTR [r10+232], r8
sbb rax, QWORD PTR [rdx+240]
mov r8, QWORD PTR [r10+248]
mov QWORD PTR [r10+240], rax
sbb r8, QWORD PTR [rdx+248]
mov rax, QWORD PTR [r10+256]
mov QWORD PTR [r10+248], r8
sbb rax, QWORD PTR [rdx+256]
mov r8, QWORD PTR [r10+264]
mov QWORD PTR [r10+256], rax
sbb r8, QWORD PTR [rdx+264]
mov rax, QWORD PTR [r10+272]
mov QWORD PTR [r10+264], r8
sbb rax, QWORD PTR [rdx+272]
mov r8, QWORD PTR [r10+280]
mov QWORD PTR [r10+272], rax
sbb r8, QWORD PTR [rdx+280]
mov rax, QWORD PTR [r10+288]
mov QWORD PTR [r10+280], r8
sbb rax, QWORD PTR [rdx+288]
mov r8, QWORD PTR [r10+296]
mov QWORD PTR [r10+288], rax
sbb r8, QWORD PTR [rdx+296]
mov rax, QWORD PTR [r10+304]
mov QWORD PTR [r10+296], r8
sbb rax, QWORD PTR [rdx+304]
mov r8, QWORD PTR [r10+312]
mov QWORD PTR [r10+304], rax
sbb r8, QWORD PTR [rdx+312]
mov rax, QWORD PTR [r10+320]
mov QWORD PTR [r10+312], r8
sbb rax, QWORD PTR [rdx+320]
mov r8, QWORD PTR [r10+328]
mov QWORD PTR [r10+320], rax
sbb r8, QWORD PTR [rdx+328]
mov rax, QWORD PTR [r10+336]
mov QWORD PTR [r10+328], r8
sbb rax, QWORD PTR [rdx+336]
mov r8, QWORD PTR [r10+344]
mov QWORD PTR [r10+336], rax
sbb r8, QWORD PTR [rdx+344]
mov rax, QWORD PTR [r10+352]
mov QWORD PTR [r10+344], r8
sbb rax, QWORD PTR [rdx+352]
mov r8, QWORD PTR [r10+360]
mov QWORD PTR [r10+352], rax
sbb r8, QWORD PTR [rdx+360]
mov rax, QWORD PTR [r10+368]
mov QWORD PTR [r10+360], r8
sbb rax, QWORD PTR [rdx+368]
mov r8, QWORD PTR [r10+376]
mov QWORD PTR [r10+368], rax
sbb r8, QWORD PTR [rdx+376]
mov QWORD PTR [r10+376], r8
sbb r9, 0
; [rsp+0] -= z0 (r[0..47]); the borrow is again taken from r9.
mov rax, QWORD PTR [r10]
sub rax, QWORD PTR [rcx]
mov r8, QWORD PTR [r10+8]
mov QWORD PTR [r10], rax
sbb r8, QWORD PTR [rcx+8]
mov rax, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], r8
sbb rax, QWORD PTR [rcx+16]
mov r8, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], rax
sbb r8, QWORD PTR [rcx+24]
mov rax, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], r8
sbb rax, QWORD PTR [rcx+32]
mov r8, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], rax
sbb r8, QWORD PTR [rcx+40]
mov rax, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], r8
sbb rax, QWORD PTR [rcx+48]
mov r8, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], rax
sbb r8, QWORD PTR [rcx+56]
mov rax, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], r8
sbb rax, QWORD PTR [rcx+64]
mov r8, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], rax
sbb r8, QWORD PTR [rcx+72]
mov rax, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], r8
sbb rax, QWORD PTR [rcx+80]
mov r8, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], rax
sbb r8, QWORD PTR [rcx+88]
mov rax, QWORD PTR [r10+96]
mov QWORD PTR [r10+88], r8
sbb rax, QWORD PTR [rcx+96]
mov r8, QWORD PTR [r10+104]
mov QWORD PTR [r10+96], rax
sbb r8, QWORD PTR [rcx+104]
mov rax, QWORD PTR [r10+112]
mov QWORD PTR [r10+104], r8
sbb rax, QWORD PTR [rcx+112]
mov r8, QWORD PTR [r10+120]
mov QWORD PTR [r10+112], rax
sbb r8, QWORD PTR [rcx+120]
mov rax, QWORD PTR [r10+128]
mov QWORD PTR [r10+120], r8
sbb rax, QWORD PTR [rcx+128]
mov r8, QWORD PTR [r10+136]
mov QWORD PTR [r10+128], rax
sbb r8, QWORD PTR [rcx+136]
mov rax, QWORD PTR [r10+144]
mov QWORD PTR [r10+136], r8
sbb rax, QWORD PTR [rcx+144]
mov r8, QWORD PTR [r10+152]
mov QWORD PTR [r10+144], rax
sbb r8, QWORD PTR [rcx+152]
mov rax, QWORD PTR [r10+160]
mov QWORD PTR [r10+152], r8
sbb rax, QWORD PTR [rcx+160]
mov r8, QWORD PTR [r10+168]
mov QWORD PTR [r10+160], rax
sbb r8, QWORD PTR [rcx+168]
mov rax, QWORD PTR [r10+176]
mov QWORD PTR [r10+168], r8
sbb rax, QWORD PTR [rcx+176]
mov r8, QWORD PTR [r10+184]
mov QWORD PTR [r10+176], rax
sbb r8, QWORD PTR [rcx+184]
mov rax, QWORD PTR [r10+192]
mov QWORD PTR [r10+184], r8
sbb rax, QWORD PTR [rcx+192]
mov r8, QWORD PTR [r10+200]
mov QWORD PTR [r10+192], rax
sbb r8, QWORD PTR [rcx+200]
mov rax, QWORD PTR [r10+208]
mov QWORD PTR [r10+200], r8
sbb rax, QWORD PTR [rcx+208]
mov r8, QWORD PTR [r10+216]
mov QWORD PTR [r10+208], rax
sbb r8, QWORD PTR [rcx+216]
mov rax, QWORD PTR [r10+224]
mov QWORD PTR [r10+216], r8
sbb rax, QWORD PTR [rcx+224]
mov r8, QWORD PTR [r10+232]
mov QWORD PTR [r10+224], rax
sbb r8, QWORD PTR [rcx+232]
mov rax, QWORD PTR [r10+240]
mov QWORD PTR [r10+232], r8
sbb rax, QWORD PTR [rcx+240]
mov r8, QWORD PTR [r10+248]
mov QWORD PTR [r10+240], rax
sbb r8, QWORD PTR [rcx+248]
mov rax, QWORD PTR [r10+256]
mov QWORD PTR [r10+248], r8
sbb rax, QWORD PTR [rcx+256]
mov r8, QWORD PTR [r10+264]
mov QWORD PTR [r10+256], rax
sbb r8, QWORD PTR [rcx+264]
mov rax, QWORD PTR [r10+272]
mov QWORD PTR [r10+264], r8
sbb rax, QWORD PTR [rcx+272]
mov r8, QWORD PTR [r10+280]
mov QWORD PTR [r10+272], rax
sbb r8, QWORD PTR [rcx+280]
mov rax, QWORD PTR [r10+288]
mov QWORD PTR [r10+280], r8
sbb rax, QWORD PTR [rcx+288]
mov r8, QWORD PTR [r10+296]
mov QWORD PTR [r10+288], rax
sbb r8, QWORD PTR [rcx+296]
mov rax, QWORD PTR [r10+304]
mov QWORD PTR [r10+296], r8
sbb rax, QWORD PTR [rcx+304]
mov r8, QWORD PTR [r10+312]
mov QWORD PTR [r10+304], rax
sbb r8, QWORD PTR [rcx+312]
mov rax, QWORD PTR [r10+320]
mov QWORD PTR [r10+312], r8
sbb rax, QWORD PTR [rcx+320]
mov r8, QWORD PTR [r10+328]
mov QWORD PTR [r10+320], rax
sbb r8, QWORD PTR [rcx+328]
mov rax, QWORD PTR [r10+336]
mov QWORD PTR [r10+328], r8
sbb rax, QWORD PTR [rcx+336]
mov r8, QWORD PTR [r10+344]
mov QWORD PTR [r10+336], rax
sbb r8, QWORD PTR [rcx+344]
mov rax, QWORD PTR [r10+352]
mov QWORD PTR [r10+344], r8
sbb rax, QWORD PTR [rcx+352]
mov r8, QWORD PTR [r10+360]
mov QWORD PTR [r10+352], rax
sbb r8, QWORD PTR [rcx+360]
mov rax, QWORD PTR [r10+368]
mov QWORD PTR [r10+360], r8
sbb rax, QWORD PTR [rcx+368]
mov r8, QWORD PTR [r10+376]
mov QWORD PTR [r10+368], rax
sbb r8, QWORD PTR [rcx+376]
mov QWORD PTR [r10+376], r8
sbb r9, 0
; r11 = r + 192, i.e. r[24]: the position of the middle term.
sub r11, 192
; Add in place
; r[24..71] += s^2 - z2 - z0; the carry goes into r9, which becomes r[72].
mov rax, QWORD PTR [r11]
add rax, QWORD PTR [r10]
mov r8, QWORD PTR [r11+8]
mov QWORD PTR [r11], rax
adc r8, QWORD PTR [r10+8]
mov rax, QWORD PTR [r11+16]
mov QWORD PTR [r11+8], r8
adc rax, QWORD PTR [r10+16]
mov r8, QWORD PTR [r11+24]
mov QWORD PTR [r11+16], rax
adc r8, QWORD PTR [r10+24]
mov rax, QWORD PTR [r11+32]
mov QWORD PTR [r11+24], r8
adc rax, QWORD PTR [r10+32]
mov r8, QWORD PTR [r11+40]
mov QWORD PTR [r11+32], rax
adc r8, QWORD PTR [r10+40]
mov rax, QWORD PTR [r11+48]
mov QWORD PTR [r11+40], r8
adc rax, QWORD PTR [r10+48]
mov r8, QWORD PTR [r11+56]
mov QWORD PTR [r11+48], rax
adc r8, QWORD PTR [r10+56]
mov rax, QWORD PTR [r11+64]
mov QWORD PTR [r11+56], r8
adc rax, QWORD PTR [r10+64]
mov r8, QWORD PTR [r11+72]
mov QWORD PTR [r11+64], rax
adc r8, QWORD PTR [r10+72]
mov rax, QWORD PTR [r11+80]
mov QWORD PTR [r11+72], r8
adc rax, QWORD PTR [r10+80]
mov r8, QWORD PTR [r11+88]
mov QWORD PTR [r11+80], rax
adc r8, QWORD PTR [r10+88]
mov rax, QWORD PTR [r11+96]
mov QWORD PTR [r11+88], r8
adc rax, QWORD PTR [r10+96]
mov r8, QWORD PTR [r11+104]
mov QWORD PTR [r11+96], rax
adc r8, QWORD PTR [r10+104]
mov rax, QWORD PTR [r11+112]
mov QWORD PTR [r11+104], r8
adc rax, QWORD PTR [r10+112]
mov r8, QWORD PTR [r11+120]
mov QWORD PTR [r11+112], rax
adc r8, QWORD PTR [r10+120]
mov rax, QWORD PTR [r11+128]
mov QWORD PTR [r11+120], r8
adc rax, QWORD PTR [r10+128]
mov r8, QWORD PTR [r11+136]
mov QWORD PTR [r11+128], rax
adc r8, QWORD PTR [r10+136]
mov rax, QWORD PTR [r11+144]
mov QWORD PTR [r11+136], r8
adc rax, QWORD PTR [r10+144]
mov r8, QWORD PTR [r11+152]
mov QWORD PTR [r11+144], rax
adc r8, QWORD PTR [r10+152]
mov rax, QWORD PTR [r11+160]
mov QWORD PTR [r11+152], r8
adc rax, QWORD PTR [r10+160]
mov r8, QWORD PTR [r11+168]
mov QWORD PTR [r11+160], rax
adc r8, QWORD PTR [r10+168]
mov rax, QWORD PTR [r11+176]
mov QWORD PTR [r11+168], r8
adc rax, QWORD PTR [r10+176]
mov r8, QWORD PTR [r11+184]
mov QWORD PTR [r11+176], rax
adc r8, QWORD PTR [r10+184]
mov rax, QWORD PTR [r11+192]
mov QWORD PTR [r11+184], r8
adc rax, QWORD PTR [r10+192]
mov r8, QWORD PTR [r11+200]
mov QWORD PTR [r11+192], rax
adc r8, QWORD PTR [r10+200]
mov rax, QWORD PTR [r11+208]
mov QWORD PTR [r11+200], r8
adc rax, QWORD PTR [r10+208]
mov r8, QWORD PTR [r11+216]
mov QWORD PTR [r11+208], rax
adc r8, QWORD PTR [r10+216]
mov rax, QWORD PTR [r11+224]
mov QWORD PTR [r11+216], r8
adc rax, QWORD PTR [r10+224]
mov r8, QWORD PTR [r11+232]
mov QWORD PTR [r11+224], rax
adc r8, QWORD PTR [r10+232]
mov rax, QWORD PTR [r11+240]
mov QWORD PTR [r11+232], r8
adc rax, QWORD PTR [r10+240]
mov r8, QWORD PTR [r11+248]
mov QWORD PTR [r11+240], rax
adc r8, QWORD PTR [r10+248]
mov rax, QWORD PTR [r11+256]
mov QWORD PTR [r11+248], r8
adc rax, QWORD PTR [r10+256]
mov r8, QWORD PTR [r11+264]
mov QWORD PTR [r11+256], rax
adc r8, QWORD PTR [r10+264]
mov rax, QWORD PTR [r11+272]
mov QWORD PTR [r11+264], r8
adc rax, QWORD PTR [r10+272]
mov r8, QWORD PTR [r11+280]
mov QWORD PTR [r11+272], rax
adc r8, QWORD PTR [r10+280]
mov rax, QWORD PTR [r11+288]
mov QWORD PTR [r11+280], r8
adc rax, QWORD PTR [r10+288]
mov r8, QWORD PTR [r11+296]
mov QWORD PTR [r11+288], rax
adc r8, QWORD PTR [r10+296]
mov rax, QWORD PTR [r11+304]
mov QWORD PTR [r11+296], r8
adc rax, QWORD PTR [r10+304]
mov r8, QWORD PTR [r11+312]
mov QWORD PTR [r11+304], rax
adc r8, QWORD PTR [r10+312]
mov rax, QWORD PTR [r11+320]
mov QWORD PTR [r11+312], r8
adc rax, QWORD PTR [r10+320]
mov r8, QWORD PTR [r11+328]
mov QWORD PTR [r11+320], rax
adc r8, QWORD PTR [r10+328]
mov rax, QWORD PTR [r11+336]
mov QWORD PTR [r11+328], r8
adc rax, QWORD PTR [r10+336]
mov r8, QWORD PTR [r11+344]
mov QWORD PTR [r11+336], rax
adc r8, QWORD PTR [r10+344]
mov rax, QWORD PTR [r11+352]
mov QWORD PTR [r11+344], r8
adc rax, QWORD PTR [r10+352]
mov r8, QWORD PTR [r11+360]
mov QWORD PTR [r11+352], rax
adc r8, QWORD PTR [r10+360]
mov rax, QWORD PTR [r11+368]
mov QWORD PTR [r11+360], r8
adc rax, QWORD PTR [r10+368]
mov r8, QWORD PTR [r11+376]
mov QWORD PTR [r11+368], rax
adc r8, QWORD PTR [r10+376]
mov QWORD PTR [r11+376], r8
adc r9, 0
mov QWORD PTR [rcx+576], r9
; Add in place
; r[48..72] += z2[0..24]; rdx still points at z2 in [rsp+384].
mov rax, QWORD PTR [r11+192]
add rax, QWORD PTR [rdx]
mov r8, QWORD PTR [r11+200]
mov QWORD PTR [r11+192], rax
adc r8, QWORD PTR [rdx+8]
mov rax, QWORD PTR [r11+208]
mov QWORD PTR [r11+200], r8
adc rax, QWORD PTR [rdx+16]
mov r8, QWORD PTR [r11+216]
mov QWORD PTR [r11+208], rax
adc r8, QWORD PTR [rdx+24]
mov rax, QWORD PTR [r11+224]
mov QWORD PTR [r11+216], r8
adc rax, QWORD PTR [rdx+32]
mov r8, QWORD PTR [r11+232]
mov QWORD PTR [r11+224], rax
adc r8, QWORD PTR [rdx+40]
mov rax, QWORD PTR [r11+240]
mov QWORD PTR [r11+232], r8
adc rax, QWORD PTR [rdx+48]
mov r8, QWORD PTR [r11+248]
mov QWORD PTR [r11+240], rax
adc r8, QWORD PTR [rdx+56]
mov rax, QWORD PTR [r11+256]
mov QWORD PTR [r11+248], r8
adc rax, QWORD PTR [rdx+64]
mov r8, QWORD PTR [r11+264]
mov QWORD PTR [r11+256], rax
adc r8, QWORD PTR [rdx+72]
mov rax, QWORD PTR [r11+272]
mov QWORD PTR [r11+264], r8
adc rax, QWORD PTR [rdx+80]
mov r8, QWORD PTR [r11+280]
mov QWORD PTR [r11+272], rax
adc r8, QWORD PTR [rdx+88]
mov rax, QWORD PTR [r11+288]
mov QWORD PTR [r11+280], r8
adc rax, QWORD PTR [rdx+96]
mov r8, QWORD PTR [r11+296]
mov QWORD PTR [r11+288], rax
adc r8, QWORD PTR [rdx+104]
mov rax, QWORD PTR [r11+304]
mov QWORD PTR [r11+296], r8
adc rax, QWORD PTR [rdx+112]
mov r8, QWORD PTR [r11+312]
mov QWORD PTR [r11+304], rax
adc r8, QWORD PTR [rdx+120]
mov rax, QWORD PTR [r11+320]
mov QWORD PTR [r11+312], r8
adc rax, QWORD PTR [rdx+128]
mov r8, QWORD PTR [r11+328]
mov QWORD PTR [r11+320], rax
adc r8, QWORD PTR [rdx+136]
mov rax, QWORD PTR [r11+336]
mov QWORD PTR [r11+328], r8
adc rax, QWORD PTR [rdx+144]
mov r8, QWORD PTR [r11+344]
mov QWORD PTR [r11+336], rax
adc r8, QWORD PTR [rdx+152]
mov rax, QWORD PTR [r11+352]
mov QWORD PTR [r11+344], r8
adc rax, QWORD PTR [rdx+160]
mov r8, QWORD PTR [r11+360]
mov QWORD PTR [r11+352], rax
adc r8, QWORD PTR [rdx+168]
mov rax, QWORD PTR [r11+368]
mov QWORD PTR [r11+360], r8
adc rax, QWORD PTR [rdx+176]
mov r8, QWORD PTR [r11+376]
mov QWORD PTR [r11+368], rax
adc r8, QWORD PTR [rdx+184]
mov rax, QWORD PTR [r11+384]
mov QWORD PTR [r11+376], r8
adc rax, QWORD PTR [rdx+192]
mov QWORD PTR [r11+384], rax
; Add to zero
; r[73..95] = z2[25..47] plus the carry still held in the flags.
mov rax, QWORD PTR [rdx+200]
adc rax, 0
mov r8, QWORD PTR [rdx+208]
mov QWORD PTR [r11+392], rax
adc r8, 0
mov rax, QWORD PTR [rdx+216]
mov QWORD PTR [r11+400], r8
adc rax, 0
mov r8, QWORD PTR [rdx+224]
mov QWORD PTR [r11+408], rax
adc r8, 0
mov rax, QWORD PTR [rdx+232]
mov QWORD PTR [r11+416], r8
adc rax, 0
mov r8, QWORD PTR [rdx+240]
mov QWORD PTR [r11+424], rax
adc r8, 0
mov rax, QWORD PTR [rdx+248]
mov QWORD PTR [r11+432], r8
adc rax, 0
mov r8, QWORD PTR [rdx+256]
mov QWORD PTR [r11+440], rax
adc r8, 0
mov rax, QWORD PTR [rdx+264]
mov QWORD PTR [r11+448], r8
adc rax, 0
mov r8, QWORD PTR [rdx+272]
mov QWORD PTR [r11+456], rax
adc r8, 0
mov rax, QWORD PTR [rdx+280]
mov QWORD PTR [r11+464], r8
adc rax, 0
mov r8, QWORD PTR [rdx+288]
mov QWORD PTR [r11+472], rax
adc r8, 0
mov rax, QWORD PTR [rdx+296]
mov QWORD PTR [r11+480], r8
adc rax, 0
mov r8, QWORD PTR [rdx+304]
mov QWORD PTR [r11+488], rax
adc r8, 0
mov rax, QWORD PTR [rdx+312]
mov QWORD PTR [r11+496], r8
adc rax, 0
mov r8, QWORD PTR [rdx+320]
mov QWORD PTR [r11+504], rax
adc r8, 0
mov rax, QWORD PTR [rdx+328]
mov QWORD PTR [r11+512], r8
adc rax, 0
mov r8, QWORD PTR [rdx+336]
mov QWORD PTR [r11+520], rax
adc r8, 0
mov rax, QWORD PTR [rdx+344]
mov QWORD PTR [r11+528], r8
adc rax, 0
mov r8, QWORD PTR [rdx+352]
mov QWORD PTR [r11+536], rax
adc r8, 0
mov rax, QWORD PTR [rdx+360]
mov QWORD PTR [r11+544], r8
adc rax, 0
mov r8, QWORD PTR [rdx+368]
mov QWORD PTR [r11+552], rax
adc r8, 0
mov rax, QWORD PTR [rdx+376]
mov QWORD PTR [r11+560], r8
adc rax, 0
mov QWORD PTR [r11+568], rax
; Release the frame (must match the prologue) and restore r12.
add rsp, 992
pop r12
ret
sp_3072_sqr_48 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Multiply a and b into r. (r = a * b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_3072_mul_avx2_48 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
sub rsp, 1192
mov QWORD PTR [rsp+1152], rcx
mov QWORD PTR [rsp+1160], rdx
mov QWORD PTR [rsp+1168], r8
lea r12, QWORD PTR [rsp+768]
lea r14, QWORD PTR [rdx+192]
; Add
mov rax, QWORD PTR [rdx]
xor r15, r15
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [rdx+8]
mov QWORD PTR [r12], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [rdx+16]
mov QWORD PTR [r12+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [rdx+24]
mov QWORD PTR [r12+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [rdx+32]
mov QWORD PTR [r12+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [rdx+40]
mov QWORD PTR [r12+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r12+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [rdx+56]
mov QWORD PTR [r12+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [rdx+64]
mov QWORD PTR [r12+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [rdx+72]
mov QWORD PTR [r12+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [rdx+80]
mov QWORD PTR [r12+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [rdx+88]
mov QWORD PTR [r12+80], r9
adc r10, QWORD PTR [r14+88]
mov rax, QWORD PTR [rdx+96]
mov QWORD PTR [r12+88], r10
adc rax, QWORD PTR [r14+96]
mov r9, QWORD PTR [rdx+104]
mov QWORD PTR [r12+96], rax
adc r9, QWORD PTR [r14+104]
mov r10, QWORD PTR [rdx+112]
mov QWORD PTR [r12+104], r9
adc r10, QWORD PTR [r14+112]
mov rax, QWORD PTR [rdx+120]
mov QWORD PTR [r12+112], r10
adc rax, QWORD PTR [r14+120]
mov r9, QWORD PTR [rdx+128]
mov QWORD PTR [r12+120], rax
adc r9, QWORD PTR [r14+128]
mov r10, QWORD PTR [rdx+136]
mov QWORD PTR [r12+128], r9
adc r10, QWORD PTR [r14+136]
mov rax, QWORD PTR [rdx+144]
mov QWORD PTR [r12+136], r10
adc rax, QWORD PTR [r14+144]
mov r9, QWORD PTR [rdx+152]
mov QWORD PTR [r12+144], rax
adc r9, QWORD PTR [r14+152]
mov r10, QWORD PTR [rdx+160]
mov QWORD PTR [r12+152], r9
adc r10, QWORD PTR [r14+160]
mov rax, QWORD PTR [rdx+168]
mov QWORD PTR [r12+160], r10
adc rax, QWORD PTR [r14+168]
mov r9, QWORD PTR [rdx+176]
mov QWORD PTR [r12+168], rax
adc r9, QWORD PTR [r14+176]
mov r10, QWORD PTR [rdx+184]
mov QWORD PTR [r12+176], r9
adc r10, QWORD PTR [r14+184]
mov QWORD PTR [r12+184], r10
adc r15, 0
mov QWORD PTR [rsp+1176], r15
lea r13, QWORD PTR [rsp+960]
lea r14, QWORD PTR [r8+192]
; Add
mov rax, QWORD PTR [r8]
xor rdi, rdi
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [r8+8]
mov QWORD PTR [r13], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [r8+16]
mov QWORD PTR [r13+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [r8+24]
mov QWORD PTR [r13+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [r8+32]
mov QWORD PTR [r13+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [r8+40]
mov QWORD PTR [r13+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [r8+48]
mov QWORD PTR [r13+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [r8+56]
mov QWORD PTR [r13+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [r8+64]
mov QWORD PTR [r13+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [r8+72]
mov QWORD PTR [r13+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [r8+80]
mov QWORD PTR [r13+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [r8+88]
mov QWORD PTR [r13+80], r9
adc r10, QWORD PTR [r14+88]
mov rax, QWORD PTR [r8+96]
mov QWORD PTR [r13+88], r10
adc rax, QWORD PTR [r14+96]
mov r9, QWORD PTR [r8+104]
mov QWORD PTR [r13+96], rax
adc r9, QWORD PTR [r14+104]
mov r10, QWORD PTR [r8+112]
mov QWORD PTR [r13+104], r9
adc r10, QWORD PTR [r14+112]
mov rax, QWORD PTR [r8+120]
mov QWORD PTR [r13+112], r10
adc rax, QWORD PTR [r14+120]
mov r9, QWORD PTR [r8+128]
mov QWORD PTR [r13+120], rax
adc r9, QWORD PTR [r14+128]
mov r10, QWORD PTR [r8+136]
mov QWORD PTR [r13+128], r9
adc r10, QWORD PTR [r14+136]
mov rax, QWORD PTR [r8+144]
mov QWORD PTR [r13+136], r10
adc rax, QWORD PTR [r14+144]
mov r9, QWORD PTR [r8+152]
mov QWORD PTR [r13+144], rax
adc r9, QWORD PTR [r14+152]
mov r10, QWORD PTR [r8+160]
mov QWORD PTR [r13+152], r9
adc r10, QWORD PTR [r14+160]
mov rax, QWORD PTR [r8+168]
mov QWORD PTR [r13+160], r10
adc rax, QWORD PTR [r14+168]
mov r9, QWORD PTR [r8+176]
mov QWORD PTR [r13+168], rax
adc r9, QWORD PTR [r14+176]
mov r10, QWORD PTR [r8+184]
mov QWORD PTR [r13+176], r9
adc r10, QWORD PTR [r14+184]
mov QWORD PTR [r13+184], r10
adc rdi, 0
mov QWORD PTR [rsp+1184], rdi
mov r8, r13
mov rdx, r12
mov rcx, rsp
call sp_3072_mul_avx2_24
mov r8, QWORD PTR [rsp+1168]
mov rdx, QWORD PTR [rsp+1160]
lea rcx, QWORD PTR [rsp+384]
add r8, 192
add rdx, 192
call sp_3072_mul_avx2_24
mov r8, QWORD PTR [rsp+1168]
mov rdx, QWORD PTR [rsp+1160]
mov rcx, QWORD PTR [rsp+1152]
call sp_3072_mul_avx2_24
IFDEF _WIN64
mov r8, QWORD PTR [rsp+1168]
mov rdx, QWORD PTR [rsp+1160]
mov rcx, QWORD PTR [rsp+1152]
ENDIF
mov r15, QWORD PTR [rsp+1176]
mov rdi, QWORD PTR [rsp+1184]
mov rsi, QWORD PTR [rsp+1152]
mov r11, r15
lea r12, QWORD PTR [rsp+768]
lea r13, QWORD PTR [rsp+960]
and r11, rdi
neg r15
neg rdi
add rsi, 384
mov rax, QWORD PTR [r12]
mov r9, QWORD PTR [r13]
pext rax, rax, rdi
pext r9, r9, r15
add rax, r9
mov r9, QWORD PTR [r12+8]
mov r10, QWORD PTR [r13+8]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi], rax
adc r9, r10
mov r10, QWORD PTR [r12+16]
mov rax, QWORD PTR [r13+16]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+8], r9
adc r10, rax
mov rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [r13+24]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+16], r10
adc rax, r9
mov r9, QWORD PTR [r12+32]
mov r10, QWORD PTR [r13+32]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+24], rax
adc r9, r10
mov r10, QWORD PTR [r12+40]
mov rax, QWORD PTR [r13+40]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+32], r9
adc r10, rax
mov rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [r13+48]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+40], r10
adc rax, r9
mov r9, QWORD PTR [r12+56]
mov r10, QWORD PTR [r13+56]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+48], rax
adc r9, r10
mov r10, QWORD PTR [r12+64]
mov rax, QWORD PTR [r13+64]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+56], r9
adc r10, rax
mov rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [r13+72]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+64], r10
adc rax, r9
mov r9, QWORD PTR [r12+80]
mov r10, QWORD PTR [r13+80]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+72], rax
adc r9, r10
mov r10, QWORD PTR [r12+88]
mov rax, QWORD PTR [r13+88]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+80], r9
adc r10, rax
mov rax, QWORD PTR [r12+96]
mov r9, QWORD PTR [r13+96]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+88], r10
adc rax, r9
mov r9, QWORD PTR [r12+104]
mov r10, QWORD PTR [r13+104]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+96], rax
adc r9, r10
mov r10, QWORD PTR [r12+112]
mov rax, QWORD PTR [r13+112]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+104], r9
adc r10, rax
mov rax, QWORD PTR [r12+120]
mov r9, QWORD PTR [r13+120]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+112], r10
adc rax, r9
mov r9, QWORD PTR [r12+128]
mov r10, QWORD PTR [r13+128]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+120], rax
adc r9, r10
mov r10, QWORD PTR [r12+136]
mov rax, QWORD PTR [r13+136]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+128], r9
adc r10, rax
mov rax, QWORD PTR [r12+144]
mov r9, QWORD PTR [r13+144]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+136], r10
adc rax, r9
mov r9, QWORD PTR [r12+152]
mov r10, QWORD PTR [r13+152]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+144], rax
adc r9, r10
mov r10, QWORD PTR [r12+160]
mov rax, QWORD PTR [r13+160]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+152], r9
adc r10, rax
mov rax, QWORD PTR [r12+168]
mov r9, QWORD PTR [r13+168]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+160], r10
adc rax, r9
mov r9, QWORD PTR [r12+176]
mov r10, QWORD PTR [r13+176]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+168], rax
adc r9, r10
mov r10, QWORD PTR [r12+184]
mov rax, QWORD PTR [r13+184]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+176], r9
adc r10, rax
mov QWORD PTR [rsi+184], r10
adc r11, 0
lea r13, QWORD PTR [rsp+384]
mov r12, rsp
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [r13]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [r13+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [r13+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [r13+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [r13+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [r13+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [r13+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [r13+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [r13+184]
mov rax, QWORD PTR [r12+192]
mov QWORD PTR [r12+184], r10
sbb rax, QWORD PTR [r13+192]
mov r9, QWORD PTR [r12+200]
mov QWORD PTR [r12+192], rax
sbb r9, QWORD PTR [r13+200]
mov r10, QWORD PTR [r12+208]
mov QWORD PTR [r12+200], r9
sbb r10, QWORD PTR [r13+208]
mov rax, QWORD PTR [r12+216]
mov QWORD PTR [r12+208], r10
sbb rax, QWORD PTR [r13+216]
mov r9, QWORD PTR [r12+224]
mov QWORD PTR [r12+216], rax
sbb r9, QWORD PTR [r13+224]
mov r10, QWORD PTR [r12+232]
mov QWORD PTR [r12+224], r9
sbb r10, QWORD PTR [r13+232]
mov rax, QWORD PTR [r12+240]
mov QWORD PTR [r12+232], r10
sbb rax, QWORD PTR [r13+240]
mov r9, QWORD PTR [r12+248]
mov QWORD PTR [r12+240], rax
sbb r9, QWORD PTR [r13+248]
mov r10, QWORD PTR [r12+256]
mov QWORD PTR [r12+248], r9
sbb r10, QWORD PTR [r13+256]
mov rax, QWORD PTR [r12+264]
mov QWORD PTR [r12+256], r10
sbb rax, QWORD PTR [r13+264]
mov r9, QWORD PTR [r12+272]
mov QWORD PTR [r12+264], rax
sbb r9, QWORD PTR [r13+272]
mov r10, QWORD PTR [r12+280]
mov QWORD PTR [r12+272], r9
sbb r10, QWORD PTR [r13+280]
mov rax, QWORD PTR [r12+288]
mov QWORD PTR [r12+280], r10
sbb rax, QWORD PTR [r13+288]
mov r9, QWORD PTR [r12+296]
mov QWORD PTR [r12+288], rax
sbb r9, QWORD PTR [r13+296]
mov r10, QWORD PTR [r12+304]
mov QWORD PTR [r12+296], r9
sbb r10, QWORD PTR [r13+304]
mov rax, QWORD PTR [r12+312]
mov QWORD PTR [r12+304], r10
sbb rax, QWORD PTR [r13+312]
mov r9, QWORD PTR [r12+320]
mov QWORD PTR [r12+312], rax
sbb r9, QWORD PTR [r13+320]
mov r10, QWORD PTR [r12+328]
mov QWORD PTR [r12+320], r9
sbb r10, QWORD PTR [r13+328]
mov rax, QWORD PTR [r12+336]
mov QWORD PTR [r12+328], r10
sbb rax, QWORD PTR [r13+336]
mov r9, QWORD PTR [r12+344]
mov QWORD PTR [r12+336], rax
sbb r9, QWORD PTR [r13+344]
mov r10, QWORD PTR [r12+352]
mov QWORD PTR [r12+344], r9
sbb r10, QWORD PTR [r13+352]
mov rax, QWORD PTR [r12+360]
mov QWORD PTR [r12+352], r10
sbb rax, QWORD PTR [r13+360]
mov r9, QWORD PTR [r12+368]
mov QWORD PTR [r12+360], rax
sbb r9, QWORD PTR [r13+368]
mov r10, QWORD PTR [r12+376]
mov QWORD PTR [r12+368], r9
sbb r10, QWORD PTR [r13+376]
mov QWORD PTR [r12+376], r10
sbb r11, 0
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [rcx]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [rcx+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [rcx+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [rcx+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [rcx+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [rcx+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [rcx+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [rcx+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [rcx+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [rcx+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [rcx+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [rcx+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [rcx+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [rcx+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [rcx+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [rcx+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [rcx+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [rcx+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [rcx+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [rcx+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [rcx+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [rcx+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [rcx+184]
mov rax, QWORD PTR [r12+192]
mov QWORD PTR [r12+184], r10
sbb rax, QWORD PTR [rcx+192]
mov r9, QWORD PTR [r12+200]
mov QWORD PTR [r12+192], rax
sbb r9, QWORD PTR [rcx+200]
mov r10, QWORD PTR [r12+208]
mov QWORD PTR [r12+200], r9
sbb r10, QWORD PTR [rcx+208]
mov rax, QWORD PTR [r12+216]
mov QWORD PTR [r12+208], r10
sbb rax, QWORD PTR [rcx+216]
mov r9, QWORD PTR [r12+224]
mov QWORD PTR [r12+216], rax
sbb r9, QWORD PTR [rcx+224]
mov r10, QWORD PTR [r12+232]
mov QWORD PTR [r12+224], r9
sbb r10, QWORD PTR [rcx+232]
mov rax, QWORD PTR [r12+240]
mov QWORD PTR [r12+232], r10
sbb rax, QWORD PTR [rcx+240]
mov r9, QWORD PTR [r12+248]
mov QWORD PTR [r12+240], rax
sbb r9, QWORD PTR [rcx+248]
mov r10, QWORD PTR [r12+256]
mov QWORD PTR [r12+248], r9
sbb r10, QWORD PTR [rcx+256]
mov rax, QWORD PTR [r12+264]
mov QWORD PTR [r12+256], r10
sbb rax, QWORD PTR [rcx+264]
mov r9, QWORD PTR [r12+272]
mov QWORD PTR [r12+264], rax
sbb r9, QWORD PTR [rcx+272]
mov r10, QWORD PTR [r12+280]
mov QWORD PTR [r12+272], r9
sbb r10, QWORD PTR [rcx+280]
mov rax, QWORD PTR [r12+288]
mov QWORD PTR [r12+280], r10
sbb rax, QWORD PTR [rcx+288]
mov r9, QWORD PTR [r12+296]
mov QWORD PTR [r12+288], rax
sbb r9, QWORD PTR [rcx+296]
mov r10, QWORD PTR [r12+304]
mov QWORD PTR [r12+296], r9
sbb r10, QWORD PTR [rcx+304]
mov rax, QWORD PTR [r12+312]
mov QWORD PTR [r12+304], r10
sbb rax, QWORD PTR [rcx+312]
mov r9, QWORD PTR [r12+320]
mov QWORD PTR [r12+312], rax
sbb r9, QWORD PTR [rcx+320]
mov r10, QWORD PTR [r12+328]
mov QWORD PTR [r12+320], r9
sbb r10, QWORD PTR [rcx+328]
mov rax, QWORD PTR [r12+336]
mov QWORD PTR [r12+328], r10
sbb rax, QWORD PTR [rcx+336]
mov r9, QWORD PTR [r12+344]
mov QWORD PTR [r12+336], rax
sbb r9, QWORD PTR [rcx+344]
mov r10, QWORD PTR [r12+352]
mov QWORD PTR [r12+344], r9
sbb r10, QWORD PTR [rcx+352]
mov rax, QWORD PTR [r12+360]
mov QWORD PTR [r12+352], r10
sbb rax, QWORD PTR [rcx+360]
mov r9, QWORD PTR [r12+368]
mov QWORD PTR [r12+360], rax
sbb r9, QWORD PTR [rcx+368]
mov r10, QWORD PTR [r12+376]
mov QWORD PTR [r12+368], r9
sbb r10, QWORD PTR [rcx+376]
mov QWORD PTR [r12+376], r10
sbb r11, 0
sub rsi, 192
; Add
mov rax, QWORD PTR [rsi]
add rax, QWORD PTR [r12]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r12+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r12+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r12+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r12+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r12+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r12+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r12+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r12+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r12+96]
mov r9, QWORD PTR [rsi+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r12+104]
mov r10, QWORD PTR [rsi+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r12+112]
mov rax, QWORD PTR [rsi+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r12+120]
mov r9, QWORD PTR [rsi+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r12+128]
mov r10, QWORD PTR [rsi+136]
mov QWORD PTR [rsi+128], r9
adc r10, QWORD PTR [r12+136]
mov rax, QWORD PTR [rsi+144]
mov QWORD PTR [rsi+136], r10
adc rax, QWORD PTR [r12+144]
mov r9, QWORD PTR [rsi+152]
mov QWORD PTR [rsi+144], rax
adc r9, QWORD PTR [r12+152]
mov r10, QWORD PTR [rsi+160]
mov QWORD PTR [rsi+152], r9
adc r10, QWORD PTR [r12+160]
mov rax, QWORD PTR [rsi+168]
mov QWORD PTR [rsi+160], r10
adc rax, QWORD PTR [r12+168]
mov r9, QWORD PTR [rsi+176]
mov QWORD PTR [rsi+168], rax
adc r9, QWORD PTR [r12+176]
mov r10, QWORD PTR [rsi+184]
mov QWORD PTR [rsi+176], r9
adc r10, QWORD PTR [r12+184]
mov rax, QWORD PTR [rsi+192]
mov QWORD PTR [rsi+184], r10
adc rax, QWORD PTR [r12+192]
mov r9, QWORD PTR [rsi+200]
mov QWORD PTR [rsi+192], rax
adc r9, QWORD PTR [r12+200]
mov r10, QWORD PTR [rsi+208]
mov QWORD PTR [rsi+200], r9
adc r10, QWORD PTR [r12+208]
mov rax, QWORD PTR [rsi+216]
mov QWORD PTR [rsi+208], r10
adc rax, QWORD PTR [r12+216]
mov r9, QWORD PTR [rsi+224]
mov QWORD PTR [rsi+216], rax
adc r9, QWORD PTR [r12+224]
mov r10, QWORD PTR [rsi+232]
mov QWORD PTR [rsi+224], r9
adc r10, QWORD PTR [r12+232]
mov rax, QWORD PTR [rsi+240]
mov QWORD PTR [rsi+232], r10
adc rax, QWORD PTR [r12+240]
mov r9, QWORD PTR [rsi+248]
mov QWORD PTR [rsi+240], rax
adc r9, QWORD PTR [r12+248]
mov r10, QWORD PTR [rsi+256]
mov QWORD PTR [rsi+248], r9
adc r10, QWORD PTR [r12+256]
mov rax, QWORD PTR [rsi+264]
mov QWORD PTR [rsi+256], r10
adc rax, QWORD PTR [r12+264]
mov r9, QWORD PTR [rsi+272]
mov QWORD PTR [rsi+264], rax
adc r9, QWORD PTR [r12+272]
mov r10, QWORD PTR [rsi+280]
mov QWORD PTR [rsi+272], r9
adc r10, QWORD PTR [r12+280]
mov rax, QWORD PTR [rsi+288]
mov QWORD PTR [rsi+280], r10
adc rax, QWORD PTR [r12+288]
mov r9, QWORD PTR [rsi+296]
mov QWORD PTR [rsi+288], rax
adc r9, QWORD PTR [r12+296]
mov r10, QWORD PTR [rsi+304]
mov QWORD PTR [rsi+296], r9
adc r10, QWORD PTR [r12+304]
mov rax, QWORD PTR [rsi+312]
mov QWORD PTR [rsi+304], r10
adc rax, QWORD PTR [r12+312]
mov r9, QWORD PTR [rsi+320]
mov QWORD PTR [rsi+312], rax
adc r9, QWORD PTR [r12+320]
mov r10, QWORD PTR [rsi+328]
mov QWORD PTR [rsi+320], r9
adc r10, QWORD PTR [r12+328]
mov rax, QWORD PTR [rsi+336]
mov QWORD PTR [rsi+328], r10
adc rax, QWORD PTR [r12+336]
mov r9, QWORD PTR [rsi+344]
mov QWORD PTR [rsi+336], rax
adc r9, QWORD PTR [r12+344]
mov r10, QWORD PTR [rsi+352]
mov QWORD PTR [rsi+344], r9
adc r10, QWORD PTR [r12+352]
mov rax, QWORD PTR [rsi+360]
mov QWORD PTR [rsi+352], r10
adc rax, QWORD PTR [r12+360]
mov r9, QWORD PTR [rsi+368]
mov QWORD PTR [rsi+360], rax
adc r9, QWORD PTR [r12+368]
mov r10, QWORD PTR [rsi+376]
mov QWORD PTR [rsi+368], r9
adc r10, QWORD PTR [r12+376]
mov QWORD PTR [rsi+376], r10
adc r11, 0
mov QWORD PTR [rcx+576], r11
add rsi, 192
; Add
mov rax, QWORD PTR [rsi]
add rax, QWORD PTR [r13]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [rsi+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [rsi+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [rsi+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [rsi+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r13+128]
mov r10, QWORD PTR [rsi+136]
mov QWORD PTR [rsi+128], r9
adc r10, QWORD PTR [r13+136]
mov rax, QWORD PTR [rsi+144]
mov QWORD PTR [rsi+136], r10
adc rax, QWORD PTR [r13+144]
mov r9, QWORD PTR [rsi+152]
mov QWORD PTR [rsi+144], rax
adc r9, QWORD PTR [r13+152]
mov r10, QWORD PTR [rsi+160]
mov QWORD PTR [rsi+152], r9
adc r10, QWORD PTR [r13+160]
mov rax, QWORD PTR [rsi+168]
mov QWORD PTR [rsi+160], r10
adc rax, QWORD PTR [r13+168]
mov r9, QWORD PTR [rsi+176]
mov QWORD PTR [rsi+168], rax
adc r9, QWORD PTR [r13+176]
mov r10, QWORD PTR [rsi+184]
mov QWORD PTR [rsi+176], r9
adc r10, QWORD PTR [r13+184]
mov rax, QWORD PTR [rsi+192]
mov QWORD PTR [rsi+184], r10
adc rax, QWORD PTR [r13+192]
mov QWORD PTR [rsi+192], rax
; Add to zero
mov rax, QWORD PTR [r13+200]
adc rax, 0
mov r9, QWORD PTR [r13+208]
mov QWORD PTR [rsi+200], rax
adc r9, 0
mov r10, QWORD PTR [r13+216]
mov QWORD PTR [rsi+208], r9
adc r10, 0
mov rax, QWORD PTR [r13+224]
mov QWORD PTR [rsi+216], r10
adc rax, 0
mov r9, QWORD PTR [r13+232]
mov QWORD PTR [rsi+224], rax
adc r9, 0
mov r10, QWORD PTR [r13+240]
mov QWORD PTR [rsi+232], r9
adc r10, 0
mov rax, QWORD PTR [r13+248]
mov QWORD PTR [rsi+240], r10
adc rax, 0
mov r9, QWORD PTR [r13+256]
mov QWORD PTR [rsi+248], rax
adc r9, 0
mov r10, QWORD PTR [r13+264]
mov QWORD PTR [rsi+256], r9
adc r10, 0
mov rax, QWORD PTR [r13+272]
mov QWORD PTR [rsi+264], r10
adc rax, 0
mov r9, QWORD PTR [r13+280]
mov QWORD PTR [rsi+272], rax
adc r9, 0
mov r10, QWORD PTR [r13+288]
mov QWORD PTR [rsi+280], r9
adc r10, 0
mov rax, QWORD PTR [r13+296]
mov QWORD PTR [rsi+288], r10
adc rax, 0
mov r9, QWORD PTR [r13+304]
mov QWORD PTR [rsi+296], rax
adc r9, 0
mov r10, QWORD PTR [r13+312]
mov QWORD PTR [rsi+304], r9
adc r10, 0
mov rax, QWORD PTR [r13+320]
mov QWORD PTR [rsi+312], r10
adc rax, 0
mov r9, QWORD PTR [r13+328]
mov QWORD PTR [rsi+320], rax
adc r9, 0
mov r10, QWORD PTR [r13+336]
mov QWORD PTR [rsi+328], r9
adc r10, 0
mov rax, QWORD PTR [r13+344]
mov QWORD PTR [rsi+336], r10
adc rax, 0
mov r9, QWORD PTR [r13+352]
mov QWORD PTR [rsi+344], rax
adc r9, 0
mov r10, QWORD PTR [r13+360]
mov QWORD PTR [rsi+352], r9
adc r10, 0
mov rax, QWORD PTR [r13+368]
mov QWORD PTR [rsi+360], r10
adc rax, 0
mov r9, QWORD PTR [r13+376]
mov QWORD PTR [rsi+368], rax
adc r9, 0
mov QWORD PTR [rsi+376], r9
add rsp, 1192
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_3072_mul_avx2_48 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Square a and put result in r. (r = a * a)
; *
; * r A single precision integer that receives the result of the squaring.
; * a A single precision integer that is the value to square.
; */
_text SEGMENT READONLY PARA
sp_3072_sqr_avx2_48 PROC
push r12
sub rsp, 984
mov QWORD PTR [rsp+960], rcx
mov QWORD PTR [rsp+968], rdx
lea r10, QWORD PTR [rsp+768]
lea r11, QWORD PTR [rdx+192]
; Add
mov rax, QWORD PTR [rdx]
xor r9, r9
add rax, QWORD PTR [r11]
mov r8, QWORD PTR [rdx+8]
mov QWORD PTR [r10], rax
adc r8, QWORD PTR [r11+8]
mov rax, QWORD PTR [rdx+16]
mov QWORD PTR [r10+8], r8
adc rax, QWORD PTR [r11+16]
mov r8, QWORD PTR [rdx+24]
mov QWORD PTR [r10+16], rax
adc r8, QWORD PTR [r11+24]
mov rax, QWORD PTR [rdx+32]
mov QWORD PTR [r10+24], r8
adc rax, QWORD PTR [r11+32]
mov r8, QWORD PTR [rdx+40]
mov QWORD PTR [r10+32], rax
adc r8, QWORD PTR [r11+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r10+40], r8
adc rax, QWORD PTR [r11+48]
mov r8, QWORD PTR [rdx+56]
mov QWORD PTR [r10+48], rax
adc r8, QWORD PTR [r11+56]
mov rax, QWORD PTR [rdx+64]
mov QWORD PTR [r10+56], r8
adc rax, QWORD PTR [r11+64]
mov r8, QWORD PTR [rdx+72]
mov QWORD PTR [r10+64], rax
adc r8, QWORD PTR [r11+72]
mov rax, QWORD PTR [rdx+80]
mov QWORD PTR [r10+72], r8
adc rax, QWORD PTR [r11+80]
mov r8, QWORD PTR [rdx+88]
mov QWORD PTR [r10+80], rax
adc r8, QWORD PTR [r11+88]
mov rax, QWORD PTR [rdx+96]
mov QWORD PTR [r10+88], r8
adc rax, QWORD PTR [r11+96]
mov r8, QWORD PTR [rdx+104]
mov QWORD PTR [r10+96], rax
adc r8, QWORD PTR [r11+104]
mov rax, QWORD PTR [rdx+112]
mov QWORD PTR [r10+104], r8
adc rax, QWORD PTR [r11+112]
mov r8, QWORD PTR [rdx+120]
mov QWORD PTR [r10+112], rax
adc r8, QWORD PTR [r11+120]
mov rax, QWORD PTR [rdx+128]
mov QWORD PTR [r10+120], r8
adc rax, QWORD PTR [r11+128]
mov r8, QWORD PTR [rdx+136]
mov QWORD PTR [r10+128], rax
adc r8, QWORD PTR [r11+136]
mov rax, QWORD PTR [rdx+144]
mov QWORD PTR [r10+136], r8
adc rax, QWORD PTR [r11+144]
mov r8, QWORD PTR [rdx+152]
mov QWORD PTR [r10+144], rax
adc r8, QWORD PTR [r11+152]
mov rax, QWORD PTR [rdx+160]
mov QWORD PTR [r10+152], r8
adc rax, QWORD PTR [r11+160]
mov r8, QWORD PTR [rdx+168]
mov QWORD PTR [r10+160], rax
adc r8, QWORD PTR [r11+168]
mov rax, QWORD PTR [rdx+176]
mov QWORD PTR [r10+168], r8
adc rax, QWORD PTR [r11+176]
mov r8, QWORD PTR [rdx+184]
mov QWORD PTR [r10+176], rax
adc r8, QWORD PTR [r11+184]
mov QWORD PTR [r10+184], r8
adc r9, 0
mov QWORD PTR [rsp+976], r9
mov rdx, r10
mov rcx, rsp
call sp_3072_sqr_avx2_24
mov rdx, QWORD PTR [rsp+968]
lea rcx, QWORD PTR [rsp+384]
add rdx, 192
call sp_3072_sqr_avx2_24
mov rdx, QWORD PTR [rsp+968]
mov rcx, QWORD PTR [rsp+960]
call sp_3072_sqr_avx2_24
IFDEF _WIN64
mov rdx, QWORD PTR [rsp+968]
mov rcx, QWORD PTR [rsp+960]
ENDIF
mov r12, QWORD PTR [rsp+976]
mov r11, rcx
lea r10, QWORD PTR [rsp+768]
mov r9, r12
neg r12
add r11, 384
mov rax, QWORD PTR [r10]
pext rax, rax, r12
add rax, rax
mov r8, QWORD PTR [r10+8]
mov QWORD PTR [r11], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+16]
mov QWORD PTR [r11+8], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+24]
mov QWORD PTR [r11+16], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+32]
mov QWORD PTR [r11+24], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+40]
mov QWORD PTR [r11+32], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+48]
mov QWORD PTR [r11+40], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+56]
mov QWORD PTR [r11+48], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+64]
mov QWORD PTR [r11+56], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+72]
mov QWORD PTR [r11+64], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+80]
mov QWORD PTR [r11+72], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+88]
mov QWORD PTR [r11+80], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+96]
mov QWORD PTR [r11+88], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+104]
mov QWORD PTR [r11+96], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+112]
mov QWORD PTR [r11+104], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+120]
mov QWORD PTR [r11+112], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+128]
mov QWORD PTR [r11+120], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+136]
mov QWORD PTR [r11+128], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+144]
mov QWORD PTR [r11+136], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+152]
mov QWORD PTR [r11+144], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+160]
mov QWORD PTR [r11+152], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+168]
mov QWORD PTR [r11+160], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+176]
mov QWORD PTR [r11+168], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+184]
mov QWORD PTR [r11+176], rax
pext r8, r8, r12
adc r8, r8
mov QWORD PTR [r11+184], r8
adc r9, 0
lea rdx, QWORD PTR [rsp+384]
mov r10, rsp
mov rax, QWORD PTR [r10]
sub rax, QWORD PTR [rdx]
mov r8, QWORD PTR [r10+8]
mov QWORD PTR [r10], rax
sbb r8, QWORD PTR [rdx+8]
mov rax, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], r8
sbb rax, QWORD PTR [rdx+16]
mov r8, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], rax
sbb r8, QWORD PTR [rdx+24]
mov rax, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], r8
sbb rax, QWORD PTR [rdx+32]
mov r8, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], rax
sbb r8, QWORD PTR [rdx+40]
mov rax, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], r8
sbb rax, QWORD PTR [rdx+48]
mov r8, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], rax
sbb r8, QWORD PTR [rdx+56]
mov rax, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], r8
sbb rax, QWORD PTR [rdx+64]
mov r8, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], rax
sbb r8, QWORD PTR [rdx+72]
mov rax, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], r8
sbb rax, QWORD PTR [rdx+80]
mov r8, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], rax
sbb r8, QWORD PTR [rdx+88]
mov rax, QWORD PTR [r10+96]
mov QWORD PTR [r10+88], r8
sbb rax, QWORD PTR [rdx+96]
mov r8, QWORD PTR [r10+104]
mov QWORD PTR [r10+96], rax
sbb r8, QWORD PTR [rdx+104]
mov rax, QWORD PTR [r10+112]
mov QWORD PTR [r10+104], r8
sbb rax, QWORD PTR [rdx+112]
mov r8, QWORD PTR [r10+120]
mov QWORD PTR [r10+112], rax
sbb r8, QWORD PTR [rdx+120]
mov rax, QWORD PTR [r10+128]
mov QWORD PTR [r10+120], r8
sbb rax, QWORD PTR [rdx+128]
mov r8, QWORD PTR [r10+136]
mov QWORD PTR [r10+128], rax
sbb r8, QWORD PTR [rdx+136]
mov rax, QWORD PTR [r10+144]
mov QWORD PTR [r10+136], r8
sbb rax, QWORD PTR [rdx+144]
mov r8, QWORD PTR [r10+152]
mov QWORD PTR [r10+144], rax
sbb r8, QWORD PTR [rdx+152]
mov rax, QWORD PTR [r10+160]
mov QWORD PTR [r10+152], r8
sbb rax, QWORD PTR [rdx+160]
mov r8, QWORD PTR [r10+168]
mov QWORD PTR [r10+160], rax
sbb r8, QWORD PTR [rdx+168]
mov rax, QWORD PTR [r10+176]
mov QWORD PTR [r10+168], r8
sbb rax, QWORD PTR [rdx+176]
mov r8, QWORD PTR [r10+184]
mov QWORD PTR [r10+176], rax
sbb r8, QWORD PTR [rdx+184]
mov rax, QWORD PTR [r10+192]
mov QWORD PTR [r10+184], r8
sbb rax, QWORD PTR [rdx+192]
mov r8, QWORD PTR [r10+200]
mov QWORD PTR [r10+192], rax
sbb r8, QWORD PTR [rdx+200]
mov rax, QWORD PTR [r10+208]
mov QWORD PTR [r10+200], r8
sbb rax, QWORD PTR [rdx+208]
mov r8, QWORD PTR [r10+216]
mov QWORD PTR [r10+208], rax
sbb r8, QWORD PTR [rdx+216]
mov rax, QWORD PTR [r10+224]
mov QWORD PTR [r10+216], r8
sbb rax, QWORD PTR [rdx+224]
mov r8, QWORD PTR [r10+232]
mov QWORD PTR [r10+224], rax
sbb r8, QWORD PTR [rdx+232]
mov rax, QWORD PTR [r10+240]
mov QWORD PTR [r10+232], r8
sbb rax, QWORD PTR [rdx+240]
mov r8, QWORD PTR [r10+248]
mov QWORD PTR [r10+240], rax
sbb r8, QWORD PTR [rdx+248]
mov rax, QWORD PTR [r10+256]
mov QWORD PTR [r10+248], r8
sbb rax, QWORD PTR [rdx+256]
mov r8, QWORD PTR [r10+264]
mov QWORD PTR [r10+256], rax
sbb r8, QWORD PTR [rdx+264]
mov rax, QWORD PTR [r10+272]
mov QWORD PTR [r10+264], r8
sbb rax, QWORD PTR [rdx+272]
mov r8, QWORD PTR [r10+280]
mov QWORD PTR [r10+272], rax
sbb r8, QWORD PTR [rdx+280]
mov rax, QWORD PTR [r10+288]
mov QWORD PTR [r10+280], r8
sbb rax, QWORD PTR [rdx+288]
mov r8, QWORD PTR [r10+296]
mov QWORD PTR [r10+288], rax
sbb r8, QWORD PTR [rdx+296]
mov rax, QWORD PTR [r10+304]
mov QWORD PTR [r10+296], r8
sbb rax, QWORD PTR [rdx+304]
mov r8, QWORD PTR [r10+312]
mov QWORD PTR [r10+304], rax
sbb r8, QWORD PTR [rdx+312]
mov rax, QWORD PTR [r10+320]
mov QWORD PTR [r10+312], r8
sbb rax, QWORD PTR [rdx+320]
mov r8, QWORD PTR [r10+328]
mov QWORD PTR [r10+320], rax
sbb r8, QWORD PTR [rdx+328]
mov rax, QWORD PTR [r10+336]
mov QWORD PTR [r10+328], r8
sbb rax, QWORD PTR [rdx+336]
mov r8, QWORD PTR [r10+344]
mov QWORD PTR [r10+336], rax
sbb r8, QWORD PTR [rdx+344]
mov rax, QWORD PTR [r10+352]
mov QWORD PTR [r10+344], r8
sbb rax, QWORD PTR [rdx+352]
mov r8, QWORD PTR [r10+360]
mov QWORD PTR [r10+352], rax
sbb r8, QWORD PTR [rdx+360]
mov rax, QWORD PTR [r10+368]
mov QWORD PTR [r10+360], r8
sbb rax, QWORD PTR [rdx+368]
mov r8, QWORD PTR [r10+376]
mov QWORD PTR [r10+368], rax
sbb r8, QWORD PTR [rdx+376]
mov QWORD PTR [r10+376], r8
sbb r9, 0
mov rax, QWORD PTR [r10]
sub rax, QWORD PTR [rcx]
mov r8, QWORD PTR [r10+8]
mov QWORD PTR [r10], rax
sbb r8, QWORD PTR [rcx+8]
mov rax, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], r8
sbb rax, QWORD PTR [rcx+16]
mov r8, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], rax
sbb r8, QWORD PTR [rcx+24]
mov rax, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], r8
sbb rax, QWORD PTR [rcx+32]
mov r8, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], rax
sbb r8, QWORD PTR [rcx+40]
mov rax, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], r8
sbb rax, QWORD PTR [rcx+48]
mov r8, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], rax
sbb r8, QWORD PTR [rcx+56]
mov rax, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], r8
sbb rax, QWORD PTR [rcx+64]
mov r8, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], rax
sbb r8, QWORD PTR [rcx+72]
mov rax, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], r8
sbb rax, QWORD PTR [rcx+80]
mov r8, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], rax
sbb r8, QWORD PTR [rcx+88]
mov rax, QWORD PTR [r10+96]
mov QWORD PTR [r10+88], r8
sbb rax, QWORD PTR [rcx+96]
mov r8, QWORD PTR [r10+104]
mov QWORD PTR [r10+96], rax
sbb r8, QWORD PTR [rcx+104]
mov rax, QWORD PTR [r10+112]
mov QWORD PTR [r10+104], r8
sbb rax, QWORD PTR [rcx+112]
mov r8, QWORD PTR [r10+120]
mov QWORD PTR [r10+112], rax
sbb r8, QWORD PTR [rcx+120]
mov rax, QWORD PTR [r10+128]
mov QWORD PTR [r10+120], r8
sbb rax, QWORD PTR [rcx+128]
mov r8, QWORD PTR [r10+136]
mov QWORD PTR [r10+128], rax
sbb r8, QWORD PTR [rcx+136]
mov rax, QWORD PTR [r10+144]
mov QWORD PTR [r10+136], r8
sbb rax, QWORD PTR [rcx+144]
mov r8, QWORD PTR [r10+152]
mov QWORD PTR [r10+144], rax
sbb r8, QWORD PTR [rcx+152]
mov rax, QWORD PTR [r10+160]
mov QWORD PTR [r10+152], r8
sbb rax, QWORD PTR [rcx+160]
mov r8, QWORD PTR [r10+168]
mov QWORD PTR [r10+160], rax
sbb r8, QWORD PTR [rcx+168]
mov rax, QWORD PTR [r10+176]
mov QWORD PTR [r10+168], r8
sbb rax, QWORD PTR [rcx+176]
mov r8, QWORD PTR [r10+184]
mov QWORD PTR [r10+176], rax
sbb r8, QWORD PTR [rcx+184]
mov rax, QWORD PTR [r10+192]
mov QWORD PTR [r10+184], r8
sbb rax, QWORD PTR [rcx+192]
mov r8, QWORD PTR [r10+200]
mov QWORD PTR [r10+192], rax
sbb r8, QWORD PTR [rcx+200]
mov rax, QWORD PTR [r10+208]
mov QWORD PTR [r10+200], r8
sbb rax, QWORD PTR [rcx+208]
mov r8, QWORD PTR [r10+216]
mov QWORD PTR [r10+208], rax
sbb r8, QWORD PTR [rcx+216]
mov rax, QWORD PTR [r10+224]
mov QWORD PTR [r10+216], r8
sbb rax, QWORD PTR [rcx+224]
mov r8, QWORD PTR [r10+232]
mov QWORD PTR [r10+224], rax
sbb r8, QWORD PTR [rcx+232]
mov rax, QWORD PTR [r10+240]
mov QWORD PTR [r10+232], r8
sbb rax, QWORD PTR [rcx+240]
mov r8, QWORD PTR [r10+248]
mov QWORD PTR [r10+240], rax
sbb r8, QWORD PTR [rcx+248]
mov rax, QWORD PTR [r10+256]
mov QWORD PTR [r10+248], r8
sbb rax, QWORD PTR [rcx+256]
mov r8, QWORD PTR [r10+264]
mov QWORD PTR [r10+256], rax
sbb r8, QWORD PTR [rcx+264]
mov rax, QWORD PTR [r10+272]
mov QWORD PTR [r10+264], r8
sbb rax, QWORD PTR [rcx+272]
mov r8, QWORD PTR [r10+280]
mov QWORD PTR [r10+272], rax
sbb r8, QWORD PTR [rcx+280]
mov rax, QWORD PTR [r10+288]
mov QWORD PTR [r10+280], r8
sbb rax, QWORD PTR [rcx+288]
mov r8, QWORD PTR [r10+296]
mov QWORD PTR [r10+288], rax
sbb r8, QWORD PTR [rcx+296]
mov rax, QWORD PTR [r10+304]
mov QWORD PTR [r10+296], r8
sbb rax, QWORD PTR [rcx+304]
mov r8, QWORD PTR [r10+312]
mov QWORD PTR [r10+304], rax
sbb r8, QWORD PTR [rcx+312]
mov rax, QWORD PTR [r10+320]
mov QWORD PTR [r10+312], r8
sbb rax, QWORD PTR [rcx+320]
mov r8, QWORD PTR [r10+328]
mov QWORD PTR [r10+320], rax
sbb r8, QWORD PTR [rcx+328]
mov rax, QWORD PTR [r10+336]
mov QWORD PTR [r10+328], r8
sbb rax, QWORD PTR [rcx+336]
mov r8, QWORD PTR [r10+344]
mov QWORD PTR [r10+336], rax
sbb r8, QWORD PTR [rcx+344]
mov rax, QWORD PTR [r10+352]
mov QWORD PTR [r10+344], r8
sbb rax, QWORD PTR [rcx+352]
mov r8, QWORD PTR [r10+360]
mov QWORD PTR [r10+352], rax
sbb r8, QWORD PTR [rcx+360]
mov rax, QWORD PTR [r10+368]
mov QWORD PTR [r10+360], r8
sbb rax, QWORD PTR [rcx+368]
mov r8, QWORD PTR [r10+376]
mov QWORD PTR [r10+368], rax
sbb r8, QWORD PTR [rcx+376]
mov QWORD PTR [r10+376], r8
sbb r9, 0
sub r11, 192
; Add in place
mov rax, QWORD PTR [r11]
add rax, QWORD PTR [r10]
mov r8, QWORD PTR [r11+8]
mov QWORD PTR [r11], rax
adc r8, QWORD PTR [r10+8]
mov rax, QWORD PTR [r11+16]
mov QWORD PTR [r11+8], r8
adc rax, QWORD PTR [r10+16]
mov r8, QWORD PTR [r11+24]
mov QWORD PTR [r11+16], rax
adc r8, QWORD PTR [r10+24]
mov rax, QWORD PTR [r11+32]
mov QWORD PTR [r11+24], r8
adc rax, QWORD PTR [r10+32]
mov r8, QWORD PTR [r11+40]
mov QWORD PTR [r11+32], rax
adc r8, QWORD PTR [r10+40]
mov rax, QWORD PTR [r11+48]
mov QWORD PTR [r11+40], r8
adc rax, QWORD PTR [r10+48]
mov r8, QWORD PTR [r11+56]
mov QWORD PTR [r11+48], rax
adc r8, QWORD PTR [r10+56]
mov rax, QWORD PTR [r11+64]
mov QWORD PTR [r11+56], r8
adc rax, QWORD PTR [r10+64]
mov r8, QWORD PTR [r11+72]
mov QWORD PTR [r11+64], rax
adc r8, QWORD PTR [r10+72]
mov rax, QWORD PTR [r11+80]
mov QWORD PTR [r11+72], r8
adc rax, QWORD PTR [r10+80]
mov r8, QWORD PTR [r11+88]
mov QWORD PTR [r11+80], rax
adc r8, QWORD PTR [r10+88]
mov rax, QWORD PTR [r11+96]
mov QWORD PTR [r11+88], r8
adc rax, QWORD PTR [r10+96]
mov r8, QWORD PTR [r11+104]
mov QWORD PTR [r11+96], rax
adc r8, QWORD PTR [r10+104]
mov rax, QWORD PTR [r11+112]
mov QWORD PTR [r11+104], r8
adc rax, QWORD PTR [r10+112]
mov r8, QWORD PTR [r11+120]
mov QWORD PTR [r11+112], rax
adc r8, QWORD PTR [r10+120]
mov rax, QWORD PTR [r11+128]
mov QWORD PTR [r11+120], r8
adc rax, QWORD PTR [r10+128]
mov r8, QWORD PTR [r11+136]
mov QWORD PTR [r11+128], rax
adc r8, QWORD PTR [r10+136]
mov rax, QWORD PTR [r11+144]
mov QWORD PTR [r11+136], r8
adc rax, QWORD PTR [r10+144]
mov r8, QWORD PTR [r11+152]
mov QWORD PTR [r11+144], rax
adc r8, QWORD PTR [r10+152]
mov rax, QWORD PTR [r11+160]
mov QWORD PTR [r11+152], r8
adc rax, QWORD PTR [r10+160]
mov r8, QWORD PTR [r11+168]
mov QWORD PTR [r11+160], rax
adc r8, QWORD PTR [r10+168]
mov rax, QWORD PTR [r11+176]
mov QWORD PTR [r11+168], r8
adc rax, QWORD PTR [r10+176]
mov r8, QWORD PTR [r11+184]
mov QWORD PTR [r11+176], rax
adc r8, QWORD PTR [r10+184]
mov rax, QWORD PTR [r11+192]
mov QWORD PTR [r11+184], r8
adc rax, QWORD PTR [r10+192]
mov r8, QWORD PTR [r11+200]
mov QWORD PTR [r11+192], rax
adc r8, QWORD PTR [r10+200]
mov rax, QWORD PTR [r11+208]
mov QWORD PTR [r11+200], r8
adc rax, QWORD PTR [r10+208]
mov r8, QWORD PTR [r11+216]
mov QWORD PTR [r11+208], rax
adc r8, QWORD PTR [r10+216]
mov rax, QWORD PTR [r11+224]
mov QWORD PTR [r11+216], r8
adc rax, QWORD PTR [r10+224]
mov r8, QWORD PTR [r11+232]
mov QWORD PTR [r11+224], rax
adc r8, QWORD PTR [r10+232]
mov rax, QWORD PTR [r11+240]
mov QWORD PTR [r11+232], r8
adc rax, QWORD PTR [r10+240]
mov r8, QWORD PTR [r11+248]
mov QWORD PTR [r11+240], rax
adc r8, QWORD PTR [r10+248]
mov rax, QWORD PTR [r11+256]
mov QWORD PTR [r11+248], r8
adc rax, QWORD PTR [r10+256]
mov r8, QWORD PTR [r11+264]
mov QWORD PTR [r11+256], rax
adc r8, QWORD PTR [r10+264]
mov rax, QWORD PTR [r11+272]
mov QWORD PTR [r11+264], r8
adc rax, QWORD PTR [r10+272]
mov r8, QWORD PTR [r11+280]
mov QWORD PTR [r11+272], rax
adc r8, QWORD PTR [r10+280]
mov rax, QWORD PTR [r11+288]
mov QWORD PTR [r11+280], r8
adc rax, QWORD PTR [r10+288]
mov r8, QWORD PTR [r11+296]
mov QWORD PTR [r11+288], rax
adc r8, QWORD PTR [r10+296]
mov rax, QWORD PTR [r11+304]
mov QWORD PTR [r11+296], r8
adc rax, QWORD PTR [r10+304]
mov r8, QWORD PTR [r11+312]
mov QWORD PTR [r11+304], rax
adc r8, QWORD PTR [r10+312]
mov rax, QWORD PTR [r11+320]
mov QWORD PTR [r11+312], r8
adc rax, QWORD PTR [r10+320]
mov r8, QWORD PTR [r11+328]
mov QWORD PTR [r11+320], rax
adc r8, QWORD PTR [r10+328]
mov rax, QWORD PTR [r11+336]
mov QWORD PTR [r11+328], r8
adc rax, QWORD PTR [r10+336]
mov r8, QWORD PTR [r11+344]
mov QWORD PTR [r11+336], rax
adc r8, QWORD PTR [r10+344]
mov rax, QWORD PTR [r11+352]
mov QWORD PTR [r11+344], r8
adc rax, QWORD PTR [r10+352]
mov r8, QWORD PTR [r11+360]
mov QWORD PTR [r11+352], rax
adc r8, QWORD PTR [r10+360]
mov rax, QWORD PTR [r11+368]
mov QWORD PTR [r11+360], r8
adc rax, QWORD PTR [r10+368]
mov r8, QWORD PTR [r11+376]
mov QWORD PTR [r11+368], rax
adc r8, QWORD PTR [r10+376]
mov QWORD PTR [r11+376], r8
adc r9, 0
mov QWORD PTR [rcx+576], r9
; Add in place
mov rax, QWORD PTR [r11+192]
add rax, QWORD PTR [rdx]
mov r8, QWORD PTR [r11+200]
mov QWORD PTR [r11+192], rax
adc r8, QWORD PTR [rdx+8]
mov rax, QWORD PTR [r11+208]
mov QWORD PTR [r11+200], r8
adc rax, QWORD PTR [rdx+16]
mov r8, QWORD PTR [r11+216]
mov QWORD PTR [r11+208], rax
adc r8, QWORD PTR [rdx+24]
mov rax, QWORD PTR [r11+224]
mov QWORD PTR [r11+216], r8
adc rax, QWORD PTR [rdx+32]
mov r8, QWORD PTR [r11+232]
mov QWORD PTR [r11+224], rax
adc r8, QWORD PTR [rdx+40]
mov rax, QWORD PTR [r11+240]
mov QWORD PTR [r11+232], r8
adc rax, QWORD PTR [rdx+48]
mov r8, QWORD PTR [r11+248]
mov QWORD PTR [r11+240], rax
adc r8, QWORD PTR [rdx+56]
mov rax, QWORD PTR [r11+256]
mov QWORD PTR [r11+248], r8
adc rax, QWORD PTR [rdx+64]
mov r8, QWORD PTR [r11+264]
mov QWORD PTR [r11+256], rax
adc r8, QWORD PTR [rdx+72]
mov rax, QWORD PTR [r11+272]
mov QWORD PTR [r11+264], r8
adc rax, QWORD PTR [rdx+80]
mov r8, QWORD PTR [r11+280]
mov QWORD PTR [r11+272], rax
adc r8, QWORD PTR [rdx+88]
mov rax, QWORD PTR [r11+288]
mov QWORD PTR [r11+280], r8
adc rax, QWORD PTR [rdx+96]
mov r8, QWORD PTR [r11+296]
mov QWORD PTR [r11+288], rax
adc r8, QWORD PTR [rdx+104]
mov rax, QWORD PTR [r11+304]
mov QWORD PTR [r11+296], r8
adc rax, QWORD PTR [rdx+112]
mov r8, QWORD PTR [r11+312]
mov QWORD PTR [r11+304], rax
adc r8, QWORD PTR [rdx+120]
mov rax, QWORD PTR [r11+320]
mov QWORD PTR [r11+312], r8
adc rax, QWORD PTR [rdx+128]
mov r8, QWORD PTR [r11+328]
mov QWORD PTR [r11+320], rax
adc r8, QWORD PTR [rdx+136]
mov rax, QWORD PTR [r11+336]
mov QWORD PTR [r11+328], r8
adc rax, QWORD PTR [rdx+144]
mov r8, QWORD PTR [r11+344]
mov QWORD PTR [r11+336], rax
adc r8, QWORD PTR [rdx+152]
mov rax, QWORD PTR [r11+352]
mov QWORD PTR [r11+344], r8
adc rax, QWORD PTR [rdx+160]
mov r8, QWORD PTR [r11+360]
mov QWORD PTR [r11+352], rax
adc r8, QWORD PTR [rdx+168]
mov rax, QWORD PTR [r11+368]
mov QWORD PTR [r11+360], r8
adc rax, QWORD PTR [rdx+176]
mov r8, QWORD PTR [r11+376]
mov QWORD PTR [r11+368], rax
adc r8, QWORD PTR [rdx+184]
mov rax, QWORD PTR [r11+384]
mov QWORD PTR [r11+376], r8
adc rax, QWORD PTR [rdx+192]
mov QWORD PTR [r11+384], rax
; Add to zero
mov rax, QWORD PTR [rdx+200]
adc rax, 0
mov r8, QWORD PTR [rdx+208]
mov QWORD PTR [r11+392], rax
adc r8, 0
mov rax, QWORD PTR [rdx+216]
mov QWORD PTR [r11+400], r8
adc rax, 0
mov r8, QWORD PTR [rdx+224]
mov QWORD PTR [r11+408], rax
adc r8, 0
mov rax, QWORD PTR [rdx+232]
mov QWORD PTR [r11+416], r8
adc rax, 0
mov r8, QWORD PTR [rdx+240]
mov QWORD PTR [r11+424], rax
adc r8, 0
mov rax, QWORD PTR [rdx+248]
mov QWORD PTR [r11+432], r8
adc rax, 0
mov r8, QWORD PTR [rdx+256]
mov QWORD PTR [r11+440], rax
adc r8, 0
mov rax, QWORD PTR [rdx+264]
mov QWORD PTR [r11+448], r8
adc rax, 0
mov r8, QWORD PTR [rdx+272]
mov QWORD PTR [r11+456], rax
adc r8, 0
mov rax, QWORD PTR [rdx+280]
mov QWORD PTR [r11+464], r8
adc rax, 0
mov r8, QWORD PTR [rdx+288]
mov QWORD PTR [r11+472], rax
adc r8, 0
mov rax, QWORD PTR [rdx+296]
mov QWORD PTR [r11+480], r8
adc rax, 0
mov r8, QWORD PTR [rdx+304]
mov QWORD PTR [r11+488], rax
adc r8, 0
mov rax, QWORD PTR [rdx+312]
mov QWORD PTR [r11+496], r8
adc rax, 0
mov r8, QWORD PTR [rdx+320]
mov QWORD PTR [r11+504], rax
adc r8, 0
mov rax, QWORD PTR [rdx+328]
mov QWORD PTR [r11+512], r8
adc rax, 0
mov r8, QWORD PTR [rdx+336]
mov QWORD PTR [r11+520], rax
adc r8, 0
mov rax, QWORD PTR [rdx+344]
mov QWORD PTR [r11+528], r8
adc rax, 0
mov r8, QWORD PTR [rdx+352]
mov QWORD PTR [r11+536], rax
adc r8, 0
mov rax, QWORD PTR [rdx+360]
mov QWORD PTR [r11+544], r8
adc rax, 0
mov r8, QWORD PTR [rdx+368]
mov QWORD PTR [r11+552], rax
adc r8, 0
mov rax, QWORD PTR [rdx+376]
mov QWORD PTR [r11+560], r8
adc rax, 0
mov QWORD PTR [r11+568], rax
add rsp, 984
pop r12
ret
sp_3072_sqr_avx2_48 ENDP
_text ENDS
ENDIF
; /* Multiply a 48-word number by one 64-bit digit. (r = a * b)
; *
; * Arguments as the code reads them (Microsoft x64 register order):
; *   rcx  r  Result buffer; words r[0] .. r[48] are written (49 words).
; *   rdx  a  Multiplicand; words a[0] .. a[47] are read.
; *   r8   b  The single digit to multiply by.
; *
; * r12 is pushed on entry and popped before returning.
; * rax, rdx, r9, r10 and r11 are overwritten.
; */
; One inner column of the product:
;   accNext:accHi:accLo += a[aOff / 8] * b, then r[aOff / 8] = accLo.
; accNext is cleared first so it can take the carry out of accHi.
sp_3072_mul_d_48_step MACRO aOff, accLo, accHi, accNext
        mov rax, r8
        xor accNext, accNext
        mul QWORD PTR [r9+aOff]
        add accLo, rax
        mov QWORD PTR [rcx+aOff], accLo
        adc accHi, rdx
        adc accNext, 0
ENDM
_text SEGMENT READONLY PARA
sp_3072_mul_d_48 PROC
        push r12
        ; mul writes rdx, so the pointer to a is moved to r9 first.
        mov r9, rdx
        ; A[0] * B
        mov rax, r8
        xor r12, r12
        mul QWORD PTR [r9]
        mov r10, rax
        mov r11, rdx
        mov QWORD PTR [rcx], r10
        ; A[1] * B .. A[45] * B
        ; The roles of r10, r11 and r12 rotate by one register per column,
        ; so each pass of the repeat block emits three columns (24 bytes).
sp_3072_mul_d_48_off = 8
REPT 15
        sp_3072_mul_d_48_step sp_3072_mul_d_48_off, r11, r12, r10
        sp_3072_mul_d_48_step sp_3072_mul_d_48_off+8, r12, r10, r11
        sp_3072_mul_d_48_step sp_3072_mul_d_48_off+16, r10, r11, r12
sp_3072_mul_d_48_off = sp_3072_mul_d_48_off + 24
ENDM
        ; A[46] * B
        sp_3072_mul_d_48_step 368, r11, r12, r10
        ; A[47] * B - last column: no further accumulator is needed, the
        ; high half of the product becomes the top result word r[48].
        mov rax, r8
        mul QWORD PTR [r9+376]
        add r12, rax
        adc r10, rdx
        mov QWORD PTR [rcx+376], r12
        mov QWORD PTR [rcx+384], r10
        pop r12
        ret
sp_3072_mul_d_48 ENDP
_text ENDS
; /* Conditionally subtract b from a, selected by the mask m. (r = a - (b & m))
; * m is -1 (all ones) to subtract b and 0 to subtract nothing.
; *
; * Arguments as the code reads them (Microsoft x64 register order):
; *   rcx  r  Result; words r[0] .. r[23] are written.
; *   rdx  a  Number to subtract from; words a[0] .. a[23] are read.
; *   r8   b  Number to subtract; words b[0] .. b[23] are read.
; *   r9   m  Mask ANDed with every word of b.
; *
; * Returns in rax: 0 when the subtraction produced no final borrow,
; * -1 (all ones) when it did.
; * 192 bytes of stack hold the masked copy of b.
; * r8, r10 and r11 are overwritten.
; */
; AND two consecutive words of b with the mask and store them at the same
; offset in the stack buffer.
sp_3072_cond_sub_24_mask MACRO bOff
        mov r10, QWORD PTR [r8+bOff]
        mov r11, QWORD PTR [r8+bOff+8]
        and r10, r9
        and r11, r9
        mov QWORD PTR [rsp+bOff], r10
        mov QWORD PTR [rsp+bOff+8], r11
ENDM
; One link of the borrow chain: cur = a[wOff / 8] - masked b[wOff / 8] - borrow.
; The previous word's result (prev) is stored after the sbb; mov leaves the
; borrow flag untouched, so the chain is not broken.
sp_3072_cond_sub_24_sbb MACRO wOff, cur, prev
        mov cur, QWORD PTR [rdx+wOff]
        mov r8, QWORD PTR [rsp+wOff]
        sbb cur, r8
        mov QWORD PTR [rcx+wOff-8], prev
ENDM
_text SEGMENT READONLY PARA
sp_3072_cond_sub_24 PROC
        sub rsp, 192
        mov rax, 0
        ; Phase 1: stack[0..23] = b[0..23] & m, two words per pass.
sp_3072_cond_sub_24_off = 0
REPT 12
        sp_3072_cond_sub_24_mask sp_3072_cond_sub_24_off
sp_3072_cond_sub_24_off = sp_3072_cond_sub_24_off + 16
ENDM
        ; Phase 2: r = a - stack. Word 0 starts the borrow chain with sub.
        mov r10, QWORD PTR [rdx]
        mov r8, QWORD PTR [rsp]
        sub r10, r8
        ; Words 1 .. 22: r11 and r10 alternate as the current result word.
sp_3072_cond_sub_24_off = 8
REPT 11
        sp_3072_cond_sub_24_sbb sp_3072_cond_sub_24_off, r11, r10
        sp_3072_cond_sub_24_sbb sp_3072_cond_sub_24_off+8, r10, r11
sp_3072_cond_sub_24_off = sp_3072_cond_sub_24_off + 16
ENDM
        ; Word 23, then flush the last result word.
        sp_3072_cond_sub_24_sbb 184, r11, r10
        mov QWORD PTR [rcx+184], r11
        ; rax = 0 - final borrow.
        sbb rax, 0
        add rsp, 192
        ret
sp_3072_cond_sub_24 ENDP
_text ENDS
; /* Reduce the number back to 3072 bits using Montgomery reduction.
; *
; * a   A single precision number to reduce in place.
; * m   The single precision number representing the modulus.
; * mp  The digit representing the negative inverse of m mod 2^n.
; *
; * Arguments as the code reads them (Microsoft x64 register order):
; *   rcx = a   words a[0] .. a[47] are accessed; the result is written
; *             to a[0] .. a[23] by sp_3072_cond_sub_24
; *   rdx = m   words m[0] .. m[23] are read
; *   r8  = mp
; *
; * Register roles inside the loop:
; *   r9       pointer to m (rdx is overwritten by every mul)
; *   r10      loop counter, 24 down to 1
; *   r13      mu for the current iteration
; *   r15/rdi  cached a[i+0] / a[i+1], carried from one iteration to the next
; *   r11/r12  alternating carry word between neighbouring columns
; *   r14      scratch for the a[] word being updated
; *   rsi      carry out of the top column of the previous iteration
; *
; * r12-r15, rdi and rsi are saved on entry and restored before returning.
; */
_text SEGMENT READONLY PARA
sp_3072_mont_reduce_24 PROC
        push r12
        push r13
        push r14
        push r15
        push rdi
        push rsi
        mov r9, rdx
        xor rsi, rsi
        ; i = 24
        mov r10, 24
        mov r15, QWORD PTR [rcx]
        mov rdi, QWORD PTR [rcx+8]
L_3072_mont_loop_24:
        ; mu = a[i] * mp
        mov r13, r15
        imul r13, r8
        ; a[i+0] += m[0] * mu
        ; Only the carry (r12) is used afterwards; r15 is reloaded below.
        mov rax, r13
        xor r12, r12
        mul QWORD PTR [r9]
        add r15, rax
        adc r12, rdx
        ; a[i+1] += m[1] * mu
        ; The new value stays in r15: it is a[i+0] of the next iteration.
        mov rax, r13
        xor r11, r11
        mul QWORD PTR [r9+8]
        mov r15, rdi
        add r15, rax
        adc r11, rdx
        add r15, r12
        adc r11, 0
        ; a[i+2] += m[2] * mu
        ; The new value stays in rdi: it is a[i+1] of the next iteration.
        mov rax, r13
        xor r12, r12
        mul QWORD PTR [r9+16]
        mov rdi, QWORD PTR [rcx+16]
        add rdi, rax
        adc r12, rdx
        add rdi, r11
        adc r12, 0
        ; a[i+3] += m[3] * mu
        mov rax, r13
        xor r11, r11
        mul QWORD PTR [r9+24]
        mov r14, QWORD PTR [rcx+24]
        add r14, rax
        adc r11, rdx
        add r14, r12
        mov QWORD PTR [rcx+24], r14
        adc r11, 0
        ; a[i+4] += m[4] * mu
        mov rax, r13
        xor r12, r12
        mul QWORD PTR [r9+32]
        mov r14, QWORD PTR [rcx+32]
        add r14, rax
        adc r12, rdx
        add r14, r11
        mov QWORD PTR [rcx+32], r14
        adc r12, 0
        ; a[i+5] += m[5] * mu
        mov rax, r13
        xor r11, r11
        mul QWORD PTR [r9+40]
        mov r14, QWORD PTR [rcx+40]
        add r14, rax
        adc r11, rdx
        add r14, r12
        mov QWORD PTR [rcx+40], r14
        adc r11, 0
        ; a[i+6] += m[6] * mu
        mov rax, r13
        xor r12, r12
        mul QWORD PTR [r9+48]
        mov r14, QWORD PTR [rcx+48]
        add r14, rax
        adc r12, rdx
        add r14, r11
        mov QWORD PTR [rcx+48], r14
        adc r12, 0
        ; a[i+7] += m[7] * mu
        mov rax, r13
        xor r11, r11
        mul QWORD PTR [r9+56]
        mov r14, QWORD PTR [rcx+56]
        add r14, rax
        adc r11, rdx
        add r14, r12
        mov QWORD PTR [rcx+56], r14
        adc r11, 0
        ; a[i+8] += m[8] * mu
        mov rax, r13
        xor r12, r12
        mul QWORD PTR [r9+64]
        mov r14, QWORD PTR [rcx+64]
        add r14, rax
        adc r12, rdx
        add r14, r11
        mov QWORD PTR [rcx+64], r14
        adc r12, 0
        ; a[i+9] += m[9] * mu
        mov rax, r13
        xor r11, r11
        mul QWORD PTR [r9+72]
        mov r14, QWORD PTR [rcx+72]
        add r14, rax
        adc r11, rdx
        add r14, r12
        mov QWORD PTR [rcx+72], r14
        adc r11, 0
        ; a[i+10] += m[10] * mu
        mov rax, r13
        xor r12, r12
        mul QWORD PTR [r9+80]
        mov r14, QWORD PTR [rcx+80]
        add r14, rax
        adc r12, rdx
        add r14, r11
        mov QWORD PTR [rcx+80], r14
        adc r12, 0
        ; a[i+11] += m[11] * mu
        mov rax, r13
        xor r11, r11
        mul QWORD PTR [r9+88]
        mov r14, QWORD PTR [rcx+88]
        add r14, rax
        adc r11, rdx
        add r14, r12
        mov QWORD PTR [rcx+88], r14
        adc r11, 0
        ; a[i+12] += m[12] * mu
        mov rax, r13
        xor r12, r12
        mul QWORD PTR [r9+96]
        mov r14, QWORD PTR [rcx+96]
        add r14, rax
        adc r12, rdx
        add r14, r11
        mov QWORD PTR [rcx+96], r14
        adc r12, 0
        ; a[i+13] += m[13] * mu
        mov rax, r13
        xor r11, r11
        mul QWORD PTR [r9+104]
        mov r14, QWORD PTR [rcx+104]
        add r14, rax
        adc r11, rdx
        add r14, r12
        mov QWORD PTR [rcx+104], r14
        adc r11, 0
        ; a[i+14] += m[14] * mu
        mov rax, r13
        xor r12, r12
        mul QWORD PTR [r9+112]
        mov r14, QWORD PTR [rcx+112]
        add r14, rax
        adc r12, rdx
        add r14, r11
        mov QWORD PTR [rcx+112], r14
        adc r12, 0
        ; a[i+15] += m[15] * mu
        mov rax, r13
        xor r11, r11
        mul QWORD PTR [r9+120]
        mov r14, QWORD PTR [rcx+120]
        add r14, rax
        adc r11, rdx
        add r14, r12
        mov QWORD PTR [rcx+120], r14
        adc r11, 0
        ; a[i+16] += m[16] * mu
        mov rax, r13
        xor r12, r12
        mul QWORD PTR [r9+128]
        mov r14, QWORD PTR [rcx+128]
        add r14, rax
        adc r12, rdx
        add r14, r11
        mov QWORD PTR [rcx+128], r14
        adc r12, 0
        ; a[i+17] += m[17] * mu
        mov rax, r13
        xor r11, r11
        mul QWORD PTR [r9+136]
        mov r14, QWORD PTR [rcx+136]
        add r14, rax
        adc r11, rdx
        add r14, r12
        mov QWORD PTR [rcx+136], r14
        adc r11, 0
        ; a[i+18] += m[18] * mu
        mov rax, r13
        xor r12, r12
        mul QWORD PTR [r9+144]
        mov r14, QWORD PTR [rcx+144]
        add r14, rax
        adc r12, rdx
        add r14, r11
        mov QWORD PTR [rcx+144], r14
        adc r12, 0
        ; a[i+19] += m[19] * mu
        mov rax, r13
        xor r11, r11
        mul QWORD PTR [r9+152]
        mov r14, QWORD PTR [rcx+152]
        add r14, rax
        adc r11, rdx
        add r14, r12
        mov QWORD PTR [rcx+152], r14
        adc r11, 0
        ; a[i+20] += m[20] * mu
        mov rax, r13
        xor r12, r12
        mul QWORD PTR [r9+160]
        mov r14, QWORD PTR [rcx+160]
        add r14, rax
        adc r12, rdx
        add r14, r11
        mov QWORD PTR [rcx+160], r14
        adc r12, 0
        ; a[i+21] += m[21] * mu
        mov rax, r13
        xor r11, r11
        mul QWORD PTR [r9+168]
        mov r14, QWORD PTR [rcx+168]
        add r14, rax
        adc r11, rdx
        add r14, r12
        mov QWORD PTR [rcx+168], r14
        adc r11, 0
        ; a[i+22] += m[22] * mu
        mov rax, r13
        xor r12, r12
        mul QWORD PTR [r9+176]
        mov r14, QWORD PTR [rcx+176]
        add r14, rax
        adc r12, rdx
        add r14, r11
        mov QWORD PTR [rcx+176], r14
        adc r12, 0
        ; a[i+23] += m[23] * mu
        ; The top column also folds in rsi, the carry left over from the
        ; previous iteration, and produces the new rsi for the next one.
        mov rax, r13
        mul QWORD PTR [r9+184]
        mov r14, QWORD PTR [rcx+184]
        add r12, rax
        adc rdx, rsi
        mov rsi, 0
        adc rsi, 0
        add r14, r12
        mov QWORD PTR [rcx+184], r14
        adc QWORD PTR [rcx+192], rdx
        adc rsi, 0
        ; i -= 1
        ; rcx advances one word, so the same offsets address a[i+1..].
        add rcx, 8
        dec r10
        jnz L_3072_mont_loop_24
        ; rcx now points at a[24]; write back the two cached words.
        mov QWORD PTR [rcx], r15
        mov QWORD PTR [rcx+8], rdi
        ; Turn the final carry (0 or 1) into a mask (0 or -1).
        neg rsi
        ; Arguments for sp_3072_cond_sub_24(r, a, b, m):
        ;   rcx = start of a, rdx = a + 24 words, r8 = modulus, r9 = mask.
        ; The modulus pointer must leave r9 BEFORE r9 is loaded with the
        ; mask. The former non-_WIN64 arm did the two moves in the other
        ; order and handed the mask to the callee as its b pointer.
        mov r8, r9
        mov r9, rsi
        mov rdx, rcx
        sub rcx, 192
        ; No shadow space is reserved here: the callee is the leaf routine
        ; in this file, which only touches the 192 bytes it allocates.
        call sp_3072_cond_sub_24
        pop rsi
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        ret
sp_3072_mont_reduce_24 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Conditionally subtract b from a, selected by the mask m.
; * m is -1 (all ones) to subtract b and 0 to subtract nothing.
; *
; * Arguments as the code reads them (Microsoft x64 register order):
; *   rcx  r  Result; words r[0] .. r[23] are written.
; *   rdx  a  Number to subtract from; words a[0] .. a[23] are read.
; *   r8   b  Number to subtract; words b[0] .. b[23] are read.
; *   r9   m  Mask applied to every word of b with pext.
; *
; * Returns in rax: 0 when the subtraction produced no final borrow,
; * -1 (all ones) when it did.
; * r12 is pushed on entry and popped before returning.
; * r10 and r11 are overwritten.
; */
; One link of the borrow chain:
;   bReg = pext(b[wOff / 8], m); aReg = a[wOff / 8] - bReg - borrow.
; With m = -1 pext returns the word unchanged, with m = 0 it returns 0.
; The previous word's result (prev) is stored in between; neither mov nor
; pext modifies the borrow flag, so the chain is not broken.
sp_3072_cond_sub_avx2_24_step MACRO wOff, bReg, aReg, prev
        mov bReg, QWORD PTR [r8+wOff]
        mov aReg, QWORD PTR [rdx+wOff]
        pext bReg, bReg, r9
        mov QWORD PTR [rcx+wOff-8], prev
        sbb aReg, bReg
ENDM
_text SEGMENT READONLY PARA
sp_3072_cond_sub_avx2_24 PROC
        push r12
        mov rax, 0
        ; Word 0 starts the borrow chain with sub.
        mov r12, QWORD PTR [r8]
        mov r10, QWORD PTR [rdx]
        pext r12, r12, r9
        sub r10, r12
        ; Words 1 .. 21: the roles of r10, r11 and r12 rotate by one
        ; register per word, so each pass emits three words (24 bytes).
sp_3072_cond_sub_avx2_24_off = 8
REPT 7
        sp_3072_cond_sub_avx2_24_step sp_3072_cond_sub_avx2_24_off, r12, r11, r10
        sp_3072_cond_sub_avx2_24_step sp_3072_cond_sub_avx2_24_off+8, r10, r12, r11
        sp_3072_cond_sub_avx2_24_step sp_3072_cond_sub_avx2_24_off+16, r11, r10, r12
sp_3072_cond_sub_avx2_24_off = sp_3072_cond_sub_avx2_24_off + 24
ENDM
        ; Words 22 and 23, then flush the last result word.
        sp_3072_cond_sub_avx2_24_step 176, r12, r11, r10
        sp_3072_cond_sub_avx2_24_step 184, r10, r12, r11
        mov QWORD PTR [rcx+184], r12
        ; rax = 0 - final borrow.
        sbb rax, 0
        pop r12
        ret
sp_3072_cond_sub_avx2_24 ENDP
_text ENDS
ENDIF
; /* Multiply a 24-word number by one 64-bit digit. (r = a * b)
; *
; * Arguments as the code reads them (Microsoft x64 register order):
; *   rcx  r  Result buffer; words r[0] .. r[24] are written (25 words).
; *   rdx  a  Multiplicand; words a[0] .. a[23] are read.
; *   r8   b  The single digit to multiply by.
; *
; * r12 is pushed on entry and popped before returning.
; * rax, rdx, r9, r10 and r11 are overwritten.
; */
; One inner column of the product:
;   accNext:accHi:accLo += a[aOff / 8] * b, then r[aOff / 8] = accLo.
; accNext is cleared first so it can take the carry out of accHi.
sp_3072_mul_d_24_step MACRO aOff, accLo, accHi, accNext
        mov rax, r8
        xor accNext, accNext
        mul QWORD PTR [r9+aOff]
        add accLo, rax
        mov QWORD PTR [rcx+aOff], accLo
        adc accHi, rdx
        adc accNext, 0
ENDM
_text SEGMENT READONLY PARA
sp_3072_mul_d_24 PROC
        push r12
        ; mul writes rdx, so the pointer to a is moved to r9 first.
        mov r9, rdx
        ; A[0] * B
        mov rax, r8
        xor r12, r12
        mul QWORD PTR [r9]
        mov r10, rax
        mov r11, rdx
        mov QWORD PTR [rcx], r10
        ; A[1] * B .. A[21] * B
        ; The roles of r10, r11 and r12 rotate by one register per column,
        ; so each pass of the repeat block emits three columns (24 bytes).
sp_3072_mul_d_24_off = 8
REPT 7
        sp_3072_mul_d_24_step sp_3072_mul_d_24_off, r11, r12, r10
        sp_3072_mul_d_24_step sp_3072_mul_d_24_off+8, r12, r10, r11
        sp_3072_mul_d_24_step sp_3072_mul_d_24_off+16, r10, r11, r12
sp_3072_mul_d_24_off = sp_3072_mul_d_24_off + 24
ENDM
        ; A[22] * B
        sp_3072_mul_d_24_step 176, r11, r12, r10
        ; A[23] * B - last column: no further accumulator is needed, the
        ; high half of the product becomes the top result word r[24].
        mov rax, r8
        mul QWORD PTR [r9+184]
        add r12, rax
        adc r10, rdx
        mov QWORD PTR [rcx+184], r12
        mov QWORD PTR [rcx+192], r10
        pop r12
        ret
sp_3072_mul_d_24 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Multiply a by the digit b and place the product in r. (r = a * b)
; * Variant built on MULX with the ADCX and ADOX carry chains.
; *
; * r  Result: 25 words are written.
; * a  A single precision integer of 24 words.
; * b  A single precision digit.
; */
_text SEGMENT READONLY PARA
; One middle word of the product for sp_3072_mul_d_avx2_24.
;   r_cur  word completed and stored by this step (low half added via ADCX)
;   r_nxt  reset from r13 (zero) and given the high half via ADOX
; The byte offset is taken from sp3072_muldx24_off, which is advanced by 8.
SP3072_MULDX24_STEP MACRO r_cur, r_nxt
        mulx    r10, r9, QWORD PTR [rax+sp3072_muldx24_off]
        mov     r_nxt, r13
        adcx    r_cur, r9
        adox    r_nxt, r10
        mov     QWORD PTR [rcx+sp3072_muldx24_off], r_cur
sp3072_muldx24_off = sp3072_muldx24_off + 8
        ENDM
; Register use: rcx = r, rax = a (moved out of rdx), rdx = b as the
; implicit MULX operand, r13 = constant zero, r11 and r12 alternate as
; the current and next result word.  r12 and r13 are saved and restored.
sp_3072_mul_d_avx2_24 PROC
        push    r12
        push    r13
        mov     rax, rdx
        ; Word 0.  The XOR both zeroes r13 and clears CF and OF so the two
        ; carry chains start clean.
        mov     rdx, r8
        xor     r13, r13
        mulx    r12, r11, QWORD PTR [rax]
        mov     QWORD PTR [rcx], r11
        ; Words 1..22: eleven rounds of the two-register alternation.
sp3072_muldx24_off = 8
        REPT    11
        SP3072_MULDX24_STEP r12, r11
        SP3072_MULDX24_STEP r11, r12
        ENDM
        ; Word 23, then fold the remaining CF into the top word.
        mulx    r10, r9, QWORD PTR [rax+184]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        adcx    r11, r13
        mov     QWORD PTR [rcx+184], r12
        mov     QWORD PTR [rcx+192], r11
        pop     r13
        pop     r12
        ret
sp_3072_mul_d_avx2_24 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
; *
; * d1   The high order half of the number to divide (arrives in rcx).
; * d0   The low order half of the number to divide (arrives in rdx).
; * div  The divisor (arrives in r8).
; * returns the quotient in rax.
; *
; * DIV raises a divide error when the quotient does not fit in 64 bits,
; * so the caller has to make sure d1 is smaller than div.
; */
_text SEGMENT READONLY PARA
div_3072_word_asm_24 PROC
        mov     r9, rdx                 ; keep d0 while rdx is reloaded
        mov     rdx, rcx                ; rdx = high half of the dividend
        mov     rax, r9                 ; rax = low half of the dividend
        div     r8                      ; rax = quotient, rdx = remainder
        ret
div_3072_word_asm_24 ENDP
_text ENDS
ENDIF
; /* Compare a with b without data dependent branches or addresses.
; *
; * a  A single precision integer of 24 words (pointer in rcx).
; * b  A single precision integer of 24 words (pointer in rdx).
; * return -1, 0 or +1 in rax when a is less than, equal to or greater
; *        than b respectively.
; */
_text SEGMENT READONLY PARA
; Working registers:
;   r8   all ones while every word seen so far was equal, zero afterwards
;   r9   constant 0, r10 constant 1
;   rax  provisional answer, starts at -1
; Words are visited from most to least significant.  Once r8 is zero both
; operands are masked to zero, so later words can no longer change rax.
sp_3072_cmp_24 PROC
        push    r12
        xor     r9, r9
        mov     r8, -1
        mov     rax, -1
        mov     r10, 1
sp3072_cmp24_off = 184
        REPT    24
        mov     r11, QWORD PTR [rcx+sp3072_cmp24_off]
        mov     r12, QWORD PTR [rdx+sp3072_cmp24_off]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
sp3072_cmp24_off = sp3072_cmp24_off - 8
        ENDM
        ; All words equal leaves r8 = rax = -1, which this turns into 0.
        ; Otherwise r8 is 0 and rax keeps its +1 or -1.
        xor     rax, r8
        pop     r12
        ret
sp_3072_cmp_24 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Montgomery reduction of a double width number, MULX/ADCX/ADOX variant.
; *
; * a   Number to reduce in place (pointer in rcx).  Words 0..47 are
; *     accessed and the result is left in words 0..23.
; * m   The 24 word modulus (pointer in rdx).
; * mp  The digit representing the negative inverse of m mod 2^n (r8).
; */
_text SEGMENT READONLY PARA
; One multiply-accumulate step for words 5..23 of the inner product.
;   r_cur  word being completed (low half added via ADCX, then stored)
;   r_nxt  next word of a, loaded here, high half added via ADOX
; r9 is biased by 96 bytes, hence the -88 and -96 adjustments.
; The byte offset is taken from sp3072_mrx24_off, which is advanced by 8.
SP3072_MRX24_MAC MACRO r_cur, r_nxt
        mulx    rcx, rax, QWORD PTR [r10+sp3072_mrx24_off]
        mov     r_nxt, QWORD PTR [r9+sp3072_mrx24_off-88]
        adcx    r_cur, rax
        adox    r_nxt, rcx
        mov     QWORD PTR [r9+sp3072_mrx24_off-96], r_cur
sp3072_mrx24_off = sp3072_mrx24_off + 8
        ENDM
; One step of the final masked subtraction for words 4..23.
;   r_m     receives the modulus word, filtered through the mask in rbp
;   r_a     receives the word from the upper half of a, then the difference
;   r_prev  difference produced by the previous step, stored here
; The byte offset is taken from sp3072_mrx24_sub, which is advanced by 8.
SP3072_MRX24_SUB MACRO r_m, r_a, r_prev
        mov     r_m, QWORD PTR [r10+sp3072_mrx24_sub]
        mov     r_a, QWORD PTR [r8+sp3072_mrx24_sub]
        pext    r_m, r_m, rbp
        mov     QWORD PTR [r9+sp3072_mrx24_sub-8], r_prev
        sbb     r_a, r_m
sp3072_mrx24_sub = sp3072_mrx24_sub + 8
        ENDM
; Register use inside the loop:
;   r9   a + 96 bytes + 8 bytes per finished round
;   r10  m,  r8 mp,  r11 rounds left,  rdx mu
;   r15, rdi, rsi, rbx  the four lowest live words of a, kept in registers
;   r12, r13            alternating accumulator words
;   r14  constant zero,  rbp carry carried over from the previous round
sp_3072_mont_reduce_avx2_24 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        push    rbx
        push    rbp
        mov     r9, rcx
        mov     r10, rdx
        xor     rbp, rbp
        ; i = 24
        mov     r11, 24
        mov     r15, QWORD PTR [r9]
        mov     rdi, QWORD PTR [r9+8]
        mov     rsi, QWORD PTR [r9+16]
        mov     rbx, QWORD PTR [r9+24]
        add     r9, 96
        xor     rbp, rbp
L_3072_mont_loop_avx2_24:
        ; mu = a[i] * mp.  The XOR zeroes r14 and clears CF and OF.
        mov     rdx, r15
        mov     r12, r15
        imul    rdx, r8
        xor     r14, r14
        ; a[i+0] += m[0] * mu, and shift the register window down one word
        mulx    rcx, rax, QWORD PTR [r10]
        mov     r15, rdi
        adcx    r12, rax
        adox    r15, rcx
        ; a[i+1] += m[1] * mu
        mulx    rcx, rax, QWORD PTR [r10+8]
        mov     rdi, rsi
        adcx    r15, rax
        adox    rdi, rcx
        ; a[i+2] += m[2] * mu
        mulx    rcx, rax, QWORD PTR [r10+16]
        mov     rsi, rbx
        adcx    rdi, rax
        adox    rsi, rcx
        ; a[i+3] += m[3] * mu
        mulx    rcx, rax, QWORD PTR [r10+24]
        mov     rbx, QWORD PTR [r9+-64]
        adcx    rsi, rax
        adox    rbx, rcx
        ; a[i+4] += m[4] * mu
        mulx    rcx, rax, QWORD PTR [r10+32]
        mov     r13, QWORD PTR [r9+-56]
        adcx    rbx, rax
        adox    r13, rcx
        ; a[i+5] .. a[i+22]
sp3072_mrx24_off = 40
        REPT    9
        SP3072_MRX24_MAC r13, r12
        SP3072_MRX24_MAC r12, r13
        ENDM
        ; a[i+23]
        SP3072_MRX24_MAC r13, r12
        ; Top word: add the carry of the previous round and collect the
        ; carry of this round in rbp.
        adcx    r12, rbp
        mov     rbp, r14
        mov     QWORD PTR [r9+96], r12
        adox    rbp, r14
        adcx    rbp, r14
        ; a += 1
        add     r9, 8
        ; i -= 1
        sub     r11, 1
        jnz     L_3072_mont_loop_avx2_24
        ; r8 = upper half of a, r9 = start of a, rbp = subtract mask.
        sub     r9, 96
        neg     rbp
        mov     r8, r9
        sub     r9, 192
        ; Words 0..3 of the upper half are still in registers.
        mov     rcx, QWORD PTR [r10]
        mov     rdx, r15
        pext    rcx, rcx, rbp
        sub     rdx, rcx
        mov     rcx, QWORD PTR [r10+8]
        mov     rax, rdi
        pext    rcx, rcx, rbp
        mov     QWORD PTR [r9], rdx
        sbb     rax, rcx
        mov     rdx, QWORD PTR [r10+16]
        mov     rcx, rsi
        pext    rdx, rdx, rbp
        mov     QWORD PTR [r9+8], rax
        sbb     rcx, rdx
        mov     rax, QWORD PTR [r10+24]
        mov     rdx, rbx
        pext    rax, rax, rbp
        mov     QWORD PTR [r9+16], rcx
        sbb     rdx, rax
        ; Words 4..21 in rounds of three, then words 22 and 23.
sp3072_mrx24_sub = 32
        REPT    6
        SP3072_MRX24_SUB rcx, rax, rdx
        SP3072_MRX24_SUB rdx, rcx, rax
        SP3072_MRX24_SUB rax, rdx, rcx
        ENDM
        SP3072_MRX24_SUB rcx, rax, rdx
        SP3072_MRX24_SUB rdx, rcx, rax
        mov     QWORD PTR [r9+184], rcx
        pop     rbp
        pop     rbx
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_3072_mont_reduce_avx2_24 ENDP
_text ENDS
ENDIF
; /* Conditionally subtract b from a using the mask m. (r = a - (b AND m))
; * m is -1 to subtract and 0 to leave a unchanged.
; *
; * r  Receives the 48 word result (pointer in rcx).
; * a  A 48 word number to subtract from (pointer in rdx).
; * b  A 48 word number to subtract (pointer in r8).
; * m  Mask value to apply (r9).
; * returns 0 in rax when there was no borrow and -1 when there was.
; */
_text SEGMENT READONLY PARA
; One borrow-propagating word of the subtraction for words 1..47.
;   r_cur   receives the word of a and then the difference
;   r_prev  difference from the previous step, stored one word lower
; The byte offset is taken from sp3072_cs48_off, which is advanced by 8.
SP3072_CS48_SBB MACRO r_cur, r_prev
        mov     r_cur, QWORD PTR [rdx+sp3072_cs48_off]
        mov     r8, QWORD PTR [rsp+sp3072_cs48_off]
        sbb     r_cur, r8
        mov     QWORD PTR [rcx+sp3072_cs48_off-8], r_prev
sp3072_cs48_off = sp3072_cs48_off + 8
        ENDM
sp_3072_cond_sub_48 PROC
        ; 384 bytes of stack hold the masked copy of b.
        sub     rsp, 384
        mov     rax, 0
        ; Pass 1: masked copy of b, two words per round.
sp3072_cs48_off = 0
        REPT    24
        mov     r10, QWORD PTR [r8+sp3072_cs48_off]
        mov     r11, QWORD PTR [r8+sp3072_cs48_off+8]
        and     r10, r9
        and     r11, r9
        mov     QWORD PTR [rsp+sp3072_cs48_off], r10
        mov     QWORD PTR [rsp+sp3072_cs48_off+8], r11
sp3072_cs48_off = sp3072_cs48_off + 16
        ENDM
        ; Pass 2: r = a - copy.  r8 is free now and serves as scratch.
        ; Each store trails its subtraction by one step; MOV leaves the
        ; borrow flag alone.
        mov     r10, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rsp]
        sub     r10, r8
sp3072_cs48_off = 8
        REPT    23
        SP3072_CS48_SBB r11, r10
        SP3072_CS48_SBB r10, r11
        ENDM
        SP3072_CS48_SBB r11, r10
        mov     QWORD PTR [rcx+376], r11
        ; rax was zero, so this leaves 0 or -1 depending on the borrow.
        sbb     rax, 0
        add     rsp, 384
        ret
sp_3072_cond_sub_48 ENDP
_text ENDS
; /* Reduce the number back to 3072 bits using Montgomery reduction.
; *
; * a   Number to reduce in place (pointer in rcx).  Words 0..95 are
; *     accessed and the result is left in words 0..47.
; * m   The 48 word modulus (pointer in rdx).
; * mp  The digit representing the negative inverse of m mod 2^n (r8).
; */
_text SEGMENT READONLY PARA
; One multiply-accumulate step for words 3..46 of the inner product:
; a[i+k] += m[k] * mu + carry.
;   r_cnew  cleared here, receives the carry for the next word
;   r_cold  carry handed over by the previous word
; The byte offset is taken from sp3072_mr48_off, which is advanced by 8.
SP3072_MR48_MAC MACRO r_cnew, r_cold
        mov     rax, r13
        xor     r_cnew, r_cnew
        mul     QWORD PTR [r9+sp3072_mr48_off]
        mov     r14, QWORD PTR [rcx+sp3072_mr48_off]
        add     r14, rax
        adc     r_cnew, rdx
        add     r14, r_cold
        mov     QWORD PTR [rcx+sp3072_mr48_off], r14
        adc     r_cnew, 0
sp3072_mr48_off = sp3072_mr48_off + 8
        ENDM
; Register use inside the loop:
;   rcx  a + 8 bytes per finished round,  r9 m,  r8 mp,  r10 rounds left
;   r13  mu,  r11 and r12 alternating carry words,  r14 scratch
;   r15, rdi  the two lowest live words of a, kept in registers
;   rsi  carry out of the top word, carried into the next round
sp_3072_mont_reduce_48 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        mov     r9, rdx
        xor     rsi, rsi
        ; i = 48
        mov     r10, 48
        mov     r15, QWORD PTR [rcx]
        mov     rdi, QWORD PTR [rcx+8]
L_3072_mont_loop_48:
        ; mu = a[i] * mp
        mov     r13, r15
        imul    r13, r8
        ; a[i+0] += m[0] * mu  (only the carry in r12 is needed afterwards)
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9]
        add     r15, rax
        adc     r12, rdx
        ; a[i+1] += m[1] * mu  (becomes the next round's a[i+0] in r15)
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+8]
        mov     r15, rdi
        add     r15, rax
        adc     r11, rdx
        add     r15, r12
        adc     r11, 0
        ; a[i+2] += m[2] * mu  (becomes the next round's a[i+1] in rdi)
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+16]
        mov     rdi, QWORD PTR [rcx+16]
        add     rdi, rax
        adc     r12, rdx
        add     rdi, r11
        adc     r12, 0
        ; a[i+3] .. a[i+46]
sp3072_mr48_off = 24
        REPT    22
        SP3072_MR48_MAC r11, r12
        SP3072_MR48_MAC r12, r11
        ENDM
        ; a[i+47] += m[47] * mu, then push the carries into a[i+48] and
        ; rsi.  rsi is reloaded with MOV rather than XOR because the
        ; carry flag must survive until the following ADC.
        mov     rax, r13
        mul     QWORD PTR [r9+376]
        mov     r14, QWORD PTR [rcx+376]
        add     r12, rax
        adc     rdx, rsi
        mov     rsi, 0
        adc     rsi, 0
        add     r14, r12
        mov     QWORD PTR [rcx+376], r14
        adc     QWORD PTR [rcx+384], rdx
        adc     rsi, 0
        ; i -= 1
        add     rcx, 8
        dec     r10
        jnz     L_3072_mont_loop_48
        ; Write the two register-held words back; rcx now points at the
        ; upper half of a.
        mov     QWORD PTR [rcx], r15
        mov     QWORD PTR [rcx+8], rdi
        ; Turn the final carry into the subtract mask.
        neg     rsi
        ; Set up sp_3072_cond_sub_48(r = a, a = upper half, b = m, mask).
        ; The callee in this file reads b from r8 and the mask from r9, so
        ; m must leave r9 before the mask is written there.  The same
        ; order is used whether or not _WIN64 is defined.
        mov     r8, r9
        mov     r9, rsi
        mov     rdx, rcx
        sub     rcx, 384
        ; NOTE(review): no shadow space is reserved and rsp is not
        ; re-aligned here; the callee as written in this file only uses
        ; stack it allocates itself.
        call    sp_3072_cond_sub_48
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_3072_mont_reduce_48 ENDP
_text ENDS
; /* Sub b from a into r. (r = a - b)
; *
; * r  A single precision integer (48 words) that receives the difference.
; * a  A single precision integer (48 words) to subtract from.
; * b  A single precision integer (48 words) to subtract.
; * returns 0 when no borrow leaves the top word, otherwise -1 (all ones).
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b.
; * Scratch: rax, r9, r10 and the flags. No stack is used.
; */
_text SEGMENT READONLY PARA
sp_3072_sub_48 PROC
        ; Word 0 opens the borrow chain. rax is cleared before the SUB so
        ; that nothing disturbs CF between here and the final SBB.
        mov     r9, QWORD PTR [rdx]
        xor     rax, rax
        sub     r9, QWORD PTR [r8]
        ; Words 1..46, two per expansion. Each step loads the next word of a,
        ; stores the previous result word and then subtracts with borrow;
        ; MOV leaves the flags alone, so CF flows from one SBB to the next.
        ; r10 holds the odd words and r9 the even ones.
        sp_3072_sub_48_off = 8
        REPT 23
        mov     r10, QWORD PTR [rdx+sp_3072_sub_48_off]
        mov     QWORD PTR [rcx+sp_3072_sub_48_off-8], r9
        sbb     r10, QWORD PTR [r8+sp_3072_sub_48_off]
        mov     r9, QWORD PTR [rdx+sp_3072_sub_48_off+8]
        mov     QWORD PTR [rcx+sp_3072_sub_48_off], r10
        sbb     r9, QWORD PTR [r8+sp_3072_sub_48_off+8]
        sp_3072_sub_48_off = sp_3072_sub_48_off + 16
        ENDM
        ; Word 47 closes the chain and flushes the last two result words.
        mov     r10, QWORD PTR [rdx+376]
        mov     QWORD PTR [rcx+368], r9
        sbb     r10, QWORD PTR [r8+376]
        mov     QWORD PTR [rcx+376], r10
        ; rax = 0 - CF: 0 without a final borrow, -1 with one.
        sbb     rax, 0
        ret
sp_3072_sub_48 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Mul a by digit b into r. (r = a * b)
; *
; * r  A single precision integer; 49 words (r[0]..r[48]) are written.
; * a  A single precision integer (48 words).
; * b  A single precision digit.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b.
; * a is moved to rax because MULX takes its implicit multiplicand from rdx.
; * r13 is kept at zero; r12 and r13 are saved and restored on the stack.
; * Scratch: rax, rdx, r9, r10, r11 and the flags.
; */
_text SEGMENT READONLY PARA
sp_3072_mul_d_avx2_48 PROC
        push    r12
        push    r13
        mov     rax, rdx
        ; A[0] * B
        mov     rdx, r8
        ; Zero r13; XOR also clears CF and OF, the two carry chains used below.
        xor     r13, r13
        mulx    r12, r11, QWORD PTR [rax]
        mov     QWORD PTR [rcx], r11
        ; A[1]..A[46] * B, two words per expansion.
        ; For every word: r10:r9 = A[i] * B. The low half is added to the
        ; pending word through the CF chain (ADCX) and the high half seeds
        ; the following word through the OF chain (ADOX). r12 and r11 swap
        ; the roles of "pending word" and "following word" on each step.
        sp_3072_mul_d_avx2_48_off = 8
        REPT 23
        mulx    r10, r9, QWORD PTR [rax+sp_3072_mul_d_avx2_48_off]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+sp_3072_mul_d_avx2_48_off], r12
        mulx    r10, r9, QWORD PTR [rax+sp_3072_mul_d_avx2_48_off+8]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+sp_3072_mul_d_avx2_48_off+8], r11
        sp_3072_mul_d_avx2_48_off = sp_3072_mul_d_avx2_48_off + 16
        ENDM
        ; A[47] * B
        mulx    r10, r9, QWORD PTR [rax+376]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        ; Fold the carry still held in CF into the top word.
        adcx    r11, r13
        mov     QWORD PTR [rcx+376], r12
        mov     QWORD PTR [rcx+384], r11
        pop     r13
        pop     r12
        ret
sp_3072_mul_d_avx2_48 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
; *
; * d1   The high order half of the number to divide.
; * d0   The low order half of the number to divide.
; * div  The divisor.
; * returns the quotient of the division.
; *
; * Registers on entry: rcx = d1, rdx = d0, r8 = div. Quotient is returned
; * in rax; the remainder is left in rdx and not returned.
; * DIV raises a divide error (#DE) when div is 0 or when the quotient does
; * not fit in 64 bits (d1 >= div), so callers must avoid both cases.
; */
_text SEGMENT READONLY PARA
div_3072_word_asm_48 PROC
        ; DIV divides rdx:rax, so move the halves into place. d0 must be
        ; copied out of rdx before rdx is overwritten with d1.
        mov     rax, rdx
        mov     rdx, rcx
        div     r8
        ret
div_3072_word_asm_48 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract b and 0 to subtract nothing (r becomes a copy of a).
; *
; * r  A single precision number (48 words) receiving the result.
; * a  A single precision number (48 words) to subtract from.
; * b  A single precision number (48 words) to subtract.
; * m  Mask value to apply.
; * returns 0 when no borrow leaves the top word, otherwise -1 (all ones).
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b, r9 = m.
; * Each word of b is filtered with PEXT against m: an all-ones mask yields
; * the word unchanged and a zero mask yields 0. The same instructions run
; * for either mask value.
; * r12 is saved and restored on the stack. Scratch: rax, r10, r11, flags.
; */
_text SEGMENT READONLY PARA
sp_3072_cond_sub_avx2_48 PROC
        push    r12
        ; Clear the return value with MOV, which leaves the flags untouched.
        mov     rax, 0
        ; Word 0 opens the borrow chain.
        mov     r12, QWORD PTR [r8]
        mov     r10, QWORD PTR [rdx]
        pext    r12, r12, r9
        sub     r10, r12
        ; Words 1..45, three per expansion. Each step loads b[i] and a[i],
        ; masks b[i], stores the previous result word and subtracts with
        ; borrow. MOV and PEXT here never touch CF, so the borrow survives
        ; from one SBB to the next. r10, r11 and r12 rotate through the
        ; roles of masked-b word, a word and pending result.
        sp_3072_cond_sub_avx2_48_off = 8
        REPT 15
        mov     r12, QWORD PTR [r8+sp_3072_cond_sub_avx2_48_off]
        mov     r11, QWORD PTR [rdx+sp_3072_cond_sub_avx2_48_off]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+sp_3072_cond_sub_avx2_48_off-8], r10
        sbb     r11, r12
        mov     r10, QWORD PTR [r8+sp_3072_cond_sub_avx2_48_off+8]
        mov     r12, QWORD PTR [rdx+sp_3072_cond_sub_avx2_48_off+8]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+sp_3072_cond_sub_avx2_48_off], r11
        sbb     r12, r10
        mov     r11, QWORD PTR [r8+sp_3072_cond_sub_avx2_48_off+16]
        mov     r10, QWORD PTR [rdx+sp_3072_cond_sub_avx2_48_off+16]
        pext    r11, r11, r9
        mov     QWORD PTR [rcx+sp_3072_cond_sub_avx2_48_off+8], r12
        sbb     r10, r11
        sp_3072_cond_sub_avx2_48_off = sp_3072_cond_sub_avx2_48_off + 24
        ENDM
        ; Word 46
        mov     r12, QWORD PTR [r8+368]
        mov     r11, QWORD PTR [rdx+368]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+360], r10
        sbb     r11, r12
        ; Word 47
        mov     r10, QWORD PTR [r8+376]
        mov     r12, QWORD PTR [rdx+376]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+368], r11
        sbb     r12, r10
        mov     QWORD PTR [rcx+376], r12
        ; rax = 0 - CF: 0 without a final borrow, -1 with one.
        sbb     rax, 0
        pop     r12
        ret
sp_3072_cond_sub_avx2_48 ENDP
_text ENDS
ENDIF
; /* Compare a with b in constant time.
; *
; * a  A single precision integer (48 words).
; * b  A single precision integer (48 words).
; * return -ve, 0 or +ve if a is less than, equal to or greater than b
; * respectively. The values produced here are -1, 0 and 1.
; *
; * Registers on entry: rcx = a, rdx = b.
; * All 48 words are always read and the result is selected with
; * conditional moves only; there are no branches.
; * r12 is saved and restored on the stack.
; * Scratch: rax, r8, r9, r10, r11 and the flags.
; */
_text SEGMENT READONLY PARA
sp_3072_cmp_48 PROC
        push    r12
        xor     r9, r9                  ; constant 0
        mov     r8, -1                  ; mask: all ones until a difference is seen
        mov     rax, -1                 ; running result, fixed up at the end
        mov     r10, 1                  ; constant 1
        ; Walk from the most significant word (offset 376) down to word 0.
        ; Both words are ANDed with the mask, so once a difference has been
        ; found every later pair compares as 0 == 0 and changes nothing.
        ; On the first differing pair: above -> rax = 1, below -> rax = -1
        ; (r8 is still -1 at that point), then the mask is cleared to 0.
        sp_3072_cmp_48_off = 376
        REPT 48
        mov     r11, QWORD PTR [rcx+sp_3072_cmp_48_off]
        mov     r12, QWORD PTR [rdx+sp_3072_cmp_48_off]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        sp_3072_cmp_48_off = sp_3072_cmp_48_off - 8
        ENDM
        ; Equal inputs leave rax = -1 and r8 = -1, so the XOR yields 0.
        ; Otherwise r8 is 0 and rax keeps its 1 or -1.
        xor     rax, r8
        pop     r12
        ret
sp_3072_cmp_48 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Reduce the number back to 3072 bits using Montgomery reduction.
; *
; * a A single precision number to reduce in place.
; * m The single precision number representing the modulus.
; * mp The digit representing the negative inverse of m mod 2^n.
; */
_text SEGMENT READONLY PARA
sp_3072_mont_reduce_avx2_48 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
push rbp
mov r9, rcx
mov r10, rdx
xor rbp, rbp
; i = 48
mov r11, 48
mov r15, QWORD PTR [r9]
mov rdi, QWORD PTR [r9+8]
mov rsi, QWORD PTR [r9+16]
mov rbx, QWORD PTR [r9+24]
add r9, 192
xor rbp, rbp
L_3072_mont_loop_avx2_48:
; mu = a[i] * mp
mov rdx, r15
mov r12, r15
imul rdx, r8
xor r14, r14
; a[i+0] += m[0] * mu
mulx rcx, rax, QWORD PTR [r10]
mov r15, rdi
adcx r12, rax
adox r15, rcx
; a[i+1] += m[1] * mu
mulx rcx, rax, QWORD PTR [r10+8]
mov rdi, rsi
adcx r15, rax
adox rdi, rcx
; a[i+2] += m[2] * mu
mulx rcx, rax, QWORD PTR [r10+16]
mov rsi, rbx
adcx rdi, rax
adox rsi, rcx
; a[i+3] += m[3] * mu
mulx rcx, rax, QWORD PTR [r10+24]
mov rbx, QWORD PTR [r9+-160]
adcx rsi, rax
adox rbx, rcx
; a[i+4] += m[4] * mu
mulx rcx, rax, QWORD PTR [r10+32]
mov r13, QWORD PTR [r9+-152]
adcx rbx, rax
adox r13, rcx
; a[i+5] += m[5] * mu
mulx rcx, rax, QWORD PTR [r10+40]
mov r12, QWORD PTR [r9+-144]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-152], r13
; a[i+6] += m[6] * mu
mulx rcx, rax, QWORD PTR [r10+48]
mov r13, QWORD PTR [r9+-136]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-144], r12
; a[i+7] += m[7] * mu
mulx rcx, rax, QWORD PTR [r10+56]
mov r12, QWORD PTR [r9+-128]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-136], r13
; a[i+8] += m[8] * mu
mulx rcx, rax, QWORD PTR [r10+64]
mov r13, QWORD PTR [r9+-120]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-128], r12
; a[i+9] += m[9] * mu
mulx rcx, rax, QWORD PTR [r10+72]
mov r12, QWORD PTR [r9+-112]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-120], r13
; a[i+10] += m[10] * mu
mulx rcx, rax, QWORD PTR [r10+80]
mov r13, QWORD PTR [r9+-104]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-112], r12
; a[i+11] += m[11] * mu
mulx rcx, rax, QWORD PTR [r10+88]
mov r12, QWORD PTR [r9+-96]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-104], r13
; a[i+12] += m[12] * mu
mulx rcx, rax, QWORD PTR [r10+96]
mov r13, QWORD PTR [r9+-88]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-96], r12
; a[i+13] += m[13] * mu
mulx rcx, rax, QWORD PTR [r10+104]
mov r12, QWORD PTR [r9+-80]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-88], r13
; a[i+14] += m[14] * mu
mulx rcx, rax, QWORD PTR [r10+112]
mov r13, QWORD PTR [r9+-72]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-80], r12
; a[i+15] += m[15] * mu
mulx rcx, rax, QWORD PTR [r10+120]
mov r12, QWORD PTR [r9+-64]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-72], r13
; a[i+16] += m[16] * mu
mulx rcx, rax, QWORD PTR [r10+128]
mov r13, QWORD PTR [r9+-56]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-64], r12
; a[i+17] += m[17] * mu
mulx rcx, rax, QWORD PTR [r10+136]
mov r12, QWORD PTR [r9+-48]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-56], r13
; a[i+18] += m[18] * mu
mulx rcx, rax, QWORD PTR [r10+144]
mov r13, QWORD PTR [r9+-40]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-48], r12
; a[i+19] += m[19] * mu
mulx rcx, rax, QWORD PTR [r10+152]
mov r12, QWORD PTR [r9+-32]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-40], r13
; a[i+20] += m[20] * mu
mulx rcx, rax, QWORD PTR [r10+160]
mov r13, QWORD PTR [r9+-24]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-32], r12
; a[i+21] += m[21] * mu
mulx rcx, rax, QWORD PTR [r10+168]
mov r12, QWORD PTR [r9+-16]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-24], r13
; a[i+22] += m[22] * mu
mulx rcx, rax, QWORD PTR [r10+176]
mov r13, QWORD PTR [r9+-8]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-16], r12
; a[i+23] += m[23] * mu
mulx rcx, rax, QWORD PTR [r10+184]
mov r12, QWORD PTR [r9]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-8], r13
; a[i+24] += m[24] * mu
mulx rcx, rax, QWORD PTR [r10+192]
mov r13, QWORD PTR [r9+8]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9], r12
; a[i+25] += m[25] * mu
mulx rcx, rax, QWORD PTR [r10+200]
mov r12, QWORD PTR [r9+16]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+8], r13
; a[i+26] += m[26] * mu
mulx rcx, rax, QWORD PTR [r10+208]
mov r13, QWORD PTR [r9+24]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+16], r12
; a[i+27] += m[27] * mu
mulx rcx, rax, QWORD PTR [r10+216]
mov r12, QWORD PTR [r9+32]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+24], r13
; a[i+28] += m[28] * mu
mulx rcx, rax, QWORD PTR [r10+224]
mov r13, QWORD PTR [r9+40]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+32], r12
; a[i+29] += m[29] * mu
mulx rcx, rax, QWORD PTR [r10+232]
mov r12, QWORD PTR [r9+48]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+40], r13
; a[i+30] += m[30] * mu
mulx rcx, rax, QWORD PTR [r10+240]
mov r13, QWORD PTR [r9+56]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+48], r12
; a[i+31] += m[31] * mu
mulx rcx, rax, QWORD PTR [r10+248]
mov r12, QWORD PTR [r9+64]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+56], r13
; a[i+32] += m[32] * mu
mulx rcx, rax, QWORD PTR [r10+256]
mov r13, QWORD PTR [r9+72]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+64], r12
; a[i+33] += m[33] * mu
mulx rcx, rax, QWORD PTR [r10+264]
mov r12, QWORD PTR [r9+80]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+72], r13
; a[i+34] += m[34] * mu
mulx rcx, rax, QWORD PTR [r10+272]
mov r13, QWORD PTR [r9+88]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+80], r12
; a[i+35] += m[35] * mu
mulx rcx, rax, QWORD PTR [r10+280]
mov r12, QWORD PTR [r9+96]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+88], r13
; a[i+36] += m[36] * mu
mulx rcx, rax, QWORD PTR [r10+288]
mov r13, QWORD PTR [r9+104]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+96], r12
; a[i+37] += m[37] * mu
mulx rcx, rax, QWORD PTR [r10+296]
mov r12, QWORD PTR [r9+112]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+104], r13
; a[i+38] += m[38] * mu
mulx rcx, rax, QWORD PTR [r10+304]
mov r13, QWORD PTR [r9+120]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+112], r12
; a[i+39] += m[39] * mu
mulx rcx, rax, QWORD PTR [r10+312]
mov r12, QWORD PTR [r9+128]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+120], r13
; a[i+40] += m[40] * mu
mulx rcx, rax, QWORD PTR [r10+320]
mov r13, QWORD PTR [r9+136]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+128], r12
; a[i+41] += m[41] * mu
mulx rcx, rax, QWORD PTR [r10+328]
mov r12, QWORD PTR [r9+144]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+136], r13
; a[i+42] += m[42] * mu
mulx rcx, rax, QWORD PTR [r10+336]
mov r13, QWORD PTR [r9+152]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+144], r12
; a[i+43] += m[43] * mu
mulx rcx, rax, QWORD PTR [r10+344]
mov r12, QWORD PTR [r9+160]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+152], r13
; a[i+44] += m[44] * mu
mulx rcx, rax, QWORD PTR [r10+352]
mov r13, QWORD PTR [r9+168]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+160], r12
; a[i+45] += m[45] * mu
mulx rcx, rax, QWORD PTR [r10+360]
mov r12, QWORD PTR [r9+176]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+168], r13
; a[i+46] += m[46] * mu
mulx rcx, rax, QWORD PTR [r10+368]
mov r13, QWORD PTR [r9+184]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+176], r12
; a[i+47] += m[47] * mu
mulx rcx, rax, QWORD PTR [r10+376]
mov r12, QWORD PTR [r9+192]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+184], r13
adcx r12, rbp
mov rbp, r14
mov QWORD PTR [r9+192], r12
adox rbp, r14
adcx rbp, r14
; a += 1
add r9, 8
; i -= 1
sub r11, 1
jnz L_3072_mont_loop_avx2_48
sub r9, 192
neg rbp
mov r8, r9
sub r9, 384
mov rcx, QWORD PTR [r10]
mov rdx, r15
pext rcx, rcx, rbp
sub rdx, rcx
mov rcx, QWORD PTR [r10+8]
mov rax, rdi
pext rcx, rcx, rbp
mov QWORD PTR [r9], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+16]
mov rcx, rsi
pext rdx, rdx, rbp
mov QWORD PTR [r9+8], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+24]
mov rdx, rbx
pext rax, rax, rbp
mov QWORD PTR [r9+16], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+32]
mov rax, QWORD PTR [r8+32]
pext rcx, rcx, rbp
mov QWORD PTR [r9+24], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+40]
mov rcx, QWORD PTR [r8+40]
pext rdx, rdx, rbp
mov QWORD PTR [r9+32], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+48]
mov rdx, QWORD PTR [r8+48]
pext rax, rax, rbp
mov QWORD PTR [r9+40], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+56]
mov rax, QWORD PTR [r8+56]
pext rcx, rcx, rbp
mov QWORD PTR [r9+48], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+64]
mov rcx, QWORD PTR [r8+64]
pext rdx, rdx, rbp
mov QWORD PTR [r9+56], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+72]
mov rdx, QWORD PTR [r8+72]
pext rax, rax, rbp
mov QWORD PTR [r9+64], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+80]
mov rax, QWORD PTR [r8+80]
pext rcx, rcx, rbp
mov QWORD PTR [r9+72], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+88]
mov rcx, QWORD PTR [r8+88]
pext rdx, rdx, rbp
mov QWORD PTR [r9+80], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+96]
mov rdx, QWORD PTR [r8+96]
pext rax, rax, rbp
mov QWORD PTR [r9+88], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+104]
mov rax, QWORD PTR [r8+104]
pext rcx, rcx, rbp
mov QWORD PTR [r9+96], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+112]
mov rcx, QWORD PTR [r8+112]
pext rdx, rdx, rbp
mov QWORD PTR [r9+104], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+120]
mov rdx, QWORD PTR [r8+120]
pext rax, rax, rbp
mov QWORD PTR [r9+112], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+128]
mov rax, QWORD PTR [r8+128]
pext rcx, rcx, rbp
mov QWORD PTR [r9+120], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+136]
mov rcx, QWORD PTR [r8+136]
pext rdx, rdx, rbp
mov QWORD PTR [r9+128], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+144]
mov rdx, QWORD PTR [r8+144]
pext rax, rax, rbp
mov QWORD PTR [r9+136], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+152]
mov rax, QWORD PTR [r8+152]
pext rcx, rcx, rbp
mov QWORD PTR [r9+144], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+160]
mov rcx, QWORD PTR [r8+160]
pext rdx, rdx, rbp
mov QWORD PTR [r9+152], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+168]
mov rdx, QWORD PTR [r8+168]
pext rax, rax, rbp
mov QWORD PTR [r9+160], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+176]
mov rax, QWORD PTR [r8+176]
pext rcx, rcx, rbp
mov QWORD PTR [r9+168], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+184]
mov rcx, QWORD PTR [r8+184]
pext rdx, rdx, rbp
mov QWORD PTR [r9+176], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+192]
mov rdx, QWORD PTR [r8+192]
pext rax, rax, rbp
mov QWORD PTR [r9+184], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+200]
mov rax, QWORD PTR [r8+200]
pext rcx, rcx, rbp
mov QWORD PTR [r9+192], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+208]
mov rcx, QWORD PTR [r8+208]
pext rdx, rdx, rbp
mov QWORD PTR [r9+200], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+216]
mov rdx, QWORD PTR [r8+216]
pext rax, rax, rbp
mov QWORD PTR [r9+208], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+224]
mov rax, QWORD PTR [r8+224]
pext rcx, rcx, rbp
mov QWORD PTR [r9+216], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+232]
mov rcx, QWORD PTR [r8+232]
pext rdx, rdx, rbp
mov QWORD PTR [r9+224], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+240]
mov rdx, QWORD PTR [r8+240]
pext rax, rax, rbp
mov QWORD PTR [r9+232], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+248]
mov rax, QWORD PTR [r8+248]
pext rcx, rcx, rbp
mov QWORD PTR [r9+240], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+256]
mov rcx, QWORD PTR [r8+256]
pext rdx, rdx, rbp
mov QWORD PTR [r9+248], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+264]
mov rdx, QWORD PTR [r8+264]
pext rax, rax, rbp
mov QWORD PTR [r9+256], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+272]
mov rax, QWORD PTR [r8+272]
pext rcx, rcx, rbp
mov QWORD PTR [r9+264], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+280]
mov rcx, QWORD PTR [r8+280]
pext rdx, rdx, rbp
mov QWORD PTR [r9+272], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+288]
mov rdx, QWORD PTR [r8+288]
pext rax, rax, rbp
mov QWORD PTR [r9+280], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+296]
mov rax, QWORD PTR [r8+296]
pext rcx, rcx, rbp
mov QWORD PTR [r9+288], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+304]
mov rcx, QWORD PTR [r8+304]
pext rdx, rdx, rbp
mov QWORD PTR [r9+296], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+312]
mov rdx, QWORD PTR [r8+312]
pext rax, rax, rbp
mov QWORD PTR [r9+304], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+320]
mov rax, QWORD PTR [r8+320]
pext rcx, rcx, rbp
mov QWORD PTR [r9+312], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+328]
mov rcx, QWORD PTR [r8+328]
pext rdx, rdx, rbp
mov QWORD PTR [r9+320], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+336]
mov rdx, QWORD PTR [r8+336]
pext rax, rax, rbp
mov QWORD PTR [r9+328], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+344]
mov rax, QWORD PTR [r8+344]
pext rcx, rcx, rbp
mov QWORD PTR [r9+336], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+352]
mov rcx, QWORD PTR [r8+352]
pext rdx, rdx, rbp
mov QWORD PTR [r9+344], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+360]
mov rdx, QWORD PTR [r8+360]
pext rax, rax, rbp
mov QWORD PTR [r9+352], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+368]
mov rax, QWORD PTR [r8+368]
pext rcx, rcx, rbp
mov QWORD PTR [r9+360], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+376]
mov rcx, QWORD PTR [r8+376]
pext rdx, rdx, rbp
mov QWORD PTR [r9+368], rax
sbb rcx, rdx
mov QWORD PTR [r9+376], rcx
pop rbp
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_3072_mont_reduce_avx2_48 ENDP
_text ENDS
ENDIF
; /* Conditionally add a and b using the mask m. (r = a + (b & m))
; * m is -1 to add and 0 when not.
; *
; * r  A single precision number representing conditional add result.
; * a  A single precision number to add with.
; * b  A single precision number to add.
; * m  Mask value to apply.
; *
; * Windows x64 arguments: rcx = r, rdx = a, r8 = b, r9 = m.
; * All three numbers are 24 words (192 bytes) long.
; * Returns the carry out of the top word (0 or 1) in rax.
; *
; * The routine is branch free and touches only volatile registers
; * (rax, r10, r11) and no stack, so it is a leaf function that needs no
; * unwind data.  Masking with AND destroys the carry flag, so the running
; * carry is parked in rax (0 or -1) while each pair of words of b is masked
; * and is put back into CF with "add rax, rax" before the add-with-carry.
; *
; * r may be the same buffer as a or as b.
; * NOTE(review): partially overlapping buffers are not supported - confirm
; * that no caller relies on that.
; */
_text SEGMENT READONLY PARA
sp_3072_cond_add_24 PROC
        ; rax = saved carry: 0 when clear, -1 when set.
        xor     eax, eax
        SP3072_CADD_OFF = 0
        REPT 12
        ; Mask the next two words of b (this clobbers the flags).
        mov     r10, QWORD PTR [r8+SP3072_CADD_OFF]
        mov     r11, QWORD PTR [r8+(SP3072_CADD_OFF+8)]
        and     r10, r9
        and     r11, r9
        ; CF = saved carry (-1 + -1 carries out, 0 + 0 does not).
        add     rax, rax
        adc     r10, QWORD PTR [rdx+SP3072_CADD_OFF]
        adc     r11, QWORD PTR [rdx+(SP3072_CADD_OFF+8)]
        ; rax = -CF: remember the carry for the next pair of words.
        sbb     rax, rax
        mov     QWORD PTR [rcx+SP3072_CADD_OFF], r10
        mov     QWORD PTR [rcx+(SP3072_CADD_OFF+8)], r11
        SP3072_CADD_OFF = SP3072_CADD_OFF + 16
        ENDM
        ; Convert the saved carry from 0 / -1 to 0 / 1.
        neg     rax
        ret
sp_3072_cond_add_24 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Conditionally add a and b using the mask m. (r = a + (b & m))
; * m is -1 to add and 0 when not.
; *
; * r  A single precision number representing conditional add result.
; * a  A single precision number to add with.
; * b  A single precision number to add.
; * m  Mask value to apply.
; *
; * Windows x64 arguments: rcx = r, rdx = a, r8 = b, r9 = m.
; * All three numbers are 24 words (192 bytes) long.
; * Returns the carry out of the top word (0 or 1) in rax.
; *
; * PEXT is used to apply the mask because it leaves the flags alone, which
; * lets one unbroken add-with-carry chain run across all 24 words.  With a
; * mask of all ones PEXT returns the word unchanged and with a mask of zero
; * it returns zero; any other mask value does not behave like an AND.
; *
; * Only volatile registers (rax, r10, r11) are used and the stack is not
; * touched, so this is a leaf function that needs no unwind data.
; * The store of word k-1 is issued after word k of a and b has been read.
; */
_text SEGMENT READONLY PARA
sp_3072_cond_add_avx2_24 PROC
        ; rax collects the final carry; clear it before the chain starts.
        xor     eax, eax
        ; Word 0 starts the carry chain.
        mov     r10, QWORD PTR [r8]
        pext    r10, r10, r9
        add     r10, QWORD PTR [rdx]
        ; Words 1..22, two per repetition, alternating between r11 and r10.
        SP3072_CADDX_OFF = 8
        REPT 11
        mov     r11, QWORD PTR [r8+SP3072_CADDX_OFF]
        pext    r11, r11, r9
        adc     r11, QWORD PTR [rdx+SP3072_CADDX_OFF]
        mov     QWORD PTR [rcx+(SP3072_CADDX_OFF-8)], r10
        mov     r10, QWORD PTR [r8+(SP3072_CADDX_OFF+8)]
        pext    r10, r10, r9
        adc     r10, QWORD PTR [rdx+(SP3072_CADDX_OFF+8)]
        mov     QWORD PTR [rcx+SP3072_CADDX_OFF], r11
        SP3072_CADDX_OFF = SP3072_CADDX_OFF + 16
        ENDM
        ; Word 23 and the two stores still outstanding.
        mov     r11, QWORD PTR [r8+184]
        pext    r11, r11, r9
        adc     r11, QWORD PTR [rdx+184]
        mov     QWORD PTR [rcx+176], r10
        mov     QWORD PTR [rcx+184], r11
        ; Fold the carry out of the top word into the return value.
        adc     rax, 0
        ret
sp_3072_cond_add_avx2_24 ENDP
_text ENDS
ENDIF
; /* Shift number left by n bits. (r = a << n)
; *
; * r  Result of left shift by n.
; * a  Number to shift.
; * n  Amount to shift.
; *
; * Windows x64 arguments: rcx = r, rdx = a, r8 = n.
; * a is 48 words long; r receives 49 words because the bits shifted out of
; * the top word of a are stored in r[48].
; * The shift count is taken from cl, so the hardware uses it modulo 64.
; *
; * The number is processed from the top word down with a two register
; * sliding window (rax and r8 alternate as the high and low word of each
; * double precision shift).  Only volatile registers are used and the stack
; * is not touched, so this is a leaf function that needs no unwind data.
; * Every word of a is read before the matching word of r is written, so r
; * may be the same buffer as a.
; */
_text SEGMENT READONLY PARA
sp_3072_lshift_48 PROC
        mov     r9, rcx                         ; r9 = r
        mov     rcx, r8                         ; cl = n
        ; r[48] = bits shifted out of a[47].
        mov     rax, QWORD PTR [rdx+376]
        xor     r10d, r10d
        shld    r10, rax, cl
        mov     QWORD PTR [r9+384], r10
        ; Invariant at the top of each repetition: rax holds the unshifted
        ; word of a found at byte offset SP3072_LSH_OFF.
        SP3072_LSH_OFF = 376
        REPT 23
        mov     r8, QWORD PTR [rdx+(SP3072_LSH_OFF-8)]
        shld    rax, r8, cl
        mov     QWORD PTR [r9+SP3072_LSH_OFF], rax
        mov     rax, QWORD PTR [rdx+(SP3072_LSH_OFF-16)]
        shld    r8, rax, cl
        mov     QWORD PTR [r9+(SP3072_LSH_OFF-8)], r8
        SP3072_LSH_OFF = SP3072_LSH_OFF - 16
        ENDM
        ; rax = a[1] here; finish with r[1] and r[0].
        mov     r8, QWORD PTR [rdx]
        shld    rax, r8, cl
        mov     QWORD PTR [r9+8], rax
        shl     r8, cl
        mov     QWORD PTR [r9], r8
        ret
sp_3072_lshift_48 ENDP
_text ENDS
ENDIF
ENDIF
IFDEF WOLFSSL_SP_4096
IFDEF WOLFSSL_SP_4096
; /* Read big endian unsigned byte array into r.
; * Uses the bswap instruction.
; *
; * r     A single precision integer.
; * size  Maximum number of bytes to convert.
; * a     Byte array.
; * n     Number of bytes in array to read.
; *
; * Windows x64 arguments: rcx = r, rdx = size, r8 = a, r9 = n.
; * The size argument is not read: exactly 512 bytes (64 words) of r are
; * always produced, and n is not checked against that limit here.
; *
; * The array is consumed from its last byte backwards, 64 bytes at a time,
; * then 8 bytes at a time; the 1 to 7 bytes left over are the most
; * significant ones and sit at the start of a.  The rest of r is zeroed.
; *
; * Only volatile registers are used and the stack is not touched, so this
; * is a leaf function that needs no unwind data.
; */
_text SEGMENT READONLY PARA
sp_4096_from_bin_bswap PROC
        ; NOTE(review): n is presumably declared as a 32-bit int in the C
        ; prototype (not visible here).  Win64 leaves the upper half of r9
        ; undefined for such an argument, so widen it before 64-bit use.
        movsxd  r9, r9d
        lea     r11, [r8+r9]                    ; r11 = one past last byte
        lea     rdx, [rcx+512]                  ; rdx = end of r
        jmp     L_4096_from_bin_bswap_64_end
L_4096_from_bin_bswap_64_start:
        sub     r11, 64
        SP4096_FBB_OFF = 0
        REPT 4
        mov     rax, QWORD PTR [r11+(56-SP4096_FBB_OFF)]
        mov     r10, QWORD PTR [r11+(48-SP4096_FBB_OFF)]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+SP4096_FBB_OFF], rax
        mov     QWORD PTR [rcx+(SP4096_FBB_OFF+8)], r10
        SP4096_FBB_OFF = SP4096_FBB_OFF + 16
        ENDM
        add     rcx, 64
        sub     r9, 64
L_4096_from_bin_bswap_64_end:
        cmp     r9, 63
        jg      L_4096_from_bin_bswap_64_start
        jmp     L_4096_from_bin_bswap_8_end
L_4096_from_bin_bswap_8_start:
        sub     r11, 8
        mov     rax, QWORD PTR [r11]
        bswap   rax
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
L_4096_from_bin_bswap_8_end:
        cmp     r9, 7
        jg      L_4096_from_bin_bswap_8_start
        test    r9, r9
        jz      L_4096_from_bin_bswap_hi_end
        ; Assemble the leading bytes of a into the top partial word.
        xor     r10d, r10d
L_4096_from_bin_bswap_hi_start:
        movzx   eax, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        add     r10, rax
        dec     r9
        jg      L_4096_from_bin_bswap_hi_start
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
L_4096_from_bin_bswap_hi_end:
        ; Zero whatever is left of the 64 word result.
        cmp     rcx, rdx
        je      L_4096_from_bin_bswap_zero_end
L_4096_from_bin_bswap_zero_start:
        mov     QWORD PTR [rcx], 0
        add     rcx, 8
        cmp     rcx, rdx
        jb      L_4096_from_bin_bswap_zero_start
L_4096_from_bin_bswap_zero_end:
        ret
sp_4096_from_bin_bswap ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Read big endian unsigned byte array into r.
; * Uses the movbe instruction which is an optional instruction.
; *
; * r     A single precision integer.
; * size  Maximum number of bytes to convert.
; * a     Byte array.
; * n     Number of bytes in array to read.
; *
; * Windows x64 arguments: rcx = r, rdx = size, r8 = a, r9 = n.
; * The size argument is not read: exactly 512 bytes (64 words) of r are
; * always produced, and n is not checked against that limit here.
; *
; * The array is consumed from its last byte backwards, 64 bytes at a time,
; * then 8 bytes at a time; the 1 to 7 bytes left over are the most
; * significant ones and sit at the start of a.  The rest of r is zeroed.
; *
; * Only volatile registers are used and the stack is not touched, so this
; * is a leaf function that needs no unwind data.
; */
_text SEGMENT READONLY PARA
sp_4096_from_bin_movbe PROC
        ; NOTE(review): n is presumably declared as a 32-bit int in the C
        ; prototype (not visible here).  Win64 leaves the upper half of r9
        ; undefined for such an argument, so widen it before 64-bit use.
        movsxd  r9, r9d
        lea     r11, [r8+r9]                    ; r11 = one past last byte
        lea     rdx, [rcx+512]                  ; rdx = end of r
        jmp     L_4096_from_bin_movbe_64_end
L_4096_from_bin_movbe_64_start:
        sub     r11, 64
        SP4096_FBM_OFF = 0
        REPT 4
        movbe   rax, QWORD PTR [r11+(56-SP4096_FBM_OFF)]
        movbe   r10, QWORD PTR [r11+(48-SP4096_FBM_OFF)]
        mov     QWORD PTR [rcx+SP4096_FBM_OFF], rax
        mov     QWORD PTR [rcx+(SP4096_FBM_OFF+8)], r10
        SP4096_FBM_OFF = SP4096_FBM_OFF + 16
        ENDM
        add     rcx, 64
        sub     r9, 64
L_4096_from_bin_movbe_64_end:
        cmp     r9, 63
        jg      L_4096_from_bin_movbe_64_start
        jmp     L_4096_from_bin_movbe_8_end
L_4096_from_bin_movbe_8_start:
        sub     r11, 8
        movbe   rax, QWORD PTR [r11]
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
L_4096_from_bin_movbe_8_end:
        cmp     r9, 7
        jg      L_4096_from_bin_movbe_8_start
        test    r9, r9
        jz      L_4096_from_bin_movbe_hi_end
        ; Assemble the leading bytes of a into the top partial word.
        xor     r10d, r10d
L_4096_from_bin_movbe_hi_start:
        movzx   eax, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        add     r10, rax
        dec     r9
        jg      L_4096_from_bin_movbe_hi_start
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
L_4096_from_bin_movbe_hi_end:
        ; Zero whatever is left of the 64 word result.
        cmp     rcx, rdx
        je      L_4096_from_bin_movbe_zero_end
L_4096_from_bin_movbe_zero_start:
        mov     QWORD PTR [rcx], 0
        add     rcx, 8
        cmp     rcx, rdx
        jb      L_4096_from_bin_movbe_zero_start
L_4096_from_bin_movbe_zero_end:
        ret
sp_4096_from_bin_movbe ENDP
_text ENDS
ENDIF
; /* Write r as big endian to byte array.
; * Fixed length number of bytes written: 512
; * Uses the bswap instruction.
; *
; * r  A single precision integer.
; * a  Byte array.
; *
; * Windows x64 arguments: rcx = r, rdx = a.
; * The 64 words of r are read from the most significant word (offset 504)
; * downwards, byte reversed, and written to a from offset 0 upwards, two
; * words per step.  Scratch registers are rax and r8 only.
; */
_text SEGMENT READONLY PARA
sp_4096_to_bin_bswap_64 PROC
        SP4096_TBB_SRC = 504                    ; offset into r, counts down
        SP4096_TBB_DST = 0                      ; offset into a, counts up
        REPT 32
        mov     rax, QWORD PTR [rcx+SP4096_TBB_SRC]
        mov     r8, QWORD PTR [rcx+(SP4096_TBB_SRC-8)]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+SP4096_TBB_DST], rax
        mov     QWORD PTR [rdx+(SP4096_TBB_DST+8)], r8
        SP4096_TBB_SRC = SP4096_TBB_SRC - 16
        SP4096_TBB_DST = SP4096_TBB_DST + 16
        ENDM
        ret
sp_4096_to_bin_bswap_64 ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Write r as big endian to byte array.
; * Fixed length number of bytes written: 512
; * Uses the movbe instruction which is optional.
; *
; * r  A single precision integer.
; * a  Byte array.
; *
; * Windows x64 arguments: rcx = r, rdx = a.
; * The 64 words of r are loaded byte reversed from the most significant
; * word (offset 504) downwards and written to a from offset 0 upwards, two
; * words per step.  Scratch registers are rax and r8 only.
; */
_text SEGMENT READONLY PARA
sp_4096_to_bin_movbe_64 PROC
        SP4096_TBM_SRC = 504                    ; offset into r, counts down
        SP4096_TBM_DST = 0                      ; offset into a, counts up
        REPT 32
        movbe   rax, QWORD PTR [rcx+SP4096_TBM_SRC]
        movbe   r8, QWORD PTR [rcx+(SP4096_TBM_SRC-8)]
        mov     QWORD PTR [rdx+SP4096_TBM_DST], rax
        mov     QWORD PTR [rdx+(SP4096_TBM_DST+8)], r8
        SP4096_TBM_SRC = SP4096_TBM_SRC - 16
        SP4096_TBM_DST = SP4096_TBM_DST + 16
        ENDM
        ret
sp_4096_to_bin_movbe_64 ENDP
_text ENDS
ENDIF
; /* Sub b from a into a. (a -= b)
; *
; * a A single precision integer and result.
; * b A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_4096_sub_in_place_64 PROC
mov r8, QWORD PTR [rcx]
xor rax, rax
sub r8, QWORD PTR [rdx]
mov r9, QWORD PTR [rcx+8]
mov QWORD PTR [rcx], r8
sbb r9, QWORD PTR [rdx+8]
mov r8, QWORD PTR [rcx+16]
mov QWORD PTR [rcx+8], r9
sbb r8, QWORD PTR [rdx+16]
mov r9, QWORD PTR [rcx+24]
mov QWORD PTR [rcx+16], r8
sbb r9, QWORD PTR [rdx+24]
mov r8, QWORD PTR [rcx+32]
mov QWORD PTR [rcx+24], r9
sbb r8, QWORD PTR [rdx+32]
mov r9, QWORD PTR [rcx+40]
mov QWORD PTR [rcx+32], r8
sbb r9, QWORD PTR [rdx+40]
mov r8, QWORD PTR [rcx+48]
mov QWORD PTR [rcx+40], r9
sbb r8, QWORD PTR [rdx+48]
mov r9, QWORD PTR [rcx+56]
mov QWORD PTR [rcx+48], r8
sbb r9, QWORD PTR [rdx+56]
mov r8, QWORD PTR [rcx+64]
mov QWORD PTR [rcx+56], r9
sbb r8, QWORD PTR [rdx+64]
mov r9, QWORD PTR [rcx+72]
mov QWORD PTR [rcx+64], r8
sbb r9, QWORD PTR [rdx+72]
mov r8, QWORD PTR [rcx+80]
mov QWORD PTR [rcx+72], r9
sbb r8, QWORD PTR [rdx+80]
mov r9, QWORD PTR [rcx+88]
mov QWORD PTR [rcx+80], r8
sbb r9, QWORD PTR [rdx+88]
mov r8, QWORD PTR [rcx+96]
mov QWORD PTR [rcx+88], r9
sbb r8, QWORD PTR [rdx+96]
mov r9, QWORD PTR [rcx+104]
mov QWORD PTR [rcx+96], r8
sbb r9, QWORD PTR [rdx+104]
mov r8, QWORD PTR [rcx+112]
mov QWORD PTR [rcx+104], r9
sbb r8, QWORD PTR [rdx+112]
mov r9, QWORD PTR [rcx+120]
mov QWORD PTR [rcx+112], r8
sbb r9, QWORD PTR [rdx+120]
mov r8, QWORD PTR [rcx+128]
mov QWORD PTR [rcx+120], r9
sbb r8, QWORD PTR [rdx+128]
mov r9, QWORD PTR [rcx+136]
mov QWORD PTR [rcx+128], r8
sbb r9, QWORD PTR [rdx+136]
mov r8, QWORD PTR [rcx+144]
mov QWORD PTR [rcx+136], r9
sbb r8, QWORD PTR [rdx+144]
mov r9, QWORD PTR [rcx+152]
mov QWORD PTR [rcx+144], r8
sbb r9, QWORD PTR [rdx+152]
mov r8, QWORD PTR [rcx+160]
mov QWORD PTR [rcx+152], r9
sbb r8, QWORD PTR [rdx+160]
mov r9, QWORD PTR [rcx+168]
mov QWORD PTR [rcx+160], r8
sbb r9, QWORD PTR [rdx+168]
mov r8, QWORD PTR [rcx+176]
mov QWORD PTR [rcx+168], r9
sbb r8, QWORD PTR [rdx+176]
mov r9, QWORD PTR [rcx+184]
mov QWORD PTR [rcx+176], r8
sbb r9, QWORD PTR [rdx+184]
mov r8, QWORD PTR [rcx+192]
mov QWORD PTR [rcx+184], r9
sbb r8, QWORD PTR [rdx+192]
mov r9, QWORD PTR [rcx+200]
mov QWORD PTR [rcx+192], r8
sbb r9, QWORD PTR [rdx+200]
mov r8, QWORD PTR [rcx+208]
mov QWORD PTR [rcx+200], r9
sbb r8, QWORD PTR [rdx+208]
mov r9, QWORD PTR [rcx+216]
mov QWORD PTR [rcx+208], r8
sbb r9, QWORD PTR [rdx+216]
mov r8, QWORD PTR [rcx+224]
mov QWORD PTR [rcx+216], r9
sbb r8, QWORD PTR [rdx+224]
mov r9, QWORD PTR [rcx+232]
mov QWORD PTR [rcx+224], r8
sbb r9, QWORD PTR [rdx+232]
mov r8, QWORD PTR [rcx+240]
mov QWORD PTR [rcx+232], r9
sbb r8, QWORD PTR [rdx+240]
mov r9, QWORD PTR [rcx+248]
mov QWORD PTR [rcx+240], r8
sbb r9, QWORD PTR [rdx+248]
mov r8, QWORD PTR [rcx+256]
mov QWORD PTR [rcx+248], r9
sbb r8, QWORD PTR [rdx+256]
mov r9, QWORD PTR [rcx+264]
mov QWORD PTR [rcx+256], r8
sbb r9, QWORD PTR [rdx+264]
mov r8, QWORD PTR [rcx+272]
mov QWORD PTR [rcx+264], r9
sbb r8, QWORD PTR [rdx+272]
mov r9, QWORD PTR [rcx+280]
mov QWORD PTR [rcx+272], r8
sbb r9, QWORD PTR [rdx+280]
mov r8, QWORD PTR [rcx+288]
mov QWORD PTR [rcx+280], r9
sbb r8, QWORD PTR [rdx+288]
mov r9, QWORD PTR [rcx+296]
mov QWORD PTR [rcx+288], r8
sbb r9, QWORD PTR [rdx+296]
mov r8, QWORD PTR [rcx+304]
mov QWORD PTR [rcx+296], r9
sbb r8, QWORD PTR [rdx+304]
mov r9, QWORD PTR [rcx+312]
mov QWORD PTR [rcx+304], r8
sbb r9, QWORD PTR [rdx+312]
mov r8, QWORD PTR [rcx+320]
mov QWORD PTR [rcx+312], r9
sbb r8, QWORD PTR [rdx+320]
mov r9, QWORD PTR [rcx+328]
mov QWORD PTR [rcx+320], r8
sbb r9, QWORD PTR [rdx+328]
mov r8, QWORD PTR [rcx+336]
mov QWORD PTR [rcx+328], r9
sbb r8, QWORD PTR [rdx+336]
mov r9, QWORD PTR [rcx+344]
mov QWORD PTR [rcx+336], r8
sbb r9, QWORD PTR [rdx+344]
mov r8, QWORD PTR [rcx+352]
mov QWORD PTR [rcx+344], r9
sbb r8, QWORD PTR [rdx+352]
mov r9, QWORD PTR [rcx+360]
mov QWORD PTR [rcx+352], r8
sbb r9, QWORD PTR [rdx+360]
mov r8, QWORD PTR [rcx+368]
mov QWORD PTR [rcx+360], r9
sbb r8, QWORD PTR [rdx+368]
mov r9, QWORD PTR [rcx+376]
mov QWORD PTR [rcx+368], r8
sbb r9, QWORD PTR [rdx+376]
mov r8, QWORD PTR [rcx+384]
mov QWORD PTR [rcx+376], r9
sbb r8, QWORD PTR [rdx+384]
mov r9, QWORD PTR [rcx+392]
mov QWORD PTR [rcx+384], r8
sbb r9, QWORD PTR [rdx+392]
mov r8, QWORD PTR [rcx+400]
mov QWORD PTR [rcx+392], r9
sbb r8, QWORD PTR [rdx+400]
mov r9, QWORD PTR [rcx+408]
mov QWORD PTR [rcx+400], r8
sbb r9, QWORD PTR [rdx+408]
mov r8, QWORD PTR [rcx+416]
mov QWORD PTR [rcx+408], r9
sbb r8, QWORD PTR [rdx+416]
mov r9, QWORD PTR [rcx+424]
mov QWORD PTR [rcx+416], r8
sbb r9, QWORD PTR [rdx+424]
mov r8, QWORD PTR [rcx+432]
mov QWORD PTR [rcx+424], r9
sbb r8, QWORD PTR [rdx+432]
mov r9, QWORD PTR [rcx+440]
mov QWORD PTR [rcx+432], r8
sbb r9, QWORD PTR [rdx+440]
mov r8, QWORD PTR [rcx+448]
mov QWORD PTR [rcx+440], r9
sbb r8, QWORD PTR [rdx+448]
mov r9, QWORD PTR [rcx+456]
mov QWORD PTR [rcx+448], r8
sbb r9, QWORD PTR [rdx+456]
mov r8, QWORD PTR [rcx+464]
mov QWORD PTR [rcx+456], r9
sbb r8, QWORD PTR [rdx+464]
mov r9, QWORD PTR [rcx+472]
mov QWORD PTR [rcx+464], r8
sbb r9, QWORD PTR [rdx+472]
mov r8, QWORD PTR [rcx+480]
mov QWORD PTR [rcx+472], r9
sbb r8, QWORD PTR [rdx+480]
mov r9, QWORD PTR [rcx+488]
mov QWORD PTR [rcx+480], r8
sbb r9, QWORD PTR [rdx+488]
mov r8, QWORD PTR [rcx+496]
mov QWORD PTR [rcx+488], r9
sbb r8, QWORD PTR [rdx+496]
mov r9, QWORD PTR [rcx+504]
mov QWORD PTR [rcx+496], r8
sbb r9, QWORD PTR [rdx+504]
mov QWORD PTR [rcx+504], r9
sbb rax, 0
ret
sp_4096_sub_in_place_64 ENDP
_text ENDS
; /* Add two 64-word numbers: r = a + b, returning the carry out.
; *
; * Every operand is 64 words of 64 bits (512 bytes), least-significant
; * word at offset 0. Arguments arrive in rcx, rdx, r8 (Microsoft x64
; * argument order). Leaf routine: no stack is used.
; *
; * r  (rcx)  Destination, receives the 64 sum words.
; * a  (rdx)  First addend.
; * b  (r8)   Second addend.
; *
; * Returns (rax) the carry out of the most-significant word: 0 or 1.
; * Modifies rax, r9, r10 and the flags.
; */
_text SEGMENT READONLY PARA
sp_4096_add_64 PROC
        ; rax collects the final carry. It is cleared before the ADD, so the
        ; ADD alone decides the first carry flag value.
        xor     rax, rax
        ; Words 0 and 1 are written out by hand so that the repeated section
        ; below always begins with a finished sum waiting in r10.
        mov     r9, QWORD PTR [rdx]
        add     r9, QWORD PTR [r8]
        mov     r10, QWORD PTR [rdx+8]
        mov     QWORD PTR [rcx], r9
        adc     r10, QWORD PTR [r8+8]
        ; Words 2..63, two per repetition. r9 and r10 alternate: while one
        ; receives the next word of a, the other (holding the previous sum)
        ; is stored. MOV leaves the flags untouched, so the carry flows
        ; from each ADC straight into the next one.
sp_4096_add_64_disp = 16
        REPT    31
        mov     r9, QWORD PTR [rdx+sp_4096_add_64_disp]
        mov     QWORD PTR [rcx+sp_4096_add_64_disp-8], r10
        adc     r9, QWORD PTR [r8+sp_4096_add_64_disp]
        mov     r10, QWORD PTR [rdx+sp_4096_add_64_disp+8]
        mov     QWORD PTR [rcx+sp_4096_add_64_disp], r9
        adc     r10, QWORD PTR [r8+sp_4096_add_64_disp+8]
sp_4096_add_64_disp = sp_4096_add_64_disp + 16
        ENDM
        ; Store the top word, then turn the last carry flag into 0 or 1.
        mov     QWORD PTR [rcx+504], r10
        adc     rax, 0
        ret
sp_4096_add_64 ENDP
_text ENDS
; /* Multiply a and b into r. (r = a * b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_4096_mul_64 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
sub rsp, 1576
mov QWORD PTR [rsp+1536], rcx
mov QWORD PTR [rsp+1544], rdx
mov QWORD PTR [rsp+1552], r8
lea r12, QWORD PTR [rsp+1024]
lea r14, QWORD PTR [rdx+256]
; Add
mov rax, QWORD PTR [rdx]
xor r15, r15
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [rdx+8]
mov QWORD PTR [r12], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [rdx+16]
mov QWORD PTR [r12+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [rdx+24]
mov QWORD PTR [r12+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [rdx+32]
mov QWORD PTR [r12+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [rdx+40]
mov QWORD PTR [r12+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r12+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [rdx+56]
mov QWORD PTR [r12+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [rdx+64]
mov QWORD PTR [r12+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [rdx+72]
mov QWORD PTR [r12+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [rdx+80]
mov QWORD PTR [r12+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [rdx+88]
mov QWORD PTR [r12+80], r9
adc r10, QWORD PTR [r14+88]
mov rax, QWORD PTR [rdx+96]
mov QWORD PTR [r12+88], r10
adc rax, QWORD PTR [r14+96]
mov r9, QWORD PTR [rdx+104]
mov QWORD PTR [r12+96], rax
adc r9, QWORD PTR [r14+104]
mov r10, QWORD PTR [rdx+112]
mov QWORD PTR [r12+104], r9
adc r10, QWORD PTR [r14+112]
mov rax, QWORD PTR [rdx+120]
mov QWORD PTR [r12+112], r10
adc rax, QWORD PTR [r14+120]
mov r9, QWORD PTR [rdx+128]
mov QWORD PTR [r12+120], rax
adc r9, QWORD PTR [r14+128]
mov r10, QWORD PTR [rdx+136]
mov QWORD PTR [r12+128], r9
adc r10, QWORD PTR [r14+136]
mov rax, QWORD PTR [rdx+144]
mov QWORD PTR [r12+136], r10
adc rax, QWORD PTR [r14+144]
mov r9, QWORD PTR [rdx+152]
mov QWORD PTR [r12+144], rax
adc r9, QWORD PTR [r14+152]
mov r10, QWORD PTR [rdx+160]
mov QWORD PTR [r12+152], r9
adc r10, QWORD PTR [r14+160]
mov rax, QWORD PTR [rdx+168]
mov QWORD PTR [r12+160], r10
adc rax, QWORD PTR [r14+168]
mov r9, QWORD PTR [rdx+176]
mov QWORD PTR [r12+168], rax
adc r9, QWORD PTR [r14+176]
mov r10, QWORD PTR [rdx+184]
mov QWORD PTR [r12+176], r9
adc r10, QWORD PTR [r14+184]
mov rax, QWORD PTR [rdx+192]
mov QWORD PTR [r12+184], r10
adc rax, QWORD PTR [r14+192]
mov r9, QWORD PTR [rdx+200]
mov QWORD PTR [r12+192], rax
adc r9, QWORD PTR [r14+200]
mov r10, QWORD PTR [rdx+208]
mov QWORD PTR [r12+200], r9
adc r10, QWORD PTR [r14+208]
mov rax, QWORD PTR [rdx+216]
mov QWORD PTR [r12+208], r10
adc rax, QWORD PTR [r14+216]
mov r9, QWORD PTR [rdx+224]
mov QWORD PTR [r12+216], rax
adc r9, QWORD PTR [r14+224]
mov r10, QWORD PTR [rdx+232]
mov QWORD PTR [r12+224], r9
adc r10, QWORD PTR [r14+232]
mov rax, QWORD PTR [rdx+240]
mov QWORD PTR [r12+232], r10
adc rax, QWORD PTR [r14+240]
mov r9, QWORD PTR [rdx+248]
mov QWORD PTR [r12+240], rax
adc r9, QWORD PTR [r14+248]
mov QWORD PTR [r12+248], r9
adc r15, 0
mov QWORD PTR [rsp+1560], r15
lea r13, QWORD PTR [rsp+1280]
lea r14, QWORD PTR [r8+256]
; Add
mov rax, QWORD PTR [r8]
xor rdi, rdi
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [r8+8]
mov QWORD PTR [r13], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [r8+16]
mov QWORD PTR [r13+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [r8+24]
mov QWORD PTR [r13+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [r8+32]
mov QWORD PTR [r13+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [r8+40]
mov QWORD PTR [r13+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [r8+48]
mov QWORD PTR [r13+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [r8+56]
mov QWORD PTR [r13+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [r8+64]
mov QWORD PTR [r13+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [r8+72]
mov QWORD PTR [r13+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [r8+80]
mov QWORD PTR [r13+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [r8+88]
mov QWORD PTR [r13+80], r9
adc r10, QWORD PTR [r14+88]
mov rax, QWORD PTR [r8+96]
mov QWORD PTR [r13+88], r10
adc rax, QWORD PTR [r14+96]
mov r9, QWORD PTR [r8+104]
mov QWORD PTR [r13+96], rax
adc r9, QWORD PTR [r14+104]
mov r10, QWORD PTR [r8+112]
mov QWORD PTR [r13+104], r9
adc r10, QWORD PTR [r14+112]
mov rax, QWORD PTR [r8+120]
mov QWORD PTR [r13+112], r10
adc rax, QWORD PTR [r14+120]
mov r9, QWORD PTR [r8+128]
mov QWORD PTR [r13+120], rax
adc r9, QWORD PTR [r14+128]
mov r10, QWORD PTR [r8+136]
mov QWORD PTR [r13+128], r9
adc r10, QWORD PTR [r14+136]
mov rax, QWORD PTR [r8+144]
mov QWORD PTR [r13+136], r10
adc rax, QWORD PTR [r14+144]
mov r9, QWORD PTR [r8+152]
mov QWORD PTR [r13+144], rax
adc r9, QWORD PTR [r14+152]
mov r10, QWORD PTR [r8+160]
mov QWORD PTR [r13+152], r9
adc r10, QWORD PTR [r14+160]
mov rax, QWORD PTR [r8+168]
mov QWORD PTR [r13+160], r10
adc rax, QWORD PTR [r14+168]
mov r9, QWORD PTR [r8+176]
mov QWORD PTR [r13+168], rax
adc r9, QWORD PTR [r14+176]
mov r10, QWORD PTR [r8+184]
mov QWORD PTR [r13+176], r9
adc r10, QWORD PTR [r14+184]
mov rax, QWORD PTR [r8+192]
mov QWORD PTR [r13+184], r10
adc rax, QWORD PTR [r14+192]
mov r9, QWORD PTR [r8+200]
mov QWORD PTR [r13+192], rax
adc r9, QWORD PTR [r14+200]
mov r10, QWORD PTR [r8+208]
mov QWORD PTR [r13+200], r9
adc r10, QWORD PTR [r14+208]
mov rax, QWORD PTR [r8+216]
mov QWORD PTR [r13+208], r10
adc rax, QWORD PTR [r14+216]
mov r9, QWORD PTR [r8+224]
mov QWORD PTR [r13+216], rax
adc r9, QWORD PTR [r14+224]
mov r10, QWORD PTR [r8+232]
mov QWORD PTR [r13+224], r9
adc r10, QWORD PTR [r14+232]
mov rax, QWORD PTR [r8+240]
mov QWORD PTR [r13+232], r10
adc rax, QWORD PTR [r14+240]
mov r9, QWORD PTR [r8+248]
mov QWORD PTR [r13+240], rax
adc r9, QWORD PTR [r14+248]
mov QWORD PTR [r13+248], r9
adc rdi, 0
mov QWORD PTR [rsp+1568], rdi
mov r8, r13
mov rdx, r12
mov rcx, rsp
call sp_2048_mul_32
mov r8, QWORD PTR [rsp+1552]
mov rdx, QWORD PTR [rsp+1544]
lea rcx, QWORD PTR [rsp+512]
add r8, 256
add rdx, 256
call sp_2048_mul_32
mov r8, QWORD PTR [rsp+1552]
mov rdx, QWORD PTR [rsp+1544]
mov rcx, QWORD PTR [rsp+1536]
call sp_2048_mul_32
IFDEF _WIN64
mov r8, QWORD PTR [rsp+1552]
mov rdx, QWORD PTR [rsp+1544]
mov rcx, QWORD PTR [rsp+1536]
ENDIF
mov r15, QWORD PTR [rsp+1560]
mov rdi, QWORD PTR [rsp+1568]
mov rsi, QWORD PTR [rsp+1536]
mov r11, r15
lea r12, QWORD PTR [rsp+1024]
lea r13, QWORD PTR [rsp+1280]
and r11, rdi
neg r15
neg rdi
add rsi, 512
mov rax, QWORD PTR [r12]
mov r9, QWORD PTR [r13]
and rax, rdi
and r9, r15
mov QWORD PTR [r12], rax
mov QWORD PTR [r13], r9
mov rax, QWORD PTR [r12+8]
mov r9, QWORD PTR [r13+8]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+8], rax
mov QWORD PTR [r13+8], r9
mov rax, QWORD PTR [r12+16]
mov r9, QWORD PTR [r13+16]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+16], rax
mov QWORD PTR [r13+16], r9
mov rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [r13+24]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+24], rax
mov QWORD PTR [r13+24], r9
mov rax, QWORD PTR [r12+32]
mov r9, QWORD PTR [r13+32]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+32], rax
mov QWORD PTR [r13+32], r9
mov rax, QWORD PTR [r12+40]
mov r9, QWORD PTR [r13+40]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+40], rax
mov QWORD PTR [r13+40], r9
mov rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [r13+48]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+48], rax
mov QWORD PTR [r13+48], r9
mov rax, QWORD PTR [r12+56]
mov r9, QWORD PTR [r13+56]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+56], rax
mov QWORD PTR [r13+56], r9
mov rax, QWORD PTR [r12+64]
mov r9, QWORD PTR [r13+64]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+64], rax
mov QWORD PTR [r13+64], r9
mov rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [r13+72]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+72], rax
mov QWORD PTR [r13+72], r9
mov rax, QWORD PTR [r12+80]
mov r9, QWORD PTR [r13+80]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+80], rax
mov QWORD PTR [r13+80], r9
mov rax, QWORD PTR [r12+88]
mov r9, QWORD PTR [r13+88]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+88], rax
mov QWORD PTR [r13+88], r9
mov rax, QWORD PTR [r12+96]
mov r9, QWORD PTR [r13+96]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+96], rax
mov QWORD PTR [r13+96], r9
mov rax, QWORD PTR [r12+104]
mov r9, QWORD PTR [r13+104]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+104], rax
mov QWORD PTR [r13+104], r9
mov rax, QWORD PTR [r12+112]
mov r9, QWORD PTR [r13+112]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+112], rax
mov QWORD PTR [r13+112], r9
mov rax, QWORD PTR [r12+120]
mov r9, QWORD PTR [r13+120]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+120], rax
mov QWORD PTR [r13+120], r9
mov rax, QWORD PTR [r12+128]
mov r9, QWORD PTR [r13+128]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+128], rax
mov QWORD PTR [r13+128], r9
mov rax, QWORD PTR [r12+136]
mov r9, QWORD PTR [r13+136]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+136], rax
mov QWORD PTR [r13+136], r9
mov rax, QWORD PTR [r12+144]
mov r9, QWORD PTR [r13+144]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+144], rax
mov QWORD PTR [r13+144], r9
mov rax, QWORD PTR [r12+152]
mov r9, QWORD PTR [r13+152]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+152], rax
mov QWORD PTR [r13+152], r9
mov rax, QWORD PTR [r12+160]
mov r9, QWORD PTR [r13+160]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+160], rax
mov QWORD PTR [r13+160], r9
mov rax, QWORD PTR [r12+168]
mov r9, QWORD PTR [r13+168]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+168], rax
mov QWORD PTR [r13+168], r9
mov rax, QWORD PTR [r12+176]
mov r9, QWORD PTR [r13+176]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+176], rax
mov QWORD PTR [r13+176], r9
mov rax, QWORD PTR [r12+184]
mov r9, QWORD PTR [r13+184]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+184], rax
mov QWORD PTR [r13+184], r9
mov rax, QWORD PTR [r12+192]
mov r9, QWORD PTR [r13+192]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+192], rax
mov QWORD PTR [r13+192], r9
mov rax, QWORD PTR [r12+200]
mov r9, QWORD PTR [r13+200]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+200], rax
mov QWORD PTR [r13+200], r9
mov rax, QWORD PTR [r12+208]
mov r9, QWORD PTR [r13+208]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+208], rax
mov QWORD PTR [r13+208], r9
mov rax, QWORD PTR [r12+216]
mov r9, QWORD PTR [r13+216]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+216], rax
mov QWORD PTR [r13+216], r9
mov rax, QWORD PTR [r12+224]
mov r9, QWORD PTR [r13+224]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+224], rax
mov QWORD PTR [r13+224], r9
mov rax, QWORD PTR [r12+232]
mov r9, QWORD PTR [r13+232]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+232], rax
mov QWORD PTR [r13+232], r9
mov rax, QWORD PTR [r12+240]
mov r9, QWORD PTR [r13+240]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+240], rax
mov QWORD PTR [r13+240], r9
mov rax, QWORD PTR [r12+248]
mov r9, QWORD PTR [r13+248]
and rax, rdi
and r9, r15
mov QWORD PTR [r12+248], rax
mov QWORD PTR [r13+248], r9
mov rax, QWORD PTR [r12]
add rax, QWORD PTR [r13]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r13+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [rsi+128], r9
adc r10, QWORD PTR [r13+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [rsi+136], r10
adc rax, QWORD PTR [r13+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [rsi+144], rax
adc r9, QWORD PTR [r13+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [rsi+152], r9
adc r10, QWORD PTR [r13+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [rsi+160], r10
adc rax, QWORD PTR [r13+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [rsi+168], rax
adc r9, QWORD PTR [r13+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [rsi+176], r9
adc r10, QWORD PTR [r13+184]
mov rax, QWORD PTR [r12+192]
mov QWORD PTR [rsi+184], r10
adc rax, QWORD PTR [r13+192]
mov r9, QWORD PTR [r12+200]
mov QWORD PTR [rsi+192], rax
adc r9, QWORD PTR [r13+200]
mov r10, QWORD PTR [r12+208]
mov QWORD PTR [rsi+200], r9
adc r10, QWORD PTR [r13+208]
mov rax, QWORD PTR [r12+216]
mov QWORD PTR [rsi+208], r10
adc rax, QWORD PTR [r13+216]
mov r9, QWORD PTR [r12+224]
mov QWORD PTR [rsi+216], rax
adc r9, QWORD PTR [r13+224]
mov r10, QWORD PTR [r12+232]
mov QWORD PTR [rsi+224], r9
adc r10, QWORD PTR [r13+232]
mov rax, QWORD PTR [r12+240]
mov QWORD PTR [rsi+232], r10
adc rax, QWORD PTR [r13+240]
mov r9, QWORD PTR [r12+248]
mov QWORD PTR [rsi+240], rax
adc r9, QWORD PTR [r13+248]
mov QWORD PTR [rsi+248], r9
adc r11, 0
lea r13, QWORD PTR [rsp+512]
mov r12, rsp
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [r13]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [r13+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [r13+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [r13+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [r13+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [r13+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [r13+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [r13+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [r13+184]
mov rax, QWORD PTR [r12+192]
mov QWORD PTR [r12+184], r10
sbb rax, QWORD PTR [r13+192]
mov r9, QWORD PTR [r12+200]
mov QWORD PTR [r12+192], rax
sbb r9, QWORD PTR [r13+200]
mov r10, QWORD PTR [r12+208]
mov QWORD PTR [r12+200], r9
sbb r10, QWORD PTR [r13+208]
mov rax, QWORD PTR [r12+216]
mov QWORD PTR [r12+208], r10
sbb rax, QWORD PTR [r13+216]
mov r9, QWORD PTR [r12+224]
mov QWORD PTR [r12+216], rax
sbb r9, QWORD PTR [r13+224]
mov r10, QWORD PTR [r12+232]
mov QWORD PTR [r12+224], r9
sbb r10, QWORD PTR [r13+232]
mov rax, QWORD PTR [r12+240]
mov QWORD PTR [r12+232], r10
sbb rax, QWORD PTR [r13+240]
mov r9, QWORD PTR [r12+248]
mov QWORD PTR [r12+240], rax
sbb r9, QWORD PTR [r13+248]
mov r10, QWORD PTR [r12+256]
mov QWORD PTR [r12+248], r9
sbb r10, QWORD PTR [r13+256]
mov rax, QWORD PTR [r12+264]
mov QWORD PTR [r12+256], r10
sbb rax, QWORD PTR [r13+264]
mov r9, QWORD PTR [r12+272]
mov QWORD PTR [r12+264], rax
sbb r9, QWORD PTR [r13+272]
mov r10, QWORD PTR [r12+280]
mov QWORD PTR [r12+272], r9
sbb r10, QWORD PTR [r13+280]
mov rax, QWORD PTR [r12+288]
mov QWORD PTR [r12+280], r10
sbb rax, QWORD PTR [r13+288]
mov r9, QWORD PTR [r12+296]
mov QWORD PTR [r12+288], rax
sbb r9, QWORD PTR [r13+296]
mov r10, QWORD PTR [r12+304]
mov QWORD PTR [r12+296], r9
sbb r10, QWORD PTR [r13+304]
mov rax, QWORD PTR [r12+312]
mov QWORD PTR [r12+304], r10
sbb rax, QWORD PTR [r13+312]
mov r9, QWORD PTR [r12+320]
mov QWORD PTR [r12+312], rax
sbb r9, QWORD PTR [r13+320]
mov r10, QWORD PTR [r12+328]
mov QWORD PTR [r12+320], r9
sbb r10, QWORD PTR [r13+328]
mov rax, QWORD PTR [r12+336]
mov QWORD PTR [r12+328], r10
sbb rax, QWORD PTR [r13+336]
mov r9, QWORD PTR [r12+344]
mov QWORD PTR [r12+336], rax
sbb r9, QWORD PTR [r13+344]
mov r10, QWORD PTR [r12+352]
mov QWORD PTR [r12+344], r9
sbb r10, QWORD PTR [r13+352]
mov rax, QWORD PTR [r12+360]
mov QWORD PTR [r12+352], r10
sbb rax, QWORD PTR [r13+360]
mov r9, QWORD PTR [r12+368]
mov QWORD PTR [r12+360], rax
sbb r9, QWORD PTR [r13+368]
mov r10, QWORD PTR [r12+376]
mov QWORD PTR [r12+368], r9
sbb r10, QWORD PTR [r13+376]
mov rax, QWORD PTR [r12+384]
mov QWORD PTR [r12+376], r10
sbb rax, QWORD PTR [r13+384]
mov r9, QWORD PTR [r12+392]
mov QWORD PTR [r12+384], rax
sbb r9, QWORD PTR [r13+392]
mov r10, QWORD PTR [r12+400]
mov QWORD PTR [r12+392], r9
sbb r10, QWORD PTR [r13+400]
mov rax, QWORD PTR [r12+408]
mov QWORD PTR [r12+400], r10
sbb rax, QWORD PTR [r13+408]
mov r9, QWORD PTR [r12+416]
mov QWORD PTR [r12+408], rax
sbb r9, QWORD PTR [r13+416]
mov r10, QWORD PTR [r12+424]
mov QWORD PTR [r12+416], r9
sbb r10, QWORD PTR [r13+424]
mov rax, QWORD PTR [r12+432]
mov QWORD PTR [r12+424], r10
sbb rax, QWORD PTR [r13+432]
mov r9, QWORD PTR [r12+440]
mov QWORD PTR [r12+432], rax
sbb r9, QWORD PTR [r13+440]
mov r10, QWORD PTR [r12+448]
mov QWORD PTR [r12+440], r9
sbb r10, QWORD PTR [r13+448]
mov rax, QWORD PTR [r12+456]
mov QWORD PTR [r12+448], r10
sbb rax, QWORD PTR [r13+456]
mov r9, QWORD PTR [r12+464]
mov QWORD PTR [r12+456], rax
sbb r9, QWORD PTR [r13+464]
mov r10, QWORD PTR [r12+472]
mov QWORD PTR [r12+464], r9
sbb r10, QWORD PTR [r13+472]
mov rax, QWORD PTR [r12+480]
mov QWORD PTR [r12+472], r10
sbb rax, QWORD PTR [r13+480]
mov r9, QWORD PTR [r12+488]
mov QWORD PTR [r12+480], rax
sbb r9, QWORD PTR [r13+488]
mov r10, QWORD PTR [r12+496]
mov QWORD PTR [r12+488], r9
sbb r10, QWORD PTR [r13+496]
mov rax, QWORD PTR [r12+504]
mov QWORD PTR [r12+496], r10
sbb rax, QWORD PTR [r13+504]
mov QWORD PTR [r12+504], rax
sbb r11, 0
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [rcx]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [rcx+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [rcx+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [rcx+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [rcx+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [rcx+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [rcx+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [rcx+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [rcx+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [rcx+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [rcx+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [rcx+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [rcx+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [rcx+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [rcx+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [rcx+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [rcx+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [rcx+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [rcx+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [rcx+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [rcx+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [rcx+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [rcx+184]
mov rax, QWORD PTR [r12+192]
mov QWORD PTR [r12+184], r10
sbb rax, QWORD PTR [rcx+192]
mov r9, QWORD PTR [r12+200]
mov QWORD PTR [r12+192], rax
sbb r9, QWORD PTR [rcx+200]
mov r10, QWORD PTR [r12+208]
mov QWORD PTR [r12+200], r9
sbb r10, QWORD PTR [rcx+208]
mov rax, QWORD PTR [r12+216]
mov QWORD PTR [r12+208], r10
sbb rax, QWORD PTR [rcx+216]
mov r9, QWORD PTR [r12+224]
mov QWORD PTR [r12+216], rax
sbb r9, QWORD PTR [rcx+224]
mov r10, QWORD PTR [r12+232]
mov QWORD PTR [r12+224], r9
sbb r10, QWORD PTR [rcx+232]
mov rax, QWORD PTR [r12+240]
mov QWORD PTR [r12+232], r10
sbb rax, QWORD PTR [rcx+240]
mov r9, QWORD PTR [r12+248]
mov QWORD PTR [r12+240], rax
sbb r9, QWORD PTR [rcx+248]
mov r10, QWORD PTR [r12+256]
mov QWORD PTR [r12+248], r9
sbb r10, QWORD PTR [rcx+256]
mov rax, QWORD PTR [r12+264]
mov QWORD PTR [r12+256], r10
sbb rax, QWORD PTR [rcx+264]
mov r9, QWORD PTR [r12+272]
mov QWORD PTR [r12+264], rax
sbb r9, QWORD PTR [rcx+272]
mov r10, QWORD PTR [r12+280]
mov QWORD PTR [r12+272], r9
sbb r10, QWORD PTR [rcx+280]
mov rax, QWORD PTR [r12+288]
mov QWORD PTR [r12+280], r10
sbb rax, QWORD PTR [rcx+288]
mov r9, QWORD PTR [r12+296]
mov QWORD PTR [r12+288], rax
sbb r9, QWORD PTR [rcx+296]
mov r10, QWORD PTR [r12+304]
mov QWORD PTR [r12+296], r9
sbb r10, QWORD PTR [rcx+304]
mov rax, QWORD PTR [r12+312]
mov QWORD PTR [r12+304], r10
sbb rax, QWORD PTR [rcx+312]
mov r9, QWORD PTR [r12+320]
mov QWORD PTR [r12+312], rax
sbb r9, QWORD PTR [rcx+320]
mov r10, QWORD PTR [r12+328]
mov QWORD PTR [r12+320], r9
sbb r10, QWORD PTR [rcx+328]
mov rax, QWORD PTR [r12+336]
mov QWORD PTR [r12+328], r10
sbb rax, QWORD PTR [rcx+336]
mov r9, QWORD PTR [r12+344]
mov QWORD PTR [r12+336], rax
sbb r9, QWORD PTR [rcx+344]
mov r10, QWORD PTR [r12+352]
mov QWORD PTR [r12+344], r9
sbb r10, QWORD PTR [rcx+352]
mov rax, QWORD PTR [r12+360]
mov QWORD PTR [r12+352], r10
sbb rax, QWORD PTR [rcx+360]
mov r9, QWORD PTR [r12+368]
mov QWORD PTR [r12+360], rax
sbb r9, QWORD PTR [rcx+368]
mov r10, QWORD PTR [r12+376]
mov QWORD PTR [r12+368], r9
sbb r10, QWORD PTR [rcx+376]
mov rax, QWORD PTR [r12+384]
mov QWORD PTR [r12+376], r10
sbb rax, QWORD PTR [rcx+384]
mov r9, QWORD PTR [r12+392]
mov QWORD PTR [r12+384], rax
sbb r9, QWORD PTR [rcx+392]
mov r10, QWORD PTR [r12+400]
mov QWORD PTR [r12+392], r9
sbb r10, QWORD PTR [rcx+400]
mov rax, QWORD PTR [r12+408]
mov QWORD PTR [r12+400], r10
sbb rax, QWORD PTR [rcx+408]
mov r9, QWORD PTR [r12+416]
mov QWORD PTR [r12+408], rax
sbb r9, QWORD PTR [rcx+416]
mov r10, QWORD PTR [r12+424]
mov QWORD PTR [r12+416], r9
sbb r10, QWORD PTR [rcx+424]
mov rax, QWORD PTR [r12+432]
mov QWORD PTR [r12+424], r10
sbb rax, QWORD PTR [rcx+432]
mov r9, QWORD PTR [r12+440]
mov QWORD PTR [r12+432], rax
sbb r9, QWORD PTR [rcx+440]
mov r10, QWORD PTR [r12+448]
mov QWORD PTR [r12+440], r9
sbb r10, QWORD PTR [rcx+448]
mov rax, QWORD PTR [r12+456]
mov QWORD PTR [r12+448], r10
sbb rax, QWORD PTR [rcx+456]
mov r9, QWORD PTR [r12+464]
mov QWORD PTR [r12+456], rax
sbb r9, QWORD PTR [rcx+464]
mov r10, QWORD PTR [r12+472]
mov QWORD PTR [r12+464], r9
sbb r10, QWORD PTR [rcx+472]
mov rax, QWORD PTR [r12+480]
mov QWORD PTR [r12+472], r10
sbb rax, QWORD PTR [rcx+480]
mov r9, QWORD PTR [r12+488]
mov QWORD PTR [r12+480], rax
sbb r9, QWORD PTR [rcx+488]
mov r10, QWORD PTR [r12+496]
mov QWORD PTR [r12+488], r9
sbb r10, QWORD PTR [rcx+496]
mov rax, QWORD PTR [r12+504]
mov QWORD PTR [r12+496], r10
sbb rax, QWORD PTR [rcx+504]
mov QWORD PTR [r12+504], rax
sbb r11, 0
sub rsi, 256
; Add
mov rax, QWORD PTR [rsi]
add rax, QWORD PTR [r12]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r12+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r12+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r12+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r12+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r12+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r12+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r12+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r12+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r12+96]
mov r9, QWORD PTR [rsi+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r12+104]
mov r10, QWORD PTR [rsi+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r12+112]
mov rax, QWORD PTR [rsi+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r12+120]
mov r9, QWORD PTR [rsi+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r12+128]
mov r10, QWORD PTR [rsi+136]
mov QWORD PTR [rsi+128], r9
adc r10, QWORD PTR [r12+136]
mov rax, QWORD PTR [rsi+144]
mov QWORD PTR [rsi+136], r10
adc rax, QWORD PTR [r12+144]
mov r9, QWORD PTR [rsi+152]
mov QWORD PTR [rsi+144], rax
adc r9, QWORD PTR [r12+152]
mov r10, QWORD PTR [rsi+160]
mov QWORD PTR [rsi+152], r9
adc r10, QWORD PTR [r12+160]
mov rax, QWORD PTR [rsi+168]
mov QWORD PTR [rsi+160], r10
adc rax, QWORD PTR [r12+168]
mov r9, QWORD PTR [rsi+176]
mov QWORD PTR [rsi+168], rax
adc r9, QWORD PTR [r12+176]
mov r10, QWORD PTR [rsi+184]
mov QWORD PTR [rsi+176], r9
adc r10, QWORD PTR [r12+184]
mov rax, QWORD PTR [rsi+192]
mov QWORD PTR [rsi+184], r10
adc rax, QWORD PTR [r12+192]
mov r9, QWORD PTR [rsi+200]
mov QWORD PTR [rsi+192], rax
adc r9, QWORD PTR [r12+200]
mov r10, QWORD PTR [rsi+208]
mov QWORD PTR [rsi+200], r9
adc r10, QWORD PTR [r12+208]
mov rax, QWORD PTR [rsi+216]
mov QWORD PTR [rsi+208], r10
adc rax, QWORD PTR [r12+216]
mov r9, QWORD PTR [rsi+224]
mov QWORD PTR [rsi+216], rax
adc r9, QWORD PTR [r12+224]
mov r10, QWORD PTR [rsi+232]
mov QWORD PTR [rsi+224], r9
adc r10, QWORD PTR [r12+232]
mov rax, QWORD PTR [rsi+240]
mov QWORD PTR [rsi+232], r10
adc rax, QWORD PTR [r12+240]
mov r9, QWORD PTR [rsi+248]
mov QWORD PTR [rsi+240], rax
adc r9, QWORD PTR [r12+248]
mov r10, QWORD PTR [rsi+256]
mov QWORD PTR [rsi+248], r9
adc r10, QWORD PTR [r12+256]
mov rax, QWORD PTR [rsi+264]
mov QWORD PTR [rsi+256], r10
adc rax, QWORD PTR [r12+264]
mov r9, QWORD PTR [rsi+272]
mov QWORD PTR [rsi+264], rax
adc r9, QWORD PTR [r12+272]
mov r10, QWORD PTR [rsi+280]
mov QWORD PTR [rsi+272], r9
adc r10, QWORD PTR [r12+280]
mov rax, QWORD PTR [rsi+288]
mov QWORD PTR [rsi+280], r10
adc rax, QWORD PTR [r12+288]
mov r9, QWORD PTR [rsi+296]
mov QWORD PTR [rsi+288], rax
adc r9, QWORD PTR [r12+296]
mov r10, QWORD PTR [rsi+304]
mov QWORD PTR [rsi+296], r9
adc r10, QWORD PTR [r12+304]
mov rax, QWORD PTR [rsi+312]
mov QWORD PTR [rsi+304], r10
adc rax, QWORD PTR [r12+312]
mov r9, QWORD PTR [rsi+320]
mov QWORD PTR [rsi+312], rax
adc r9, QWORD PTR [r12+320]
mov r10, QWORD PTR [rsi+328]
mov QWORD PTR [rsi+320], r9
adc r10, QWORD PTR [r12+328]
mov rax, QWORD PTR [rsi+336]
mov QWORD PTR [rsi+328], r10
adc rax, QWORD PTR [r12+336]
mov r9, QWORD PTR [rsi+344]
mov QWORD PTR [rsi+336], rax
adc r9, QWORD PTR [r12+344]
mov r10, QWORD PTR [rsi+352]
mov QWORD PTR [rsi+344], r9
adc r10, QWORD PTR [r12+352]
mov rax, QWORD PTR [rsi+360]
mov QWORD PTR [rsi+352], r10
adc rax, QWORD PTR [r12+360]
mov r9, QWORD PTR [rsi+368]
mov QWORD PTR [rsi+360], rax
adc r9, QWORD PTR [r12+368]
mov r10, QWORD PTR [rsi+376]
mov QWORD PTR [rsi+368], r9
adc r10, QWORD PTR [r12+376]
mov rax, QWORD PTR [rsi+384]
mov QWORD PTR [rsi+376], r10
adc rax, QWORD PTR [r12+384]
mov r9, QWORD PTR [rsi+392]
mov QWORD PTR [rsi+384], rax
adc r9, QWORD PTR [r12+392]
mov r10, QWORD PTR [rsi+400]
mov QWORD PTR [rsi+392], r9
adc r10, QWORD PTR [r12+400]
mov rax, QWORD PTR [rsi+408]
mov QWORD PTR [rsi+400], r10
adc rax, QWORD PTR [r12+408]
mov r9, QWORD PTR [rsi+416]
mov QWORD PTR [rsi+408], rax
adc r9, QWORD PTR [r12+416]
mov r10, QWORD PTR [rsi+424]
mov QWORD PTR [rsi+416], r9
adc r10, QWORD PTR [r12+424]
mov rax, QWORD PTR [rsi+432]
mov QWORD PTR [rsi+424], r10
adc rax, QWORD PTR [r12+432]
mov r9, QWORD PTR [rsi+440]
mov QWORD PTR [rsi+432], rax
adc r9, QWORD PTR [r12+440]
mov r10, QWORD PTR [rsi+448]
mov QWORD PTR [rsi+440], r9
adc r10, QWORD PTR [r12+448]
mov rax, QWORD PTR [rsi+456]
mov QWORD PTR [rsi+448], r10
adc rax, QWORD PTR [r12+456]
mov r9, QWORD PTR [rsi+464]
mov QWORD PTR [rsi+456], rax
adc r9, QWORD PTR [r12+464]
mov r10, QWORD PTR [rsi+472]
mov QWORD PTR [rsi+464], r9
adc r10, QWORD PTR [r12+472]
mov rax, QWORD PTR [rsi+480]
mov QWORD PTR [rsi+472], r10
adc rax, QWORD PTR [r12+480]
mov r9, QWORD PTR [rsi+488]
mov QWORD PTR [rsi+480], rax
adc r9, QWORD PTR [r12+488]
mov r10, QWORD PTR [rsi+496]
mov QWORD PTR [rsi+488], r9
adc r10, QWORD PTR [r12+496]
mov rax, QWORD PTR [rsi+504]
mov QWORD PTR [rsi+496], r10
adc rax, QWORD PTR [r12+504]
mov QWORD PTR [rsi+504], rax
adc r11, 0
mov QWORD PTR [rcx+768], r11
add rsi, 256
; Add
mov rax, QWORD PTR [rsi]
xor r11, r11
add rax, QWORD PTR [r13]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [rsi+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [rsi+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [rsi+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [rsi+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r13+128]
mov r10, QWORD PTR [rsi+136]
mov QWORD PTR [rsi+128], r9
adc r10, QWORD PTR [r13+136]
mov rax, QWORD PTR [rsi+144]
mov QWORD PTR [rsi+136], r10
adc rax, QWORD PTR [r13+144]
mov r9, QWORD PTR [rsi+152]
mov QWORD PTR [rsi+144], rax
adc r9, QWORD PTR [r13+152]
mov r10, QWORD PTR [rsi+160]
mov QWORD PTR [rsi+152], r9
adc r10, QWORD PTR [r13+160]
mov rax, QWORD PTR [rsi+168]
mov QWORD PTR [rsi+160], r10
adc rax, QWORD PTR [r13+168]
mov r9, QWORD PTR [rsi+176]
mov QWORD PTR [rsi+168], rax
adc r9, QWORD PTR [r13+176]
mov r10, QWORD PTR [rsi+184]
mov QWORD PTR [rsi+176], r9
adc r10, QWORD PTR [r13+184]
mov rax, QWORD PTR [rsi+192]
mov QWORD PTR [rsi+184], r10
adc rax, QWORD PTR [r13+192]
mov r9, QWORD PTR [rsi+200]
mov QWORD PTR [rsi+192], rax
adc r9, QWORD PTR [r13+200]
mov r10, QWORD PTR [rsi+208]
mov QWORD PTR [rsi+200], r9
adc r10, QWORD PTR [r13+208]
mov rax, QWORD PTR [rsi+216]
mov QWORD PTR [rsi+208], r10
adc rax, QWORD PTR [r13+216]
mov r9, QWORD PTR [rsi+224]
mov QWORD PTR [rsi+216], rax
adc r9, QWORD PTR [r13+224]
mov r10, QWORD PTR [rsi+232]
mov QWORD PTR [rsi+224], r9
adc r10, QWORD PTR [r13+232]
mov rax, QWORD PTR [rsi+240]
mov QWORD PTR [rsi+232], r10
adc rax, QWORD PTR [r13+240]
mov r9, QWORD PTR [rsi+248]
mov QWORD PTR [rsi+240], rax
adc r9, QWORD PTR [r13+248]
mov r10, QWORD PTR [rsi+256]
mov QWORD PTR [rsi+248], r9
adc r10, QWORD PTR [r13+256]
mov QWORD PTR [rsi+256], r10
adc r11, 0
; Add to zero
mov rax, QWORD PTR [r13+264]
adc rax, 0
mov r9, QWORD PTR [r13+272]
mov QWORD PTR [rsi+264], rax
adc r9, 0
mov r10, QWORD PTR [r13+280]
mov QWORD PTR [rsi+272], r9
adc r10, 0
mov rax, QWORD PTR [r13+288]
mov QWORD PTR [rsi+280], r10
adc rax, 0
mov r9, QWORD PTR [r13+296]
mov QWORD PTR [rsi+288], rax
adc r9, 0
mov r10, QWORD PTR [r13+304]
mov QWORD PTR [rsi+296], r9
adc r10, 0
mov rax, QWORD PTR [r13+312]
mov QWORD PTR [rsi+304], r10
adc rax, 0
mov r9, QWORD PTR [r13+320]
mov QWORD PTR [rsi+312], rax
adc r9, 0
mov r10, QWORD PTR [r13+328]
mov QWORD PTR [rsi+320], r9
adc r10, 0
mov rax, QWORD PTR [r13+336]
mov QWORD PTR [rsi+328], r10
adc rax, 0
mov r9, QWORD PTR [r13+344]
mov QWORD PTR [rsi+336], rax
adc r9, 0
mov r10, QWORD PTR [r13+352]
mov QWORD PTR [rsi+344], r9
adc r10, 0
mov rax, QWORD PTR [r13+360]
mov QWORD PTR [rsi+352], r10
adc rax, 0
mov r9, QWORD PTR [r13+368]
mov QWORD PTR [rsi+360], rax
adc r9, 0
mov r10, QWORD PTR [r13+376]
mov QWORD PTR [rsi+368], r9
adc r10, 0
mov rax, QWORD PTR [r13+384]
mov QWORD PTR [rsi+376], r10
adc rax, 0
mov r9, QWORD PTR [r13+392]
mov QWORD PTR [rsi+384], rax
adc r9, 0
mov r10, QWORD PTR [r13+400]
mov QWORD PTR [rsi+392], r9
adc r10, 0
mov rax, QWORD PTR [r13+408]
mov QWORD PTR [rsi+400], r10
adc rax, 0
mov r9, QWORD PTR [r13+416]
mov QWORD PTR [rsi+408], rax
adc r9, 0
mov r10, QWORD PTR [r13+424]
mov QWORD PTR [rsi+416], r9
adc r10, 0
mov rax, QWORD PTR [r13+432]
mov QWORD PTR [rsi+424], r10
adc rax, 0
mov r9, QWORD PTR [r13+440]
mov QWORD PTR [rsi+432], rax
adc r9, 0
mov r10, QWORD PTR [r13+448]
mov QWORD PTR [rsi+440], r9
adc r10, 0
mov rax, QWORD PTR [r13+456]
mov QWORD PTR [rsi+448], r10
adc rax, 0
mov r9, QWORD PTR [r13+464]
mov QWORD PTR [rsi+456], rax
adc r9, 0
mov r10, QWORD PTR [r13+472]
mov QWORD PTR [rsi+464], r9
adc r10, 0
mov rax, QWORD PTR [r13+480]
mov QWORD PTR [rsi+472], r10
adc rax, 0
mov r9, QWORD PTR [r13+488]
mov QWORD PTR [rsi+480], rax
adc r9, 0
mov r10, QWORD PTR [r13+496]
mov QWORD PTR [rsi+488], r9
adc r10, 0
mov rax, QWORD PTR [r13+504]
mov QWORD PTR [rsi+496], r10
adc rax, 0
mov QWORD PTR [rsi+504], rax
add rsp, 1576
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_4096_mul_64 ENDP
_text ENDS
; /* Double a and store the result in r. (r = a + a)
; *
; * Works on 32 words of 64 bits each (byte offsets 0..248).
; *
; * r  A single precision integer; receives the doubled value (rcx).
; * a  A single precision integer; the value to double (rdx).
; *
; * On exit rax holds the carry out of the top word (0 or 1).
; * Only rax, r8 and r9 are written; no stack is used.
; */
_text SEGMENT READONLY PARA
sp_2048_dbl_32 PROC
        ; rax collects the final carry. Writing eax zero-extends, so this
        ; clears the whole of rax.
        xor     eax, eax
        ; Word 0 opens the carry chain with a plain add.
        mov     r8, QWORD PTR [rdx]
        add     r8, r8
        ; Words 1..30, two per repetition, alternating r9 and r8.
        ; The next word is always loaded before the previous result is
        ; stored, and only mov sits between the adc instructions so the
        ; carry flag flows from one word to the next.
sp_2048_dbl_32_off = 0
        REPT    15
        mov     r9, QWORD PTR [rdx+sp_2048_dbl_32_off+8]
        mov     QWORD PTR [rcx+sp_2048_dbl_32_off], r8
        adc     r9, r9
        mov     r8, QWORD PTR [rdx+sp_2048_dbl_32_off+16]
        mov     QWORD PTR [rcx+sp_2048_dbl_32_off+8], r9
        adc     r8, r8
sp_2048_dbl_32_off = sp_2048_dbl_32_off + 16
        ENDM
        ; Word 31 (top word), then write out the last two results.
        mov     r9, QWORD PTR [rdx+248]
        mov     QWORD PTR [rcx+240], r8
        adc     r9, r9
        mov     QWORD PTR [rcx+248], r9
        ; Fold the carry out of the top word into rax.
        adc     rax, 0
        ret
sp_2048_dbl_32 ENDP
_text ENDS
; /* Square a and put result in r. (r = a * a)
; *
; * r A single precision integer.
; * a A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_4096_sqr_64 PROC
push r12
sub rsp, 1304
mov QWORD PTR [rsp+1280], rcx
mov QWORD PTR [rsp+1288], rdx
lea r10, QWORD PTR [rsp+1024]
lea r11, QWORD PTR [rdx+256]
; Add
mov rax, QWORD PTR [rdx]
xor r9, r9
add rax, QWORD PTR [r11]
mov r8, QWORD PTR [rdx+8]
mov QWORD PTR [r10], rax
adc r8, QWORD PTR [r11+8]
mov rax, QWORD PTR [rdx+16]
mov QWORD PTR [r10+8], r8
adc rax, QWORD PTR [r11+16]
mov r8, QWORD PTR [rdx+24]
mov QWORD PTR [r10+16], rax
adc r8, QWORD PTR [r11+24]
mov rax, QWORD PTR [rdx+32]
mov QWORD PTR [r10+24], r8
adc rax, QWORD PTR [r11+32]
mov r8, QWORD PTR [rdx+40]
mov QWORD PTR [r10+32], rax
adc r8, QWORD PTR [r11+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r10+40], r8
adc rax, QWORD PTR [r11+48]
mov r8, QWORD PTR [rdx+56]
mov QWORD PTR [r10+48], rax
adc r8, QWORD PTR [r11+56]
mov rax, QWORD PTR [rdx+64]
mov QWORD PTR [r10+56], r8
adc rax, QWORD PTR [r11+64]
mov r8, QWORD PTR [rdx+72]
mov QWORD PTR [r10+64], rax
adc r8, QWORD PTR [r11+72]
mov rax, QWORD PTR [rdx+80]
mov QWORD PTR [r10+72], r8
adc rax, QWORD PTR [r11+80]
mov r8, QWORD PTR [rdx+88]
mov QWORD PTR [r10+80], rax
adc r8, QWORD PTR [r11+88]
mov rax, QWORD PTR [rdx+96]
mov QWORD PTR [r10+88], r8
adc rax, QWORD PTR [r11+96]
mov r8, QWORD PTR [rdx+104]
mov QWORD PTR [r10+96], rax
adc r8, QWORD PTR [r11+104]
mov rax, QWORD PTR [rdx+112]
mov QWORD PTR [r10+104], r8
adc rax, QWORD PTR [r11+112]
mov r8, QWORD PTR [rdx+120]
mov QWORD PTR [r10+112], rax
adc r8, QWORD PTR [r11+120]
mov rax, QWORD PTR [rdx+128]
mov QWORD PTR [r10+120], r8
adc rax, QWORD PTR [r11+128]
mov r8, QWORD PTR [rdx+136]
mov QWORD PTR [r10+128], rax
adc r8, QWORD PTR [r11+136]
mov rax, QWORD PTR [rdx+144]
mov QWORD PTR [r10+136], r8
adc rax, QWORD PTR [r11+144]
mov r8, QWORD PTR [rdx+152]
mov QWORD PTR [r10+144], rax
adc r8, QWORD PTR [r11+152]
mov rax, QWORD PTR [rdx+160]
mov QWORD PTR [r10+152], r8
adc rax, QWORD PTR [r11+160]
mov r8, QWORD PTR [rdx+168]
mov QWORD PTR [r10+160], rax
adc r8, QWORD PTR [r11+168]
mov rax, QWORD PTR [rdx+176]
mov QWORD PTR [r10+168], r8
adc rax, QWORD PTR [r11+176]
mov r8, QWORD PTR [rdx+184]
mov QWORD PTR [r10+176], rax
adc r8, QWORD PTR [r11+184]
mov rax, QWORD PTR [rdx+192]
mov QWORD PTR [r10+184], r8
adc rax, QWORD PTR [r11+192]
mov r8, QWORD PTR [rdx+200]
mov QWORD PTR [r10+192], rax
adc r8, QWORD PTR [r11+200]
mov rax, QWORD PTR [rdx+208]
mov QWORD PTR [r10+200], r8
adc rax, QWORD PTR [r11+208]
mov r8, QWORD PTR [rdx+216]
mov QWORD PTR [r10+208], rax
adc r8, QWORD PTR [r11+216]
mov rax, QWORD PTR [rdx+224]
mov QWORD PTR [r10+216], r8
adc rax, QWORD PTR [r11+224]
mov r8, QWORD PTR [rdx+232]
mov QWORD PTR [r10+224], rax
adc r8, QWORD PTR [r11+232]
mov rax, QWORD PTR [rdx+240]
mov QWORD PTR [r10+232], r8
adc rax, QWORD PTR [r11+240]
mov r8, QWORD PTR [rdx+248]
mov QWORD PTR [r10+240], rax
adc r8, QWORD PTR [r11+248]
mov QWORD PTR [r10+248], r8
adc r9, 0
mov QWORD PTR [rsp+1296], r9
mov rdx, r10
mov rcx, rsp
call sp_2048_sqr_32
mov rdx, QWORD PTR [rsp+1288]
lea rcx, QWORD PTR [rsp+512]
add rdx, 256
call sp_2048_sqr_32
mov rdx, QWORD PTR [rsp+1288]
mov rcx, QWORD PTR [rsp+1280]
call sp_2048_sqr_32
IFDEF _WIN64
mov rdx, QWORD PTR [rsp+1288]
mov rcx, QWORD PTR [rsp+1280]
ENDIF
mov r12, QWORD PTR [rsp+1296]
lea r10, QWORD PTR [rsp+1024]
mov r9, r12
neg r12
mov rax, QWORD PTR [r10]
mov r8, QWORD PTR [r10+8]
and rax, r12
and r8, r12
mov QWORD PTR [rcx+512], rax
mov QWORD PTR [rcx+520], r8
mov rax, QWORD PTR [r10+16]
mov r8, QWORD PTR [r10+24]
and rax, r12
and r8, r12
mov QWORD PTR [rcx+528], rax
mov QWORD PTR [rcx+536], r8
mov rax, QWORD PTR [r10+32]
mov r8, QWORD PTR [r10+40]
and rax, r12
and r8, r12
mov QWORD PTR [rcx+544], rax
mov QWORD PTR [rcx+552], r8
mov rax, QWORD PTR [r10+48]
mov r8, QWORD PTR [r10+56]
and rax, r12
and r8, r12
mov QWORD PTR [rcx+560], rax
mov QWORD PTR [rcx+568], r8
mov rax, QWORD PTR [r10+64]
mov r8, QWORD PTR [r10+72]
and rax, r12
and r8, r12
mov QWORD PTR [rcx+576], rax
mov QWORD PTR [rcx+584], r8
mov rax, QWORD PTR [r10+80]
mov r8, QWORD PTR [r10+88]
and rax, r12
and r8, r12
mov QWORD PTR [rcx+592], rax
mov QWORD PTR [rcx+600], r8
mov rax, QWORD PTR [r10+96]
mov r8, QWORD PTR [r10+104]
and rax, r12
and r8, r12
mov QWORD PTR [rcx+608], rax
mov QWORD PTR [rcx+616], r8
mov rax, QWORD PTR [r10+112]
mov r8, QWORD PTR [r10+120]
and rax, r12
and r8, r12
mov QWORD PTR [rcx+624], rax
mov QWORD PTR [rcx+632], r8
mov rax, QWORD PTR [r10+128]
mov r8, QWORD PTR [r10+136]
and rax, r12
and r8, r12
mov QWORD PTR [rcx+640], rax
mov QWORD PTR [rcx+648], r8
mov rax, QWORD PTR [r10+144]
mov r8, QWORD PTR [r10+152]
and rax, r12
and r8, r12
mov QWORD PTR [rcx+656], rax
mov QWORD PTR [rcx+664], r8
mov rax, QWORD PTR [r10+160]
mov r8, QWORD PTR [r10+168]
and rax, r12
and r8, r12
mov QWORD PTR [rcx+672], rax
mov QWORD PTR [rcx+680], r8
mov rax, QWORD PTR [r10+176]
mov r8, QWORD PTR [r10+184]
and rax, r12
and r8, r12
mov QWORD PTR [rcx+688], rax
mov QWORD PTR [rcx+696], r8
mov rax, QWORD PTR [r10+192]
mov r8, QWORD PTR [r10+200]
and rax, r12
and r8, r12
mov QWORD PTR [rcx+704], rax
mov QWORD PTR [rcx+712], r8
mov rax, QWORD PTR [r10+208]
mov r8, QWORD PTR [r10+216]
and rax, r12
and r8, r12
mov QWORD PTR [rcx+720], rax
mov QWORD PTR [rcx+728], r8
mov rax, QWORD PTR [r10+224]
mov r8, QWORD PTR [r10+232]
and rax, r12
and r8, r12
mov QWORD PTR [rcx+736], rax
mov QWORD PTR [rcx+744], r8
mov rax, QWORD PTR [r10+240]
mov r8, QWORD PTR [r10+248]
and rax, r12
and r8, r12
mov QWORD PTR [rcx+752], rax
mov QWORD PTR [rcx+760], r8
mov rax, QWORD PTR [rcx+512]
add rax, rax
mov r8, QWORD PTR [rcx+520]
mov QWORD PTR [rcx+512], rax
adc r8, r8
mov rax, QWORD PTR [rcx+528]
mov QWORD PTR [rcx+520], r8
adc rax, rax
mov r8, QWORD PTR [rcx+536]
mov QWORD PTR [rcx+528], rax
adc r8, r8
mov rax, QWORD PTR [rcx+544]
mov QWORD PTR [rcx+536], r8
adc rax, rax
mov r8, QWORD PTR [rcx+552]
mov QWORD PTR [rcx+544], rax
adc r8, r8
mov rax, QWORD PTR [rcx+560]
mov QWORD PTR [rcx+552], r8
adc rax, rax
mov r8, QWORD PTR [rcx+568]
mov QWORD PTR [rcx+560], rax
adc r8, r8
mov rax, QWORD PTR [rcx+576]
mov QWORD PTR [rcx+568], r8
adc rax, rax
mov r8, QWORD PTR [rcx+584]
mov QWORD PTR [rcx+576], rax
adc r8, r8
mov rax, QWORD PTR [rcx+592]
mov QWORD PTR [rcx+584], r8
adc rax, rax
mov r8, QWORD PTR [rcx+600]
mov QWORD PTR [rcx+592], rax
adc r8, r8
mov rax, QWORD PTR [rcx+608]
mov QWORD PTR [rcx+600], r8
adc rax, rax
mov r8, QWORD PTR [rcx+616]
mov QWORD PTR [rcx+608], rax
adc r8, r8
mov rax, QWORD PTR [rcx+624]
mov QWORD PTR [rcx+616], r8
adc rax, rax
mov r8, QWORD PTR [rcx+632]
mov QWORD PTR [rcx+624], rax
adc r8, r8
mov rax, QWORD PTR [rcx+640]
mov QWORD PTR [rcx+632], r8
adc rax, rax
mov r8, QWORD PTR [rcx+648]
mov QWORD PTR [rcx+640], rax
adc r8, r8
mov rax, QWORD PTR [rcx+656]
mov QWORD PTR [rcx+648], r8
adc rax, rax
mov r8, QWORD PTR [rcx+664]
mov QWORD PTR [rcx+656], rax
adc r8, r8
mov rax, QWORD PTR [rcx+672]
mov QWORD PTR [rcx+664], r8
adc rax, rax
mov r8, QWORD PTR [rcx+680]
mov QWORD PTR [rcx+672], rax
adc r8, r8
mov rax, QWORD PTR [rcx+688]
mov QWORD PTR [rcx+680], r8
adc rax, rax
mov r8, QWORD PTR [rcx+696]
mov QWORD PTR [rcx+688], rax
adc r8, r8
mov rax, QWORD PTR [rcx+704]
mov QWORD PTR [rcx+696], r8
adc rax, rax
mov r8, QWORD PTR [rcx+712]
mov QWORD PTR [rcx+704], rax
adc r8, r8
mov rax, QWORD PTR [rcx+720]
mov QWORD PTR [rcx+712], r8
adc rax, rax
mov r8, QWORD PTR [rcx+728]
mov QWORD PTR [rcx+720], rax
adc r8, r8
mov rax, QWORD PTR [rcx+736]
mov QWORD PTR [rcx+728], r8
adc rax, rax
mov r8, QWORD PTR [rcx+744]
mov QWORD PTR [rcx+736], rax
adc r8, r8
mov rax, QWORD PTR [rcx+752]
mov QWORD PTR [rcx+744], r8
adc rax, rax
mov r8, QWORD PTR [rcx+760]
mov QWORD PTR [rcx+752], rax
adc r8, r8
mov QWORD PTR [rcx+760], r8
adc r9, 0
lea rdx, QWORD PTR [rsp+512]
mov r10, rsp
mov rax, QWORD PTR [r10]
sub rax, QWORD PTR [rdx]
mov r8, QWORD PTR [r10+8]
mov QWORD PTR [r10], rax
sbb r8, QWORD PTR [rdx+8]
mov rax, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], r8
sbb rax, QWORD PTR [rdx+16]
mov r8, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], rax
sbb r8, QWORD PTR [rdx+24]
mov rax, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], r8
sbb rax, QWORD PTR [rdx+32]
mov r8, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], rax
sbb r8, QWORD PTR [rdx+40]
mov rax, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], r8
sbb rax, QWORD PTR [rdx+48]
mov r8, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], rax
sbb r8, QWORD PTR [rdx+56]
mov rax, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], r8
sbb rax, QWORD PTR [rdx+64]
mov r8, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], rax
sbb r8, QWORD PTR [rdx+72]
mov rax, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], r8
sbb rax, QWORD PTR [rdx+80]
mov r8, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], rax
sbb r8, QWORD PTR [rdx+88]
mov rax, QWORD PTR [r10+96]
mov QWORD PTR [r10+88], r8
sbb rax, QWORD PTR [rdx+96]
mov r8, QWORD PTR [r10+104]
mov QWORD PTR [r10+96], rax
sbb r8, QWORD PTR [rdx+104]
mov rax, QWORD PTR [r10+112]
mov QWORD PTR [r10+104], r8
sbb rax, QWORD PTR [rdx+112]
mov r8, QWORD PTR [r10+120]
mov QWORD PTR [r10+112], rax
sbb r8, QWORD PTR [rdx+120]
mov rax, QWORD PTR [r10+128]
mov QWORD PTR [r10+120], r8
sbb rax, QWORD PTR [rdx+128]
mov r8, QWORD PTR [r10+136]
mov QWORD PTR [r10+128], rax
sbb r8, QWORD PTR [rdx+136]
mov rax, QWORD PTR [r10+144]
mov QWORD PTR [r10+136], r8
sbb rax, QWORD PTR [rdx+144]
mov r8, QWORD PTR [r10+152]
mov QWORD PTR [r10+144], rax
sbb r8, QWORD PTR [rdx+152]
mov rax, QWORD PTR [r10+160]
mov QWORD PTR [r10+152], r8
sbb rax, QWORD PTR [rdx+160]
mov r8, QWORD PTR [r10+168]
mov QWORD PTR [r10+160], rax
sbb r8, QWORD PTR [rdx+168]
mov rax, QWORD PTR [r10+176]
mov QWORD PTR [r10+168], r8
sbb rax, QWORD PTR [rdx+176]
mov r8, QWORD PTR [r10+184]
mov QWORD PTR [r10+176], rax
sbb r8, QWORD PTR [rdx+184]
mov rax, QWORD PTR [r10+192]
mov QWORD PTR [r10+184], r8
sbb rax, QWORD PTR [rdx+192]
mov r8, QWORD PTR [r10+200]
mov QWORD PTR [r10+192], rax
sbb r8, QWORD PTR [rdx+200]
mov rax, QWORD PTR [r10+208]
mov QWORD PTR [r10+200], r8
sbb rax, QWORD PTR [rdx+208]
mov r8, QWORD PTR [r10+216]
mov QWORD PTR [r10+208], rax
sbb r8, QWORD PTR [rdx+216]
mov rax, QWORD PTR [r10+224]
mov QWORD PTR [r10+216], r8
sbb rax, QWORD PTR [rdx+224]
mov r8, QWORD PTR [r10+232]
mov QWORD PTR [r10+224], rax
sbb r8, QWORD PTR [rdx+232]
mov rax, QWORD PTR [r10+240]
mov QWORD PTR [r10+232], r8
sbb rax, QWORD PTR [rdx+240]
mov r8, QWORD PTR [r10+248]
mov QWORD PTR [r10+240], rax
sbb r8, QWORD PTR [rdx+248]
mov rax, QWORD PTR [r10+256]
mov QWORD PTR [r10+248], r8
sbb rax, QWORD PTR [rdx+256]
mov r8, QWORD PTR [r10+264]
mov QWORD PTR [r10+256], rax
sbb r8, QWORD PTR [rdx+264]
mov rax, QWORD PTR [r10+272]
mov QWORD PTR [r10+264], r8
sbb rax, QWORD PTR [rdx+272]
mov r8, QWORD PTR [r10+280]
mov QWORD PTR [r10+272], rax
sbb r8, QWORD PTR [rdx+280]
mov rax, QWORD PTR [r10+288]
mov QWORD PTR [r10+280], r8
sbb rax, QWORD PTR [rdx+288]
mov r8, QWORD PTR [r10+296]
mov QWORD PTR [r10+288], rax
sbb r8, QWORD PTR [rdx+296]
mov rax, QWORD PTR [r10+304]
mov QWORD PTR [r10+296], r8
sbb rax, QWORD PTR [rdx+304]
mov r8, QWORD PTR [r10+312]
mov QWORD PTR [r10+304], rax
sbb r8, QWORD PTR [rdx+312]
mov rax, QWORD PTR [r10+320]
mov QWORD PTR [r10+312], r8
sbb rax, QWORD PTR [rdx+320]
mov r8, QWORD PTR [r10+328]
mov QWORD PTR [r10+320], rax
sbb r8, QWORD PTR [rdx+328]
mov rax, QWORD PTR [r10+336]
mov QWORD PTR [r10+328], r8
sbb rax, QWORD PTR [rdx+336]
mov r8, QWORD PTR [r10+344]
mov QWORD PTR [r10+336], rax
sbb r8, QWORD PTR [rdx+344]
mov rax, QWORD PTR [r10+352]
mov QWORD PTR [r10+344], r8
sbb rax, QWORD PTR [rdx+352]
mov r8, QWORD PTR [r10+360]
mov QWORD PTR [r10+352], rax
sbb r8, QWORD PTR [rdx+360]
mov rax, QWORD PTR [r10+368]
mov QWORD PTR [r10+360], r8
sbb rax, QWORD PTR [rdx+368]
mov r8, QWORD PTR [r10+376]
mov QWORD PTR [r10+368], rax
sbb r8, QWORD PTR [rdx+376]
mov rax, QWORD PTR [r10+384]
mov QWORD PTR [r10+376], r8
sbb rax, QWORD PTR [rdx+384]
mov r8, QWORD PTR [r10+392]
mov QWORD PTR [r10+384], rax
sbb r8, QWORD PTR [rdx+392]
mov rax, QWORD PTR [r10+400]
mov QWORD PTR [r10+392], r8
sbb rax, QWORD PTR [rdx+400]
mov r8, QWORD PTR [r10+408]
mov QWORD PTR [r10+400], rax
sbb r8, QWORD PTR [rdx+408]
mov rax, QWORD PTR [r10+416]
mov QWORD PTR [r10+408], r8
sbb rax, QWORD PTR [rdx+416]
mov r8, QWORD PTR [r10+424]
mov QWORD PTR [r10+416], rax
sbb r8, QWORD PTR [rdx+424]
mov rax, QWORD PTR [r10+432]
mov QWORD PTR [r10+424], r8
sbb rax, QWORD PTR [rdx+432]
mov r8, QWORD PTR [r10+440]
mov QWORD PTR [r10+432], rax
sbb r8, QWORD PTR [rdx+440]
mov rax, QWORD PTR [r10+448]
mov QWORD PTR [r10+440], r8
sbb rax, QWORD PTR [rdx+448]
mov r8, QWORD PTR [r10+456]
mov QWORD PTR [r10+448], rax
sbb r8, QWORD PTR [rdx+456]
mov rax, QWORD PTR [r10+464]
mov QWORD PTR [r10+456], r8
sbb rax, QWORD PTR [rdx+464]
mov r8, QWORD PTR [r10+472]
mov QWORD PTR [r10+464], rax
sbb r8, QWORD PTR [rdx+472]
mov rax, QWORD PTR [r10+480]
mov QWORD PTR [r10+472], r8
sbb rax, QWORD PTR [rdx+480]
mov r8, QWORD PTR [r10+488]
mov QWORD PTR [r10+480], rax
sbb r8, QWORD PTR [rdx+488]
mov rax, QWORD PTR [r10+496]
mov QWORD PTR [r10+488], r8
sbb rax, QWORD PTR [rdx+496]
mov r8, QWORD PTR [r10+504]
mov QWORD PTR [r10+496], rax
sbb r8, QWORD PTR [rdx+504]
mov QWORD PTR [r10+504], r8
sbb r9, 0
mov rax, QWORD PTR [r10]
sub rax, QWORD PTR [rcx]
mov r8, QWORD PTR [r10+8]
mov QWORD PTR [r10], rax
sbb r8, QWORD PTR [rcx+8]
mov rax, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], r8
sbb rax, QWORD PTR [rcx+16]
mov r8, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], rax
sbb r8, QWORD PTR [rcx+24]
mov rax, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], r8
sbb rax, QWORD PTR [rcx+32]
mov r8, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], rax
sbb r8, QWORD PTR [rcx+40]
mov rax, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], r8
sbb rax, QWORD PTR [rcx+48]
mov r8, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], rax
sbb r8, QWORD PTR [rcx+56]
mov rax, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], r8
sbb rax, QWORD PTR [rcx+64]
mov r8, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], rax
sbb r8, QWORD PTR [rcx+72]
mov rax, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], r8
sbb rax, QWORD PTR [rcx+80]
mov r8, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], rax
sbb r8, QWORD PTR [rcx+88]
mov rax, QWORD PTR [r10+96]
mov QWORD PTR [r10+88], r8
sbb rax, QWORD PTR [rcx+96]
mov r8, QWORD PTR [r10+104]
mov QWORD PTR [r10+96], rax
sbb r8, QWORD PTR [rcx+104]
mov rax, QWORD PTR [r10+112]
mov QWORD PTR [r10+104], r8
sbb rax, QWORD PTR [rcx+112]
mov r8, QWORD PTR [r10+120]
mov QWORD PTR [r10+112], rax
sbb r8, QWORD PTR [rcx+120]
mov rax, QWORD PTR [r10+128]
mov QWORD PTR [r10+120], r8
sbb rax, QWORD PTR [rcx+128]
mov r8, QWORD PTR [r10+136]
mov QWORD PTR [r10+128], rax
sbb r8, QWORD PTR [rcx+136]
mov rax, QWORD PTR [r10+144]
mov QWORD PTR [r10+136], r8
sbb rax, QWORD PTR [rcx+144]
mov r8, QWORD PTR [r10+152]
mov QWORD PTR [r10+144], rax
sbb r8, QWORD PTR [rcx+152]
mov rax, QWORD PTR [r10+160]
mov QWORD PTR [r10+152], r8
sbb rax, QWORD PTR [rcx+160]
mov r8, QWORD PTR [r10+168]
mov QWORD PTR [r10+160], rax
sbb r8, QWORD PTR [rcx+168]
mov rax, QWORD PTR [r10+176]
mov QWORD PTR [r10+168], r8
sbb rax, QWORD PTR [rcx+176]
mov r8, QWORD PTR [r10+184]
mov QWORD PTR [r10+176], rax
sbb r8, QWORD PTR [rcx+184]
mov rax, QWORD PTR [r10+192]
mov QWORD PTR [r10+184], r8
sbb rax, QWORD PTR [rcx+192]
mov r8, QWORD PTR [r10+200]
mov QWORD PTR [r10+192], rax
sbb r8, QWORD PTR [rcx+200]
mov rax, QWORD PTR [r10+208]
mov QWORD PTR [r10+200], r8
sbb rax, QWORD PTR [rcx+208]
mov r8, QWORD PTR [r10+216]
mov QWORD PTR [r10+208], rax
sbb r8, QWORD PTR [rcx+216]
mov rax, QWORD PTR [r10+224]
mov QWORD PTR [r10+216], r8
sbb rax, QWORD PTR [rcx+224]
mov r8, QWORD PTR [r10+232]
mov QWORD PTR [r10+224], rax
sbb r8, QWORD PTR [rcx+232]
mov rax, QWORD PTR [r10+240]
mov QWORD PTR [r10+232], r8
sbb rax, QWORD PTR [rcx+240]
mov r8, QWORD PTR [r10+248]
mov QWORD PTR [r10+240], rax
sbb r8, QWORD PTR [rcx+248]
mov rax, QWORD PTR [r10+256]
mov QWORD PTR [r10+248], r8
sbb rax, QWORD PTR [rcx+256]
mov r8, QWORD PTR [r10+264]
mov QWORD PTR [r10+256], rax
sbb r8, QWORD PTR [rcx+264]
mov rax, QWORD PTR [r10+272]
mov QWORD PTR [r10+264], r8
sbb rax, QWORD PTR [rcx+272]
mov r8, QWORD PTR [r10+280]
mov QWORD PTR [r10+272], rax
sbb r8, QWORD PTR [rcx+280]
mov rax, QWORD PTR [r10+288]
mov QWORD PTR [r10+280], r8
sbb rax, QWORD PTR [rcx+288]
mov r8, QWORD PTR [r10+296]
mov QWORD PTR [r10+288], rax
sbb r8, QWORD PTR [rcx+296]
mov rax, QWORD PTR [r10+304]
mov QWORD PTR [r10+296], r8
sbb rax, QWORD PTR [rcx+304]
mov r8, QWORD PTR [r10+312]
mov QWORD PTR [r10+304], rax
sbb r8, QWORD PTR [rcx+312]
mov rax, QWORD PTR [r10+320]
mov QWORD PTR [r10+312], r8
sbb rax, QWORD PTR [rcx+320]
mov r8, QWORD PTR [r10+328]
mov QWORD PTR [r10+320], rax
sbb r8, QWORD PTR [rcx+328]
mov rax, QWORD PTR [r10+336]
mov QWORD PTR [r10+328], r8
sbb rax, QWORD PTR [rcx+336]
mov r8, QWORD PTR [r10+344]
mov QWORD PTR [r10+336], rax
sbb r8, QWORD PTR [rcx+344]
mov rax, QWORD PTR [r10+352]
mov QWORD PTR [r10+344], r8
sbb rax, QWORD PTR [rcx+352]
mov r8, QWORD PTR [r10+360]
mov QWORD PTR [r10+352], rax
sbb r8, QWORD PTR [rcx+360]
mov rax, QWORD PTR [r10+368]
mov QWORD PTR [r10+360], r8
sbb rax, QWORD PTR [rcx+368]
mov r8, QWORD PTR [r10+376]
mov QWORD PTR [r10+368], rax
sbb r8, QWORD PTR [rcx+376]
mov rax, QWORD PTR [r10+384]
mov QWORD PTR [r10+376], r8
sbb rax, QWORD PTR [rcx+384]
mov r8, QWORD PTR [r10+392]
mov QWORD PTR [r10+384], rax
sbb r8, QWORD PTR [rcx+392]
mov rax, QWORD PTR [r10+400]
mov QWORD PTR [r10+392], r8
sbb rax, QWORD PTR [rcx+400]
mov r8, QWORD PTR [r10+408]
mov QWORD PTR [r10+400], rax
sbb r8, QWORD PTR [rcx+408]
mov rax, QWORD PTR [r10+416]
mov QWORD PTR [r10+408], r8
sbb rax, QWORD PTR [rcx+416]
mov r8, QWORD PTR [r10+424]
mov QWORD PTR [r10+416], rax
sbb r8, QWORD PTR [rcx+424]
mov rax, QWORD PTR [r10+432]
mov QWORD PTR [r10+424], r8
sbb rax, QWORD PTR [rcx+432]
mov r8, QWORD PTR [r10+440]
mov QWORD PTR [r10+432], rax
sbb r8, QWORD PTR [rcx+440]
mov rax, QWORD PTR [r10+448]
mov QWORD PTR [r10+440], r8
sbb rax, QWORD PTR [rcx+448]
mov r8, QWORD PTR [r10+456]
mov QWORD PTR [r10+448], rax
sbb r8, QWORD PTR [rcx+456]
mov rax, QWORD PTR [r10+464]
mov QWORD PTR [r10+456], r8
sbb rax, QWORD PTR [rcx+464]
mov r8, QWORD PTR [r10+472]
mov QWORD PTR [r10+464], rax
sbb r8, QWORD PTR [rcx+472]
mov rax, QWORD PTR [r10+480]
mov QWORD PTR [r10+472], r8
sbb rax, QWORD PTR [rcx+480]
mov r8, QWORD PTR [r10+488]
mov QWORD PTR [r10+480], rax
sbb r8, QWORD PTR [rcx+488]
mov rax, QWORD PTR [r10+496]
mov QWORD PTR [r10+488], r8
sbb rax, QWORD PTR [rcx+496]
mov r8, QWORD PTR [r10+504]
mov QWORD PTR [r10+496], rax
sbb r8, QWORD PTR [rcx+504]
mov QWORD PTR [r10+504], r8
sbb r9, 0
; Add in place
mov rax, QWORD PTR [rcx+256]
add rax, QWORD PTR [r10]
mov r8, QWORD PTR [rcx+264]
mov QWORD PTR [rcx+256], rax
adc r8, QWORD PTR [r10+8]
mov rax, QWORD PTR [rcx+272]
mov QWORD PTR [rcx+264], r8
adc rax, QWORD PTR [r10+16]
mov r8, QWORD PTR [rcx+280]
mov QWORD PTR [rcx+272], rax
adc r8, QWORD PTR [r10+24]
mov rax, QWORD PTR [rcx+288]
mov QWORD PTR [rcx+280], r8
adc rax, QWORD PTR [r10+32]
mov r8, QWORD PTR [rcx+296]
mov QWORD PTR [rcx+288], rax
adc r8, QWORD PTR [r10+40]
mov rax, QWORD PTR [rcx+304]
mov QWORD PTR [rcx+296], r8
adc rax, QWORD PTR [r10+48]
mov r8, QWORD PTR [rcx+312]
mov QWORD PTR [rcx+304], rax
adc r8, QWORD PTR [r10+56]
mov rax, QWORD PTR [rcx+320]
mov QWORD PTR [rcx+312], r8
adc rax, QWORD PTR [r10+64]
mov r8, QWORD PTR [rcx+328]
mov QWORD PTR [rcx+320], rax
adc r8, QWORD PTR [r10+72]
mov rax, QWORD PTR [rcx+336]
mov QWORD PTR [rcx+328], r8
adc rax, QWORD PTR [r10+80]
mov r8, QWORD PTR [rcx+344]
mov QWORD PTR [rcx+336], rax
adc r8, QWORD PTR [r10+88]
mov rax, QWORD PTR [rcx+352]
mov QWORD PTR [rcx+344], r8
adc rax, QWORD PTR [r10+96]
mov r8, QWORD PTR [rcx+360]
mov QWORD PTR [rcx+352], rax
adc r8, QWORD PTR [r10+104]
mov rax, QWORD PTR [rcx+368]
mov QWORD PTR [rcx+360], r8
adc rax, QWORD PTR [r10+112]
mov r8, QWORD PTR [rcx+376]
mov QWORD PTR [rcx+368], rax
adc r8, QWORD PTR [r10+120]
mov rax, QWORD PTR [rcx+384]
mov QWORD PTR [rcx+376], r8
adc rax, QWORD PTR [r10+128]
mov r8, QWORD PTR [rcx+392]
mov QWORD PTR [rcx+384], rax
adc r8, QWORD PTR [r10+136]
mov rax, QWORD PTR [rcx+400]
mov QWORD PTR [rcx+392], r8
adc rax, QWORD PTR [r10+144]
mov r8, QWORD PTR [rcx+408]
mov QWORD PTR [rcx+400], rax
adc r8, QWORD PTR [r10+152]
mov rax, QWORD PTR [rcx+416]
mov QWORD PTR [rcx+408], r8
adc rax, QWORD PTR [r10+160]
mov r8, QWORD PTR [rcx+424]
mov QWORD PTR [rcx+416], rax
adc r8, QWORD PTR [r10+168]
mov rax, QWORD PTR [rcx+432]
mov QWORD PTR [rcx+424], r8
adc rax, QWORD PTR [r10+176]
mov r8, QWORD PTR [rcx+440]
mov QWORD PTR [rcx+432], rax
adc r8, QWORD PTR [r10+184]
mov rax, QWORD PTR [rcx+448]
mov QWORD PTR [rcx+440], r8
adc rax, QWORD PTR [r10+192]
mov r8, QWORD PTR [rcx+456]
mov QWORD PTR [rcx+448], rax
adc r8, QWORD PTR [r10+200]
mov rax, QWORD PTR [rcx+464]
mov QWORD PTR [rcx+456], r8
adc rax, QWORD PTR [r10+208]
mov r8, QWORD PTR [rcx+472]
mov QWORD PTR [rcx+464], rax
adc r8, QWORD PTR [r10+216]
mov rax, QWORD PTR [rcx+480]
mov QWORD PTR [rcx+472], r8
adc rax, QWORD PTR [r10+224]
mov r8, QWORD PTR [rcx+488]
mov QWORD PTR [rcx+480], rax
adc r8, QWORD PTR [r10+232]
mov rax, QWORD PTR [rcx+496]
mov QWORD PTR [rcx+488], r8
adc rax, QWORD PTR [r10+240]
mov r8, QWORD PTR [rcx+504]
mov QWORD PTR [rcx+496], rax
adc r8, QWORD PTR [r10+248]
mov rax, QWORD PTR [rcx+512]
mov QWORD PTR [rcx+504], r8
adc rax, QWORD PTR [r10+256]
mov r8, QWORD PTR [rcx+520]
mov QWORD PTR [rcx+512], rax
adc r8, QWORD PTR [r10+264]
mov rax, QWORD PTR [rcx+528]
mov QWORD PTR [rcx+520], r8
adc rax, QWORD PTR [r10+272]
mov r8, QWORD PTR [rcx+536]
mov QWORD PTR [rcx+528], rax
adc r8, QWORD PTR [r10+280]
mov rax, QWORD PTR [rcx+544]
mov QWORD PTR [rcx+536], r8
adc rax, QWORD PTR [r10+288]
mov r8, QWORD PTR [rcx+552]
mov QWORD PTR [rcx+544], rax
adc r8, QWORD PTR [r10+296]
mov rax, QWORD PTR [rcx+560]
mov QWORD PTR [rcx+552], r8
adc rax, QWORD PTR [r10+304]
mov r8, QWORD PTR [rcx+568]
mov QWORD PTR [rcx+560], rax
adc r8, QWORD PTR [r10+312]
mov rax, QWORD PTR [rcx+576]
mov QWORD PTR [rcx+568], r8
adc rax, QWORD PTR [r10+320]
mov r8, QWORD PTR [rcx+584]
mov QWORD PTR [rcx+576], rax
adc r8, QWORD PTR [r10+328]
mov rax, QWORD PTR [rcx+592]
mov QWORD PTR [rcx+584], r8
adc rax, QWORD PTR [r10+336]
mov r8, QWORD PTR [rcx+600]
mov QWORD PTR [rcx+592], rax
adc r8, QWORD PTR [r10+344]
mov rax, QWORD PTR [rcx+608]
mov QWORD PTR [rcx+600], r8
adc rax, QWORD PTR [r10+352]
mov r8, QWORD PTR [rcx+616]
mov QWORD PTR [rcx+608], rax
adc r8, QWORD PTR [r10+360]
mov rax, QWORD PTR [rcx+624]
mov QWORD PTR [rcx+616], r8
adc rax, QWORD PTR [r10+368]
mov r8, QWORD PTR [rcx+632]
mov QWORD PTR [rcx+624], rax
adc r8, QWORD PTR [r10+376]
mov rax, QWORD PTR [rcx+640]
mov QWORD PTR [rcx+632], r8
adc rax, QWORD PTR [r10+384]
mov r8, QWORD PTR [rcx+648]
mov QWORD PTR [rcx+640], rax
adc r8, QWORD PTR [r10+392]
mov rax, QWORD PTR [rcx+656]
mov QWORD PTR [rcx+648], r8
adc rax, QWORD PTR [r10+400]
mov r8, QWORD PTR [rcx+664]
mov QWORD PTR [rcx+656], rax
adc r8, QWORD PTR [r10+408]
mov rax, QWORD PTR [rcx+672]
mov QWORD PTR [rcx+664], r8
adc rax, QWORD PTR [r10+416]
mov r8, QWORD PTR [rcx+680]
mov QWORD PTR [rcx+672], rax
adc r8, QWORD PTR [r10+424]
mov rax, QWORD PTR [rcx+688]
mov QWORD PTR [rcx+680], r8
adc rax, QWORD PTR [r10+432]
mov r8, QWORD PTR [rcx+696]
mov QWORD PTR [rcx+688], rax
adc r8, QWORD PTR [r10+440]
mov rax, QWORD PTR [rcx+704]
mov QWORD PTR [rcx+696], r8
adc rax, QWORD PTR [r10+448]
mov r8, QWORD PTR [rcx+712]
mov QWORD PTR [rcx+704], rax
adc r8, QWORD PTR [r10+456]
mov rax, QWORD PTR [rcx+720]
mov QWORD PTR [rcx+712], r8
adc rax, QWORD PTR [r10+464]
mov r8, QWORD PTR [rcx+728]
mov QWORD PTR [rcx+720], rax
adc r8, QWORD PTR [r10+472]
mov rax, QWORD PTR [rcx+736]
mov QWORD PTR [rcx+728], r8
adc rax, QWORD PTR [r10+480]
mov r8, QWORD PTR [rcx+744]
mov QWORD PTR [rcx+736], rax
adc r8, QWORD PTR [r10+488]
mov rax, QWORD PTR [rcx+752]
mov QWORD PTR [rcx+744], r8
adc rax, QWORD PTR [r10+496]
mov r8, QWORD PTR [rcx+760]
mov QWORD PTR [rcx+752], rax
adc r8, QWORD PTR [r10+504]
mov QWORD PTR [rcx+760], r8
adc r9, 0
mov QWORD PTR [rcx+768], r9
; Add in place
mov rax, QWORD PTR [rcx+512]
xor r9, r9
add rax, QWORD PTR [rdx]
mov r8, QWORD PTR [rcx+520]
mov QWORD PTR [rcx+512], rax
adc r8, QWORD PTR [rdx+8]
mov rax, QWORD PTR [rcx+528]
mov QWORD PTR [rcx+520], r8
adc rax, QWORD PTR [rdx+16]
mov r8, QWORD PTR [rcx+536]
mov QWORD PTR [rcx+528], rax
adc r8, QWORD PTR [rdx+24]
mov rax, QWORD PTR [rcx+544]
mov QWORD PTR [rcx+536], r8
adc rax, QWORD PTR [rdx+32]
mov r8, QWORD PTR [rcx+552]
mov QWORD PTR [rcx+544], rax
adc r8, QWORD PTR [rdx+40]
mov rax, QWORD PTR [rcx+560]
mov QWORD PTR [rcx+552], r8
adc rax, QWORD PTR [rdx+48]
mov r8, QWORD PTR [rcx+568]
mov QWORD PTR [rcx+560], rax
adc r8, QWORD PTR [rdx+56]
mov rax, QWORD PTR [rcx+576]
mov QWORD PTR [rcx+568], r8
adc rax, QWORD PTR [rdx+64]
mov r8, QWORD PTR [rcx+584]
mov QWORD PTR [rcx+576], rax
adc r8, QWORD PTR [rdx+72]
mov rax, QWORD PTR [rcx+592]
mov QWORD PTR [rcx+584], r8
adc rax, QWORD PTR [rdx+80]
mov r8, QWORD PTR [rcx+600]
mov QWORD PTR [rcx+592], rax
adc r8, QWORD PTR [rdx+88]
mov rax, QWORD PTR [rcx+608]
mov QWORD PTR [rcx+600], r8
adc rax, QWORD PTR [rdx+96]
mov r8, QWORD PTR [rcx+616]
mov QWORD PTR [rcx+608], rax
adc r8, QWORD PTR [rdx+104]
mov rax, QWORD PTR [rcx+624]
mov QWORD PTR [rcx+616], r8
adc rax, QWORD PTR [rdx+112]
mov r8, QWORD PTR [rcx+632]
mov QWORD PTR [rcx+624], rax
adc r8, QWORD PTR [rdx+120]
mov rax, QWORD PTR [rcx+640]
mov QWORD PTR [rcx+632], r8
adc rax, QWORD PTR [rdx+128]
mov r8, QWORD PTR [rcx+648]
mov QWORD PTR [rcx+640], rax
adc r8, QWORD PTR [rdx+136]
mov rax, QWORD PTR [rcx+656]
mov QWORD PTR [rcx+648], r8
adc rax, QWORD PTR [rdx+144]
mov r8, QWORD PTR [rcx+664]
mov QWORD PTR [rcx+656], rax
adc r8, QWORD PTR [rdx+152]
mov rax, QWORD PTR [rcx+672]
mov QWORD PTR [rcx+664], r8
adc rax, QWORD PTR [rdx+160]
mov r8, QWORD PTR [rcx+680]
mov QWORD PTR [rcx+672], rax
adc r8, QWORD PTR [rdx+168]
mov rax, QWORD PTR [rcx+688]
mov QWORD PTR [rcx+680], r8
adc rax, QWORD PTR [rdx+176]
mov r8, QWORD PTR [rcx+696]
mov QWORD PTR [rcx+688], rax
adc r8, QWORD PTR [rdx+184]
mov rax, QWORD PTR [rcx+704]
mov QWORD PTR [rcx+696], r8
adc rax, QWORD PTR [rdx+192]
mov r8, QWORD PTR [rcx+712]
mov QWORD PTR [rcx+704], rax
adc r8, QWORD PTR [rdx+200]
mov rax, QWORD PTR [rcx+720]
mov QWORD PTR [rcx+712], r8
adc rax, QWORD PTR [rdx+208]
mov r8, QWORD PTR [rcx+728]
mov QWORD PTR [rcx+720], rax
adc r8, QWORD PTR [rdx+216]
mov rax, QWORD PTR [rcx+736]
mov QWORD PTR [rcx+728], r8
adc rax, QWORD PTR [rdx+224]
mov r8, QWORD PTR [rcx+744]
mov QWORD PTR [rcx+736], rax
adc r8, QWORD PTR [rdx+232]
mov rax, QWORD PTR [rcx+752]
mov QWORD PTR [rcx+744], r8
adc rax, QWORD PTR [rdx+240]
mov r8, QWORD PTR [rcx+760]
mov QWORD PTR [rcx+752], rax
adc r8, QWORD PTR [rdx+248]
mov rax, QWORD PTR [rcx+768]
mov QWORD PTR [rcx+760], r8
adc rax, QWORD PTR [rdx+256]
mov QWORD PTR [rcx+768], rax
adc r9, 0
; Add to zero
mov rax, QWORD PTR [rdx+264]
adc rax, 0
mov r8, QWORD PTR [rdx+272]
mov QWORD PTR [rcx+776], rax
adc r8, 0
mov rax, QWORD PTR [rdx+280]
mov QWORD PTR [rcx+784], r8
adc rax, 0
mov r8, QWORD PTR [rdx+288]
mov QWORD PTR [rcx+792], rax
adc r8, 0
mov rax, QWORD PTR [rdx+296]
mov QWORD PTR [rcx+800], r8
adc rax, 0
mov r8, QWORD PTR [rdx+304]
mov QWORD PTR [rcx+808], rax
adc r8, 0
mov rax, QWORD PTR [rdx+312]
mov QWORD PTR [rcx+816], r8
adc rax, 0
mov r8, QWORD PTR [rdx+320]
mov QWORD PTR [rcx+824], rax
adc r8, 0
mov rax, QWORD PTR [rdx+328]
mov QWORD PTR [rcx+832], r8
adc rax, 0
mov r8, QWORD PTR [rdx+336]
mov QWORD PTR [rcx+840], rax
adc r8, 0
mov rax, QWORD PTR [rdx+344]
mov QWORD PTR [rcx+848], r8
adc rax, 0
mov r8, QWORD PTR [rdx+352]
mov QWORD PTR [rcx+856], rax
adc r8, 0
mov rax, QWORD PTR [rdx+360]
mov QWORD PTR [rcx+864], r8
adc rax, 0
mov r8, QWORD PTR [rdx+368]
mov QWORD PTR [rcx+872], rax
adc r8, 0
mov rax, QWORD PTR [rdx+376]
mov QWORD PTR [rcx+880], r8
adc rax, 0
mov r8, QWORD PTR [rdx+384]
mov QWORD PTR [rcx+888], rax
adc r8, 0
mov rax, QWORD PTR [rdx+392]
mov QWORD PTR [rcx+896], r8
adc rax, 0
mov r8, QWORD PTR [rdx+400]
mov QWORD PTR [rcx+904], rax
adc r8, 0
mov rax, QWORD PTR [rdx+408]
mov QWORD PTR [rcx+912], r8
adc rax, 0
mov r8, QWORD PTR [rdx+416]
mov QWORD PTR [rcx+920], rax
adc r8, 0
mov rax, QWORD PTR [rdx+424]
mov QWORD PTR [rcx+928], r8
adc rax, 0
mov r8, QWORD PTR [rdx+432]
mov QWORD PTR [rcx+936], rax
adc r8, 0
mov rax, QWORD PTR [rdx+440]
mov QWORD PTR [rcx+944], r8
adc rax, 0
mov r8, QWORD PTR [rdx+448]
mov QWORD PTR [rcx+952], rax
adc r8, 0
mov rax, QWORD PTR [rdx+456]
mov QWORD PTR [rcx+960], r8
adc rax, 0
mov r8, QWORD PTR [rdx+464]
mov QWORD PTR [rcx+968], rax
adc r8, 0
mov rax, QWORD PTR [rdx+472]
mov QWORD PTR [rcx+976], r8
adc rax, 0
mov r8, QWORD PTR [rdx+480]
mov QWORD PTR [rcx+984], rax
adc r8, 0
mov rax, QWORD PTR [rdx+488]
mov QWORD PTR [rcx+992], r8
adc rax, 0
mov r8, QWORD PTR [rdx+496]
mov QWORD PTR [rcx+1000], rax
adc r8, 0
mov rax, QWORD PTR [rdx+504]
mov QWORD PTR [rcx+1008], r8
adc rax, 0
mov QWORD PTR [rcx+1016], rax
add rsp, 1304
pop r12
ret
sp_4096_sqr_64 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
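; /* Multiply a and b into r. (r = a * b)
; *
; * Operands are arrays of 64-bit words, least-significant word first.
; * The product is assembled from three sp_2048_mul_avx2_32 calls on the
; * operand halves: (a_lo + a_hi) * (b_lo + b_hi), a_hi * b_hi and a_lo * b_lo.
; *
; * r A single precision integer. Receives the product (passed in rcx); needs
; *   room for the double-width result (presumably 128 words).
; * a A single precision integer. 64 words are read (passed in rdx).
; * b A single precision integer. 64 words are read (passed in r8).
; */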
_text SEGMENT READONLY PARA
sp_4096_mul_avx2_64 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
sub rsp, 1576
mov QWORD PTR [rsp+1536], rcx
mov QWORD PTR [rsp+1544], rdx
mov QWORD PTR [rsp+1552], r8
lea r12, QWORD PTR [rsp+1024]
lea r14, QWORD PTR [rdx+256]
; Add
mov rax, QWORD PTR [rdx]
xor r15, r15
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [rdx+8]
mov QWORD PTR [r12], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [rdx+16]
mov QWORD PTR [r12+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [rdx+24]
mov QWORD PTR [r12+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [rdx+32]
mov QWORD PTR [r12+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [rdx+40]
mov QWORD PTR [r12+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r12+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [rdx+56]
mov QWORD PTR [r12+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [rdx+64]
mov QWORD PTR [r12+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [rdx+72]
mov QWORD PTR [r12+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [rdx+80]
mov QWORD PTR [r12+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [rdx+88]
mov QWORD PTR [r12+80], r9
adc r10, QWORD PTR [r14+88]
mov rax, QWORD PTR [rdx+96]
mov QWORD PTR [r12+88], r10
adc rax, QWORD PTR [r14+96]
mov r9, QWORD PTR [rdx+104]
mov QWORD PTR [r12+96], rax
adc r9, QWORD PTR [r14+104]
mov r10, QWORD PTR [rdx+112]
mov QWORD PTR [r12+104], r9
adc r10, QWORD PTR [r14+112]
mov rax, QWORD PTR [rdx+120]
mov QWORD PTR [r12+112], r10
adc rax, QWORD PTR [r14+120]
mov r9, QWORD PTR [rdx+128]
mov QWORD PTR [r12+120], rax
adc r9, QWORD PTR [r14+128]
mov r10, QWORD PTR [rdx+136]
mov QWORD PTR [r12+128], r9
adc r10, QWORD PTR [r14+136]
mov rax, QWORD PTR [rdx+144]
mov QWORD PTR [r12+136], r10
adc rax, QWORD PTR [r14+144]
mov r9, QWORD PTR [rdx+152]
mov QWORD PTR [r12+144], rax
adc r9, QWORD PTR [r14+152]
mov r10, QWORD PTR [rdx+160]
mov QWORD PTR [r12+152], r9
adc r10, QWORD PTR [r14+160]
mov rax, QWORD PTR [rdx+168]
mov QWORD PTR [r12+160], r10
adc rax, QWORD PTR [r14+168]
mov r9, QWORD PTR [rdx+176]
mov QWORD PTR [r12+168], rax
adc r9, QWORD PTR [r14+176]
mov r10, QWORD PTR [rdx+184]
mov QWORD PTR [r12+176], r9
adc r10, QWORD PTR [r14+184]
mov rax, QWORD PTR [rdx+192]
mov QWORD PTR [r12+184], r10
adc rax, QWORD PTR [r14+192]
mov r9, QWORD PTR [rdx+200]
mov QWORD PTR [r12+192], rax
adc r9, QWORD PTR [r14+200]
mov r10, QWORD PTR [rdx+208]
mov QWORD PTR [r12+200], r9
adc r10, QWORD PTR [r14+208]
mov rax, QWORD PTR [rdx+216]
mov QWORD PTR [r12+208], r10
adc rax, QWORD PTR [r14+216]
mov r9, QWORD PTR [rdx+224]
mov QWORD PTR [r12+216], rax
adc r9, QWORD PTR [r14+224]
mov r10, QWORD PTR [rdx+232]
mov QWORD PTR [r12+224], r9
adc r10, QWORD PTR [r14+232]
mov rax, QWORD PTR [rdx+240]
mov QWORD PTR [r12+232], r10
adc rax, QWORD PTR [r14+240]
mov r9, QWORD PTR [rdx+248]
mov QWORD PTR [r12+240], rax
adc r9, QWORD PTR [r14+248]
mov QWORD PTR [r12+248], r9
adc r15, 0
mov QWORD PTR [rsp+1560], r15
lea r13, QWORD PTR [rsp+1280]
lea r14, QWORD PTR [r8+256]
; Add
mov rax, QWORD PTR [r8]
xor rdi, rdi
add rax, QWORD PTR [r14]
mov r9, QWORD PTR [r8+8]
mov QWORD PTR [r13], rax
adc r9, QWORD PTR [r14+8]
mov r10, QWORD PTR [r8+16]
mov QWORD PTR [r13+8], r9
adc r10, QWORD PTR [r14+16]
mov rax, QWORD PTR [r8+24]
mov QWORD PTR [r13+16], r10
adc rax, QWORD PTR [r14+24]
mov r9, QWORD PTR [r8+32]
mov QWORD PTR [r13+24], rax
adc r9, QWORD PTR [r14+32]
mov r10, QWORD PTR [r8+40]
mov QWORD PTR [r13+32], r9
adc r10, QWORD PTR [r14+40]
mov rax, QWORD PTR [r8+48]
mov QWORD PTR [r13+40], r10
adc rax, QWORD PTR [r14+48]
mov r9, QWORD PTR [r8+56]
mov QWORD PTR [r13+48], rax
adc r9, QWORD PTR [r14+56]
mov r10, QWORD PTR [r8+64]
mov QWORD PTR [r13+56], r9
adc r10, QWORD PTR [r14+64]
mov rax, QWORD PTR [r8+72]
mov QWORD PTR [r13+64], r10
adc rax, QWORD PTR [r14+72]
mov r9, QWORD PTR [r8+80]
mov QWORD PTR [r13+72], rax
adc r9, QWORD PTR [r14+80]
mov r10, QWORD PTR [r8+88]
mov QWORD PTR [r13+80], r9
adc r10, QWORD PTR [r14+88]
mov rax, QWORD PTR [r8+96]
mov QWORD PTR [r13+88], r10
adc rax, QWORD PTR [r14+96]
mov r9, QWORD PTR [r8+104]
mov QWORD PTR [r13+96], rax
adc r9, QWORD PTR [r14+104]
mov r10, QWORD PTR [r8+112]
mov QWORD PTR [r13+104], r9
adc r10, QWORD PTR [r14+112]
mov rax, QWORD PTR [r8+120]
mov QWORD PTR [r13+112], r10
adc rax, QWORD PTR [r14+120]
mov r9, QWORD PTR [r8+128]
mov QWORD PTR [r13+120], rax
adc r9, QWORD PTR [r14+128]
mov r10, QWORD PTR [r8+136]
mov QWORD PTR [r13+128], r9
adc r10, QWORD PTR [r14+136]
mov rax, QWORD PTR [r8+144]
mov QWORD PTR [r13+136], r10
adc rax, QWORD PTR [r14+144]
mov r9, QWORD PTR [r8+152]
mov QWORD PTR [r13+144], rax
adc r9, QWORD PTR [r14+152]
mov r10, QWORD PTR [r8+160]
mov QWORD PTR [r13+152], r9
adc r10, QWORD PTR [r14+160]
mov rax, QWORD PTR [r8+168]
mov QWORD PTR [r13+160], r10
adc rax, QWORD PTR [r14+168]
mov r9, QWORD PTR [r8+176]
mov QWORD PTR [r13+168], rax
adc r9, QWORD PTR [r14+176]
mov r10, QWORD PTR [r8+184]
mov QWORD PTR [r13+176], r9
adc r10, QWORD PTR [r14+184]
mov rax, QWORD PTR [r8+192]
mov QWORD PTR [r13+184], r10
adc rax, QWORD PTR [r14+192]
mov r9, QWORD PTR [r8+200]
mov QWORD PTR [r13+192], rax
adc r9, QWORD PTR [r14+200]
mov r10, QWORD PTR [r8+208]
mov QWORD PTR [r13+200], r9
adc r10, QWORD PTR [r14+208]
mov rax, QWORD PTR [r8+216]
mov QWORD PTR [r13+208], r10
adc rax, QWORD PTR [r14+216]
mov r9, QWORD PTR [r8+224]
mov QWORD PTR [r13+216], rax
adc r9, QWORD PTR [r14+224]
mov r10, QWORD PTR [r8+232]
mov QWORD PTR [r13+224], r9
adc r10, QWORD PTR [r14+232]
mov rax, QWORD PTR [r8+240]
mov QWORD PTR [r13+232], r10
adc rax, QWORD PTR [r14+240]
mov r9, QWORD PTR [r8+248]
mov QWORD PTR [r13+240], rax
adc r9, QWORD PTR [r14+248]
mov QWORD PTR [r13+248], r9
adc rdi, 0
mov QWORD PTR [rsp+1568], rdi
mov r8, r13
mov rdx, r12
mov rcx, rsp
call sp_2048_mul_avx2_32
mov r8, QWORD PTR [rsp+1552]
mov rdx, QWORD PTR [rsp+1544]
lea rcx, QWORD PTR [rsp+512]
add r8, 256
add rdx, 256
call sp_2048_mul_avx2_32
mov r8, QWORD PTR [rsp+1552]
mov rdx, QWORD PTR [rsp+1544]
mov rcx, QWORD PTR [rsp+1536]
call sp_2048_mul_avx2_32
IFDEF _WIN64
mov r8, QWORD PTR [rsp+1552]
mov rdx, QWORD PTR [rsp+1544]
mov rcx, QWORD PTR [rsp+1536]
ENDIF
mov r15, QWORD PTR [rsp+1560]
mov rdi, QWORD PTR [rsp+1568]
mov rsi, QWORD PTR [rsp+1536]
mov r11, r15
lea r12, QWORD PTR [rsp+1024]
lea r13, QWORD PTR [rsp+1280]
and r11, rdi
neg r15
neg rdi
add rsi, 512
mov rax, QWORD PTR [r12]
mov r9, QWORD PTR [r13]
pext rax, rax, rdi
pext r9, r9, r15
add rax, r9
mov r9, QWORD PTR [r12+8]
mov r10, QWORD PTR [r13+8]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi], rax
adc r9, r10
mov r10, QWORD PTR [r12+16]
mov rax, QWORD PTR [r13+16]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+8], r9
adc r10, rax
mov rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [r13+24]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+16], r10
adc rax, r9
mov r9, QWORD PTR [r12+32]
mov r10, QWORD PTR [r13+32]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+24], rax
adc r9, r10
mov r10, QWORD PTR [r12+40]
mov rax, QWORD PTR [r13+40]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+32], r9
adc r10, rax
mov rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [r13+48]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+40], r10
adc rax, r9
mov r9, QWORD PTR [r12+56]
mov r10, QWORD PTR [r13+56]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+48], rax
adc r9, r10
mov r10, QWORD PTR [r12+64]
mov rax, QWORD PTR [r13+64]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+56], r9
adc r10, rax
mov rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [r13+72]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+64], r10
adc rax, r9
mov r9, QWORD PTR [r12+80]
mov r10, QWORD PTR [r13+80]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+72], rax
adc r9, r10
mov r10, QWORD PTR [r12+88]
mov rax, QWORD PTR [r13+88]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+80], r9
adc r10, rax
mov rax, QWORD PTR [r12+96]
mov r9, QWORD PTR [r13+96]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+88], r10
adc rax, r9
mov r9, QWORD PTR [r12+104]
mov r10, QWORD PTR [r13+104]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+96], rax
adc r9, r10
mov r10, QWORD PTR [r12+112]
mov rax, QWORD PTR [r13+112]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+104], r9
adc r10, rax
mov rax, QWORD PTR [r12+120]
mov r9, QWORD PTR [r13+120]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+112], r10
adc rax, r9
mov r9, QWORD PTR [r12+128]
mov r10, QWORD PTR [r13+128]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+120], rax
adc r9, r10
mov r10, QWORD PTR [r12+136]
mov rax, QWORD PTR [r13+136]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+128], r9
adc r10, rax
mov rax, QWORD PTR [r12+144]
mov r9, QWORD PTR [r13+144]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+136], r10
adc rax, r9
mov r9, QWORD PTR [r12+152]
mov r10, QWORD PTR [r13+152]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+144], rax
adc r9, r10
mov r10, QWORD PTR [r12+160]
mov rax, QWORD PTR [r13+160]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+152], r9
adc r10, rax
mov rax, QWORD PTR [r12+168]
mov r9, QWORD PTR [r13+168]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+160], r10
adc rax, r9
mov r9, QWORD PTR [r12+176]
mov r10, QWORD PTR [r13+176]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+168], rax
adc r9, r10
mov r10, QWORD PTR [r12+184]
mov rax, QWORD PTR [r13+184]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+176], r9
adc r10, rax
mov rax, QWORD PTR [r12+192]
mov r9, QWORD PTR [r13+192]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+184], r10
adc rax, r9
mov r9, QWORD PTR [r12+200]
mov r10, QWORD PTR [r13+200]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+192], rax
adc r9, r10
mov r10, QWORD PTR [r12+208]
mov rax, QWORD PTR [r13+208]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+200], r9
adc r10, rax
mov rax, QWORD PTR [r12+216]
mov r9, QWORD PTR [r13+216]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+208], r10
adc rax, r9
mov r9, QWORD PTR [r12+224]
mov r10, QWORD PTR [r13+224]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+216], rax
adc r9, r10
mov r10, QWORD PTR [r12+232]
mov rax, QWORD PTR [r13+232]
pext r10, r10, rdi
pext rax, rax, r15
mov QWORD PTR [rsi+224], r9
adc r10, rax
mov rax, QWORD PTR [r12+240]
mov r9, QWORD PTR [r13+240]
pext rax, rax, rdi
pext r9, r9, r15
mov QWORD PTR [rsi+232], r10
adc rax, r9
mov r9, QWORD PTR [r12+248]
mov r10, QWORD PTR [r13+248]
pext r9, r9, rdi
pext r10, r10, r15
mov QWORD PTR [rsi+240], rax
adc r9, r10
mov QWORD PTR [rsi+248], r9
adc r11, 0
lea r13, QWORD PTR [rsp+512]
mov r12, rsp
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [r13]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [r13+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [r13+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [r13+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [r13+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [r13+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [r13+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [r13+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [r13+184]
mov rax, QWORD PTR [r12+192]
mov QWORD PTR [r12+184], r10
sbb rax, QWORD PTR [r13+192]
mov r9, QWORD PTR [r12+200]
mov QWORD PTR [r12+192], rax
sbb r9, QWORD PTR [r13+200]
mov r10, QWORD PTR [r12+208]
mov QWORD PTR [r12+200], r9
sbb r10, QWORD PTR [r13+208]
mov rax, QWORD PTR [r12+216]
mov QWORD PTR [r12+208], r10
sbb rax, QWORD PTR [r13+216]
mov r9, QWORD PTR [r12+224]
mov QWORD PTR [r12+216], rax
sbb r9, QWORD PTR [r13+224]
mov r10, QWORD PTR [r12+232]
mov QWORD PTR [r12+224], r9
sbb r10, QWORD PTR [r13+232]
mov rax, QWORD PTR [r12+240]
mov QWORD PTR [r12+232], r10
sbb rax, QWORD PTR [r13+240]
mov r9, QWORD PTR [r12+248]
mov QWORD PTR [r12+240], rax
sbb r9, QWORD PTR [r13+248]
mov r10, QWORD PTR [r12+256]
mov QWORD PTR [r12+248], r9
sbb r10, QWORD PTR [r13+256]
mov rax, QWORD PTR [r12+264]
mov QWORD PTR [r12+256], r10
sbb rax, QWORD PTR [r13+264]
mov r9, QWORD PTR [r12+272]
mov QWORD PTR [r12+264], rax
sbb r9, QWORD PTR [r13+272]
mov r10, QWORD PTR [r12+280]
mov QWORD PTR [r12+272], r9
sbb r10, QWORD PTR [r13+280]
mov rax, QWORD PTR [r12+288]
mov QWORD PTR [r12+280], r10
sbb rax, QWORD PTR [r13+288]
mov r9, QWORD PTR [r12+296]
mov QWORD PTR [r12+288], rax
sbb r9, QWORD PTR [r13+296]
mov r10, QWORD PTR [r12+304]
mov QWORD PTR [r12+296], r9
sbb r10, QWORD PTR [r13+304]
mov rax, QWORD PTR [r12+312]
mov QWORD PTR [r12+304], r10
sbb rax, QWORD PTR [r13+312]
mov r9, QWORD PTR [r12+320]
mov QWORD PTR [r12+312], rax
sbb r9, QWORD PTR [r13+320]
mov r10, QWORD PTR [r12+328]
mov QWORD PTR [r12+320], r9
sbb r10, QWORD PTR [r13+328]
mov rax, QWORD PTR [r12+336]
mov QWORD PTR [r12+328], r10
sbb rax, QWORD PTR [r13+336]
mov r9, QWORD PTR [r12+344]
mov QWORD PTR [r12+336], rax
sbb r9, QWORD PTR [r13+344]
mov r10, QWORD PTR [r12+352]
mov QWORD PTR [r12+344], r9
sbb r10, QWORD PTR [r13+352]
mov rax, QWORD PTR [r12+360]
mov QWORD PTR [r12+352], r10
sbb rax, QWORD PTR [r13+360]
mov r9, QWORD PTR [r12+368]
mov QWORD PTR [r12+360], rax
sbb r9, QWORD PTR [r13+368]
mov r10, QWORD PTR [r12+376]
mov QWORD PTR [r12+368], r9
sbb r10, QWORD PTR [r13+376]
mov rax, QWORD PTR [r12+384]
mov QWORD PTR [r12+376], r10
sbb rax, QWORD PTR [r13+384]
mov r9, QWORD PTR [r12+392]
mov QWORD PTR [r12+384], rax
sbb r9, QWORD PTR [r13+392]
mov r10, QWORD PTR [r12+400]
mov QWORD PTR [r12+392], r9
sbb r10, QWORD PTR [r13+400]
mov rax, QWORD PTR [r12+408]
mov QWORD PTR [r12+400], r10
sbb rax, QWORD PTR [r13+408]
mov r9, QWORD PTR [r12+416]
mov QWORD PTR [r12+408], rax
sbb r9, QWORD PTR [r13+416]
mov r10, QWORD PTR [r12+424]
mov QWORD PTR [r12+416], r9
sbb r10, QWORD PTR [r13+424]
mov rax, QWORD PTR [r12+432]
mov QWORD PTR [r12+424], r10
sbb rax, QWORD PTR [r13+432]
mov r9, QWORD PTR [r12+440]
mov QWORD PTR [r12+432], rax
sbb r9, QWORD PTR [r13+440]
mov r10, QWORD PTR [r12+448]
mov QWORD PTR [r12+440], r9
sbb r10, QWORD PTR [r13+448]
mov rax, QWORD PTR [r12+456]
mov QWORD PTR [r12+448], r10
sbb rax, QWORD PTR [r13+456]
mov r9, QWORD PTR [r12+464]
mov QWORD PTR [r12+456], rax
sbb r9, QWORD PTR [r13+464]
mov r10, QWORD PTR [r12+472]
mov QWORD PTR [r12+464], r9
sbb r10, QWORD PTR [r13+472]
mov rax, QWORD PTR [r12+480]
mov QWORD PTR [r12+472], r10
sbb rax, QWORD PTR [r13+480]
mov r9, QWORD PTR [r12+488]
mov QWORD PTR [r12+480], rax
sbb r9, QWORD PTR [r13+488]
mov r10, QWORD PTR [r12+496]
mov QWORD PTR [r12+488], r9
sbb r10, QWORD PTR [r13+496]
mov rax, QWORD PTR [r12+504]
mov QWORD PTR [r12+496], r10
sbb rax, QWORD PTR [r13+504]
mov QWORD PTR [r12+504], rax
sbb r11, 0
mov rax, QWORD PTR [r12]
sub rax, QWORD PTR [rcx]
mov r9, QWORD PTR [r12+8]
mov QWORD PTR [r12], rax
sbb r9, QWORD PTR [rcx+8]
mov r10, QWORD PTR [r12+16]
mov QWORD PTR [r12+8], r9
sbb r10, QWORD PTR [rcx+16]
mov rax, QWORD PTR [r12+24]
mov QWORD PTR [r12+16], r10
sbb rax, QWORD PTR [rcx+24]
mov r9, QWORD PTR [r12+32]
mov QWORD PTR [r12+24], rax
sbb r9, QWORD PTR [rcx+32]
mov r10, QWORD PTR [r12+40]
mov QWORD PTR [r12+32], r9
sbb r10, QWORD PTR [rcx+40]
mov rax, QWORD PTR [r12+48]
mov QWORD PTR [r12+40], r10
sbb rax, QWORD PTR [rcx+48]
mov r9, QWORD PTR [r12+56]
mov QWORD PTR [r12+48], rax
sbb r9, QWORD PTR [rcx+56]
mov r10, QWORD PTR [r12+64]
mov QWORD PTR [r12+56], r9
sbb r10, QWORD PTR [rcx+64]
mov rax, QWORD PTR [r12+72]
mov QWORD PTR [r12+64], r10
sbb rax, QWORD PTR [rcx+72]
mov r9, QWORD PTR [r12+80]
mov QWORD PTR [r12+72], rax
sbb r9, QWORD PTR [rcx+80]
mov r10, QWORD PTR [r12+88]
mov QWORD PTR [r12+80], r9
sbb r10, QWORD PTR [rcx+88]
mov rax, QWORD PTR [r12+96]
mov QWORD PTR [r12+88], r10
sbb rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [r12+104]
mov QWORD PTR [r12+96], rax
sbb r9, QWORD PTR [rcx+104]
mov r10, QWORD PTR [r12+112]
mov QWORD PTR [r12+104], r9
sbb r10, QWORD PTR [rcx+112]
mov rax, QWORD PTR [r12+120]
mov QWORD PTR [r12+112], r10
sbb rax, QWORD PTR [rcx+120]
mov r9, QWORD PTR [r12+128]
mov QWORD PTR [r12+120], rax
sbb r9, QWORD PTR [rcx+128]
mov r10, QWORD PTR [r12+136]
mov QWORD PTR [r12+128], r9
sbb r10, QWORD PTR [rcx+136]
mov rax, QWORD PTR [r12+144]
mov QWORD PTR [r12+136], r10
sbb rax, QWORD PTR [rcx+144]
mov r9, QWORD PTR [r12+152]
mov QWORD PTR [r12+144], rax
sbb r9, QWORD PTR [rcx+152]
mov r10, QWORD PTR [r12+160]
mov QWORD PTR [r12+152], r9
sbb r10, QWORD PTR [rcx+160]
mov rax, QWORD PTR [r12+168]
mov QWORD PTR [r12+160], r10
sbb rax, QWORD PTR [rcx+168]
mov r9, QWORD PTR [r12+176]
mov QWORD PTR [r12+168], rax
sbb r9, QWORD PTR [rcx+176]
mov r10, QWORD PTR [r12+184]
mov QWORD PTR [r12+176], r9
sbb r10, QWORD PTR [rcx+184]
mov rax, QWORD PTR [r12+192]
mov QWORD PTR [r12+184], r10
sbb rax, QWORD PTR [rcx+192]
mov r9, QWORD PTR [r12+200]
mov QWORD PTR [r12+192], rax
sbb r9, QWORD PTR [rcx+200]
mov r10, QWORD PTR [r12+208]
mov QWORD PTR [r12+200], r9
sbb r10, QWORD PTR [rcx+208]
mov rax, QWORD PTR [r12+216]
mov QWORD PTR [r12+208], r10
sbb rax, QWORD PTR [rcx+216]
mov r9, QWORD PTR [r12+224]
mov QWORD PTR [r12+216], rax
sbb r9, QWORD PTR [rcx+224]
mov r10, QWORD PTR [r12+232]
mov QWORD PTR [r12+224], r9
sbb r10, QWORD PTR [rcx+232]
mov rax, QWORD PTR [r12+240]
mov QWORD PTR [r12+232], r10
sbb rax, QWORD PTR [rcx+240]
mov r9, QWORD PTR [r12+248]
mov QWORD PTR [r12+240], rax
sbb r9, QWORD PTR [rcx+248]
mov r10, QWORD PTR [r12+256]
mov QWORD PTR [r12+248], r9
sbb r10, QWORD PTR [rcx+256]
mov rax, QWORD PTR [r12+264]
mov QWORD PTR [r12+256], r10
sbb rax, QWORD PTR [rcx+264]
mov r9, QWORD PTR [r12+272]
mov QWORD PTR [r12+264], rax
sbb r9, QWORD PTR [rcx+272]
mov r10, QWORD PTR [r12+280]
mov QWORD PTR [r12+272], r9
sbb r10, QWORD PTR [rcx+280]
mov rax, QWORD PTR [r12+288]
mov QWORD PTR [r12+280], r10
sbb rax, QWORD PTR [rcx+288]
mov r9, QWORD PTR [r12+296]
mov QWORD PTR [r12+288], rax
sbb r9, QWORD PTR [rcx+296]
mov r10, QWORD PTR [r12+304]
mov QWORD PTR [r12+296], r9
sbb r10, QWORD PTR [rcx+304]
mov rax, QWORD PTR [r12+312]
mov QWORD PTR [r12+304], r10
sbb rax, QWORD PTR [rcx+312]
mov r9, QWORD PTR [r12+320]
mov QWORD PTR [r12+312], rax
sbb r9, QWORD PTR [rcx+320]
mov r10, QWORD PTR [r12+328]
mov QWORD PTR [r12+320], r9
sbb r10, QWORD PTR [rcx+328]
mov rax, QWORD PTR [r12+336]
mov QWORD PTR [r12+328], r10
sbb rax, QWORD PTR [rcx+336]
mov r9, QWORD PTR [r12+344]
mov QWORD PTR [r12+336], rax
sbb r9, QWORD PTR [rcx+344]
mov r10, QWORD PTR [r12+352]
mov QWORD PTR [r12+344], r9
sbb r10, QWORD PTR [rcx+352]
mov rax, QWORD PTR [r12+360]
mov QWORD PTR [r12+352], r10
sbb rax, QWORD PTR [rcx+360]
mov r9, QWORD PTR [r12+368]
mov QWORD PTR [r12+360], rax
sbb r9, QWORD PTR [rcx+368]
mov r10, QWORD PTR [r12+376]
mov QWORD PTR [r12+368], r9
sbb r10, QWORD PTR [rcx+376]
mov rax, QWORD PTR [r12+384]
mov QWORD PTR [r12+376], r10
sbb rax, QWORD PTR [rcx+384]
mov r9, QWORD PTR [r12+392]
mov QWORD PTR [r12+384], rax
sbb r9, QWORD PTR [rcx+392]
mov r10, QWORD PTR [r12+400]
mov QWORD PTR [r12+392], r9
sbb r10, QWORD PTR [rcx+400]
mov rax, QWORD PTR [r12+408]
mov QWORD PTR [r12+400], r10
sbb rax, QWORD PTR [rcx+408]
mov r9, QWORD PTR [r12+416]
mov QWORD PTR [r12+408], rax
sbb r9, QWORD PTR [rcx+416]
mov r10, QWORD PTR [r12+424]
mov QWORD PTR [r12+416], r9
sbb r10, QWORD PTR [rcx+424]
mov rax, QWORD PTR [r12+432]
mov QWORD PTR [r12+424], r10
sbb rax, QWORD PTR [rcx+432]
mov r9, QWORD PTR [r12+440]
mov QWORD PTR [r12+432], rax
sbb r9, QWORD PTR [rcx+440]
mov r10, QWORD PTR [r12+448]
mov QWORD PTR [r12+440], r9
sbb r10, QWORD PTR [rcx+448]
mov rax, QWORD PTR [r12+456]
mov QWORD PTR [r12+448], r10
sbb rax, QWORD PTR [rcx+456]
mov r9, QWORD PTR [r12+464]
mov QWORD PTR [r12+456], rax
sbb r9, QWORD PTR [rcx+464]
mov r10, QWORD PTR [r12+472]
mov QWORD PTR [r12+464], r9
sbb r10, QWORD PTR [rcx+472]
mov rax, QWORD PTR [r12+480]
mov QWORD PTR [r12+472], r10
sbb rax, QWORD PTR [rcx+480]
mov r9, QWORD PTR [r12+488]
mov QWORD PTR [r12+480], rax
sbb r9, QWORD PTR [rcx+488]
mov r10, QWORD PTR [r12+496]
mov QWORD PTR [r12+488], r9
sbb r10, QWORD PTR [rcx+496]
mov rax, QWORD PTR [r12+504]
mov QWORD PTR [r12+496], r10
sbb rax, QWORD PTR [rcx+504]
mov QWORD PTR [r12+504], rax
sbb r11, 0
sub rsi, 256
; Add
mov rax, QWORD PTR [rsi]
add rax, QWORD PTR [r12]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r12+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r12+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r12+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r12+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r12+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r12+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r12+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r12+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r12+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r12+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r12+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r12+96]
mov r9, QWORD PTR [rsi+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r12+104]
mov r10, QWORD PTR [rsi+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r12+112]
mov rax, QWORD PTR [rsi+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r12+120]
mov r9, QWORD PTR [rsi+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r12+128]
mov r10, QWORD PTR [rsi+136]
mov QWORD PTR [rsi+128], r9
adc r10, QWORD PTR [r12+136]
mov rax, QWORD PTR [rsi+144]
mov QWORD PTR [rsi+136], r10
adc rax, QWORD PTR [r12+144]
mov r9, QWORD PTR [rsi+152]
mov QWORD PTR [rsi+144], rax
adc r9, QWORD PTR [r12+152]
mov r10, QWORD PTR [rsi+160]
mov QWORD PTR [rsi+152], r9
adc r10, QWORD PTR [r12+160]
mov rax, QWORD PTR [rsi+168]
mov QWORD PTR [rsi+160], r10
adc rax, QWORD PTR [r12+168]
mov r9, QWORD PTR [rsi+176]
mov QWORD PTR [rsi+168], rax
adc r9, QWORD PTR [r12+176]
mov r10, QWORD PTR [rsi+184]
mov QWORD PTR [rsi+176], r9
adc r10, QWORD PTR [r12+184]
mov rax, QWORD PTR [rsi+192]
mov QWORD PTR [rsi+184], r10
adc rax, QWORD PTR [r12+192]
mov r9, QWORD PTR [rsi+200]
mov QWORD PTR [rsi+192], rax
adc r9, QWORD PTR [r12+200]
mov r10, QWORD PTR [rsi+208]
mov QWORD PTR [rsi+200], r9
adc r10, QWORD PTR [r12+208]
mov rax, QWORD PTR [rsi+216]
mov QWORD PTR [rsi+208], r10
adc rax, QWORD PTR [r12+216]
mov r9, QWORD PTR [rsi+224]
mov QWORD PTR [rsi+216], rax
adc r9, QWORD PTR [r12+224]
mov r10, QWORD PTR [rsi+232]
mov QWORD PTR [rsi+224], r9
adc r10, QWORD PTR [r12+232]
mov rax, QWORD PTR [rsi+240]
mov QWORD PTR [rsi+232], r10
adc rax, QWORD PTR [r12+240]
mov r9, QWORD PTR [rsi+248]
mov QWORD PTR [rsi+240], rax
adc r9, QWORD PTR [r12+248]
mov r10, QWORD PTR [rsi+256]
mov QWORD PTR [rsi+248], r9
adc r10, QWORD PTR [r12+256]
mov rax, QWORD PTR [rsi+264]
mov QWORD PTR [rsi+256], r10
adc rax, QWORD PTR [r12+264]
mov r9, QWORD PTR [rsi+272]
mov QWORD PTR [rsi+264], rax
adc r9, QWORD PTR [r12+272]
mov r10, QWORD PTR [rsi+280]
mov QWORD PTR [rsi+272], r9
adc r10, QWORD PTR [r12+280]
mov rax, QWORD PTR [rsi+288]
mov QWORD PTR [rsi+280], r10
adc rax, QWORD PTR [r12+288]
mov r9, QWORD PTR [rsi+296]
mov QWORD PTR [rsi+288], rax
adc r9, QWORD PTR [r12+296]
mov r10, QWORD PTR [rsi+304]
mov QWORD PTR [rsi+296], r9
adc r10, QWORD PTR [r12+304]
mov rax, QWORD PTR [rsi+312]
mov QWORD PTR [rsi+304], r10
adc rax, QWORD PTR [r12+312]
mov r9, QWORD PTR [rsi+320]
mov QWORD PTR [rsi+312], rax
adc r9, QWORD PTR [r12+320]
mov r10, QWORD PTR [rsi+328]
mov QWORD PTR [rsi+320], r9
adc r10, QWORD PTR [r12+328]
mov rax, QWORD PTR [rsi+336]
mov QWORD PTR [rsi+328], r10
adc rax, QWORD PTR [r12+336]
mov r9, QWORD PTR [rsi+344]
mov QWORD PTR [rsi+336], rax
adc r9, QWORD PTR [r12+344]
mov r10, QWORD PTR [rsi+352]
mov QWORD PTR [rsi+344], r9
adc r10, QWORD PTR [r12+352]
mov rax, QWORD PTR [rsi+360]
mov QWORD PTR [rsi+352], r10
adc rax, QWORD PTR [r12+360]
mov r9, QWORD PTR [rsi+368]
mov QWORD PTR [rsi+360], rax
adc r9, QWORD PTR [r12+368]
mov r10, QWORD PTR [rsi+376]
mov QWORD PTR [rsi+368], r9
adc r10, QWORD PTR [r12+376]
mov rax, QWORD PTR [rsi+384]
mov QWORD PTR [rsi+376], r10
adc rax, QWORD PTR [r12+384]
mov r9, QWORD PTR [rsi+392]
mov QWORD PTR [rsi+384], rax
adc r9, QWORD PTR [r12+392]
mov r10, QWORD PTR [rsi+400]
mov QWORD PTR [rsi+392], r9
adc r10, QWORD PTR [r12+400]
mov rax, QWORD PTR [rsi+408]
mov QWORD PTR [rsi+400], r10
adc rax, QWORD PTR [r12+408]
mov r9, QWORD PTR [rsi+416]
mov QWORD PTR [rsi+408], rax
adc r9, QWORD PTR [r12+416]
mov r10, QWORD PTR [rsi+424]
mov QWORD PTR [rsi+416], r9
adc r10, QWORD PTR [r12+424]
mov rax, QWORD PTR [rsi+432]
mov QWORD PTR [rsi+424], r10
adc rax, QWORD PTR [r12+432]
mov r9, QWORD PTR [rsi+440]
mov QWORD PTR [rsi+432], rax
adc r9, QWORD PTR [r12+440]
mov r10, QWORD PTR [rsi+448]
mov QWORD PTR [rsi+440], r9
adc r10, QWORD PTR [r12+448]
mov rax, QWORD PTR [rsi+456]
mov QWORD PTR [rsi+448], r10
adc rax, QWORD PTR [r12+456]
mov r9, QWORD PTR [rsi+464]
mov QWORD PTR [rsi+456], rax
adc r9, QWORD PTR [r12+464]
mov r10, QWORD PTR [rsi+472]
mov QWORD PTR [rsi+464], r9
adc r10, QWORD PTR [r12+472]
mov rax, QWORD PTR [rsi+480]
mov QWORD PTR [rsi+472], r10
adc rax, QWORD PTR [r12+480]
mov r9, QWORD PTR [rsi+488]
mov QWORD PTR [rsi+480], rax
adc r9, QWORD PTR [r12+488]
mov r10, QWORD PTR [rsi+496]
mov QWORD PTR [rsi+488], r9
adc r10, QWORD PTR [r12+496]
mov rax, QWORD PTR [rsi+504]
mov QWORD PTR [rsi+496], r10
adc rax, QWORD PTR [r12+504]
mov QWORD PTR [rsi+504], rax
adc r11, 0
mov QWORD PTR [rcx+768], r11
add rsi, 256
; Add
mov rax, QWORD PTR [rsi]
xor r11, r11
add rax, QWORD PTR [r13]
mov r9, QWORD PTR [rsi+8]
mov QWORD PTR [rsi], rax
adc r9, QWORD PTR [r13+8]
mov r10, QWORD PTR [rsi+16]
mov QWORD PTR [rsi+8], r9
adc r10, QWORD PTR [r13+16]
mov rax, QWORD PTR [rsi+24]
mov QWORD PTR [rsi+16], r10
adc rax, QWORD PTR [r13+24]
mov r9, QWORD PTR [rsi+32]
mov QWORD PTR [rsi+24], rax
adc r9, QWORD PTR [r13+32]
mov r10, QWORD PTR [rsi+40]
mov QWORD PTR [rsi+32], r9
adc r10, QWORD PTR [r13+40]
mov rax, QWORD PTR [rsi+48]
mov QWORD PTR [rsi+40], r10
adc rax, QWORD PTR [r13+48]
mov r9, QWORD PTR [rsi+56]
mov QWORD PTR [rsi+48], rax
adc r9, QWORD PTR [r13+56]
mov r10, QWORD PTR [rsi+64]
mov QWORD PTR [rsi+56], r9
adc r10, QWORD PTR [r13+64]
mov rax, QWORD PTR [rsi+72]
mov QWORD PTR [rsi+64], r10
adc rax, QWORD PTR [r13+72]
mov r9, QWORD PTR [rsi+80]
mov QWORD PTR [rsi+72], rax
adc r9, QWORD PTR [r13+80]
mov r10, QWORD PTR [rsi+88]
mov QWORD PTR [rsi+80], r9
adc r10, QWORD PTR [r13+88]
mov rax, QWORD PTR [rsi+96]
mov QWORD PTR [rsi+88], r10
adc rax, QWORD PTR [r13+96]
mov r9, QWORD PTR [rsi+104]
mov QWORD PTR [rsi+96], rax
adc r9, QWORD PTR [r13+104]
mov r10, QWORD PTR [rsi+112]
mov QWORD PTR [rsi+104], r9
adc r10, QWORD PTR [r13+112]
mov rax, QWORD PTR [rsi+120]
mov QWORD PTR [rsi+112], r10
adc rax, QWORD PTR [r13+120]
mov r9, QWORD PTR [rsi+128]
mov QWORD PTR [rsi+120], rax
adc r9, QWORD PTR [r13+128]
mov r10, QWORD PTR [rsi+136]
mov QWORD PTR [rsi+128], r9
adc r10, QWORD PTR [r13+136]
mov rax, QWORD PTR [rsi+144]
mov QWORD PTR [rsi+136], r10
adc rax, QWORD PTR [r13+144]
mov r9, QWORD PTR [rsi+152]
mov QWORD PTR [rsi+144], rax
adc r9, QWORD PTR [r13+152]
mov r10, QWORD PTR [rsi+160]
mov QWORD PTR [rsi+152], r9
adc r10, QWORD PTR [r13+160]
mov rax, QWORD PTR [rsi+168]
mov QWORD PTR [rsi+160], r10
adc rax, QWORD PTR [r13+168]
mov r9, QWORD PTR [rsi+176]
mov QWORD PTR [rsi+168], rax
adc r9, QWORD PTR [r13+176]
mov r10, QWORD PTR [rsi+184]
mov QWORD PTR [rsi+176], r9
adc r10, QWORD PTR [r13+184]
mov rax, QWORD PTR [rsi+192]
mov QWORD PTR [rsi+184], r10
adc rax, QWORD PTR [r13+192]
mov r9, QWORD PTR [rsi+200]
mov QWORD PTR [rsi+192], rax
adc r9, QWORD PTR [r13+200]
mov r10, QWORD PTR [rsi+208]
mov QWORD PTR [rsi+200], r9
adc r10, QWORD PTR [r13+208]
mov rax, QWORD PTR [rsi+216]
mov QWORD PTR [rsi+208], r10
adc rax, QWORD PTR [r13+216]
mov r9, QWORD PTR [rsi+224]
mov QWORD PTR [rsi+216], rax
adc r9, QWORD PTR [r13+224]
mov r10, QWORD PTR [rsi+232]
mov QWORD PTR [rsi+224], r9
adc r10, QWORD PTR [r13+232]
mov rax, QWORD PTR [rsi+240]
mov QWORD PTR [rsi+232], r10
adc rax, QWORD PTR [r13+240]
mov r9, QWORD PTR [rsi+248]
mov QWORD PTR [rsi+240], rax
adc r9, QWORD PTR [r13+248]
mov r10, QWORD PTR [rsi+256]
mov QWORD PTR [rsi+248], r9
adc r10, QWORD PTR [r13+256]
mov QWORD PTR [rsi+256], r10
adc r11, 0
; Add to zero
mov rax, QWORD PTR [r13+264]
adc rax, 0
mov r9, QWORD PTR [r13+272]
mov QWORD PTR [rsi+264], rax
adc r9, 0
mov r10, QWORD PTR [r13+280]
mov QWORD PTR [rsi+272], r9
adc r10, 0
mov rax, QWORD PTR [r13+288]
mov QWORD PTR [rsi+280], r10
adc rax, 0
mov r9, QWORD PTR [r13+296]
mov QWORD PTR [rsi+288], rax
adc r9, 0
mov r10, QWORD PTR [r13+304]
mov QWORD PTR [rsi+296], r9
adc r10, 0
mov rax, QWORD PTR [r13+312]
mov QWORD PTR [rsi+304], r10
adc rax, 0
mov r9, QWORD PTR [r13+320]
mov QWORD PTR [rsi+312], rax
adc r9, 0
mov r10, QWORD PTR [r13+328]
mov QWORD PTR [rsi+320], r9
adc r10, 0
mov rax, QWORD PTR [r13+336]
mov QWORD PTR [rsi+328], r10
adc rax, 0
mov r9, QWORD PTR [r13+344]
mov QWORD PTR [rsi+336], rax
adc r9, 0
mov r10, QWORD PTR [r13+352]
mov QWORD PTR [rsi+344], r9
adc r10, 0
mov rax, QWORD PTR [r13+360]
mov QWORD PTR [rsi+352], r10
adc rax, 0
mov r9, QWORD PTR [r13+368]
mov QWORD PTR [rsi+360], rax
adc r9, 0
mov r10, QWORD PTR [r13+376]
mov QWORD PTR [rsi+368], r9
adc r10, 0
mov rax, QWORD PTR [r13+384]
mov QWORD PTR [rsi+376], r10
adc rax, 0
mov r9, QWORD PTR [r13+392]
mov QWORD PTR [rsi+384], rax
adc r9, 0
mov r10, QWORD PTR [r13+400]
mov QWORD PTR [rsi+392], r9
adc r10, 0
mov rax, QWORD PTR [r13+408]
mov QWORD PTR [rsi+400], r10
adc rax, 0
mov r9, QWORD PTR [r13+416]
mov QWORD PTR [rsi+408], rax
adc r9, 0
mov r10, QWORD PTR [r13+424]
mov QWORD PTR [rsi+416], r9
adc r10, 0
mov rax, QWORD PTR [r13+432]
mov QWORD PTR [rsi+424], r10
adc rax, 0
mov r9, QWORD PTR [r13+440]
mov QWORD PTR [rsi+432], rax
adc r9, 0
mov r10, QWORD PTR [r13+448]
mov QWORD PTR [rsi+440], r9
adc r10, 0
mov rax, QWORD PTR [r13+456]
mov QWORD PTR [rsi+448], r10
adc rax, 0
mov r9, QWORD PTR [r13+464]
mov QWORD PTR [rsi+456], rax
adc r9, 0
mov r10, QWORD PTR [r13+472]
mov QWORD PTR [rsi+464], r9
adc r10, 0
mov rax, QWORD PTR [r13+480]
mov QWORD PTR [rsi+472], r10
adc rax, 0
mov r9, QWORD PTR [r13+488]
mov QWORD PTR [rsi+480], rax
adc r9, 0
mov r10, QWORD PTR [r13+496]
mov QWORD PTR [rsi+488], r9
adc r10, 0
mov rax, QWORD PTR [r13+504]
mov QWORD PTR [rsi+496], r10
adc rax, 0
mov QWORD PTR [rsi+504], rax
add rsp, 1576
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_4096_mul_avx2_64 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Square a and put result in r. (r = a * a)
; *
; * r  A single precision integer that receives the result (passed in rcx).
; * a  A single precision integer to square (passed in rdx).
; */
_text SEGMENT READONLY PARA
sp_4096_sqr_avx2_64 PROC
push r12
sub rsp, 1304
mov QWORD PTR [rsp+1280], rcx
mov QWORD PTR [rsp+1288], rdx
lea r10, QWORD PTR [rsp+1024]
lea r11, QWORD PTR [rdx+256]
; Add
mov rax, QWORD PTR [rdx]
xor r9, r9
add rax, QWORD PTR [r11]
mov r8, QWORD PTR [rdx+8]
mov QWORD PTR [r10], rax
adc r8, QWORD PTR [r11+8]
mov rax, QWORD PTR [rdx+16]
mov QWORD PTR [r10+8], r8
adc rax, QWORD PTR [r11+16]
mov r8, QWORD PTR [rdx+24]
mov QWORD PTR [r10+16], rax
adc r8, QWORD PTR [r11+24]
mov rax, QWORD PTR [rdx+32]
mov QWORD PTR [r10+24], r8
adc rax, QWORD PTR [r11+32]
mov r8, QWORD PTR [rdx+40]
mov QWORD PTR [r10+32], rax
adc r8, QWORD PTR [r11+40]
mov rax, QWORD PTR [rdx+48]
mov QWORD PTR [r10+40], r8
adc rax, QWORD PTR [r11+48]
mov r8, QWORD PTR [rdx+56]
mov QWORD PTR [r10+48], rax
adc r8, QWORD PTR [r11+56]
mov rax, QWORD PTR [rdx+64]
mov QWORD PTR [r10+56], r8
adc rax, QWORD PTR [r11+64]
mov r8, QWORD PTR [rdx+72]
mov QWORD PTR [r10+64], rax
adc r8, QWORD PTR [r11+72]
mov rax, QWORD PTR [rdx+80]
mov QWORD PTR [r10+72], r8
adc rax, QWORD PTR [r11+80]
mov r8, QWORD PTR [rdx+88]
mov QWORD PTR [r10+80], rax
adc r8, QWORD PTR [r11+88]
mov rax, QWORD PTR [rdx+96]
mov QWORD PTR [r10+88], r8
adc rax, QWORD PTR [r11+96]
mov r8, QWORD PTR [rdx+104]
mov QWORD PTR [r10+96], rax
adc r8, QWORD PTR [r11+104]
mov rax, QWORD PTR [rdx+112]
mov QWORD PTR [r10+104], r8
adc rax, QWORD PTR [r11+112]
mov r8, QWORD PTR [rdx+120]
mov QWORD PTR [r10+112], rax
adc r8, QWORD PTR [r11+120]
mov rax, QWORD PTR [rdx+128]
mov QWORD PTR [r10+120], r8
adc rax, QWORD PTR [r11+128]
mov r8, QWORD PTR [rdx+136]
mov QWORD PTR [r10+128], rax
adc r8, QWORD PTR [r11+136]
mov rax, QWORD PTR [rdx+144]
mov QWORD PTR [r10+136], r8
adc rax, QWORD PTR [r11+144]
mov r8, QWORD PTR [rdx+152]
mov QWORD PTR [r10+144], rax
adc r8, QWORD PTR [r11+152]
mov rax, QWORD PTR [rdx+160]
mov QWORD PTR [r10+152], r8
adc rax, QWORD PTR [r11+160]
mov r8, QWORD PTR [rdx+168]
mov QWORD PTR [r10+160], rax
adc r8, QWORD PTR [r11+168]
mov rax, QWORD PTR [rdx+176]
mov QWORD PTR [r10+168], r8
adc rax, QWORD PTR [r11+176]
mov r8, QWORD PTR [rdx+184]
mov QWORD PTR [r10+176], rax
adc r8, QWORD PTR [r11+184]
mov rax, QWORD PTR [rdx+192]
mov QWORD PTR [r10+184], r8
adc rax, QWORD PTR [r11+192]
mov r8, QWORD PTR [rdx+200]
mov QWORD PTR [r10+192], rax
adc r8, QWORD PTR [r11+200]
mov rax, QWORD PTR [rdx+208]
mov QWORD PTR [r10+200], r8
adc rax, QWORD PTR [r11+208]
mov r8, QWORD PTR [rdx+216]
mov QWORD PTR [r10+208], rax
adc r8, QWORD PTR [r11+216]
mov rax, QWORD PTR [rdx+224]
mov QWORD PTR [r10+216], r8
adc rax, QWORD PTR [r11+224]
mov r8, QWORD PTR [rdx+232]
mov QWORD PTR [r10+224], rax
adc r8, QWORD PTR [r11+232]
mov rax, QWORD PTR [rdx+240]
mov QWORD PTR [r10+232], r8
adc rax, QWORD PTR [r11+240]
mov r8, QWORD PTR [rdx+248]
mov QWORD PTR [r10+240], rax
adc r8, QWORD PTR [r11+248]
mov QWORD PTR [r10+248], r8
adc r9, 0
mov QWORD PTR [rsp+1296], r9
mov rdx, r10
mov rcx, rsp
call sp_2048_sqr_avx2_32
mov rdx, QWORD PTR [rsp+1288]
lea rcx, QWORD PTR [rsp+512]
add rdx, 256
call sp_2048_sqr_avx2_32
mov rdx, QWORD PTR [rsp+1288]
mov rcx, QWORD PTR [rsp+1280]
call sp_2048_sqr_avx2_32
IFDEF _WIN64
mov rdx, QWORD PTR [rsp+1288]
mov rcx, QWORD PTR [rsp+1280]
ENDIF
mov r12, QWORD PTR [rsp+1296]
lea r10, QWORD PTR [rsp+1024]
mov r9, r12
neg r12
mov rax, QWORD PTR [r10]
pext rax, rax, r12
add rax, rax
mov r8, QWORD PTR [r10+8]
mov QWORD PTR [rcx+512], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+16]
mov QWORD PTR [rcx+520], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+24]
mov QWORD PTR [rcx+528], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+32]
mov QWORD PTR [rcx+536], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+40]
mov QWORD PTR [rcx+544], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+48]
mov QWORD PTR [rcx+552], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+56]
mov QWORD PTR [rcx+560], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+64]
mov QWORD PTR [rcx+568], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+72]
mov QWORD PTR [rcx+576], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+80]
mov QWORD PTR [rcx+584], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+88]
mov QWORD PTR [rcx+592], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+96]
mov QWORD PTR [rcx+600], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+104]
mov QWORD PTR [rcx+608], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+112]
mov QWORD PTR [rcx+616], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+120]
mov QWORD PTR [rcx+624], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+128]
mov QWORD PTR [rcx+632], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+136]
mov QWORD PTR [rcx+640], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+144]
mov QWORD PTR [rcx+648], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+152]
mov QWORD PTR [rcx+656], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+160]
mov QWORD PTR [rcx+664], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+168]
mov QWORD PTR [rcx+672], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+176]
mov QWORD PTR [rcx+680], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+184]
mov QWORD PTR [rcx+688], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+192]
mov QWORD PTR [rcx+696], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+200]
mov QWORD PTR [rcx+704], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+208]
mov QWORD PTR [rcx+712], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+216]
mov QWORD PTR [rcx+720], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+224]
mov QWORD PTR [rcx+728], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+232]
mov QWORD PTR [rcx+736], rax
pext r8, r8, r12
adc r8, r8
mov rax, QWORD PTR [r10+240]
mov QWORD PTR [rcx+744], r8
pext rax, rax, r12
adc rax, rax
mov r8, QWORD PTR [r10+248]
mov QWORD PTR [rcx+752], rax
pext r8, r8, r12
adc r8, r8
mov QWORD PTR [rcx+760], r8
adc r9, 0
lea rdx, QWORD PTR [rsp+512]
mov r10, rsp
mov rax, QWORD PTR [r10]
sub rax, QWORD PTR [rdx]
mov r8, QWORD PTR [r10+8]
mov QWORD PTR [r10], rax
sbb r8, QWORD PTR [rdx+8]
mov rax, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], r8
sbb rax, QWORD PTR [rdx+16]
mov r8, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], rax
sbb r8, QWORD PTR [rdx+24]
mov rax, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], r8
sbb rax, QWORD PTR [rdx+32]
mov r8, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], rax
sbb r8, QWORD PTR [rdx+40]
mov rax, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], r8
sbb rax, QWORD PTR [rdx+48]
mov r8, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], rax
sbb r8, QWORD PTR [rdx+56]
mov rax, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], r8
sbb rax, QWORD PTR [rdx+64]
mov r8, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], rax
sbb r8, QWORD PTR [rdx+72]
mov rax, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], r8
sbb rax, QWORD PTR [rdx+80]
mov r8, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], rax
sbb r8, QWORD PTR [rdx+88]
mov rax, QWORD PTR [r10+96]
mov QWORD PTR [r10+88], r8
sbb rax, QWORD PTR [rdx+96]
mov r8, QWORD PTR [r10+104]
mov QWORD PTR [r10+96], rax
sbb r8, QWORD PTR [rdx+104]
mov rax, QWORD PTR [r10+112]
mov QWORD PTR [r10+104], r8
sbb rax, QWORD PTR [rdx+112]
mov r8, QWORD PTR [r10+120]
mov QWORD PTR [r10+112], rax
sbb r8, QWORD PTR [rdx+120]
mov rax, QWORD PTR [r10+128]
mov QWORD PTR [r10+120], r8
sbb rax, QWORD PTR [rdx+128]
mov r8, QWORD PTR [r10+136]
mov QWORD PTR [r10+128], rax
sbb r8, QWORD PTR [rdx+136]
mov rax, QWORD PTR [r10+144]
mov QWORD PTR [r10+136], r8
sbb rax, QWORD PTR [rdx+144]
mov r8, QWORD PTR [r10+152]
mov QWORD PTR [r10+144], rax
sbb r8, QWORD PTR [rdx+152]
mov rax, QWORD PTR [r10+160]
mov QWORD PTR [r10+152], r8
sbb rax, QWORD PTR [rdx+160]
mov r8, QWORD PTR [r10+168]
mov QWORD PTR [r10+160], rax
sbb r8, QWORD PTR [rdx+168]
mov rax, QWORD PTR [r10+176]
mov QWORD PTR [r10+168], r8
sbb rax, QWORD PTR [rdx+176]
mov r8, QWORD PTR [r10+184]
mov QWORD PTR [r10+176], rax
sbb r8, QWORD PTR [rdx+184]
mov rax, QWORD PTR [r10+192]
mov QWORD PTR [r10+184], r8
sbb rax, QWORD PTR [rdx+192]
mov r8, QWORD PTR [r10+200]
mov QWORD PTR [r10+192], rax
sbb r8, QWORD PTR [rdx+200]
mov rax, QWORD PTR [r10+208]
mov QWORD PTR [r10+200], r8
sbb rax, QWORD PTR [rdx+208]
mov r8, QWORD PTR [r10+216]
mov QWORD PTR [r10+208], rax
sbb r8, QWORD PTR [rdx+216]
mov rax, QWORD PTR [r10+224]
mov QWORD PTR [r10+216], r8
sbb rax, QWORD PTR [rdx+224]
mov r8, QWORD PTR [r10+232]
mov QWORD PTR [r10+224], rax
sbb r8, QWORD PTR [rdx+232]
mov rax, QWORD PTR [r10+240]
mov QWORD PTR [r10+232], r8
sbb rax, QWORD PTR [rdx+240]
mov r8, QWORD PTR [r10+248]
mov QWORD PTR [r10+240], rax
sbb r8, QWORD PTR [rdx+248]
mov rax, QWORD PTR [r10+256]
mov QWORD PTR [r10+248], r8
sbb rax, QWORD PTR [rdx+256]
mov r8, QWORD PTR [r10+264]
mov QWORD PTR [r10+256], rax
sbb r8, QWORD PTR [rdx+264]
mov rax, QWORD PTR [r10+272]
mov QWORD PTR [r10+264], r8
sbb rax, QWORD PTR [rdx+272]
mov r8, QWORD PTR [r10+280]
mov QWORD PTR [r10+272], rax
sbb r8, QWORD PTR [rdx+280]
mov rax, QWORD PTR [r10+288]
mov QWORD PTR [r10+280], r8
sbb rax, QWORD PTR [rdx+288]
mov r8, QWORD PTR [r10+296]
mov QWORD PTR [r10+288], rax
sbb r8, QWORD PTR [rdx+296]
mov rax, QWORD PTR [r10+304]
mov QWORD PTR [r10+296], r8
sbb rax, QWORD PTR [rdx+304]
mov r8, QWORD PTR [r10+312]
mov QWORD PTR [r10+304], rax
sbb r8, QWORD PTR [rdx+312]
mov rax, QWORD PTR [r10+320]
mov QWORD PTR [r10+312], r8
sbb rax, QWORD PTR [rdx+320]
mov r8, QWORD PTR [r10+328]
mov QWORD PTR [r10+320], rax
sbb r8, QWORD PTR [rdx+328]
mov rax, QWORD PTR [r10+336]
mov QWORD PTR [r10+328], r8
sbb rax, QWORD PTR [rdx+336]
mov r8, QWORD PTR [r10+344]
mov QWORD PTR [r10+336], rax
sbb r8, QWORD PTR [rdx+344]
mov rax, QWORD PTR [r10+352]
mov QWORD PTR [r10+344], r8
sbb rax, QWORD PTR [rdx+352]
mov r8, QWORD PTR [r10+360]
mov QWORD PTR [r10+352], rax
sbb r8, QWORD PTR [rdx+360]
mov rax, QWORD PTR [r10+368]
mov QWORD PTR [r10+360], r8
sbb rax, QWORD PTR [rdx+368]
mov r8, QWORD PTR [r10+376]
mov QWORD PTR [r10+368], rax
sbb r8, QWORD PTR [rdx+376]
mov rax, QWORD PTR [r10+384]
mov QWORD PTR [r10+376], r8
sbb rax, QWORD PTR [rdx+384]
mov r8, QWORD PTR [r10+392]
mov QWORD PTR [r10+384], rax
sbb r8, QWORD PTR [rdx+392]
mov rax, QWORD PTR [r10+400]
mov QWORD PTR [r10+392], r8
sbb rax, QWORD PTR [rdx+400]
mov r8, QWORD PTR [r10+408]
mov QWORD PTR [r10+400], rax
sbb r8, QWORD PTR [rdx+408]
mov rax, QWORD PTR [r10+416]
mov QWORD PTR [r10+408], r8
sbb rax, QWORD PTR [rdx+416]
mov r8, QWORD PTR [r10+424]
mov QWORD PTR [r10+416], rax
sbb r8, QWORD PTR [rdx+424]
mov rax, QWORD PTR [r10+432]
mov QWORD PTR [r10+424], r8
sbb rax, QWORD PTR [rdx+432]
mov r8, QWORD PTR [r10+440]
mov QWORD PTR [r10+432], rax
sbb r8, QWORD PTR [rdx+440]
mov rax, QWORD PTR [r10+448]
mov QWORD PTR [r10+440], r8
sbb rax, QWORD PTR [rdx+448]
mov r8, QWORD PTR [r10+456]
mov QWORD PTR [r10+448], rax
sbb r8, QWORD PTR [rdx+456]
mov rax, QWORD PTR [r10+464]
mov QWORD PTR [r10+456], r8
sbb rax, QWORD PTR [rdx+464]
mov r8, QWORD PTR [r10+472]
mov QWORD PTR [r10+464], rax
sbb r8, QWORD PTR [rdx+472]
mov rax, QWORD PTR [r10+480]
mov QWORD PTR [r10+472], r8
sbb rax, QWORD PTR [rdx+480]
mov r8, QWORD PTR [r10+488]
mov QWORD PTR [r10+480], rax
sbb r8, QWORD PTR [rdx+488]
mov rax, QWORD PTR [r10+496]
mov QWORD PTR [r10+488], r8
sbb rax, QWORD PTR [rdx+496]
mov r8, QWORD PTR [r10+504]
mov QWORD PTR [r10+496], rax
sbb r8, QWORD PTR [rdx+504]
mov QWORD PTR [r10+504], r8
sbb r9, 0
mov rax, QWORD PTR [r10]
sub rax, QWORD PTR [rcx]
mov r8, QWORD PTR [r10+8]
mov QWORD PTR [r10], rax
sbb r8, QWORD PTR [rcx+8]
mov rax, QWORD PTR [r10+16]
mov QWORD PTR [r10+8], r8
sbb rax, QWORD PTR [rcx+16]
mov r8, QWORD PTR [r10+24]
mov QWORD PTR [r10+16], rax
sbb r8, QWORD PTR [rcx+24]
mov rax, QWORD PTR [r10+32]
mov QWORD PTR [r10+24], r8
sbb rax, QWORD PTR [rcx+32]
mov r8, QWORD PTR [r10+40]
mov QWORD PTR [r10+32], rax
sbb r8, QWORD PTR [rcx+40]
mov rax, QWORD PTR [r10+48]
mov QWORD PTR [r10+40], r8
sbb rax, QWORD PTR [rcx+48]
mov r8, QWORD PTR [r10+56]
mov QWORD PTR [r10+48], rax
sbb r8, QWORD PTR [rcx+56]
mov rax, QWORD PTR [r10+64]
mov QWORD PTR [r10+56], r8
sbb rax, QWORD PTR [rcx+64]
mov r8, QWORD PTR [r10+72]
mov QWORD PTR [r10+64], rax
sbb r8, QWORD PTR [rcx+72]
mov rax, QWORD PTR [r10+80]
mov QWORD PTR [r10+72], r8
sbb rax, QWORD PTR [rcx+80]
mov r8, QWORD PTR [r10+88]
mov QWORD PTR [r10+80], rax
sbb r8, QWORD PTR [rcx+88]
mov rax, QWORD PTR [r10+96]
mov QWORD PTR [r10+88], r8
sbb rax, QWORD PTR [rcx+96]
mov r8, QWORD PTR [r10+104]
mov QWORD PTR [r10+96], rax
sbb r8, QWORD PTR [rcx+104]
mov rax, QWORD PTR [r10+112]
mov QWORD PTR [r10+104], r8
sbb rax, QWORD PTR [rcx+112]
mov r8, QWORD PTR [r10+120]
mov QWORD PTR [r10+112], rax
sbb r8, QWORD PTR [rcx+120]
mov rax, QWORD PTR [r10+128]
mov QWORD PTR [r10+120], r8
sbb rax, QWORD PTR [rcx+128]
mov r8, QWORD PTR [r10+136]
mov QWORD PTR [r10+128], rax
sbb r8, QWORD PTR [rcx+136]
mov rax, QWORD PTR [r10+144]
mov QWORD PTR [r10+136], r8
sbb rax, QWORD PTR [rcx+144]
mov r8, QWORD PTR [r10+152]
mov QWORD PTR [r10+144], rax
sbb r8, QWORD PTR [rcx+152]
mov rax, QWORD PTR [r10+160]
mov QWORD PTR [r10+152], r8
sbb rax, QWORD PTR [rcx+160]
mov r8, QWORD PTR [r10+168]
mov QWORD PTR [r10+160], rax
sbb r8, QWORD PTR [rcx+168]
mov rax, QWORD PTR [r10+176]
mov QWORD PTR [r10+168], r8
sbb rax, QWORD PTR [rcx+176]
mov r8, QWORD PTR [r10+184]
mov QWORD PTR [r10+176], rax
sbb r8, QWORD PTR [rcx+184]
mov rax, QWORD PTR [r10+192]
mov QWORD PTR [r10+184], r8
sbb rax, QWORD PTR [rcx+192]
mov r8, QWORD PTR [r10+200]
mov QWORD PTR [r10+192], rax
sbb r8, QWORD PTR [rcx+200]
mov rax, QWORD PTR [r10+208]
mov QWORD PTR [r10+200], r8
sbb rax, QWORD PTR [rcx+208]
mov r8, QWORD PTR [r10+216]
mov QWORD PTR [r10+208], rax
sbb r8, QWORD PTR [rcx+216]
mov rax, QWORD PTR [r10+224]
mov QWORD PTR [r10+216], r8
sbb rax, QWORD PTR [rcx+224]
mov r8, QWORD PTR [r10+232]
mov QWORD PTR [r10+224], rax
sbb r8, QWORD PTR [rcx+232]
mov rax, QWORD PTR [r10+240]
mov QWORD PTR [r10+232], r8
sbb rax, QWORD PTR [rcx+240]
mov r8, QWORD PTR [r10+248]
mov QWORD PTR [r10+240], rax
sbb r8, QWORD PTR [rcx+248]
mov rax, QWORD PTR [r10+256]
mov QWORD PTR [r10+248], r8
sbb rax, QWORD PTR [rcx+256]
mov r8, QWORD PTR [r10+264]
mov QWORD PTR [r10+256], rax
sbb r8, QWORD PTR [rcx+264]
mov rax, QWORD PTR [r10+272]
mov QWORD PTR [r10+264], r8
sbb rax, QWORD PTR [rcx+272]
mov r8, QWORD PTR [r10+280]
mov QWORD PTR [r10+272], rax
sbb r8, QWORD PTR [rcx+280]
mov rax, QWORD PTR [r10+288]
mov QWORD PTR [r10+280], r8
sbb rax, QWORD PTR [rcx+288]
mov r8, QWORD PTR [r10+296]
mov QWORD PTR [r10+288], rax
sbb r8, QWORD PTR [rcx+296]
mov rax, QWORD PTR [r10+304]
mov QWORD PTR [r10+296], r8
sbb rax, QWORD PTR [rcx+304]
mov r8, QWORD PTR [r10+312]
mov QWORD PTR [r10+304], rax
sbb r8, QWORD PTR [rcx+312]
mov rax, QWORD PTR [r10+320]
mov QWORD PTR [r10+312], r8
sbb rax, QWORD PTR [rcx+320]
mov r8, QWORD PTR [r10+328]
mov QWORD PTR [r10+320], rax
sbb r8, QWORD PTR [rcx+328]
mov rax, QWORD PTR [r10+336]
mov QWORD PTR [r10+328], r8
sbb rax, QWORD PTR [rcx+336]
mov r8, QWORD PTR [r10+344]
mov QWORD PTR [r10+336], rax
sbb r8, QWORD PTR [rcx+344]
mov rax, QWORD PTR [r10+352]
mov QWORD PTR [r10+344], r8
sbb rax, QWORD PTR [rcx+352]
mov r8, QWORD PTR [r10+360]
mov QWORD PTR [r10+352], rax
sbb r8, QWORD PTR [rcx+360]
mov rax, QWORD PTR [r10+368]
mov QWORD PTR [r10+360], r8
sbb rax, QWORD PTR [rcx+368]
mov r8, QWORD PTR [r10+376]
mov QWORD PTR [r10+368], rax
sbb r8, QWORD PTR [rcx+376]
mov rax, QWORD PTR [r10+384]
mov QWORD PTR [r10+376], r8
sbb rax, QWORD PTR [rcx+384]
mov r8, QWORD PTR [r10+392]
mov QWORD PTR [r10+384], rax
sbb r8, QWORD PTR [rcx+392]
mov rax, QWORD PTR [r10+400]
mov QWORD PTR [r10+392], r8
sbb rax, QWORD PTR [rcx+400]
mov r8, QWORD PTR [r10+408]
mov QWORD PTR [r10+400], rax
sbb r8, QWORD PTR [rcx+408]
mov rax, QWORD PTR [r10+416]
mov QWORD PTR [r10+408], r8
sbb rax, QWORD PTR [rcx+416]
mov r8, QWORD PTR [r10+424]
mov QWORD PTR [r10+416], rax
sbb r8, QWORD PTR [rcx+424]
mov rax, QWORD PTR [r10+432]
mov QWORD PTR [r10+424], r8
sbb rax, QWORD PTR [rcx+432]
mov r8, QWORD PTR [r10+440]
mov QWORD PTR [r10+432], rax
sbb r8, QWORD PTR [rcx+440]
mov rax, QWORD PTR [r10+448]
mov QWORD PTR [r10+440], r8
sbb rax, QWORD PTR [rcx+448]
mov r8, QWORD PTR [r10+456]
mov QWORD PTR [r10+448], rax
sbb r8, QWORD PTR [rcx+456]
mov rax, QWORD PTR [r10+464]
mov QWORD PTR [r10+456], r8
sbb rax, QWORD PTR [rcx+464]
mov r8, QWORD PTR [r10+472]
mov QWORD PTR [r10+464], rax
sbb r8, QWORD PTR [rcx+472]
mov rax, QWORD PTR [r10+480]
mov QWORD PTR [r10+472], r8
sbb rax, QWORD PTR [rcx+480]
mov r8, QWORD PTR [r10+488]
mov QWORD PTR [r10+480], rax
sbb r8, QWORD PTR [rcx+488]
mov rax, QWORD PTR [r10+496]
mov QWORD PTR [r10+488], r8
sbb rax, QWORD PTR [rcx+496]
mov r8, QWORD PTR [r10+504]
mov QWORD PTR [r10+496], rax
sbb r8, QWORD PTR [rcx+504]
mov QWORD PTR [r10+504], r8
sbb r9, 0
; Add in place
mov rax, QWORD PTR [rcx+256]
add rax, QWORD PTR [r10]
mov r8, QWORD PTR [rcx+264]
mov QWORD PTR [rcx+256], rax
adc r8, QWORD PTR [r10+8]
mov rax, QWORD PTR [rcx+272]
mov QWORD PTR [rcx+264], r8
adc rax, QWORD PTR [r10+16]
mov r8, QWORD PTR [rcx+280]
mov QWORD PTR [rcx+272], rax
adc r8, QWORD PTR [r10+24]
mov rax, QWORD PTR [rcx+288]
mov QWORD PTR [rcx+280], r8
adc rax, QWORD PTR [r10+32]
mov r8, QWORD PTR [rcx+296]
mov QWORD PTR [rcx+288], rax
adc r8, QWORD PTR [r10+40]
mov rax, QWORD PTR [rcx+304]
mov QWORD PTR [rcx+296], r8
adc rax, QWORD PTR [r10+48]
mov r8, QWORD PTR [rcx+312]
mov QWORD PTR [rcx+304], rax
adc r8, QWORD PTR [r10+56]
mov rax, QWORD PTR [rcx+320]
mov QWORD PTR [rcx+312], r8
adc rax, QWORD PTR [r10+64]
mov r8, QWORD PTR [rcx+328]
mov QWORD PTR [rcx+320], rax
adc r8, QWORD PTR [r10+72]
mov rax, QWORD PTR [rcx+336]
mov QWORD PTR [rcx+328], r8
adc rax, QWORD PTR [r10+80]
mov r8, QWORD PTR [rcx+344]
mov QWORD PTR [rcx+336], rax
adc r8, QWORD PTR [r10+88]
mov rax, QWORD PTR [rcx+352]
mov QWORD PTR [rcx+344], r8
adc rax, QWORD PTR [r10+96]
mov r8, QWORD PTR [rcx+360]
mov QWORD PTR [rcx+352], rax
adc r8, QWORD PTR [r10+104]
mov rax, QWORD PTR [rcx+368]
mov QWORD PTR [rcx+360], r8
adc rax, QWORD PTR [r10+112]
mov r8, QWORD PTR [rcx+376]
mov QWORD PTR [rcx+368], rax
adc r8, QWORD PTR [r10+120]
mov rax, QWORD PTR [rcx+384]
mov QWORD PTR [rcx+376], r8
adc rax, QWORD PTR [r10+128]
mov r8, QWORD PTR [rcx+392]
mov QWORD PTR [rcx+384], rax
adc r8, QWORD PTR [r10+136]
mov rax, QWORD PTR [rcx+400]
mov QWORD PTR [rcx+392], r8
adc rax, QWORD PTR [r10+144]
mov r8, QWORD PTR [rcx+408]
mov QWORD PTR [rcx+400], rax
adc r8, QWORD PTR [r10+152]
mov rax, QWORD PTR [rcx+416]
mov QWORD PTR [rcx+408], r8
adc rax, QWORD PTR [r10+160]
mov r8, QWORD PTR [rcx+424]
mov QWORD PTR [rcx+416], rax
adc r8, QWORD PTR [r10+168]
mov rax, QWORD PTR [rcx+432]
mov QWORD PTR [rcx+424], r8
adc rax, QWORD PTR [r10+176]
mov r8, QWORD PTR [rcx+440]
mov QWORD PTR [rcx+432], rax
adc r8, QWORD PTR [r10+184]
mov rax, QWORD PTR [rcx+448]
mov QWORD PTR [rcx+440], r8
adc rax, QWORD PTR [r10+192]
mov r8, QWORD PTR [rcx+456]
mov QWORD PTR [rcx+448], rax
adc r8, QWORD PTR [r10+200]
mov rax, QWORD PTR [rcx+464]
mov QWORD PTR [rcx+456], r8
adc rax, QWORD PTR [r10+208]
mov r8, QWORD PTR [rcx+472]
mov QWORD PTR [rcx+464], rax
adc r8, QWORD PTR [r10+216]
mov rax, QWORD PTR [rcx+480]
mov QWORD PTR [rcx+472], r8
adc rax, QWORD PTR [r10+224]
mov r8, QWORD PTR [rcx+488]
mov QWORD PTR [rcx+480], rax
adc r8, QWORD PTR [r10+232]
mov rax, QWORD PTR [rcx+496]
mov QWORD PTR [rcx+488], r8
adc rax, QWORD PTR [r10+240]
mov r8, QWORD PTR [rcx+504]
mov QWORD PTR [rcx+496], rax
adc r8, QWORD PTR [r10+248]
mov rax, QWORD PTR [rcx+512]
mov QWORD PTR [rcx+504], r8
adc rax, QWORD PTR [r10+256]
mov r8, QWORD PTR [rcx+520]
mov QWORD PTR [rcx+512], rax
adc r8, QWORD PTR [r10+264]
mov rax, QWORD PTR [rcx+528]
mov QWORD PTR [rcx+520], r8
adc rax, QWORD PTR [r10+272]
mov r8, QWORD PTR [rcx+536]
mov QWORD PTR [rcx+528], rax
adc r8, QWORD PTR [r10+280]
mov rax, QWORD PTR [rcx+544]
mov QWORD PTR [rcx+536], r8
adc rax, QWORD PTR [r10+288]
mov r8, QWORD PTR [rcx+552]
mov QWORD PTR [rcx+544], rax
adc r8, QWORD PTR [r10+296]
mov rax, QWORD PTR [rcx+560]
mov QWORD PTR [rcx+552], r8
adc rax, QWORD PTR [r10+304]
mov r8, QWORD PTR [rcx+568]
mov QWORD PTR [rcx+560], rax
adc r8, QWORD PTR [r10+312]
mov rax, QWORD PTR [rcx+576]
mov QWORD PTR [rcx+568], r8
adc rax, QWORD PTR [r10+320]
mov r8, QWORD PTR [rcx+584]
mov QWORD PTR [rcx+576], rax
adc r8, QWORD PTR [r10+328]
mov rax, QWORD PTR [rcx+592]
mov QWORD PTR [rcx+584], r8
adc rax, QWORD PTR [r10+336]
mov r8, QWORD PTR [rcx+600]
mov QWORD PTR [rcx+592], rax
adc r8, QWORD PTR [r10+344]
mov rax, QWORD PTR [rcx+608]
mov QWORD PTR [rcx+600], r8
adc rax, QWORD PTR [r10+352]
mov r8, QWORD PTR [rcx+616]
mov QWORD PTR [rcx+608], rax
adc r8, QWORD PTR [r10+360]
mov rax, QWORD PTR [rcx+624]
mov QWORD PTR [rcx+616], r8
adc rax, QWORD PTR [r10+368]
mov r8, QWORD PTR [rcx+632]
mov QWORD PTR [rcx+624], rax
adc r8, QWORD PTR [r10+376]
mov rax, QWORD PTR [rcx+640]
mov QWORD PTR [rcx+632], r8
adc rax, QWORD PTR [r10+384]
mov r8, QWORD PTR [rcx+648]
mov QWORD PTR [rcx+640], rax
adc r8, QWORD PTR [r10+392]
mov rax, QWORD PTR [rcx+656]
mov QWORD PTR [rcx+648], r8
adc rax, QWORD PTR [r10+400]
mov r8, QWORD PTR [rcx+664]
mov QWORD PTR [rcx+656], rax
adc r8, QWORD PTR [r10+408]
mov rax, QWORD PTR [rcx+672]
mov QWORD PTR [rcx+664], r8
adc rax, QWORD PTR [r10+416]
mov r8, QWORD PTR [rcx+680]
mov QWORD PTR [rcx+672], rax
adc r8, QWORD PTR [r10+424]
mov rax, QWORD PTR [rcx+688]
mov QWORD PTR [rcx+680], r8
adc rax, QWORD PTR [r10+432]
mov r8, QWORD PTR [rcx+696]
mov QWORD PTR [rcx+688], rax
adc r8, QWORD PTR [r10+440]
mov rax, QWORD PTR [rcx+704]
mov QWORD PTR [rcx+696], r8
adc rax, QWORD PTR [r10+448]
mov r8, QWORD PTR [rcx+712]
mov QWORD PTR [rcx+704], rax
adc r8, QWORD PTR [r10+456]
mov rax, QWORD PTR [rcx+720]
mov QWORD PTR [rcx+712], r8
adc rax, QWORD PTR [r10+464]
mov r8, QWORD PTR [rcx+728]
mov QWORD PTR [rcx+720], rax
adc r8, QWORD PTR [r10+472]
mov rax, QWORD PTR [rcx+736]
mov QWORD PTR [rcx+728], r8
adc rax, QWORD PTR [r10+480]
mov r8, QWORD PTR [rcx+744]
mov QWORD PTR [rcx+736], rax
adc r8, QWORD PTR [r10+488]
mov rax, QWORD PTR [rcx+752]
mov QWORD PTR [rcx+744], r8
adc rax, QWORD PTR [r10+496]
mov r8, QWORD PTR [rcx+760]
mov QWORD PTR [rcx+752], rax
adc r8, QWORD PTR [r10+504]
mov QWORD PTR [rcx+760], r8
adc r9, 0
mov QWORD PTR [rcx+768], r9
; Add in place
mov rax, QWORD PTR [rcx+512]
xor r9, r9
add rax, QWORD PTR [rdx]
mov r8, QWORD PTR [rcx+520]
mov QWORD PTR [rcx+512], rax
adc r8, QWORD PTR [rdx+8]
mov rax, QWORD PTR [rcx+528]
mov QWORD PTR [rcx+520], r8
adc rax, QWORD PTR [rdx+16]
mov r8, QWORD PTR [rcx+536]
mov QWORD PTR [rcx+528], rax
adc r8, QWORD PTR [rdx+24]
mov rax, QWORD PTR [rcx+544]
mov QWORD PTR [rcx+536], r8
adc rax, QWORD PTR [rdx+32]
mov r8, QWORD PTR [rcx+552]
mov QWORD PTR [rcx+544], rax
adc r8, QWORD PTR [rdx+40]
mov rax, QWORD PTR [rcx+560]
mov QWORD PTR [rcx+552], r8
adc rax, QWORD PTR [rdx+48]
mov r8, QWORD PTR [rcx+568]
mov QWORD PTR [rcx+560], rax
adc r8, QWORD PTR [rdx+56]
mov rax, QWORD PTR [rcx+576]
mov QWORD PTR [rcx+568], r8
adc rax, QWORD PTR [rdx+64]
mov r8, QWORD PTR [rcx+584]
mov QWORD PTR [rcx+576], rax
adc r8, QWORD PTR [rdx+72]
mov rax, QWORD PTR [rcx+592]
mov QWORD PTR [rcx+584], r8
adc rax, QWORD PTR [rdx+80]
mov r8, QWORD PTR [rcx+600]
mov QWORD PTR [rcx+592], rax
adc r8, QWORD PTR [rdx+88]
mov rax, QWORD PTR [rcx+608]
mov QWORD PTR [rcx+600], r8
adc rax, QWORD PTR [rdx+96]
mov r8, QWORD PTR [rcx+616]
mov QWORD PTR [rcx+608], rax
adc r8, QWORD PTR [rdx+104]
mov rax, QWORD PTR [rcx+624]
mov QWORD PTR [rcx+616], r8
adc rax, QWORD PTR [rdx+112]
mov r8, QWORD PTR [rcx+632]
mov QWORD PTR [rcx+624], rax
adc r8, QWORD PTR [rdx+120]
mov rax, QWORD PTR [rcx+640]
mov QWORD PTR [rcx+632], r8
adc rax, QWORD PTR [rdx+128]
mov r8, QWORD PTR [rcx+648]
mov QWORD PTR [rcx+640], rax
adc r8, QWORD PTR [rdx+136]
mov rax, QWORD PTR [rcx+656]
mov QWORD PTR [rcx+648], r8
adc rax, QWORD PTR [rdx+144]
mov r8, QWORD PTR [rcx+664]
mov QWORD PTR [rcx+656], rax
adc r8, QWORD PTR [rdx+152]
mov rax, QWORD PTR [rcx+672]
mov QWORD PTR [rcx+664], r8
adc rax, QWORD PTR [rdx+160]
mov r8, QWORD PTR [rcx+680]
mov QWORD PTR [rcx+672], rax
adc r8, QWORD PTR [rdx+168]
mov rax, QWORD PTR [rcx+688]
mov QWORD PTR [rcx+680], r8
adc rax, QWORD PTR [rdx+176]
mov r8, QWORD PTR [rcx+696]
mov QWORD PTR [rcx+688], rax
adc r8, QWORD PTR [rdx+184]
mov rax, QWORD PTR [rcx+704]
mov QWORD PTR [rcx+696], r8
adc rax, QWORD PTR [rdx+192]
mov r8, QWORD PTR [rcx+712]
mov QWORD PTR [rcx+704], rax
adc r8, QWORD PTR [rdx+200]
mov rax, QWORD PTR [rcx+720]
mov QWORD PTR [rcx+712], r8
adc rax, QWORD PTR [rdx+208]
mov r8, QWORD PTR [rcx+728]
mov QWORD PTR [rcx+720], rax
adc r8, QWORD PTR [rdx+216]
mov rax, QWORD PTR [rcx+736]
mov QWORD PTR [rcx+728], r8
adc rax, QWORD PTR [rdx+224]
mov r8, QWORD PTR [rcx+744]
mov QWORD PTR [rcx+736], rax
adc r8, QWORD PTR [rdx+232]
mov rax, QWORD PTR [rcx+752]
mov QWORD PTR [rcx+744], r8
adc rax, QWORD PTR [rdx+240]
mov r8, QWORD PTR [rcx+760]
mov QWORD PTR [rcx+752], rax
adc r8, QWORD PTR [rdx+248]
mov rax, QWORD PTR [rcx+768]
mov QWORD PTR [rcx+760], r8
adc rax, QWORD PTR [rdx+256]
mov QWORD PTR [rcx+768], rax
adc r9, 0
; Add to zero
mov rax, QWORD PTR [rdx+264]
adc rax, 0
mov r8, QWORD PTR [rdx+272]
mov QWORD PTR [rcx+776], rax
adc r8, 0
mov rax, QWORD PTR [rdx+280]
mov QWORD PTR [rcx+784], r8
adc rax, 0
mov r8, QWORD PTR [rdx+288]
mov QWORD PTR [rcx+792], rax
adc r8, 0
mov rax, QWORD PTR [rdx+296]
mov QWORD PTR [rcx+800], r8
adc rax, 0
mov r8, QWORD PTR [rdx+304]
mov QWORD PTR [rcx+808], rax
adc r8, 0
mov rax, QWORD PTR [rdx+312]
mov QWORD PTR [rcx+816], r8
adc rax, 0
mov r8, QWORD PTR [rdx+320]
mov QWORD PTR [rcx+824], rax
adc r8, 0
mov rax, QWORD PTR [rdx+328]
mov QWORD PTR [rcx+832], r8
adc rax, 0
mov r8, QWORD PTR [rdx+336]
mov QWORD PTR [rcx+840], rax
adc r8, 0
mov rax, QWORD PTR [rdx+344]
mov QWORD PTR [rcx+848], r8
adc rax, 0
mov r8, QWORD PTR [rdx+352]
mov QWORD PTR [rcx+856], rax
adc r8, 0
mov rax, QWORD PTR [rdx+360]
mov QWORD PTR [rcx+864], r8
adc rax, 0
mov r8, QWORD PTR [rdx+368]
mov QWORD PTR [rcx+872], rax
adc r8, 0
mov rax, QWORD PTR [rdx+376]
mov QWORD PTR [rcx+880], r8
adc rax, 0
mov r8, QWORD PTR [rdx+384]
mov QWORD PTR [rcx+888], rax
adc r8, 0
mov rax, QWORD PTR [rdx+392]
mov QWORD PTR [rcx+896], r8
adc rax, 0
mov r8, QWORD PTR [rdx+400]
mov QWORD PTR [rcx+904], rax
adc r8, 0
mov rax, QWORD PTR [rdx+408]
mov QWORD PTR [rcx+912], r8
adc rax, 0
mov r8, QWORD PTR [rdx+416]
mov QWORD PTR [rcx+920], rax
adc r8, 0
mov rax, QWORD PTR [rdx+424]
mov QWORD PTR [rcx+928], r8
adc rax, 0
mov r8, QWORD PTR [rdx+432]
mov QWORD PTR [rcx+936], rax
adc r8, 0
mov rax, QWORD PTR [rdx+440]
mov QWORD PTR [rcx+944], r8
adc rax, 0
mov r8, QWORD PTR [rdx+448]
mov QWORD PTR [rcx+952], rax
adc r8, 0
mov rax, QWORD PTR [rdx+456]
mov QWORD PTR [rcx+960], r8
adc rax, 0
mov r8, QWORD PTR [rdx+464]
mov QWORD PTR [rcx+968], rax
adc r8, 0
mov rax, QWORD PTR [rdx+472]
mov QWORD PTR [rcx+976], r8
adc rax, 0
mov r8, QWORD PTR [rdx+480]
mov QWORD PTR [rcx+984], rax
adc r8, 0
mov rax, QWORD PTR [rdx+488]
mov QWORD PTR [rcx+992], r8
adc rax, 0
mov r8, QWORD PTR [rdx+496]
mov QWORD PTR [rcx+1000], rax
adc r8, 0
mov rax, QWORD PTR [rdx+504]
mov QWORD PTR [rcx+1008], r8
adc rax, 0
mov QWORD PTR [rcx+1016], rax
add rsp, 1304
pop r12
ret
sp_4096_sqr_avx2_64 ENDP
_text ENDS
ENDIF
; /* Multiply a 64-digit number by a single digit. (r = a * b)
; *
; * The product is 65 digits long: r[0..63] receive the low digits and
; * r[64] receives the final carry digit.
; *
; * r  Result buffer; 65 digits are written.         (rcx)
; * a  A single precision integer of 64 digits.      (rdx)
; * b  A single precision digit.                     (r8)
; */
; Assembly-time helper: emits the code for one middle digit A[k] * B at byte
; offset sp_4096_mul_d_64_pos and then advances that offset by one digit.
;   cur  - accumulator holding the pending digit; it is completed with the
;          low half of the product and stored to r[k]
;   nxt  - accumulator that takes the high half of the product plus carry
;   top  - accumulator cleared here; it takes any carry out of nxt
sp_4096_mul_d_64_digit MACRO cur, nxt, top
        mov     rax, r8
        xor     top, top
        mul     QWORD PTR [r9+sp_4096_mul_d_64_pos]
        add     cur, rax
        mov     QWORD PTR [rcx+sp_4096_mul_d_64_pos], cur
        adc     nxt, rdx
        adc     top, 0
        sp_4096_mul_d_64_pos = sp_4096_mul_d_64_pos + 8
ENDM
_text SEGMENT READONLY PARA
sp_4096_mul_d_64 PROC
        push    r12
        ; mul writes rdx:rax, so keep the pointer to a in r9 instead.
        mov     r9, rdx

        ; A[0] * B
        mov     rax, r8
        xor     r12, r12
        mul     QWORD PTR [r9]
        mov     r10, rax
        mov     r11, rdx
        mov     QWORD PTR [rcx], r10

        ; A[1] * B .. A[60] * B
        ; The three accumulators swap roles every digit, so one pass of the
        ; repeat body covers three digits and restores the starting roles.
        sp_4096_mul_d_64_pos = 8
        REPEAT 20
        sp_4096_mul_d_64_digit r11, r12, r10
        sp_4096_mul_d_64_digit r12, r10, r11
        sp_4096_mul_d_64_digit r10, r11, r12
        ENDM

        ; A[61] * B
        sp_4096_mul_d_64_digit r11, r12, r10
        ; A[62] * B
        sp_4096_mul_d_64_digit r12, r10, r11

        ; A[63] * B
        ; Last digit: no further carry register is needed, the high half of
        ; the product (plus carry) becomes r[64].
        mov     rax, r8
        mul     QWORD PTR [r9+504]
        add     r10, rax
        adc     r11, rdx
        mov     QWORD PTR [rcx+504], r10
        mov     QWORD PTR [rcx+512], r11

        pop     r12
        ret
sp_4096_mul_d_64 ENDP
_text ENDS
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract b and 0 to copy a to r unchanged.
; *
; * r  A single precision number representing condition subtract result. (rcx)
; * a  A single precision number to subtract from.                        (rdx)
; * b  A single precision number to subtract.                             (r8)
; * m  Mask value to apply.                                               (r9)
; *
; * Returns, in rax, 0 when there is no borrow out of the top digit and -1
; * when there is.
; */
; Assembly-time helper: emits one step of the borrow chain for the digit at
; byte offset sp_4096_cond_sub_64_pos and then advances that offset.
;   cur  - register that receives a[k] - masked b[k] - borrow
;   prev - register still holding the previous digit's result; it is stored
;          to r[k-1] only after this digit's sbb has been issued
sp_4096_cond_sub_64_digit MACRO cur, prev
        mov     cur, QWORD PTR [rdx+sp_4096_cond_sub_64_pos]
        mov     r8, QWORD PTR [rsp+sp_4096_cond_sub_64_pos]
        sbb     cur, r8
        mov     QWORD PTR [rcx+sp_4096_cond_sub_64_pos-8], prev
        sp_4096_cond_sub_64_pos = sp_4096_cond_sub_64_pos + 8
ENDM
_text SEGMENT READONLY PARA
sp_4096_cond_sub_64 PROC
        ; Scratch space for the 64 masked digits of b (64 * 8 bytes).
        sub     rsp, 512
        ; rax = 0; it becomes the borrow result at the end. Clearing it here
        ; is safe because nothing reads the flags before the first sub below.
        xor     eax, eax

        ; Pass 1: scratch[i] = b[i] & m, two digits at a time.
        ; This is done up front because "and" rewrites the carry flag and so
        ; cannot be interleaved with the sub/sbb borrow chain of pass 2.
        sp_4096_cond_sub_64_pos = 0
        REPEAT 32
        mov     r10, QWORD PTR [r8+sp_4096_cond_sub_64_pos]
        mov     r11, QWORD PTR [r8+sp_4096_cond_sub_64_pos+8]
        and     r10, r9
        and     r11, r9
        mov     QWORD PTR [rsp+sp_4096_cond_sub_64_pos], r10
        mov     QWORD PTR [rsp+sp_4096_cond_sub_64_pos+8], r11
        sp_4096_cond_sub_64_pos = sp_4096_cond_sub_64_pos + 16
        ENDM

        ; Pass 2: r = a - scratch with the borrow carried through all 64
        ; digits. b has been fully consumed, so r8 is reused as a temporary.
        ; Only mov instructions sit between the sub/sbb steps, which leaves
        ; the carry flag intact from one digit to the next.
        mov     r10, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rsp]
        sub     r10, r8

        ; Digits 1..62, alternating r11/r10 as the working register.
        sp_4096_cond_sub_64_pos = 8
        REPEAT 31
        sp_4096_cond_sub_64_digit r11, r10
        sp_4096_cond_sub_64_digit r10, r11
        ENDM

        ; Digit 63, then flush the last pending result.
        sp_4096_cond_sub_64_digit r11, r10
        mov     QWORD PTR [rcx+504], r11

        ; rax = 0 - borrow: 0 if no borrow out, -1 otherwise. This must come
        ; before the stack adjustment, which overwrites the flags.
        sbb     rax, 0
        add     rsp, 512
        ret
sp_4096_cond_sub_64 ENDP
_text ENDS
; /* Reduce the number back to 4096 bits using Montgomery reduction.
; *
; * a A single precision number to reduce in place.
; * m The single precision number representing the modulus.
; * mp The digit representing the negative inverse of m mod 2^n.
; */
_text SEGMENT READONLY PARA
sp_4096_mont_reduce_64 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
mov r9, rdx
xor rsi, rsi
; i = 64
mov r10, 64
mov r15, QWORD PTR [rcx]
mov rdi, QWORD PTR [rcx+8]
L_4096_mont_loop_64:
; mu = a[i] * mp
mov r13, r15
imul r13, r8
; a[i+0] += m[0] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9]
add r15, rax
adc r12, rdx
; a[i+1] += m[1] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+8]
mov r15, rdi
add r15, rax
adc r11, rdx
add r15, r12
adc r11, 0
; a[i+2] += m[2] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+16]
mov rdi, QWORD PTR [rcx+16]
add rdi, rax
adc r12, rdx
add rdi, r11
adc r12, 0
; a[i+3] += m[3] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+24]
mov r14, QWORD PTR [rcx+24]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+24], r14
adc r11, 0
; a[i+4] += m[4] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+32]
mov r14, QWORD PTR [rcx+32]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+32], r14
adc r12, 0
; a[i+5] += m[5] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+40]
mov r14, QWORD PTR [rcx+40]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+40], r14
adc r11, 0
; a[i+6] += m[6] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+48]
mov r14, QWORD PTR [rcx+48]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+48], r14
adc r12, 0
; a[i+7] += m[7] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+56]
mov r14, QWORD PTR [rcx+56]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+56], r14
adc r11, 0
; a[i+8] += m[8] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+64]
mov r14, QWORD PTR [rcx+64]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+64], r14
adc r12, 0
; a[i+9] += m[9] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+72]
mov r14, QWORD PTR [rcx+72]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+72], r14
adc r11, 0
; a[i+10] += m[10] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+80]
mov r14, QWORD PTR [rcx+80]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+80], r14
adc r12, 0
; a[i+11] += m[11] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+88]
mov r14, QWORD PTR [rcx+88]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+88], r14
adc r11, 0
; a[i+12] += m[12] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+96]
mov r14, QWORD PTR [rcx+96]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+96], r14
adc r12, 0
; a[i+13] += m[13] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+104]
mov r14, QWORD PTR [rcx+104]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+104], r14
adc r11, 0
; a[i+14] += m[14] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+112]
mov r14, QWORD PTR [rcx+112]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+112], r14
adc r12, 0
; a[i+15] += m[15] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+120]
mov r14, QWORD PTR [rcx+120]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+120], r14
adc r11, 0
; a[i+16] += m[16] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+128]
mov r14, QWORD PTR [rcx+128]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+128], r14
adc r12, 0
; a[i+17] += m[17] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+136]
mov r14, QWORD PTR [rcx+136]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+136], r14
adc r11, 0
; a[i+18] += m[18] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+144]
mov r14, QWORD PTR [rcx+144]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+144], r14
adc r12, 0
; a[i+19] += m[19] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+152]
mov r14, QWORD PTR [rcx+152]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+152], r14
adc r11, 0
; a[i+20] += m[20] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+160]
mov r14, QWORD PTR [rcx+160]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+160], r14
adc r12, 0
; a[i+21] += m[21] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+168]
mov r14, QWORD PTR [rcx+168]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+168], r14
adc r11, 0
; a[i+22] += m[22] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+176]
mov r14, QWORD PTR [rcx+176]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+176], r14
adc r12, 0
; a[i+23] += m[23] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+184]
mov r14, QWORD PTR [rcx+184]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+184], r14
adc r11, 0
; a[i+24] += m[24] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+192]
mov r14, QWORD PTR [rcx+192]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+192], r14
adc r12, 0
; a[i+25] += m[25] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+200]
mov r14, QWORD PTR [rcx+200]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+200], r14
adc r11, 0
; a[i+26] += m[26] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+208]
mov r14, QWORD PTR [rcx+208]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+208], r14
adc r12, 0
; a[i+27] += m[27] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+216]
mov r14, QWORD PTR [rcx+216]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+216], r14
adc r11, 0
; a[i+28] += m[28] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+224]
mov r14, QWORD PTR [rcx+224]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+224], r14
adc r12, 0
; a[i+29] += m[29] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+232]
mov r14, QWORD PTR [rcx+232]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+232], r14
adc r11, 0
; a[i+30] += m[30] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+240]
mov r14, QWORD PTR [rcx+240]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+240], r14
adc r12, 0
; a[i+31] += m[31] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+248]
mov r14, QWORD PTR [rcx+248]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+248], r14
adc r11, 0
; a[i+32] += m[32] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+256]
mov r14, QWORD PTR [rcx+256]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+256], r14
adc r12, 0
; a[i+33] += m[33] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+264]
mov r14, QWORD PTR [rcx+264]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+264], r14
adc r11, 0
; a[i+34] += m[34] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+272]
mov r14, QWORD PTR [rcx+272]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+272], r14
adc r12, 0
; a[i+35] += m[35] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+280]
mov r14, QWORD PTR [rcx+280]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+280], r14
adc r11, 0
; a[i+36] += m[36] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+288]
mov r14, QWORD PTR [rcx+288]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+288], r14
adc r12, 0
; a[i+37] += m[37] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+296]
mov r14, QWORD PTR [rcx+296]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+296], r14
adc r11, 0
; a[i+38] += m[38] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+304]
mov r14, QWORD PTR [rcx+304]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+304], r14
adc r12, 0
; a[i+39] += m[39] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+312]
mov r14, QWORD PTR [rcx+312]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+312], r14
adc r11, 0
; a[i+40] += m[40] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+320]
mov r14, QWORD PTR [rcx+320]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+320], r14
adc r12, 0
; a[i+41] += m[41] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+328]
mov r14, QWORD PTR [rcx+328]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+328], r14
adc r11, 0
; a[i+42] += m[42] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+336]
mov r14, QWORD PTR [rcx+336]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+336], r14
adc r12, 0
; a[i+43] += m[43] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+344]
mov r14, QWORD PTR [rcx+344]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+344], r14
adc r11, 0
; a[i+44] += m[44] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+352]
mov r14, QWORD PTR [rcx+352]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+352], r14
adc r12, 0
; a[i+45] += m[45] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+360]
mov r14, QWORD PTR [rcx+360]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+360], r14
adc r11, 0
; a[i+46] += m[46] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+368]
mov r14, QWORD PTR [rcx+368]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+368], r14
adc r12, 0
; a[i+47] += m[47] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+376]
mov r14, QWORD PTR [rcx+376]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+376], r14
adc r11, 0
; a[i+48] += m[48] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+384]
mov r14, QWORD PTR [rcx+384]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+384], r14
adc r12, 0
; a[i+49] += m[49] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+392]
mov r14, QWORD PTR [rcx+392]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+392], r14
adc r11, 0
; a[i+50] += m[50] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+400]
mov r14, QWORD PTR [rcx+400]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+400], r14
adc r12, 0
; a[i+51] += m[51] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+408]
mov r14, QWORD PTR [rcx+408]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+408], r14
adc r11, 0
; a[i+52] += m[52] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+416]
mov r14, QWORD PTR [rcx+416]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+416], r14
adc r12, 0
; a[i+53] += m[53] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+424]
mov r14, QWORD PTR [rcx+424]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+424], r14
adc r11, 0
; a[i+54] += m[54] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+432]
mov r14, QWORD PTR [rcx+432]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+432], r14
adc r12, 0
; a[i+55] += m[55] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+440]
mov r14, QWORD PTR [rcx+440]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+440], r14
adc r11, 0
; a[i+56] += m[56] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+448]
mov r14, QWORD PTR [rcx+448]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+448], r14
adc r12, 0
; a[i+57] += m[57] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+456]
mov r14, QWORD PTR [rcx+456]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+456], r14
adc r11, 0
; a[i+58] += m[58] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+464]
mov r14, QWORD PTR [rcx+464]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+464], r14
adc r12, 0
; a[i+59] += m[59] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+472]
mov r14, QWORD PTR [rcx+472]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+472], r14
adc r11, 0
; a[i+60] += m[60] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+480]
mov r14, QWORD PTR [rcx+480]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+480], r14
adc r12, 0
; a[i+61] += m[61] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+488]
mov r14, QWORD PTR [rcx+488]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+488], r14
adc r11, 0
; a[i+62] += m[62] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+496]
mov r14, QWORD PTR [rcx+496]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+496], r14
adc r12, 0
; a[i+63] += m[63] * mu
mov rax, r13
mul QWORD PTR [r9+504]
mov r14, QWORD PTR [rcx+504]
add r12, rax
adc rdx, rsi
mov rsi, 0
adc rsi, 0
add r14, r12
mov QWORD PTR [rcx+504], r14
adc QWORD PTR [rcx+512], rdx
adc rsi, 0
; i -= 1
add rcx, 8
dec r10
jnz L_4096_mont_loop_64
mov QWORD PTR [rcx], r15
mov QWORD PTR [rcx+8], rdi
neg rsi
IFDEF _WIN64
mov r8, r9
mov r9, rsi
ELSE
mov r9, rsi
mov r8, r9
ENDIF
mov rdx, rcx
mov rcx, rcx
sub rcx, 512
call sp_4096_cond_sub_64
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_4096_mont_reduce_64 ENDP
_text ENDS
; /* Subtract one 64-word number from another. (r = a - b)
; *
; * Registers read by the body:
; * r  (rcx)  Receives the 64-word difference.
; * a  (rdx)  64-word number to subtract from.
; * b  (r8)   64-word number to subtract.
; * rax is 0 on return when no borrow left the top word, all ones otherwise.
; */
_text SEGMENT READONLY PARA
sp_4096_sub_64 PROC
        ; Word 0 opens the borrow chain. rax is cleared ahead of the sub so
        ; the clear cannot disturb the carry flag once the chain is running.
        mov     r9, QWORD PTR [rdx]
        xor     rax, rax
        sub     r9, QWORD PTR [r8]
        ; Words 1..62, two per pass. r10 and r9 take turns holding the word
        ; in flight; the previous result is stored only after the next word
        ; of a has been loaded. Only mov sits between the sbb instructions,
        ; so the carry flag passes from word to word untouched.
sub64_off = 8
        REPT 31
        mov     r10, QWORD PTR [rdx+sub64_off]
        mov     QWORD PTR [rcx+sub64_off-8], r9
        sbb     r10, QWORD PTR [r8+sub64_off]
        mov     r9, QWORD PTR [rdx+sub64_off+8]
        mov     QWORD PTR [rcx+sub64_off], r10
        sbb     r9, QWORD PTR [r8+sub64_off+8]
sub64_off = sub64_off + 16
        ENDM
        ; Word 63 closes the chain and flushes the last two results.
        mov     r10, QWORD PTR [rdx+504]
        mov     QWORD PTR [rcx+496], r9
        sbb     r10, QWORD PTR [r8+504]
        mov     QWORD PTR [rcx+504], r10
        ; rax = 0 - final borrow
        sbb     rax, 0
        ret
sp_4096_sub_64 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Multiply a 64-word number by a single word. (r = a * b)
; *
; * Registers read by the body:
; * r  (rcx)  Receives the product; 65 words are written.
; * a  (rdx)  64-word number to multiply.
; * b  (r8)   Single-word multiplier.
; */
_text SEGMENT READONLY PARA
sp_4096_mul_d_avx2_64 PROC
        push    r12
        push    r13
        ; mulx takes one factor implicitly from rdx, so the pointer to a is
        ; moved to rax and b takes its place in rdx.
        mov     rax, rdx
        mov     rdx, r8
        ; r13 stays zero for the whole routine; the xor also clears CF and OF
        ; so both add chains start without a pending carry.
        xor     r13, r13
        ; A[0] * B: low half is word 0 of r, high half waits in r12.
        mulx    r12, r11, QWORD PTR [rax]
        mov     QWORD PTR [rcx], r11
        ; A[1]..A[62] * B, two words per pass. Each step adds the new low
        ; half (r9) to the pending high half through the CF chain (adcx),
        ; and seeds the next pending register with the new high half (r10)
        ; through the OF chain (adox). r12 and r11 swap roles every word.
muld64_off = 8
        REPT 31
        mulx    r10, r9, QWORD PTR [rax+muld64_off]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+muld64_off], r12
        mulx    r10, r9, QWORD PTR [rax+muld64_off+8]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+muld64_off+8], r11
muld64_off = muld64_off + 16
        ENDM
        ; A[63] * B: same step, then the last CF carry is folded into the
        ; top word before words 63 and 64 are written.
        mulx    r10, r9, QWORD PTR [rax+504]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        adcx    r11, r13
        mov     QWORD PTR [rcx+504], r12
        mov     QWORD PTR [rcx+512], r11
        pop     r13
        pop     r12
        ret
sp_4096_mul_d_avx2_64 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
; *
; * d1   (rcx)  The high order half of the number to divide.
; * d0   (rdx)  The low order half of the number to divide.
; * div  (r8)   The divisor.
; * returns the quotient in rax; the div instruction leaves the remainder
; * in rdx.
; *
; * The div instruction raises a divide error when div is zero or when the
; * quotient does not fit in 64 bits (d1 >= div); nothing here checks for
; * that.
; */
_text SEGMENT READONLY PARA
div_4096_word_asm_64 PROC
        ; div expects the dividend in rdx:rax. d0 is copied out of rdx first
        ; because the next instruction overwrites rdx with d1.
        mov     rax, rdx
        mov     rdx, rcx
        div     r8
        ret
div_4096_word_asm_64 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Conditionally subtract b from a using the mask m. (r = a - (b & m))
; * m is -1 to subtract b, or 0 to store a unchanged into r.
; *
; * Registers read by the body:
; * r  (rcx)  Receives the 64-word result.
; * a  (rdx)  64-word number to subtract from.
; * b  (r8)   64-word number to subtract.
; * m  (r9)   Mask value to apply to every word of b.
; * rax is 0 on return when no borrow left the top word, all ones otherwise.
; */
_text SEGMENT READONLY PARA
sp_4096_cond_sub_avx2_64 PROC
        push    r12
        mov     rax, 0
        ; Each word of b is masked with pext: an all-ones mask passes the
        ; word through and a zero mask yields zero. pext does not modify the
        ; flags, so it can sit between sbb instructions without breaking
        ; the borrow chain.
        ; Word 0 opens the chain.
        mov     r12, QWORD PTR [r8]
        mov     r10, QWORD PTR [rdx]
        pext    r12, r12, r9
        sub     r10, r12
        ; Words 1..63, three per pass. r10, r11 and r12 rotate through the
        ; roles "masked word of b", "word of a in flight" and "previous
        ; result waiting to be stored"; the rotation repeats every three
        ; words, so one pass returns the registers to their starting roles.
csub64_off = 8
        REPT 21
        mov     r12, QWORD PTR [r8+csub64_off]
        mov     r11, QWORD PTR [rdx+csub64_off]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+csub64_off-8], r10
        sbb     r11, r12
        mov     r10, QWORD PTR [r8+csub64_off+8]
        mov     r12, QWORD PTR [rdx+csub64_off+8]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+csub64_off], r11
        sbb     r12, r10
        mov     r11, QWORD PTR [r8+csub64_off+16]
        mov     r10, QWORD PTR [rdx+csub64_off+16]
        pext    r11, r11, r9
        mov     QWORD PTR [rcx+csub64_off+8], r12
        sbb     r10, r11
csub64_off = csub64_off + 24
        ENDM
        ; Flush word 63, then rax = 0 - final borrow.
        mov     QWORD PTR [rcx+504], r10
        sbb     rax, 0
        pop     r12
        ret
sp_4096_cond_sub_avx2_64 ENDP
_text ENDS
ENDIF
; /* Compare a with b in constant time.
; *
; * a A single precision integer.
; * b A single precision integer.
; * return -ve, 0 or +ve if a is less than, equal to or greater than b
; * respectively.
; */
_text SEGMENT READONLY PARA
sp_4096_cmp_64 PROC
push r12
xor r9, r9
mov r8, -1
mov rax, -1
mov r10, 1
mov r11, QWORD PTR [rcx+504]
mov r12, QWORD PTR [rdx+504]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+496]
mov r12, QWORD PTR [rdx+496]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+488]
mov r12, QWORD PTR [rdx+488]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+480]
mov r12, QWORD PTR [rdx+480]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+472]
mov r12, QWORD PTR [rdx+472]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+464]
mov r12, QWORD PTR [rdx+464]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+456]
mov r12, QWORD PTR [rdx+456]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+448]
mov r12, QWORD PTR [rdx+448]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+440]
mov r12, QWORD PTR [rdx+440]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+432]
mov r12, QWORD PTR [rdx+432]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+424]
mov r12, QWORD PTR [rdx+424]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+416]
mov r12, QWORD PTR [rdx+416]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+408]
mov r12, QWORD PTR [rdx+408]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+400]
mov r12, QWORD PTR [rdx+400]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+392]
mov r12, QWORD PTR [rdx+392]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+384]
mov r12, QWORD PTR [rdx+384]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+376]
mov r12, QWORD PTR [rdx+376]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+368]
mov r12, QWORD PTR [rdx+368]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+360]
mov r12, QWORD PTR [rdx+360]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+352]
mov r12, QWORD PTR [rdx+352]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+344]
mov r12, QWORD PTR [rdx+344]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+336]
mov r12, QWORD PTR [rdx+336]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+328]
mov r12, QWORD PTR [rdx+328]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+320]
mov r12, QWORD PTR [rdx+320]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+312]
mov r12, QWORD PTR [rdx+312]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+304]
mov r12, QWORD PTR [rdx+304]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+296]
mov r12, QWORD PTR [rdx+296]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+288]
mov r12, QWORD PTR [rdx+288]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+280]
mov r12, QWORD PTR [rdx+280]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+272]
mov r12, QWORD PTR [rdx+272]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+264]
mov r12, QWORD PTR [rdx+264]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+256]
mov r12, QWORD PTR [rdx+256]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+248]
mov r12, QWORD PTR [rdx+248]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+240]
mov r12, QWORD PTR [rdx+240]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+232]
mov r12, QWORD PTR [rdx+232]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+224]
mov r12, QWORD PTR [rdx+224]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+216]
mov r12, QWORD PTR [rdx+216]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+208]
mov r12, QWORD PTR [rdx+208]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+200]
mov r12, QWORD PTR [rdx+200]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+192]
mov r12, QWORD PTR [rdx+192]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+184]
mov r12, QWORD PTR [rdx+184]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+176]
mov r12, QWORD PTR [rdx+176]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+168]
mov r12, QWORD PTR [rdx+168]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+160]
mov r12, QWORD PTR [rdx+160]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+152]
mov r12, QWORD PTR [rdx+152]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+144]
mov r12, QWORD PTR [rdx+144]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+136]
mov r12, QWORD PTR [rdx+136]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+128]
mov r12, QWORD PTR [rdx+128]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+120]
mov r12, QWORD PTR [rdx+120]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+112]
mov r12, QWORD PTR [rdx+112]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+104]
mov r12, QWORD PTR [rdx+104]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+96]
mov r12, QWORD PTR [rdx+96]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+88]
mov r12, QWORD PTR [rdx+88]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+80]
mov r12, QWORD PTR [rdx+80]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+72]
mov r12, QWORD PTR [rdx+72]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+64]
mov r12, QWORD PTR [rdx+64]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+56]
mov r12, QWORD PTR [rdx+56]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+48]
mov r12, QWORD PTR [rdx+48]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+40]
mov r12, QWORD PTR [rdx+40]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+32]
mov r12, QWORD PTR [rdx+32]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+24]
mov r12, QWORD PTR [rdx+24]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+16]
mov r12, QWORD PTR [rdx+16]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+8]
mov r12, QWORD PTR [rdx+8]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx]
mov r12, QWORD PTR [rdx]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
xor rax, r8
pop r12
ret
sp_4096_cmp_64 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Reduce the number back to 4096 bits using Montgomery reduction.
; *
; * a A single precision number to reduce in place.
; * m The single precision number representing the modulus.
; * mp The digit representing the negative inverse of m mod 2^n.
; *
; * As read by the code: a arrives in rcx, m in rdx and mp in r8.
; * The routine runs 64 outer iterations; each one adds m[0..63] * mu into
; * a[i..i+63] using MULX with two independent carry chains (ADCX on CF for
; * the low product halves, ADOX on OF for the high product halves).
; * Afterwards a[64..127] - (m AND mask) is written to a[0..63], where mask
; * is the negated carry left over from the last iteration.
; *
; * Registers inside the loop:
; *   r9  = a + 8*i + 256 (biased pointer), r10 = m, r8 = mp, r11 = counter
; *   r15, rdi, rsi, rbx = a[i], a[i+1], a[i+2], a[i+3] kept in registers
; *   r12, r13 = alternating accumulators for the remaining words
; *   r14 = 0, rbp = carry out of the previous iteration
; *   rdx = mu (implicit MULX multiplicand), rcx:rax = MULX high:low result
; */
_text SEGMENT READONLY PARA
sp_4096_mont_reduce_avx2_64 PROC
; Save every non-volatile register that is modified below.
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
push rbp
; Free rcx/rdx for MULX: r9 = a, r10 = m.
mov r9, rcx
mov r10, rdx
xor rbp, rbp
; i = 64
mov r11, 64
; Preload a[0..3]; these four words stay in registers across iterations.
mov r15, QWORD PTR [r9]
mov rdi, QWORD PTR [r9+8]
mov rsi, QWORD PTR [r9+16]
mov rbx, QWORD PTR [r9+24]
; Bias the pointer by 256 bytes: a[i+k] is addressed as [r9 + 8*k - 256].
add r9, 256
; rbp = 0: no carry into the first iteration.
xor rbp, rbp
L_4096_mont_loop_avx2_64:
; mu = a[i] * mp
mov rdx, r15
mov r12, r15
imul rdx, r8
; r14 = 0; this also clears CF and OF, starting both carry chains at zero.
xor r14, r14
; a[i+0] += m[0] * mu
; The sum for a[i+0] in r12 is not stored; only its carry continues.
; The register window shifts down one word: r15 takes over a[i+1].
mulx rcx, rax, QWORD PTR [r10]
mov r15, rdi
adcx r12, rax
adox r15, rcx
; a[i+1] += m[1] * mu
mulx rcx, rax, QWORD PTR [r10+8]
mov rdi, rsi
adcx r15, rax
adox rdi, rcx
; a[i+2] += m[2] * mu
mulx rcx, rax, QWORD PTR [r10+16]
mov rsi, rbx
adcx rdi, rax
adox rsi, rcx
; a[i+3] += m[3] * mu
; rbx is refilled from memory with a[i+4], completing the window shift.
mulx rcx, rax, QWORD PTR [r10+24]
mov rbx, QWORD PTR [r9+-224]
adcx rsi, rax
adox rbx, rcx
; a[i+4] += m[4] * mu
; The result stays in rbx (not written back); r13 is loaded with a[i+5].
mulx rcx, rax, QWORD PTR [r10+32]
mov r13, QWORD PTR [r9+-216]
adcx rbx, rax
adox r13, rcx
; a[i+5] += m[5] * mu
; From here on each step loads the next word, adds the low half (CF chain)
; into the current word, adds the high half (OF chain) into the next word
; and stores the finished current word back to memory.
mulx rcx, rax, QWORD PTR [r10+40]
mov r12, QWORD PTR [r9+-208]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-216], r13
; a[i+6] += m[6] * mu
mulx rcx, rax, QWORD PTR [r10+48]
mov r13, QWORD PTR [r9+-200]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-208], r12
; a[i+7] += m[7] * mu
mulx rcx, rax, QWORD PTR [r10+56]
mov r12, QWORD PTR [r9+-192]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-200], r13
; a[i+8] += m[8] * mu
mulx rcx, rax, QWORD PTR [r10+64]
mov r13, QWORD PTR [r9+-184]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-192], r12
; a[i+9] += m[9] * mu
mulx rcx, rax, QWORD PTR [r10+72]
mov r12, QWORD PTR [r9+-176]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-184], r13
; a[i+10] += m[10] * mu
mulx rcx, rax, QWORD PTR [r10+80]
mov r13, QWORD PTR [r9+-168]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-176], r12
; a[i+11] += m[11] * mu
mulx rcx, rax, QWORD PTR [r10+88]
mov r12, QWORD PTR [r9+-160]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-168], r13
; a[i+12] += m[12] * mu
mulx rcx, rax, QWORD PTR [r10+96]
mov r13, QWORD PTR [r9+-152]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-160], r12
; a[i+13] += m[13] * mu
mulx rcx, rax, QWORD PTR [r10+104]
mov r12, QWORD PTR [r9+-144]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-152], r13
; a[i+14] += m[14] * mu
mulx rcx, rax, QWORD PTR [r10+112]
mov r13, QWORD PTR [r9+-136]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-144], r12
; a[i+15] += m[15] * mu
mulx rcx, rax, QWORD PTR [r10+120]
mov r12, QWORD PTR [r9+-128]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-136], r13
; a[i+16] += m[16] * mu
mulx rcx, rax, QWORD PTR [r10+128]
mov r13, QWORD PTR [r9+-120]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-128], r12
; a[i+17] += m[17] * mu
mulx rcx, rax, QWORD PTR [r10+136]
mov r12, QWORD PTR [r9+-112]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-120], r13
; a[i+18] += m[18] * mu
mulx rcx, rax, QWORD PTR [r10+144]
mov r13, QWORD PTR [r9+-104]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-112], r12
; a[i+19] += m[19] * mu
mulx rcx, rax, QWORD PTR [r10+152]
mov r12, QWORD PTR [r9+-96]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-104], r13
; a[i+20] += m[20] * mu
mulx rcx, rax, QWORD PTR [r10+160]
mov r13, QWORD PTR [r9+-88]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-96], r12
; a[i+21] += m[21] * mu
mulx rcx, rax, QWORD PTR [r10+168]
mov r12, QWORD PTR [r9+-80]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-88], r13
; a[i+22] += m[22] * mu
mulx rcx, rax, QWORD PTR [r10+176]
mov r13, QWORD PTR [r9+-72]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-80], r12
; a[i+23] += m[23] * mu
mulx rcx, rax, QWORD PTR [r10+184]
mov r12, QWORD PTR [r9+-64]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-72], r13
; a[i+24] += m[24] * mu
mulx rcx, rax, QWORD PTR [r10+192]
mov r13, QWORD PTR [r9+-56]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-64], r12
; a[i+25] += m[25] * mu
mulx rcx, rax, QWORD PTR [r10+200]
mov r12, QWORD PTR [r9+-48]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-56], r13
; a[i+26] += m[26] * mu
mulx rcx, rax, QWORD PTR [r10+208]
mov r13, QWORD PTR [r9+-40]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-48], r12
; a[i+27] += m[27] * mu
mulx rcx, rax, QWORD PTR [r10+216]
mov r12, QWORD PTR [r9+-32]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-40], r13
; a[i+28] += m[28] * mu
mulx rcx, rax, QWORD PTR [r10+224]
mov r13, QWORD PTR [r9+-24]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-32], r12
; a[i+29] += m[29] * mu
mulx rcx, rax, QWORD PTR [r10+232]
mov r12, QWORD PTR [r9+-16]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-24], r13
; a[i+30] += m[30] * mu
mulx rcx, rax, QWORD PTR [r10+240]
mov r13, QWORD PTR [r9+-8]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-16], r12
; a[i+31] += m[31] * mu
mulx rcx, rax, QWORD PTR [r10+248]
mov r12, QWORD PTR [r9]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-8], r13
; a[i+32] += m[32] * mu
mulx rcx, rax, QWORD PTR [r10+256]
mov r13, QWORD PTR [r9+8]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9], r12
; a[i+33] += m[33] * mu
mulx rcx, rax, QWORD PTR [r10+264]
mov r12, QWORD PTR [r9+16]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+8], r13
; a[i+34] += m[34] * mu
mulx rcx, rax, QWORD PTR [r10+272]
mov r13, QWORD PTR [r9+24]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+16], r12
; a[i+35] += m[35] * mu
mulx rcx, rax, QWORD PTR [r10+280]
mov r12, QWORD PTR [r9+32]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+24], r13
; a[i+36] += m[36] * mu
mulx rcx, rax, QWORD PTR [r10+288]
mov r13, QWORD PTR [r9+40]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+32], r12
; a[i+37] += m[37] * mu
mulx rcx, rax, QWORD PTR [r10+296]
mov r12, QWORD PTR [r9+48]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+40], r13
; a[i+38] += m[38] * mu
mulx rcx, rax, QWORD PTR [r10+304]
mov r13, QWORD PTR [r9+56]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+48], r12
; a[i+39] += m[39] * mu
mulx rcx, rax, QWORD PTR [r10+312]
mov r12, QWORD PTR [r9+64]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+56], r13
; a[i+40] += m[40] * mu
mulx rcx, rax, QWORD PTR [r10+320]
mov r13, QWORD PTR [r9+72]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+64], r12
; a[i+41] += m[41] * mu
mulx rcx, rax, QWORD PTR [r10+328]
mov r12, QWORD PTR [r9+80]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+72], r13
; a[i+42] += m[42] * mu
mulx rcx, rax, QWORD PTR [r10+336]
mov r13, QWORD PTR [r9+88]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+80], r12
; a[i+43] += m[43] * mu
mulx rcx, rax, QWORD PTR [r10+344]
mov r12, QWORD PTR [r9+96]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+88], r13
; a[i+44] += m[44] * mu
mulx rcx, rax, QWORD PTR [r10+352]
mov r13, QWORD PTR [r9+104]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+96], r12
; a[i+45] += m[45] * mu
mulx rcx, rax, QWORD PTR [r10+360]
mov r12, QWORD PTR [r9+112]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+104], r13
; a[i+46] += m[46] * mu
mulx rcx, rax, QWORD PTR [r10+368]
mov r13, QWORD PTR [r9+120]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+112], r12
; a[i+47] += m[47] * mu
mulx rcx, rax, QWORD PTR [r10+376]
mov r12, QWORD PTR [r9+128]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+120], r13
; a[i+48] += m[48] * mu
mulx rcx, rax, QWORD PTR [r10+384]
mov r13, QWORD PTR [r9+136]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+128], r12
; a[i+49] += m[49] * mu
mulx rcx, rax, QWORD PTR [r10+392]
mov r12, QWORD PTR [r9+144]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+136], r13
; a[i+50] += m[50] * mu
mulx rcx, rax, QWORD PTR [r10+400]
mov r13, QWORD PTR [r9+152]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+144], r12
; a[i+51] += m[51] * mu
mulx rcx, rax, QWORD PTR [r10+408]
mov r12, QWORD PTR [r9+160]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+152], r13
; a[i+52] += m[52] * mu
mulx rcx, rax, QWORD PTR [r10+416]
mov r13, QWORD PTR [r9+168]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+160], r12
; a[i+53] += m[53] * mu
mulx rcx, rax, QWORD PTR [r10+424]
mov r12, QWORD PTR [r9+176]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+168], r13
; a[i+54] += m[54] * mu
mulx rcx, rax, QWORD PTR [r10+432]
mov r13, QWORD PTR [r9+184]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+176], r12
; a[i+55] += m[55] * mu
mulx rcx, rax, QWORD PTR [r10+440]
mov r12, QWORD PTR [r9+192]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+184], r13
; a[i+56] += m[56] * mu
mulx rcx, rax, QWORD PTR [r10+448]
mov r13, QWORD PTR [r9+200]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+192], r12
; a[i+57] += m[57] * mu
mulx rcx, rax, QWORD PTR [r10+456]
mov r12, QWORD PTR [r9+208]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+200], r13
; a[i+58] += m[58] * mu
mulx rcx, rax, QWORD PTR [r10+464]
mov r13, QWORD PTR [r9+216]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+208], r12
; a[i+59] += m[59] * mu
mulx rcx, rax, QWORD PTR [r10+472]
mov r12, QWORD PTR [r9+224]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+216], r13
; a[i+60] += m[60] * mu
mulx rcx, rax, QWORD PTR [r10+480]
mov r13, QWORD PTR [r9+232]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+224], r12
; a[i+61] += m[61] * mu
mulx rcx, rax, QWORD PTR [r10+488]
mov r12, QWORD PTR [r9+240]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+232], r13
; a[i+62] += m[62] * mu
mulx rcx, rax, QWORD PTR [r10+496]
mov r13, QWORD PTR [r9+248]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+240], r12
; a[i+63] += m[63] * mu
mulx rcx, rax, QWORD PTR [r10+504]
mov r12, QWORD PTR [r9+256]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+248], r13
; a[i+64] += carry kept in rbp from the previous iteration (plus CF).
adcx r12, rbp
; rbp = 0 (MOV leaves the flags untouched), then rbp = OF + CF:
; the carry out of this iteration, consumed by the next one.
mov rbp, r14
mov QWORD PTR [r9+256], r12
adox rbp, r14
adcx rbp, r14
; a += 1
add r9, 8
; i -= 1
sub r11, 1
jnz L_4096_mont_loop_avx2_64
; Remove the 256-byte bias: r9 now points 512 bytes (64 words) past the
; original a, i.e. at the upper half a[64..127].
sub r9, 256
; Negate the final carry: a carry of 1 becomes an all-ones mask, 0 stays 0.
neg rbp
; r8 = source (upper half of a), r9 = destination (original a).
mov r8, r9
sub r9, 512
; Conditional subtract: a[0..63] = a[64..127] - (m AND mask).
; PEXT with an all-ones mask returns the word unchanged and with a zero
; mask returns 0. MOV and PEXT leave the flags alone, so the borrow
; flows through the SUB/SBB chain.
; The first four upper words are taken from r15, rdi, rsi and rbx because
; the loop kept them in registers and never wrote them back to memory.
mov rcx, QWORD PTR [r10]
mov rdx, r15
pext rcx, rcx, rbp
sub rdx, rcx
mov rcx, QWORD PTR [r10+8]
mov rax, rdi
pext rcx, rcx, rbp
mov QWORD PTR [r9], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+16]
mov rcx, rsi
pext rdx, rdx, rbp
mov QWORD PTR [r9+8], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+24]
mov rdx, rbx
pext rax, rax, rbp
mov QWORD PTR [r9+16], rcx
sbb rdx, rax
; Remaining words are read from memory through r8.
mov rcx, QWORD PTR [r10+32]
mov rax, QWORD PTR [r8+32]
pext rcx, rcx, rbp
mov QWORD PTR [r9+24], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+40]
mov rcx, QWORD PTR [r8+40]
pext rdx, rdx, rbp
mov QWORD PTR [r9+32], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+48]
mov rdx, QWORD PTR [r8+48]
pext rax, rax, rbp
mov QWORD PTR [r9+40], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+56]
mov rax, QWORD PTR [r8+56]
pext rcx, rcx, rbp
mov QWORD PTR [r9+48], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+64]
mov rcx, QWORD PTR [r8+64]
pext rdx, rdx, rbp
mov QWORD PTR [r9+56], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+72]
mov rdx, QWORD PTR [r8+72]
pext rax, rax, rbp
mov QWORD PTR [r9+64], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+80]
mov rax, QWORD PTR [r8+80]
pext rcx, rcx, rbp
mov QWORD PTR [r9+72], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+88]
mov rcx, QWORD PTR [r8+88]
pext rdx, rdx, rbp
mov QWORD PTR [r9+80], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+96]
mov rdx, QWORD PTR [r8+96]
pext rax, rax, rbp
mov QWORD PTR [r9+88], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+104]
mov rax, QWORD PTR [r8+104]
pext rcx, rcx, rbp
mov QWORD PTR [r9+96], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+112]
mov rcx, QWORD PTR [r8+112]
pext rdx, rdx, rbp
mov QWORD PTR [r9+104], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+120]
mov rdx, QWORD PTR [r8+120]
pext rax, rax, rbp
mov QWORD PTR [r9+112], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+128]
mov rax, QWORD PTR [r8+128]
pext rcx, rcx, rbp
mov QWORD PTR [r9+120], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+136]
mov rcx, QWORD PTR [r8+136]
pext rdx, rdx, rbp
mov QWORD PTR [r9+128], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+144]
mov rdx, QWORD PTR [r8+144]
pext rax, rax, rbp
mov QWORD PTR [r9+136], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+152]
mov rax, QWORD PTR [r8+152]
pext rcx, rcx, rbp
mov QWORD PTR [r9+144], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+160]
mov rcx, QWORD PTR [r8+160]
pext rdx, rdx, rbp
mov QWORD PTR [r9+152], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+168]
mov rdx, QWORD PTR [r8+168]
pext rax, rax, rbp
mov QWORD PTR [r9+160], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+176]
mov rax, QWORD PTR [r8+176]
pext rcx, rcx, rbp
mov QWORD PTR [r9+168], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+184]
mov rcx, QWORD PTR [r8+184]
pext rdx, rdx, rbp
mov QWORD PTR [r9+176], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+192]
mov rdx, QWORD PTR [r8+192]
pext rax, rax, rbp
mov QWORD PTR [r9+184], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+200]
mov rax, QWORD PTR [r8+200]
pext rcx, rcx, rbp
mov QWORD PTR [r9+192], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+208]
mov rcx, QWORD PTR [r8+208]
pext rdx, rdx, rbp
mov QWORD PTR [r9+200], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+216]
mov rdx, QWORD PTR [r8+216]
pext rax, rax, rbp
mov QWORD PTR [r9+208], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+224]
mov rax, QWORD PTR [r8+224]
pext rcx, rcx, rbp
mov QWORD PTR [r9+216], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+232]
mov rcx, QWORD PTR [r8+232]
pext rdx, rdx, rbp
mov QWORD PTR [r9+224], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+240]
mov rdx, QWORD PTR [r8+240]
pext rax, rax, rbp
mov QWORD PTR [r9+232], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+248]
mov rax, QWORD PTR [r8+248]
pext rcx, rcx, rbp
mov QWORD PTR [r9+240], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+256]
mov rcx, QWORD PTR [r8+256]
pext rdx, rdx, rbp
mov QWORD PTR [r9+248], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+264]
mov rdx, QWORD PTR [r8+264]
pext rax, rax, rbp
mov QWORD PTR [r9+256], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+272]
mov rax, QWORD PTR [r8+272]
pext rcx, rcx, rbp
mov QWORD PTR [r9+264], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+280]
mov rcx, QWORD PTR [r8+280]
pext rdx, rdx, rbp
mov QWORD PTR [r9+272], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+288]
mov rdx, QWORD PTR [r8+288]
pext rax, rax, rbp
mov QWORD PTR [r9+280], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+296]
mov rax, QWORD PTR [r8+296]
pext rcx, rcx, rbp
mov QWORD PTR [r9+288], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+304]
mov rcx, QWORD PTR [r8+304]
pext rdx, rdx, rbp
mov QWORD PTR [r9+296], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+312]
mov rdx, QWORD PTR [r8+312]
pext rax, rax, rbp
mov QWORD PTR [r9+304], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+320]
mov rax, QWORD PTR [r8+320]
pext rcx, rcx, rbp
mov QWORD PTR [r9+312], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+328]
mov rcx, QWORD PTR [r8+328]
pext rdx, rdx, rbp
mov QWORD PTR [r9+320], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+336]
mov rdx, QWORD PTR [r8+336]
pext rax, rax, rbp
mov QWORD PTR [r9+328], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+344]
mov rax, QWORD PTR [r8+344]
pext rcx, rcx, rbp
mov QWORD PTR [r9+336], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+352]
mov rcx, QWORD PTR [r8+352]
pext rdx, rdx, rbp
mov QWORD PTR [r9+344], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+360]
mov rdx, QWORD PTR [r8+360]
pext rax, rax, rbp
mov QWORD PTR [r9+352], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+368]
mov rax, QWORD PTR [r8+368]
pext rcx, rcx, rbp
mov QWORD PTR [r9+360], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+376]
mov rcx, QWORD PTR [r8+376]
pext rdx, rdx, rbp
mov QWORD PTR [r9+368], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+384]
mov rdx, QWORD PTR [r8+384]
pext rax, rax, rbp
mov QWORD PTR [r9+376], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+392]
mov rax, QWORD PTR [r8+392]
pext rcx, rcx, rbp
mov QWORD PTR [r9+384], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+400]
mov rcx, QWORD PTR [r8+400]
pext rdx, rdx, rbp
mov QWORD PTR [r9+392], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+408]
mov rdx, QWORD PTR [r8+408]
pext rax, rax, rbp
mov QWORD PTR [r9+400], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+416]
mov rax, QWORD PTR [r8+416]
pext rcx, rcx, rbp
mov QWORD PTR [r9+408], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+424]
mov rcx, QWORD PTR [r8+424]
pext rdx, rdx, rbp
mov QWORD PTR [r9+416], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+432]
mov rdx, QWORD PTR [r8+432]
pext rax, rax, rbp
mov QWORD PTR [r9+424], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+440]
mov rax, QWORD PTR [r8+440]
pext rcx, rcx, rbp
mov QWORD PTR [r9+432], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+448]
mov rcx, QWORD PTR [r8+448]
pext rdx, rdx, rbp
mov QWORD PTR [r9+440], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+456]
mov rdx, QWORD PTR [r8+456]
pext rax, rax, rbp
mov QWORD PTR [r9+448], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+464]
mov rax, QWORD PTR [r8+464]
pext rcx, rcx, rbp
mov QWORD PTR [r9+456], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+472]
mov rcx, QWORD PTR [r8+472]
pext rdx, rdx, rbp
mov QWORD PTR [r9+464], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+480]
mov rdx, QWORD PTR [r8+480]
pext rax, rax, rbp
mov QWORD PTR [r9+472], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+488]
mov rax, QWORD PTR [r8+488]
pext rcx, rcx, rbp
mov QWORD PTR [r9+480], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+496]
mov rcx, QWORD PTR [r8+496]
pext rdx, rdx, rbp
mov QWORD PTR [r9+488], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+504]
mov rdx, QWORD PTR [r8+504]
pext rax, rax, rbp
mov QWORD PTR [r9+496], rcx
sbb rdx, rax
mov QWORD PTR [r9+504], rdx
; Restore the saved registers in reverse push order.
pop rbp
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_4096_mont_reduce_avx2_64 ENDP
_text ENDS
ENDIF
; /* Conditionally add a and b using the mask m.
; * m is -1 to add and 0 when not.
; *
; * r A single precision number representing conditional add result.
; * a A single precision number to add with.
; * b A single precision number to add.
; * m Mask value to apply.
; *
; * As read by the code: r is in rcx, a in rdx, b in r8 and m in r9.
; * 32 words of 64 bits (256 bytes) are processed. The carry out of the
; * most significant word is returned in rax (0 or 1).
; *
; * The work is done in two passes because AND rewrites the carry flag and
; * therefore cannot sit inside the ADC chain:
; *   1. b[k] AND m is written to a 256-byte scratch area on the stack.
; *   2. r[k] = a[k] + scratch[k] with carry propagated through ADC.
; *
; * NOTE(review): rsp is adjusted inside a plain PROC with no unwind
; * directives visible here - confirm whether unwind data is required.
; * NOTE(review): the scratch copy of (b AND m) is not cleared before
; * returning - confirm this is acceptable for the callers.
; */
_text SEGMENT READONLY PARA
sp_4096_cond_add_32 PROC
; Reserve the 32-word scratch buffer.
sub rsp, 256
; rax collects the final carry at the end.
mov rax, 0
; Pass 1: scratch[k] = b[k] AND m, two words per step.
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
and r10, r9
and r11, r9
mov QWORD PTR [rsp], r10
mov QWORD PTR [rsp+8], r11
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+16], r10
mov QWORD PTR [rsp+24], r11
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+32], r10
mov QWORD PTR [rsp+40], r11
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+48], r10
mov QWORD PTR [rsp+56], r11
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+64], r10
mov QWORD PTR [rsp+72], r11
mov r10, QWORD PTR [r8+80]
mov r11, QWORD PTR [r8+88]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+80], r10
mov QWORD PTR [rsp+88], r11
mov r10, QWORD PTR [r8+96]
mov r11, QWORD PTR [r8+104]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+96], r10
mov QWORD PTR [rsp+104], r11
mov r10, QWORD PTR [r8+112]
mov r11, QWORD PTR [r8+120]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+112], r10
mov QWORD PTR [rsp+120], r11
mov r10, QWORD PTR [r8+128]
mov r11, QWORD PTR [r8+136]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+128], r10
mov QWORD PTR [rsp+136], r11
mov r10, QWORD PTR [r8+144]
mov r11, QWORD PTR [r8+152]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+144], r10
mov QWORD PTR [rsp+152], r11
mov r10, QWORD PTR [r8+160]
mov r11, QWORD PTR [r8+168]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+160], r10
mov QWORD PTR [rsp+168], r11
mov r10, QWORD PTR [r8+176]
mov r11, QWORD PTR [r8+184]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+176], r10
mov QWORD PTR [rsp+184], r11
mov r10, QWORD PTR [r8+192]
mov r11, QWORD PTR [r8+200]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+192], r10
mov QWORD PTR [rsp+200], r11
mov r10, QWORD PTR [r8+208]
mov r11, QWORD PTR [r8+216]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+208], r10
mov QWORD PTR [rsp+216], r11
mov r10, QWORD PTR [r8+224]
mov r11, QWORD PTR [r8+232]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+224], r10
mov QWORD PTR [rsp+232], r11
mov r10, QWORD PTR [r8+240]
mov r11, QWORD PTR [r8+248]
and r10, r9
and r11, r9
mov QWORD PTR [rsp+240], r10
mov QWORD PTR [rsp+248], r11
; Pass 2: r[k] = a[k] + scratch[k] + carry.
; b has been fully consumed, so r8 is reused as a temporary.
; Only MOV separates the ADD/ADC instructions, so the carry flag survives
; from one word to the next; each store trails its ADC by one step.
mov r10, QWORD PTR [rdx]
mov r8, QWORD PTR [rsp]
add r10, r8
mov r11, QWORD PTR [rdx+8]
mov r8, QWORD PTR [rsp+8]
adc r11, r8
mov QWORD PTR [rcx], r10
mov r10, QWORD PTR [rdx+16]
mov r8, QWORD PTR [rsp+16]
adc r10, r8
mov QWORD PTR [rcx+8], r11
mov r11, QWORD PTR [rdx+24]
mov r8, QWORD PTR [rsp+24]
adc r11, r8
mov QWORD PTR [rcx+16], r10
mov r10, QWORD PTR [rdx+32]
mov r8, QWORD PTR [rsp+32]
adc r10, r8
mov QWORD PTR [rcx+24], r11
mov r11, QWORD PTR [rdx+40]
mov r8, QWORD PTR [rsp+40]
adc r11, r8
mov QWORD PTR [rcx+32], r10
mov r10, QWORD PTR [rdx+48]
mov r8, QWORD PTR [rsp+48]
adc r10, r8
mov QWORD PTR [rcx+40], r11
mov r11, QWORD PTR [rdx+56]
mov r8, QWORD PTR [rsp+56]
adc r11, r8
mov QWORD PTR [rcx+48], r10
mov r10, QWORD PTR [rdx+64]
mov r8, QWORD PTR [rsp+64]
adc r10, r8
mov QWORD PTR [rcx+56], r11
mov r11, QWORD PTR [rdx+72]
mov r8, QWORD PTR [rsp+72]
adc r11, r8
mov QWORD PTR [rcx+64], r10
mov r10, QWORD PTR [rdx+80]
mov r8, QWORD PTR [rsp+80]
adc r10, r8
mov QWORD PTR [rcx+72], r11
mov r11, QWORD PTR [rdx+88]
mov r8, QWORD PTR [rsp+88]
adc r11, r8
mov QWORD PTR [rcx+80], r10
mov r10, QWORD PTR [rdx+96]
mov r8, QWORD PTR [rsp+96]
adc r10, r8
mov QWORD PTR [rcx+88], r11
mov r11, QWORD PTR [rdx+104]
mov r8, QWORD PTR [rsp+104]
adc r11, r8
mov QWORD PTR [rcx+96], r10
mov r10, QWORD PTR [rdx+112]
mov r8, QWORD PTR [rsp+112]
adc r10, r8
mov QWORD PTR [rcx+104], r11
mov r11, QWORD PTR [rdx+120]
mov r8, QWORD PTR [rsp+120]
adc r11, r8
mov QWORD PTR [rcx+112], r10
mov r10, QWORD PTR [rdx+128]
mov r8, QWORD PTR [rsp+128]
adc r10, r8
mov QWORD PTR [rcx+120], r11
mov r11, QWORD PTR [rdx+136]
mov r8, QWORD PTR [rsp+136]
adc r11, r8
mov QWORD PTR [rcx+128], r10
mov r10, QWORD PTR [rdx+144]
mov r8, QWORD PTR [rsp+144]
adc r10, r8
mov QWORD PTR [rcx+136], r11
mov r11, QWORD PTR [rdx+152]
mov r8, QWORD PTR [rsp+152]
adc r11, r8
mov QWORD PTR [rcx+144], r10
mov r10, QWORD PTR [rdx+160]
mov r8, QWORD PTR [rsp+160]
adc r10, r8
mov QWORD PTR [rcx+152], r11
mov r11, QWORD PTR [rdx+168]
mov r8, QWORD PTR [rsp+168]
adc r11, r8
mov QWORD PTR [rcx+160], r10
mov r10, QWORD PTR [rdx+176]
mov r8, QWORD PTR [rsp+176]
adc r10, r8
mov QWORD PTR [rcx+168], r11
mov r11, QWORD PTR [rdx+184]
mov r8, QWORD PTR [rsp+184]
adc r11, r8
mov QWORD PTR [rcx+176], r10
mov r10, QWORD PTR [rdx+192]
mov r8, QWORD PTR [rsp+192]
adc r10, r8
mov QWORD PTR [rcx+184], r11
mov r11, QWORD PTR [rdx+200]
mov r8, QWORD PTR [rsp+200]
adc r11, r8
mov QWORD PTR [rcx+192], r10
mov r10, QWORD PTR [rdx+208]
mov r8, QWORD PTR [rsp+208]
adc r10, r8
mov QWORD PTR [rcx+200], r11
mov r11, QWORD PTR [rdx+216]
mov r8, QWORD PTR [rsp+216]
adc r11, r8
mov QWORD PTR [rcx+208], r10
mov r10, QWORD PTR [rdx+224]
mov r8, QWORD PTR [rsp+224]
adc r10, r8
mov QWORD PTR [rcx+216], r11
mov r11, QWORD PTR [rdx+232]
mov r8, QWORD PTR [rsp+232]
adc r11, r8
mov QWORD PTR [rcx+224], r10
mov r10, QWORD PTR [rdx+240]
mov r8, QWORD PTR [rsp+240]
adc r10, r8
mov QWORD PTR [rcx+232], r11
mov r11, QWORD PTR [rdx+248]
mov r8, QWORD PTR [rsp+248]
adc r11, r8
mov QWORD PTR [rcx+240], r10
mov QWORD PTR [rcx+248], r11
; rax = carry out of the top word.
adc rax, 0
; Release the scratch buffer.
add rsp, 256
ret
sp_4096_cond_add_32 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Conditionally add a and b using the mask m.
; * m is -1 to add and 0 when not.
; *
; * r A single precision number representing conditional add result.
; * a A single precision number to add with.
; * b A single precision number to add.
; * m Mask value to apply.
; *
; * As read by the code: r is in rcx, a in rdx, b in r8 and m in r9.
; * 32 words of 64 bits are processed in a single pass. The carry out of
; * the most significant word is returned in rax (0 or 1).
; *
; * Each b word is masked with PEXT: with m = -1 the word is returned
; * unchanged and with m = 0 the result is 0. PEXT and MOV do not modify
; * the flags, so masking can be interleaved with the ADD/ADC carry chain
; * and no stack scratch area is needed.
; *
; * r10, r11 and r12 rotate between holding the masked b word and the
; * running sum, so each finished word is stored after the loads for the
; * next word have been issued.
; */
_text SEGMENT READONLY PARA
sp_4096_cond_add_avx2_32 PROC
; r12 is non-volatile and is used as a third temporary.
push r12
; rax collects the final carry at the end.
mov rax, 0
; Word 0 starts the carry chain with ADD; all later words use ADC.
mov r12, QWORD PTR [r8]
mov r10, QWORD PTR [rdx]
pext r12, r12, r9
add r10, r12
mov r12, QWORD PTR [r8+8]
mov r11, QWORD PTR [rdx+8]
pext r12, r12, r9
mov QWORD PTR [rcx], r10
adc r11, r12
mov r10, QWORD PTR [r8+16]
mov r12, QWORD PTR [rdx+16]
pext r10, r10, r9
mov QWORD PTR [rcx+8], r11
adc r12, r10
mov r11, QWORD PTR [r8+24]
mov r10, QWORD PTR [rdx+24]
pext r11, r11, r9
mov QWORD PTR [rcx+16], r12
adc r10, r11
mov r12, QWORD PTR [r8+32]
mov r11, QWORD PTR [rdx+32]
pext r12, r12, r9
mov QWORD PTR [rcx+24], r10
adc r11, r12
mov r10, QWORD PTR [r8+40]
mov r12, QWORD PTR [rdx+40]
pext r10, r10, r9
mov QWORD PTR [rcx+32], r11
adc r12, r10
mov r11, QWORD PTR [r8+48]
mov r10, QWORD PTR [rdx+48]
pext r11, r11, r9
mov QWORD PTR [rcx+40], r12
adc r10, r11
mov r12, QWORD PTR [r8+56]
mov r11, QWORD PTR [rdx+56]
pext r12, r12, r9
mov QWORD PTR [rcx+48], r10
adc r11, r12
mov r10, QWORD PTR [r8+64]
mov r12, QWORD PTR [rdx+64]
pext r10, r10, r9
mov QWORD PTR [rcx+56], r11
adc r12, r10
mov r11, QWORD PTR [r8+72]
mov r10, QWORD PTR [rdx+72]
pext r11, r11, r9
mov QWORD PTR [rcx+64], r12
adc r10, r11
mov r12, QWORD PTR [r8+80]
mov r11, QWORD PTR [rdx+80]
pext r12, r12, r9
mov QWORD PTR [rcx+72], r10
adc r11, r12
mov r10, QWORD PTR [r8+88]
mov r12, QWORD PTR [rdx+88]
pext r10, r10, r9
mov QWORD PTR [rcx+80], r11
adc r12, r10
mov r11, QWORD PTR [r8+96]
mov r10, QWORD PTR [rdx+96]
pext r11, r11, r9
mov QWORD PTR [rcx+88], r12
adc r10, r11
mov r12, QWORD PTR [r8+104]
mov r11, QWORD PTR [rdx+104]
pext r12, r12, r9
mov QWORD PTR [rcx+96], r10
adc r11, r12
mov r10, QWORD PTR [r8+112]
mov r12, QWORD PTR [rdx+112]
pext r10, r10, r9
mov QWORD PTR [rcx+104], r11
adc r12, r10
mov r11, QWORD PTR [r8+120]
mov r10, QWORD PTR [rdx+120]
pext r11, r11, r9
mov QWORD PTR [rcx+112], r12
adc r10, r11
mov r12, QWORD PTR [r8+128]
mov r11, QWORD PTR [rdx+128]
pext r12, r12, r9
mov QWORD PTR [rcx+120], r10
adc r11, r12
mov r10, QWORD PTR [r8+136]
mov r12, QWORD PTR [rdx+136]
pext r10, r10, r9
mov QWORD PTR [rcx+128], r11
adc r12, r10
mov r11, QWORD PTR [r8+144]
mov r10, QWORD PTR [rdx+144]
pext r11, r11, r9
mov QWORD PTR [rcx+136], r12
adc r10, r11
mov r12, QWORD PTR [r8+152]
mov r11, QWORD PTR [rdx+152]
pext r12, r12, r9
mov QWORD PTR [rcx+144], r10
adc r11, r12
mov r10, QWORD PTR [r8+160]
mov r12, QWORD PTR [rdx+160]
pext r10, r10, r9
mov QWORD PTR [rcx+152], r11
adc r12, r10
mov r11, QWORD PTR [r8+168]
mov r10, QWORD PTR [rdx+168]
pext r11, r11, r9
mov QWORD PTR [rcx+160], r12
adc r10, r11
mov r12, QWORD PTR [r8+176]
mov r11, QWORD PTR [rdx+176]
pext r12, r12, r9
mov QWORD PTR [rcx+168], r10
adc r11, r12
mov r10, QWORD PTR [r8+184]
mov r12, QWORD PTR [rdx+184]
pext r10, r10, r9
mov QWORD PTR [rcx+176], r11
adc r12, r10
mov r11, QWORD PTR [r8+192]
mov r10, QWORD PTR [rdx+192]
pext r11, r11, r9
mov QWORD PTR [rcx+184], r12
adc r10, r11
mov r12, QWORD PTR [r8+200]
mov r11, QWORD PTR [rdx+200]
pext r12, r12, r9
mov QWORD PTR [rcx+192], r10
adc r11, r12
mov r10, QWORD PTR [r8+208]
mov r12, QWORD PTR [rdx+208]
pext r10, r10, r9
mov QWORD PTR [rcx+200], r11
adc r12, r10
mov r11, QWORD PTR [r8+216]
mov r10, QWORD PTR [rdx+216]
pext r11, r11, r9
mov QWORD PTR [rcx+208], r12
adc r10, r11
mov r12, QWORD PTR [r8+224]
mov r11, QWORD PTR [rdx+224]
pext r12, r12, r9
mov QWORD PTR [rcx+216], r10
adc r11, r12
mov r10, QWORD PTR [r8+232]
mov r12, QWORD PTR [rdx+232]
pext r10, r10, r9
mov QWORD PTR [rcx+224], r11
adc r12, r10
mov r11, QWORD PTR [r8+240]
mov r10, QWORD PTR [rdx+240]
pext r11, r11, r9
mov QWORD PTR [rcx+232], r12
adc r10, r11
mov r12, QWORD PTR [r8+248]
mov r11, QWORD PTR [rdx+248]
pext r12, r12, r9
mov QWORD PTR [rcx+240], r10
adc r11, r12
mov QWORD PTR [rcx+248], r11
; rax = carry out of the top word.
adc rax, 0
pop r12
ret
sp_4096_cond_add_avx2_32 ENDP
_text ENDS
ENDIF
; /* Shift number left by n bits. (r = a << n)
; *
; * Reads the 64 words a[0..63] and writes the 65 words r[0..64]; the bits
; * shifted out of the top word a[63] are stored in r[64].
; * The count is applied with shld/shl through cl, so only the low 6 bits
; * of n take effect.
; * Words are processed from most significant to least significant, and each
; * group of a words is loaded before the r words at those offsets are
; * stored.
; *
; * r Result of left shift by n (in rcx on entry).
; * a Number to shift (in rdx on entry).
; * n Amount to shift (in r8 on entry).
; */
_text SEGMENT READONLY PARA
sp_4096_lshift_64 PROC
push r12
push r13
; rcx is needed for the shift count (cl), so r moves to r9.
mov r9, rcx
mov rcx, r8
; r12 starts at zero and receives the bits shifted out of a[63].
mov r12, 0
; Load a[59..63]; r13 (a[59]) is kept unshifted for the next group.
mov r13, QWORD PTR [rdx+472]
mov rax, QWORD PTR [rdx+480]
mov r8, QWORD PTR [rdx+488]
mov r10, QWORD PTR [rdx+496]
mov r11, QWORD PTR [rdx+504]
; Each shld shifts the destination left and fills from the next lower word.
shld r12, r11, cl
shld r11, r10, cl
shld r10, r8, cl
shld r8, rax, cl
shld rax, r13, cl
; Store r[60..64].
mov QWORD PTR [r9+480], rax
mov QWORD PTR [r9+488], r8
mov QWORD PTR [r9+496], r10
mov QWORD PTR [r9+504], r11
mov QWORD PTR [r9+512], r12
; Load a[55..58]; r13 (a[59]) becomes r[59], r11 (a[55]) carries over.
; From here on r13 and r11 alternate as the carried-over word.
mov r11, QWORD PTR [rdx+440]
mov rax, QWORD PTR [rdx+448]
mov r8, QWORD PTR [rdx+456]
mov r10, QWORD PTR [rdx+464]
shld r13, r10, cl
shld r10, r8, cl
shld r8, rax, cl
shld rax, r11, cl
mov QWORD PTR [r9+448], rax
mov QWORD PTR [r9+456], r8
mov QWORD PTR [r9+464], r10
mov QWORD PTR [r9+472], r13
mov r13, QWORD PTR [rdx+408]
mov rax, QWORD PTR [rdx+416]
mov r8, QWORD PTR [rdx+424]
mov r10, QWORD PTR [rdx+432]
shld r11, r10, cl
shld r10, r8, cl
shld r8, rax, cl
shld rax, r13, cl
mov QWORD PTR [r9+416], rax
mov QWORD PTR [r9+424], r8
mov QWORD PTR [r9+432], r10
mov QWORD PTR [r9+440], r11
mov r11, QWORD PTR [rdx+376]
mov rax, QWORD PTR [rdx+384]
mov r8, QWORD PTR [rdx+392]
mov r10, QWORD PTR [rdx+400]
shld r13, r10, cl
shld r10, r8, cl
shld r8, rax, cl
shld rax, r11, cl
mov QWORD PTR [r9+384], rax
mov QWORD PTR [r9+392], r8
mov QWORD PTR [r9+400], r10
mov QWORD PTR [r9+408], r13
mov r13, QWORD PTR [rdx+344]
mov rax, QWORD PTR [rdx+352]
mov r8, QWORD PTR [rdx+360]
mov r10, QWORD PTR [rdx+368]
shld r11, r10, cl
shld r10, r8, cl
shld r8, rax, cl
shld rax, r13, cl
mov QWORD PTR [r9+352], rax
mov QWORD PTR [r9+360], r8
mov QWORD PTR [r9+368], r10
mov QWORD PTR [r9+376], r11
mov r11, QWORD PTR [rdx+312]
mov rax, QWORD PTR [rdx+320]
mov r8, QWORD PTR [rdx+328]
mov r10, QWORD PTR [rdx+336]
shld r13, r10, cl
shld r10, r8, cl
shld r8, rax, cl
shld rax, r11, cl
mov QWORD PTR [r9+320], rax
mov QWORD PTR [r9+328], r8
mov QWORD PTR [r9+336], r10
mov QWORD PTR [r9+344], r13
mov r13, QWORD PTR [rdx+280]
mov rax, QWORD PTR [rdx+288]
mov r8, QWORD PTR [rdx+296]
mov r10, QWORD PTR [rdx+304]
shld r11, r10, cl
shld r10, r8, cl
shld r8, rax, cl
shld rax, r13, cl
mov QWORD PTR [r9+288], rax
mov QWORD PTR [r9+296], r8
mov QWORD PTR [r9+304], r10
mov QWORD PTR [r9+312], r11
mov r11, QWORD PTR [rdx+248]
mov rax, QWORD PTR [rdx+256]
mov r8, QWORD PTR [rdx+264]
mov r10, QWORD PTR [rdx+272]
shld r13, r10, cl
shld r10, r8, cl
shld r8, rax, cl
shld rax, r11, cl
mov QWORD PTR [r9+256], rax
mov QWORD PTR [r9+264], r8
mov QWORD PTR [r9+272], r10
mov QWORD PTR [r9+280], r13
mov r13, QWORD PTR [rdx+216]
mov rax, QWORD PTR [rdx+224]
mov r8, QWORD PTR [rdx+232]
mov r10, QWORD PTR [rdx+240]
shld r11, r10, cl
shld r10, r8, cl
shld r8, rax, cl
shld rax, r13, cl
mov QWORD PTR [r9+224], rax
mov QWORD PTR [r9+232], r8
mov QWORD PTR [r9+240], r10
mov QWORD PTR [r9+248], r11
mov r11, QWORD PTR [rdx+184]
mov rax, QWORD PTR [rdx+192]
mov r8, QWORD PTR [rdx+200]
mov r10, QWORD PTR [rdx+208]
shld r13, r10, cl
shld r10, r8, cl
shld r8, rax, cl
shld rax, r11, cl
mov QWORD PTR [r9+192], rax
mov QWORD PTR [r9+200], r8
mov QWORD PTR [r9+208], r10
mov QWORD PTR [r9+216], r13
mov r13, QWORD PTR [rdx+152]
mov rax, QWORD PTR [rdx+160]
mov r8, QWORD PTR [rdx+168]
mov r10, QWORD PTR [rdx+176]
shld r11, r10, cl
shld r10, r8, cl
shld r8, rax, cl
shld rax, r13, cl
mov QWORD PTR [r9+160], rax
mov QWORD PTR [r9+168], r8
mov QWORD PTR [r9+176], r10
mov QWORD PTR [r9+184], r11
mov r11, QWORD PTR [rdx+120]
mov rax, QWORD PTR [rdx+128]
mov r8, QWORD PTR [rdx+136]
mov r10, QWORD PTR [rdx+144]
shld r13, r10, cl
shld r10, r8, cl
shld r8, rax, cl
shld rax, r11, cl
mov QWORD PTR [r9+128], rax
mov QWORD PTR [r9+136], r8
mov QWORD PTR [r9+144], r10
mov QWORD PTR [r9+152], r13
mov r13, QWORD PTR [rdx+88]
mov rax, QWORD PTR [rdx+96]
mov r8, QWORD PTR [rdx+104]
mov r10, QWORD PTR [rdx+112]
shld r11, r10, cl
shld r10, r8, cl
shld r8, rax, cl
shld rax, r13, cl
mov QWORD PTR [r9+96], rax
mov QWORD PTR [r9+104], r8
mov QWORD PTR [r9+112], r10
mov QWORD PTR [r9+120], r11
mov r11, QWORD PTR [rdx+56]
mov rax, QWORD PTR [rdx+64]
mov r8, QWORD PTR [rdx+72]
mov r10, QWORD PTR [rdx+80]
shld r13, r10, cl
shld r10, r8, cl
shld r8, rax, cl
shld rax, r11, cl
mov QWORD PTR [r9+64], rax
mov QWORD PTR [r9+72], r8
mov QWORD PTR [r9+80], r10
mov QWORD PTR [r9+88], r13
mov r13, QWORD PTR [rdx+24]
mov rax, QWORD PTR [rdx+32]
mov r8, QWORD PTR [rdx+40]
mov r10, QWORD PTR [rdx+48]
shld r11, r10, cl
shld r10, r8, cl
shld r8, rax, cl
shld rax, r13, cl
mov QWORD PTR [r9+32], rax
mov QWORD PTR [r9+40], r8
mov QWORD PTR [r9+48], r10
mov QWORD PTR [r9+56], r11
; Final group: a[0..2]; r13 still holds a[3].
mov rax, QWORD PTR [rdx]
mov r8, QWORD PTR [rdx+8]
mov r10, QWORD PTR [rdx+16]
shld r13, r10, cl
shld r10, r8, cl
shld r8, rax, cl
; The lowest word has nothing below it, so zeros are shifted in.
shl rax, cl
mov QWORD PTR [r9], rax
mov QWORD PTR [r9+8], r8
mov QWORD PTR [r9+16], r10
mov QWORD PTR [r9+24], r13
pop r13
pop r12
ret
sp_4096_lshift_64 ENDP
_text ENDS
ENDIF
ENDIF
IFNDEF WOLFSSL_SP_NO_256
; /* Multiply a and b into r. (r = a * b)
; *
; * Multiplies two 4-word numbers into an 8-word result, one result column
; * at a time. r10, r11 and r12 rotate as a three-word column accumulator.
; * The low four result words are staged in 32 bytes of stack and only
; * copied to r[0..3] after the last read of a and b; the high four words
; * are written straight to r[4..7].
; * NOTE(review): the staging presumably allows r to alias a or b - confirm
; * against callers.
; *
; * r A single precision integer (8 words written; in rcx on entry).
; * a A single precision integer (4 words read; in rdx on entry).
; * b A single precision integer (4 words read; in r8 on entry).
; */
_text SEGMENT READONLY PARA
sp_256_mul_4 PROC
push r12
; mul writes rdx:rax, so the a pointer is moved out of rdx.
mov r9, rdx
; Scratch space for the low four result words.
sub rsp, 32
; A[0] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9]
xor r12, r12
mov QWORD PTR [rsp], rax
mov r11, rdx
; A[0] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+8]
add r11, rax
adc r12, rdx
adc r10, 0
; Column 1 complete.
mov QWORD PTR [rsp+8], r11
; A[0] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+8]
add r12, rax
adc r10, rdx
adc r11, 0
; A[2] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+16]
add r12, rax
adc r10, rdx
adc r11, 0
; Column 2 complete.
mov QWORD PTR [rsp+16], r12
; A[0] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[1] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+8]
add r10, rax
adc r11, rdx
adc r12, 0
; A[2] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+16]
add r10, rax
adc r11, rdx
adc r12, 0
; A[3] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
adc r12, 0
; Column 3 complete.
mov QWORD PTR [rsp+24], r10
; A[1] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+8]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[2] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+16]
add r11, rax
adc r12, rdx
adc r10, 0
; A[3] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+24]
add r11, rax
adc r12, rdx
adc r10, 0
; Column 4 and above go directly to r.
mov QWORD PTR [rcx+32], r11
; A[2] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+16]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[3] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+24]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+40], r12
; A[3] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
mov QWORD PTR [rcx+48], r10
mov QWORD PTR [rcx+56], r11
; Copy the staged low words into r[0..3].
mov rax, QWORD PTR [rsp]
mov rdx, QWORD PTR [rsp+8]
mov r10, QWORD PTR [rsp+16]
mov r11, QWORD PTR [rsp+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], rdx
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
add rsp, 32
pop r12
ret
sp_256_mul_4 ENDP
_text ENDS
; /* Square a and put result in r. (r = a * a)
; *
; * Squares a 4-word number into an 8-word result, one result column at a
; * time. r9, r10 and r11 rotate as a three-word column accumulator; each
; * off-diagonal product A[i] * A[j] (i != j) is added twice.
; * The low four result words are staged in 32 bytes of stack and only
; * copied to r[0..3] after the last read of a; the high four words are
; * written straight to r[4..7].
; * Only volatile registers are used, so no callee-saved register needs to
; * be pushed.
; *
; * r A single precision integer (8 words written; in rcx on entry).
; * a A single precision integer (4 words read; in rdx on entry).
; */
_text SEGMENT READONLY PARA
sp_256_sqr_4 PROC
        ; mul writes rdx:rax, so the a pointer is moved out of rdx.
        mov r8, rdx
        ; Scratch space for the low four result words.
        sub rsp, 32
        ; A[0] * A[0]
        mov rax, QWORD PTR [r8]
        mul rax
        xor r11, r11
        mov QWORD PTR [rsp], rax
        mov r10, rdx
        ; A[0] * A[1] (added twice)
        mov rax, QWORD PTR [r8+8]
        mul QWORD PTR [r8]
        xor r9, r9
        add r10, rax
        adc r11, rdx
        adc r9, 0
        add r10, rax
        adc r11, rdx
        adc r9, 0
        mov QWORD PTR [rsp+8], r10
        ; A[0] * A[2] (added twice)
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r8]
        xor r10, r10
        add r11, rax
        adc r9, rdx
        adc r10, 0
        add r11, rax
        adc r9, rdx
        adc r10, 0
        ; A[1] * A[1]
        mov rax, QWORD PTR [r8+8]
        mul rax
        add r11, rax
        adc r9, rdx
        adc r10, 0
        mov QWORD PTR [rsp+16], r11
        ; A[0] * A[3] (added twice)
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r8]
        xor r11, r11
        add r9, rax
        adc r10, rdx
        adc r11, 0
        add r9, rax
        adc r10, rdx
        adc r11, 0
        ; A[1] * A[2] (added twice)
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r8+8]
        add r9, rax
        adc r10, rdx
        adc r11, 0
        add r9, rax
        adc r10, rdx
        adc r11, 0
        mov QWORD PTR [rsp+24], r9
        ; A[1] * A[3] (added twice)
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r8+8]
        xor r9, r9
        add r10, rax
        adc r11, rdx
        adc r9, 0
        add r10, rax
        adc r11, rdx
        adc r9, 0
        ; A[2] * A[2]
        mov rax, QWORD PTR [r8+16]
        mul rax
        add r10, rax
        adc r11, rdx
        adc r9, 0
        mov QWORD PTR [rcx+32], r10
        ; A[2] * A[3] (added twice)
        mov rax, QWORD PTR [r8+24]
        mul QWORD PTR [r8+16]
        xor r10, r10
        add r11, rax
        adc r9, rdx
        adc r10, 0
        add r11, rax
        adc r9, rdx
        adc r10, 0
        mov QWORD PTR [rcx+40], r11
        ; A[3] * A[3]
        mov rax, QWORD PTR [r8+24]
        mul rax
        add r9, rax
        adc r10, rdx
        mov QWORD PTR [rcx+48], r9
        mov QWORD PTR [rcx+56], r10
        ; All reads of a are done: r8 (a pointer) and r9 (already stored)
        ; are free, so they carry the staged words instead of r12/r13.
        mov rax, QWORD PTR [rsp]
        mov rdx, QWORD PTR [rsp+8]
        mov r8, QWORD PTR [rsp+16]
        mov r9, QWORD PTR [rsp+24]
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], rdx
        mov QWORD PTR [rcx+16], r8
        mov QWORD PTR [rcx+24], r9
        add rsp, 32
        ret
sp_256_sqr_4 ENDP
_text ENDS
; /* Add two 4-word numbers. (r = a + b)
; *
; * r Receives the low 256 bits of the sum (in rcx on entry).
; * a First addend (in rdx on entry).
; * b Second addend (in r8 on entry).
; * Returns the carry out of the top word (0 or 1) in rax.
; */
_text SEGMENT READONLY PARA
sp_256_add_4 PROC
        ; Clear the carry-out accumulator; writing eax zero-extends to rax.
        xor eax, eax
        ; Word 0
        mov r10, QWORD PTR [rdx]
        add r10, QWORD PTR [r8]
        ; Word 1 - mov leaves the carry flag untouched between add and adc.
        mov r11, QWORD PTR [rdx+8]
        mov QWORD PTR [rcx], r10
        adc r11, QWORD PTR [r8+8]
        ; Word 2
        mov r10, QWORD PTR [rdx+16]
        mov QWORD PTR [rcx+8], r11
        adc r10, QWORD PTR [r8+16]
        ; Word 3
        mov r11, QWORD PTR [rdx+24]
        mov QWORD PTR [rcx+16], r10
        adc r11, QWORD PTR [r8+24]
        mov QWORD PTR [rcx+24], r11
        ; Fold the final carry into the return value.
        adc rax, 0
        ret
sp_256_add_4 ENDP
_text ENDS
; /* Subtract one 4-word number from another. (r = a - b)
; *
; * r Receives the low 256 bits of the difference (in rcx on entry).
; * a Number to subtract from (in rdx on entry).
; * b Number to subtract (in r8 on entry).
; * Returns 0 in rax when there is no borrow out of the top word and
; * -1 (all bits set) when there is.
; */
_text SEGMENT READONLY PARA
sp_256_sub_4 PROC
        push r12
        ; Borrow accumulator; writing eax zero-extends to rax.
        xor eax, eax
        ; Subtract word by word; mov leaves the borrow (carry flag) intact.
        mov r10, QWORD PTR [rdx]
        sub r10, QWORD PTR [r8]
        mov r11, QWORD PTR [rdx+8]
        sbb r11, QWORD PTR [r8+8]
        mov r12, QWORD PTR [rdx+16]
        sbb r12, QWORD PTR [r8+16]
        mov r9, QWORD PTR [rdx+24]
        sbb r9, QWORD PTR [r8+24]
        ; rax = 0 - borrow
        sbb rax, 0
        ; Nothing is stored until every word of a and b has been read.
        mov QWORD PTR [rcx], r10
        mov QWORD PTR [rcx+8], r11
        mov QWORD PTR [rcx+16], r12
        mov QWORD PTR [rcx+24], r9
        pop r12
        ret
sp_256_sub_4 ENDP
_text ENDS
; /* Conditionally copy a into r using the mask m, without branching.
; * For each word: r ^= (a ^ r) & m, so m = -1 copies a and m = 0 leaves
; * r unchanged.
; *
; * r A single precision number to copy over (in rcx on entry).
; * a A single precision number to copy (in rdx on entry).
; * m Mask value to apply (in r8 on entry).
; */
_text SEGMENT READONLY PARA
sp_256_cond_copy_4 PROC
        ; Per-word difference a ^ r, kept only where the mask is set.
        mov rax, QWORD PTR [rdx]
        xor rax, QWORD PTR [rcx]
        and rax, r8
        mov r10, QWORD PTR [rdx+8]
        xor r10, QWORD PTR [rcx+8]
        and r10, r8
        mov r11, QWORD PTR [rdx+16]
        xor r11, QWORD PTR [rcx+16]
        and r11, r8
        mov r9, QWORD PTR [rdx+24]
        xor r9, QWORD PTR [rcx+24]
        and r9, r8
        ; Apply the masked differences to r.
        xor QWORD PTR [rcx], rax
        xor QWORD PTR [rcx+8], r10
        xor QWORD PTR [rcx+16], r11
        xor QWORD PTR [rcx+24], r9
        ret
sp_256_cond_copy_4 ENDP
_text ENDS
; /* Multiply two Montgomery form numbers mod the modulus (prime).
; * (r = a * b mod m)
; *
; * The 8-word product is built in r11, r12, r13, r14, r15, rdi, rsi, rbx
; * (least significant first) and then reduced with shifts and adds only.
; * The m and mp arguments are not read by this routine: the reduction is
; * hard-coded for the modulus whose words are
; * (-1, 0x00000000FFFFFFFF, 0, 0xFFFFFFFF00000001), i.e.
; * 2^256 - 2^224 + 2^192 + 2^96 - 1.
; * The final subtraction of the modulus only happens when the reduction
; * overflowed 256 bits.
; *
; * r Result of multiplication (in rcx on entry).
; * a First number to multiply in Montgomery form (in rdx on entry).
; * b Second number to multiply in Montgomery form (in r8 on entry).
; * m Modulus (prime) - not read.
; * mp Montgomery multiplier - not read.
; */
_text SEGMENT READONLY PARA
sp_256_mont_mul_4 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
; mul writes rdx:rax, so the a pointer is moved out of rdx.
mov r10, rdx
; A[0] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r10]
mov r11, rax
mov r12, rdx
; A[0] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r10]
xor r13, r13
add r12, rax
adc r13, rdx
; A[1] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r10+8]
xor r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
; A[0] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r10]
add r13, rax
adc r14, rdx
; A[1] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r10+8]
xor r15, r15
add r13, rax
adc r14, rdx
adc r15, 0
; A[2] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r10+16]
add r13, rax
adc r14, rdx
adc r15, 0
; A[0] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r10]
xor rdi, rdi
add r14, rax
adc r15, rdx
adc rdi, 0
; A[1] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r10+8]
add r14, rax
adc r15, rdx
adc rdi, 0
; A[2] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r10+16]
add r14, rax
adc r15, rdx
adc rdi, 0
; A[3] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r10+24]
add r14, rax
adc r15, rdx
adc rdi, 0
; A[1] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r10+8]
xor rsi, rsi
add r15, rax
adc rdi, rdx
adc rsi, 0
; A[2] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r10+16]
add r15, rax
adc rdi, rdx
adc rsi, 0
; A[3] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r10+24]
add r15, rax
adc rdi, rdx
adc rsi, 0
; A[2] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r10+16]
xor rbx, rbx
add rdi, rax
adc rsi, rdx
adc rbx, 0
; A[3] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r10+24]
add rdi, rax
adc rsi, rdx
adc rbx, 0
; A[3] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r10+24]
add rsi, rax
adc rbx, rdx
; Start Reduction
; Both pointers are dead from here; r8 and r10 become scratch.
; mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
; - a[0] << 32 << 192
; + (a[0] * 2) << 192
; mu is assembled in rax (word 0), r10 (word 1), r8 (word 2), rdx (word 3).
mov rax, r11
mov rdx, r14
add rdx, r11
mov r10, r12
add rdx, r11
mov r8, r13
; a[0]-a[2] << 32
shl r11, 32
shld r13, r10, 32
shld r12, rax, 32
; - a[0] << 32 << 192
sub rdx, r11
; + a[0]-a[2] << 32 << 64
add r10, r11
adc r8, r12
adc rdx, r13
; a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
; a += mu << 256
; r11 tracks overflow out of the top word: each carry subtracts one from
; it (sbb) and the borrow of the final subtraction adds one back (adc).
xor r11, r11
add r15, rax
adc rdi, r10
adc rsi, r8
adc rbx, rdx
sbb r11, 0
; a += mu << 192
add r14, rax
adc r15, r10
adc rdi, r8
adc rsi, rdx
adc rbx, 0
sbb r11, 0
; mu <<= 32
; r9 receives the 32 bits shifted out of the top word of mu.
mov r9, rdx
shld rdx, r8, 32
shld r8, r10, 32
shld r10, rax, 32
shr r9, 32
shl rax, 32
; a += (mu << 32) << 64
add r14, r8
adc r15, rdx
adc rdi, r9
adc rsi, 0
adc rbx, 0
sbb r11, 0
; a -= (mu << 32) << 192
sub r14, rax
sbb r15, r10
sbb rdi, r8
sbb rsi, rdx
sbb rbx, r9
adc r11, 0
; 4294967295 = 0x00000000FFFFFFFF is m[1];
; 18446744069414584321 = 0xFFFFFFFF00000001 is m[3].
mov rax, 4294967295
mov r10, 18446744069414584321
; mask m and sub from result if overflow
; m[0] = -1 & mask = mask
; rax = m[1] & mask
and rax, r11
; m[2] = 0 & mask = 0
; r10 = m[3] & mask
and r10, r11
sub r15, r11
sbb rdi, rax
sbb rsi, 0
sbb rbx, r10
mov QWORD PTR [rcx], r15
mov QWORD PTR [rcx+8], rdi
mov QWORD PTR [rcx+16], rsi
mov QWORD PTR [rcx+24], rbx
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_256_mont_mul_4 ENDP
_text ENDS
; /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
; *
; * The off-diagonal products A[i] * A[j] (i < j) are summed once and then
; * doubled, and the squares A[i] * A[i] are added afterwards. The 8-word
; * square ends up in r10, r11, r12, r13, r14, r15, rdi, rsi (least
; * significant first) and is then reduced with shifts and adds only.
; * The m and mp arguments are not read by this routine: the reduction is
; * hard-coded for the modulus whose words are
; * (-1, 0x00000000FFFFFFFF, 0, 0xFFFFFFFF00000001), i.e.
; * 2^256 - 2^224 + 2^192 + 2^96 - 1.
; * The final subtraction of the modulus only happens when the reduction
; * overflowed 256 bits.
; *
; * r Result of squaring (in rcx on entry).
; * a Number to square in Montgomery form (in rdx on entry).
; * m Modulus (prime) - not read.
; * mp Montgomery multiplier - not read.
; */
_text SEGMENT READONLY PARA
sp_256_mont_sqr_4 PROC
        push r12
        push r13
        push r14
        push r15
        push rdi
        push rsi
        push rbx
        ; mul writes rdx:rax, so the a pointer is moved out of rdx.
        mov r8, rdx
        ; A[0] * A[1]
        mov rax, QWORD PTR [r8]
        mul QWORD PTR [r8+8]
        mov r11, rax
        mov r12, rdx
        ; A[0] * A[2]
        mov rax, QWORD PTR [r8]
        mul QWORD PTR [r8+16]
        xor r13, r13
        add r12, rax
        adc r13, rdx
        ; A[0] * A[3]
        mov rax, QWORD PTR [r8]
        mul QWORD PTR [r8+24]
        xor r14, r14
        add r13, rax
        adc r14, rdx
        ; A[1] * A[2]
        mov rax, QWORD PTR [r8+8]
        mul QWORD PTR [r8+16]
        xor r15, r15
        add r13, rax
        adc r14, rdx
        adc r15, 0
        ; A[1] * A[3]
        mov rax, QWORD PTR [r8+8]
        mul QWORD PTR [r8+24]
        add r14, rax
        adc r15, rdx
        ; A[2] * A[3]
        mov rax, QWORD PTR [r8+16]
        mul QWORD PTR [r8+24]
        xor rdi, rdi
        add r15, rax
        adc rdi, rdx
        ; Double
        xor rsi, rsi
        add r11, r11
        adc r12, r12
        adc r13, r13
        adc r14, r14
        adc r15, r15
        adc rdi, rdi
        adc rsi, 0
        ; A[0] * A[0]
        ; Low word is result word 0; the high word is held in rbx and
        ; added in with the next square.
        mov rax, QWORD PTR [r8]
        mul rax
        mov r10, rax
        mov rbx, rdx
        ; A[1] * A[1]
        mov rax, QWORD PTR [r8+8]
        mul rax
        add r11, rbx
        adc r12, rax
        adc rdx, 0
        mov rbx, rdx
        ; A[2] * A[2]
        mov rax, QWORD PTR [r8+16]
        mul rax
        add r13, rbx
        adc r14, rax
        adc rdx, 0
        mov rbx, rdx
        ; A[3] * A[3]
        mov rax, QWORD PTR [r8+24]
        mul rax
        add r15, rbx
        adc rdi, rax
        adc rsi, rdx
        ; Start Reduction
        ; The a pointer is dead from here; r8 becomes scratch.
        ; mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
        ; - a[0] << 32 << 192
        ; + (a[0] * 2) << 192
        ; mu is assembled in rax (word 0), r8 (word 1), rbx (word 2),
        ; rdx (word 3).
        mov rax, r10
        mov rdx, r13
        add rdx, r10
        mov r8, r11
        add rdx, r10
        mov rbx, r12
        ; a[0]-a[2] << 32
        shl r10, 32
        shld r12, r8, 32
        shld r11, rax, 32
        ; - a[0] << 32 << 192
        sub rdx, r10
        ; + a[0]-a[2] << 32 << 64
        add r8, r10
        adc rbx, r11
        adc rdx, r12
        ; a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
        ; a += mu << 256
        ; r10 tracks overflow out of the top word: each carry subtracts
        ; one from it (sbb) and the borrow of the final subtraction adds
        ; one back (adc).
        xor r10, r10
        add r14, rax
        adc r15, r8
        adc rdi, rbx
        adc rsi, rdx
        sbb r10, 0
        ; a += mu << 192
        add r13, rax
        adc r14, r8
        adc r15, rbx
        adc rdi, rdx
        adc rsi, 0
        sbb r10, 0
        ; mu <<= 32
        ; r9 receives the 32 bits shifted out of the top word of mu.
        mov r9, rdx
        shld rdx, rbx, 32
        shld rbx, r8, 32
        shld r8, rax, 32
        shr r9, 32
        shl rax, 32
        ; a += (mu << 32) << 64
        add r13, rbx
        adc r14, rdx
        adc r15, r9
        adc rdi, 0
        adc rsi, 0
        sbb r10, 0
        ; a -= (mu << 32) << 192
        sub r13, rax
        sbb r14, r8
        sbb r15, rbx
        sbb rdi, rdx
        sbb rsi, r9
        adc r10, 0
        ; 4294967295 = 0x00000000FFFFFFFF is m[1];
        ; 18446744069414584321 = 0xFFFFFFFF00000001 is m[3].
        mov rax, 4294967295
        mov r8, 18446744069414584321
        ; mask m and sub from result if overflow
        ; m[0] = -1 & mask = mask, m[2] = 0 & mask = 0
        ; rax = m[1] & mask
        and rax, r10
        ; r8 = m[3] & mask
        and r8, r10
        sub r14, r10
        sbb r15, rax
        sbb rdi, 0
        sbb rsi, r8
        mov QWORD PTR [rcx], r14
        mov QWORD PTR [rcx+8], r15
        mov QWORD PTR [rcx+16], rdi
        mov QWORD PTR [rcx+24], rsi
        pop rbx
        pop rsi
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        ret
sp_256_mont_sqr_4 ENDP
_text ENDS
; /* Compare a with b in constant time.
; *
; * Words are compared from most significant to least significant using
; * conditional moves only; there are no branches. r8 is an "all words so
; * far were equal" mask: it starts at -1 and is cleared to 0 at the first
; * differing word, after which both operands are masked to zero so the
; * remaining words cannot change the result.
; *
; * a A single precision integer (in rcx on entry).
; * b A single precision integer (in rdx on entry).
; * return -ve, 0 or +ve if a is less than, equal to or greater than b
; * respectively (the values produced are -1, 0 and 1 in rax).
; */
_text SEGMENT READONLY PARA
sp_256_cmp_4 PROC
push r12
; r9 = 0 (mask value once a difference has been found)
xor r9, r9
; r8 = mask, rax = provisional result, r10 = result for "greater"
mov r8, -1
mov rax, -1
mov r10, 1
; Word 3
mov r11, QWORD PTR [rcx+24]
mov r12, QWORD PTR [rdx+24]
and r11, r8
and r12, r8
sub r11, r12
; a > b: result is 1
cmova rax, r10
; a < b: result is r8, which is still -1 at the first difference
cmovc rax, r8
; words differ: clear the mask
cmovnz r8, r9
; Word 2
mov r11, QWORD PTR [rcx+16]
mov r12, QWORD PTR [rdx+16]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
; Word 1
mov r11, QWORD PTR [rcx+8]
mov r12, QWORD PTR [rdx+8]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
; Word 0
mov r11, QWORD PTR [rcx]
mov r12, QWORD PTR [rdx]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
; All equal: rax and r8 are both still -1, so the xor gives 0.
; Otherwise r8 is 0 and rax is returned unchanged.
xor rax, r8
pop r12
ret
sp_256_cmp_4 ENDP
_text ENDS
; /* Conditionally subtract b from a using the mask m, without branching.
; * Computes r = a - (b & m), so m = -1 subtracts b and m = 0 copies a.
; *
; * r A single precision number receiving the result (in rcx on entry).
; * a A single precision number to subtract from (in rdx on entry).
; * b A single precision number to subtract (in r8 on entry).
; * m Mask value to apply (in r9 on entry).
; * Returns 0 in rax when there is no borrow out of the top word and
; * -1 (all bits set) when there is.
; */
_text SEGMENT READONLY PARA
sp_256_cond_sub_4 PROC
        push r12
        push r13
        push r14
        push r15
        push rdi
        push rsi
        ; Borrow accumulator; writing eax zero-extends to rax.
        xor eax, eax
        ; Load each word of a next to the masked word of b.
        mov r14, QWORD PTR [rdx]
        mov r10, QWORD PTR [r8]
        and r10, r9
        mov r15, QWORD PTR [rdx+8]
        mov r11, QWORD PTR [r8+8]
        and r11, r9
        mov rdi, QWORD PTR [rdx+16]
        mov r12, QWORD PTR [r8+16]
        and r12, r9
        mov rsi, QWORD PTR [rdx+24]
        mov r13, QWORD PTR [r8+24]
        and r13, r9
        ; a - (b & m) with the borrow propagated upwards.
        sub r14, r10
        sbb r15, r11
        sbb rdi, r12
        sbb rsi, r13
        ; rax = 0 - borrow
        sbb rax, 0
        mov QWORD PTR [rcx], r14
        mov QWORD PTR [rcx+8], r15
        mov QWORD PTR [rcx+16], rdi
        mov QWORD PTR [rcx+24], rsi
        pop rsi
        pop rdi
        pop r15
        pop r14
        pop r13
        pop r12
        ret
sp_256_cond_sub_4 ENDP
_text ENDS
; /* Reduce the number back to 256 bits using Montgomery reduction.
; *
; * a is read as 8 words. Four loop iterations each add mu * m at word
; * offset i, with mu = a[i] * mp, which zeroes a[i]. Afterwards a[4..7]
; * minus (m masked by the final carry) is written to a[0..3].
; *
; * a A single precision number to reduce in place (in rcx on entry).
; * m The single precision number representing the modulus (in rdx on
; * entry).
; * mp The digit representing the negative inverse of m mod 2^n (in r8 on
; * entry).
; */
_text SEGMENT READONLY PARA
sp_256_mont_reduce_4 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
; mul writes rdx:rax, so the m pointer is moved out of rdx.
mov r9, rdx
; i = 0
; rdi holds the carry out of a[i+4] from the previous iteration.
xor rdi, rdi
; r10 = remaining iterations, r15 = address of a[i]
mov r10, 4
mov r15, rcx
L_mont_loop_4:
; mu = a[i] * mp
mov r14, QWORD PTR [r15]
imul r14, r8
; a[i+0] += m[0] * mu
mov rax, QWORD PTR [r9]
mov r12, QWORD PTR [r9+8]
mul r14
mov rsi, QWORD PTR [r15]
add rsi, rax
mov r11, rdx
mov QWORD PTR [r15], rsi
adc r11, 0
; a[i+1] += m[1] * mu
mov rax, r12
mul r14
mov r12, QWORD PTR [r9+16]
mov rsi, QWORD PTR [r15+8]
add rax, r11
mov r13, rdx
adc r13, 0
add rsi, rax
mov QWORD PTR [r15+8], rsi
adc r13, 0
; a[i+2] += m[2] * mu
mov rax, r12
mul r14
mov r12, QWORD PTR [r9+24]
mov rsi, QWORD PTR [r15+16]
add rax, r13
mov r11, rdx
adc r11, 0
add rsi, rax
mov QWORD PTR [r15+16], rsi
adc r11, 0
; a[i+3] += m[3] * mu
mov rax, r12
mul r14
mov rsi, QWORD PTR [r15+24]
add rax, r11
; Fold in the carry from the previous iteration and start collecting
; this iteration's carry in rdi.
adc rdx, rdi
mov rdi, 0
adc rdi, 0
add rsi, rax
mov QWORD PTR [r15+24], rsi
adc QWORD PTR [r15+32], rdx
adc rdi, 0
; i += 1
add r15, 8
dec r10
jnz L_mont_loop_4
; rax = 0 - final carry: an all-ones mask when the sum overflowed.
xor rax, rax
mov rdx, QWORD PTR [rcx+32]
mov r10, QWORD PTR [rcx+40]
mov rsi, QWORD PTR [rcx+48]
mov r11, QWORD PTR [rcx+56]
sub rax, rdi
mov r12, QWORD PTR [r9]
mov r13, QWORD PTR [r9+8]
mov r14, QWORD PTR [r9+16]
mov r15, QWORD PTR [r9+24]
; Subtract m only when the mask is set.
and r12, rax
and r13, rax
and r14, rax
and r15, rax
sub rdx, r12
sbb r10, r13
sbb rsi, r14
sbb r11, r15
; The high half, conditionally reduced, becomes the result in a[0..3].
mov QWORD PTR [rcx], rdx
mov QWORD PTR [rcx+8], r10
mov QWORD PTR [rcx+16], rsi
mov QWORD PTR [rcx+24], r11
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_256_mont_reduce_4 ENDP
_text ENDS
; /* Add two Montgomery form numbers. (r = a + b % m)
; *
; * The modulus is hard-coded as the words
; * (-1, 0x00000000FFFFFFFF, 0, 0xFFFFFFFF00000001); the m argument is not
; * read. The modulus is subtracted only when the addition carries out of
; * 256 bits, and once more if that subtraction did not borrow.
; *
; * r Result of addition (in rcx on entry).
; * a First number to add in Montgomery form (in rdx on entry).
; * b Second number to add in Montgomery form (in r8 on entry).
; * m Modulus (prime) - not read.
; */
_text SEGMENT READONLY PARA
sp_256_mont_add_4 PROC
        push r12
        push r13
        ; Modulus words 1 and 3 (word 0 is all ones, word 2 is zero).
        mov r12, 0FFFFFFFFh
        mov r13, 0FFFFFFFF00000001h
        mov rax, QWORD PTR [rdx]
        mov r9, QWORD PTR [rdx+8]
        mov r10, QWORD PTR [rdx+16]
        mov r11, QWORD PTR [rdx+24]
        ; rdx is no longer needed as a pointer; it becomes the mask.
        xor edx, edx
        ; t = a + b
        add rax, QWORD PTR [r8]
        adc r9, QWORD PTR [r8+8]
        adc r10, QWORD PTR [r8+16]
        adc r11, QWORD PTR [r8+24]
        ; mask = 0 - carry
        sbb rdx, 0
        ; t -= modulus & mask
        and r12, rdx
        and r13, rdx
        sub rax, rdx
        sbb r9, r12
        sbb r10, 0
        sbb r11, r13
        ; A borrow cancels the mask; otherwise subtract once more.
        adc rdx, 0
        and r12, rdx
        and r13, rdx
        sub rax, rdx
        sbb r9, r12
        sbb r10, 0
        sbb r11, r13
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], r9
        mov QWORD PTR [rcx+16], r10
        mov QWORD PTR [rcx+24], r11
        pop r13
        pop r12
        ret
sp_256_mont_add_4 ENDP
_text ENDS
; /* Double a Montgomery form number. (r = a + a % m)
; *
; * The modulus is hard-coded as the words
; * (-1, 0x00000000FFFFFFFF, 0, 0xFFFFFFFF00000001); the m argument is not
; * read. The modulus is subtracted only when the doubling carries out of
; * 256 bits, and once more if that subtraction did not borrow.
; *
; * r Result of doubling (in rcx on entry).
; * a Number to double in Montgomery form (in rdx on entry).
; * m Modulus (prime) - not read.
; */
_text SEGMENT READONLY PARA
sp_256_mont_dbl_4 PROC
        push r12
        push r13
        ; Modulus words 1 and 3 (word 0 is all ones, word 2 is zero).
        mov r12, 0FFFFFFFFh
        mov r13, 0FFFFFFFF00000001h
        mov rax, QWORD PTR [rdx]
        mov r9, QWORD PTR [rdx+8]
        mov r10, QWORD PTR [rdx+16]
        mov r11, QWORD PTR [rdx+24]
        ; r8 is the mask register.
        xor r8d, r8d
        ; t = a + a
        add rax, rax
        adc r9, r9
        adc r10, r10
        adc r11, r11
        ; mask = 0 - carry
        sbb r8, 0
        ; t -= modulus & mask
        and r12, r8
        and r13, r8
        sub rax, r8
        sbb r9, r12
        sbb r10, 0
        sbb r11, r13
        ; A borrow cancels the mask; otherwise subtract once more.
        adc r8, 0
        and r12, r8
        and r13, r8
        sub rax, r8
        sbb r9, r12
        sbb r10, 0
        sbb r11, r13
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], r9
        mov QWORD PTR [rcx+16], r10
        mov QWORD PTR [rcx+24], r11
        pop r13
        pop r12
        ret
sp_256_mont_dbl_4 ENDP
_text ENDS
; /* Triple a Montgomery form number. (r = a + a + a % m)
; *
; * Computed as a doubling followed by one more addition of a, each with
; * its own conditional reduction. The modulus is hard-coded as the words
; * (-1, 0x00000000FFFFFFFF, 0, 0xFFFFFFFF00000001); the m argument is not
; * read. Nothing is stored to r until a has been read for the last time.
; *
; * r Result of tripling (in rcx on entry).
; * a Number to triple in Montgomery form (in rdx on entry).
; * m Modulus (prime) - not read.
; */
_text SEGMENT READONLY PARA
sp_256_mont_tpl_4 PROC
        push r12
        push r13
        ; Modulus words 1 and 3 (word 0 is all ones, word 2 is zero).
        mov r12, 0FFFFFFFFh
        mov r13, 0FFFFFFFF00000001h
        mov rax, QWORD PTR [rdx]
        mov r9, QWORD PTR [rdx+8]
        mov r10, QWORD PTR [rdx+16]
        mov r11, QWORD PTR [rdx+24]
        ; r8 is the mask register.
        xor r8d, r8d
        ; t = a + a
        add rax, rax
        adc r9, r9
        adc r10, r10
        adc r11, r11
        ; mask = 0 - carry
        sbb r8, 0
        ; t -= modulus & mask
        and r12, r8
        and r13, r8
        sub rax, r8
        sbb r9, r12
        sbb r10, 0
        sbb r11, r13
        ; A borrow cancels the mask; otherwise subtract once more.
        adc r8, 0
        and r12, r8
        and r13, r8
        sub rax, r8
        sbb r9, r12
        sbb r10, 0
        sbb r11, r13
        ; The masking above consumed the constants; reload them.
        mov r12, 0FFFFFFFFh
        mov r13, 0FFFFFFFF00000001h
        xor r8d, r8d
        ; t += a
        add rax, QWORD PTR [rdx]
        adc r9, QWORD PTR [rdx+8]
        adc r10, QWORD PTR [rdx+16]
        adc r11, QWORD PTR [rdx+24]
        ; mask = 0 - carry
        sbb r8, 0
        ; t -= modulus & mask
        and r12, r8
        and r13, r8
        sub rax, r8
        sbb r9, r12
        sbb r10, 0
        sbb r11, r13
        ; A borrow cancels the mask; otherwise subtract once more.
        adc r8, 0
        and r12, r8
        and r13, r8
        sub rax, r8
        sbb r9, r12
        sbb r10, 0
        sbb r11, r13
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], r9
        mov QWORD PTR [rcx+16], r10
        mov QWORD PTR [rcx+24], r11
        pop r13
        pop r12
        ret
sp_256_mont_tpl_4 ENDP
_text ENDS
; /* Subtract two Montgomery form numbers. (r = a - b % m)
; *
; * The modulus is hard-coded as the words
; * (-1, 0x00000000FFFFFFFF, 0, 0xFFFFFFFF00000001); the m argument is not
; * read. The modulus is added back only when the subtraction borrows, and
; * once more if that addition did not carry.
; *
; * r Result of subtraction (in rcx on entry).
; * a Number to subtract from in Montgomery form (in rdx on entry).
; * b Number to subtract with in Montgomery form (in r8 on entry).
; * m Modulus (prime) - not read.
; */
_text SEGMENT READONLY PARA
sp_256_mont_sub_4 PROC
        push r12
        push r13
        ; Modulus words 1 and 3 (word 0 is all ones, word 2 is zero).
        mov r12, 0FFFFFFFFh
        mov r13, 0FFFFFFFF00000001h
        mov rax, QWORD PTR [rdx]
        mov r9, QWORD PTR [rdx+8]
        mov r10, QWORD PTR [rdx+16]
        mov r11, QWORD PTR [rdx+24]
        ; rdx is no longer needed as a pointer; it becomes the mask.
        xor edx, edx
        ; t = a - b
        sub rax, QWORD PTR [r8]
        sbb r9, QWORD PTR [r8+8]
        sbb r10, QWORD PTR [r8+16]
        sbb r11, QWORD PTR [r8+24]
        ; mask = 0 - borrow
        sbb rdx, 0
        ; t += modulus & mask
        and r12, rdx
        and r13, rdx
        add rax, rdx
        adc r9, r12
        adc r10, 0
        adc r11, r13
        ; A carry cancels the mask; otherwise add once more.
        adc rdx, 0
        and r12, rdx
        and r13, rdx
        add rax, rdx
        adc r9, r12
        adc r10, 0
        adc r11, r13
        mov QWORD PTR [rcx], rax
        mov QWORD PTR [rcx+8], r9
        mov QWORD PTR [rcx+16], r10
        mov QWORD PTR [rcx+24], r11
        pop r13
        pop r12
        ret
sp_256_mont_sub_4 ENDP
_text ENDS
; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
; *
; * When a is odd the modulus is added first so that the sum is even; the
; * 257-bit sum is then shifted right by one bit. The modulus is
; * hard-coded as the words (-1, 0x00000000FFFFFFFF, 0,
; * 0xFFFFFFFF00000001); the m argument is not read.
; *
; * r Result of division by 2 (in rcx on entry).
; * a Number to divide (in rdx on entry).
; * m Modulus (prime) - not read.
; */
_text SEGMENT READONLY PARA
sp_256_div2_4 PROC
        push r12
        push r13
        ; Modulus words 1 and 3 (word 0 is all ones, word 2 is zero).
        mov r12, 0FFFFFFFFh
        mov r13, 0FFFFFFFF00000001h
        mov rax, QWORD PTR [rdx]
        mov r9, QWORD PTR [rdx+8]
        mov r10, QWORD PTR [rdx+16]
        mov r11, QWORD PTR [rdx+24]
        ; r8 = 0 - (a[0] & 1): all ones when a is odd, zero otherwise.
        mov r8, rax
        and r8, 1
        neg r8
        and r12, r8
        and r13, r8
        ; t = a + (modulus & mask)
        add rax, r8
        adc r9, r12
        adc r10, 0
        adc r11, r13
        ; r8 = carry out of the addition (bit 256 of t)
        mov r8, 0
        adc r8, 0
        ; Shift the 257-bit value right by one, storing as we go.
        shrd rax, r9, 1
        mov QWORD PTR [rcx], rax
        shrd r9, r10, 1
        mov QWORD PTR [rcx+8], r9
        shrd r10, r11, 1
        mov QWORD PTR [rcx+16], r10
        shrd r11, r8, 1
        mov QWORD PTR [rcx+24], r11
        pop r13
        pop r12
        ret
sp_256_div2_4 ENDP
_text ENDS
IFNDEF WC_NO_CACHE_RESISTANT
; /* Touch each possible point that could be being copied.
; *
; * Reads table entries 1..32 (200 bytes apart) in a fixed order and keeps,
; * by masking, only the entry whose number equals idx. For each entry the
; * 32 bytes at offsets 0, 64 and 128 are selected and written to the same
; * offsets in r. Entry 0 is never read; if idx matches none of 1..32 the
; * three 32-byte fields of r are written as zero.
; * xmm6-xmm15 are callee-saved in the Microsoft x64 calling convention, so
; * they are saved on the stack on entry and restored before returning.
; *
; * r Point to copy into (in rcx on entry).
; * table Table - start of the entries to access (in rdx on entry).
; * idx Index of point to retrieve (low 32 bits of r8 on entry).
; */
_text SEGMENT READONLY PARA
sp_256_get_point_33_4 PROC
        ; Preserve the non-volatile XMM registers used below. The stack is
        ; not 16-byte aligned here, hence the unaligned moves.
        sub rsp, 160
        movdqu OWORD PTR [rsp], xmm6
        movdqu OWORD PTR [rsp+16], xmm7
        movdqu OWORD PTR [rsp+32], xmm8
        movdqu OWORD PTR [rsp+48], xmm9
        movdqu OWORD PTR [rsp+64], xmm10
        movdqu OWORD PTR [rsp+80], xmm11
        movdqu OWORD PTR [rsp+96], xmm12
        movdqu OWORD PTR [rsp+112], xmm13
        movdqu OWORD PTR [rsp+128], xmm14
        movdqu OWORD PTR [rsp+144], xmm15
        mov rax, 1
        ; xmm13 = idx in every 32-bit lane (after the pshufd below)
        movd xmm13, r8d
        ; Skip entry 0.
        add rdx, 200
        ; xmm15 = 1 in every 32-bit lane (after the pshufd below)
        movd xmm15, eax
        ; rax = number of entries to scan
        mov rax, 32
        pshufd xmm15, xmm15, 0
        pshufd xmm13, xmm13, 0
        ; xmm0-xmm5 accumulate the selected entry.
        pxor xmm0, xmm0
        pxor xmm1, xmm1
        pxor xmm2, xmm2
        pxor xmm3, xmm3
        pxor xmm4, xmm4
        pxor xmm5, xmm5
        ; xmm14 = number of the current entry, starting at 1
        movdqa xmm14, xmm15
L_256_get_point_33_4_start:
        ; xmm12 = all ones when the current entry number equals idx
        movdqa xmm12, xmm14
        paddd xmm14, xmm15
        pcmpeqd xmm12, xmm13
        movdqu xmm6, [rdx]
        movdqu xmm7, [rdx+16]
        movdqu xmm8, [rdx+64]
        movdqu xmm9, [rdx+80]
        movdqu xmm10, [rdx+128]
        movdqu xmm11, [rdx+144]
        add rdx, 200
        pand xmm6, xmm12
        pand xmm7, xmm12
        pand xmm8, xmm12
        pand xmm9, xmm12
        pand xmm10, xmm12
        pand xmm11, xmm12
        por xmm0, xmm6
        por xmm1, xmm7
        por xmm2, xmm8
        por xmm3, xmm9
        por xmm4, xmm10
        por xmm5, xmm11
        dec rax
        jnz L_256_get_point_33_4_start
        movdqu [rcx], xmm0
        movdqu [rcx+16], xmm1
        movdqu [rcx+64], xmm2
        movdqu [rcx+80], xmm3
        movdqu [rcx+128], xmm4
        movdqu [rcx+144], xmm5
        ; Restore the caller's non-volatile XMM registers.
        movdqu xmm6, OWORD PTR [rsp]
        movdqu xmm7, OWORD PTR [rsp+16]
        movdqu xmm8, OWORD PTR [rsp+32]
        movdqu xmm9, OWORD PTR [rsp+48]
        movdqu xmm10, OWORD PTR [rsp+64]
        movdqu xmm11, OWORD PTR [rsp+80]
        movdqu xmm12, OWORD PTR [rsp+96]
        movdqu xmm13, OWORD PTR [rsp+112]
        movdqu xmm14, OWORD PTR [rsp+128]
        movdqu xmm15, OWORD PTR [rsp+144]
        add rsp, 160
        ret
sp_256_get_point_33_4 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Touch each possible point that could be being copied.
; *
; * AVX2 version. Reads table entries 1..32 (200 bytes apart) in a fixed
; * order and keeps, by masking, only the entry whose number equals idx.
; * For each entry the 32 bytes at offsets 0, 64 and 128 are selected and
; * written to the same offsets in r. Entry 0 is never read; if idx matches
; * none of 1..32 the three 32-byte fields of r are written as zero.
; * xmm6-xmm9 are callee-saved in the Microsoft x64 calling convention, so
; * they are saved on the stack on entry and restored before returning.
; *
; * r Point to copy into (in rcx on entry).
; * table Table - start of the entries to access (in rdx on entry).
; * idx Index of point to retrieve (low 32 bits of r8 on entry).
; */
_text SEGMENT READONLY PARA
sp_256_get_point_33_avx2_4 PROC
        ; Preserve the non-volatile XMM registers used below (as ymm6-ymm9).
        sub rsp, 64
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        mov rax, 1
        movd xmm7, r8d
        ; Skip entry 0.
        add rdx, 200
        movd xmm9, eax
        ; rax = number of entries to scan
        mov rax, 32
        ; An all-zero index vector makes vpermd broadcast lane 0:
        ; ymm7 = idx in every lane, ymm9 = 1 in every lane.
        vpxor ymm8, ymm8, ymm8
        vpermd ymm7, ymm8, ymm7
        vpermd ymm9, ymm8, ymm9
        ; ymm0-ymm2 accumulate the selected entry.
        vpxor ymm0, ymm0, ymm0
        vpxor ymm1, ymm1, ymm1
        vpxor ymm2, ymm2, ymm2
        ; ymm8 = number of the current entry, starting at 1
        vmovdqa ymm8, ymm9
L_256_get_point_33_avx2_4_start:
        ; ymm6 = all ones when the current entry number equals idx
        vpcmpeqd ymm6, ymm8, ymm7
        vpaddd ymm8, ymm8, ymm9
        vmovupd ymm3, [rdx]
        vmovupd ymm4, [rdx+64]
        vmovupd ymm5, [rdx+128]
        add rdx, 200
        vpand ymm3, ymm3, ymm6
        vpand ymm4, ymm4, ymm6
        vpand ymm5, ymm5, ymm6
        vpor ymm0, ymm0, ymm3
        vpor ymm1, ymm1, ymm4
        vpor ymm2, ymm2, ymm5
        dec rax
        jnz L_256_get_point_33_avx2_4_start
        vmovupd YMMWORD PTR [rcx], ymm0
        vmovupd YMMWORD PTR [rcx+64], ymm1
        vmovupd YMMWORD PTR [rcx+128], ymm2
        ; Restore the caller's non-volatile XMM registers.
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        vmovdqu xmm8, OWORD PTR [rsp+32]
        vmovdqu xmm9, OWORD PTR [rsp+48]
        add rsp, 64
        ret
sp_256_get_point_33_avx2_4 ENDP
_text ENDS
ENDIF
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Multiply two Montgomery form numbers mod the modulus (prime).
; * (r = a * b mod m)
; *
; * BMI2/ADX version: products come from mulx (which takes one operand
; * from rdx and leaves the flags alone) and are accumulated on two
; * independent carry chains, adcx (carry flag) and adox (overflow flag).
; * The 8-word product ends up in r8..r15 (least significant first) and is
; * then reduced with shifts and adds only.
; * The m and mp arguments are not read by this routine: the reduction is
; * hard-coded for the modulus whose words are
; * (-1, 0x00000000FFFFFFFF, 0, 0xFFFFFFFF00000001), i.e.
; * 2^256 - 2^224 + 2^192 + 2^96 - 1.
; *
; * r Result of multiplication (in rcx on entry).
; * a First number to multiply in Montgomery form (in rdx on entry).
; * b Second number to multiply in Montgomery form (in r8 on entry).
; * m Modulus (prime) - not read.
; * mp Montgomery multiplier - not read.
; */
_text SEGMENT READONLY PARA
sp_256_mont_mul_avx2_4 PROC
push rbx
push r12
push r13
push r14
push r15
push rbp
push rdi
push rsi
; rbp = b, rdi = a; rdx is needed as the implicit mulx operand.
mov rbp, r8
mov rdi, rdx
; A[0] * B[0]
mov rdx, QWORD PTR [rbp]
mulx r9, r8, QWORD PTR [rdi]
; A[2] * B[0]
mulx r11, r10, QWORD PTR [rdi+16]
; A[1] * B[0]
mulx rsi, rax, QWORD PTR [rdi+8]
; xor zeroes r15 and clears both the carry and overflow flags.
xor r15, r15
adcx r9, rax
; A[1] * B[3]
mov rdx, QWORD PTR [rbp+24]
mulx r13, r12, QWORD PTR [rdi+8]
adcx r10, rsi
; A[0] * B[1]
mov rdx, QWORD PTR [rbp+8]
mulx rsi, rax, QWORD PTR [rdi]
adox r9, rax
; A[2] * B[1]
mulx r14, rax, QWORD PTR [rdi+16]
adox r10, rsi
adcx r11, rax
; A[1] * B[2]
mov rdx, QWORD PTR [rbp+16]
mulx rsi, rax, QWORD PTR [rdi+8]
adcx r12, r14
adox r11, rax
adcx r13, r15
adox r12, rsi
; A[0] * B[2]
mulx rsi, rax, QWORD PTR [rdi]
adox r13, r15
; Restart both carry chains; r14 is zeroed for the next column.
xor r14, r14
adcx r10, rax
; A[1] * B[1]
mov rdx, QWORD PTR [rbp+8]
mulx rax, rdx, QWORD PTR [rdi+8]
adcx r11, rsi
adox r10, rdx
; A[3] * B[1]
mov rdx, QWORD PTR [rbp+8]
adox r11, rax
mulx rsi, rax, QWORD PTR [rdi+24]
adcx r12, rax
; A[2] * B[2]
mov rdx, QWORD PTR [rbp+16]
mulx rax, rdx, QWORD PTR [rdi+16]
adcx r13, rsi
adox r12, rdx
; A[3] * B[3]
mov rdx, QWORD PTR [rbp+24]
adox r13, rax
mulx rsi, rax, QWORD PTR [rdi+24]
adox r14, r15
adcx r14, rax
; A[0] * B[3]
mulx rax, rdx, QWORD PTR [rdi]
adcx r15, rsi
; Restart both carry chains; rsi stays zero as the carry sink.
xor rsi, rsi
adcx r11, rdx
; A[3] * B[0]
mov rdx, QWORD PTR [rdi+24]
adcx r12, rax
mulx rax, rbx, QWORD PTR [rbp]
adox r11, rbx
adox r12, rax
; A[3] * B[2]
mulx rax, rdx, QWORD PTR [rbp+16]
adcx r13, rdx
; A[2] * B[3]
mov rdx, QWORD PTR [rbp+24]
adcx r14, rax
mulx rdx, rax, QWORD PTR [rdi+16]
adcx r15, rsi
adox r13, rax
adox r14, rdx
adox r15, rsi
; Start Reduction
; Both pointers are dead from here; rdi and rbp become scratch.
; mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
; - a[0] << 32 << 192
; + (a[0] * 2) << 192
; mu is assembled in rax (word 0), rdi (word 1), rbp (word 2), rdx (word 3).
mov rax, r8
mov rdx, r11
add rdx, r8
mov rdi, r9
add rdx, r8
mov rbp, r10
; a[0]-a[2] << 32
shl r8, 32
shld r10, rdi, 32
shld r9, rax, 32
; - a[0] << 32 << 192
sub rdx, r8
; + a[0]-a[2] << 32 << 64
add rdi, r8
adc rbp, r9
adc rdx, r10
; a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
; a += mu << 256
; r8 tracks overflow out of the top word: each carry subtracts one from
; it (sbb) and the borrow of the final subtraction adds one back (adc).
xor r8, r8
add r12, rax
adc r13, rdi
adc r14, rbp
adc r15, rdx
sbb r8, 0
; a += mu << 192
add r11, rax
adc r12, rdi
adc r13, rbp
adc r14, rdx
adc r15, 0
sbb r8, 0
; mu <<= 32
; rsi receives the 32 bits shifted out of the top word of mu.
mov rsi, rdx
shld rdx, rbp, 32
shld rbp, rdi, 32
shld rdi, rax, 32
shr rsi, 32
shl rax, 32
; a += (mu << 32) << 64
add r11, rbp
adc r12, rdx
adc r13, rsi
adc r14, 0
adc r15, 0
sbb r8, 0
; a -= (mu << 32) << 192
sub r11, rax
sbb r12, rdi
sbb r13, rbp
sbb r14, rdx
sbb r15, rsi
adc r8, 0
; 4294967295 = 0x00000000FFFFFFFF is m[1];
; 18446744069414584321 = 0xFFFFFFFF00000001 is m[3].
mov rax, 4294967295
mov rdi, 18446744069414584321
; mask m and sub from result if overflow
; m[0] = -1 & mask = mask
; rax = m[1] & mask
and rax, r8
; m[2] = 0 & mask = 0
; rdi = m[3] & mask
and rdi, r8
sub r12, r8
sbb r13, rax
sbb r14, 0
sbb r15, rdi
mov QWORD PTR [rcx], r12
mov QWORD PTR [rcx+8], r13
mov QWORD PTR [rcx+16], r14
mov QWORD PTR [rcx+24], r15
pop rsi
pop rdi
pop rbp
pop r15
pop r14
pop r13
pop r12
pop rbx
ret
sp_256_mont_mul_avx2_4 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
; *
; * BMI2/ADX version: products come from mulx (which takes one operand
; * from rdx and leaves the flags alone). The off-diagonal products are
; * summed first, then doubled on the adcx (carry flag) chain while the
; * squares A[i] * A[i] are added on the adox (overflow flag) chain.
; * The 8-word square ends up in r8..r15 (least significant first) and is
; * then reduced with shifts and adds only.
; * The m and mp arguments are not read by this routine: the reduction is
; * hard-coded for the modulus whose words are
; * (-1, 0x00000000FFFFFFFF, 0, 0xFFFFFFFF00000001), i.e.
; * 2^256 - 2^224 + 2^192 + 2^96 - 1.
; *
; * r Result of squaring (in rcx on entry).
; * a Number to square in Montgomery form (in rdx on entry).
; * m Modulus (prime) - not read.
; * mp Montgomery multiplier - not read.
; */
_text SEGMENT READONLY PARA
sp_256_mont_sqr_avx2_4 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
; rax = a; rdx is needed as the implicit mulx operand.
mov rax, rdx
; A[0] * A[1]
mov rdx, QWORD PTR [rax]
; r15 temporarily holds A[2] for the next group of products.
mov r15, QWORD PTR [rax+16]
mulx r10, r9, QWORD PTR [rax+8]
; A[0] * A[3]
mulx r12, r11, QWORD PTR [rax+24]
; A[2] * A[1]
mov rdx, r15
mulx rbx, rsi, QWORD PTR [rax+8]
; A[2] * A[3]
mulx r14, r13, QWORD PTR [rax+24]
; xor zeroes r15 and clears both the carry and overflow flags.
xor r15, r15
adox r11, rsi
adox r12, rbx
; A[2] * A[0]
mulx rbx, rsi, QWORD PTR [rax]
; A[1] * A[3]
mov rdx, QWORD PTR [rax+8]
adox r13, r15
mulx r8, rdi, QWORD PTR [rax+24]
adcx r10, rsi
adox r14, r15
adcx r11, rbx
adcx r12, rdi
adcx r13, r8
adcx r14, r15
; Double with Carry Flag
; The xor restarts both carry chains.
xor r15, r15
; A[0] * A[0]
mov rdx, QWORD PTR [rax]
mulx rdi, r8, rdx
adcx r9, r9
adcx r10, r10
adox r9, rdi
; A[1] * A[1]
mov rdx, QWORD PTR [rax+8]
mulx rbx, rsi, rdx
adcx r11, r11
adox r10, rsi
; A[2] * A[2]
mov rdx, QWORD PTR [rax+16]
mulx rsi, rdi, rdx
adcx r12, r12
adox r11, rbx
adcx r13, r13
adox r12, rdi
adcx r14, r14
; A[3] * A[3]
mov rdx, QWORD PTR [rax+24]
mulx rbx, rdi, rdx
adox r13, rsi
adcx r15, r15
adox r14, rdi
adox r15, rbx
; Start Reduction
; The a pointer is dead from here; rax becomes scratch.
; mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
; - a[0] << 32 << 192
; + (a[0] * 2) << 192
; mu is assembled in rdi (word 0), rax (word 1), rsi (word 2), rdx (word 3).
mov rdi, r8
mov rdx, r11
add rdx, r8
mov rax, r9
add rdx, r8
mov rsi, r10
; a[0]-a[2] << 32
shl r8, 32
shld r10, rax, 32
shld r9, rdi, 32
; - a[0] << 32 << 192
sub rdx, r8
; + a[0]-a[2] << 32 << 64
add rax, r8
adc rsi, r9
adc rdx, r10
; a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
; a += mu << 256
; r8 tracks overflow out of the top word: each carry subtracts one from
; it (sbb) and the borrow of the final subtraction adds one back (adc).
xor r8, r8
add r12, rdi
adc r13, rax
adc r14, rsi
adc r15, rdx
sbb r8, 0
; a += mu << 192
add r11, rdi
adc r12, rax
adc r13, rsi
adc r14, rdx
adc r15, 0
sbb r8, 0
; mu <<= 32
; rbx receives the 32 bits shifted out of the top word of mu.
mov rbx, rdx
shld rdx, rsi, 32
shld rsi, rax, 32
shld rax, rdi, 32
shr rbx, 32
shl rdi, 32
; a += (mu << 32) << 64
add r11, rsi
adc r12, rdx
adc r13, rbx
adc r14, 0
adc r15, 0
sbb r8, 0
; a -= (mu << 32) << 192
sub r11, rdi
sbb r12, rax
sbb r13, rsi
sbb r14, rdx
sbb r15, rbx
adc r8, 0
; 4294967295 = 0x00000000FFFFFFFF is m[1];
; 18446744069414584321 = 0xFFFFFFFF00000001 is m[3].
mov rdi, 4294967295
mov rax, 18446744069414584321
; mask m and sub from result if overflow
; m[0] = -1 & mask = mask
; rdi = m[1] & mask
and rdi, r8
; m[2] = 0 & mask = 0
; rax = m[3] & mask
and rax, r8
sub r12, r8
sbb r13, rdi
sbb r14, 0
sbb r15, rax
mov QWORD PTR [rcx], r12
mov QWORD PTR [rcx+8], r13
mov QWORD PTR [rcx+16], r14
mov QWORD PTR [rcx+24], r15
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_256_mont_sqr_avx2_4 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract and 0 when not subtracting.
; *
; * r A single precision number representing the conditional subtract result.
; * a A single precision number to subtract from.
; * b A single precision number to subtract.
; * m Mask value to apply to every word of b.
; *
; * Microsoft x64 arguments: rcx = r, rdx = a, r8 = b, r9 = m.
; * rax is 0 when the subtraction did not borrow and all ones when it did.
; * All words of a and b are read before any word of r is written.
; */
_text SEGMENT READONLY PARA
sp_256_cond_sub_avx2_4 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        ; Minuend: the four words of a.
        mov     r10, QWORD PTR [rdx]
        mov     r11, QWORD PTR [rdx+8]
        mov     r12, QWORD PTR [rdx+16]
        mov     r13, QWORD PTR [rdx+24]
        ; Subtrahend: each word of b is masked as soon as it is loaded.
        mov     r14, QWORD PTR [r8]
        and     r14, r9
        mov     r15, QWORD PTR [r8+8]
        and     r15, r9
        mov     rdi, QWORD PTR [r8+16]
        and     rdi, r9
        mov     rsi, QWORD PTR [r8+24]
        and     rsi, r9
        ; r10..r13 = a - (b & m), borrow left in CF.
        sub     r10, r14
        sbb     r11, r15
        sbb     r12, rdi
        sbb     r13, rsi
        ; rax = 0 - CF: turns the final borrow into a 0 / all-ones value.
        sbb     rax, rax
        mov     QWORD PTR [rcx], r10
        mov     QWORD PTR [rcx+8], r11
        mov     QWORD PTR [rcx+16], r12
        mov     QWORD PTR [rcx+24], r13
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_256_cond_sub_avx2_4 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Reduce the number back to 256 bits using Montgomery reduction.
; *
; * a A single precision number to reduce in place.
; * m The single precision number representing the modulus.
; * mp The digit representing the negative inverse of m mod 2^n.
; *
; * Microsoft x64 arguments: rcx = a (eight words read, four written back),
; * rdx = m, r8 = mp.
; *
; * NOTE(review): the multiply-accumulate rounds read the modulus words from
; * m, but the final conditional subtraction uses immediate constants (the
; * same values the *_order_* routines in this file use) rather than m.
; * Presumably callers only pass that modulus - confirm.
; */
_text SEGMENT READONLY PARA
sp_256_mont_reduce_avx2_4 PROC
; Save the nonvolatile registers used as scratch.
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
; rax = a, r10 = m, r11 = mp; rcx/rdx/r8 are reused by mulx below.
mov rax, rcx
mov r10, rdx
mov r11, r8
; Working window: r14, r15, rdi, rsi = a[0..3].
mov r14, QWORD PTR [rax]
mov r15, QWORD PTR [rax+8]
mov rdi, QWORD PTR [rax+16]
mov rsi, QWORD PTR [rax+24]
; r13 = carry word between rounds, r12 = constant zero; the xor also clears
; CF and OF for the adcx (CF) / adox (OF) chains.
xor r13, r13
xor r12, r12
; a[0-4] += m[0-3] * mu = m[0-3] * (a[0] * mp)
mov rbx, QWORD PTR [rax+32]
; mu = a[0] * mp
; Only the low word of the product is kept (rdx); the high word in rcx is
; overwritten by a later mulx without being read.
mov rdx, r14
mulx rcx, rdx, r11
; a[0] += m[0] * mu
mulx r9, r8, QWORD PTR [r10]
adcx r14, r8
; a[1] += m[1] * mu
mulx rcx, r8, QWORD PTR [r10+8]
adox r15, r9
adcx r15, r8
; a[2] += m[2] * mu
mulx r9, r8, QWORD PTR [r10+16]
adox rdi, rcx
adcx rdi, r8
; a[3] += m[3] * mu
mulx rcx, r8, QWORD PTR [r10+24]
adox rsi, r9
adcx rsi, r8
; a[4] += carry
adox rbx, rcx
adcx rbx, r12
; carry
; Both chains are folded into r13, which leaves CF and OF clear for the
; next round.
adox r13, r12
adcx r13, r12
; a[1-5] += m[0-3] * mu = m[0-3] * (a[1] * mp)
; r14 (old a[0]) is no longer needed and is reloaded with a[5].
mov r14, QWORD PTR [rax+40]
; mu = a[1] * mp
mov rdx, r15
mulx rcx, rdx, r11
; a[1] += m[0] * mu
mulx r9, r8, QWORD PTR [r10]
adcx r15, r8
; a[2] += m[1] * mu
mulx rcx, r8, QWORD PTR [r10+8]
adox rdi, r9
adcx rdi, r8
; a[3] += m[2] * mu
mulx r9, r8, QWORD PTR [r10+16]
adox rsi, rcx
adcx rsi, r8
; a[4] += m[3] * mu
mulx rcx, r8, QWORD PTR [r10+24]
adox rbx, r9
adcx rbx, r8
; a[5] += carry
adox r14, rcx
adcx r14, r13
; mov leaves the flags untouched, so r13 can be reset mid-chain.
mov r13, r12
; carry
adox r13, r12
adcx r13, r12
; a[2-6] += m[0-3] * mu = m[0-3] * (a[2] * mp)
mov r15, QWORD PTR [rax+48]
; mu = a[2] * mp
mov rdx, rdi
mulx rcx, rdx, r11
; a[2] += m[0] * mu
mulx r9, r8, QWORD PTR [r10]
adcx rdi, r8
; a[3] += m[1] * mu
mulx rcx, r8, QWORD PTR [r10+8]
adox rsi, r9
adcx rsi, r8
; a[4] += m[2] * mu
mulx r9, r8, QWORD PTR [r10+16]
adox rbx, rcx
adcx rbx, r8
; a[5] += m[3] * mu
mulx rcx, r8, QWORD PTR [r10+24]
adox r14, r9
adcx r14, r8
; a[6] += carry
adox r15, rcx
adcx r15, r13
mov r13, r12
; carry
adox r13, r12
adcx r13, r12
; a[3-7] += m[0-3] * mu = m[0-3] * (a[3] * mp)
mov rdi, QWORD PTR [rax+56]
; mu = a[3] * mp
mov rdx, rsi
mulx rcx, rdx, r11
; a[3] += m[0] * mu
mulx r9, r8, QWORD PTR [r10]
adcx rsi, r8
; a[4] += m[1] * mu
mulx rcx, r8, QWORD PTR [r10+8]
adox rbx, r9
adcx rbx, r8
; a[5] += m[2] * mu
mulx r9, r8, QWORD PTR [r10+16]
adox r14, rcx
adcx r14, r8
; a[6] += m[3] * mu
mulx rcx, r8, QWORD PTR [r10+24]
adox r15, r9
adcx r15, r8
; a[7] += carry
adox rdi, rcx
adcx rdi, r13
mov r13, r12
; carry
adox r13, r12
adcx r13, r12
; Subtract mod if carry
; r13 = 0 - carry is used as the mask for the four immediate words; the
; third word subtracted is the mask itself (all ones & mask).
neg r13
mov r8, 17562291160714782033
mov r9, 13611842547513532036
mov rdx, 18446744069414584320
and r8, r13
and r9, r13
and rdx, r13
sub rbx, r8
sbb r14, r9
sbb r15, r13
sbb rdi, rdx
; The reduced value (held in rbx, r14, r15, rdi) is written to a[0..3].
mov QWORD PTR [rax], rbx
mov QWORD PTR [rax+8], r14
mov QWORD PTR [rax+16], r15
mov QWORD PTR [rax+24], rdi
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_256_mont_reduce_avx2_4 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
; *
; * r Result of division by 2.
; * a Number to divide.
; * m Modulus (prime).
; *
; * Microsoft x64 arguments: rcx = r, rdx = a. The register carrying m (r8)
; * is overwritten without being read; the words added to an odd input are
; * the immediates -1, 0xffffffff, 0 and 0xffffffff00000001 (low to high).
; */
_text SEGMENT READONLY PARA
sp_256_div2_avx2_4 PROC
        push    r12
        push    r13
        ; Load a and derive the odd mask from its lowest bit while loading.
        mov     rax, QWORD PTR [rdx]
        mov     r13, rax
        mov     r8, QWORD PTR [rdx+8]
        and     r13, 1
        mov     r9, QWORD PTR [rdx+16]
        neg     r13                         ; r13 = all ones when a is odd
        mov     r10, QWORD PTR [rdx+24]
        ; Masked modulus words 1 and 3 (word 0 is the mask, word 2 is 0).
        mov     r11, 4294967295
        mov     r12, 18446744069414584321
        and     r11, r13
        and     r12, r13
        ; a += modulus when odd, which makes the sum even.
        add     rax, r13
        adc     r8, r11
        adc     r9, 0
        adc     r10, r12
        ; r13 = 0 - CF; only bit 0 is consumed by the last shrd, and that
        ; bit equals the carry out of the addition.
        sbb     r13, r13
        ; Shift the five-word value right by one bit.
        shrd    rax, r8, 1
        shrd    r8, r9, 1
        shrd    r9, r10, 1
        shrd    r10, r13, 1
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r8
        mov     QWORD PTR [rcx+16], r9
        mov     QWORD PTR [rcx+24], r10
        pop     r13
        pop     r12
        ret
sp_256_div2_avx2_4 ENDP
_text ENDS
ENDIF
IFNDEF WC_NO_CACHE_RESISTANT
; /* Touch each possible entry that could be being copied.
; *
; * Entries 1 to 63 are all read and merged through a compare mask, so the
; * memory access pattern does not depend on idx. Entry 0 is never read: the
; * output is all zero when idx matches none of 1..63.
; *
; * r Point to copy into: 32 bytes are written at r and 32 bytes at r+64.
; * table Table - start of the entries to access (64 bytes per entry).
; * idx Index of entry to retrieve.
; *
; * Microsoft x64 arguments: rcx = r, rdx = table, r8d = idx.
; * xmm6-xmm11 are nonvolatile in the Microsoft x64 calling convention, so
; * they are spilled to the stack on entry and restored before returning.
; */
_text SEGMENT READONLY PARA
sp_256_get_entry_64_4 PROC
        ; Preserve the nonvolatile XMM registers used below. movdqu is used
        ; because rsp is not 16-byte aligned here.
        sub     rsp, 96
        movdqu  OWORD PTR [rsp], xmm6
        movdqu  OWORD PTR [rsp+16], xmm7
        movdqu  OWORD PTR [rsp+32], xmm8
        movdqu  OWORD PTR [rsp+48], xmm9
        movdqu  OWORD PTR [rsp+64], xmm10
        movdqu  OWORD PTR [rsp+80], xmm11
        ; xmm9 = idx in every lane, xmm11 = 1 in every lane.
        mov     rax, 1
        movd    xmm9, r8d
        ; Start from entry 1.
        add     rdx, 64
        movd    xmm11, eax
        ; rax = number of entries to visit (1..63).
        mov     rax, 63
        pshufd  xmm11, xmm11, 0
        pshufd  xmm9, xmm9, 0
        ; xmm0..xmm3 accumulate the selected 64-byte entry.
        pxor    xmm0, xmm0
        pxor    xmm1, xmm1
        pxor    xmm2, xmm2
        pxor    xmm3, xmm3
        ; xmm10 = index of the entry about to be read, starting at 1.
        movdqa  xmm10, xmm11
L_256_get_entry_64_4_start:
        ; xmm8 = all ones when the current index equals idx, else zero.
        movdqa  xmm8, xmm10
        paddd   xmm10, xmm11
        pcmpeqd xmm8, xmm9
        movdqu  xmm4, [rdx]
        movdqu  xmm5, [rdx+16]
        movdqu  xmm6, [rdx+32]
        movdqu  xmm7, [rdx+48]
        add     rdx, 64
        pand    xmm4, xmm8
        pand    xmm5, xmm8
        pand    xmm6, xmm8
        pand    xmm7, xmm8
        por     xmm0, xmm4
        por     xmm1, xmm5
        por     xmm2, xmm6
        por     xmm3, xmm7
        dec     rax
        jnz     L_256_get_entry_64_4_start
        ; First 32 bytes of the entry go to r, the second 32 bytes to r+64.
        movdqu  [rcx], xmm0
        movdqu  [rcx+16], xmm1
        movdqu  [rcx+64], xmm2
        movdqu  [rcx+80], xmm3
        ; Restore the caller's nonvolatile XMM registers.
        movdqu  xmm6, OWORD PTR [rsp]
        movdqu  xmm7, OWORD PTR [rsp+16]
        movdqu  xmm8, OWORD PTR [rsp+32]
        movdqu  xmm9, OWORD PTR [rsp+48]
        movdqu  xmm10, OWORD PTR [rsp+64]
        movdqu  xmm11, OWORD PTR [rsp+80]
        add     rsp, 96
        ret
sp_256_get_entry_64_4 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Touch each possible entry that could be being copied.
; *
; * Entries 1 to 63 are all read and merged through a compare mask, so the
; * memory access pattern does not depend on idx. Entry 0 is never read: the
; * output is all zero when idx matches none of 1..63. The iteration count
; * matches sp_256_get_entry_64_4, so no memory beyond entry 63 is read.
; *
; * r Point to copy into: 32 bytes are written at r and 32 bytes at r+64.
; * table Table - start of the entries to access (64 bytes per entry).
; * idx Index of entry to retrieve.
; *
; * Microsoft x64 arguments: rcx = r, rdx = table, r8d = idx.
; * xmm6 and xmm7 are nonvolatile in the Microsoft x64 calling convention,
; * so they are spilled to the stack on entry and restored before returning.
; */
_text SEGMENT READONLY PARA
sp_256_get_entry_64_avx2_4 PROC
        ; Preserve the nonvolatile low halves of ymm6 / ymm7.
        sub     rsp, 32
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        ; ymm5 = idx in every lane, ymm7 = 1 in every lane (vpermd with an
        ; all-zero index vector broadcasts element 0).
        mov     rax, 1
        vmovd   xmm5, r8d
        ; Start from entry 1.
        add     rdx, 64
        vmovd   xmm7, eax
        ; rax = number of entries to visit (1..63).
        mov     rax, 63
        vpxor   ymm6, ymm6, ymm6
        vpermd  ymm5, ymm6, ymm5
        vpermd  ymm7, ymm6, ymm7
        ; ymm0 / ymm1 accumulate the two 32-byte halves of the entry.
        vpxor   ymm0, ymm0, ymm0
        vpxor   ymm1, ymm1, ymm1
        ; ymm6 = index of the entry about to be read, starting at 1.
        vmovdqa ymm6, ymm7
L_256_get_entry_64_avx2_4_start:
        ; ymm4 = all ones when the current index equals idx, else zero.
        vpcmpeqd ymm4, ymm6, ymm5
        vpaddd  ymm6, ymm6, ymm7
        vmovupd ymm2, [rdx]
        vmovupd ymm3, [rdx+32]
        add     rdx, 64
        vpand   ymm2, ymm2, ymm4
        vpand   ymm3, ymm3, ymm4
        vpor    ymm0, ymm0, ymm2
        vpor    ymm1, ymm1, ymm3
        dec     rax
        jnz     L_256_get_entry_64_avx2_4_start
        vmovupd YMMWORD PTR [rcx], ymm0
        vmovupd YMMWORD PTR [rcx+64], ymm1
        ; Restore the caller's nonvolatile XMM registers.
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        add     rsp, 32
        ret
sp_256_get_entry_64_avx2_4 ENDP
_text ENDS
ENDIF
ENDIF
IFNDEF WC_NO_CACHE_RESISTANT
; /* Touch each possible entry that could be being copied.
; *
; * Entries 1 to 64 are all read and merged through a compare mask, so the
; * memory access pattern does not depend on idx. Entry 0 is never read: the
; * output is all zero when idx matches none of 1..64.
; *
; * r Point to copy into: 32 bytes are written at r and 32 bytes at r+64.
; * table Table - start of the entries to access (64 bytes per entry).
; * idx Index of entry to retrieve.
; *
; * Microsoft x64 arguments: rcx = r, rdx = table, r8d = idx.
; * xmm6-xmm11 are nonvolatile in the Microsoft x64 calling convention, so
; * they are spilled to the stack on entry and restored before returning.
; */
_text SEGMENT READONLY PARA
sp_256_get_entry_65_4 PROC
        ; Preserve the nonvolatile XMM registers used below. movdqu is used
        ; because rsp is not 16-byte aligned here.
        sub     rsp, 96
        movdqu  OWORD PTR [rsp], xmm6
        movdqu  OWORD PTR [rsp+16], xmm7
        movdqu  OWORD PTR [rsp+32], xmm8
        movdqu  OWORD PTR [rsp+48], xmm9
        movdqu  OWORD PTR [rsp+64], xmm10
        movdqu  OWORD PTR [rsp+80], xmm11
        ; xmm9 = idx in every lane, xmm11 = 1 in every lane.
        mov     rax, 1
        movd    xmm9, r8d
        ; Start from entry 1.
        add     rdx, 64
        movd    xmm11, eax
        ; rax = number of entries to visit (1..64).
        mov     rax, 64
        pshufd  xmm11, xmm11, 0
        pshufd  xmm9, xmm9, 0
        ; xmm0..xmm3 accumulate the selected 64-byte entry.
        pxor    xmm0, xmm0
        pxor    xmm1, xmm1
        pxor    xmm2, xmm2
        pxor    xmm3, xmm3
        ; xmm10 = index of the entry about to be read, starting at 1.
        movdqa  xmm10, xmm11
L_256_get_entry_65_4_start:
        ; xmm8 = all ones when the current index equals idx, else zero.
        movdqa  xmm8, xmm10
        paddd   xmm10, xmm11
        pcmpeqd xmm8, xmm9
        movdqu  xmm4, [rdx]
        movdqu  xmm5, [rdx+16]
        movdqu  xmm6, [rdx+32]
        movdqu  xmm7, [rdx+48]
        add     rdx, 64
        pand    xmm4, xmm8
        pand    xmm5, xmm8
        pand    xmm6, xmm8
        pand    xmm7, xmm8
        por     xmm0, xmm4
        por     xmm1, xmm5
        por     xmm2, xmm6
        por     xmm3, xmm7
        dec     rax
        jnz     L_256_get_entry_65_4_start
        ; First 32 bytes of the entry go to r, the second 32 bytes to r+64.
        movdqu  [rcx], xmm0
        movdqu  [rcx+16], xmm1
        movdqu  [rcx+64], xmm2
        movdqu  [rcx+80], xmm3
        ; Restore the caller's nonvolatile XMM registers.
        movdqu  xmm6, OWORD PTR [rsp]
        movdqu  xmm7, OWORD PTR [rsp+16]
        movdqu  xmm8, OWORD PTR [rsp+32]
        movdqu  xmm9, OWORD PTR [rsp+48]
        movdqu  xmm10, OWORD PTR [rsp+64]
        movdqu  xmm11, OWORD PTR [rsp+80]
        add     rsp, 96
        ret
sp_256_get_entry_65_4 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Touch each possible entry that could be being copied.
; *
; * Entries 1 to 64 are all read and merged through a compare mask, so the
; * memory access pattern does not depend on idx. Entry 0 is never read: the
; * output is all zero when idx matches none of 1..64. The iteration count
; * matches sp_256_get_entry_65_4, so no memory beyond entry 64 is read.
; *
; * r Point to copy into: 32 bytes are written at r and 32 bytes at r+64.
; * table Table - start of the entries to access (64 bytes per entry).
; * idx Index of entry to retrieve.
; *
; * Microsoft x64 arguments: rcx = r, rdx = table, r8d = idx.
; * xmm6 and xmm7 are nonvolatile in the Microsoft x64 calling convention,
; * so they are spilled to the stack on entry and restored before returning.
; */
_text SEGMENT READONLY PARA
sp_256_get_entry_65_avx2_4 PROC
        ; Preserve the nonvolatile low halves of ymm6 / ymm7.
        sub     rsp, 32
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        ; ymm5 = idx in every lane, ymm7 = 1 in every lane (vpermd with an
        ; all-zero index vector broadcasts element 0).
        mov     rax, 1
        vmovd   xmm5, r8d
        ; Start from entry 1.
        add     rdx, 64
        vmovd   xmm7, eax
        ; rax = number of entries to visit (1..64).
        mov     rax, 64
        vpxor   ymm6, ymm6, ymm6
        vpermd  ymm5, ymm6, ymm5
        vpermd  ymm7, ymm6, ymm7
        ; ymm0 / ymm1 accumulate the two 32-byte halves of the entry.
        vpxor   ymm0, ymm0, ymm0
        vpxor   ymm1, ymm1, ymm1
        ; ymm6 = index of the entry about to be read, starting at 1.
        vmovdqa ymm6, ymm7
L_256_get_entry_65_avx2_4_start:
        ; ymm4 = all ones when the current index equals idx, else zero.
        vpcmpeqd ymm4, ymm6, ymm5
        vpaddd  ymm6, ymm6, ymm7
        vmovupd ymm2, [rdx]
        vmovupd ymm3, [rdx+32]
        add     rdx, 64
        vpand   ymm2, ymm2, ymm4
        vpand   ymm3, ymm3, ymm4
        vpor    ymm0, ymm0, ymm2
        vpor    ymm1, ymm1, ymm3
        dec     rax
        jnz     L_256_get_entry_65_avx2_4_start
        vmovupd YMMWORD PTR [rcx], ymm0
        vmovupd YMMWORD PTR [rcx+64], ymm1
        ; Restore the caller's nonvolatile XMM registers.
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        add     rsp, 32
        ret
sp_256_get_entry_65_avx2_4 ENDP
_text ENDS
ENDIF
ENDIF
; /* Add 1 to a. (a = a + 1)
; *
; * a A single precision integer (rcx), four 64-bit words updated in place.
; *
; * A carry out of the top word is discarded.
; */
_text SEGMENT READONLY PARA
sp_256_add_one_4 PROC
        ; Bring the four words into volatile registers.
        mov     r8, QWORD PTR [rcx]
        mov     r9, QWORD PTR [rcx+8]
        mov     r10, QWORD PTR [rcx+16]
        mov     r11, QWORD PTR [rcx+24]
        ; Increment the low word and ripple the carry upwards.
        add     r8, 1
        adc     r9, 0
        adc     r10, 0
        adc     r11, 0
        mov     QWORD PTR [rcx], r8
        mov     QWORD PTR [rcx+8], r9
        mov     QWORD PTR [rcx+16], r10
        mov     QWORD PTR [rcx+24], r11
        ret
sp_256_add_one_4 ENDP
_text ENDS
; /* Read big endian unsigned byte array into r.
; * Uses the bswap instruction.
; *
; * r A single precision integer.
; * size Maximum number of bytes to convert
; * a Byte array.
; * n Number of bytes in array to read.
; *
; * Microsoft x64 arguments: rcx = r, rdx = size, r8 = a, r9 = n.
; * The size argument is never read; the output length is fixed at 32 bytes
; * (r12 = r + 32 marks the end of the output).
; *
; * NOTE(review): n is used as a full 64-bit signed value in r9 (add, cmp/jg);
; * presumably the caller passes it already extended to 64 bits - confirm.
; * NOTE(review): the zero-fill guard tests only rcx == r12, so if more than
; * 32 bytes were consumed one extra word is still written - presumably
; * callers bound n by 32; confirm.
; */
_text SEGMENT READONLY PARA
sp_256_from_bin_bswap PROC
push r12
push r13
; r11 = a + n (one past the last input byte), r12 = r + 32 (end of output),
; r13 = 0 (zero constant).
mov r11, r8
mov r12, rcx
add r11, r9
add r12, 32
xor r13, r13
jmp L_256_from_bin_bswap_64_end
; Convert 64 input bytes per iteration, walking the input backwards so the
; least significant bytes land in the lowest output words.
L_256_from_bin_bswap_64_start:
sub r11, 64
mov rax, QWORD PTR [r11+56]
mov r10, QWORD PTR [r11+48]
bswap rax
bswap r10
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r10
mov rax, QWORD PTR [r11+40]
mov r10, QWORD PTR [r11+32]
bswap rax
bswap r10
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r10
mov rax, QWORD PTR [r11+24]
mov r10, QWORD PTR [r11+16]
bswap rax
bswap r10
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r10
mov rax, QWORD PTR [r11+8]
mov r10, QWORD PTR [r11]
bswap rax
bswap r10
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r10
add rcx, 64
sub r9, 64
L_256_from_bin_bswap_64_end:
; Loop while at least 64 bytes remain.
cmp r9, 63
jg L_256_from_bin_bswap_64_start
jmp L_256_from_bin_bswap_8_end
; Convert 8 input bytes per iteration.
L_256_from_bin_bswap_8_start:
sub r11, 8
mov rax, QWORD PTR [r11]
bswap rax
mov QWORD PTR [rcx], rax
add rcx, 8
sub r9, 8
L_256_from_bin_bswap_8_end:
; Loop while at least 8 bytes remain.
cmp r9, 7
jg L_256_from_bin_bswap_8_start
; Skip the partial word when no bytes remain.
cmp r9, r13
je L_256_from_bin_bswap_hi_end
; Assemble the remaining 1..7 bytes, which are the first bytes of a, into
; r10 most significant byte first.
mov r10, r13
mov rax, r13
L_256_from_bin_bswap_hi_start:
mov al, BYTE PTR [r8]
shl r10, 8
inc r8
add r10, rax
dec r9
jg L_256_from_bin_bswap_hi_start
mov QWORD PTR [rcx], r10
add rcx, 8
L_256_from_bin_bswap_hi_end:
; Zero the output words that received no input, up to r + 32.
cmp rcx, r12
je L_256_from_bin_bswap_zero_end
L_256_from_bin_bswap_zero_start:
mov QWORD PTR [rcx], r13
add rcx, 8
cmp rcx, r12
jl L_256_from_bin_bswap_zero_start
L_256_from_bin_bswap_zero_end:
pop r13
pop r12
ret
sp_256_from_bin_bswap ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Read big endian unsigned byte array into r.
; * Uses the movbe instruction which is an optional instruction.
; *
; * r A single precision integer.
; * size Maximum number of bytes to convert
; * a Byte array.
; * n Number of bytes in array to read.
; *
; * Microsoft x64 arguments: rcx = r, rdx = size, r8 = a, r9 = n.
; * The size argument is never read; the output length is fixed at 32 bytes
; * (r12 = r + 32 marks the end of the output).
; *
; * NOTE(review): n is used as a full 64-bit signed value in r9 (add, cmp/jg);
; * presumably the caller passes it already extended to 64 bits - confirm.
; * NOTE(review): the zero-fill guard tests only rcx == r12, so if more than
; * 32 bytes were consumed one extra word is still written - presumably
; * callers bound n by 32; confirm.
; */
_text SEGMENT READONLY PARA
sp_256_from_bin_movbe PROC
push r12
push r13
; r11 = a + n (one past the last input byte), r12 = r + 32 (end of output),
; r13 = 0 (zero constant).
mov r11, r8
mov r12, rcx
add r11, r9
add r12, 32
xor r13, r13
jmp L_256_from_bin_movbe_64_end
; Convert 64 input bytes per iteration, walking the input backwards so the
; least significant bytes land in the lowest output words. movbe performs
; the byte swap as part of the load.
L_256_from_bin_movbe_64_start:
sub r11, 64
movbe rax, QWORD PTR [r11+56]
movbe r10, QWORD PTR [r11+48]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r10
movbe rax, QWORD PTR [r11+40]
movbe r10, QWORD PTR [r11+32]
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r10
movbe rax, QWORD PTR [r11+24]
movbe r10, QWORD PTR [r11+16]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r10
movbe rax, QWORD PTR [r11+8]
movbe r10, QWORD PTR [r11]
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r10
add rcx, 64
sub r9, 64
L_256_from_bin_movbe_64_end:
; Loop while at least 64 bytes remain.
cmp r9, 63
jg L_256_from_bin_movbe_64_start
jmp L_256_from_bin_movbe_8_end
; Convert 8 input bytes per iteration.
L_256_from_bin_movbe_8_start:
sub r11, 8
movbe rax, QWORD PTR [r11]
mov QWORD PTR [rcx], rax
add rcx, 8
sub r9, 8
L_256_from_bin_movbe_8_end:
; Loop while at least 8 bytes remain.
cmp r9, 7
jg L_256_from_bin_movbe_8_start
; Skip the partial word when no bytes remain.
cmp r9, r13
je L_256_from_bin_movbe_hi_end
; Assemble the remaining 1..7 bytes, which are the first bytes of a, into
; r10 most significant byte first.
mov r10, r13
mov rax, r13
L_256_from_bin_movbe_hi_start:
mov al, BYTE PTR [r8]
shl r10, 8
inc r8
add r10, rax
dec r9
jg L_256_from_bin_movbe_hi_start
mov QWORD PTR [rcx], r10
add rcx, 8
L_256_from_bin_movbe_hi_end:
; Zero the output words that received no input, up to r + 32.
cmp rcx, r12
je L_256_from_bin_movbe_zero_end
L_256_from_bin_movbe_zero_start:
mov QWORD PTR [rcx], r13
add rcx, 8
cmp rcx, r12
jl L_256_from_bin_movbe_zero_start
L_256_from_bin_movbe_zero_end:
pop r13
pop r12
ret
sp_256_from_bin_movbe ENDP
_text ENDS
ENDIF
; /* Write r as big endian to byte array.
; * Fixed length number of bytes written: 32
; * Uses the bswap instruction.
; *
; * r A single precision integer (rcx), four 64-bit words.
; * a Byte array (rdx) receiving 32 bytes, most significant byte first.
; *
; * The two high words are read and written before the two low words are
; * read.
; */
_text SEGMENT READONLY PARA
sp_256_to_bin_bswap_4 PROC
        ; Words 2 and 3 become output bytes 8..15 and 0..7.
        mov     r10, QWORD PTR [rcx+16]
        mov     r9, QWORD PTR [rcx+24]
        bswap   r10
        bswap   r9
        mov     QWORD PTR [rdx+8], r10
        mov     QWORD PTR [rdx], r9
        ; Words 0 and 1 become output bytes 24..31 and 16..23.
        mov     r10, QWORD PTR [rcx]
        mov     r9, QWORD PTR [rcx+8]
        bswap   r10
        bswap   r9
        mov     QWORD PTR [rdx+24], r10
        mov     QWORD PTR [rdx+16], r9
        ret
sp_256_to_bin_bswap_4 ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Write r as big endian to byte array.
; * Fixed length number of bytes written: 32
; * Uses the movbe instruction which is optional.
; *
; * r A single precision integer (rcx), four 64-bit words.
; * a Byte array (rdx) receiving 32 bytes, most significant byte first.
; *
; * The two high words are read and written before the two low words are
; * read. The byte swap is done by the movbe store.
; */
_text SEGMENT READONLY PARA
sp_256_to_bin_movbe_4 PROC
        ; Words 3 and 2 become output bytes 0..7 and 8..15.
        mov     r9, QWORD PTR [rcx+24]
        mov     r10, QWORD PTR [rcx+16]
        movbe   QWORD PTR [rdx], r9
        movbe   QWORD PTR [rdx+8], r10
        ; Words 1 and 0 become output bytes 16..23 and 24..31.
        mov     r9, QWORD PTR [rcx+8]
        mov     r10, QWORD PTR [rcx]
        movbe   QWORD PTR [rdx+16], r9
        movbe   QWORD PTR [rdx+24], r10
        ret
sp_256_to_bin_movbe_4 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Multiply a and b into r. (r = a * b)
; *
; * r Result of multiplication.
; * a First number to multiply.
; * b Second number to multiply.
; *
; * Microsoft x64 arguments: rcx = r (eight words written), rdx = a,
; * r8 = b. All sixteen word products are formed with mulx and summed with
; * the adcx (CF) and adox (OF) carry chains; the product is held in
; * r8..r15 (least significant word in r8) and stored only at the end.
; */
_text SEGMENT READONLY PARA
sp_256_mul_avx2_4 PROC
; Save the nonvolatile registers used as scratch.
push rbx
push r12
push r13
push r14
push r15
push rbp
push rdi
push rsi
; rbp = b, rdi = a; rdx and r8 are needed by mulx and the accumulator.
mov rbp, r8
mov rdi, rdx
; A[0] * B[0]
mov rdx, QWORD PTR [rbp]
mulx r9, r8, QWORD PTR [rdi]
; A[2] * B[0]
mulx r11, r10, QWORD PTR [rdi+16]
; A[1] * B[0]
mulx rsi, rax, QWORD PTR [rdi+8]
; r15 = 0; CF and OF are cleared for the first pair of carry chains.
xor r15, r15
adcx r9, rax
; A[1] * B[3]
mov rdx, QWORD PTR [rbp+24]
mulx r13, r12, QWORD PTR [rdi+8]
adcx r10, rsi
; A[0] * B[1]
mov rdx, QWORD PTR [rbp+8]
mulx rsi, rax, QWORD PTR [rdi]
adox r9, rax
; A[2] * B[1]
mulx r14, rax, QWORD PTR [rdi+16]
adox r10, rsi
adcx r11, rax
; A[1] * B[2]
mov rdx, QWORD PTR [rbp+16]
mulx rsi, rax, QWORD PTR [rdi+8]
adcx r12, r14
adox r11, rax
adcx r13, r15
adox r12, rsi
; A[0] * B[2]
mulx rsi, rax, QWORD PTR [rdi]
adox r13, r15
; r14 = 0; restarts both carry chains.
xor r14, r14
adcx r10, rax
; A[1] * B[1]
; The low half of this product overwrites rdx, so the multiplier is
; reloaded before the next mulx.
mov rdx, QWORD PTR [rbp+8]
mulx rax, rdx, QWORD PTR [rdi+8]
adcx r11, rsi
adox r10, rdx
; A[3] * B[1]
mov rdx, QWORD PTR [rbp+8]
adox r11, rax
mulx rsi, rax, QWORD PTR [rdi+24]
adcx r12, rax
; A[2] * B[2]
mov rdx, QWORD PTR [rbp+16]
mulx rax, rdx, QWORD PTR [rdi+16]
adcx r13, rsi
adox r12, rdx
; A[3] * B[3]
mov rdx, QWORD PTR [rbp+24]
adox r13, rax
mulx rsi, rax, QWORD PTR [rdi+24]
adox r14, r15
adcx r14, rax
; A[0] * B[3]
mulx rax, rdx, QWORD PTR [rdi]
adcx r15, rsi
; rsi = 0; restarts both carry chains for the last group of products.
xor rsi, rsi
adcx r11, rdx
; A[3] * B[0]
mov rdx, QWORD PTR [rdi+24]
adcx r12, rax
mulx rax, rbx, QWORD PTR [rbp]
adox r11, rbx
adox r12, rax
; A[3] * B[2]
mulx rax, rdx, QWORD PTR [rbp+16]
adcx r13, rdx
; A[2] * B[3]
mov rdx, QWORD PTR [rbp+24]
adcx r14, rax
mulx rdx, rax, QWORD PTR [rdi+16]
adcx r15, rsi
adox r13, rax
adox r14, rdx
adox r15, rsi
; Store the eight-word product.
mov QWORD PTR [rcx], r8
mov QWORD PTR [rcx+8], r9
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
mov QWORD PTR [rcx+32], r12
mov QWORD PTR [rcx+40], r13
mov QWORD PTR [rcx+48], r14
mov QWORD PTR [rcx+56], r15
pop rsi
pop rdi
pop rbp
pop r15
pop r14
pop r13
pop r12
pop rbx
ret
sp_256_mul_avx2_4 ENDP
_text ENDS
ENDIF
; /* Sub b from a into a. (a -= b)
; *
; * a A single precision integer and result (rcx).
; * b A single precision integer (rdx).
; *
; * rax is 0 when the subtraction did not borrow and all ones when it did.
; */
_text SEGMENT READONLY PARA
sp_256_sub_in_place_4 PROC
        ; Load the minuend; nothing is stored until the whole difference
        ; has been computed.
        mov     r8, QWORD PTR [rcx]
        mov     r9, QWORD PTR [rcx+8]
        mov     r10, QWORD PTR [rcx+16]
        mov     r11, QWORD PTR [rcx+24]
        ; Subtract b word by word, rippling the borrow through CF.
        sub     r8, QWORD PTR [rdx]
        sbb     r9, QWORD PTR [rdx+8]
        sbb     r10, QWORD PTR [rdx+16]
        sbb     r11, QWORD PTR [rdx+24]
        ; rax = 0 - CF.
        sbb     rax, rax
        mov     QWORD PTR [rcx], r8
        mov     QWORD PTR [rcx+8], r9
        mov     QWORD PTR [rcx+16], r10
        mov     QWORD PTR [rcx+24], r11
        ret
sp_256_sub_in_place_4 ENDP
_text ENDS
; /* Mul a by digit b into r. (r = a * b)
; *
; * r A single precision integer (rcx), five words written.
; * a A single precision integer (rdx), four words read.
; * b A single precision digit (r8).
; *
; * Each r[i] is stored before a[i+1] is read.
; */
_text SEGMENT READONLY PARA
sp_256_mul_d_4 PROC
        ; mul uses rdx:rax, so keep the pointer to a in r9.
        mov     r9, rdx
        ; A[0] * B
        mov     rax, r8
        mul     QWORD PTR [r9]
        mov     QWORD PTR [rcx], rax
        mov     r10, rdx                    ; r10 = word carried upwards
        ; A[1] * B
        mov     rax, r8
        mul     QWORD PTR [r9+8]
        add     rax, r10
        adc     rdx, 0
        mov     QWORD PTR [rcx+8], rax
        mov     r10, rdx
        ; A[2] * B
        mov     rax, r8
        mul     QWORD PTR [r9+16]
        add     rax, r10
        adc     rdx, 0
        mov     QWORD PTR [rcx+16], rax
        mov     r10, rdx
        ; A[3] * B
        mov     rax, r8
        mul     QWORD PTR [r9+24]
        add     rax, r10
        adc     rdx, 0
        mov     QWORD PTR [rcx+24], rax
        mov     QWORD PTR [rcx+32], rdx
        ret
sp_256_mul_d_4 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Mul a by digit b into r. (r = a * b)
; *
; * r A single precision integer (rcx), five words written.
; * a A single precision integer (rdx), four words read.
; * b A single precision digit (r8).
; *
; * mulx and mov leave the flags untouched, so a single add/adc chain links
; * each high half to the next low half. Each r[i] is stored before a[i+1]
; * is read.
; */
_text SEGMENT READONLY PARA
sp_256_mul_d_avx2_4 PROC
        ; mulx multiplies by rdx: move the pointer aside and load the digit.
        mov     rax, rdx
        mov     rdx, r8
        ; A[0] * B
        mulx    r10, r9, QWORD PTR [rax]
        mov     QWORD PTR [rcx], r9
        ; A[1] * B
        mulx    r11, r9, QWORD PTR [rax+8]
        add     r9, r10
        mov     QWORD PTR [rcx+8], r9
        ; A[2] * B
        mulx    r10, r9, QWORD PTR [rax+16]
        adc     r9, r11
        mov     QWORD PTR [rcx+16], r9
        ; A[3] * B
        mulx    r11, r9, QWORD PTR [rax+24]
        adc     r9, r10
        adc     r11, 0
        mov     QWORD PTR [rcx+24], r9
        mov     QWORD PTR [rcx+32], r11
        ret
sp_256_mul_d_avx2_4 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
; *
; * d1 The high order half of the number to divide (rcx).
; * d0 The low order half of the number to divide (rdx).
; * div The divisor (r8).
; * returns the quotient in rax.
; *
; * The div instruction raises a divide error when div is zero or when the
; * quotient does not fit in 64 bits (d1 >= div).
; */
_text SEGMENT READONLY PARA
div_256_word_asm_4 PROC
        ; div takes its dividend in rdx:rax, so d0 is parked in r9 while
        ; d1 moves into rdx.
        mov     r9, rdx
        mov     rdx, rcx
        mov     rax, r9
        div     r8
        ret
div_256_word_asm_4 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Multiply two Montgomery form numbers mod the modulus (prime).
; * (r = a * b mod m)
; *
; * r Result of multiplication.
; * a First number to multiply in Montgomery form.
; * b Second number to multiply in Montgomery form.
; *
; * Microsoft x64 arguments: rcx = r, rdx = a, r8 = b.
; * The modulus is not a parameter: its four words are the immediates
; * 17562291160714782033, 13611842547513532036, 18446744073709551615 and
; * 18446744069414584320 (low to high), and the per-word reduction factor is
; * derived from the immediate 14758798090332847183 held in rbx.
; */
_text SEGMENT READONLY PARA
sp_256_mont_mul_order_avx2_4 PROC
; Save the nonvolatile registers used as scratch.
push rbx
push r12
push r13
push r14
push r15
push rbp
push rdi
push rsi
; rbp = b, rdi = a; rdx and r8 are needed by mulx and the accumulator.
mov rbp, r8
mov rdi, rdx
; The product is accumulated in r8..r15 (least significant word in r8)
; using the adcx (CF) and adox (OF) carry chains.
; A[0] * B[0]
mov rdx, QWORD PTR [rbp]
mulx r9, r8, QWORD PTR [rdi]
; A[2] * B[0]
mulx r11, r10, QWORD PTR [rdi+16]
; A[1] * B[0]
mulx rsi, rax, QWORD PTR [rdi+8]
xor r15, r15
adcx r9, rax
; A[1] * B[3]
mov rdx, QWORD PTR [rbp+24]
mulx r13, r12, QWORD PTR [rdi+8]
adcx r10, rsi
; A[0] * B[1]
mov rdx, QWORD PTR [rbp+8]
mulx rsi, rax, QWORD PTR [rdi]
adox r9, rax
; A[2] * B[1]
mulx r14, rax, QWORD PTR [rdi+16]
adox r10, rsi
adcx r11, rax
; A[1] * B[2]
mov rdx, QWORD PTR [rbp+16]
mulx rsi, rax, QWORD PTR [rdi+8]
adcx r12, r14
adox r11, rax
adcx r13, r15
adox r12, rsi
; A[0] * B[2]
mulx rsi, rax, QWORD PTR [rdi]
adox r13, r15
xor r14, r14
adcx r10, rax
; A[1] * B[1]
mov rdx, QWORD PTR [rbp+8]
mulx rax, rdx, QWORD PTR [rdi+8]
adcx r11, rsi
adox r10, rdx
; A[3] * B[1]
mov rdx, QWORD PTR [rbp+8]
adox r11, rax
mulx rsi, rax, QWORD PTR [rdi+24]
adcx r12, rax
; A[2] * B[2]
mov rdx, QWORD PTR [rbp+16]
mulx rax, rdx, QWORD PTR [rdi+16]
adcx r13, rsi
adox r12, rdx
; A[3] * B[3]
mov rdx, QWORD PTR [rbp+24]
adox r13, rax
mulx rsi, rax, QWORD PTR [rdi+24]
adox r14, r15
adcx r14, rax
; A[0] * B[3]
mulx rax, rdx, QWORD PTR [rdi]
adcx r15, rsi
xor rsi, rsi
adcx r11, rdx
; A[3] * B[0]
mov rdx, QWORD PTR [rdi+24]
adcx r12, rax
mulx rax, rbx, QWORD PTR [rbp]
adox r11, rbx
adox r12, rax
; A[3] * B[2]
mulx rax, rdx, QWORD PTR [rbp+16]
adcx r13, rdx
; A[2] * B[3]
mov rdx, QWORD PTR [rbp+24]
adcx r14, rax
mulx rdx, rax, QWORD PTR [rdi+16]
adcx r15, rsi
adox r13, rax
adox r14, rdx
adox r15, rsi
; Start Reduction
; Four rounds follow. Each round computes rdx = rbx * (lowest live word)
; (low 64 bits, imul), multiplies it by the four modulus words and adds the
; result to the accumulator. r8 carries the overflow from one round into
; the next; rbp is the zero constant.
mov rbx, 14758798090332847183
; A[0]
mov rdx, rbx
imul rdx, r8
mov rax, 17562291160714782033
; rbp = 0; CF and OF are cleared for this round's carry chains.
xor rbp, rbp
mulx rdi, rsi, rax
mov rax, 13611842547513532036
adcx r8, rsi
adox r9, rdi
mulx rdi, rsi, rax
mov rax, 18446744073709551615
adcx r9, rsi
adox r10, rdi
mulx rdi, rsi, rax
mov rax, 18446744069414584320
adcx r10, rsi
adox r11, rdi
mulx rdi, rsi, rax
adcx r11, rsi
adox r12, rdi
adcx r12, rbp
; mov leaves the flags untouched, so r8 can be reset mid-chain.
mov r8, rbp
; carry
adox r8, rbp
adcx r8, rbp
; A[1]
mov rdx, rbx
imul rdx, r9
mov rax, 17562291160714782033
xor rbp, rbp
mulx rdi, rsi, rax
mov rax, 13611842547513532036
adcx r9, rsi
adox r10, rdi
mulx rdi, rsi, rax
mov rax, 18446744073709551615
adcx r10, rsi
adox r11, rdi
mulx rdi, rsi, rax
mov rax, 18446744069414584320
adcx r11, rsi
adox r12, rdi
mulx rdi, rsi, rax
adcx r12, rsi
adox r13, rdi
adcx r13, r8
mov r8, rbp
; carry
adox r8, rbp
adcx r8, rbp
; A[2]
mov rdx, rbx
imul rdx, r10
mov rax, 17562291160714782033
xor rbp, rbp
mulx rdi, rsi, rax
mov rax, 13611842547513532036
adcx r10, rsi
adox r11, rdi
mulx rdi, rsi, rax
mov rax, 18446744073709551615
adcx r11, rsi
adox r12, rdi
mulx rdi, rsi, rax
mov rax, 18446744069414584320
adcx r12, rsi
adox r13, rdi
mulx rdi, rsi, rax
adcx r13, rsi
adox r14, rdi
adcx r14, r8
mov r8, rbp
; carry
adox r8, rbp
adcx r8, rbp
; A[3]
mov rdx, rbx
imul rdx, r11
mov rax, 17562291160714782033
xor rbp, rbp
mulx rdi, rsi, rax
mov rax, 13611842547513532036
adcx r11, rsi
adox r12, rdi
mulx rdi, rsi, rax
mov rax, 18446744073709551615
adcx r12, rsi
adox r13, rdi
mulx rdi, rsi, rax
mov rax, 18446744069414584320
adcx r13, rsi
adox r14, rdi
mulx rdi, rsi, rax
adcx r14, rsi
adox r15, rdi
adcx r15, r8
mov r8, rbp
; carry
adox r8, rbp
adcx r8, rbp
; r8 = 0 - carry is used as the mask for a conditional subtraction of the
; modulus; the third word subtracted is the mask itself (all ones & mask).
; The stores are interleaved with the sbb chain (mov keeps the flags).
neg r8
mov rax, 17562291160714782033
mov rbx, 13611842547513532036
and rax, r8
mov rbp, 18446744069414584320
and rbx, r8
and rbp, r8
sub r12, rax
sbb r13, rbx
mov QWORD PTR [rcx], r12
sbb r14, r8
mov QWORD PTR [rcx+8], r13
sbb r15, rbp
mov QWORD PTR [rcx+16], r14
mov QWORD PTR [rcx+24], r15
pop rsi
pop rdi
pop rbp
pop r15
pop r14
pop r13
pop r12
pop rbx
ret
sp_256_mont_mul_order_avx2_4 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
; *
; * r Result of squaring.
; * a Number to square in Montgomery form.
; *
; * Microsoft x64 arguments: rcx = r, rdx = a.
; * The modulus is not a parameter: its four words are the immediates
; * 17562291160714782033, 13611842547513532036, 18446744073709551615 and
; * 18446744069414584320 (low to high), and the per-word reduction factor is
; * derived from the immediate 14758798090332847183 held in rbx.
; */
_text SEGMENT READONLY PARA
sp_256_mont_sqr_order_avx2_4 PROC
; Save the nonvolatile registers used as scratch.
push rbp
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
; rax = a; rdx is needed as the implicit multiplicand of mulx.
mov rax, rdx
; Off-diagonal products A[i]*A[j] (i < j) are summed into r9..r14 using the
; adcx (CF) and adox (OF) carry chains.
; A[0] * A[1]
mov rdx, QWORD PTR [rax]
mov r15, QWORD PTR [rax+16]
mulx r10, r9, QWORD PTR [rax+8]
; A[0] * A[3]
mulx r12, r11, QWORD PTR [rax+24]
; A[2] * A[1]
mov rdx, r15
mulx rbx, rsi, QWORD PTR [rax+8]
; A[2] * A[3]
mulx r14, r13, QWORD PTR [rax+24]
; r15 = 0; CF and OF are cleared before the carry chains start.
xor r15, r15
adox r11, rsi
adox r12, rbx
; A[2] * A[0]
mulx rbx, rsi, QWORD PTR [rax]
; A[1] * A[3]
mov rdx, QWORD PTR [rax+8]
adox r13, r15
mulx r8, rdi, QWORD PTR [rax+24]
adcx r10, rsi
adox r14, r15
adcx r11, rbx
adcx r12, rdi
adcx r13, r8
adcx r14, r15
; Double with Carry Flag
; adcx rX, rX doubles the off-diagonal sum through CF while adox adds the
; diagonal squares A[i]*A[i] through OF.
xor r15, r15
; A[0] * A[0]
mov rdx, QWORD PTR [rax]
mulx rdi, r8, rdx
adcx r9, r9
adcx r10, r10
adox r9, rdi
; A[1] * A[1]
mov rdx, QWORD PTR [rax+8]
mulx rbx, rsi, rdx
adcx r11, r11
adox r10, rsi
; A[2] * A[2]
mov rdx, QWORD PTR [rax+16]
mulx rsi, rdi, rdx
adcx r12, r12
adox r11, rbx
adcx r13, r13
adox r12, rdi
adcx r14, r14
; A[3] * A[3]
mov rdx, QWORD PTR [rax+24]
mulx rbx, rdi, rdx
adox r13, rsi
adcx r15, r15
adox r14, rdi
adox r15, rbx
; r8..r15 now hold the eight-word square, least significant word in r8.
; Start Reduction
; Four rounds follow. Each round computes rdx = rbx * (lowest live word)
; (low 64 bits, imul), multiplies it by the four modulus words and adds the
; result to the accumulator. r8 carries the overflow from one round into
; the next; rbp is the zero constant.
mov rbx, 14758798090332847183
; A[0]
mov rdx, rbx
imul rdx, r8
mov rdi, 17562291160714782033
; rbp = 0; CF and OF are cleared for this round's carry chains.
xor rbp, rbp
mulx rax, rsi, rdi
mov rdi, 13611842547513532036
adcx r8, rsi
adox r9, rax
mulx rax, rsi, rdi
mov rdi, 18446744073709551615
adcx r9, rsi
adox r10, rax
mulx rax, rsi, rdi
mov rdi, 18446744069414584320
adcx r10, rsi
adox r11, rax
mulx rax, rsi, rdi
adcx r11, rsi
adox r12, rax
adcx r12, rbp
; mov leaves the flags untouched, so r8 can be reset mid-chain.
mov r8, rbp
; carry
adox r8, rbp
adcx r8, rbp
; A[1]
mov rdx, rbx
imul rdx, r9
mov rdi, 17562291160714782033
xor rbp, rbp
mulx rax, rsi, rdi
mov rdi, 13611842547513532036
adcx r9, rsi
adox r10, rax
mulx rax, rsi, rdi
mov rdi, 18446744073709551615
adcx r10, rsi
adox r11, rax
mulx rax, rsi, rdi
mov rdi, 18446744069414584320
adcx r11, rsi
adox r12, rax
mulx rax, rsi, rdi
adcx r12, rsi
adox r13, rax
adcx r13, r8
mov r8, rbp
; carry
adox r8, rbp
adcx r8, rbp
; A[2]
mov rdx, rbx
imul rdx, r10
mov rdi, 17562291160714782033
xor rbp, rbp
mulx rax, rsi, rdi
mov rdi, 13611842547513532036
adcx r10, rsi
adox r11, rax
mulx rax, rsi, rdi
mov rdi, 18446744073709551615
adcx r11, rsi
adox r12, rax
mulx rax, rsi, rdi
mov rdi, 18446744069414584320
adcx r12, rsi
adox r13, rax
mulx rax, rsi, rdi
adcx r13, rsi
adox r14, rax
adcx r14, r8
mov r8, rbp
; carry
adox r8, rbp
adcx r8, rbp
; A[3]
mov rdx, rbx
imul rdx, r11
mov rdi, 17562291160714782033
xor rbp, rbp
mulx rax, rsi, rdi
mov rdi, 13611842547513532036
adcx r11, rsi
adox r12, rax
mulx rax, rsi, rdi
mov rdi, 18446744073709551615
adcx r12, rsi
adox r13, rax
mulx rax, rsi, rdi
mov rdi, 18446744069414584320
adcx r13, rsi
adox r14, rax
mulx rax, rsi, rdi
adcx r14, rsi
adox r15, rax
adcx r15, r8
mov r8, rbp
; carry
adox r8, rbp
adcx r8, rbp
; r8 = 0 - carry is used as the mask for a conditional subtraction of the
; modulus; the third word subtracted is the mask itself (all ones & mask).
; The stores are interleaved with the sbb chain (mov keeps the flags).
neg r8
mov rdi, 17562291160714782033
mov rbx, 13611842547513532036
and rdi, r8
mov rbp, 18446744069414584320
and rbx, r8
and rbp, r8
sub r12, rdi
sbb r13, rbx
mov QWORD PTR [rcx], r12
sbb r14, r8
mov QWORD PTR [rcx+8], r13
sbb r15, rbp
mov QWORD PTR [rcx+16], r14
mov QWORD PTR [rcx+24], r15
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
pop rbp
ret
sp_256_mont_sqr_order_avx2_4 ENDP
_text ENDS
ENDIF
; /* Non-constant time modular inversion.
; *
; * @param [out] r Resulting number.
; * @param [in] a Number to invert.
; * @param [in] m Modulus.
; * @return MP_OKAY on success.
; *
; * Microsoft x64 arguments: rcx = r, rdx = a, r8 = m.
; *
; * Phase 1 runs a subtract-and-halve loop on u (r9..r12, starts as m) and
; * v (r13, r14, r15, rdi, starts as a) until one of them equals 1, writing
; * one op code per step to a byte buffer on the stack:
; *   0 = halve u, 1 = halve v, 2 = u -= v then halve u,
; *   3 = v -= u then halve v, 7 = end marker.
; * Phase 2 replays the op codes on b (r9..r12, starts as m) and d (r13, r14,
; * r15, rdi, starts as 1), doing every subtraction and halving modulo m.
; * al records which value reached 1 (1 = u, 0 = v) and selects whether b or
; * d is stored to r.
; *
; * NOTE(review): only al is written before returning; the upper bits of rax
; * are never set, so the documented MP_OKAY return value is not established
; * by this code - confirm how callers use the return value.
; * NOTE(review): rsi is never checked against the 513-byte buffer;
; * presumably the 256-bit inputs bound the number of recorded ops - confirm.
; */
_text SEGMENT READONLY PARA
sp_256_mod_inv_4 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
; Reserve the op-code buffer; it is only accessed with byte moves.
sub rsp, 513
; u = m
mov r9, QWORD PTR [r8]
mov r10, QWORD PTR [r8+8]
mov r11, QWORD PTR [r8+16]
mov r12, QWORD PTR [r8+24]
; v = a
mov r13, QWORD PTR [rdx]
mov r14, QWORD PTR [rdx+8]
mov r15, QWORD PTR [rdx+16]
mov rdi, QWORD PTR [rdx+24]
; rsi = number of op codes recorded so far.
mov rsi, 0
; Halve v until it is odd, recording op 1 for every halving.
test r13b, 1
jnz L_256_mod_inv_4_v_even_end
L_256_mod_inv_4_v_even_start:
shrd r13, r14, 1
shrd r14, r15, 1
shrd r15, rdi, 1
shr rdi, 1
mov BYTE PTR [rsp+rsi], 1
inc rsi
test r13b, 1
jz L_256_mod_inv_4_v_even_start
L_256_mod_inv_4_v_even_end:
L_256_mod_inv_4_uv_start:
; Unsigned compare of u and v from the most significant word down:
; u < v goes to uv_v, otherwise (u >= v) to uv_u.
cmp r12, rdi
jb L_256_mod_inv_4_uv_v
ja L_256_mod_inv_4_uv_u
cmp r11, r15
jb L_256_mod_inv_4_uv_v
ja L_256_mod_inv_4_uv_u
cmp r10, r14
jb L_256_mod_inv_4_uv_v
ja L_256_mod_inv_4_uv_u
cmp r9, r13
jb L_256_mod_inv_4_uv_v
L_256_mod_inv_4_uv_u:
; Op 2: u = (u - v) / 2.
mov BYTE PTR [rsp+rsi], 2
inc rsi
sub r9, r13
sbb r10, r14
sbb r11, r15
sbb r12, rdi
shrd r9, r10, 1
shrd r10, r11, 1
shrd r11, r12, 1
shr r12, 1
; Keep halving u while it is even, recording op 0 each time.
test r9b, 1
jnz L_256_mod_inv_4_usubv_even_end
L_256_mod_inv_4_usubv_even_start:
shrd r9, r10, 1
shrd r10, r11, 1
shrd r11, r12, 1
shr r12, 1
mov BYTE PTR [rsp+rsi], 0
inc rsi
test r9b, 1
jz L_256_mod_inv_4_usubv_even_start
L_256_mod_inv_4_usubv_even_end:
; Continue unless u == 1 (low word 1, upper three words zero).
cmp r9, 1
jne L_256_mod_inv_4_uv_start
mov rdx, r10
or rdx, r11
jne L_256_mod_inv_4_uv_start
or rdx, r12
jne L_256_mod_inv_4_uv_start
; u reached 1: the result will be taken from b.
mov al, 1
jmp L_256_mod_inv_4_uv_end
L_256_mod_inv_4_uv_v:
; Op 3: v = (v - u) / 2.
mov BYTE PTR [rsp+rsi], 3
inc rsi
sub r13, r9
sbb r14, r10
sbb r15, r11
sbb rdi, r12
shrd r13, r14, 1
shrd r14, r15, 1
shrd r15, rdi, 1
shr rdi, 1
; Keep halving v while it is even, recording op 1 each time.
test r13b, 1
jnz L_256_mod_inv_4_vsubu_even_end
L_256_mod_inv_4_vsubu_even_start:
shrd r13, r14, 1
shrd r14, r15, 1
shrd r15, rdi, 1
shr rdi, 1
mov BYTE PTR [rsp+rsi], 1
inc rsi
test r13b, 1
jz L_256_mod_inv_4_vsubu_even_start
L_256_mod_inv_4_vsubu_even_end:
; Continue unless v == 1 (low word 1, upper three words zero).
cmp r13, 1
jne L_256_mod_inv_4_uv_start
mov rdx, r14
or rdx, r15
jne L_256_mod_inv_4_uv_start
or rdx, rdi
jne L_256_mod_inv_4_uv_start
; v reached 1: the result will be taken from d.
mov al, 0
L_256_mod_inv_4_uv_end:
; Phase 2 set-up: b = m, d = 1.
mov r9, QWORD PTR [r8]
mov r10, QWORD PTR [r8+8]
mov r11, QWORD PTR [r8+16]
mov r12, QWORD PTR [r8+24]
mov r13, 1
xor r14, r14
xor r15, r15
xor rdi, rdi
; Terminate the op list, then fetch the first op; rsi becomes the index of
; the next op to read.
mov BYTE PTR [rsp+rsi], 7
mov dl, BYTE PTR [rsp]
mov rsi, 1
; Dispatch on the op code: 1 -> div2_d, 0 -> div2_b, 3 -> d_sub_b,
; 2 -> b_sub_d, anything else (7) -> done.
cmp dl, 1
je L_256_mod_inv_4_op_div2_d
jl L_256_mod_inv_4_op_div2_b
cmp dl, 3
je L_256_mod_inv_4_op_d_sub_b
jl L_256_mod_inv_4_op_b_sub_d
jmp L_256_mod_inv_4_op_end
L_256_mod_inv_4_op_b_sub_d:
; b -= d; on borrow add m back. Falls through into the halving of b.
sub r9, r13
sbb r10, r14
sbb r11, r15
sbb r12, rdi
jnc L_256_mod_inv_4_op_div2_b
add r9, QWORD PTR [r8]
adc r10, QWORD PTR [r8+8]
adc r11, QWORD PTR [r8+16]
adc r12, QWORD PTR [r8+24]
L_256_mod_inv_4_op_div2_b:
; b /= 2 mod m: when b is odd, add m first; rdx collects the carry out so
; it can be shifted back in as the top bit. mov keeps the flags from test.
test r9b, 1
mov rdx, 0
jz L_256_mod_inv_4_op_div2_b_mod
add r9, QWORD PTR [r8]
adc r10, QWORD PTR [r8+8]
adc r11, QWORD PTR [r8+16]
adc r12, QWORD PTR [r8+24]
adc rdx, 0
L_256_mod_inv_4_op_div2_b_mod:
shrd r9, r10, 1
shrd r10, r11, 1
shrd r11, r12, 1
shrd r12, rdx, 1
; Fetch and dispatch the next op.
mov dl, BYTE PTR [rsp+rsi]
inc rsi
cmp dl, 1
je L_256_mod_inv_4_op_div2_d
jl L_256_mod_inv_4_op_div2_b
cmp dl, 3
je L_256_mod_inv_4_op_d_sub_b
jl L_256_mod_inv_4_op_b_sub_d
jmp L_256_mod_inv_4_op_end
L_256_mod_inv_4_op_d_sub_b:
; d -= b; on borrow add m back. Falls through into the halving of d.
sub r13, r9
sbb r14, r10
sbb r15, r11
sbb rdi, r12
jnc L_256_mod_inv_4_op_div2_d
add r13, QWORD PTR [r8]
adc r14, QWORD PTR [r8+8]
adc r15, QWORD PTR [r8+16]
adc rdi, QWORD PTR [r8+24]
L_256_mod_inv_4_op_div2_d:
; d /= 2 mod m, same scheme as for b.
test r13b, 1
mov rdx, 0
jz L_256_mod_inv_4_op_div2_d_mod
add r13, QWORD PTR [r8]
adc r14, QWORD PTR [r8+8]
adc r15, QWORD PTR [r8+16]
adc rdi, QWORD PTR [r8+24]
adc rdx, 0
L_256_mod_inv_4_op_div2_d_mod:
shrd r13, r14, 1
shrd r14, r15, 1
shrd r15, rdi, 1
shrd rdi, rdx, 1
; Fetch and dispatch the next op; the end marker falls through to op_end.
mov dl, BYTE PTR [rsp+rsi]
inc rsi
cmp dl, 1
je L_256_mod_inv_4_op_div2_d
jl L_256_mod_inv_4_op_div2_b
cmp dl, 3
je L_256_mod_inv_4_op_d_sub_b
jl L_256_mod_inv_4_op_b_sub_d
L_256_mod_inv_4_op_end:
; Store b when u reached 1 (al == 1), otherwise store d.
cmp al, 1
jne L_256_mod_inv_4_store_d
mov QWORD PTR [rcx], r9
mov QWORD PTR [rcx+8], r10
mov QWORD PTR [rcx+16], r11
mov QWORD PTR [rcx+24], r12
jmp L_256_mod_inv_4_store_end
L_256_mod_inv_4_store_d:
mov QWORD PTR [rcx], r13
mov QWORD PTR [rcx+8], r14
mov QWORD PTR [rcx+16], r15
mov QWORD PTR [rcx+24], rdi
L_256_mod_inv_4_store_end:
; Release the op-code buffer and restore the saved registers.
add rsp, 513
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_256_mod_inv_4 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; Constant tables loaded by sp_256_mod_inv_avx2_4. Each table is followed by
; a QWORD holding its address; the routine loads that pointer into a
; register and reads the table through it.
_DATA SEGMENT
ALIGN 16
; A 256-bit value split into ten 26-bit limbs, stored as two vectors of
; eight dwords: the first holds the even limbs (0, 2, 4, 6, 8) and the second
; the odd limbs (1, 3, 5, 7, 9), each padded with three zero dwords. Limb 0
; (6497617 = 0x632551) equals the low 26 bits of the low modulus word used by
; the *_order_* routines in this file.
L_sp256_mod_inv_avx2_4_order DWORD 6497617,32001851,62711546,67108863,67043328,0,0,0,41070783,45522014,67108863,1023,4194303,0,0,0
ptr_L_sp256_mod_inv_avx2_4_order QWORD L_sp256_mod_inv_avx2_4_order
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; The value 1 as four 64-bit words.
L_sp256_mod_inv_avx2_4_one QWORD 1, 0,
0, 0
ptr_L_sp256_mod_inv_avx2_4_one QWORD L_sp256_mod_inv_avx2_4_one
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; The dword 1 in all eight lanes.
L_sp256_mod_inv_avx2_4_all_one DWORD 1,1,1,1,1,1,1,1
ptr_L_sp256_mod_inv_avx2_4_all_one QWORD L_sp256_mod_inv_avx2_4_all_one
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; The dword 1 in lanes 1..4 and 0 elsewhere.
L_sp256_mod_inv_avx2_4_mask01111 DWORD 0,1,1,1,1,0,0,0
ptr_L_sp256_mod_inv_avx2_4_mask01111 QWORD L_sp256_mod_inv_avx2_4_mask01111
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; Lane indices 1,2,3,4,5,6,7,7 - presumably a dword-permute index vector
; that moves every lane down by one position; confirm against its use.
L_sp256_mod_inv_avx2_4_down_one_dword DWORD 1,2,3,4,5,6,7,7
ptr_L_sp256_mod_inv_avx2_4_down_one_dword QWORD L_sp256_mod_inv_avx2_4_down_one_dword
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; 2147483648 (0x80000000, the dword sign bit) in lane 4 only.
L_sp256_mod_inv_avx2_4_neg DWORD 0,0,0,0,2147483648,0,0,0
ptr_L_sp256_mod_inv_avx2_4_neg QWORD L_sp256_mod_inv_avx2_4_neg
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; Lane indices 7,0,1,2,3,7,7,7 - presumably a dword-permute index vector
; that moves lanes 0..3 up by one position; confirm against its use.
L_sp256_mod_inv_avx2_4_up_one_dword DWORD 7,0,1,2,3,7,7,7
ptr_L_sp256_mod_inv_avx2_4_up_one_dword QWORD L_sp256_mod_inv_avx2_4_up_one_dword
_DATA ENDS
_DATA SEGMENT
ALIGN 16
; 67108863 = 2^26 - 1 (a 26-bit limb mask) in lanes 0..4, 0 elsewhere.
L_sp256_mod_inv_avx2_4_mask26 DWORD 67108863,67108863,67108863,67108863,67108863,0,0,0
ptr_L_sp256_mod_inv_avx2_4_mask26 QWORD L_sp256_mod_inv_avx2_4_mask26
_DATA ENDS
; /* Non-constant time modular inversion.
; *
; * Binary extended GCD.  u (starting as m) and v (starting as a) are held
; * as four 64-bit words in general registers.  The two coefficient values
; * that track them are held as ten signed 26-bit limbs in YMM registers:
; * even limbs in ymm0/ymm2, odd limbs in ymm1/ymm3.  Corrections to the
; * coefficients always use the built-in order table, so the result is only
; * meaningful when m equals the value in that table.
; *
; * Windows x64 calling convention: rcx = r, rdx = a, r8 = m.
; * xmm6-xmm14 are non-volatile in that convention and are used here, so
; * they are spilled to the stack on entry and restored before returning.
; *
; * @param  [out]  r  Resulting number (four 64-bit words).
; * @param  [in]   a  Number to invert (four 64-bit words).
; * @param  [in]   m  Modulus (four 64-bit words).
; * @return  0 in rax (MP_OKAY is assumed to be 0 - confirm against the
; *          C headers).
; */
_text SEGMENT READONLY PARA
sp_256_mod_inv_avx2_4 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        push    rbx
        ; Spill the non-volatile vector registers this routine overwrites.
        sub     rsp, 144
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        vmovdqu OWORD PTR [rsp+64], xmm10
        vmovdqu OWORD PTR [rsp+80], xmm11
        vmovdqu OWORD PTR [rsp+96], xmm12
        vmovdqu OWORD PTR [rsp+112], xmm13
        vmovdqu OWORD PTR [rsp+128], xmm14
        ; u = m  (rax, r9, r10, r11)
        mov     rax, QWORD PTR [r8]
        mov     r9, QWORD PTR [r8+8]
        mov     r10, QWORD PTR [r8+16]
        mov     r11, QWORD PTR [r8+24]
        ; v = a  (r12, r13, r14, r15)
        mov     r12, QWORD PTR [rdx]
        mov     r13, QWORD PTR [rdx+8]
        mov     r14, QWORD PTR [rdx+16]
        mov     r15, QWORD PTR [rdx+24]
        ; Load the constant tables.
        mov     rbx, ptr_L_sp256_mod_inv_avx2_4_order
        vmovupd ymm6, [rbx]
        vmovupd ymm7, [rbx+32]
        mov     rbx, ptr_L_sp256_mod_inv_avx2_4_one
        vmovupd ymm8, [rbx]
        mov     rbx, ptr_L_sp256_mod_inv_avx2_4_mask01111
        vmovupd ymm9, [rbx]
        mov     rbx, ptr_L_sp256_mod_inv_avx2_4_all_one
        vmovupd ymm10, [rbx]
        mov     rbx, ptr_L_sp256_mod_inv_avx2_4_down_one_dword
        vmovupd ymm11, [rbx]
        mov     rbx, ptr_L_sp256_mod_inv_avx2_4_neg
        vmovupd ymm12, [rbx]
        mov     rbx, ptr_L_sp256_mod_inv_avx2_4_up_one_dword
        vmovupd ymm13, [rbx]
        mov     rbx, ptr_L_sp256_mod_inv_avx2_4_mask26
        vmovupd ymm14, [rbx]
        ; Coefficient of u (ymm0/ymm1) = 0, coefficient of v (ymm2/ymm3) = 1.
        vpxor   xmm0, xmm0, xmm0
        vpxor   xmm1, xmm1, xmm1
        vmovdqu ymm2, ymm8
        vpxor   xmm3, xmm3, xmm3
        ; Strip factors of two from v, halving its coefficient modulo the
        ; order each time.
        test    r12b, 1
        jnz     L_256_mod_inv_avx2_4_v_even_end
L_256_mod_inv_avx2_4_v_even_start:
        shrd    r12, r13, 1
        shrd    r13, r14, 1
        shrd    r14, r15, 1
        shr     r15, 1
        ; An odd coefficient has the order added first so the halving is
        ; exact.
        vptest  ymm2, ymm8
        jz      L_256_mod_inv_avx2_4_v_even_shr1
        vpaddd  ymm2, ymm2, ymm6
        vpaddd  ymm3, ymm3, ymm7
L_256_mod_inv_avx2_4_v_even_shr1:
        ; Shift every limb right by one and move each limb's old bit 0 into
        ; bit 25 of the limb below it.
        vpand   ymm4, ymm2, ymm9
        vpand   ymm5, ymm3, ymm10
        vpermd  ymm4, ymm11, ymm4
        vpsrad  ymm2, ymm2, 1
        vpsrad  ymm3, ymm3, 1
        vpslld  ymm5, ymm5, 25
        vpslld  xmm4, xmm4, 25
        vpaddd  ymm2, ymm2, ymm5
        vpaddd  ymm3, ymm3, ymm4
        test    r12b, 1
        jz      L_256_mod_inv_avx2_4_v_even_start
L_256_mod_inv_avx2_4_v_even_end:
L_256_mod_inv_avx2_4_uv_start:
        ; Unsigned compare of u with v, most significant word first.
        cmp     r11, r15
        jb      L_256_mod_inv_avx2_4_uv_v
        ja      L_256_mod_inv_avx2_4_uv_u
        cmp     r10, r14
        jb      L_256_mod_inv_avx2_4_uv_v
        ja      L_256_mod_inv_avx2_4_uv_u
        cmp     r9, r13
        jb      L_256_mod_inv_avx2_4_uv_v
        ja      L_256_mod_inv_avx2_4_uv_u
        cmp     rax, r12
        jb      L_256_mod_inv_avx2_4_uv_v
L_256_mod_inv_avx2_4_uv_u:
        ; u >= v: u -= v and subtract the coefficients likewise.  The vector
        ; instructions sit inside the sbb chain; they do not touch the
        ; flags.
        sub     rax, r12
        sbb     r9, r13
        vpsubd  ymm0, ymm0, ymm2
        sbb     r10, r14
        vpsubd  ymm1, ymm1, ymm3
        sbb     r11, r15
        ; Top limb negative: add the order back.
        vptest  ymm1, ymm12
        jz      L_256_mod_inv_avx2_4_usubv_done_neg
        vpaddd  ymm0, ymm0, ymm6
        vpaddd  ymm1, ymm1, ymm7
L_256_mod_inv_avx2_4_usubv_done_neg:
L_256_mod_inv_avx2_4_usubv_shr1:
        ; Halve u and its coefficient until u is odd.
        shrd    rax, r9, 1
        shrd    r9, r10, 1
        shrd    r10, r11, 1
        shr     r11, 1
        vptest  ymm0, ymm8
        jz      L_256_mod_inv_avx2_4_usubv_sub_shr1
        vpaddd  ymm0, ymm0, ymm6
        vpaddd  ymm1, ymm1, ymm7
L_256_mod_inv_avx2_4_usubv_sub_shr1:
        vpand   ymm4, ymm0, ymm9
        vpand   ymm5, ymm1, ymm10
        vpermd  ymm4, ymm11, ymm4
        vpsrad  ymm0, ymm0, 1
        vpsrad  ymm1, ymm1, 1
        vpslld  ymm5, ymm5, 25
        vpslld  xmm4, xmm4, 25
        vpaddd  ymm0, ymm0, ymm5
        vpaddd  ymm1, ymm1, ymm4
        test    al, 1
        jz      L_256_mod_inv_avx2_4_usubv_shr1
        ; Finished when u == 1; otherwise go round again.
        cmp     rax, 1
        jne     L_256_mod_inv_avx2_4_uv_start
        mov     rdx, r9
        or      rdx, r10
        jne     L_256_mod_inv_avx2_4_uv_start
        or      rdx, r11
        jne     L_256_mod_inv_avx2_4_uv_start
        ; Result is the coefficient of u.  Two passes move anything above
        ; bit 25 of each limb into the next limb up.
        vpsrad  ymm5, ymm1, 26
        vpsrad  ymm4, ymm0, 26
        vpermd  ymm5, ymm13, ymm5
        vpand   ymm0, ymm0, ymm14
        vpand   ymm1, ymm1, ymm14
        vpaddd  ymm0, ymm0, ymm5
        vpaddd  ymm1, ymm1, ymm4
        vpsrad  ymm5, ymm1, 26
        vpsrad  ymm4, ymm0, 26
        vpermd  ymm5, ymm13, ymm5
        vpand   ymm0, ymm0, ymm14
        vpand   ymm1, ymm1, ymm14
        vpaddd  ymm0, ymm0, ymm5
        vpaddd  ymm1, ymm1, ymm4
        ; Pull the ten limbs out into general registers.
        vpextrd eax, xmm0, 0
        vpextrd r10d, xmm0, 1
        vpextrd r12d, xmm0, 2
        vpextrd r14d, xmm0, 3
        vextracti128 xmm0, ymm0, 1
        vpextrd r9d, xmm1, 0
        vpextrd r11d, xmm1, 1
        vpextrd r13d, xmm1, 2
        vpextrd r15d, xmm1, 3
        vextracti128 xmm1, ymm1, 1
        vpextrd edi, xmm0, 0
        vpextrd esi, xmm1, 0
        jmp     L_256_mod_inv_avx2_4_store_done
L_256_mod_inv_avx2_4_uv_v:
        ; u < v: v -= u and subtract the coefficients likewise.
        sub     r12, rax
        sbb     r13, r9
        vpsubd  ymm2, ymm2, ymm0
        sbb     r14, r10
        vpsubd  ymm3, ymm3, ymm1
        sbb     r15, r11
        vptest  ymm3, ymm12
        jz      L_256_mod_inv_avx2_4_vsubu_done_neg
        vpaddd  ymm2, ymm2, ymm6
        vpaddd  ymm3, ymm3, ymm7
L_256_mod_inv_avx2_4_vsubu_done_neg:
L_256_mod_inv_avx2_4_vsubu_shr1:
        ; Halve v and its coefficient until v is odd.
        shrd    r12, r13, 1
        shrd    r13, r14, 1
        shrd    r14, r15, 1
        shr     r15, 1
        vptest  ymm2, ymm8
        jz      L_256_mod_inv_avx2_4_vsubu_sub_shr1
        vpaddd  ymm2, ymm2, ymm6
        vpaddd  ymm3, ymm3, ymm7
L_256_mod_inv_avx2_4_vsubu_sub_shr1:
        vpand   ymm4, ymm2, ymm9
        vpand   ymm5, ymm3, ymm10
        vpermd  ymm4, ymm11, ymm4
        vpsrad  ymm2, ymm2, 1
        vpsrad  ymm3, ymm3, 1
        vpslld  ymm5, ymm5, 25
        vpslld  xmm4, xmm4, 25
        vpaddd  ymm2, ymm2, ymm5
        vpaddd  ymm3, ymm3, ymm4
        test    r12b, 1
        jz      L_256_mod_inv_avx2_4_vsubu_shr1
        ; Finished when v == 1; otherwise go round again.
        cmp     r12, 1
        jne     L_256_mod_inv_avx2_4_uv_start
        mov     rdx, r13
        or      rdx, r14
        jne     L_256_mod_inv_avx2_4_uv_start
        or      rdx, r15
        jne     L_256_mod_inv_avx2_4_uv_start
        ; Result is the coefficient of v; normalise and extract as above.
        vpsrad  ymm5, ymm3, 26
        vpsrad  ymm4, ymm2, 26
        vpermd  ymm5, ymm13, ymm5
        vpand   ymm2, ymm2, ymm14
        vpand   ymm3, ymm3, ymm14
        vpaddd  ymm2, ymm2, ymm5
        vpaddd  ymm3, ymm3, ymm4
        vpsrad  ymm5, ymm3, 26
        vpsrad  ymm4, ymm2, 26
        vpermd  ymm5, ymm13, ymm5
        vpand   ymm2, ymm2, ymm14
        vpand   ymm3, ymm3, ymm14
        vpaddd  ymm2, ymm2, ymm5
        vpaddd  ymm3, ymm3, ymm4
        vpextrd eax, xmm2, 0
        vpextrd r10d, xmm2, 1
        vpextrd r12d, xmm2, 2
        vpextrd r14d, xmm2, 3
        vextracti128 xmm2, ymm2, 1
        vpextrd r9d, xmm3, 0
        vpextrd r11d, xmm3, 1
        vpextrd r13d, xmm3, 2
        vpextrd r15d, xmm3, 3
        vextracti128 xmm3, ymm3, 1
        vpextrd edi, xmm2, 0
        vpextrd esi, xmm3, 0
L_256_mod_inv_avx2_4_store_done:
        ; Join each even/odd limb pair into one 52-bit value:
        ;   even + (odd << 26).
        movsxd  rax, eax
        shl     r9, 26
        add     rax, r9
        movsxd  r10, r10d
        shl     r11, 26
        add     r10, r11
        movsxd  r12, r12d
        shl     r13, 26
        add     r12, r13
        movsxd  r14, r14d
        shl     r15, 26
        add     r14, r15
        movsxd  rdi, edi
        shl     rsi, 26
        add     rdi, rsi
        ; Pack the five 52-bit values (bit offsets 0, 52, 104, 156, 208)
        ; into four 64-bit words.
        mov     r9, r10
        mov     r11, r12
        mov     r13, r14
        shl     r9, 52
        sar     r10, 12
        shl     r11, 40
        sar     r12, 24
        shl     r13, 28
        sar     r14, 36
        shl     rdi, 16
        add     rax, r9
        adc     r10, r11
        adc     r12, r13
        adc     r14, rdi
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r10
        mov     QWORD PTR [rcx+16], r12
        mov     QWORD PTR [rcx+24], r14
        ; Restore the non-volatile vector registers and release the spill
        ; area.
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        vmovdqu xmm8, OWORD PTR [rsp+32]
        vmovdqu xmm9, OWORD PTR [rsp+48]
        vmovdqu xmm10, OWORD PTR [rsp+64]
        vmovdqu xmm11, OWORD PTR [rsp+80]
        vmovdqu xmm12, OWORD PTR [rsp+96]
        vmovdqu xmm13, OWORD PTR [rsp+112]
        vmovdqu xmm14, OWORD PTR [rsp+128]
        add     rsp, 144
        ; Leave the upper YMM halves clean for SSE code in the caller.
        vzeroupper
        ; Return 0 rather than a leftover word of the result.
        xor     eax, eax
        pop     rbx
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_256_mod_inv_avx2_4 ENDP
_text ENDS
ENDIF
ENDIF
IFDEF WOLFSSL_SP_384
; /* Multiply a and b into r. (r = a * b)
; *
; * Schoolbook multiply of two 6-word numbers into a 12-word product,
; * computed one result word (column) at a time.  r10, r11 and r12 rotate as
; * a three-word column accumulator.
; *
; * Windows x64 calling convention: rcx = r, rdx = a, r8 = b.
; *
; * r  A single precision integer.  Receives twelve 64-bit words.
; * a  A single precision integer.  Six 64-bit words.
; * b  A single precision integer.  Six 64-bit words.
; */
_text SEGMENT READONLY PARA
sp_384_mul_6 PROC
push r12
; mul writes rdx, so the a pointer is kept in r9 from here on.
mov r9, rdx
; 48 bytes of scratch: result words 0-5 are built here and copied to r only
; after every input word has been read (presumably so that r may be the same
; buffer as a or b - confirm against callers).
sub rsp, 48
; A[0] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9]
xor r12, r12
mov QWORD PTR [rsp], rax
mov r11, rdx
; A[0] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+8]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+8], r11
; A[0] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+8]
add r12, rax
adc r10, rdx
adc r11, 0
; A[2] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+16]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+16], r12
; A[0] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[1] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+8]
add r10, rax
adc r11, rdx
adc r12, 0
; A[2] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+16]
add r10, rax
adc r11, rdx
adc r12, 0
; A[3] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rsp+24], r10
; A[0] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[1] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+8]
add r11, rax
adc r12, rdx
adc r10, 0
; A[2] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+16]
add r11, rax
adc r12, rdx
adc r10, 0
; A[3] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+24]
add r11, rax
adc r12, rdx
adc r10, 0
; A[4] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+32]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rsp+32], r11
; A[0] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[1] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+8]
add r12, rax
adc r10, rdx
adc r11, 0
; A[2] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+16]
add r12, rax
adc r10, rdx
adc r11, 0
; A[3] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+24]
add r12, rax
adc r10, rdx
adc r11, 0
; A[4] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+32]
add r12, rax
adc r10, rdx
adc r11, 0
; A[5] * B[0]
mov rax, QWORD PTR [r8]
mul QWORD PTR [r9+40]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+40], r12
; From here on the result words (6-11) are written straight into r.
; A[1] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+8]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[2] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+16]
add r10, rax
adc r11, rdx
adc r12, 0
; A[3] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+24]
add r10, rax
adc r11, rdx
adc r12, 0
; A[4] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+32]
add r10, rax
adc r11, rdx
adc r12, 0
; A[5] * B[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r9+40]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rcx+48], r10
; A[2] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+16]
xor r10, r10
add r11, rax
adc r12, rdx
adc r10, 0
; A[3] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+24]
add r11, rax
adc r12, rdx
adc r10, 0
; A[4] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+32]
add r11, rax
adc r12, rdx
adc r10, 0
; A[5] * B[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r9+40]
add r11, rax
adc r12, rdx
adc r10, 0
mov QWORD PTR [rcx+56], r11
; A[3] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+24]
xor r11, r11
add r12, rax
adc r10, rdx
adc r11, 0
; A[4] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+32]
add r12, rax
adc r10, rdx
adc r11, 0
; A[5] * B[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r9+40]
add r12, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+64], r12
; A[4] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+32]
xor r12, r12
add r10, rax
adc r11, rdx
adc r12, 0
; A[5] * B[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r9+40]
add r10, rax
adc r11, rdx
adc r12, 0
mov QWORD PTR [rcx+72], r10
; A[5] * B[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r9+40]
add r11, rax
adc r12, rdx
mov QWORD PTR [rcx+80], r11
mov QWORD PTR [rcx+88], r12
; Copy the buffered result words 0-5 from the stack into r.
mov rax, QWORD PTR [rsp]
mov rdx, QWORD PTR [rsp+8]
mov r10, QWORD PTR [rsp+16]
mov r11, QWORD PTR [rsp+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], rdx
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
mov rax, QWORD PTR [rsp+32]
mov rdx, QWORD PTR [rsp+40]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], rdx
add rsp, 48
pop r12
ret
sp_384_mul_6 ENDP
_text ENDS
; /* Square a and put result in r. (r = a * a)
; *
; * Computed one result word (column) at a time.  Each off-diagonal product
; * A[i] * A[j] (i != j) is multiplied once and added twice; diagonal
; * products A[i] * A[i] are added once.  r9, r10 and r11 rotate as a
; * three-word column accumulator.
; *
; * Windows x64 calling convention: rcx = r, rdx = a.
; *
; * r  A single precision integer.  Receives twelve 64-bit words.
; * a  A single precision integer.  Six 64-bit words.
; */
_text SEGMENT READONLY PARA
sp_384_sqr_6 PROC
push r12
push r13
push r14
; mul writes rdx, so the a pointer is kept in r8 from here on.
mov r8, rdx
; 48 bytes of scratch: result words 0-5 are built here and copied to r only
; after every input word has been read (presumably so that r may be the same
; buffer as a - confirm against callers).
sub rsp, 48
; A[0] * A[0]
mov rax, QWORD PTR [r8]
mul rax
xor r11, r11
mov QWORD PTR [rsp], rax
mov r10, rdx
; A[0] * A[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r8]
xor r9, r9
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rsp+8], r10
; A[0] * A[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r8]
xor r10, r10
add r11, rax
adc r9, rdx
adc r10, 0
add r11, rax
adc r9, rdx
adc r10, 0
; A[1] * A[1]
mov rax, QWORD PTR [r8+8]
mul rax
add r11, rax
adc r9, rdx
adc r10, 0
mov QWORD PTR [rsp+16], r11
; A[0] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8]
xor r11, r11
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
; A[1] * A[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r8+8]
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+24], r9
; A[0] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8]
xor r9, r9
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
; A[1] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8+8]
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
; A[2] * A[2]
mov rax, QWORD PTR [r8+16]
mul rax
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rsp+32], r10
; Result word 5 has three off-diagonal products and no diagonal one.  They
; are summed once in r12:r13:r14, the sum is doubled, and the doubled sum is
; then added to the column accumulator.
; A[0] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rsp+40], r11
; From here on the result words (6-11) are written straight into r.
; A[1] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+8]
xor r11, r11
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
; A[2] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+16]
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
; A[3] * A[3]
mov rax, QWORD PTR [r8+24]
mul rax
add r9, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+48], r9
; A[2] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+16]
xor r9, r9
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
; A[3] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+24]
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rcx+56], r10
; A[3] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+24]
xor r10, r10
add r11, rax
adc r9, rdx
adc r10, 0
add r11, rax
adc r9, rdx
adc r10, 0
; A[4] * A[4]
mov rax, QWORD PTR [r8+32]
mul rax
add r11, rax
adc r9, rdx
adc r10, 0
mov QWORD PTR [rcx+64], r11
; A[4] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+32]
xor r11, r11
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+72], r9
; A[5] * A[5]
mov rax, QWORD PTR [r8+40]
mul rax
add r10, rax
adc r11, rdx
mov QWORD PTR [rcx+80], r10
mov QWORD PTR [rcx+88], r11
; Copy the buffered result words 0-5 from the stack into r.
mov rax, QWORD PTR [rsp]
mov rdx, QWORD PTR [rsp+8]
mov r12, QWORD PTR [rsp+16]
mov r13, QWORD PTR [rsp+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], rdx
mov QWORD PTR [rcx+16], r12
mov QWORD PTR [rcx+24], r13
mov rax, QWORD PTR [rsp+32]
mov rdx, QWORD PTR [rsp+40]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], rdx
add rsp, 48
pop r14
pop r13
pop r12
ret
sp_384_sqr_6 ENDP
_text ENDS
; /* Add b to a into r. (r = a + b)
; *
; * Windows x64 calling convention: rcx = r, rdx = a, r8 = b.
; *
; * r  A single precision integer.  Receives six 64-bit words.
; * a  A single precision integer.  Six 64-bit words.
; * b  A single precision integer.  Six 64-bit words.
; *
; * Returns the carry out of the top word (0 or 1) in rax.
; */
_text SEGMENT READONLY PARA
sp_384_add_6 PROC
        ; Clear the return register before the carry chain starts.
        xor     eax, eax
        ; One add/adc chain over the six words.  The next word of a is
        ; loaded before the previous sum is stored; mov leaves CF alone.
        mov     r10, QWORD PTR [rdx]
        add     r10, QWORD PTR [r8]
        mov     r11, QWORD PTR [rdx+8]
        mov     QWORD PTR [rcx], r10
        adc     r11, QWORD PTR [r8+8]
        mov     r10, QWORD PTR [rdx+16]
        mov     QWORD PTR [rcx+8], r11
        adc     r10, QWORD PTR [r8+16]
        mov     r11, QWORD PTR [rdx+24]
        mov     QWORD PTR [rcx+16], r10
        adc     r11, QWORD PTR [r8+24]
        mov     r10, QWORD PTR [rdx+32]
        mov     QWORD PTR [rcx+24], r11
        adc     r10, QWORD PTR [r8+32]
        mov     r11, QWORD PTR [rdx+40]
        mov     QWORD PTR [rcx+32], r10
        adc     r11, QWORD PTR [r8+40]
        mov     QWORD PTR [rcx+40], r11
        ; rax = final carry.
        setc    al
        ret
sp_384_add_6 ENDP
_text ENDS
; /* Sub b from a into r. (r = a - b)
; *
; * Windows x64 calling convention: rcx = r, rdx = a, r8 = b.
; *
; * r  A single precision integer.  Receives six 64-bit words.
; * a  A single precision integer.  Six 64-bit words.
; * b  A single precision integer.  Six 64-bit words.
; *
; * Returns 0 in rax when there is no borrow out of the top word and
; * all ones (-1) when there is.
; */
_text SEGMENT READONLY PARA
sp_384_sub_6 PROC
        push    r12
        push    r13
        ; Load all of a before anything is written to r.
        mov     rax, QWORD PTR [rdx]
        mov     r9, QWORD PTR [rdx+8]
        mov     r10, QWORD PTR [rdx+16]
        mov     r11, QWORD PTR [rdx+24]
        mov     r12, QWORD PTR [rdx+32]
        mov     r13, QWORD PTR [rdx+40]
        ; Borrow chain across the six words.
        sub     rax, QWORD PTR [r8]
        sbb     r9, QWORD PTR [r8+8]
        sbb     r10, QWORD PTR [r8+16]
        sbb     r11, QWORD PTR [r8+24]
        sbb     r12, QWORD PTR [r8+32]
        sbb     r13, QWORD PTR [r8+40]
        ; Stores do not change CF, so the final borrow is still live below.
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r9
        mov     QWORD PTR [rcx+16], r10
        mov     QWORD PTR [rcx+24], r11
        mov     QWORD PTR [rcx+32], r12
        mov     QWORD PTR [rcx+40], r13
        ; rax = 0 - CF.
        sbb     rax, rax
        pop     r13
        pop     r12
        ret
sp_384_sub_6 ENDP
_text ENDS
; /* Conditionally copy a into r using the mask m.
; * m is -1 to copy and 0 when not.
; *
; * Branch-free: r ^= (r ^ a) & m for each of the six words.
; *
; * Windows x64 calling convention: rcx = r, rdx = a, r8 = m.
; *
; * r  A single precision number to copy over.
; * a  A single precision number to copy.
; * m  Mask value to apply.
; */
_text SEGMENT READONLY PARA
sp_384_cond_copy_6 PROC
        push    r12
        push    r13
        ; Per word: difference between r and a, kept only where m has bits
        ; set.  Every read happens before the first write to r.
        mov     rax, QWORD PTR [rcx]
        xor     rax, QWORD PTR [rdx]
        and     rax, r8
        mov     r9, QWORD PTR [rcx+8]
        xor     r9, QWORD PTR [rdx+8]
        and     r9, r8
        mov     r10, QWORD PTR [rcx+16]
        xor     r10, QWORD PTR [rdx+16]
        and     r10, r8
        mov     r11, QWORD PTR [rcx+24]
        xor     r11, QWORD PTR [rdx+24]
        and     r11, r8
        mov     r12, QWORD PTR [rcx+32]
        xor     r12, QWORD PTR [rdx+32]
        and     r12, r8
        mov     r13, QWORD PTR [rcx+40]
        xor     r13, QWORD PTR [rdx+40]
        and     r13, r8
        ; Fold the masked differences back into r.
        xor     QWORD PTR [rcx], rax
        xor     QWORD PTR [rcx+8], r9
        xor     QWORD PTR [rcx+16], r10
        xor     QWORD PTR [rcx+24], r11
        xor     QWORD PTR [rcx+32], r12
        xor     QWORD PTR [rcx+40], r13
        pop     r13
        pop     r12
        ret
sp_384_cond_copy_6 ENDP
_text ENDS
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract and 0 when not subtracting.
; *
; * Windows x64 calling convention: rcx = r, rdx = a, r8 = b, r9 = m.
; *
; * r  A single precision number representing condition subtract result.
; * a  A single precision number to subtract from.
; * b  A single precision number to subtract.
; * m  Mask value to apply.
; *
; * Returns 0 in rax when there is no borrow out of the top word and
; * all ones (-1) when there is.
; */
_text SEGMENT READONLY PARA
sp_384_cond_sub_6 PROC
        sub     rsp, 48
        ; Stage b & m on the stack first: "and" rewrites CF, so the masking
        ; cannot be mixed into the borrow chain below.
        mov     rax, QWORD PTR [r8]
        and     rax, r9
        mov     QWORD PTR [rsp], rax
        mov     rax, QWORD PTR [r8+8]
        and     rax, r9
        mov     QWORD PTR [rsp+8], rax
        mov     rax, QWORD PTR [r8+16]
        and     rax, r9
        mov     QWORD PTR [rsp+16], rax
        mov     rax, QWORD PTR [r8+24]
        and     rax, r9
        mov     QWORD PTR [rsp+24], rax
        mov     rax, QWORD PTR [r8+32]
        and     rax, r9
        mov     QWORD PTR [rsp+32], rax
        mov     rax, QWORD PTR [r8+40]
        and     rax, r9
        mov     QWORD PTR [rsp+40], rax
        ; r = a - (b & m).  The next word of a is loaded before the previous
        ; difference is stored.
        mov     r10, QWORD PTR [rdx]
        sub     r10, QWORD PTR [rsp]
        mov     r11, QWORD PTR [rdx+8]
        sbb     r11, QWORD PTR [rsp+8]
        mov     QWORD PTR [rcx], r10
        mov     r10, QWORD PTR [rdx+16]
        sbb     r10, QWORD PTR [rsp+16]
        mov     QWORD PTR [rcx+8], r11
        mov     r11, QWORD PTR [rdx+24]
        sbb     r11, QWORD PTR [rsp+24]
        mov     QWORD PTR [rcx+16], r10
        mov     r10, QWORD PTR [rdx+32]
        sbb     r10, QWORD PTR [rsp+32]
        mov     QWORD PTR [rcx+24], r11
        mov     r11, QWORD PTR [rdx+40]
        sbb     r11, QWORD PTR [rsp+40]
        mov     QWORD PTR [rcx+32], r10
        mov     QWORD PTR [rcx+40], r11
        ; rax = 0 - CF, taken before the stack adjustment rewrites the flags.
        sbb     rax, rax
        add     rsp, 48
        ret
sp_384_cond_sub_6 ENDP
_text ENDS
; /* Reduce the number back to 384 bits using Montgomery reduction.
; *
; * Windows x64 calling convention: rcx = a, rdx = m, r8 = mp.
; *
; * This routine never reads m or mp: rdx and r8 are overwritten as scratch
; * before any use, mu is formed from shifts and adds, and the final
; * conditional subtract builds its six words from constants.  Those words
; * are 0x00000000FFFFFFFF, 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFE and three
; * all-ones words, which is the P-384 prime, so the routine is specific to
; * that modulus.
; *
; * a   A single precision number to reduce in place.  Twelve 64-bit words
; *     are read; the result is written to the first six.
; * m   The single precision number representing the modulus.  (Not read.)
; * mp  The digit representing the negative inverse of m mod 2^n.
; *     (Not read.)
; */
_text SEGMENT READONLY PARA
sp_384_mont_reduce_6 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
push rbp
; Words 0-5 of a live in r12, r13, r14, r15, rdi, rsi.  r11 collects the
; carry/borrow that runs over the top of the current eight-word window.
mov r12, QWORD PTR [rcx]
mov r13, QWORD PTR [rcx+8]
mov r14, QWORD PTR [rcx+16]
mov r15, QWORD PTR [rcx+24]
mov rdi, QWORD PTR [rcx+32]
mov rsi, QWORD PTR [rcx+40]
xor r11, r11
; a[0-7] += m[0-5] * mu[0..1] = m[0-5] * (a[0..1] * mp)
mov rbx, QWORD PTR [rcx+48]
mov rbp, QWORD PTR [rcx+56]
; mu (two words, rdx = low, rax = high) is formed from a[0..1] by a 32-bit
; left shift and additions; no multiply instruction is used.
mov rdx, r12
mov rax, r13
shld rax, rdx, 32
shl rdx, 32
add rdx, r12
adc rax, r13
add rax, r12
; r8, r9, r10 = mu shifted left by 32 bits (three words).
mov r8, rdx
mov r9, rax
mov r10, rax
shld r9, r8, 32
shl r8, 32
shr r10, 32
; Additive terms for this window.
add r12, r8
adc r13, r9
adc r14, r10
adc r15, 0
adc rdi, 0
adc rsi, 0
adc rbx, rdx
adc rbp, rax
adc r11, 0
; Subtractive terms for this window (carry of the combined term kept in rax).
add r8, rax
adc r9, rdx
adc r10, rax
mov rax, 0
adc rax, 0
sub r14, r9
sbb r15, r10
sbb rdi, rax
sbb rsi, 0
sbb rbx, 0
sbb rbp, 0
sbb r11, 0
; a[2-9] += m[0-5] * mu[0..1] = m[0-5] * (a[2..3] * mp)
; r12/r13 are reused for a[8] and a[9]; their old values are not read again.
mov r12, QWORD PTR [rcx+64]
mov r13, QWORD PTR [rcx+72]
mov rdx, r14
mov rax, r15
shld rax, rdx, 32
shl rdx, 32
add rdx, r14
adc rax, r15
add rax, r14
mov r8, rdx
mov r9, rax
mov r10, rax
shld r9, r8, 32
shl r8, 32
shr r10, 32
; Fold the carry word from the previous window into the two new top words
; and start a fresh carry word.
add r12, r11
adc r13, 0
mov r11, 0
adc r11, 0
add r14, r8
adc r15, r9
adc rdi, r10
adc rsi, 0
adc rbx, 0
adc rbp, 0
adc r12, rdx
adc r13, rax
adc r11, 0
add r8, rax
adc r9, rdx
adc r10, rax
mov rax, 0
adc rax, 0
sub rdi, r9
sbb rsi, r10
sbb rbx, rax
sbb rbp, 0
sbb r12, 0
sbb r13, 0
sbb r11, 0
; a[4-11] += m[0-5] * mu[0..1] = m[0-5] * (a[4..5] * mp)
; r14/r15 are reused for a[10] and a[11].
mov r14, QWORD PTR [rcx+80]
mov r15, QWORD PTR [rcx+88]
mov rdx, rdi
mov rax, rsi
shld rax, rdx, 32
shl rdx, 32
add rdx, rdi
adc rax, rsi
add rax, rdi
mov r8, rdx
mov r9, rax
mov r10, rax
shld r9, r8, 32
shl r8, 32
shr r10, 32
add r14, r11
adc r15, 0
mov r11, 0
adc r11, 0
add rdi, r8
adc rsi, r9
adc rbx, r10
adc rbp, 0
adc r12, 0
adc r13, 0
adc r14, rdx
adc r15, rax
adc r11, 0
add r8, rax
adc r9, rdx
adc r10, rax
mov rax, 0
adc rax, 0
sub rbx, r9
sbb rbp, r10
sbb r12, rax
sbb r13, 0
sbb r14, 0
sbb r15, 0
sbb r11, 0
; Subtract mod if carry
; The negated carry word in r11 is used as a mask.  The six words subtracted
; are mask >> 32, mask << 32, mask & 0xFFFFFFFFFFFFFFFE (18446744073709551614)
; and mask three times.
neg r11
mov r10, 18446744073709551614
mov r8, r11
mov r9, r11
shr r8, 32
shl r9, 32
and r10, r11
sub rbx, r8
sbb rbp, r9
sbb r12, r10
sbb r13, r11
sbb r14, r11
sbb r15, r11
; The reduced value is written to words 0-5 of a.
mov QWORD PTR [rcx], rbx
mov QWORD PTR [rcx+8], rbp
mov QWORD PTR [rcx+16], r12
mov QWORD PTR [rcx+24], r13
mov QWORD PTR [rcx+32], r14
mov QWORD PTR [rcx+40], r15
pop rbp
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_384_mont_reduce_6 ENDP
_text ENDS
; /* Reduce the number back to 384 bits using Montgomery reduction.
; *
; * Generic word-by-word reduction: six rounds, each computing
; * mu = a[i] * mp and adding m * mu at word i, followed by a masked
; * subtract of m when a carry ran out of the top word.
; *
; * Windows x64 calling convention: rcx = a, rdx = m, r8 = mp.
; *
; * a   A single precision number to reduce in place.  Twelve 64-bit words
; *     (a thirteenth is never touched); the result is left in the first
; *     six words.
; * m   The single precision number representing the modulus.  Six words.
; * mp  The digit representing the negative inverse of m mod 2^n.
; */
_text SEGMENT READONLY PARA
sp_384_mont_reduce_order_6 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        ; mul writes rdx, so the m pointer is kept in r9.
        mov     r9, rdx
        ; rsi carries the overflow out of the top word between rounds.
        xor     rsi, rsi
        ; i = 6
        mov     r10, 6
        ; The two lowest live words of a are kept in r15 and rdi.
        mov     r15, QWORD PTR [rcx]
        mov     rdi, QWORD PTR [rcx+8]
L_384_mont_loop_order_6:
        ; mu = a[i] * mp
        mov     r13, r15
        imul    r13, r8
        ; a[i+0] += m[0] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9]
        add     r15, rax
        adc     r12, rdx
        ; a[i+1] += m[1] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+8]
        mov     r15, rdi
        add     r15, rax
        adc     r11, rdx
        add     r15, r12
        adc     r11, 0
        ; a[i+2] += m[2] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+16]
        mov     rdi, QWORD PTR [rcx+16]
        add     rdi, rax
        adc     r12, rdx
        add     rdi, r11
        adc     r12, 0
        ; a[i+3] += m[3] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+24]
        mov     r14, QWORD PTR [rcx+24]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+24], r14
        adc     r11, 0
        ; a[i+4] += m[4] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+32]
        mov     r14, QWORD PTR [rcx+32]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+32], r14
        adc     r12, 0
        ; a[i+5] += m[5] * mu, also folding in the overflow from the
        ; previous round.
        mov     rax, r13
        mul     QWORD PTR [r9+40]
        mov     r14, QWORD PTR [rcx+40]
        add     r12, rax
        adc     rdx, rsi
        mov     rsi, 0
        adc     rsi, 0
        add     r14, r12
        mov     QWORD PTR [rcx+40], r14
        adc     QWORD PTR [rcx+48], rdx
        adc     rsi, 0
        ; i -= 1
        add     rcx, 8
        dec     r10
        jnz     L_384_mont_loop_order_6
        ; rcx now points at word 6.  Flush the two register-held words.
        mov     QWORD PTR [rcx], r15
        mov     QWORD PTR [rcx+8], rdi
        ; Overflow 0/1 -> mask 0/-1.
        neg     rsi
        ; Arguments for sp_384_cond_sub_6(r, a, b, m).  r8 must take the
        ; modulus pointer from r9 BEFORE r9 is overwritten with the mask.
        mov     r8, r9
        mov     r9, rsi
        mov     rdx, rcx
        sub     rcx, 48
        ; 32 bytes of shadow space plus 8 to bring rsp to a 16-byte boundary
        ; at the call, as the Windows x64 convention requires.
        sub     rsp, 40
        call    sp_384_cond_sub_6
        add     rsp, 40
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_384_mont_reduce_order_6 ENDP
_text ENDS
; /* Compare a with b in constant time.
; *
; * Words are compared from most to least significant.  A mask starts as
; * all ones and is cleared at the first differing word, so later words are
; * compared as 0 against 0 and cannot change the result.  No branches.
; *
; * Windows x64 calling convention: rcx = a, rdx = b.
; *
; * a  A single precision integer.  Six 64-bit words.
; * b  A single precision integer.  Six 64-bit words.
; * return -ve, 0 or +ve if a is less than, equal to or greater than b
; * respectively.
; */
_text SEGMENT READONLY PARA
sp_384_cmp_6 PROC
        push    r12
        ; r8 = 0, r9 = mask (all ones), r11 = 1, rax = running result (-1).
        xor     r8d, r8d
        mov     r9, -1
        mov     rax, -1
        mov     r11d, 1
        ; Word 5
        mov     r12, QWORD PTR [rdx+40]
        mov     r10, QWORD PTR [rcx+40]
        and     r12, r9
        and     r10, r9
        sub     r10, r12
        cmova   rax, r11
        cmovc   rax, r9
        cmovnz  r9, r8
        ; Word 4
        mov     r12, QWORD PTR [rdx+32]
        mov     r10, QWORD PTR [rcx+32]
        and     r12, r9
        and     r10, r9
        sub     r10, r12
        cmova   rax, r11
        cmovc   rax, r9
        cmovnz  r9, r8
        ; Word 3
        mov     r12, QWORD PTR [rdx+24]
        mov     r10, QWORD PTR [rcx+24]
        and     r12, r9
        and     r10, r9
        sub     r10, r12
        cmova   rax, r11
        cmovc   rax, r9
        cmovnz  r9, r8
        ; Word 2
        mov     r12, QWORD PTR [rdx+16]
        mov     r10, QWORD PTR [rcx+16]
        and     r12, r9
        and     r10, r9
        sub     r10, r12
        cmova   rax, r11
        cmovc   rax, r9
        cmovnz  r9, r8
        ; Word 1
        mov     r12, QWORD PTR [rdx+8]
        mov     r10, QWORD PTR [rcx+8]
        and     r12, r9
        and     r10, r9
        sub     r10, r12
        cmova   rax, r11
        cmovc   rax, r9
        cmovnz  r9, r8
        ; Word 0
        mov     r12, QWORD PTR [rdx]
        mov     r10, QWORD PTR [rcx]
        and     r12, r9
        and     r10, r9
        sub     r10, r12
        cmova   rax, r11
        cmovc   rax, r9
        cmovnz  r9, r8
        ; All words equal: mask is still -1 and rax is still -1, giving 0.
        ; Otherwise the mask is 0 and rax keeps its +1 / -1.
        xor     rax, r9
        pop     r12
        ret
sp_384_cmp_6 ENDP
_text ENDS
; /* Add a to a into r. (r = a + a)
; *
; * Windows x64 calling convention: rcx = r, rdx = a.
; *
; * r  A single precision integer.  Receives six 64-bit words.
; * a  A single precision integer.  Six 64-bit words.
; *
; * Returns the carry out of the top word (0 or 1) in rax.
; */
_text SEGMENT READONLY PARA
sp_384_dbl_6 PROC
        ; Clear the return register before the carry chain starts.
        xor     eax, eax
        ; Each word is added to itself, with the carry rippling upwards.
        ; The next word is loaded before the previous sum is stored.
        mov     r10, QWORD PTR [rdx]
        add     r10, r10
        mov     r11, QWORD PTR [rdx+8]
        mov     QWORD PTR [rcx], r10
        adc     r11, r11
        mov     r10, QWORD PTR [rdx+16]
        mov     QWORD PTR [rcx+8], r11
        adc     r10, r10
        mov     r11, QWORD PTR [rdx+24]
        mov     QWORD PTR [rcx+16], r10
        adc     r11, r11
        mov     r10, QWORD PTR [rdx+32]
        mov     QWORD PTR [rcx+24], r11
        adc     r10, r10
        mov     r11, QWORD PTR [rdx+40]
        mov     QWORD PTR [rcx+32], r10
        adc     r11, r11
        mov     QWORD PTR [rcx+40], r11
        ; rax = final carry.
        setc    al
        ret
sp_384_dbl_6 ENDP
_text ENDS
; /* Conditionally add a and b using the mask m.
; * m is -1 to add and 0 when not.
; *
; * Windows x64 calling convention: rcx = r, rdx = a, r8 = b, r9 = m.
; *
; * r  A single precision number representing conditional add result.
; * a  A single precision number to add with.
; * b  A single precision number to add.
; * m  Mask value to apply.
; *
; * Returns the carry out of the top word (0 or 1) in rax.
; */
_text SEGMENT READONLY PARA
sp_384_cond_add_6 PROC
        sub     rsp, 48
        ; Stage b & m on the stack first: "and" rewrites CF, so the masking
        ; cannot be mixed into the carry chain below.
        mov     r10, QWORD PTR [r8]
        and     r10, r9
        mov     QWORD PTR [rsp], r10
        mov     r10, QWORD PTR [r8+8]
        and     r10, r9
        mov     QWORD PTR [rsp+8], r10
        mov     r10, QWORD PTR [r8+16]
        and     r10, r9
        mov     QWORD PTR [rsp+16], r10
        mov     r10, QWORD PTR [r8+24]
        and     r10, r9
        mov     QWORD PTR [rsp+24], r10
        mov     r10, QWORD PTR [r8+32]
        and     r10, r9
        mov     QWORD PTR [rsp+32], r10
        mov     r10, QWORD PTR [r8+40]
        and     r10, r9
        mov     QWORD PTR [rsp+40], r10
        ; Clear the return register before the carry chain starts.
        xor     eax, eax
        ; r = a + (b & m).  The next word of a is loaded before the previous
        ; sum is stored.
        mov     r10, QWORD PTR [rdx]
        add     r10, QWORD PTR [rsp]
        mov     r11, QWORD PTR [rdx+8]
        adc     r11, QWORD PTR [rsp+8]
        mov     QWORD PTR [rcx], r10
        mov     r10, QWORD PTR [rdx+16]
        adc     r10, QWORD PTR [rsp+16]
        mov     QWORD PTR [rcx+8], r11
        mov     r11, QWORD PTR [rdx+24]
        adc     r11, QWORD PTR [rsp+24]
        mov     QWORD PTR [rcx+16], r10
        mov     r10, QWORD PTR [rdx+32]
        adc     r10, QWORD PTR [rsp+32]
        mov     QWORD PTR [rcx+24], r11
        mov     r11, QWORD PTR [rdx+40]
        adc     r11, QWORD PTR [rsp+40]
        mov     QWORD PTR [rcx+32], r10
        mov     QWORD PTR [rcx+40], r11
        ; rax = final carry, taken before the stack adjustment rewrites the
        ; flags.
        setc    al
        add     rsp, 48
        ret
sp_384_cond_add_6 ENDP
_text ENDS
; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
; *
; * When a is odd, m is added first so that the sum is even; the sum, which
; * can be 385 bits wide, is then shifted right by one.  The choice is made
; * with a mask rather than a branch.
; *
; * Windows x64 calling convention: rcx = r, rdx = a, r8 = m.
; *
; * r  Result of division by 2.  Six 64-bit words.
; * a  Number to divide.  Six 64-bit words.
; * m  Modulus (prime).  Six 64-bit words.
; */
_text SEGMENT READONLY PARA
sp_384_div2_6 PROC
        sub     rsp, 48
        ; r11 = 0 when a is even, all ones when a is odd.
        mov     rax, QWORD PTR [rdx]
        mov     r11, rax
        and     r11, 1
        neg     r11
        ; t = m & mask, staged on the stack.
        mov     r10, QWORD PTR [r8]
        and     r10, r11
        mov     QWORD PTR [rsp], r10
        mov     r10, QWORD PTR [r8+8]
        and     r10, r11
        mov     QWORD PTR [rsp+8], r10
        mov     r10, QWORD PTR [r8+16]
        and     r10, r11
        mov     QWORD PTR [rsp+16], r10
        mov     r10, QWORD PTR [r8+24]
        and     r10, r11
        mov     QWORD PTR [rsp+24], r10
        mov     r10, QWORD PTR [r8+32]
        and     r10, r11
        mov     QWORD PTR [rsp+32], r10
        mov     r10, QWORD PTR [r8+40]
        and     r10, r11
        mov     QWORD PTR [rsp+40], r10
        ; The mask is no longer needed; r11 becomes the 385th bit of the sum.
        xor     r11d, r11d
        ; t += a  (rax still holds a[0]).
        add     QWORD PTR [rsp], rax
        mov     r9, QWORD PTR [rdx+8]
        adc     QWORD PTR [rsp+8], r9
        mov     r9, QWORD PTR [rdx+16]
        adc     QWORD PTR [rsp+16], r9
        mov     r9, QWORD PTR [rdx+24]
        adc     QWORD PTR [rsp+24], r9
        mov     r9, QWORD PTR [rdx+32]
        adc     QWORD PTR [rsp+32], r9
        mov     r9, QWORD PTR [rdx+40]
        adc     QWORD PTR [rsp+40], r9
        adc     r11, 0
        ; r = t >> 1, pulling each word's new top bit from the word above.
        mov     r9, QWORD PTR [rsp]
        mov     r10, QWORD PTR [rsp+8]
        shrd    r9, r10, 1
        mov     QWORD PTR [rcx], r9
        mov     r9, QWORD PTR [rsp+16]
        shrd    r10, r9, 1
        mov     QWORD PTR [rcx+8], r10
        mov     r10, QWORD PTR [rsp+24]
        shrd    r9, r10, 1
        mov     QWORD PTR [rcx+16], r9
        mov     r9, QWORD PTR [rsp+32]
        shrd    r10, r9, 1
        mov     QWORD PTR [rcx+24], r10
        mov     r10, QWORD PTR [rsp+40]
        shrd    r9, r10, 1
        mov     QWORD PTR [rcx+32], r9
        shrd    r10, r11, 1
        mov     QWORD PTR [rcx+40], r10
        add     rsp, 48
        ret
sp_384_div2_6 ENDP
_text ENDS
IFNDEF WC_NO_CACHE_RESISTANT
; /* Touch each possible point that could be being copied.
; *
; * Reads table entries 1 to 32 (296 bytes apart) in full and keeps only the
; * one whose number equals idx, using compare masks instead of an indexed
; * load.  Entry 0 is never read; if idx is outside 1..32 the copied fields
; * are all zero.  From each entry, 48 bytes are copied at each of the byte
; * offsets 0, 96 and 192.
; *
; * Windows x64 calling convention: rcx = r, rdx = table, r8 = idx.
; * xmm6-xmm15 are non-volatile in that convention and are used here, so
; * they are spilled to the stack on entry and restored before returning.
; *
; * r      Point to copy into.
; * table  Table - start of the entries to access
; * idx    Index of point to retrieve.
; */
_text SEGMENT READONLY PARA
sp_384_get_point_33_6 PROC
        ; Spill the non-volatile vector registers this routine overwrites.
        sub     rsp, 160
        movdqu  OWORD PTR [rsp], xmm6
        movdqu  OWORD PTR [rsp+16], xmm7
        movdqu  OWORD PTR [rsp+32], xmm8
        movdqu  OWORD PTR [rsp+48], xmm9
        movdqu  OWORD PTR [rsp+64], xmm10
        movdqu  OWORD PTR [rsp+80], xmm11
        movdqu  OWORD PTR [rsp+96], xmm12
        movdqu  OWORD PTR [rsp+112], xmm13
        movdqu  OWORD PTR [rsp+128], xmm14
        movdqu  OWORD PTR [rsp+144], xmm15
        ; xmm13 = idx in every lane, xmm15 = 1 in every lane,
        ; xmm14 = number of the entry under rdx (starts at 1).
        mov     rax, 1
        movd    xmm13, r8d
        add     rdx, 296
        movd    xmm15, eax
        mov     rax, 32
        pshufd  xmm15, xmm15, 0
        pshufd  xmm13, xmm13, 0
        pxor    xmm0, xmm0
        pxor    xmm1, xmm1
        pxor    xmm2, xmm2
        pxor    xmm3, xmm3
        pxor    xmm4, xmm4
        pxor    xmm5, xmm5
        movdqa  xmm14, xmm15
        ; Pass 1: the 48-byte fields at offsets 0 and 96 of every entry.
L_384_get_point_33_6_start:
        ; xmm12 = all ones when this entry is the wanted one, else zero.
        movdqa  xmm12, xmm14
        paddd   xmm14, xmm15
        pcmpeqd xmm12, xmm13
        movdqu  xmm6, [rdx]
        movdqu  xmm7, [rdx+16]
        movdqu  xmm8, [rdx+32]
        movdqu  xmm9, [rdx+96]
        movdqu  xmm10, [rdx+112]
        movdqu  xmm11, [rdx+128]
        add     rdx, 296
        pand    xmm6, xmm12
        pand    xmm7, xmm12
        pand    xmm8, xmm12
        pand    xmm9, xmm12
        pand    xmm10, xmm12
        pand    xmm11, xmm12
        por     xmm0, xmm6
        por     xmm1, xmm7
        por     xmm2, xmm8
        por     xmm3, xmm9
        por     xmm4, xmm10
        por     xmm5, xmm11
        dec     rax
        jnz     L_384_get_point_33_6_start
        movdqu  [rcx], xmm0
        movdqu  [rcx+16], xmm1
        movdqu  [rcx+32], xmm2
        movdqu  [rcx+96], xmm3
        movdqu  [rcx+112], xmm4
        movdqu  [rcx+128], xmm5
        ; Pass 2: the 48-byte field at offset 192.  Step back over the 32
        ; entries (32 * 296 = 9472) to entry 1 and restart the entry counter.
        ; xmm13 and xmm15 still hold idx and 1 from pass 1.
        sub     rdx, 9472
        mov     rax, 32
        pxor    xmm0, xmm0
        pxor    xmm1, xmm1
        pxor    xmm2, xmm2
        movdqa  xmm14, xmm15
L_384_get_point_33_6_start_2:
        movdqa  xmm12, xmm14
        paddd   xmm14, xmm15
        pcmpeqd xmm12, xmm13
        movdqu  xmm6, [rdx+192]
        movdqu  xmm7, [rdx+208]
        movdqu  xmm8, [rdx+224]
        add     rdx, 296
        pand    xmm6, xmm12
        pand    xmm7, xmm12
        pand    xmm8, xmm12
        por     xmm0, xmm6
        por     xmm1, xmm7
        por     xmm2, xmm8
        dec     rax
        jnz     L_384_get_point_33_6_start_2
        movdqu  [rcx+192], xmm0
        movdqu  [rcx+208], xmm1
        movdqu  [rcx+224], xmm2
        ; Restore the non-volatile vector registers.
        movdqu  xmm6, OWORD PTR [rsp]
        movdqu  xmm7, OWORD PTR [rsp+16]
        movdqu  xmm8, OWORD PTR [rsp+32]
        movdqu  xmm9, OWORD PTR [rsp+48]
        movdqu  xmm10, OWORD PTR [rsp+64]
        movdqu  xmm11, OWORD PTR [rsp+80]
        movdqu  xmm12, OWORD PTR [rsp+96]
        movdqu  xmm13, OWORD PTR [rsp+112]
        movdqu  xmm14, OWORD PTR [rsp+128]
        movdqu  xmm15, OWORD PTR [rsp+144]
        add     rsp, 160
        ret
sp_384_get_point_33_6 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Touch each possible point that could be being copied.
; *
; * AVX2 version.  Reads table entries 1 to 32 (296 bytes apart) in full and
; * keeps only the one whose number equals idx, using compare masks instead
; * of an indexed load.  Entry 0 is never read; if idx is outside 1..32 the
; * copied fields are all zero.  From each entry, 48 bytes are copied at
; * each of the byte offsets 0, 96 and 192.
; *
; * Windows x64 calling convention: rcx = r, rdx = table, r8 = idx.
; * xmm6-xmm15 are non-volatile in that convention and are used here, so
; * they are spilled to the stack on entry and restored before returning.
; *
; * r      Point to copy into.
; * table  Table - start of the entries to access
; * idx    Index of point to retrieve.
; */
_text SEGMENT READONLY PARA
sp_384_get_point_33_avx2_6 PROC
        ; Spill the non-volatile vector registers this routine overwrites.
        sub     rsp, 160
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        vmovdqu OWORD PTR [rsp+64], xmm10
        vmovdqu OWORD PTR [rsp+80], xmm11
        vmovdqu OWORD PTR [rsp+96], xmm12
        vmovdqu OWORD PTR [rsp+112], xmm13
        vmovdqu OWORD PTR [rsp+128], xmm14
        vmovdqu OWORD PTR [rsp+144], xmm15
        ; ymm13 = idx in every lane, ymm15 = 1 in every lane (vpermd with an
        ; all-zero index vector copies lane 0 to all lanes),
        ; ymm14 = number of the entry under rdx (starts at 1).
        ; VEX-encoded vmovd is used so no legacy SSE instruction is mixed in.
        mov     rax, 1
        vmovd   xmm13, r8d
        add     rdx, 296
        vmovd   xmm15, eax
        mov     rax, 32
        vpxor   ymm14, ymm14, ymm14
        vpermd  ymm13, ymm14, ymm13
        vpermd  ymm15, ymm14, ymm15
        ; Accumulators: each 48-byte field is held as 32 + 16 bytes.
        vpxor   ymm0, ymm0, ymm0
        vpxor   xmm1, xmm1, xmm1
        vpxor   ymm2, ymm2, ymm2
        vpxor   xmm3, xmm3, xmm3
        vpxor   ymm4, ymm4, ymm4
        vpxor   xmm5, xmm5, xmm5
        vmovdqa ymm14, ymm15
L_384_get_point_33_avx2_6_start:
        ; ymm12 = all ones when this entry is the wanted one, else zero.
        vpcmpeqd ymm12, ymm14, ymm13
        vpaddd  ymm14, ymm14, ymm15
        vmovupd ymm6, [rdx]
        vmovdqu xmm7, OWORD PTR [rdx+32]
        vmovupd ymm8, [rdx+96]
        vmovdqu xmm9, OWORD PTR [rdx+128]
        vmovupd ymm10, [rdx+192]
        vmovdqu xmm11, OWORD PTR [rdx+224]
        add     rdx, 296
        vpand   ymm6, ymm6, ymm12
        vpand   xmm7, xmm7, xmm12
        vpand   ymm8, ymm8, ymm12
        vpand   xmm9, xmm9, xmm12
        vpand   ymm10, ymm10, ymm12
        vpand   xmm11, xmm11, xmm12
        vpor    ymm0, ymm0, ymm6
        vpor    xmm1, xmm1, xmm7
        vpor    ymm2, ymm2, ymm8
        vpor    xmm3, xmm3, xmm9
        vpor    ymm4, ymm4, ymm10
        vpor    xmm5, xmm5, xmm11
        dec     rax
        jnz     L_384_get_point_33_avx2_6_start
        vmovupd YMMWORD PTR [rcx], ymm0
        vmovdqu [rcx+32], xmm1
        vmovupd YMMWORD PTR [rcx+96], ymm2
        vmovdqu [rcx+128], xmm3
        vmovupd YMMWORD PTR [rcx+192], ymm4
        vmovdqu [rcx+224], xmm5
        ; Restore the non-volatile vector registers.
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        vmovdqu xmm8, OWORD PTR [rsp+32]
        vmovdqu xmm9, OWORD PTR [rsp+48]
        vmovdqu xmm10, OWORD PTR [rsp+64]
        vmovdqu xmm11, OWORD PTR [rsp+80]
        vmovdqu xmm12, OWORD PTR [rsp+96]
        vmovdqu xmm13, OWORD PTR [rsp+112]
        vmovdqu xmm14, OWORD PTR [rsp+128]
        vmovdqu xmm15, OWORD PTR [rsp+144]
        add     rsp, 160
        ; Leave the upper YMM halves clean for SSE code in the caller.
        vzeroupper
        ret
sp_384_get_point_33_avx2_6 ENDP
_text ENDS
ENDIF
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Multiply a and b into r. (r = a * b)
; *
; * Row-by-row 6x6 word multiplication using MULX together with the ADCX
; * (carry flag) and ADOX (overflow flag) addition chains.
; * Result words 0..4 are parked in a 40-byte stack buffer and every store
; * to r is issued after the last load from a and b.
; *
; * r Result of multiplication (rcx). Twelve 64-bit words are written.
; * a First number to multiply (rdx, copied to rax). Six words are read.
; * b Second number to multiply (r8). Six words are read.
; */
_text SEGMENT READONLY PARA
sp_384_mul_avx2_6 PROC
; Save the callee-saved registers that are used as accumulators below.
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
; rdx is the implicit MULX multiplicand, so the pointer to a moves to rax.
mov rax, rdx
; Scratch space for result words 0..4.
sub rsp, 40
; rbx = 0 for the whole routine; the xor also clears CF and OF.
xor rbx, rbx
; Row 0: rdx = A[0]. Only the CF chain is needed for the first row.
mov rdx, QWORD PTR [rax]
; A[0] * B[0]
mulx r12, r11, QWORD PTR [r8]
; A[0] * B[1]
mulx r13, r9, QWORD PTR [r8+8]
adcx r12, r9
; A[0] * B[2]
mulx r14, r9, QWORD PTR [r8+16]
adcx r13, r9
; A[0] * B[3]
mulx r15, r9, QWORD PTR [r8+24]
adcx r14, r9
; A[0] * B[4]
mulx rdi, r9, QWORD PTR [r8+32]
adcx r15, r9
; A[0] * B[5]
mulx rsi, r9, QWORD PTR [r8+40]
adcx rdi, r9
adcx rsi, rbx
; Result word 0 is final: park it and reuse r11 as the new top word.
mov QWORD PTR [rsp], r11
mov r11, 0
adcx r11, rbx
; Reset CF and OF before starting the next row.
xor rbx, rbx
; Row 1: rdx = A[1]. Low halves go in on the CF chain, high halves on OF.
mov rdx, QWORD PTR [rax+8]
; A[1] * B[0]
mulx r10, r9, QWORD PTR [r8]
adcx r12, r9
adox r13, r10
; A[1] * B[1]
mulx r10, r9, QWORD PTR [r8+8]
adcx r13, r9
adox r14, r10
; A[1] * B[2]
mulx r10, r9, QWORD PTR [r8+16]
adcx r14, r9
adox r15, r10
; A[1] * B[3]
mulx r10, r9, QWORD PTR [r8+24]
adcx r15, r9
adox rdi, r10
; A[1] * B[4]
mulx r10, r9, QWORD PTR [r8+32]
adcx rdi, r9
adox rsi, r10
; A[1] * B[5]
mulx r10, r9, QWORD PTR [r8+40]
adcx rsi, r9
adox r11, r10
adcx r11, rbx
; Result word 1 is final: park it and reuse r12 as the new top word,
; collecting whatever is left in CF and OF.
mov QWORD PTR [rsp+8], r12
mov r12, 0
adcx r12, rbx
adox r12, rbx
xor rbx, rbx
; Row 2: rdx = A[2].
mov rdx, QWORD PTR [rax+16]
; A[2] * B[0]
mulx r10, r9, QWORD PTR [r8]
adcx r13, r9
adox r14, r10
; A[2] * B[1]
mulx r10, r9, QWORD PTR [r8+8]
adcx r14, r9
adox r15, r10
; A[2] * B[2]
mulx r10, r9, QWORD PTR [r8+16]
adcx r15, r9
adox rdi, r10
; A[2] * B[3]
mulx r10, r9, QWORD PTR [r8+24]
adcx rdi, r9
adox rsi, r10
; A[2] * B[4]
mulx r10, r9, QWORD PTR [r8+32]
adcx rsi, r9
adox r11, r10
; A[2] * B[5]
mulx r10, r9, QWORD PTR [r8+40]
adcx r11, r9
adox r12, r10
adcx r12, rbx
; Result word 2 is final: park it and reuse r13 as the new top word.
mov QWORD PTR [rsp+16], r13
mov r13, 0
adcx r13, rbx
adox r13, rbx
xor rbx, rbx
; Row 3: rdx = A[3].
mov rdx, QWORD PTR [rax+24]
; A[3] * B[0]
mulx r10, r9, QWORD PTR [r8]
adcx r14, r9
adox r15, r10
; A[3] * B[1]
mulx r10, r9, QWORD PTR [r8+8]
adcx r15, r9
adox rdi, r10
; A[3] * B[2]
mulx r10, r9, QWORD PTR [r8+16]
adcx rdi, r9
adox rsi, r10
; A[3] * B[3]
mulx r10, r9, QWORD PTR [r8+24]
adcx rsi, r9
adox r11, r10
; A[3] * B[4]
mulx r10, r9, QWORD PTR [r8+32]
adcx r11, r9
adox r12, r10
; A[3] * B[5]
mulx r10, r9, QWORD PTR [r8+40]
adcx r12, r9
adox r13, r10
adcx r13, rbx
; Result word 3 is final: park it and reuse r14 as the new top word.
mov QWORD PTR [rsp+24], r14
mov r14, 0
adcx r14, rbx
adox r14, rbx
xor rbx, rbx
; Row 4: rdx = A[4].
mov rdx, QWORD PTR [rax+32]
; A[4] * B[0]
mulx r10, r9, QWORD PTR [r8]
adcx r15, r9
adox rdi, r10
; A[4] * B[1]
mulx r10, r9, QWORD PTR [r8+8]
adcx rdi, r9
adox rsi, r10
; A[4] * B[2]
mulx r10, r9, QWORD PTR [r8+16]
adcx rsi, r9
adox r11, r10
; A[4] * B[3]
mulx r10, r9, QWORD PTR [r8+24]
adcx r11, r9
adox r12, r10
; A[4] * B[4]
mulx r10, r9, QWORD PTR [r8+32]
adcx r12, r9
adox r13, r10
; A[4] * B[5]
mulx r10, r9, QWORD PTR [r8+40]
adcx r13, r9
adox r14, r10
adcx r14, rbx
; Result word 4 is final. Unlike the earlier rows, no new top word is
; seeded here and the flags are not reset: r15 is produced directly by
; the last MULX of row 5.
mov QWORD PTR [rsp+32], r15
; Row 5: rdx = A[5].
mov rdx, QWORD PTR [rax+40]
; A[5] * B[0]
mulx r10, r9, QWORD PTR [r8]
adcx rdi, r9
adox rsi, r10
; A[5] * B[1]
mulx r10, r9, QWORD PTR [r8+8]
adcx rsi, r9
adox r11, r10
; A[5] * B[2]
mulx r10, r9, QWORD PTR [r8+16]
adcx r11, r9
adox r12, r10
; A[5] * B[3]
mulx r10, r9, QWORD PTR [r8+24]
adcx r12, r9
adox r13, r10
; A[5] * B[4]
mulx r10, r9, QWORD PTR [r8+32]
adcx r13, r9
adox r14, r10
; A[5] * B[5]
mulx r15, r9, QWORD PTR [r8+40]
adcx r14, r9
adox r15, rbx
adcx r15, rbx
; All loads from a and b are done: write result words 5..11 from registers.
mov QWORD PTR [rcx+40], rdi
mov QWORD PTR [rcx+48], rsi
mov QWORD PTR [rcx+56], r11
mov QWORD PTR [rcx+64], r12
mov QWORD PTR [rcx+72], r13
mov QWORD PTR [rcx+80], r14
mov QWORD PTR [rcx+88], r15
; Copy result words 0..4 from the stack buffer into r.
mov r11, QWORD PTR [rsp]
mov r12, QWORD PTR [rsp+8]
mov r13, QWORD PTR [rsp+16]
mov r14, QWORD PTR [rsp+24]
mov r15, QWORD PTR [rsp+32]
mov QWORD PTR [rcx], r11
mov QWORD PTR [rcx+8], r12
mov QWORD PTR [rcx+16], r13
mov QWORD PTR [rcx+24], r14
mov QWORD PTR [rcx+32], r15
; Release the scratch buffer and restore registers in reverse push order.
add rsp, 40
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_384_mul_avx2_6 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Reduce the number back to 384 bits using Montgomery reduction.
; *
; * Six fully unrolled iterations each add mu * m to a (mu = a[i] * mp),
; * followed by a masked subtraction of m that writes the final six words
; * to a[0..5].
; *
; * a A single precision number to reduce in place (rcx). Words 0..11
; *   are read; words 0..5 receive the result.
; * m The single precision number representing the modulus (rdx, copied
; *   to rax).
; * mp The digit representing the negative inverse of m mod 2^n (r8).
; *
; * Register use: r14 holds the current a[i] (it is never written back to
; * memory), r15 holds the carry passed between iterations, and r13 is
; * zeroed at the top of each iteration, which also clears CF and OF.
; */
_text SEGMENT READONLY PARA
sp_384_mont_reduce_order_avx2_6 PROC
push r12
push r13
push r14
push r15
; rdx is the implicit MULX multiplicand, so the pointer to m moves to rax.
mov rax, rdx
; r15 = carry into the first iteration = 0.
xor r15, r15
; r14 = a[0].
mov r14, QWORD PTR [rcx]
xor r13, r13
; This label is not referenced inside the procedure: the loop is unrolled.
L_mont_loop_order_avx2_6:
; ---- Iteration 0 (i = 0) ----
; mu = a[i] * mp
mov rdx, r14
mov r11, r14
imul rdx, r8
; r13 = 0, CF = OF = 0.
xor r13, r13
; a[i+0] += m[0] * mu
mulx r10, r9, QWORD PTR [rax]
mov r14, QWORD PTR [rcx+8]
adcx r11, r9
adox r14, r10
; a[i+1] += m[1] * mu
mulx r10, r9, QWORD PTR [rax+8]
mov r11, QWORD PTR [rcx+16]
adcx r14, r9
adox r11, r10
; a[i+2] += m[2] * mu
mulx r10, r9, QWORD PTR [rax+16]
mov r12, QWORD PTR [rcx+24]
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+16], r11
; a[i+3] += m[3] * mu
mulx r10, r9, QWORD PTR [rax+24]
mov r11, QWORD PTR [rcx+32]
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+24], r12
; a[i+4] += m[4] * mu
mulx r10, r9, QWORD PTR [rax+32]
mov r12, QWORD PTR [rcx+40]
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+32], r11
; a[i+5] += m[5] * mu
mulx r10, r9, QWORD PTR [rax+40]
mov r11, QWORD PTR [rcx+48]
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+40], r12
; Add the carry from the previous iteration into a[i+6].
adcx r11, r15
mov QWORD PTR [rcx+48], r11
; r15 = OF + CF: the carry handed to the next iteration.
mov r15, r13
adox r15, r13
adcx r15, r13
; ---- Iteration 1 (i = 1) ----
; mu = a[i] * mp
mov rdx, r14
mov r11, r14
imul rdx, r8
xor r13, r13
; a[i+0] += m[0] * mu
mulx r10, r9, QWORD PTR [rax]
mov r14, QWORD PTR [rcx+16]
adcx r11, r9
adox r14, r10
; a[i+1] += m[1] * mu
mulx r10, r9, QWORD PTR [rax+8]
mov r11, QWORD PTR [rcx+24]
adcx r14, r9
adox r11, r10
; a[i+2] += m[2] * mu
mulx r10, r9, QWORD PTR [rax+16]
mov r12, QWORD PTR [rcx+32]
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+24], r11
; a[i+3] += m[3] * mu
mulx r10, r9, QWORD PTR [rax+24]
mov r11, QWORD PTR [rcx+40]
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+32], r12
; a[i+4] += m[4] * mu
mulx r10, r9, QWORD PTR [rax+32]
mov r12, QWORD PTR [rcx+48]
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+40], r11
; a[i+5] += m[5] * mu
mulx r10, r9, QWORD PTR [rax+40]
mov r11, QWORD PTR [rcx+56]
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+48], r12
adcx r11, r15
mov QWORD PTR [rcx+56], r11
mov r15, r13
adox r15, r13
adcx r15, r13
; ---- Iteration 2 (i = 2) ----
; mu = a[i] * mp
mov rdx, r14
mov r11, r14
imul rdx, r8
xor r13, r13
; a[i+0] += m[0] * mu
mulx r10, r9, QWORD PTR [rax]
mov r14, QWORD PTR [rcx+24]
adcx r11, r9
adox r14, r10
; a[i+1] += m[1] * mu
mulx r10, r9, QWORD PTR [rax+8]
mov r11, QWORD PTR [rcx+32]
adcx r14, r9
adox r11, r10
; a[i+2] += m[2] * mu
mulx r10, r9, QWORD PTR [rax+16]
mov r12, QWORD PTR [rcx+40]
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+32], r11
; a[i+3] += m[3] * mu
mulx r10, r9, QWORD PTR [rax+24]
mov r11, QWORD PTR [rcx+48]
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+40], r12
; a[i+4] += m[4] * mu
mulx r10, r9, QWORD PTR [rax+32]
mov r12, QWORD PTR [rcx+56]
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+48], r11
; a[i+5] += m[5] * mu
mulx r10, r9, QWORD PTR [rax+40]
mov r11, QWORD PTR [rcx+64]
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+56], r12
adcx r11, r15
mov QWORD PTR [rcx+64], r11
mov r15, r13
adox r15, r13
adcx r15, r13
; ---- Iteration 3 (i = 3) ----
; mu = a[i] * mp
mov rdx, r14
mov r11, r14
imul rdx, r8
xor r13, r13
; a[i+0] += m[0] * mu
mulx r10, r9, QWORD PTR [rax]
mov r14, QWORD PTR [rcx+32]
adcx r11, r9
adox r14, r10
; a[i+1] += m[1] * mu
mulx r10, r9, QWORD PTR [rax+8]
mov r11, QWORD PTR [rcx+40]
adcx r14, r9
adox r11, r10
; a[i+2] += m[2] * mu
mulx r10, r9, QWORD PTR [rax+16]
mov r12, QWORD PTR [rcx+48]
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+40], r11
; a[i+3] += m[3] * mu
mulx r10, r9, QWORD PTR [rax+24]
mov r11, QWORD PTR [rcx+56]
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+48], r12
; a[i+4] += m[4] * mu
mulx r10, r9, QWORD PTR [rax+32]
mov r12, QWORD PTR [rcx+64]
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+56], r11
; a[i+5] += m[5] * mu
mulx r10, r9, QWORD PTR [rax+40]
mov r11, QWORD PTR [rcx+72]
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+64], r12
adcx r11, r15
mov QWORD PTR [rcx+72], r11
mov r15, r13
adox r15, r13
adcx r15, r13
; ---- Iteration 4 (i = 4) ----
; mu = a[i] * mp
mov rdx, r14
mov r11, r14
imul rdx, r8
xor r13, r13
; a[i+0] += m[0] * mu
mulx r10, r9, QWORD PTR [rax]
mov r14, QWORD PTR [rcx+40]
adcx r11, r9
adox r14, r10
; a[i+1] += m[1] * mu
mulx r10, r9, QWORD PTR [rax+8]
mov r11, QWORD PTR [rcx+48]
adcx r14, r9
adox r11, r10
; a[i+2] += m[2] * mu
mulx r10, r9, QWORD PTR [rax+16]
mov r12, QWORD PTR [rcx+56]
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+48], r11
; a[i+3] += m[3] * mu
mulx r10, r9, QWORD PTR [rax+24]
mov r11, QWORD PTR [rcx+64]
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+56], r12
; a[i+4] += m[4] * mu
mulx r10, r9, QWORD PTR [rax+32]
mov r12, QWORD PTR [rcx+72]
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+64], r11
; a[i+5] += m[5] * mu
mulx r10, r9, QWORD PTR [rax+40]
mov r11, QWORD PTR [rcx+80]
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+72], r12
adcx r11, r15
mov QWORD PTR [rcx+80], r11
mov r15, r13
adox r15, r13
adcx r15, r13
; ---- Iteration 5 (i = 5) ----
; mu = a[i] * mp
mov rdx, r14
mov r11, r14
imul rdx, r8
xor r13, r13
; a[i+0] += m[0] * mu
mulx r10, r9, QWORD PTR [rax]
mov r14, QWORD PTR [rcx+48]
adcx r11, r9
adox r14, r10
; a[i+1] += m[1] * mu
mulx r10, r9, QWORD PTR [rax+8]
mov r11, QWORD PTR [rcx+56]
adcx r14, r9
adox r11, r10
; a[i+2] += m[2] * mu
mulx r10, r9, QWORD PTR [rax+16]
mov r12, QWORD PTR [rcx+64]
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+56], r11
; a[i+3] += m[3] * mu
mulx r10, r9, QWORD PTR [rax+24]
mov r11, QWORD PTR [rcx+72]
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+64], r12
; a[i+4] += m[4] * mu
mulx r10, r9, QWORD PTR [rax+32]
mov r12, QWORD PTR [rcx+80]
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+72], r11
; a[i+5] += m[5] * mu
mulx r10, r9, QWORD PTR [rax+40]
mov r11, QWORD PTR [rcx+88]
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+80], r12
adcx r11, r15
mov QWORD PTR [rcx+88], r11
mov r15, r13
adox r15, r13
adcx r15, r13
; ---- Final masked subtraction ----
; Negate the final carry so that r15 is used as the PEXT mask: an
; all-ones mask passes a word of m through, a zero mask yields 0.
neg r15
; r8 = destination (a[0..5]); rcx now points at a[6..11].
mov r8, rcx
add rcx, 48
; Word 0 of the upper half is still in r14 (it was never stored).
mov r10, QWORD PTR [rax]
mov rdx, r14
pext r10, r10, r15
sub rdx, r10
; Words 1..5: load m[k] and a[6+k], mask m[k], subtract with borrow.
; mov and pext leave the flags alone, so the borrow survives between sbb's.
mov r10, QWORD PTR [rax+8]
mov r9, QWORD PTR [rcx+8]
pext r10, r10, r15
mov QWORD PTR [r8], rdx
sbb r9, r10
mov rdx, QWORD PTR [rax+16]
mov r10, QWORD PTR [rcx+16]
pext rdx, rdx, r15
mov QWORD PTR [r8+8], r9
sbb r10, rdx
mov r9, QWORD PTR [rax+24]
mov rdx, QWORD PTR [rcx+24]
pext r9, r9, r15
mov QWORD PTR [r8+16], r10
sbb rdx, r9
mov r10, QWORD PTR [rax+32]
mov r9, QWORD PTR [rcx+32]
pext r10, r10, r15
mov QWORD PTR [r8+24], rdx
sbb r9, r10
mov rdx, QWORD PTR [rax+40]
mov r10, QWORD PTR [rcx+40]
pext rdx, rdx, r15
mov QWORD PTR [r8+32], r9
sbb r10, rdx
mov QWORD PTR [r8+40], r10
pop r15
pop r14
pop r13
pop r12
ret
sp_384_mont_reduce_order_avx2_6 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Square a and put result in r. (r = a * a)
; *
; * The cross products A[i] * A[j] (i > j) are accumulated once, then the
; * sum is doubled while the squares A[i] * A[i] are added in.
; *
; * r Result of squaring (rcx). Twelve 64-bit words are written.
; * a Number to square in Montgomery form (rdx, copied to rax). Six words
; *   are read.
; *
; * The pointer r is kept on the stack while rcx serves as a zero register.
; */
_text SEGMENT READONLY PARA
sp_384_sqr_avx2_6 PROC
; Save the callee-saved registers used as accumulators.
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
push rbp
; rdx is the implicit MULX multiplicand, so the pointer to a moves to rax.
mov rax, rdx
; Park r on the stack; rcx = 0 from here on (the xor also clears CF and OF).
push rcx
xor rcx, rcx
; Preload A[0]..A[3]; rsi, rbx and rbp are later copied into rdx.
mov rdx, QWORD PTR [rax]
mov rsi, QWORD PTR [rax+8]
mov rbx, QWORD PTR [rax+16]
mov rbp, QWORD PTR [rax+24]
; Diagonal 0
; A[1] * A[0]
mulx r11, r10, QWORD PTR [rax+8]
; A[2] * A[0]
mulx r12, r8, QWORD PTR [rax+16]
adcx r11, r8
; A[3] * A[0]
mulx r13, r8, QWORD PTR [rax+24]
adcx r12, r8
; A[4] * A[0]
mulx r14, r8, QWORD PTR [rax+32]
adcx r13, r8
; A[5] * A[0]
mulx r15, r8, QWORD PTR [rax+40]
adcx r14, r8
adcx r15, rcx
; Diagonal 1
; rdx = A[1]
mov rdx, rsi
; A[2] * A[1]
mulx r9, r8, QWORD PTR [rax+16]
adcx r12, r8
adox r13, r9
; A[3] * A[1]
mulx r9, r8, QWORD PTR [rax+24]
adcx r13, r8
adox r14, r9
; A[4] * A[1]
mulx r9, r8, QWORD PTR [rax+32]
adcx r14, r8
adox r15, r9
; A[5] * A[1]
mulx rdi, r8, QWORD PTR [rax+40]
adcx r15, r8
adox rdi, rcx
; rdx = A[2]
mov rdx, rbx
; A[5] * A[2]
mulx rsi, r8, QWORD PTR [rax+40]
adcx rdi, r8
adox rsi, rcx
adcx rsi, rcx
; rbx is not read again before the MULX for A[5] * A[3] overwrites it;
; rdx already holds A[2] for the products below.
adcx rbx, rcx
; Diagonal 2
; A[3] * A[2]
mulx r9, r8, QWORD PTR [rax+24]
adcx r14, r8
adox r15, r9
; A[4] * A[2]
mulx r9, r8, QWORD PTR [rax+32]
adcx r15, r8
adox rdi, r9
; rdx = A[3]
mov rdx, rbp
; A[4] * A[3]
mulx r9, r8, QWORD PTR [rax+32]
adcx rdi, r8
adox rsi, r9
; A[5] * A[3]
mulx rbx, r8, QWORD PTR [rax+40]
adcx rsi, r8
adox rbx, rcx
; rdx = A[4]
mov rdx, QWORD PTR [rax+32]
; A[5] * A[4]
mulx rbp, r8, QWORD PTR [rax+40]
adcx rbx, r8
adox rbp, rcx
adcx rbp, rcx
; rcx = 0 + 0 + CF: capture the remaining carry of the CF chain.
adcx rcx, rcx
; Doubling previous result as we add in square words results
; The doubling (x + x) runs on the OF chain via ADOX; the square words
; are added on the CF chain via ADCX.
; A[0] * A[0]
mov rdx, QWORD PTR [rax]
mulx r9, r8, rdx
; Briefly fetch r from the stack to store result word 0, then park it
; again; push, pop and mov do not modify the flags.
pop rdx
mov QWORD PTR [rdx], r8
adox r10, r10
push rdx
adcx r10, r9
; A[1] * A[1]
mov rdx, QWORD PTR [rax+8]
mulx r9, r8, rdx
adox r11, r11
adcx r11, r8
adox r12, r12
adcx r12, r9
; A[2] * A[2]
mov rdx, QWORD PTR [rax+16]
mulx r9, r8, rdx
adox r13, r13
adcx r13, r8
adox r14, r14
adcx r14, r9
; A[3] * A[3]
mov rdx, QWORD PTR [rax+24]
mulx r9, r8, rdx
adox r15, r15
adcx r15, r8
adox rdi, rdi
adcx rdi, r9
; A[4] * A[4]
mov rdx, QWORD PTR [rax+32]
mulx r9, r8, rdx
adox rsi, rsi
adcx rsi, r8
adox rbx, rbx
adcx rbx, r9
; A[5] * A[5]
mov rdx, QWORD PTR [rax+40]
mulx r9, r8, rdx
adox rbp, rbp
adcx rbp, r8
; Top word: high half of A[5]^2 plus the captured carry and both flags.
adcx r9, rcx
mov r8, 0
adox r9, r8
; Recover r and store result words 1..11 (word 0 was stored above).
pop rcx
mov QWORD PTR [rcx+8], r10
mov QWORD PTR [rcx+16], r11
mov QWORD PTR [rcx+24], r12
mov QWORD PTR [rcx+32], r13
mov QWORD PTR [rcx+40], r14
mov QWORD PTR [rcx+48], r15
mov QWORD PTR [rcx+56], rdi
mov QWORD PTR [rcx+64], rsi
mov QWORD PTR [rcx+72], rbx
mov QWORD PTR [rcx+80], rbp
mov QWORD PTR [rcx+88], r9
; Restore registers in reverse push order.
pop rbp
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_384_sqr_avx2_6 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Conditionally subtract b from a using the mask m.
; * m is -1 to subtract and 0 when not subtracting.
; *
; * r A single precision number representing condition subtract result. (rcx)
; * a A single precision number to subtract from. (rdx)
; * b A single precision number to subtract. (r8)
; * m Mask value to apply. (r9)
; *
; * Returns 0 in rax when there is no borrow out of the top word, -1 otherwise.
; */
_text SEGMENT READONLY PARA
sp_384_cond_sub_avx2_6 PROC
; Only volatile registers are used: rax carries the masked word of b,
; r10 and r11 alternate as the difference being formed.
; mov and pext do not modify the flags, so the borrow chain is unbroken.
; Word 0
mov rax, QWORD PTR [r8]
mov r10, QWORD PTR [rdx]
pext rax, rax, r9
sub r10, rax
; Word 1
mov rax, QWORD PTR [r8+8]
mov r11, QWORD PTR [rdx+8]
pext rax, rax, r9
mov QWORD PTR [rcx], r10
sbb r11, rax
; Word 2
mov rax, QWORD PTR [r8+16]
mov r10, QWORD PTR [rdx+16]
pext rax, rax, r9
mov QWORD PTR [rcx+8], r11
sbb r10, rax
; Word 3
mov rax, QWORD PTR [r8+24]
mov r11, QWORD PTR [rdx+24]
pext rax, rax, r9
mov QWORD PTR [rcx+16], r10
sbb r11, rax
; Word 4
mov rax, QWORD PTR [r8+32]
mov r10, QWORD PTR [rdx+32]
pext rax, rax, r9
mov QWORD PTR [rcx+24], r11
sbb r10, rax
; Word 5
mov rax, QWORD PTR [r8+40]
mov r11, QWORD PTR [rdx+40]
pext rax, rax, r9
mov QWORD PTR [rcx+32], r10
sbb r11, rax
mov QWORD PTR [rcx+40], r11
; rax = rax - rax - CF: 0 without a borrow, -1 with one.
sbb rax, rax
ret
sp_384_cond_sub_avx2_6 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
; *
; * When a is odd the modulus is added first (selected with a PEXT mask
; * rather than a branch); the 385-bit sum is then shifted right one bit.
; *
; * r Result of division by 2. (rcx)
; * a Number to divide. (rdx)
; * m Modulus (prime). (r8)
; */
_text SEGMENT READONLY PARA
sp_384_div2_avx2_6 PROC
push r12
push r13
push r14
; Bring all six words of a into registers.
mov rax, QWORD PTR [rdx]
mov r9, QWORD PTR [rdx+8]
mov r10, QWORD PTR [rdx+16]
mov r11, QWORD PTR [rdx+24]
mov r12, QWORD PTR [rdx+32]
mov r13, QWORD PTR [rdx+40]
; rdx = 0 - (a[0] & 1): all ones when a is odd, zero when even.
mov rdx, rax
and rdx, 1
neg rdx
; Add the masked modulus. mov and pext leave the flags untouched, so
; the carry runs through the whole add/adc sequence.
mov r14, QWORD PTR [r8]
pext r14, r14, rdx
add rax, r14
mov r14, QWORD PTR [r8+8]
pext r14, r14, rdx
adc r9, r14
mov r14, QWORD PTR [r8+16]
pext r14, r14, rdx
adc r10, r14
mov r14, QWORD PTR [r8+24]
pext r14, r14, rdx
adc r11, r14
mov r14, QWORD PTR [r8+32]
pext r14, r14, rdx
adc r12, r14
mov r14, QWORD PTR [r8+40]
pext r14, r14, rdx
adc r13, r14
; r14 = carry out of the top word (mov keeps CF intact).
mov r14, 0
adc r14, 0
; Shift the seven-word value right by one bit.
shrd rax, r9, 1
shrd r9, r10, 1
shrd r10, r11, 1
shrd r11, r12, 1
shrd r12, r13, 1
shrd r13, r14, 1
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r9
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
mov QWORD PTR [rcx+32], r12
mov QWORD PTR [rcx+40], r13
pop r14
pop r13
pop r12
ret
sp_384_div2_avx2_6 ENDP
_text ENDS
ENDIF
IFNDEF WC_NO_CACHE_RESISTANT
; /* Touch each possible entry that could be being copied.
; *
; * Every entry from index 1 to 63 is loaded and masked with an
; * index-equality mask, so the memory access pattern does not depend on
; * idx. An idx of 0 matches nothing and leaves the copied words zero.
; *
; * r Point to copy into. (rcx) 48 bytes are written at offset 0 and
; *   48 bytes at offset 96.
; * table Table - start of the entries to access. (rdx) 96 bytes an entry.
; * idx Index of entry to retrieve. (r8d)
; */
_text SEGMENT READONLY PARA
sp_384_get_entry_64_6 PROC
; xmm6-xmm15 are callee-saved in the Microsoft x64 calling convention and
; are all used below: save them. movdqu is used because rsp is not
; 16-byte aligned at this point.
sub rsp, 160
movdqu OWORD PTR [rsp], xmm6
movdqu OWORD PTR [rsp+16], xmm7
movdqu OWORD PTR [rsp+32], xmm8
movdqu OWORD PTR [rsp+48], xmm9
movdqu OWORD PTR [rsp+64], xmm10
movdqu OWORD PTR [rsp+80], xmm11
movdqu OWORD PTR [rsp+96], xmm12
movdqu OWORD PTR [rsp+112], xmm13
movdqu OWORD PTR [rsp+128], xmm14
movdqu OWORD PTR [rsp+144], xmm15
; Start from entry 1: skip entry 0 and count the remaining 63 entries.
mov rax, 1
movd xmm13, r8d
add rdx, 96
movd xmm15, eax
mov rax, 63
; xmm15 = {1,1,1,1}, xmm13 = {idx,idx,idx,idx}
pshufd xmm15, xmm15, 0
pshufd xmm13, xmm13, 0
; xmm0-xmm5 accumulate the selected entry.
pxor xmm0, xmm0
pxor xmm1, xmm1
pxor xmm2, xmm2
pxor xmm3, xmm3
pxor xmm4, xmm4
pxor xmm5, xmm5
; xmm14 = index of the entry about to be examined.
movdqa xmm14, xmm15
L_384_get_entry_64_6_start:
; xmm12 = all ones when the current index equals idx, else zero.
movdqa xmm12, xmm14
paddd xmm14, xmm15
pcmpeqd xmm12, xmm13
movdqu xmm6, [rdx]
movdqu xmm7, [rdx+16]
movdqu xmm8, [rdx+32]
movdqu xmm9, [rdx+48]
movdqu xmm10, [rdx+64]
movdqu xmm11, [rdx+80]
add rdx, 96
pand xmm6, xmm12
pand xmm7, xmm12
pand xmm8, xmm12
pand xmm9, xmm12
pand xmm10, xmm12
pand xmm11, xmm12
por xmm0, xmm6
por xmm1, xmm7
por xmm2, xmm8
por xmm3, xmm9
por xmm4, xmm10
por xmm5, xmm11
dec rax
jnz L_384_get_entry_64_6_start
; First 48 bytes of the entry go to r+0, the second 48 bytes to r+96.
movdqu [rcx], xmm0
movdqu [rcx+16], xmm1
movdqu [rcx+32], xmm2
movdqu [rcx+96], xmm3
movdqu [rcx+112], xmm4
movdqu [rcx+128], xmm5
; Restore the callee-saved vector registers.
movdqu xmm6, OWORD PTR [rsp]
movdqu xmm7, OWORD PTR [rsp+16]
movdqu xmm8, OWORD PTR [rsp+32]
movdqu xmm9, OWORD PTR [rsp+48]
movdqu xmm10, OWORD PTR [rsp+64]
movdqu xmm11, OWORD PTR [rsp+80]
movdqu xmm12, OWORD PTR [rsp+96]
movdqu xmm13, OWORD PTR [rsp+112]
movdqu xmm14, OWORD PTR [rsp+128]
movdqu xmm15, OWORD PTR [rsp+144]
add rsp, 160
ret
sp_384_get_entry_64_6 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Touch each possible entry that could be being copied.
; *
; * Every entry from index 1 to 63 is loaded and masked with an
; * index-equality mask, so the memory access pattern does not depend on
; * idx. An idx of 0 matches nothing and leaves the copied words zero.
; *
; * r Point to copy into. (rcx) 48 bytes are written at offset 0 and
; *   48 bytes at offset 96.
; * table Table - start of the entries to access. (rdx) 96 bytes an entry.
; * idx Index of entry to retrieve. (r8d)
; */
_text SEGMENT READONLY PARA
sp_384_get_entry_64_avx2_6 PROC
; xmm6-xmm11 are callee-saved in the Microsoft x64 calling convention and
; are used below: save their low 128 bits (the upper halves of the ymm
; registers are volatile).
sub rsp, 96
vmovdqu OWORD PTR [rsp], xmm6
vmovdqu OWORD PTR [rsp+16], xmm7
vmovdqu OWORD PTR [rsp+32], xmm8
vmovdqu OWORD PTR [rsp+48], xmm9
vmovdqu OWORD PTR [rsp+64], xmm10
vmovdqu OWORD PTR [rsp+80], xmm11
; Start from entry 1: skip entry 0 and visit the remaining 63 entries.
; The count matches sp_384_get_entry_64_6; visiting 64 entries from
; entry 1 would load the 96 bytes following the last table entry.
mov rax, 1
movd xmm9, r8d
add rdx, 96
movd xmm11, eax
mov rax, 63
; Broadcast lane 0: ymm9 = idx in every dword, ymm11 = 1 in every dword.
vpxor ymm10, ymm10, ymm10
vpermd ymm9, ymm10, ymm9
vpermd ymm11, ymm10, ymm11
; ymm0/xmm1 and ymm2/xmm3 accumulate the two 48-byte halves of the entry.
vpxor ymm0, ymm0, ymm0
vpxor xmm1, xmm1, xmm1
vpxor ymm2, ymm2, ymm2
vpxor xmm3, xmm3, xmm3
; ymm10 = index of the entry about to be examined.
vmovdqa ymm10, ymm11
L_384_get_entry_64_avx2_6_start:
; ymm8 = all ones when the current index equals idx, else zero.
vpcmpeqd ymm8, ymm10, ymm9
vpaddd ymm10, ymm10, ymm11
vmovupd ymm4, [rdx]
vmovdqu xmm5, OWORD PTR [rdx+32]
vmovupd ymm6, [rdx+48]
vmovdqu xmm7, OWORD PTR [rdx+80]
add rdx, 96
vpand ymm4, ymm4, ymm8
vpand xmm5, xmm5, xmm8
vpand ymm6, ymm6, ymm8
vpand xmm7, xmm7, xmm8
vpor ymm0, ymm0, ymm4
vpor xmm1, xmm1, xmm5
vpor ymm2, ymm2, ymm6
vpor xmm3, xmm3, xmm7
dec rax
jnz L_384_get_entry_64_avx2_6_start
; First 48 bytes of the entry go to r+0, the second 48 bytes to r+96.
vmovupd YMMWORD PTR [rcx], ymm0
vmovdqu [rcx+32], xmm1
vmovupd YMMWORD PTR [rcx+96], ymm2
vmovdqu [rcx+128], xmm3
; Restore the callee-saved vector registers.
vmovdqu xmm6, OWORD PTR [rsp]
vmovdqu xmm7, OWORD PTR [rsp+16]
vmovdqu xmm8, OWORD PTR [rsp+32]
vmovdqu xmm9, OWORD PTR [rsp+48]
vmovdqu xmm10, OWORD PTR [rsp+64]
vmovdqu xmm11, OWORD PTR [rsp+80]
add rsp, 96
ret
sp_384_get_entry_64_avx2_6 ENDP
_text ENDS
ENDIF
ENDIF
IFNDEF WC_NO_CACHE_RESISTANT
; /* Touch each possible entry that could be being copied.
; *
; * Every entry from index 1 to 64 is loaded and masked with an
; * index-equality mask, so the memory access pattern does not depend on
; * idx. An idx of 0 matches nothing and leaves the copied words zero.
; *
; * r Point to copy into. (rcx) 48 bytes are written at offset 0 and
; *   48 bytes at offset 96.
; * table Table - start of the entries to access. (rdx) 96 bytes an entry.
; * idx Index of entry to retrieve. (r8d)
; */
_text SEGMENT READONLY PARA
sp_384_get_entry_65_6 PROC
; xmm6-xmm15 are callee-saved in the Microsoft x64 calling convention and
; are all used below: save them. movdqu is used because rsp is not
; 16-byte aligned at this point.
sub rsp, 160
movdqu OWORD PTR [rsp], xmm6
movdqu OWORD PTR [rsp+16], xmm7
movdqu OWORD PTR [rsp+32], xmm8
movdqu OWORD PTR [rsp+48], xmm9
movdqu OWORD PTR [rsp+64], xmm10
movdqu OWORD PTR [rsp+80], xmm11
movdqu OWORD PTR [rsp+96], xmm12
movdqu OWORD PTR [rsp+112], xmm13
movdqu OWORD PTR [rsp+128], xmm14
movdqu OWORD PTR [rsp+144], xmm15
; Start from entry 1: skip entry 0 and count the remaining 64 entries.
mov rax, 1
movd xmm13, r8d
add rdx, 96
movd xmm15, eax
mov rax, 64
; xmm15 = {1,1,1,1}, xmm13 = {idx,idx,idx,idx}
pshufd xmm15, xmm15, 0
pshufd xmm13, xmm13, 0
; xmm0-xmm5 accumulate the selected entry.
pxor xmm0, xmm0
pxor xmm1, xmm1
pxor xmm2, xmm2
pxor xmm3, xmm3
pxor xmm4, xmm4
pxor xmm5, xmm5
; xmm14 = index of the entry about to be examined.
movdqa xmm14, xmm15
L_384_get_entry_65_6_start:
; xmm12 = all ones when the current index equals idx, else zero.
movdqa xmm12, xmm14
paddd xmm14, xmm15
pcmpeqd xmm12, xmm13
movdqu xmm6, [rdx]
movdqu xmm7, [rdx+16]
movdqu xmm8, [rdx+32]
movdqu xmm9, [rdx+48]
movdqu xmm10, [rdx+64]
movdqu xmm11, [rdx+80]
add rdx, 96
pand xmm6, xmm12
pand xmm7, xmm12
pand xmm8, xmm12
pand xmm9, xmm12
pand xmm10, xmm12
pand xmm11, xmm12
por xmm0, xmm6
por xmm1, xmm7
por xmm2, xmm8
por xmm3, xmm9
por xmm4, xmm10
por xmm5, xmm11
dec rax
jnz L_384_get_entry_65_6_start
; First 48 bytes of the entry go to r+0, the second 48 bytes to r+96.
movdqu [rcx], xmm0
movdqu [rcx+16], xmm1
movdqu [rcx+32], xmm2
movdqu [rcx+96], xmm3
movdqu [rcx+112], xmm4
movdqu [rcx+128], xmm5
; Restore the callee-saved vector registers.
movdqu xmm6, OWORD PTR [rsp]
movdqu xmm7, OWORD PTR [rsp+16]
movdqu xmm8, OWORD PTR [rsp+32]
movdqu xmm9, OWORD PTR [rsp+48]
movdqu xmm10, OWORD PTR [rsp+64]
movdqu xmm11, OWORD PTR [rsp+80]
movdqu xmm12, OWORD PTR [rsp+96]
movdqu xmm13, OWORD PTR [rsp+112]
movdqu xmm14, OWORD PTR [rsp+128]
movdqu xmm15, OWORD PTR [rsp+144]
add rsp, 160
ret
sp_384_get_entry_65_6 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Touch each possible entry that could be being copied.
; *
; * Every entry from index 1 to 64 is loaded and masked with an
; * index-equality mask, so the memory access pattern does not depend on
; * idx. An idx of 0 matches nothing and leaves the copied words zero.
; *
; * r Point to copy into. (rcx) 48 bytes are written at offset 0 and
; *   48 bytes at offset 96.
; * table Table - start of the entries to access. (rdx) 96 bytes an entry.
; * idx Index of entry to retrieve. (r8d)
; */
_text SEGMENT READONLY PARA
sp_384_get_entry_65_avx2_6 PROC
; xmm6-xmm11 are callee-saved in the Microsoft x64 calling convention and
; are used below: save their low 128 bits (the upper halves of the ymm
; registers are volatile).
sub rsp, 96
vmovdqu OWORD PTR [rsp], xmm6
vmovdqu OWORD PTR [rsp+16], xmm7
vmovdqu OWORD PTR [rsp+32], xmm8
vmovdqu OWORD PTR [rsp+48], xmm9
vmovdqu OWORD PTR [rsp+64], xmm10
vmovdqu OWORD PTR [rsp+80], xmm11
; Start from entry 1: skip entry 0 and visit the remaining 64 entries.
; The count matches sp_384_get_entry_65_6; visiting 65 entries from
; entry 1 would load the 96 bytes following the last table entry.
mov rax, 1
movd xmm9, r8d
add rdx, 96
movd xmm11, eax
mov rax, 64
; Broadcast lane 0: ymm9 = idx in every dword, ymm11 = 1 in every dword.
vpxor ymm10, ymm10, ymm10
vpermd ymm9, ymm10, ymm9
vpermd ymm11, ymm10, ymm11
; ymm0/xmm1 and ymm2/xmm3 accumulate the two 48-byte halves of the entry.
vpxor ymm0, ymm0, ymm0
vpxor xmm1, xmm1, xmm1
vpxor ymm2, ymm2, ymm2
vpxor xmm3, xmm3, xmm3
; ymm10 = index of the entry about to be examined.
vmovdqa ymm10, ymm11
L_384_get_entry_65_avx2_6_start:
; ymm8 = all ones when the current index equals idx, else zero.
vpcmpeqd ymm8, ymm10, ymm9
vpaddd ymm10, ymm10, ymm11
vmovupd ymm4, [rdx]
vmovdqu xmm5, OWORD PTR [rdx+32]
vmovupd ymm6, [rdx+48]
vmovdqu xmm7, OWORD PTR [rdx+80]
add rdx, 96
vpand ymm4, ymm4, ymm8
vpand xmm5, xmm5, xmm8
vpand ymm6, ymm6, ymm8
vpand xmm7, xmm7, xmm8
vpor ymm0, ymm0, ymm4
vpor xmm1, xmm1, xmm5
vpor ymm2, ymm2, ymm6
vpor xmm3, xmm3, xmm7
dec rax
jnz L_384_get_entry_65_avx2_6_start
; First 48 bytes of the entry go to r+0, the second 48 bytes to r+96.
vmovupd YMMWORD PTR [rcx], ymm0
vmovdqu [rcx+32], xmm1
vmovupd YMMWORD PTR [rcx+96], ymm2
vmovdqu [rcx+128], xmm3
; Restore the callee-saved vector registers.
vmovdqu xmm6, OWORD PTR [rsp]
vmovdqu xmm7, OWORD PTR [rsp+16]
vmovdqu xmm8, OWORD PTR [rsp+32]
vmovdqu xmm9, OWORD PTR [rsp+48]
vmovdqu xmm10, OWORD PTR [rsp+64]
vmovdqu xmm11, OWORD PTR [rsp+80]
add rsp, 96
ret
sp_384_get_entry_65_avx2_6 ENDP
_text ENDS
ENDIF
ENDIF
; /* Add 1 to a. (a = a + 1)
; *
; * The carry out of the top word is discarded.
; *
; * a A single precision integer, six 64-bit words, updated in place. (rcx)
; */
_text SEGMENT READONLY PARA
sp_384_add_one_6 PROC
; Load the six words into volatile registers.
mov rax, QWORD PTR [rcx]
mov rdx, QWORD PTR [rcx+8]
mov r8, QWORD PTR [rcx+16]
mov r9, QWORD PTR [rcx+24]
mov r10, QWORD PTR [rcx+32]
mov r11, QWORD PTR [rcx+40]
; Increment and ripple the carry upwards.
add rax, 1
adc rdx, 0
adc r8, 0
adc r9, 0
adc r10, 0
adc r11, 0
; Write every word back.
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], rdx
mov QWORD PTR [rcx+16], r8
mov QWORD PTR [rcx+24], r9
mov QWORD PTR [rcx+32], r10
mov QWORD PTR [rcx+40], r11
ret
sp_384_add_one_6 ENDP
_text ENDS
; /* Read big endian unsigned byte array into r.
; * Uses the bswap instruction.
; *
; * Words are produced least significant first, working backwards from the
; * end of the byte array; the remaining words of r up to 48 bytes are
; * zeroed.
; *
; * r A single precision integer. (rcx)
; * size Maximum number of bytes to convert (rdx) - not read by this code.
; * a Byte array. (r8)
; * n Number of bytes in array to read. (r9)
; */
_text SEGMENT READONLY PARA
sp_384_from_bin_bswap PROC
; r11 = one past the last input byte, rdx = one past the last word of r.
lea r11, [r8+r9]
lea rdx, [rcx+48]
; Convert 64 bytes at a time while at least 64 bytes remain.
L_384_from_bin_bswap_64_start:
cmp r9, 64
jl L_384_from_bin_bswap_64_end
sub r11, 64
mov rax, QWORD PTR [r11+56]
mov r10, QWORD PTR [r11+48]
bswap rax
bswap r10
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r10
mov rax, QWORD PTR [r11+40]
mov r10, QWORD PTR [r11+32]
bswap rax
bswap r10
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r10
mov rax, QWORD PTR [r11+24]
mov r10, QWORD PTR [r11+16]
bswap rax
bswap r10
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r10
mov rax, QWORD PTR [r11+8]
mov r10, QWORD PTR [r11]
bswap rax
bswap r10
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r10
add rcx, 64
sub r9, 64
jmp L_384_from_bin_bswap_64_start
L_384_from_bin_bswap_64_end:
; Convert 8 bytes at a time while at least 8 bytes remain.
L_384_from_bin_bswap_8_start:
cmp r9, 8
jl L_384_from_bin_bswap_8_end
sub r11, 8
mov rax, QWORD PTR [r11]
bswap rax
mov QWORD PTR [rcx], rax
add rcx, 8
sub r9, 8
jmp L_384_from_bin_bswap_8_start
L_384_from_bin_bswap_8_end:
; Any bytes left over are the most significant ones at the start of a.
test r9, r9
jz L_384_from_bin_bswap_hi_end
xor r10, r10
L_384_from_bin_bswap_hi_start:
; Shift the partial word up and append the next byte.
movzx eax, BYTE PTR [r8]
shl r10, 8
inc r8
add r10, rax
dec r9
jg L_384_from_bin_bswap_hi_start
mov QWORD PTR [rcx], r10
add rcx, 8
L_384_from_bin_bswap_hi_end:
; Zero the words of r that were not filled.
cmp rcx, rdx
je L_384_from_bin_bswap_zero_end
L_384_from_bin_bswap_zero_start:
mov QWORD PTR [rcx], 0
add rcx, 8
cmp rcx, rdx
jl L_384_from_bin_bswap_zero_start
L_384_from_bin_bswap_zero_end:
ret
sp_384_from_bin_bswap ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Read big endian unsigned byte array into r.
; * Uses the movbe instruction which is an optional instruction.
; *
; * Words are produced least significant first, working backwards from the
; * end of the byte array; the remaining words of r up to 48 bytes are
; * zeroed.
; *
; * r A single precision integer. (rcx)
; * size Maximum number of bytes to convert (rdx) - not read by this code.
; * a Byte array. (r8)
; * n Number of bytes in array to read. (r9)
; */
_text SEGMENT READONLY PARA
sp_384_from_bin_movbe PROC
; r11 = one past the last input byte, rdx = one past the last word of r.
lea r11, [r8+r9]
lea rdx, [rcx+48]
; Convert 64 bytes at a time while at least 64 bytes remain.
L_384_from_bin_movbe_64_start:
cmp r9, 64
jl L_384_from_bin_movbe_64_end
sub r11, 64
movbe rax, QWORD PTR [r11+56]
movbe r10, QWORD PTR [r11+48]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r10
movbe rax, QWORD PTR [r11+40]
movbe r10, QWORD PTR [r11+32]
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r10
movbe rax, QWORD PTR [r11+24]
movbe r10, QWORD PTR [r11+16]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r10
movbe rax, QWORD PTR [r11+8]
movbe r10, QWORD PTR [r11]
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r10
add rcx, 64
sub r9, 64
jmp L_384_from_bin_movbe_64_start
L_384_from_bin_movbe_64_end:
; Convert 8 bytes at a time while at least 8 bytes remain.
L_384_from_bin_movbe_8_start:
cmp r9, 8
jl L_384_from_bin_movbe_8_end
sub r11, 8
movbe rax, QWORD PTR [r11]
mov QWORD PTR [rcx], rax
add rcx, 8
sub r9, 8
jmp L_384_from_bin_movbe_8_start
L_384_from_bin_movbe_8_end:
; Any bytes left over are the most significant ones at the start of a.
test r9, r9
jz L_384_from_bin_movbe_hi_end
xor r10, r10
L_384_from_bin_movbe_hi_start:
; Shift the partial word up and append the next byte.
movzx eax, BYTE PTR [r8]
shl r10, 8
inc r8
add r10, rax
dec r9
jg L_384_from_bin_movbe_hi_start
mov QWORD PTR [rcx], r10
add rcx, 8
L_384_from_bin_movbe_hi_end:
; Zero the words of r that were not filled.
cmp rcx, rdx
je L_384_from_bin_movbe_zero_end
L_384_from_bin_movbe_zero_start:
mov QWORD PTR [rcx], 0
add rcx, 8
cmp rcx, rdx
jl L_384_from_bin_movbe_zero_start
L_384_from_bin_movbe_zero_end:
ret
sp_384_from_bin_movbe ENDP
_text ENDS
ENDIF
; /* Write r as big endian to byte array.
; * Fixed length number of bytes written: 48
; * Uses the bswap instruction.
; *
; * The most significant word r[5] is written to the first 8 bytes.
; *
; * r A single precision integer. (rcx)
; * a Byte array. (rdx)
; */
_text SEGMENT READONLY PARA
sp_384_to_bin_bswap_6 PROC
; Words 5 and 4 -> bytes 0..15
mov r9, QWORD PTR [rcx+40]
mov r10, QWORD PTR [rcx+32]
bswap r9
bswap r10
mov QWORD PTR [rdx], r9
mov QWORD PTR [rdx+8], r10
; Words 3 and 2 -> bytes 16..31
mov r9, QWORD PTR [rcx+24]
mov r10, QWORD PTR [rcx+16]
bswap r9
bswap r10
mov QWORD PTR [rdx+16], r9
mov QWORD PTR [rdx+24], r10
; Words 1 and 0 -> bytes 32..47
mov r9, QWORD PTR [rcx+8]
mov r10, QWORD PTR [rcx]
bswap r9
bswap r10
mov QWORD PTR [rdx+32], r9
mov QWORD PTR [rdx+40], r10
ret
sp_384_to_bin_bswap_6 ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Write r as big endian to byte array.
; * Fixed length number of bytes written: 48
; * Uses the movbe instruction which is optional.
; *
; * The most significant word r[5] is written to the first 8 bytes.
; *
; * r A single precision integer. (rcx)
; * a Byte array. (rdx)
; */
_text SEGMENT READONLY PARA
sp_384_to_bin_movbe_6 PROC
; The byte swap is done on the store side of movbe here.
; Words 5 and 4 -> bytes 0..15
mov r9, QWORD PTR [rcx+40]
mov r10, QWORD PTR [rcx+32]
movbe QWORD PTR [rdx], r9
movbe QWORD PTR [rdx+8], r10
; Words 3 and 2 -> bytes 16..31
mov r9, QWORD PTR [rcx+24]
mov r10, QWORD PTR [rcx+16]
movbe QWORD PTR [rdx+16], r9
movbe QWORD PTR [rdx+24], r10
; Words 1 and 0 -> bytes 32..47
mov r9, QWORD PTR [rcx+8]
mov r10, QWORD PTR [rcx]
movbe QWORD PTR [rdx+32], r9
movbe QWORD PTR [rdx+40], r10
ret
sp_384_to_bin_movbe_6 ENDP
_text ENDS
ENDIF
; /* Sub b from a into a. (a -= b)
; *
; * a A single precision integer and result. (rcx)
; * b A single precision integer. (rdx)
; *
; * Returns 0 in rax when there is no borrow out of the top word, -1 otherwise.
; */
_text SEGMENT READONLY PARA
sp_384_sub_in_place_6 PROC
; r12 and r13 are callee-saved and are used below, so they must be saved
; here to balance the pops before the return.
push r12
push r13
; rax = 0; the final sbb turns it into 0 or -1.
xor rax, rax
; Load all of b before a is modified.
mov r8, QWORD PTR [rdx]
mov r9, QWORD PTR [rdx+8]
mov r10, QWORD PTR [rdx+16]
mov r11, QWORD PTR [rdx+24]
mov r12, QWORD PTR [rdx+32]
mov r13, QWORD PTR [rdx+40]
; Subtract word by word, propagating the borrow.
sub QWORD PTR [rcx], r8
sbb QWORD PTR [rcx+8], r9
sbb QWORD PTR [rcx+16], r10
sbb QWORD PTR [rcx+24], r11
sbb QWORD PTR [rcx+32], r12
sbb QWORD PTR [rcx+40], r13
sbb rax, 0
pop r13
pop r12
ret
sp_384_sub_in_place_6 ENDP
_text ENDS
; /* Mul a by digit b into r. (r = a * b)
; *
; * r A single precision integer. (rcx) Seven 64-bit words are written.
; * a A single precision integer. (rdx, copied to r9) Six words are read.
; * b A single precision digit. (r8)
; *
; * r10 carries the high half of each product into the next word. Adding
; * the carry flag to a high half cannot overflow, because the high half
; * of a 64x64-bit product is at most 2^64 - 2.
; */
_text SEGMENT READONLY PARA
sp_384_mul_d_6 PROC
; mul writes rdx:rax, so the pointer to a moves to r9.
mov r9, rdx
; A[0] * B
mov rax, r8
mul QWORD PTR [r9]
mov QWORD PTR [rcx], rax
mov r10, rdx
; A[1] * B
mov rax, r8
mul QWORD PTR [r9+8]
add rax, r10
adc rdx, 0
mov QWORD PTR [rcx+8], rax
mov r10, rdx
; A[2] * B
mov rax, r8
mul QWORD PTR [r9+16]
add rax, r10
adc rdx, 0
mov QWORD PTR [rcx+16], rax
mov r10, rdx
; A[3] * B
mov rax, r8
mul QWORD PTR [r9+24]
add rax, r10
adc rdx, 0
mov QWORD PTR [rcx+24], rax
mov r10, rdx
; A[4] * B
mov rax, r8
mul QWORD PTR [r9+32]
add rax, r10
adc rdx, 0
mov QWORD PTR [rcx+32], rax
mov r10, rdx
; A[5] * B
mov rax, r8
mul QWORD PTR [r9+40]
add rax, r10
adc rdx, 0
mov QWORD PTR [rcx+40], rax
mov QWORD PTR [rcx+48], rdx
ret
sp_384_mul_d_6 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Mul a by digit b into r. (r = a * b)
; *
; * r A single precision integer. (rcx) Seven 64-bit words are written.
; * a A single precision integer. (rdx, copied to rax) Six words are read.
; * b A single precision digit. (r8)
; *
; * mulx and mov leave the flags untouched, so a single add/adc chain
; * runs through the whole routine. r10 and r11 alternate as the high
; * half of the previous product.
; */
_text SEGMENT READONLY PARA
sp_384_mul_d_avx2_6 PROC
; rdx is the implicit MULX multiplicand: rax = a, rdx = b.
mov rax, rdx
mov rdx, r8
; A[0] * B
mulx r10, r9, QWORD PTR [rax]
mov QWORD PTR [rcx], r9
; A[1] * B
mulx r11, r9, QWORD PTR [rax+8]
add r10, r9
mov QWORD PTR [rcx+8], r10
; A[2] * B
mulx r10, r9, QWORD PTR [rax+16]
adc r11, r9
mov QWORD PTR [rcx+16], r11
; A[3] * B
mulx r11, r9, QWORD PTR [rax+24]
adc r10, r9
mov QWORD PTR [rcx+24], r10
; A[4] * B
mulx r10, r9, QWORD PTR [rax+32]
adc r11, r9
mov QWORD PTR [rcx+32], r11
; A[5] * B
mulx r11, r9, QWORD PTR [rax+40]
adc r10, r9
mov QWORD PTR [rcx+40], r10
; Top word: high half of the last product plus the final carry.
adc r11, 0
mov QWORD PTR [rcx+48], r11
ret
sp_384_mul_d_avx2_6 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
; *
; * d1 The high order half of the number to divide. (rcx)
; * d0 The low order half of the number to divide. (rdx)
; * div The divisor. (r8)
; * returns the quotient in rax.
; */
_text SEGMENT READONLY PARA
div_384_word_asm_6 PROC
; div takes its input in rdx:rax - low half first, since it arrives in rdx.
mov rax, rdx
mov rdx, rcx
div r8
ret
div_384_word_asm_6 ENDP
_text ENDS
ENDIF
; /* Shift number right by 1 bit. (r = a >> 1)
; *
; * r Result of right shift by 1. (rcx)
; * a Number to shift. (rdx)
; */
_text SEGMENT READONLY PARA
sp_384_rshift1_6 PROC
; Load every word of a before anything is stored; the pointer register
; itself receives the top word last.
mov rax, QWORD PTR [rdx]
mov r8, QWORD PTR [rdx+8]
mov r9, QWORD PTR [rdx+16]
mov r10, QWORD PTR [rdx+24]
mov r11, QWORD PTR [rdx+32]
mov rdx, QWORD PTR [rdx+40]
; Each word takes its new top bit from the word above it.
shrd rax, r8, 1
mov QWORD PTR [rcx], rax
shrd r8, r9, 1
mov QWORD PTR [rcx+8], r8
shrd r9, r10, 1
mov QWORD PTR [rcx+16], r9
shrd r10, r11, 1
mov QWORD PTR [rcx+24], r10
shrd r11, rdx, 1
mov QWORD PTR [rcx+32], r11
; The top word shifts in a zero.
shr rdx, 1
mov QWORD PTR [rcx+40], rdx
ret
sp_384_rshift1_6 ENDP
_text ENDS
; /* Divide the number by 2 mod the prime. (r = a / 2 % m)
; *
; * When a is odd the modulus is added first; the sum, including its
; * carry, is then shifted right by one bit. The odd/even decision is a
; * branch.
; *
; * r Result of division by 2. (rcx)
; * a Number to divide. (rdx)
; * m Modulus (r8)
; */
_text SEGMENT READONLY PARA
sp_384_div2_mod_6 PROC
push r12
push r13
; Bring all six words of a into registers.
mov rax, QWORD PTR [rdx]
mov r9, QWORD PTR [rdx+8]
mov r10, QWORD PTR [rdx+16]
mov r11, QWORD PTR [rdx+24]
mov r12, QWORD PTR [rdx+32]
mov r13, QWORD PTR [rdx+40]
; rdx is free now: it becomes the seventh (carry) word, initially 0.
xor edx, edx
; Even values are shifted as they are.
test al, 1
jz L_384_mod_inv_6_div2_mod_no_add
; Odd: add the modulus straight from memory and keep the carry out.
add rax, QWORD PTR [r8]
adc r9, QWORD PTR [r8+8]
adc r10, QWORD PTR [r8+16]
adc r11, QWORD PTR [r8+24]
adc r12, QWORD PTR [r8+32]
adc r13, QWORD PTR [r8+40]
adc rdx, 0
L_384_mod_inv_6_div2_mod_no_add:
; Shift the seven-word value right by one bit.
shrd rax, r9, 1
shrd r9, r10, 1
shrd r10, r11, 1
shrd r11, r12, 1
shrd r12, r13, 1
shrd r13, rdx, 1
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r9
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
mov QWORD PTR [rcx+32], r12
mov QWORD PTR [rcx+40], r13
pop r13
pop r12
ret
sp_384_div2_mod_6 ENDP
_text ENDS
; /* Count the bits needed to represent a: one more than the index of
; * the highest set bit, or 0 when every word is zero.
; *
; * a A single precision integer, six 64-bit words. (rcx)
; * returns the number of bits in rax.
; */
_text SEGMENT READONLY PARA
sp_384_num_bits_6 PROC
; rax = 0 is the result when no word has a bit set.
xor eax, eax
; Scan from the top word down. r8 holds 64 * word index + 1, so that
; adding the bit index from bsr gives the bit count.
mov r8d, 321
mov rdx, QWORD PTR [rcx+40]
test rdx, rdx
jnz L_384_num_bits_6_found
mov r8d, 257
mov rdx, QWORD PTR [rcx+32]
test rdx, rdx
jnz L_384_num_bits_6_found
mov r8d, 193
mov rdx, QWORD PTR [rcx+24]
test rdx, rdx
jnz L_384_num_bits_6_found
mov r8d, 129
mov rdx, QWORD PTR [rcx+16]
test rdx, rdx
jnz L_384_num_bits_6_found
mov r8d, 65
mov rdx, QWORD PTR [rcx+8]
test rdx, rdx
jnz L_384_num_bits_6_found
mov r8d, 1
mov rdx, QWORD PTR [rcx]
test rdx, rdx
jz L_384_num_bits_6_done
L_384_num_bits_6_found:
; rdx is non-zero here, so bsr yields the index of its highest set bit.
bsr rax, rdx
add rax, r8
L_384_num_bits_6_done:
ret
sp_384_num_bits_6 ENDP
_text ENDS
ENDIF
IFDEF WOLFSSL_SP_1024
; /* Multiply a and b into r. (r = a * b)
; *
; * Fully unrolled 16 x 16 limb multiply, computed one result column at a
; * time: column k is the sum of every A[i] * B[j] with i + j == k.
; *
; * r  A single precision integer.
; * a  A single precision integer.
; * b  A single precision integer.
; *
; * Register use inside the routine:
; *   rcx            r (32 result limbs are stored through it)
; *   rdx -> r9      a (moved to r9 because MUL overwrites rdx)
; *   r8             b
; *   r10, r11, r12  rotating three-limb column accumulator
; *   rax, rdx       MUL result / scratch
; *
; * Result limbs 0..15 are staged in a 128 byte stack buffer and copied to r
; * only after every input limb has been read (presumably so that r may
; * overlap a or b - confirm against callers). Limbs 16..31 are stored
; * straight into r.
; */

; Opens a result column with A[idxA] * B[idxB]: the product is added into
; accMid:accLo and accHi becomes the fresh (zeroed) carry limb. The XOR sits
; between the MUL and the ADD; its flag side effects are overwritten by ADD.
SP1024_MUL16_FIRST MACRO idxA, idxB, accLo, accMid, accHi
        mov     rax, QWORD PTR [r8+8*(idxB)]
        mul     QWORD PTR [r9+8*(idxA)]
        xor     accHi, accHi
        add     accLo, rax
        adc     accMid, rdx
        adc     accHi, 0
ENDM

; Adds a further product A[idxA] * B[idxB] into the column accumulator
; accHi:accMid:accLo.
SP1024_MUL16_NEXT MACRO idxA, idxB, accLo, accMid, accHi
        mov     rax, QWORD PTR [r8+8*(idxB)]
        mul     QWORD PTR [r9+8*(idxA)]
        add     accLo, rax
        adc     accMid, rdx
        adc     accHi, 0
ENDM

; Copies four staged limbs from the stack buffer at rsp+byteOff to the same
; byte offset inside r.
SP1024_MUL16_COPY4 MACRO byteOff
        mov     rax, QWORD PTR [rsp+byteOff]
        mov     rdx, QWORD PTR [rsp+byteOff+8]
        mov     r10, QWORD PTR [rsp+byteOff+16]
        mov     r11, QWORD PTR [rsp+byteOff+24]
        mov     QWORD PTR [rcx+byteOff], rax
        mov     QWORD PTR [rcx+byteOff+8], rdx
        mov     QWORD PTR [rcx+byteOff+16], r10
        mov     QWORD PTR [rcx+byteOff+24], r11
ENDM

_text SEGMENT READONLY PARA
sp_1024_mul_16 PROC
        push    r12
        mov     r9, rdx                         ; r9 = a, frees rdx for MUL
        sub     rsp, 128                        ; staging area for r[0..15]

        ; ---- column 0: A[0] * B[0] ----
        mov     rax, QWORD PTR [r8]
        mul     QWORD PTR [r9]
        xor     r12, r12
        mov     QWORD PTR [rsp], rax
        mov     r11, rdx

        ; ---- column 1 ----
        SP1024_MUL16_FIRST   0,  1, r11, r12, r10
        SP1024_MUL16_NEXT    1,  0, r11, r12, r10
        mov     QWORD PTR [rsp+8], r11

        ; ---- column 2 ----
        SP1024_MUL16_FIRST   0,  2, r12, r10, r11
        SP1024_MUL16_NEXT    1,  1, r12, r10, r11
        SP1024_MUL16_NEXT    2,  0, r12, r10, r11
        mov     QWORD PTR [rsp+16], r12

        ; ---- column 3 ----
        SP1024_MUL16_FIRST   0,  3, r10, r11, r12
        SP1024_MUL16_NEXT    1,  2, r10, r11, r12
        SP1024_MUL16_NEXT    2,  1, r10, r11, r12
        SP1024_MUL16_NEXT    3,  0, r10, r11, r12
        mov     QWORD PTR [rsp+24], r10

        ; ---- column 4 ----
        SP1024_MUL16_FIRST   0,  4, r11, r12, r10
        SP1024_MUL16_NEXT    1,  3, r11, r12, r10
        SP1024_MUL16_NEXT    2,  2, r11, r12, r10
        SP1024_MUL16_NEXT    3,  1, r11, r12, r10
        SP1024_MUL16_NEXT    4,  0, r11, r12, r10
        mov     QWORD PTR [rsp+32], r11

        ; ---- column 5 ----
        SP1024_MUL16_FIRST   0,  5, r12, r10, r11
        SP1024_MUL16_NEXT    1,  4, r12, r10, r11
        SP1024_MUL16_NEXT    2,  3, r12, r10, r11
        SP1024_MUL16_NEXT    3,  2, r12, r10, r11
        SP1024_MUL16_NEXT    4,  1, r12, r10, r11
        SP1024_MUL16_NEXT    5,  0, r12, r10, r11
        mov     QWORD PTR [rsp+40], r12

        ; ---- column 6 ----
        SP1024_MUL16_FIRST   0,  6, r10, r11, r12
        SP1024_MUL16_NEXT    1,  5, r10, r11, r12
        SP1024_MUL16_NEXT    2,  4, r10, r11, r12
        SP1024_MUL16_NEXT    3,  3, r10, r11, r12
        SP1024_MUL16_NEXT    4,  2, r10, r11, r12
        SP1024_MUL16_NEXT    5,  1, r10, r11, r12
        SP1024_MUL16_NEXT    6,  0, r10, r11, r12
        mov     QWORD PTR [rsp+48], r10

        ; ---- column 7 ----
        SP1024_MUL16_FIRST   0,  7, r11, r12, r10
        SP1024_MUL16_NEXT    1,  6, r11, r12, r10
        SP1024_MUL16_NEXT    2,  5, r11, r12, r10
        SP1024_MUL16_NEXT    3,  4, r11, r12, r10
        SP1024_MUL16_NEXT    4,  3, r11, r12, r10
        SP1024_MUL16_NEXT    5,  2, r11, r12, r10
        SP1024_MUL16_NEXT    6,  1, r11, r12, r10
        SP1024_MUL16_NEXT    7,  0, r11, r12, r10
        mov     QWORD PTR [rsp+56], r11

        ; ---- column 8 ----
        SP1024_MUL16_FIRST   0,  8, r12, r10, r11
        SP1024_MUL16_NEXT    1,  7, r12, r10, r11
        SP1024_MUL16_NEXT    2,  6, r12, r10, r11
        SP1024_MUL16_NEXT    3,  5, r12, r10, r11
        SP1024_MUL16_NEXT    4,  4, r12, r10, r11
        SP1024_MUL16_NEXT    5,  3, r12, r10, r11
        SP1024_MUL16_NEXT    6,  2, r12, r10, r11
        SP1024_MUL16_NEXT    7,  1, r12, r10, r11
        SP1024_MUL16_NEXT    8,  0, r12, r10, r11
        mov     QWORD PTR [rsp+64], r12

        ; ---- column 9 ----
        SP1024_MUL16_FIRST   0,  9, r10, r11, r12
        SP1024_MUL16_NEXT    1,  8, r10, r11, r12
        SP1024_MUL16_NEXT    2,  7, r10, r11, r12
        SP1024_MUL16_NEXT    3,  6, r10, r11, r12
        SP1024_MUL16_NEXT    4,  5, r10, r11, r12
        SP1024_MUL16_NEXT    5,  4, r10, r11, r12
        SP1024_MUL16_NEXT    6,  3, r10, r11, r12
        SP1024_MUL16_NEXT    7,  2, r10, r11, r12
        SP1024_MUL16_NEXT    8,  1, r10, r11, r12
        SP1024_MUL16_NEXT    9,  0, r10, r11, r12
        mov     QWORD PTR [rsp+72], r10

        ; ---- column 10 ----
        SP1024_MUL16_FIRST   0, 10, r11, r12, r10
        SP1024_MUL16_NEXT    1,  9, r11, r12, r10
        SP1024_MUL16_NEXT    2,  8, r11, r12, r10
        SP1024_MUL16_NEXT    3,  7, r11, r12, r10
        SP1024_MUL16_NEXT    4,  6, r11, r12, r10
        SP1024_MUL16_NEXT    5,  5, r11, r12, r10
        SP1024_MUL16_NEXT    6,  4, r11, r12, r10
        SP1024_MUL16_NEXT    7,  3, r11, r12, r10
        SP1024_MUL16_NEXT    8,  2, r11, r12, r10
        SP1024_MUL16_NEXT    9,  1, r11, r12, r10
        SP1024_MUL16_NEXT   10,  0, r11, r12, r10
        mov     QWORD PTR [rsp+80], r11

        ; ---- column 11 ----
        SP1024_MUL16_FIRST   0, 11, r12, r10, r11
        SP1024_MUL16_NEXT    1, 10, r12, r10, r11
        SP1024_MUL16_NEXT    2,  9, r12, r10, r11
        SP1024_MUL16_NEXT    3,  8, r12, r10, r11
        SP1024_MUL16_NEXT    4,  7, r12, r10, r11
        SP1024_MUL16_NEXT    5,  6, r12, r10, r11
        SP1024_MUL16_NEXT    6,  5, r12, r10, r11
        SP1024_MUL16_NEXT    7,  4, r12, r10, r11
        SP1024_MUL16_NEXT    8,  3, r12, r10, r11
        SP1024_MUL16_NEXT    9,  2, r12, r10, r11
        SP1024_MUL16_NEXT   10,  1, r12, r10, r11
        SP1024_MUL16_NEXT   11,  0, r12, r10, r11
        mov     QWORD PTR [rsp+88], r12

        ; ---- column 12 ----
        SP1024_MUL16_FIRST   0, 12, r10, r11, r12
        SP1024_MUL16_NEXT    1, 11, r10, r11, r12
        SP1024_MUL16_NEXT    2, 10, r10, r11, r12
        SP1024_MUL16_NEXT    3,  9, r10, r11, r12
        SP1024_MUL16_NEXT    4,  8, r10, r11, r12
        SP1024_MUL16_NEXT    5,  7, r10, r11, r12
        SP1024_MUL16_NEXT    6,  6, r10, r11, r12
        SP1024_MUL16_NEXT    7,  5, r10, r11, r12
        SP1024_MUL16_NEXT    8,  4, r10, r11, r12
        SP1024_MUL16_NEXT    9,  3, r10, r11, r12
        SP1024_MUL16_NEXT   10,  2, r10, r11, r12
        SP1024_MUL16_NEXT   11,  1, r10, r11, r12
        SP1024_MUL16_NEXT   12,  0, r10, r11, r12
        mov     QWORD PTR [rsp+96], r10

        ; ---- column 13 ----
        SP1024_MUL16_FIRST   0, 13, r11, r12, r10
        SP1024_MUL16_NEXT    1, 12, r11, r12, r10
        SP1024_MUL16_NEXT    2, 11, r11, r12, r10
        SP1024_MUL16_NEXT    3, 10, r11, r12, r10
        SP1024_MUL16_NEXT    4,  9, r11, r12, r10
        SP1024_MUL16_NEXT    5,  8, r11, r12, r10
        SP1024_MUL16_NEXT    6,  7, r11, r12, r10
        SP1024_MUL16_NEXT    7,  6, r11, r12, r10
        SP1024_MUL16_NEXT    8,  5, r11, r12, r10
        SP1024_MUL16_NEXT    9,  4, r11, r12, r10
        SP1024_MUL16_NEXT   10,  3, r11, r12, r10
        SP1024_MUL16_NEXT   11,  2, r11, r12, r10
        SP1024_MUL16_NEXT   12,  1, r11, r12, r10
        SP1024_MUL16_NEXT   13,  0, r11, r12, r10
        mov     QWORD PTR [rsp+104], r11

        ; ---- column 14 ----
        SP1024_MUL16_FIRST   0, 14, r12, r10, r11
        SP1024_MUL16_NEXT    1, 13, r12, r10, r11
        SP1024_MUL16_NEXT    2, 12, r12, r10, r11
        SP1024_MUL16_NEXT    3, 11, r12, r10, r11
        SP1024_MUL16_NEXT    4, 10, r12, r10, r11
        SP1024_MUL16_NEXT    5,  9, r12, r10, r11
        SP1024_MUL16_NEXT    6,  8, r12, r10, r11
        SP1024_MUL16_NEXT    7,  7, r12, r10, r11
        SP1024_MUL16_NEXT    8,  6, r12, r10, r11
        SP1024_MUL16_NEXT    9,  5, r12, r10, r11
        SP1024_MUL16_NEXT   10,  4, r12, r10, r11
        SP1024_MUL16_NEXT   11,  3, r12, r10, r11
        SP1024_MUL16_NEXT   12,  2, r12, r10, r11
        SP1024_MUL16_NEXT   13,  1, r12, r10, r11
        SP1024_MUL16_NEXT   14,  0, r12, r10, r11
        mov     QWORD PTR [rsp+112], r12

        ; ---- column 15 ----
        SP1024_MUL16_FIRST   0, 15, r10, r11, r12
        SP1024_MUL16_NEXT    1, 14, r10, r11, r12
        SP1024_MUL16_NEXT    2, 13, r10, r11, r12
        SP1024_MUL16_NEXT    3, 12, r10, r11, r12
        SP1024_MUL16_NEXT    4, 11, r10, r11, r12
        SP1024_MUL16_NEXT    5, 10, r10, r11, r12
        SP1024_MUL16_NEXT    6,  9, r10, r11, r12
        SP1024_MUL16_NEXT    7,  8, r10, r11, r12
        SP1024_MUL16_NEXT    8,  7, r10, r11, r12
        SP1024_MUL16_NEXT    9,  6, r10, r11, r12
        SP1024_MUL16_NEXT   10,  5, r10, r11, r12
        SP1024_MUL16_NEXT   11,  4, r10, r11, r12
        SP1024_MUL16_NEXT   12,  3, r10, r11, r12
        SP1024_MUL16_NEXT   13,  2, r10, r11, r12
        SP1024_MUL16_NEXT   14,  1, r10, r11, r12
        SP1024_MUL16_NEXT   15,  0, r10, r11, r12
        mov     QWORD PTR [rsp+120], r10

        ; From here on the columns are stored directly into r.

        ; ---- column 16 ----
        SP1024_MUL16_FIRST   1, 15, r11, r12, r10
        SP1024_MUL16_NEXT    2, 14, r11, r12, r10
        SP1024_MUL16_NEXT    3, 13, r11, r12, r10
        SP1024_MUL16_NEXT    4, 12, r11, r12, r10
        SP1024_MUL16_NEXT    5, 11, r11, r12, r10
        SP1024_MUL16_NEXT    6, 10, r11, r12, r10
        SP1024_MUL16_NEXT    7,  9, r11, r12, r10
        SP1024_MUL16_NEXT    8,  8, r11, r12, r10
        SP1024_MUL16_NEXT    9,  7, r11, r12, r10
        SP1024_MUL16_NEXT   10,  6, r11, r12, r10
        SP1024_MUL16_NEXT   11,  5, r11, r12, r10
        SP1024_MUL16_NEXT   12,  4, r11, r12, r10
        SP1024_MUL16_NEXT   13,  3, r11, r12, r10
        SP1024_MUL16_NEXT   14,  2, r11, r12, r10
        SP1024_MUL16_NEXT   15,  1, r11, r12, r10
        mov     QWORD PTR [rcx+128], r11

        ; ---- column 17 ----
        SP1024_MUL16_FIRST   2, 15, r12, r10, r11
        SP1024_MUL16_NEXT    3, 14, r12, r10, r11
        SP1024_MUL16_NEXT    4, 13, r12, r10, r11
        SP1024_MUL16_NEXT    5, 12, r12, r10, r11
        SP1024_MUL16_NEXT    6, 11, r12, r10, r11
        SP1024_MUL16_NEXT    7, 10, r12, r10, r11
        SP1024_MUL16_NEXT    8,  9, r12, r10, r11
        SP1024_MUL16_NEXT    9,  8, r12, r10, r11
        SP1024_MUL16_NEXT   10,  7, r12, r10, r11
        SP1024_MUL16_NEXT   11,  6, r12, r10, r11
        SP1024_MUL16_NEXT   12,  5, r12, r10, r11
        SP1024_MUL16_NEXT   13,  4, r12, r10, r11
        SP1024_MUL16_NEXT   14,  3, r12, r10, r11
        SP1024_MUL16_NEXT   15,  2, r12, r10, r11
        mov     QWORD PTR [rcx+136], r12

        ; ---- column 18 ----
        SP1024_MUL16_FIRST   3, 15, r10, r11, r12
        SP1024_MUL16_NEXT    4, 14, r10, r11, r12
        SP1024_MUL16_NEXT    5, 13, r10, r11, r12
        SP1024_MUL16_NEXT    6, 12, r10, r11, r12
        SP1024_MUL16_NEXT    7, 11, r10, r11, r12
        SP1024_MUL16_NEXT    8, 10, r10, r11, r12
        SP1024_MUL16_NEXT    9,  9, r10, r11, r12
        SP1024_MUL16_NEXT   10,  8, r10, r11, r12
        SP1024_MUL16_NEXT   11,  7, r10, r11, r12
        SP1024_MUL16_NEXT   12,  6, r10, r11, r12
        SP1024_MUL16_NEXT   13,  5, r10, r11, r12
        SP1024_MUL16_NEXT   14,  4, r10, r11, r12
        SP1024_MUL16_NEXT   15,  3, r10, r11, r12
        mov     QWORD PTR [rcx+144], r10

        ; ---- column 19 ----
        SP1024_MUL16_FIRST   4, 15, r11, r12, r10
        SP1024_MUL16_NEXT    5, 14, r11, r12, r10
        SP1024_MUL16_NEXT    6, 13, r11, r12, r10
        SP1024_MUL16_NEXT    7, 12, r11, r12, r10
        SP1024_MUL16_NEXT    8, 11, r11, r12, r10
        SP1024_MUL16_NEXT    9, 10, r11, r12, r10
        SP1024_MUL16_NEXT   10,  9, r11, r12, r10
        SP1024_MUL16_NEXT   11,  8, r11, r12, r10
        SP1024_MUL16_NEXT   12,  7, r11, r12, r10
        SP1024_MUL16_NEXT   13,  6, r11, r12, r10
        SP1024_MUL16_NEXT   14,  5, r11, r12, r10
        SP1024_MUL16_NEXT   15,  4, r11, r12, r10
        mov     QWORD PTR [rcx+152], r11

        ; ---- column 20 ----
        SP1024_MUL16_FIRST   5, 15, r12, r10, r11
        SP1024_MUL16_NEXT    6, 14, r12, r10, r11
        SP1024_MUL16_NEXT    7, 13, r12, r10, r11
        SP1024_MUL16_NEXT    8, 12, r12, r10, r11
        SP1024_MUL16_NEXT    9, 11, r12, r10, r11
        SP1024_MUL16_NEXT   10, 10, r12, r10, r11
        SP1024_MUL16_NEXT   11,  9, r12, r10, r11
        SP1024_MUL16_NEXT   12,  8, r12, r10, r11
        SP1024_MUL16_NEXT   13,  7, r12, r10, r11
        SP1024_MUL16_NEXT   14,  6, r12, r10, r11
        SP1024_MUL16_NEXT   15,  5, r12, r10, r11
        mov     QWORD PTR [rcx+160], r12

        ; ---- column 21 ----
        SP1024_MUL16_FIRST   6, 15, r10, r11, r12
        SP1024_MUL16_NEXT    7, 14, r10, r11, r12
        SP1024_MUL16_NEXT    8, 13, r10, r11, r12
        SP1024_MUL16_NEXT    9, 12, r10, r11, r12
        SP1024_MUL16_NEXT   10, 11, r10, r11, r12
        SP1024_MUL16_NEXT   11, 10, r10, r11, r12
        SP1024_MUL16_NEXT   12,  9, r10, r11, r12
        SP1024_MUL16_NEXT   13,  8, r10, r11, r12
        SP1024_MUL16_NEXT   14,  7, r10, r11, r12
        SP1024_MUL16_NEXT   15,  6, r10, r11, r12
        mov     QWORD PTR [rcx+168], r10

        ; ---- column 22 ----
        SP1024_MUL16_FIRST   7, 15, r11, r12, r10
        SP1024_MUL16_NEXT    8, 14, r11, r12, r10
        SP1024_MUL16_NEXT    9, 13, r11, r12, r10
        SP1024_MUL16_NEXT   10, 12, r11, r12, r10
        SP1024_MUL16_NEXT   11, 11, r11, r12, r10
        SP1024_MUL16_NEXT   12, 10, r11, r12, r10
        SP1024_MUL16_NEXT   13,  9, r11, r12, r10
        SP1024_MUL16_NEXT   14,  8, r11, r12, r10
        SP1024_MUL16_NEXT   15,  7, r11, r12, r10
        mov     QWORD PTR [rcx+176], r11

        ; ---- column 23 ----
        SP1024_MUL16_FIRST   8, 15, r12, r10, r11
        SP1024_MUL16_NEXT    9, 14, r12, r10, r11
        SP1024_MUL16_NEXT   10, 13, r12, r10, r11
        SP1024_MUL16_NEXT   11, 12, r12, r10, r11
        SP1024_MUL16_NEXT   12, 11, r12, r10, r11
        SP1024_MUL16_NEXT   13, 10, r12, r10, r11
        SP1024_MUL16_NEXT   14,  9, r12, r10, r11
        SP1024_MUL16_NEXT   15,  8, r12, r10, r11
        mov     QWORD PTR [rcx+184], r12

        ; ---- column 24 ----
        SP1024_MUL16_FIRST   9, 15, r10, r11, r12
        SP1024_MUL16_NEXT   10, 14, r10, r11, r12
        SP1024_MUL16_NEXT   11, 13, r10, r11, r12
        SP1024_MUL16_NEXT   12, 12, r10, r11, r12
        SP1024_MUL16_NEXT   13, 11, r10, r11, r12
        SP1024_MUL16_NEXT   14, 10, r10, r11, r12
        SP1024_MUL16_NEXT   15,  9, r10, r11, r12
        mov     QWORD PTR [rcx+192], r10

        ; ---- column 25 ----
        SP1024_MUL16_FIRST  10, 15, r11, r12, r10
        SP1024_MUL16_NEXT   11, 14, r11, r12, r10
        SP1024_MUL16_NEXT   12, 13, r11, r12, r10
        SP1024_MUL16_NEXT   13, 12, r11, r12, r10
        SP1024_MUL16_NEXT   14, 11, r11, r12, r10
        SP1024_MUL16_NEXT   15, 10, r11, r12, r10
        mov     QWORD PTR [rcx+200], r11

        ; ---- column 26 ----
        SP1024_MUL16_FIRST  11, 15, r12, r10, r11
        SP1024_MUL16_NEXT   12, 14, r12, r10, r11
        SP1024_MUL16_NEXT   13, 13, r12, r10, r11
        SP1024_MUL16_NEXT   14, 12, r12, r10, r11
        SP1024_MUL16_NEXT   15, 11, r12, r10, r11
        mov     QWORD PTR [rcx+208], r12

        ; ---- column 27 ----
        SP1024_MUL16_FIRST  12, 15, r10, r11, r12
        SP1024_MUL16_NEXT   13, 14, r10, r11, r12
        SP1024_MUL16_NEXT   14, 13, r10, r11, r12
        SP1024_MUL16_NEXT   15, 12, r10, r11, r12
        mov     QWORD PTR [rcx+216], r10

        ; ---- column 28 ----
        SP1024_MUL16_FIRST  13, 15, r11, r12, r10
        SP1024_MUL16_NEXT   14, 14, r11, r12, r10
        SP1024_MUL16_NEXT   15, 13, r11, r12, r10
        mov     QWORD PTR [rcx+224], r11

        ; ---- column 29 ----
        SP1024_MUL16_FIRST  14, 15, r12, r10, r11
        SP1024_MUL16_NEXT   15, 14, r12, r10, r11
        mov     QWORD PTR [rcx+232], r12

        ; ---- column 30: A[15] * B[15]; no third limb is tracked here ----
        mov     rax, QWORD PTR [r8+120]
        mul     QWORD PTR [r9+120]
        add     r10, rax
        adc     r11, rdx
        mov     QWORD PTR [rcx+240], r10
        mov     QWORD PTR [rcx+248], r11

        ; ---- move the staged low half (limbs 0..15) into r ----
        SP1024_MUL16_COPY4  0
        SP1024_MUL16_COPY4  32
        SP1024_MUL16_COPY4  64
        SP1024_MUL16_COPY4  96

        add     rsp, 128
        pop     r12
        ret
sp_1024_mul_16 ENDP
_text ENDS

; The helper macros are private to sp_1024_mul_16; drop them again.
PURGE SP1024_MUL16_FIRST, SP1024_MUL16_NEXT, SP1024_MUL16_COPY4
; /* Square a and put result in r. (r = a * a)
; *
; * r A single precision integer.
; * a A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_1024_sqr_16 PROC
push r12
push r13
push r14
mov r8, rdx
sub rsp, 128
; A[0] * A[0]
mov rax, QWORD PTR [r8]
mul rax
xor r11, r11
mov QWORD PTR [rsp], rax
mov r10, rdx
; A[0] * A[1]
mov rax, QWORD PTR [r8+8]
mul QWORD PTR [r8]
xor r9, r9
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rsp+8], r10
; A[0] * A[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r8]
xor r10, r10
add r11, rax
adc r9, rdx
adc r10, 0
add r11, rax
adc r9, rdx
adc r10, 0
; A[1] * A[1]
mov rax, QWORD PTR [r8+8]
mul rax
add r11, rax
adc r9, rdx
adc r10, 0
mov QWORD PTR [rsp+16], r11
; A[0] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8]
xor r11, r11
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
; A[1] * A[2]
mov rax, QWORD PTR [r8+16]
mul QWORD PTR [r8+8]
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rsp+24], r9
; A[0] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8]
xor r9, r9
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
; A[1] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8+8]
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
; A[2] * A[2]
mov rax, QWORD PTR [r8+16]
mul rax
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rsp+32], r10
; A[0] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[3]
mov rax, QWORD PTR [r8+24]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rsp+40], r11
; A[0] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[3]
mov rax, QWORD PTR [r8+24]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rsp+48], r9
; A[0] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[4]
mov rax, QWORD PTR [r8+32]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rsp+56], r10
; A[0] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[4]
mov rax, QWORD PTR [r8+32]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rsp+64], r11
; A[0] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[5]
mov rax, QWORD PTR [r8+40]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rsp+72], r9
; A[0] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[5]
mov rax, QWORD PTR [r8+40]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rsp+80], r10
; A[0] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[6]
mov rax, QWORD PTR [r8+48]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rsp+88], r11
; A[0] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[6]
mov rax, QWORD PTR [r8+48]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rsp+96], r9
; A[0] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[7]
mov rax, QWORD PTR [r8+56]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rsp+104], r10
; A[0] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[7]
mov rax, QWORD PTR [r8+56]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rsp+112], r11
; A[0] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[1] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+8]
add r12, rax
adc r13, rdx
adc r14, 0
; A[2] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[8]
mov rax, QWORD PTR [r8+64]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rsp+120], r9
; A[1] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+8]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[2] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+16]
add r12, rax
adc r13, rdx
adc r14, 0
; A[3] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
; A[8] * A[8]
mov rax, QWORD PTR [r8+64]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rcx+128], r10
; A[2] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+16]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[3] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+24]
add r12, rax
adc r13, rdx
adc r14, 0
; A[4] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
; A[8] * A[9]
mov rax, QWORD PTR [r8+72]
mul QWORD PTR [r8+64]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rcx+136], r11
; A[3] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+24]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[4] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+32]
add r12, rax
adc r13, rdx
adc r14, 0
; A[5] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
; A[8] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+64]
add r12, rax
adc r13, rdx
adc r14, 0
; A[9] * A[9]
mov rax, QWORD PTR [r8+72]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rcx+144], r9
; A[4] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+32]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[5] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+40]
add r12, rax
adc r13, rdx
adc r14, 0
; A[6] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
; A[8] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+64]
add r12, rax
adc r13, rdx
adc r14, 0
; A[9] * A[10]
mov rax, QWORD PTR [r8+80]
mul QWORD PTR [r8+72]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rcx+152], r10
; A[5] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+40]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[6] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+48]
add r12, rax
adc r13, rdx
adc r14, 0
; A[7] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
; A[8] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+64]
add r12, rax
adc r13, rdx
adc r14, 0
; A[9] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+72]
add r12, rax
adc r13, rdx
adc r14, 0
; A[10] * A[10]
mov rax, QWORD PTR [r8+80]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rcx+160], r11
; A[6] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+48]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[7] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+56]
add r12, rax
adc r13, rdx
adc r14, 0
; A[8] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+64]
add r12, rax
adc r13, rdx
adc r14, 0
; A[9] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+72]
add r12, rax
adc r13, rdx
adc r14, 0
; A[10] * A[11]
mov rax, QWORD PTR [r8+88]
mul QWORD PTR [r8+80]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rcx+168], r9
; A[7] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+56]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[8] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+64]
add r12, rax
adc r13, rdx
adc r14, 0
; A[9] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+72]
add r12, rax
adc r13, rdx
adc r14, 0
; A[10] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+80]
add r12, rax
adc r13, rdx
adc r14, 0
; A[11] * A[11]
mov rax, QWORD PTR [r8+88]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rcx+176], r10
; A[8] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+64]
xor r10, r10
xor r14, r14
mov r12, rax
mov r13, rdx
; A[9] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+72]
add r12, rax
adc r13, rdx
adc r14, 0
; A[10] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+80]
add r12, rax
adc r13, rdx
adc r14, 0
; A[11] * A[12]
mov rax, QWORD PTR [r8+96]
mul QWORD PTR [r8+88]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r11, r12
adc r9, r13
adc r10, r14
mov QWORD PTR [rcx+184], r11
; A[9] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+72]
xor r11, r11
xor r14, r14
mov r12, rax
mov r13, rdx
; A[10] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+80]
add r12, rax
adc r13, rdx
adc r14, 0
; A[11] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+88]
add r12, rax
adc r13, rdx
adc r14, 0
; A[12] * A[12]
mov rax, QWORD PTR [r8+96]
mul rax
add r12, r12
adc r13, r13
adc r14, r14
add r12, rax
adc r13, rdx
adc r14, 0
add r9, r12
adc r10, r13
adc r11, r14
mov QWORD PTR [rcx+192], r9
; A[10] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+80]
xor r9, r9
xor r14, r14
mov r12, rax
mov r13, rdx
; A[11] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+88]
add r12, rax
adc r13, rdx
adc r14, 0
; A[12] * A[13]
mov rax, QWORD PTR [r8+104]
mul QWORD PTR [r8+96]
add r12, rax
adc r13, rdx
adc r14, 0
add r12, r12
adc r13, r13
adc r14, r14
add r10, r12
adc r11, r13
adc r9, r14
mov QWORD PTR [rcx+200], r10
; A[11] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+88]
xor r10, r10
add r11, rax
adc r9, rdx
adc r10, 0
add r11, rax
adc r9, rdx
adc r10, 0
; A[12] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+96]
add r11, rax
adc r9, rdx
adc r10, 0
add r11, rax
adc r9, rdx
adc r10, 0
; A[13] * A[13]
mov rax, QWORD PTR [r8+104]
mul rax
add r11, rax
adc r9, rdx
adc r10, 0
mov QWORD PTR [rcx+208], r11
; A[12] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+96]
xor r11, r11
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
; A[13] * A[14]
mov rax, QWORD PTR [r8+112]
mul QWORD PTR [r8+104]
add r9, rax
adc r10, rdx
adc r11, 0
add r9, rax
adc r10, rdx
adc r11, 0
mov QWORD PTR [rcx+216], r9
; A[13] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+104]
xor r9, r9
add r10, rax
adc r11, rdx
adc r9, 0
add r10, rax
adc r11, rdx
adc r9, 0
; A[14] * A[14]
mov rax, QWORD PTR [r8+112]
mul rax
add r10, rax
adc r11, rdx
adc r9, 0
mov QWORD PTR [rcx+224], r10
; A[14] * A[15]
mov rax, QWORD PTR [r8+120]
mul QWORD PTR [r8+112]
xor r10, r10
add r11, rax
adc r9, rdx
adc r10, 0
add r11, rax
adc r9, rdx
adc r10, 0
mov QWORD PTR [rcx+232], r11
; A[15] * A[15]
mov rax, QWORD PTR [r8+120]
mul rax
add r9, rax
adc r10, rdx
mov QWORD PTR [rcx+240], r9
mov QWORD PTR [rcx+248], r10
mov rax, QWORD PTR [rsp]
mov rdx, QWORD PTR [rsp+8]
mov r12, QWORD PTR [rsp+16]
mov r13, QWORD PTR [rsp+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], rdx
mov QWORD PTR [rcx+16], r12
mov QWORD PTR [rcx+24], r13
mov rax, QWORD PTR [rsp+32]
mov rdx, QWORD PTR [rsp+40]
mov r12, QWORD PTR [rsp+48]
mov r13, QWORD PTR [rsp+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], rdx
mov QWORD PTR [rcx+48], r12
mov QWORD PTR [rcx+56], r13
mov rax, QWORD PTR [rsp+64]
mov rdx, QWORD PTR [rsp+72]
mov r12, QWORD PTR [rsp+80]
mov r13, QWORD PTR [rsp+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], rdx
mov QWORD PTR [rcx+80], r12
mov QWORD PTR [rcx+88], r13
mov rax, QWORD PTR [rsp+96]
mov rdx, QWORD PTR [rsp+104]
mov r12, QWORD PTR [rsp+112]
mov r13, QWORD PTR [rsp+120]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], rdx
mov QWORD PTR [rcx+112], r12
mov QWORD PTR [rcx+120], r13
add rsp, 128
pop r14
pop r13
pop r12
ret
sp_1024_sqr_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
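; /* Multiply a and b into r. (r = a * b)
; *
; * Operands are arrays of 64-bit words; b is read as 16 words (b[0]..b[15]).
; * Partial products are formed with MULX and accumulated with ADCX/ADOX.
; * If r is the same pointer as a or b, the low 16 result words are built in
; * a 128-byte stack buffer rather than directly in r.
; *
; * r Result of multiplication.
; * a First number to multiply.
; * b Second number to multiply.
; */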
_text SEGMENT READONLY PARA
sp_1024_mul_avx2_16 PROC
push rbx
push rbp
push r12
push r13
push r14
push r15
push rdi
mov rbp, r8
mov r8, rcx
mov r9, rdx
sub rsp, 128
cmp r9, r8
mov rbx, rsp
cmovne rbx, r8
cmp rbp, r8
cmove rbx, rsp
add r8, 128
xor rdi, rdi
mov rdx, QWORD PTR [r9]
; A[0] * B[0]
mulx r11, r10, QWORD PTR [rbp]
; A[0] * B[1]
mulx r12, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx], r10
adcx r11, rax
; A[0] * B[2]
mulx r13, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+8], r11
adcx r12, rax
; A[0] * B[3]
mulx r14, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+16], r12
adcx r13, rax
mov QWORD PTR [rbx+24], r13
; A[0] * B[4]
mulx r10, rax, QWORD PTR [rbp+32]
adcx r14, rax
; A[0] * B[5]
mulx r11, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+32], r14
adcx r10, rax
; A[0] * B[6]
mulx r12, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+40], r10
adcx r11, rax
; A[0] * B[7]
mulx r13, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+48], r11
adcx r12, rax
mov QWORD PTR [rbx+56], r12
; A[0] * B[8]
mulx r14, rax, QWORD PTR [rbp+64]
adcx r13, rax
; A[0] * B[9]
mulx r10, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+64], r13
adcx r14, rax
; A[0] * B[10]
mulx r11, rax, QWORD PTR [rbp+80]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
; A[0] * B[11]
mulx r12, rax, QWORD PTR [rbp+88]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
mov QWORD PTR [rbx+88], r11
; A[0] * B[12]
mulx r13, rax, QWORD PTR [rbp+96]
adcx r12, rax
; A[0] * B[13]
mulx r14, rax, QWORD PTR [rbp+104]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
; A[0] * B[14]
mulx r10, rax, QWORD PTR [rbp+112]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
; A[0] * B[15]
mulx r11, rax, QWORD PTR [rbp+120]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adcx r11, rdi
mov r15, rdi
adcx r15, rdi
mov QWORD PTR [rbx+120], r10
mov QWORD PTR [r8], r11
mov rdx, QWORD PTR [r9+8]
mov r11, QWORD PTR [rbx+8]
mov r12, QWORD PTR [rbx+16]
mov r13, QWORD PTR [rbx+24]
mov r14, QWORD PTR [rbx+32]
mov r10, QWORD PTR [rbx+40]
; A[1] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r11, rax
adox r12, rcx
; A[1] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+8], r11
adcx r12, rax
adox r13, rcx
; A[1] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+16], r12
adcx r13, rax
adox r14, rcx
; A[1] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+24], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [rbx+32], r14
mov r11, QWORD PTR [rbx+48]
mov r12, QWORD PTR [rbx+56]
mov r13, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+72]
; A[1] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r10, rax
adox r11, rcx
; A[1] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+40], r10
adcx r11, rax
adox r12, rcx
; A[1] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+48], r11
adcx r12, rax
adox r13, rcx
; A[1] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+56], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [rbx+64], r13
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
; A[1] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r14, rax
adox r10, rcx
; A[1] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
adox r11, rcx
; A[1] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
; A[1] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [rbx+96], r12
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
; A[1] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r13, rax
adox r14, rcx
; A[1] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
; A[1] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[1] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [rbx+120], r10
mov r12, rdi
adcx r11, rax
adox r12, rcx
adcx r12, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8], r11
mov QWORD PTR [r8+8], r12
mov rdx, QWORD PTR [r9+16]
mov r12, QWORD PTR [rbx+16]
mov r13, QWORD PTR [rbx+24]
mov r14, QWORD PTR [rbx+32]
mov r10, QWORD PTR [rbx+40]
mov r11, QWORD PTR [rbx+48]
; A[2] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r12, rax
adox r13, rcx
; A[2] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+16], r12
adcx r13, rax
adox r14, rcx
; A[2] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+24], r13
adcx r14, rax
adox r10, rcx
; A[2] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+32], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+40], r10
mov r12, QWORD PTR [rbx+56]
mov r13, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+72]
mov r10, QWORD PTR [rbx+80]
; A[2] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r11, rax
adox r12, rcx
; A[2] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+48], r11
adcx r12, rax
adox r13, rcx
; A[2] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+56], r12
adcx r13, rax
adox r14, rcx
; A[2] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+64], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [rbx+72], r14
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
; A[2] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r10, rax
adox r11, rcx
; A[2] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
; A[2] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
; A[2] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [rbx+104], r13
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
; A[2] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r14, rax
adox r10, rcx
; A[2] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[2] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[2] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8], r11
mov r13, rdi
adcx r12, rax
adox r13, rcx
adcx r13, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+8], r12
mov QWORD PTR [r8+16], r13
mov rdx, QWORD PTR [r9+24]
mov r13, QWORD PTR [rbx+24]
mov r14, QWORD PTR [rbx+32]
mov r10, QWORD PTR [rbx+40]
mov r11, QWORD PTR [rbx+48]
mov r12, QWORD PTR [rbx+56]
; A[3] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r13, rax
adox r14, rcx
; A[3] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+24], r13
adcx r14, rax
adox r10, rcx
; A[3] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+32], r14
adcx r10, rax
adox r11, rcx
; A[3] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+40], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+48], r11
mov r13, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+72]
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
; A[3] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r12, rax
adox r13, rcx
; A[3] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+56], r12
adcx r13, rax
adox r14, rcx
; A[3] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+64], r13
adcx r14, rax
adox r10, rcx
; A[3] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+80], r10
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
; A[3] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r11, rax
adox r12, rcx
; A[3] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
; A[3] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
; A[3] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [rbx+112], r14
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
; A[3] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r10, rax
adox r11, rcx
; A[3] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[3] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[3] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+8], r12
mov r14, rdi
adcx r13, rax
adox r14, rcx
adcx r14, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+16], r13
mov QWORD PTR [r8+24], r14
mov rdx, QWORD PTR [r9+32]
mov r14, QWORD PTR [rbx+32]
mov r10, QWORD PTR [rbx+40]
mov r11, QWORD PTR [rbx+48]
mov r12, QWORD PTR [rbx+56]
mov r13, QWORD PTR [rbx+64]
; A[4] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r14, rax
adox r10, rcx
; A[4] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+32], r14
adcx r10, rax
adox r11, rcx
; A[4] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+40], r10
adcx r11, rax
adox r12, rcx
; A[4] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+48], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [rbx+56], r12
mov r14, QWORD PTR [rbx+72]
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
; A[4] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r13, rax
adox r14, rcx
; A[4] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+64], r13
adcx r14, rax
adox r10, rcx
; A[4] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
adox r11, rcx
; A[4] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+88], r11
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
; A[4] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r12, rax
adox r13, rcx
; A[4] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
; A[4] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
; A[4] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+120], r10
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
; A[4] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r11, rax
adox r12, rcx
; A[4] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[4] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
; A[4] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+16], r13
mov r10, rdi
adcx r14, rax
adox r10, rcx
adcx r10, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+24], r14
mov QWORD PTR [r8+32], r10
mov rdx, QWORD PTR [r9+40]
mov r10, QWORD PTR [rbx+40]
mov r11, QWORD PTR [rbx+48]
mov r12, QWORD PTR [rbx+56]
mov r13, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+72]
; A[5] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r10, rax
adox r11, rcx
; A[5] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+40], r10
adcx r11, rax
adox r12, rcx
; A[5] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+48], r11
adcx r12, rax
adox r13, rcx
; A[5] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+56], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [rbx+64], r13
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
; A[5] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r14, rax
adox r10, rcx
; A[5] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
adox r11, rcx
; A[5] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
; A[5] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [rbx+96], r12
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
; A[5] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r13, rax
adox r14, rcx
; A[5] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
; A[5] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[5] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8], r11
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
; A[5] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r12, rax
adox r13, rcx
; A[5] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
; A[5] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
; A[5] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+24], r14
mov r11, rdi
adcx r10, rax
adox r11, rcx
adcx r11, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+32], r10
mov QWORD PTR [r8+40], r11
mov rdx, QWORD PTR [r9+48]
mov r11, QWORD PTR [rbx+48]
mov r12, QWORD PTR [rbx+56]
mov r13, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+72]
mov r10, QWORD PTR [rbx+80]
; A[6] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r11, rax
adox r12, rcx
; A[6] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+48], r11
adcx r12, rax
adox r13, rcx
; A[6] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+56], r12
adcx r13, rax
adox r14, rcx
; A[6] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+64], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [rbx+72], r14
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
; A[6] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r10, rax
adox r11, rcx
; A[6] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
; A[6] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
; A[6] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [rbx+104], r13
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
; A[6] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r14, rax
adox r10, rcx
; A[6] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[6] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[6] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r8+8], r12
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
; A[6] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r13, rax
adox r14, rcx
; A[6] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
; A[6] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
; A[6] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+32], r10
mov r12, rdi
adcx r11, rax
adox r12, rcx
adcx r12, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+40], r11
mov QWORD PTR [r8+48], r12
mov rdx, QWORD PTR [r9+56]
mov r12, QWORD PTR [rbx+56]
mov r13, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+72]
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
; A[7] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r12, rax
adox r13, rcx
; A[7] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+56], r12
adcx r13, rax
adox r14, rcx
; A[7] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+64], r13
adcx r14, rax
adox r10, rcx
; A[7] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+80], r10
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
; A[7] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r11, rax
adox r12, rcx
; A[7] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
; A[7] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
; A[7] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [rbx+112], r14
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
; A[7] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r10, rax
adox r11, rcx
; A[7] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[7] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[7] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [r8+16], r13
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
mov r12, QWORD PTR [r8+48]
; A[7] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r14, rax
adox r10, rcx
; A[7] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
; A[7] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+32], r10
adcx r11, rax
adox r12, rcx
; A[7] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+40], r11
mov r13, rdi
adcx r12, rax
adox r13, rcx
adcx r13, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+48], r12
mov QWORD PTR [r8+56], r13
mov rdx, QWORD PTR [r9+64]
mov r13, QWORD PTR [rbx+64]
mov r14, QWORD PTR [rbx+72]
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
; A[8] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r13, rax
adox r14, rcx
; A[8] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+64], r13
adcx r14, rax
adox r10, rcx
; A[8] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
adox r11, rcx
; A[8] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbx+88], r11
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
; A[8] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r12, rax
adox r13, rcx
; A[8] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
; A[8] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
; A[8] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+120], r10
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
; A[8] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r11, rax
adox r12, rcx
; A[8] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[8] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
; A[8] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [r8+24], r14
mov r11, QWORD PTR [r8+40]
mov r12, QWORD PTR [r8+48]
mov r13, QWORD PTR [r8+56]
; A[8] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r10, rax
adox r11, rcx
; A[8] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+32], r10
adcx r11, rax
adox r12, rcx
; A[8] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+40], r11
adcx r12, rax
adox r13, rcx
; A[8] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+48], r12
mov r14, rdi
adcx r13, rax
adox r14, rcx
adcx r14, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+56], r13
mov QWORD PTR [r8+64], r14
mov rdx, QWORD PTR [r9+72]
mov r14, QWORD PTR [rbx+72]
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
; A[9] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r14, rax
adox r10, rcx
; A[9] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+72], r14
adcx r10, rax
adox r11, rcx
; A[9] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
; A[9] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [rbx+96], r12
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
; A[9] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r13, rax
adox r14, rcx
; A[9] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
; A[9] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[9] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8], r11
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
; A[9] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r12, rax
adox r13, rcx
; A[9] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
; A[9] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
; A[9] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+32], r10
mov r12, QWORD PTR [r8+48]
mov r13, QWORD PTR [r8+56]
mov r14, QWORD PTR [r8+64]
; A[9] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r11, rax
adox r12, rcx
; A[9] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+40], r11
adcx r12, rax
adox r13, rcx
; A[9] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+48], r12
adcx r13, rax
adox r14, rcx
; A[9] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+56], r13
mov r10, rdi
adcx r14, rax
adox r10, rcx
adcx r10, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+64], r14
mov QWORD PTR [r8+72], r10
mov rdx, QWORD PTR [r9+80]
mov r10, QWORD PTR [rbx+80]
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
; A[10] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r10, rax
adox r11, rcx
; A[10] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+80], r10
adcx r11, rax
adox r12, rcx
; A[10] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
; A[10] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [rbx+104], r13
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
; A[10] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r14, rax
adox r10, rcx
; A[10] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[10] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[10] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r8+8], r12
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
mov r12, QWORD PTR [r8+48]
; A[10] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r13, rax
adox r14, rcx
; A[10] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
; A[10] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
; A[10] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+32], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+40], r11
mov r13, QWORD PTR [r8+56]
mov r14, QWORD PTR [r8+64]
mov r10, QWORD PTR [r8+72]
; A[10] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r12, rax
adox r13, rcx
; A[10] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+48], r12
adcx r13, rax
adox r14, rcx
; A[10] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+56], r13
adcx r14, rax
adox r10, rcx
; A[10] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+64], r14
mov r11, rdi
adcx r10, rax
adox r11, rcx
adcx r11, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+72], r10
mov QWORD PTR [r8+80], r11
mov rdx, QWORD PTR [r9+88]
mov r11, QWORD PTR [rbx+88]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
; A[11] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r11, rax
adox r12, rcx
; A[11] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+88], r11
adcx r12, rax
adox r13, rcx
; A[11] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
; A[11] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [rbx+112], r14
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
; A[11] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r10, rax
adox r11, rcx
; A[11] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[11] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[11] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [r8+16], r13
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
mov r12, QWORD PTR [r8+48]
mov r13, QWORD PTR [r8+56]
; A[11] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r14, rax
adox r10, rcx
; A[11] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
; A[11] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+32], r10
adcx r11, rax
adox r12, rcx
; A[11] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+40], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r8+48], r12
mov r14, QWORD PTR [r8+64]
mov r10, QWORD PTR [r8+72]
mov r11, QWORD PTR [r8+80]
; A[11] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r13, rax
adox r14, rcx
; A[11] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+56], r13
adcx r14, rax
adox r10, rcx
; A[11] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+64], r14
adcx r10, rax
adox r11, rcx
; A[11] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+72], r10
mov r12, rdi
adcx r11, rax
adox r12, rcx
adcx r12, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+80], r11
mov QWORD PTR [r8+88], r12
mov rdx, QWORD PTR [r9+96]
mov r12, QWORD PTR [rbx+96]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
; A[12] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r12, rax
adox r13, rcx
; A[12] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+96], r12
adcx r13, rax
adox r14, rcx
; A[12] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
; A[12] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbx+120], r10
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
; A[12] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r11, rax
adox r12, rcx
; A[12] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[12] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
; A[12] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [r8+24], r14
mov r11, QWORD PTR [r8+40]
mov r12, QWORD PTR [r8+48]
mov r13, QWORD PTR [r8+56]
mov r14, QWORD PTR [r8+64]
; A[12] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r10, rax
adox r11, rcx
; A[12] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+32], r10
adcx r11, rax
adox r12, rcx
; A[12] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+40], r11
adcx r12, rax
adox r13, rcx
; A[12] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+48], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [r8+56], r13
mov r10, QWORD PTR [r8+72]
mov r11, QWORD PTR [r8+80]
mov r12, QWORD PTR [r8+88]
; A[12] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r14, rax
adox r10, rcx
; A[12] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+64], r14
adcx r10, rax
adox r11, rcx
; A[12] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+72], r10
adcx r11, rax
adox r12, rcx
; A[12] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+80], r11
mov r13, rdi
adcx r12, rax
adox r13, rcx
adcx r13, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+88], r12
mov QWORD PTR [r8+96], r13
mov rdx, QWORD PTR [r9+104]
mov r13, QWORD PTR [rbx+104]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
; A[13] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r13, rax
adox r14, rcx
; A[13] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+104], r13
adcx r14, rax
adox r10, rcx
; A[13] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[13] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8], r11
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
; A[13] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r12, rax
adox r13, rcx
; A[13] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
; A[13] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
; A[13] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+32], r10
mov r12, QWORD PTR [r8+48]
mov r13, QWORD PTR [r8+56]
mov r14, QWORD PTR [r8+64]
mov r10, QWORD PTR [r8+72]
; A[13] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r11, rax
adox r12, rcx
; A[13] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+40], r11
adcx r12, rax
adox r13, rcx
; A[13] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+48], r12
adcx r13, rax
adox r14, rcx
; A[13] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+56], r13
adcx r14, rax
adox r10, rcx
mov QWORD PTR [r8+64], r14
mov r11, QWORD PTR [r8+80]
mov r12, QWORD PTR [r8+88]
mov r13, QWORD PTR [r8+96]
; A[13] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r10, rax
adox r11, rcx
; A[13] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+72], r10
adcx r11, rax
adox r12, rcx
; A[13] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+80], r11
adcx r12, rax
adox r13, rcx
; A[13] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+88], r12
mov r14, rdi
adcx r13, rax
adox r14, rcx
adcx r14, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+96], r13
mov QWORD PTR [r8+104], r14
mov rdx, QWORD PTR [r9+112]
mov r14, QWORD PTR [rbx+112]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
; A[14] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r14, rax
adox r10, rcx
; A[14] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+112], r14
adcx r10, rax
adox r11, rcx
; A[14] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[14] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r8+8], r12
mov r14, QWORD PTR [r8+24]
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
mov r12, QWORD PTR [r8+48]
; A[14] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r13, rax
adox r14, rcx
; A[14] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [r8+16], r13
adcx r14, rax
adox r10, rcx
; A[14] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
; A[14] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+32], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+40], r11
mov r13, QWORD PTR [r8+56]
mov r14, QWORD PTR [r8+64]
mov r10, QWORD PTR [r8+72]
mov r11, QWORD PTR [r8+80]
; A[14] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r12, rax
adox r13, rcx
; A[14] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+48], r12
adcx r13, rax
adox r14, rcx
; A[14] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+56], r13
adcx r14, rax
adox r10, rcx
; A[14] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+64], r14
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+72], r10
mov r12, QWORD PTR [r8+88]
mov r13, QWORD PTR [r8+96]
mov r14, QWORD PTR [r8+104]
; A[14] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r11, rax
adox r12, rcx
; A[14] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+80], r11
adcx r12, rax
adox r13, rcx
; A[14] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+88], r12
adcx r13, rax
adox r14, rcx
; A[14] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+96], r13
mov r10, rdi
adcx r14, rax
adox r10, rcx
adcx r10, r15
mov r15, rdi
adox r15, rdi
adcx r15, rdi
mov QWORD PTR [r8+104], r14
mov QWORD PTR [r8+112], r10
mov rdx, QWORD PTR [r9+120]
mov r10, QWORD PTR [rbx+120]
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
mov r13, QWORD PTR [r8+16]
mov r14, QWORD PTR [r8+24]
; A[15] * B[0]
mulx rcx, rax, QWORD PTR [rbp]
adcx r10, rax
adox r11, rcx
; A[15] * B[1]
mulx rcx, rax, QWORD PTR [rbp+8]
mov QWORD PTR [rbx+120], r10
adcx r11, rax
adox r12, rcx
; A[15] * B[2]
mulx rcx, rax, QWORD PTR [rbp+16]
mov QWORD PTR [r8], r11
adcx r12, rax
adox r13, rcx
; A[15] * B[3]
mulx rcx, rax, QWORD PTR [rbp+24]
mov QWORD PTR [r8+8], r12
adcx r13, rax
adox r14, rcx
mov QWORD PTR [r8+16], r13
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
mov r12, QWORD PTR [r8+48]
mov r13, QWORD PTR [r8+56]
; A[15] * B[4]
mulx rcx, rax, QWORD PTR [rbp+32]
adcx r14, rax
adox r10, rcx
; A[15] * B[5]
mulx rcx, rax, QWORD PTR [rbp+40]
mov QWORD PTR [r8+24], r14
adcx r10, rax
adox r11, rcx
; A[15] * B[6]
mulx rcx, rax, QWORD PTR [rbp+48]
mov QWORD PTR [r8+32], r10
adcx r11, rax
adox r12, rcx
; A[15] * B[7]
mulx rcx, rax, QWORD PTR [rbp+56]
mov QWORD PTR [r8+40], r11
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r8+48], r12
mov r14, QWORD PTR [r8+64]
mov r10, QWORD PTR [r8+72]
mov r11, QWORD PTR [r8+80]
mov r12, QWORD PTR [r8+88]
; A[15] * B[8]
mulx rcx, rax, QWORD PTR [rbp+64]
adcx r13, rax
adox r14, rcx
; A[15] * B[9]
mulx rcx, rax, QWORD PTR [rbp+72]
mov QWORD PTR [r8+56], r13
adcx r14, rax
adox r10, rcx
; A[15] * B[10]
mulx rcx, rax, QWORD PTR [rbp+80]
mov QWORD PTR [r8+64], r14
adcx r10, rax
adox r11, rcx
; A[15] * B[11]
mulx rcx, rax, QWORD PTR [rbp+88]
mov QWORD PTR [r8+72], r10
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+80], r11
mov r13, QWORD PTR [r8+96]
mov r14, QWORD PTR [r8+104]
mov r10, QWORD PTR [r8+112]
; A[15] * B[12]
mulx rcx, rax, QWORD PTR [rbp+96]
adcx r12, rax
adox r13, rcx
; A[15] * B[13]
mulx rcx, rax, QWORD PTR [rbp+104]
mov QWORD PTR [r8+88], r12
adcx r13, rax
adox r14, rcx
; A[15] * B[14]
mulx rcx, rax, QWORD PTR [rbp+112]
mov QWORD PTR [r8+96], r13
adcx r14, rax
adox r10, rcx
; A[15] * B[15]
mulx rcx, rax, QWORD PTR [rbp+120]
mov QWORD PTR [r8+104], r14
mov r11, rdi
adcx r10, rax
adox r11, rcx
adcx r11, r15
mov QWORD PTR [r8+112], r10
mov QWORD PTR [r8+120], r11
sub r8, 128
cmp r9, r8
je L_start_1024_mul_avx2_16
cmp rbp, r8
jne L_end_1024_mul_avx2_16
L_start_1024_mul_avx2_16:
vmovdqu xmm0, OWORD PTR [rbx]
vmovups OWORD PTR [r8], xmm0
vmovdqu xmm0, OWORD PTR [rbx+16]
vmovups OWORD PTR [r8+16], xmm0
vmovdqu xmm0, OWORD PTR [rbx+32]
vmovups OWORD PTR [r8+32], xmm0
vmovdqu xmm0, OWORD PTR [rbx+48]
vmovups OWORD PTR [r8+48], xmm0
vmovdqu xmm0, OWORD PTR [rbx+64]
vmovups OWORD PTR [r8+64], xmm0
vmovdqu xmm0, OWORD PTR [rbx+80]
vmovups OWORD PTR [r8+80], xmm0
vmovdqu xmm0, OWORD PTR [rbx+96]
vmovups OWORD PTR [r8+96], xmm0
vmovdqu xmm0, OWORD PTR [rbx+112]
vmovups OWORD PTR [r8+112], xmm0
L_end_1024_mul_avx2_16:
add rsp, 128
pop rdi
pop r15
pop r14
pop r13
pop r12
pop rbp
pop rbx
ret
sp_1024_mul_avx2_16 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
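; /* Square a and put result in r. (r = a * a)
; *
; * r  A single precision integer that receives the result (pointer passed
; *    in rcx; words up to r[31] are written).
; * a  A single precision integer to square (pointer passed in rdx; words
; *    a[0]..a[15] are read). May be the same pointer as r.
; */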
_text SEGMENT READONLY PARA
sp_1024_sqr_avx2_16 PROC
push rbp
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
mov r8, rcx
mov r9, rdx
sub rsp, 128
cmp r9, r8
mov rbp, rsp
cmovne rbp, r8
add r8, 128
xor r13, r13
; Diagonal 1
xor r12, r12
; A[1] x A[0]
mov rdx, QWORD PTR [r9]
mulx r11, r10, QWORD PTR [r9+8]
; A[2] x A[0]
mulx rcx, rax, QWORD PTR [r9+16]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbp+8], r10
mov QWORD PTR [rbp+16], r11
mov r10, r13
mov r11, r13
; A[3] x A[0]
mulx rcx, rax, QWORD PTR [r9+24]
adcx r12, rax
adox r10, rcx
; A[4] x A[0]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbp+24], r12
mov QWORD PTR [rbp+32], r10
mov r12, r13
mov r10, r13
; A[5] x A[0]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r11, rax
adox r12, rcx
; A[6] x A[0]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbp+40], r11
mov QWORD PTR [rbp+48], r12
mov r11, r13
mov r12, r13
; A[7] x A[0]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r10, rax
adox r11, rcx
; A[8] x A[0]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbp+56], r10
mov QWORD PTR [rbp+64], r11
mov r10, r13
mov r11, r13
; A[9] x A[0]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r12, rax
adox r10, rcx
; A[10] x A[0]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbp+72], r12
mov QWORD PTR [rbp+80], r10
mov r12, r13
mov r10, r13
; A[11] x A[0]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r11, rax
adox r12, rcx
; A[12] x A[0]
mulx rcx, rax, QWORD PTR [r9+96]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbp+88], r11
mov r15, r12
mov r11, r13
mov r12, r13
; A[13] x A[0]
mulx rcx, rax, QWORD PTR [r9+104]
adcx r10, rax
adox r11, rcx
; A[14] x A[0]
mulx rcx, rax, QWORD PTR [r9+112]
adcx r11, rax
adox r12, rcx
mov rdi, r10
mov rsi, r11
mov r10, r13
; A[15] x A[0]
mulx rcx, rax, QWORD PTR [r9+120]
adcx r12, rax
adox r10, rcx
mov rbx, r12
; Carry
adcx r10, r13
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8], r10
; Diagonal 2
mov r10, QWORD PTR [rbp+24]
mov r11, QWORD PTR [rbp+32]
mov r12, QWORD PTR [rbp+40]
; A[2] x A[1]
mov rdx, QWORD PTR [r9+8]
mulx rcx, rax, QWORD PTR [r9+16]
adcx r10, rax
adox r11, rcx
; A[3] x A[1]
mulx rcx, rax, QWORD PTR [r9+24]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbp+24], r10
mov QWORD PTR [rbp+32], r11
mov r10, QWORD PTR [rbp+48]
mov r11, QWORD PTR [rbp+56]
; A[4] x A[1]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r12, rax
adox r10, rcx
; A[5] x A[1]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbp+40], r12
mov QWORD PTR [rbp+48], r10
mov r12, QWORD PTR [rbp+64]
mov r10, QWORD PTR [rbp+72]
; A[6] x A[1]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r11, rax
adox r12, rcx
; A[7] x A[1]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbp+56], r11
mov QWORD PTR [rbp+64], r12
mov r11, QWORD PTR [rbp+80]
mov r12, QWORD PTR [rbp+88]
; A[8] x A[1]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r10, rax
adox r11, rcx
; A[9] x A[1]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbp+72], r10
mov QWORD PTR [rbp+80], r11
; No load %r13 - %r8
; No load %r14 - %r9
; A[10] x A[1]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r12, rax
adox r15, rcx
; A[11] x A[1]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r15, rax
adox rdi, rcx
mov QWORD PTR [rbp+88], r12
; No store %r13
; No load %r15 - %r10
; No load %rbx - %r8
; A[12] x A[1]
mulx rcx, rax, QWORD PTR [r9+96]
adcx rdi, rax
adox rsi, rcx
; A[13] x A[1]
mulx rcx, rax, QWORD PTR [r9+104]
adcx rsi, rax
adox rbx, rcx
; No store %r14
; No store %r15
mov r11, QWORD PTR [r8]
mov r12, r13
; A[14] x A[1]
mulx rcx, rax, QWORD PTR [r9+112]
adcx rbx, rax
adox r11, rcx
; A[15] x A[1]
mulx rcx, rax, QWORD PTR [r9+120]
adcx r11, rax
adox r12, rcx
; No store %rbx
mov QWORD PTR [r8], r11
mov r10, r13
; A[15] x A[2]
mov rdx, QWORD PTR [r9+16]
mulx rcx, rax, QWORD PTR [r9+120]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+8], r12
; Carry
adcx r10, r14
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8+16], r10
; Diagonal 3
mov r10, QWORD PTR [rbp+40]
mov r11, QWORD PTR [rbp+48]
mov r12, QWORD PTR [rbp+56]
; A[3] x A[2]
mulx rcx, rax, QWORD PTR [r9+24]
adcx r10, rax
adox r11, rcx
; A[4] x A[2]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbp+40], r10
mov QWORD PTR [rbp+48], r11
mov r10, QWORD PTR [rbp+64]
mov r11, QWORD PTR [rbp+72]
; A[5] x A[2]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r12, rax
adox r10, rcx
; A[6] x A[2]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbp+56], r12
mov QWORD PTR [rbp+64], r10
mov r12, QWORD PTR [rbp+80]
mov r10, QWORD PTR [rbp+88]
; A[7] x A[2]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r11, rax
adox r12, rcx
; A[8] x A[2]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [rbp+72], r11
mov QWORD PTR [rbp+80], r12
; No load %r13 - %r9
; No load %r14 - %r10
; A[9] x A[2]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r10, rax
adox r15, rcx
; A[10] x A[2]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r15, rax
adox rdi, rcx
mov QWORD PTR [rbp+88], r10
; No store %r13
; No load %r15 - %r8
; No load %rbx - %r9
; A[11] x A[2]
mulx rcx, rax, QWORD PTR [r9+88]
adcx rdi, rax
adox rsi, rcx
; A[12] x A[2]
mulx rcx, rax, QWORD PTR [r9+96]
adcx rsi, rax
adox rbx, rcx
; No store %r14
; No store %r15
mov r12, QWORD PTR [r8]
mov r10, QWORD PTR [r8+8]
; A[13] x A[2]
mulx rcx, rax, QWORD PTR [r9+104]
adcx rbx, rax
adox r12, rcx
; A[14] x A[2]
mulx rcx, rax, QWORD PTR [r9+112]
adcx r12, rax
adox r10, rcx
; No store %rbx
mov QWORD PTR [r8], r12
mov r11, QWORD PTR [r8+16]
mov r12, r13
; A[14] x A[3]
mov rdx, QWORD PTR [r9+112]
mulx rcx, rax, QWORD PTR [r9+24]
adcx r10, rax
adox r11, rcx
; A[14] x A[4]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+8], r10
mov QWORD PTR [r8+16], r11
mov r10, r13
; A[14] x A[5]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+24], r12
; Carry
adcx r10, r14
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8+32], r10
; Diagonal 4
mov r10, QWORD PTR [rbp+56]
mov r11, QWORD PTR [rbp+64]
mov r12, QWORD PTR [rbp+72]
; A[4] x A[3]
mov rdx, QWORD PTR [r9+24]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r10, rax
adox r11, rcx
; A[5] x A[3]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbp+56], r10
mov QWORD PTR [rbp+64], r11
mov r10, QWORD PTR [rbp+80]
mov r11, QWORD PTR [rbp+88]
; A[6] x A[3]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r12, rax
adox r10, rcx
; A[7] x A[3]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [rbp+72], r12
mov QWORD PTR [rbp+80], r10
; No load %r13 - %r10
; No load %r14 - %r8
; A[8] x A[3]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r11, rax
adox r15, rcx
; A[9] x A[3]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r15, rax
adox rdi, rcx
mov QWORD PTR [rbp+88], r11
; No store %r13
; No load %r15 - %r9
; No load %rbx - %r10
; A[10] x A[3]
mulx rcx, rax, QWORD PTR [r9+80]
adcx rdi, rax
adox rsi, rcx
; A[11] x A[3]
mulx rcx, rax, QWORD PTR [r9+88]
adcx rsi, rax
adox rbx, rcx
; No store %r14
; No store %r15
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
; A[12] x A[3]
mulx rcx, rax, QWORD PTR [r9+96]
adcx rbx, rax
adox r10, rcx
; A[13] x A[3]
mulx rcx, rax, QWORD PTR [r9+104]
adcx r10, rax
adox r11, rcx
; No store %rbx
mov QWORD PTR [r8], r10
mov r12, QWORD PTR [r8+16]
mov r10, QWORD PTR [r8+24]
; A[13] x A[4]
mov rdx, QWORD PTR [r9+104]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r11, rax
adox r12, rcx
; A[13] x A[5]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+8], r11
mov QWORD PTR [r8+16], r12
mov r11, QWORD PTR [r8+32]
mov r12, r13
; A[13] x A[6]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r10, rax
adox r11, rcx
; A[13] x A[7]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+24], r10
mov QWORD PTR [r8+32], r11
mov r10, r13
; A[13] x A[8]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+40], r12
; Carry
adcx r10, r14
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8+48], r10
; Diagonal 5
mov r10, QWORD PTR [rbp+72]
mov r11, QWORD PTR [rbp+80]
mov r12, QWORD PTR [rbp+88]
; A[5] x A[4]
mov rdx, QWORD PTR [r9+32]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r10, rax
adox r11, rcx
; A[6] x A[4]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [rbp+72], r10
mov QWORD PTR [rbp+80], r11
; No load %r13 - %r8
; No load %r14 - %r9
; A[7] x A[4]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r12, rax
adox r15, rcx
; A[8] x A[4]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r15, rax
adox rdi, rcx
mov QWORD PTR [rbp+88], r12
; No store %r13
; No load %r15 - %r10
; No load %rbx - %r8
; A[9] x A[4]
mulx rcx, rax, QWORD PTR [r9+72]
adcx rdi, rax
adox rsi, rcx
; A[10] x A[4]
mulx rcx, rax, QWORD PTR [r9+80]
adcx rsi, rax
adox rbx, rcx
; No store %r14
; No store %r15
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
; A[11] x A[4]
mulx rcx, rax, QWORD PTR [r9+88]
adcx rbx, rax
adox r11, rcx
; A[12] x A[4]
mulx rcx, rax, QWORD PTR [r9+96]
adcx r11, rax
adox r12, rcx
; No store %rbx
mov QWORD PTR [r8], r11
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
; A[12] x A[5]
mov rdx, QWORD PTR [r9+96]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r12, rax
adox r10, rcx
; A[12] x A[6]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+8], r12
mov QWORD PTR [r8+16], r10
mov r12, QWORD PTR [r8+32]
mov r10, QWORD PTR [r8+40]
; A[12] x A[7]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r11, rax
adox r12, rcx
; A[12] x A[8]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+24], r11
mov QWORD PTR [r8+32], r12
mov r11, QWORD PTR [r8+48]
mov r12, r13
; A[12] x A[9]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r10, rax
adox r11, rcx
; A[12] x A[10]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+40], r10
mov QWORD PTR [r8+48], r11
mov r10, r13
; A[12] x A[11]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+56], r12
; Carry
adcx r10, r14
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8+64], r10
; Diagonal 6
mov r10, QWORD PTR [rbp+88]
; No load %r13 - %r9
; No load %r14 - %r10
; A[6] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r10, rax
adox r15, rcx
; A[7] x A[5]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r15, rax
adox rdi, rcx
mov QWORD PTR [rbp+88], r10
; No store %r13
; No load %r15 - %r8
; No load %rbx - %r9
; A[8] x A[5]
mulx rcx, rax, QWORD PTR [r9+64]
adcx rdi, rax
adox rsi, rcx
; A[9] x A[5]
mulx rcx, rax, QWORD PTR [r9+72]
adcx rsi, rax
adox rbx, rcx
; No store %r14
; No store %r15
mov r12, QWORD PTR [r8]
mov r10, QWORD PTR [r8+8]
; A[10] x A[5]
mulx rcx, rax, QWORD PTR [r9+80]
adcx rbx, rax
adox r12, rcx
; A[11] x A[5]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r12, rax
adox r10, rcx
; No store %rbx
mov QWORD PTR [r8], r12
mov r11, QWORD PTR [r8+16]
mov r12, QWORD PTR [r8+24]
; A[11] x A[6]
mov rdx, QWORD PTR [r9+88]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r10, rax
adox r11, rcx
; A[11] x A[7]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+8], r10
mov QWORD PTR [r8+16], r11
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
; A[11] x A[8]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r12, rax
adox r10, rcx
; A[11] x A[9]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+24], r12
mov QWORD PTR [r8+32], r10
mov r12, QWORD PTR [r8+48]
mov r10, QWORD PTR [r8+56]
; A[11] x A[10]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r11, rax
adox r12, rcx
; A[13] x A[9]
mov rdx, QWORD PTR [r9+104]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+40], r11
mov QWORD PTR [r8+48], r12
mov r11, QWORD PTR [r8+64]
mov r12, r13
; A[13] x A[10]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r10, rax
adox r11, rcx
; A[13] x A[11]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+56], r10
mov QWORD PTR [r8+64], r11
mov r10, r13
; A[13] x A[12]
mulx rcx, rax, QWORD PTR [r9+96]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+72], r12
; Carry
adcx r10, r14
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8+80], r10
; Diagonal 7
; No load %r14 - %r8
; No load %r15 - %r9
; No load %rbx - %r10
; A[7] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, QWORD PTR [r9+56]
adcx rdi, rax
adox rsi, rcx
; A[8] x A[6]
mulx rcx, rax, QWORD PTR [r9+64]
adcx rsi, rax
adox rbx, rcx
; No store %r14
; No store %r15
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
; A[9] x A[6]
mulx rcx, rax, QWORD PTR [r9+72]
adcx rbx, rax
adox r10, rcx
; A[10] x A[6]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r10, rax
adox r11, rcx
; No store %rbx
mov QWORD PTR [r8], r10
mov r12, QWORD PTR [r8+16]
mov r10, QWORD PTR [r8+24]
; A[10] x A[7]
mov rdx, QWORD PTR [r9+80]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r11, rax
adox r12, rcx
; A[10] x A[8]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+8], r11
mov QWORD PTR [r8+16], r12
mov r11, QWORD PTR [r8+32]
mov r12, QWORD PTR [r8+40]
; A[10] x A[9]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r10, rax
adox r11, rcx
; A[14] x A[6]
mov rdx, QWORD PTR [r9+112]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+24], r10
mov QWORD PTR [r8+32], r11
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
; A[14] x A[7]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r12, rax
adox r10, rcx
; A[14] x A[8]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+40], r12
mov QWORD PTR [r8+48], r10
mov r12, QWORD PTR [r8+64]
mov r10, QWORD PTR [r8+72]
; A[14] x A[9]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r11, rax
adox r12, rcx
; A[14] x A[10]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+56], r11
mov QWORD PTR [r8+64], r12
mov r11, QWORD PTR [r8+80]
mov r12, r13
; A[14] x A[11]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r10, rax
adox r11, rcx
; A[14] x A[12]
mulx rcx, rax, QWORD PTR [r9+96]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+72], r10
mov QWORD PTR [r8+80], r11
mov r10, r13
; A[14] x A[13]
mulx rcx, rax, QWORD PTR [r9+104]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+88], r12
; Carry
adcx r10, r14
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8+96], r10
; Diagonal 8
; No load %rbx - %r8
mov r11, QWORD PTR [r8]
mov r12, QWORD PTR [r8+8]
; A[8] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx rcx, rax, QWORD PTR [r9+64]
adcx rbx, rax
adox r11, rcx
; A[9] x A[7]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r11, rax
adox r12, rcx
; No store %rbx
mov QWORD PTR [r8], r11
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
; A[9] x A[8]
mov rdx, QWORD PTR [r9+64]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r12, rax
adox r10, rcx
; A[15] x A[3]
mov rdx, QWORD PTR [r9+120]
mulx rcx, rax, QWORD PTR [r9+24]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+8], r12
mov QWORD PTR [r8+16], r10
mov r12, QWORD PTR [r8+32]
mov r10, QWORD PTR [r8+40]
; A[15] x A[4]
mulx rcx, rax, QWORD PTR [r9+32]
adcx r11, rax
adox r12, rcx
; A[15] x A[5]
mulx rcx, rax, QWORD PTR [r9+40]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+24], r11
mov QWORD PTR [r8+32], r12
mov r11, QWORD PTR [r8+48]
mov r12, QWORD PTR [r8+56]
; A[15] x A[6]
mulx rcx, rax, QWORD PTR [r9+48]
adcx r10, rax
adox r11, rcx
; A[15] x A[7]
mulx rcx, rax, QWORD PTR [r9+56]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+40], r10
mov QWORD PTR [r8+48], r11
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
; A[15] x A[8]
mulx rcx, rax, QWORD PTR [r9+64]
adcx r12, rax
adox r10, rcx
; A[15] x A[9]
mulx rcx, rax, QWORD PTR [r9+72]
adcx r10, rax
adox r11, rcx
mov QWORD PTR [r8+56], r12
mov QWORD PTR [r8+64], r10
mov r12, QWORD PTR [r8+80]
mov r10, QWORD PTR [r8+88]
; A[15] x A[10]
mulx rcx, rax, QWORD PTR [r9+80]
adcx r11, rax
adox r12, rcx
; A[15] x A[11]
mulx rcx, rax, QWORD PTR [r9+88]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+72], r11
mov QWORD PTR [r8+80], r12
mov r11, QWORD PTR [r8+96]
mov r12, r13
; A[15] x A[12]
mulx rcx, rax, QWORD PTR [r9+96]
adcx r10, rax
adox r11, rcx
; A[15] x A[13]
mulx rcx, rax, QWORD PTR [r9+104]
adcx r11, rax
adox r12, rcx
mov QWORD PTR [r8+88], r10
mov QWORD PTR [r8+96], r11
mov r10, r13
; A[15] x A[14]
mulx rcx, rax, QWORD PTR [r9+112]
adcx r12, rax
adox r10, rcx
mov QWORD PTR [r8+104], r12
; Carry
adcx r10, r14
mov r14, r13
adcx r14, r13
adox r14, r13
mov QWORD PTR [r8+112], r10
mov QWORD PTR [r8+120], r14
; Double and Add in A[i] x A[i]
mov r11, QWORD PTR [rbp+8]
; A[0] x A[0]
mov rdx, QWORD PTR [r9]
mulx rcx, rax, rdx
mov QWORD PTR [rbp], rax
adox r11, r11
adcx r11, rcx
mov QWORD PTR [rbp+8], r11
mov r10, QWORD PTR [rbp+16]
mov r11, QWORD PTR [rbp+24]
; A[1] x A[1]
mov rdx, QWORD PTR [r9+8]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rbp+16], r10
mov QWORD PTR [rbp+24], r11
mov r10, QWORD PTR [rbp+32]
mov r11, QWORD PTR [rbp+40]
; A[2] x A[2]
mov rdx, QWORD PTR [r9+16]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rbp+32], r10
mov QWORD PTR [rbp+40], r11
mov r10, QWORD PTR [rbp+48]
mov r11, QWORD PTR [rbp+56]
; A[3] x A[3]
mov rdx, QWORD PTR [r9+24]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rbp+48], r10
mov QWORD PTR [rbp+56], r11
mov r10, QWORD PTR [rbp+64]
mov r11, QWORD PTR [rbp+72]
; A[4] x A[4]
mov rdx, QWORD PTR [r9+32]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rbp+64], r10
mov QWORD PTR [rbp+72], r11
mov r10, QWORD PTR [rbp+80]
mov r11, QWORD PTR [rbp+88]
; A[5] x A[5]
mov rdx, QWORD PTR [r9+40]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [rbp+80], r10
mov QWORD PTR [rbp+88], r11
; A[6] x A[6]
mov rdx, QWORD PTR [r9+48]
mulx rcx, rax, rdx
adox r15, r15
adox rdi, rdi
adcx r15, rax
adcx rdi, rcx
; A[7] x A[7]
mov rdx, QWORD PTR [r9+56]
mulx rcx, rax, rdx
adox rsi, rsi
adox rbx, rbx
adcx rsi, rax
adcx rbx, rcx
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
; A[8] x A[8]
mov rdx, QWORD PTR [r9+64]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8], r10
mov QWORD PTR [r8+8], r11
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
; A[9] x A[9]
mov rdx, QWORD PTR [r9+72]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+16], r10
mov QWORD PTR [r8+24], r11
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
; A[10] x A[10]
mov rdx, QWORD PTR [r9+80]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+32], r10
mov QWORD PTR [r8+40], r11
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
; A[11] x A[11]
mov rdx, QWORD PTR [r9+88]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+48], r10
mov QWORD PTR [r8+56], r11
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
; A[12] x A[12]
mov rdx, QWORD PTR [r9+96]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+64], r10
mov QWORD PTR [r8+72], r11
mov r10, QWORD PTR [r8+80]
mov r11, QWORD PTR [r8+88]
; A[13] x A[13]
mov rdx, QWORD PTR [r9+104]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+80], r10
mov QWORD PTR [r8+88], r11
mov r10, QWORD PTR [r8+96]
mov r11, QWORD PTR [r8+104]
; A[14] x A[14]
mov rdx, QWORD PTR [r9+112]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+96], r10
mov QWORD PTR [r8+104], r11
mov r10, QWORD PTR [r8+112]
mov r11, QWORD PTR [r8+120]
; A[15] x A[15]
mov rdx, QWORD PTR [r9+120]
mulx rcx, rax, rdx
adox r10, r10
adox r11, r11
adcx r10, rax
adcx r11, rcx
mov QWORD PTR [r8+112], r10
mov QWORD PTR [r8+120], r11
mov QWORD PTR [r8+-32], r15
mov QWORD PTR [r8+-24], rdi
mov QWORD PTR [r8+-16], rsi
mov QWORD PTR [r8+-8], rbx
sub r8, 128
cmp r9, r8
jne L_end_1024_sqr_avx2_16
vmovdqu xmm0, OWORD PTR [rbp]
vmovups OWORD PTR [r8], xmm0
vmovdqu xmm0, OWORD PTR [rbp+16]
vmovups OWORD PTR [r8+16], xmm0
vmovdqu xmm0, OWORD PTR [rbp+32]
vmovups OWORD PTR [r8+32], xmm0
vmovdqu xmm0, OWORD PTR [rbp+48]
vmovups OWORD PTR [r8+48], xmm0
vmovdqu xmm0, OWORD PTR [rbp+64]
vmovups OWORD PTR [r8+64], xmm0
vmovdqu xmm0, OWORD PTR [rbp+80]
vmovups OWORD PTR [r8+80], xmm0
L_end_1024_sqr_avx2_16:
add rsp, 128
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
pop rbp
ret
sp_1024_sqr_avx2_16 ENDP
_text ENDS
ENDIF
; /* Add b to a into r. (r = a + b)
; *
; * All three numbers are 16 words of 64 bits (1024 bits) with the least
; * significant word at offset 0.
; *
; * r (rcx) A single precision integer receiving the sum.
; * a (rdx) A single precision integer.
; * b (r8)  A single precision integer.
; * returns (rax) the carry out of the most significant word: 0 or 1.
; *
; * Scratch registers: r9, r10 and the flags.
; */
_text SEGMENT READONLY PARA
sp_1024_add_16 PROC
; Add
; r9 and r10 alternate as the working word. Only MOVs sit between the ADD
; and the ADCs, and MOV leaves CF alone, so the carry ripples from word 0
; up to word 15. The store of word i is issued after the load of a[i+1].
mov r9, QWORD PTR [rdx]
; Clear the return value before the chain starts: XOR rewrites the flags,
; so it cannot be placed inside the carry chain.
xor rax, rax
add r9, QWORD PTR [r8]
mov r10, QWORD PTR [rdx+8]
mov QWORD PTR [rcx], r9
adc r10, QWORD PTR [r8+8]
mov r9, QWORD PTR [rdx+16]
mov QWORD PTR [rcx+8], r10
adc r9, QWORD PTR [r8+16]
mov r10, QWORD PTR [rdx+24]
mov QWORD PTR [rcx+16], r9
adc r10, QWORD PTR [r8+24]
mov r9, QWORD PTR [rdx+32]
mov QWORD PTR [rcx+24], r10
adc r9, QWORD PTR [r8+32]
mov r10, QWORD PTR [rdx+40]
mov QWORD PTR [rcx+32], r9
adc r10, QWORD PTR [r8+40]
mov r9, QWORD PTR [rdx+48]
mov QWORD PTR [rcx+40], r10
adc r9, QWORD PTR [r8+48]
mov r10, QWORD PTR [rdx+56]
mov QWORD PTR [rcx+48], r9
adc r10, QWORD PTR [r8+56]
mov r9, QWORD PTR [rdx+64]
mov QWORD PTR [rcx+56], r10
adc r9, QWORD PTR [r8+64]
mov r10, QWORD PTR [rdx+72]
mov QWORD PTR [rcx+64], r9
adc r10, QWORD PTR [r8+72]
mov r9, QWORD PTR [rdx+80]
mov QWORD PTR [rcx+72], r10
adc r9, QWORD PTR [r8+80]
mov r10, QWORD PTR [rdx+88]
mov QWORD PTR [rcx+80], r9
adc r10, QWORD PTR [r8+88]
mov r9, QWORD PTR [rdx+96]
mov QWORD PTR [rcx+88], r10
adc r9, QWORD PTR [r8+96]
mov r10, QWORD PTR [rdx+104]
mov QWORD PTR [rcx+96], r9
adc r10, QWORD PTR [r8+104]
mov r9, QWORD PTR [rdx+112]
mov QWORD PTR [rcx+104], r10
adc r9, QWORD PTR [r8+112]
mov r10, QWORD PTR [rdx+120]
mov QWORD PTR [rcx+112], r9
adc r10, QWORD PTR [r8+120]
mov QWORD PTR [rcx+120], r10
; rax = 0 + 0 + CF: the carry out of word 15.
adc rax, 0
ret
sp_1024_add_16 ENDP
_text ENDS
; /* Sub b from a into a. (a -= b)
; *
; * Both numbers are 16 words of 64 bits (1024 bits) with the least
; * significant word at offset 0.
; *
; * a (rcx) A single precision integer and result.
; * b (rdx) A single precision integer.
; * returns (rax) 0 when no borrow left the most significant word and
; *         -1 (all ones) when one did.
; *
; * Scratch registers: r8, r9 and the flags.
; */
_text SEGMENT READONLY PARA
sp_1024_sub_in_place_16 PROC
; r8 and r9 alternate as the working word. Only MOVs sit between the SUB
; and the SBBs, and MOV leaves CF alone, so the borrow ripples from word 0
; up to word 15.
mov r8, QWORD PTR [rcx]
; Clear the return value before the chain starts: XOR rewrites the flags.
xor rax, rax
sub r8, QWORD PTR [rdx]
mov r9, QWORD PTR [rcx+8]
mov QWORD PTR [rcx], r8
sbb r9, QWORD PTR [rdx+8]
mov r8, QWORD PTR [rcx+16]
mov QWORD PTR [rcx+8], r9
sbb r8, QWORD PTR [rdx+16]
mov r9, QWORD PTR [rcx+24]
mov QWORD PTR [rcx+16], r8
sbb r9, QWORD PTR [rdx+24]
mov r8, QWORD PTR [rcx+32]
mov QWORD PTR [rcx+24], r9
sbb r8, QWORD PTR [rdx+32]
mov r9, QWORD PTR [rcx+40]
mov QWORD PTR [rcx+32], r8
sbb r9, QWORD PTR [rdx+40]
mov r8, QWORD PTR [rcx+48]
mov QWORD PTR [rcx+40], r9
sbb r8, QWORD PTR [rdx+48]
mov r9, QWORD PTR [rcx+56]
mov QWORD PTR [rcx+48], r8
sbb r9, QWORD PTR [rdx+56]
mov r8, QWORD PTR [rcx+64]
mov QWORD PTR [rcx+56], r9
sbb r8, QWORD PTR [rdx+64]
mov r9, QWORD PTR [rcx+72]
mov QWORD PTR [rcx+64], r8
sbb r9, QWORD PTR [rdx+72]
mov r8, QWORD PTR [rcx+80]
mov QWORD PTR [rcx+72], r9
sbb r8, QWORD PTR [rdx+80]
mov r9, QWORD PTR [rcx+88]
mov QWORD PTR [rcx+80], r8
sbb r9, QWORD PTR [rdx+88]
mov r8, QWORD PTR [rcx+96]
mov QWORD PTR [rcx+88], r9
sbb r8, QWORD PTR [rdx+96]
mov r9, QWORD PTR [rcx+104]
mov QWORD PTR [rcx+96], r8
sbb r9, QWORD PTR [rdx+104]
mov r8, QWORD PTR [rcx+112]
mov QWORD PTR [rcx+104], r9
sbb r8, QWORD PTR [rdx+112]
mov r9, QWORD PTR [rcx+120]
mov QWORD PTR [rcx+112], r8
sbb r9, QWORD PTR [rdx+120]
mov QWORD PTR [rcx+120], r9
; rax = 0 - 0 - CF: 0 without a final borrow, all ones with one.
sbb rax, 0
ret
sp_1024_sub_in_place_16 ENDP
_text ENDS
; /* Conditionally subtract b from a using the mask m. (r = a - (b & m))
; * m is -1 (all ones) to subtract b and 0 to subtract nothing.
; *
; * All numbers are 16 words of 64 bits, least significant word at offset 0.
; *
; * r (rcx) A single precision number receiving the result.
; * a (rdx) A single precision number to subtract from.
; * b (r8)  A single precision number to subtract.
; * m (r9)  Mask value to apply to every word of b.
; * returns (rax) 0 when no borrow left the top word, -1 when one did.
; *
; * Uses 128 bytes of stack for the masked copy of b.
; * Scratch registers: r8, r10, r11 and the flags.
; */
_text SEGMENT READONLY PARA
sp_1024_cond_sub_16 PROC
sub rsp, 128
; Return value starts at zero. Flags are dead here, so XOR is safe.
xor eax, eax
; Pass 1: build (b & m) on the stack. AND clears CF, so the masking
; cannot be mixed into the borrow chain below and is done up front.
mov r10, r9
and r10, QWORD PTR [r8]
mov QWORD PTR [rsp], r10
mov r11, r9
and r11, QWORD PTR [r8+8]
mov QWORD PTR [rsp+8], r11
mov r10, r9
and r10, QWORD PTR [r8+16]
mov QWORD PTR [rsp+16], r10
mov r11, r9
and r11, QWORD PTR [r8+24]
mov QWORD PTR [rsp+24], r11
mov r10, r9
and r10, QWORD PTR [r8+32]
mov QWORD PTR [rsp+32], r10
mov r11, r9
and r11, QWORD PTR [r8+40]
mov QWORD PTR [rsp+40], r11
mov r10, r9
and r10, QWORD PTR [r8+48]
mov QWORD PTR [rsp+48], r10
mov r11, r9
and r11, QWORD PTR [r8+56]
mov QWORD PTR [rsp+56], r11
mov r10, r9
and r10, QWORD PTR [r8+64]
mov QWORD PTR [rsp+64], r10
mov r11, r9
and r11, QWORD PTR [r8+72]
mov QWORD PTR [rsp+72], r11
mov r10, r9
and r10, QWORD PTR [r8+80]
mov QWORD PTR [rsp+80], r10
mov r11, r9
and r11, QWORD PTR [r8+88]
mov QWORD PTR [rsp+88], r11
mov r10, r9
and r10, QWORD PTR [r8+96]
mov QWORD PTR [rsp+96], r10
mov r11, r9
and r11, QWORD PTR [r8+104]
mov QWORD PTR [rsp+104], r11
mov r10, r9
and r10, QWORD PTR [r8+112]
mov QWORD PTR [rsp+112], r10
mov r11, r9
and r11, QWORD PTR [r8+120]
mov QWORD PTR [rsp+120], r11
; Pass 2: r = a - masked b. b is no longer needed, so r8 is reused for
; the masked word. r10 and r11 alternate as the working word; only MOVs
; separate the SUB/SBBs, so the borrow ripples through all 16 words.
; Word 0
mov r8, QWORD PTR [rsp]
mov r10, QWORD PTR [rdx]
sub r10, r8
; Word 1
mov r8, QWORD PTR [rsp+8]
mov r11, QWORD PTR [rdx+8]
sbb r11, r8
mov QWORD PTR [rcx], r10
; Word 2
mov r8, QWORD PTR [rsp+16]
mov r10, QWORD PTR [rdx+16]
sbb r10, r8
mov QWORD PTR [rcx+8], r11
; Word 3
mov r8, QWORD PTR [rsp+24]
mov r11, QWORD PTR [rdx+24]
sbb r11, r8
mov QWORD PTR [rcx+16], r10
; Word 4
mov r8, QWORD PTR [rsp+32]
mov r10, QWORD PTR [rdx+32]
sbb r10, r8
mov QWORD PTR [rcx+24], r11
; Word 5
mov r8, QWORD PTR [rsp+40]
mov r11, QWORD PTR [rdx+40]
sbb r11, r8
mov QWORD PTR [rcx+32], r10
; Word 6
mov r8, QWORD PTR [rsp+48]
mov r10, QWORD PTR [rdx+48]
sbb r10, r8
mov QWORD PTR [rcx+40], r11
; Word 7
mov r8, QWORD PTR [rsp+56]
mov r11, QWORD PTR [rdx+56]
sbb r11, r8
mov QWORD PTR [rcx+48], r10
; Word 8
mov r8, QWORD PTR [rsp+64]
mov r10, QWORD PTR [rdx+64]
sbb r10, r8
mov QWORD PTR [rcx+56], r11
; Word 9
mov r8, QWORD PTR [rsp+72]
mov r11, QWORD PTR [rdx+72]
sbb r11, r8
mov QWORD PTR [rcx+64], r10
; Word 10
mov r8, QWORD PTR [rsp+80]
mov r10, QWORD PTR [rdx+80]
sbb r10, r8
mov QWORD PTR [rcx+72], r11
; Word 11
mov r8, QWORD PTR [rsp+88]
mov r11, QWORD PTR [rdx+88]
sbb r11, r8
mov QWORD PTR [rcx+80], r10
; Word 12
mov r8, QWORD PTR [rsp+96]
mov r10, QWORD PTR [rdx+96]
sbb r10, r8
mov QWORD PTR [rcx+88], r11
; Word 13
mov r8, QWORD PTR [rsp+104]
mov r11, QWORD PTR [rdx+104]
sbb r11, r8
mov QWORD PTR [rcx+96], r10
; Word 14
mov r8, QWORD PTR [rsp+112]
mov r10, QWORD PTR [rdx+112]
sbb r10, r8
mov QWORD PTR [rcx+104], r11
; Word 15
mov r8, QWORD PTR [rsp+120]
mov r11, QWORD PTR [rdx+120]
sbb r11, r8
mov QWORD PTR [rcx+112], r10
mov QWORD PTR [rcx+120], r11
; rax = 0 - 0 - CF: the final borrow as 0 or -1.
sbb rax, 0
add rsp, 128
ret
sp_1024_cond_sub_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Conditionally subtract b from a using the mask m. (r = a - (b & m))
; * m is -1 (all ones) to subtract b and 0 to subtract nothing.
; *
; * All numbers are 16 words of 64 bits, least significant word at offset 0.
; *
; * r (rcx) A single precision number receiving the result.
; * a (rdx) A single precision number to subtract from.
; * b (r8)  A single precision number to subtract.
; * m (r9)  Mask value to apply to every word of b.
; * returns (rax) 0 when no borrow left the top word, -1 when one did.
; *
; * The mask is applied with PEXT (a BMI2 instruction). PEXT returns its
; * source unchanged for an all-ones mask and 0 for a zero mask, so for
; * those two masks it behaves like AND. Any other mask value would
; * compact bits rather than AND them. PEXT does not modify the flags,
; * which is what allows the masking to sit inside the SBB borrow chain
; * without a temporary buffer.
; *
; * r12 is callee-saved and is preserved on the stack.
; * Scratch registers: r10, r11 and the flags.
; */
_text SEGMENT READONLY PARA
sp_1024_cond_sub_avx2_16 PROC
push r12
; Return value starts at zero; MOV is used so no flags are touched.
mov rax, 0
; r10, r11 and r12 rotate between "masked word of b" and "word of a".
; Word 0 starts the chain with SUB; words 1..15 continue it with SBB.
mov r12, QWORD PTR [r8]
mov r10, QWORD PTR [rdx]
pext r12, r12, r9
sub r10, r12
mov r12, QWORD PTR [r8+8]
mov r11, QWORD PTR [rdx+8]
pext r12, r12, r9
mov QWORD PTR [rcx], r10
sbb r11, r12
mov r10, QWORD PTR [r8+16]
mov r12, QWORD PTR [rdx+16]
pext r10, r10, r9
mov QWORD PTR [rcx+8], r11
sbb r12, r10
mov r11, QWORD PTR [r8+24]
mov r10, QWORD PTR [rdx+24]
pext r11, r11, r9
mov QWORD PTR [rcx+16], r12
sbb r10, r11
mov r12, QWORD PTR [r8+32]
mov r11, QWORD PTR [rdx+32]
pext r12, r12, r9
mov QWORD PTR [rcx+24], r10
sbb r11, r12
mov r10, QWORD PTR [r8+40]
mov r12, QWORD PTR [rdx+40]
pext r10, r10, r9
mov QWORD PTR [rcx+32], r11
sbb r12, r10
mov r11, QWORD PTR [r8+48]
mov r10, QWORD PTR [rdx+48]
pext r11, r11, r9
mov QWORD PTR [rcx+40], r12
sbb r10, r11
mov r12, QWORD PTR [r8+56]
mov r11, QWORD PTR [rdx+56]
pext r12, r12, r9
mov QWORD PTR [rcx+48], r10
sbb r11, r12
mov r10, QWORD PTR [r8+64]
mov r12, QWORD PTR [rdx+64]
pext r10, r10, r9
mov QWORD PTR [rcx+56], r11
sbb r12, r10
mov r11, QWORD PTR [r8+72]
mov r10, QWORD PTR [rdx+72]
pext r11, r11, r9
mov QWORD PTR [rcx+64], r12
sbb r10, r11
mov r12, QWORD PTR [r8+80]
mov r11, QWORD PTR [rdx+80]
pext r12, r12, r9
mov QWORD PTR [rcx+72], r10
sbb r11, r12
mov r10, QWORD PTR [r8+88]
mov r12, QWORD PTR [rdx+88]
pext r10, r10, r9
mov QWORD PTR [rcx+80], r11
sbb r12, r10
mov r11, QWORD PTR [r8+96]
mov r10, QWORD PTR [rdx+96]
pext r11, r11, r9
mov QWORD PTR [rcx+88], r12
sbb r10, r11
mov r12, QWORD PTR [r8+104]
mov r11, QWORD PTR [rdx+104]
pext r12, r12, r9
mov QWORD PTR [rcx+96], r10
sbb r11, r12
mov r10, QWORD PTR [r8+112]
mov r12, QWORD PTR [rdx+112]
pext r10, r10, r9
mov QWORD PTR [rcx+104], r11
sbb r12, r10
mov r11, QWORD PTR [r8+120]
mov r10, QWORD PTR [rdx+120]
pext r11, r11, r9
mov QWORD PTR [rcx+112], r12
sbb r10, r11
mov QWORD PTR [rcx+120], r10
; rax = 0 - 0 - CF: the final borrow as 0 or -1.
sbb rax, 0
pop r12
ret
sp_1024_cond_sub_avx2_16 ENDP
_text ENDS
ENDIF
; /* Mul a by digit b into r. (r = a * b)
; *
; * r (rcx) A single precision integer receiving the product. 17 words
; *         are written (offsets 0 to 128).
; * a (rdx) A single precision integer of 16 words.
; * b (r8)  A single precision digit (one 64-bit word).
; *
; * r12 is callee-saved and is preserved on the stack.
; * Scratch registers: rax, rdx, r9, r10, r11 and the flags.
; */
_text SEGMENT READONLY PARA
sp_1024_mul_d_16 PROC
push r12
; MUL writes its 128-bit result to rdx:rax, so the pointer to a is moved
; out of rdx into r9 first.
mov r9, rdx
; r10, r11 and r12 rotate through three roles: the word being completed
; and stored, the word collecting the high half of the current product,
; and a freshly zeroed word that absorbs the carry out of that high half.
; A[0] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9]
mov r10, rax
mov r11, rdx
mov QWORD PTR [rcx], r10
; A[1] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+8]
add r11, rax
mov QWORD PTR [rcx+8], r11
adc r12, rdx
adc r10, 0
; A[2] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+16]
add r12, rax
mov QWORD PTR [rcx+16], r12
adc r10, rdx
adc r11, 0
; A[3] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+24]
add r10, rax
mov QWORD PTR [rcx+24], r10
adc r11, rdx
adc r12, 0
; A[4] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+32]
add r11, rax
mov QWORD PTR [rcx+32], r11
adc r12, rdx
adc r10, 0
; A[5] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+40]
add r12, rax
mov QWORD PTR [rcx+40], r12
adc r10, rdx
adc r11, 0
; A[6] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+48]
add r10, rax
mov QWORD PTR [rcx+48], r10
adc r11, rdx
adc r12, 0
; A[7] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+56]
add r11, rax
mov QWORD PTR [rcx+56], r11
adc r12, rdx
adc r10, 0
; A[8] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+64]
add r12, rax
mov QWORD PTR [rcx+64], r12
adc r10, rdx
adc r11, 0
; A[9] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+72]
add r10, rax
mov QWORD PTR [rcx+72], r10
adc r11, rdx
adc r12, 0
; A[10] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+80]
add r11, rax
mov QWORD PTR [rcx+80], r11
adc r12, rdx
adc r10, 0
; A[11] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+88]
add r12, rax
mov QWORD PTR [rcx+88], r12
adc r10, rdx
adc r11, 0
; A[12] * B
mov rax, r8
xor r12, r12
mul QWORD PTR [r9+96]
add r10, rax
mov QWORD PTR [rcx+96], r10
adc r11, rdx
adc r12, 0
; A[13] * B
mov rax, r8
xor r10, r10
mul QWORD PTR [r9+104]
add r11, rax
mov QWORD PTR [rcx+104], r11
adc r12, rdx
adc r10, 0
; A[14] * B
mov rax, r8
xor r11, r11
mul QWORD PTR [r9+112]
add r12, rax
mov QWORD PTR [rcx+112], r12
adc r10, rdx
adc r11, 0
; A[15] * B
; Last product: no third word is needed, r11 becomes the 17th result word.
mov rax, r8
mul QWORD PTR [r9+120]
add r10, rax
adc r11, rdx
mov QWORD PTR [rcx+120], r10
mov QWORD PTR [rcx+128], r11
pop r12
ret
sp_1024_mul_d_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Mul a by digit b into r. (r = a * b)
; *
; * Variant using MULX (BMI2) and ADCX/ADOX (ADX).
; *
; * r (rcx) A single precision integer receiving the product. 17 words
; *         are written (offsets 0 to 128).
; * a (rdx) A single precision integer of 16 words.
; * b (r8)  A single precision digit (one 64-bit word).
; *
; * r12 and r13 are callee-saved and are preserved on the stack.
; * Scratch registers: rax, rdx, r9, r10, r11 and the flags.
; */
_text SEGMENT READONLY PARA
sp_1024_mul_d_avx2_16 PROC
push r12
push r13
; MULX takes one factor implicitly from rdx, so the pointer to a moves to
; rax and the digit b is parked in rdx for the whole routine.
mov rax, rdx
; A[0] * B
mov rdx, r8
; r13 is the constant zero. The XOR also clears CF and OF, which start the
; two carry chains used below. MULX and MOV leave the flags untouched.
xor r13, r13
mulx r12, r11, QWORD PTR [rax]
mov QWORD PTR [rcx], r11
; Each step below: r10:r9 = A[i] * B. ADCX adds the low half into the
; pending word using the CF chain; ADOX adds the high half into a freshly
; zeroed next word using the OF chain. r11 and r12 swap roles every step.
; A[1] * B
mulx r10, r9, QWORD PTR [rax+8]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+8], r12
; A[2] * B
mulx r10, r9, QWORD PTR [rax+16]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+16], r11
; A[3] * B
mulx r10, r9, QWORD PTR [rax+24]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+24], r12
; A[4] * B
mulx r10, r9, QWORD PTR [rax+32]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+32], r11
; A[5] * B
mulx r10, r9, QWORD PTR [rax+40]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+40], r12
; A[6] * B
mulx r10, r9, QWORD PTR [rax+48]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+48], r11
; A[7] * B
mulx r10, r9, QWORD PTR [rax+56]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+56], r12
; A[8] * B
mulx r10, r9, QWORD PTR [rax+64]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+64], r11
; A[9] * B
mulx r10, r9, QWORD PTR [rax+72]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+72], r12
; A[10] * B
mulx r10, r9, QWORD PTR [rax+80]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+80], r11
; A[11] * B
mulx r10, r9, QWORD PTR [rax+88]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+88], r12
; A[12] * B
mulx r10, r9, QWORD PTR [rax+96]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+96], r11
; A[13] * B
mulx r10, r9, QWORD PTR [rax+104]
mov r11, r13
adcx r12, r9
adox r11, r10
mov QWORD PTR [rcx+104], r12
; A[14] * B
mulx r10, r9, QWORD PTR [rax+112]
mov r12, r13
adcx r11, r9
adox r12, r10
mov QWORD PTR [rcx+112], r11
; A[15] * B
mulx r10, r9, QWORD PTR [rax+120]
mov r11, r13
adcx r12, r9
adox r11, r10
; Fold the last pending CF into the 17th word (r13 is zero).
adcx r11, r13
mov QWORD PTR [rcx+120], r12
mov QWORD PTR [rcx+128], r11
pop r13
pop r12
ret
sp_1024_mul_d_avx2_16 ENDP
_text ENDS
ENDIF
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
; *
; * d1  (rcx) The high order half of the number to divide.
; * d0  (rdx) The low order half of the number to divide.
; * div (r8)  The divisor.
; * returns (rax) the quotient. DIV also leaves the remainder in rdx.
; *
; * The caller must ensure d1 < div. Otherwise the quotient does not fit
; * in 64 bits (or div is zero) and the DIV instruction raises a divide
; * error exception.
; */
_text SEGMENT READONLY PARA
div_1024_word_asm_16 PROC
; DIV divides rdx:rax by its operand, so d0 goes to rax and d1 to rdx.
; d0 must be copied out of rdx before d1 is copied in.
mov rax, rdx
mov rdx, rcx
div r8
ret
div_1024_word_asm_16 ENDP
_text ENDS
ENDIF
; /* Compare a with b in constant time.
; *
; * Both numbers are 16 words of 64 bits. The words are compared from the
; * most significant (offset 120) down to the least significant (offset 0).
; * There are no branches and all 16 words are always read.
; *
; * a (rcx) A single precision integer.
; * b (rdx) A single precision integer.
; * return (rax) -1, 0 or 1 if a is less than, equal to or greater than b
; * respectively.
; *
; * r12 is callee-saved and is preserved on the stack.
; * Scratch registers: r8, r9, r10, r11 and the flags.
; */
_text SEGMENT READONLY PARA
sp_1024_cmp_16 PROC
push r12
; Register roles:
;   r9  = constant 0
;   r10 = constant 1
;   r8  = "still equal" mask: -1 until the first differing word, then 0
;   rax = running result, starts at -1 and is fixed up by the final XOR
xor r9, r9
mov r8, -1
mov rax, -1
mov r10, 1
; One step per word. Both words are ANDed with r8, so once a difference
; has been found every later pair is 0 vs 0 and none of the CMOVs fire.
;   cmova  : a word > b word  -> result 1
;   cmovc  : a word < b word  -> result r8, which is still -1 at this point
;   cmovnz : words differ     -> clear the mask
mov r11, QWORD PTR [rcx+120]
mov r12, QWORD PTR [rdx+120]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+112]
mov r12, QWORD PTR [rdx+112]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+104]
mov r12, QWORD PTR [rdx+104]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+96]
mov r12, QWORD PTR [rdx+96]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+88]
mov r12, QWORD PTR [rdx+88]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+80]
mov r12, QWORD PTR [rdx+80]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+72]
mov r12, QWORD PTR [rdx+72]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+64]
mov r12, QWORD PTR [rdx+64]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+56]
mov r12, QWORD PTR [rdx+56]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+48]
mov r12, QWORD PTR [rdx+48]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+40]
mov r12, QWORD PTR [rdx+40]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+32]
mov r12, QWORD PTR [rdx+32]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+24]
mov r12, QWORD PTR [rdx+24]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+16]
mov r12, QWORD PTR [rdx+16]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx+8]
mov r12, QWORD PTR [rdx+8]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
mov r11, QWORD PTR [rcx]
mov r12, QWORD PTR [rdx]
and r11, r8
and r12, r8
sub r11, r12
cmova rax, r10
cmovc rax, r8
cmovnz r8, r9
; If every word matched, rax and r8 are both still -1 and the XOR yields 0.
; Otherwise r8 is 0 and the XOR leaves the -1 or 1 in rax unchanged.
xor rax, r8
pop r12
ret
sp_1024_cmp_16 ENDP
_text ENDS
; /* Conditionally copy a into r using the mask m.
; * m is -1 (all ones) to copy and 0 to leave r unchanged.
; *
; * Every word is updated as r[i] ^= (r[i] ^ a[i]) & m, so r is read and
; * written for either mask value and no branch depends on m.
; *
; * r (rcx) A single precision number to copy over (16 words).
; * a (rdx) A single precision number to copy (16 words).
; * m (r8)  Mask value to apply.
; *
; * Scratch registers: rax, r9, r10, r11 and the flags.
; */
_text SEGMENT READONLY PARA
sp_1024_cond_copy_16 PROC
; Words 0-3: gather a, form the masked difference, then fold it into r.
mov rax, QWORD PTR [rdx]
mov r9, QWORD PTR [rdx+8]
mov r10, QWORD PTR [rdx+16]
mov r11, QWORD PTR [rdx+24]
xor rax, QWORD PTR [rcx]
and rax, r8
xor r9, QWORD PTR [rcx+8]
and r9, r8
xor r10, QWORD PTR [rcx+16]
and r10, r8
xor r11, QWORD PTR [rcx+24]
and r11, r8
xor QWORD PTR [rcx], rax
xor QWORD PTR [rcx+8], r9
xor QWORD PTR [rcx+16], r10
xor QWORD PTR [rcx+24], r11
; Words 4-7
mov rax, QWORD PTR [rdx+32]
mov r9, QWORD PTR [rdx+40]
mov r10, QWORD PTR [rdx+48]
mov r11, QWORD PTR [rdx+56]
xor rax, QWORD PTR [rcx+32]
and rax, r8
xor r9, QWORD PTR [rcx+40]
and r9, r8
xor r10, QWORD PTR [rcx+48]
and r10, r8
xor r11, QWORD PTR [rcx+56]
and r11, r8
xor QWORD PTR [rcx+32], rax
xor QWORD PTR [rcx+40], r9
xor QWORD PTR [rcx+48], r10
xor QWORD PTR [rcx+56], r11
; Words 8-11
mov rax, QWORD PTR [rdx+64]
mov r9, QWORD PTR [rdx+72]
mov r10, QWORD PTR [rdx+80]
mov r11, QWORD PTR [rdx+88]
xor rax, QWORD PTR [rcx+64]
and rax, r8
xor r9, QWORD PTR [rcx+72]
and r9, r8
xor r10, QWORD PTR [rcx+80]
and r10, r8
xor r11, QWORD PTR [rcx+88]
and r11, r8
xor QWORD PTR [rcx+64], rax
xor QWORD PTR [rcx+72], r9
xor QWORD PTR [rcx+80], r10
xor QWORD PTR [rcx+88], r11
; Words 12-15
mov rax, QWORD PTR [rdx+96]
mov r9, QWORD PTR [rdx+104]
mov r10, QWORD PTR [rdx+112]
mov r11, QWORD PTR [rdx+120]
xor rax, QWORD PTR [rcx+96]
and rax, r8
xor r9, QWORD PTR [rcx+104]
and r9, r8
xor r10, QWORD PTR [rcx+112]
and r10, r8
xor r11, QWORD PTR [rcx+120]
and r11, r8
xor QWORD PTR [rcx+96], rax
xor QWORD PTR [rcx+104], r9
xor QWORD PTR [rcx+112], r10
xor QWORD PTR [rcx+120], r11
ret
sp_1024_cond_copy_16 ENDP
_text ENDS
; /* Reduce the number back to 1024 bits using Montgomery reduction.
; *
; * a  (rcx) A single precision number to reduce in place. The loop reads
; *          and writes 32 words (a[0] to a[31]); the 16-word result is
; *          left in a[0] to a[15].
; * m  (rdx) The single precision number representing the modulus
; *          (16 words).
; * mp (r8)  The digit representing the negative inverse of m mod 2^n.
; *
; * Not a leaf routine: it finishes by calling sp_1024_cond_sub_16.
; * r12-r15, rdi and rsi are callee-saved and are preserved on the stack.
; * Scratch registers: rax, rdx, r9, r10, r11 and the flags.
; */
_text SEGMENT READONLY PARA
sp_1024_mont_reduce_16 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
; MUL writes rdx:rax, so the modulus pointer is kept in r9.
mov r9, rdx
; rsi carries the overflow out of a[i+16] from one iteration to the next.
xor rsi, rsi
; i = 16
mov r10, 16
; The two lowest live words are cached in registers (r15 = a[i],
; rdi = a[i+1]) and are only written back after the loop.
mov r15, QWORD PTR [rcx]
mov rdi, QWORD PTR [rcx+8]
L_1024_mont_loop_16:
; mu = a[i] * mp
mov r13, r15
imul r13, r8
; a[i+0] += m[0] * mu
; The sum itself is discarded (r15 is reloaded below); only the carry
; into the next word is kept in r12.
mov rax, r13
xor r12, r12
mul QWORD PTR [r9]
add r15, rax
adc r12, rdx
; a[i+1] += m[1] * mu
; The result becomes the cached a[i] of the next iteration.
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+8]
mov r15, rdi
add r15, rax
adc r11, rdx
add r15, r12
adc r11, 0
; a[i+2] += m[2] * mu
; The result becomes the cached a[i+1] of the next iteration.
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+16]
mov rdi, QWORD PTR [rcx+16]
add rdi, rax
adc r12, rdx
add rdi, r11
adc r12, 0
; a[i+3] += m[3] * mu
; From here on every word is updated in memory; r11 and r12 alternate as
; the carry into the next word.
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+24]
mov r14, QWORD PTR [rcx+24]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+24], r14
adc r11, 0
; a[i+4] += m[4] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+32]
mov r14, QWORD PTR [rcx+32]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+32], r14
adc r12, 0
; a[i+5] += m[5] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+40]
mov r14, QWORD PTR [rcx+40]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+40], r14
adc r11, 0
; a[i+6] += m[6] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+48]
mov r14, QWORD PTR [rcx+48]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+48], r14
adc r12, 0
; a[i+7] += m[7] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+56]
mov r14, QWORD PTR [rcx+56]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+56], r14
adc r11, 0
; a[i+8] += m[8] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+64]
mov r14, QWORD PTR [rcx+64]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+64], r14
adc r12, 0
; a[i+9] += m[9] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+72]
mov r14, QWORD PTR [rcx+72]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+72], r14
adc r11, 0
; a[i+10] += m[10] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+80]
mov r14, QWORD PTR [rcx+80]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+80], r14
adc r12, 0
; a[i+11] += m[11] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+88]
mov r14, QWORD PTR [rcx+88]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+88], r14
adc r11, 0
; a[i+12] += m[12] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+96]
mov r14, QWORD PTR [rcx+96]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+96], r14
adc r12, 0
; a[i+13] += m[13] * mu
mov rax, r13
xor r11, r11
mul QWORD PTR [r9+104]
mov r14, QWORD PTR [rcx+104]
add r14, rax
adc r11, rdx
add r14, r12
mov QWORD PTR [rcx+104], r14
adc r11, 0
; a[i+14] += m[14] * mu
mov rax, r13
xor r12, r12
mul QWORD PTR [r9+112]
mov r14, QWORD PTR [rcx+112]
add r14, rax
adc r12, rdx
add r14, r11
mov QWORD PTR [rcx+112], r14
adc r12, 0
; a[i+15] += m[15] * mu
; The high half of this product, plus the carry saved from the previous
; iteration (rsi), is added into a[i+16]. rsi is then rebuilt from the
; carries produced here.
mov rax, r13
mul QWORD PTR [r9+120]
mov r14, QWORD PTR [rcx+120]
add r12, rax
adc rdx, rsi
mov rsi, 0
adc rsi, 0
add r14, r12
mov QWORD PTR [rcx+120], r14
adc QWORD PTR [rcx+128], rdx
adc rsi, 0
; i -= 1
; Advance the window by one word.
add rcx, 8
dec r10
jnz L_1024_mont_loop_16
; rcx now points at a[16]. Write back the two cached words and build the
; subtraction mask: all ones if the reduction overflowed (rsi) or if the
; top word a[31] is not below the top word of m. Only the top words are
; compared. The NEG assumes the carry in rsi is 0 or 1.
mov r14, QWORD PTR [rcx+120]
mov QWORD PTR [rcx], r15
sub r14, QWORD PTR [r9+120]
mov QWORD PTR [rcx+8], rdi
sbb r14, r14
neg rsi
not r14
or rsi, r14
; Set up sp_1024_cond_sub_16(r = a[0..15], a = a[16..31], b = m, mask).
; Every routine in this file takes its arguments in rcx, rdx, r8, r9, so
; this ordering is used unconditionally. The modulus pointer must be
; copied out of r9 BEFORE r9 is overwritten with the mask.
mov r8, r9
mov r9, rsi
mov rdx, rcx
sub rcx, 128
; NOTE: no shadow space is reserved and rsp is not 16-byte aligned at this
; call (six pushes after entry). This works only because
; sp_1024_cond_sub_16 is a leaf that touches nothing above its own
; 128-byte frame and performs no alignment-sensitive accesses.
call sp_1024_cond_sub_16
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_1024_mont_reduce_16 ENDP
_text ENDS
; /* Add two Montgomery form numbers (r = (a + b) % m).
; *
; * Computes a + b and then subtracts m once when the sum carried out of
; * 1024 bits or when the top word of the sum is not below the top word
; * of m. Only the top words are compared.
; *
; * r (rcx) Result of addition (16 words).
; * a (rdx) First number to add in Montgomery form (16 words).
; * b (r8)  Second number to add in Montgomery form (16 words).
; * m (r9)  Modulus (prime) (16 words).
; *
; * Uses 128 bytes of stack for the masked copy of m.
; * r12 and r13 are callee-saved and are preserved on the stack.
; * Scratch registers: rax, r10, r11 and the flags.
; */
_text SEGMENT READONLY PARA
sp_1024_mont_add_16 PROC
push r12
push r13
sub rsp, 128
; Step 1: r = a + b, four words at a time. Only MOVs separate the ADD/ADC
; instructions, so the carry ripples through all 16 words.
mov rax, QWORD PTR [rdx]
mov r10, QWORD PTR [rdx+8]
mov r11, QWORD PTR [rdx+16]
mov r12, QWORD PTR [rdx+24]
add rax, QWORD PTR [r8]
; r13 will become the mask. It is zeroed with MOV rather than XOR because
; the carry chain is already live and MOV does not touch the flags.
mov r13, 0
adc r10, QWORD PTR [r8+8]
adc r11, QWORD PTR [r8+16]
adc r12, QWORD PTR [r8+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r10
mov QWORD PTR [rcx+16], r11
mov QWORD PTR [rcx+24], r12
mov rax, QWORD PTR [rdx+32]
mov r10, QWORD PTR [rdx+40]
mov r11, QWORD PTR [rdx+48]
mov r12, QWORD PTR [rdx+56]
adc rax, QWORD PTR [r8+32]
adc r10, QWORD PTR [r8+40]
adc r11, QWORD PTR [r8+48]
adc r12, QWORD PTR [r8+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r10
mov QWORD PTR [rcx+48], r11
mov QWORD PTR [rcx+56], r12
mov rax, QWORD PTR [rdx+64]
mov r10, QWORD PTR [rdx+72]
mov r11, QWORD PTR [rdx+80]
mov r12, QWORD PTR [rdx+88]
adc rax, QWORD PTR [r8+64]
adc r10, QWORD PTR [r8+72]
adc r11, QWORD PTR [r8+80]
adc r12, QWORD PTR [r8+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r10
mov QWORD PTR [rcx+80], r11
mov QWORD PTR [rcx+88], r12
mov rax, QWORD PTR [rdx+96]
mov r10, QWORD PTR [rdx+104]
mov r11, QWORD PTR [rdx+112]
mov r12, QWORD PTR [rdx+120]
adc rax, QWORD PTR [r8+96]
adc r10, QWORD PTR [r8+104]
adc r11, QWORD PTR [r8+112]
adc r12, QWORD PTR [r8+120]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r10
mov QWORD PTR [rcx+112], r11
mov QWORD PTR [rcx+120], r12
; Step 2: build the mask in r13.
; r13 = 0 - 0 - CF: all ones when the addition carried out of word 15.
sbb r13, 0
; r12 still holds the top word of the sum. After SUB/SBB/NOT it is all
; ones when that word is not below the top word of m, otherwise zero.
sub r12, QWORD PTR [r9+120]
sbb r12, r12
not r12
or r13, r12
; Step 3: store (m & mask) on the stack. AND clears CF, so the masking is
; finished before the borrow chain in step 4 begins.
mov r11, QWORD PTR [r9]
mov r12, QWORD PTR [r9+8]
and r11, r13
and r12, r13
mov QWORD PTR [rsp], r11
mov QWORD PTR [rsp+8], r12
mov r11, QWORD PTR [r9+16]
mov r12, QWORD PTR [r9+24]
and r11, r13
and r12, r13
mov QWORD PTR [rsp+16], r11
mov QWORD PTR [rsp+24], r12
mov r11, QWORD PTR [r9+32]
mov r12, QWORD PTR [r9+40]
and r11, r13
and r12, r13
mov QWORD PTR [rsp+32], r11
mov QWORD PTR [rsp+40], r12
mov r11, QWORD PTR [r9+48]
mov r12, QWORD PTR [r9+56]
and r11, r13
and r12, r13
mov QWORD PTR [rsp+48], r11
mov QWORD PTR [rsp+56], r12
mov r11, QWORD PTR [r9+64]
mov r12, QWORD PTR [r9+72]
and r11, r13
and r12, r13
mov QWORD PTR [rsp+64], r11
mov QWORD PTR [rsp+72], r12
mov r11, QWORD PTR [r9+80]
mov r12, QWORD PTR [r9+88]
and r11, r13
and r12, r13
mov QWORD PTR [rsp+80], r11
mov QWORD PTR [rsp+88], r12
mov r11, QWORD PTR [r9+96]
mov r12, QWORD PTR [r9+104]
and r11, r13
and r12, r13
mov QWORD PTR [rsp+96], r11
mov QWORD PTR [rsp+104], r12
mov r11, QWORD PTR [r9+112]
mov r12, QWORD PTR [r9+120]
and r11, r13
and r12, r13
mov QWORD PTR [rsp+112], r11
mov QWORD PTR [rsp+120], r12
; Step 4: r -= (m & mask), two words at a time. The final borrow is not
; used.
mov rax, QWORD PTR [rcx]
mov r10, QWORD PTR [rcx+8]
sub rax, QWORD PTR [rsp]
sbb r10, QWORD PTR [rsp+8]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r10
mov rax, QWORD PTR [rcx+16]
mov r10, QWORD PTR [rcx+24]
sbb rax, QWORD PTR [rsp+16]
sbb r10, QWORD PTR [rsp+24]
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r10
mov rax, QWORD PTR [rcx+32]
mov r10, QWORD PTR [rcx+40]
sbb rax, QWORD PTR [rsp+32]
sbb r10, QWORD PTR [rsp+40]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r10
mov rax, QWORD PTR [rcx+48]
mov r10, QWORD PTR [rcx+56]
sbb rax, QWORD PTR [rsp+48]
sbb r10, QWORD PTR [rsp+56]
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r10
mov rax, QWORD PTR [rcx+64]
mov r10, QWORD PTR [rcx+72]
sbb rax, QWORD PTR [rsp+64]
sbb r10, QWORD PTR [rsp+72]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r10
mov rax, QWORD PTR [rcx+80]
mov r10, QWORD PTR [rcx+88]
sbb rax, QWORD PTR [rsp+80]
sbb r10, QWORD PTR [rsp+88]
mov QWORD PTR [rcx+80], rax
mov QWORD PTR [rcx+88], r10
mov rax, QWORD PTR [rcx+96]
mov r10, QWORD PTR [rcx+104]
sbb rax, QWORD PTR [rsp+96]
sbb r10, QWORD PTR [rsp+104]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r10
mov rax, QWORD PTR [rcx+112]
mov r10, QWORD PTR [rcx+120]
sbb rax, QWORD PTR [rsp+112]
sbb r10, QWORD PTR [rsp+120]
mov QWORD PTR [rcx+112], rax
mov QWORD PTR [rcx+120], r10
add rsp, 128
pop r13
pop r12
ret
sp_1024_mont_add_16 ENDP
_text ENDS
; /* Double a Montgomery form number (r = (a + a) % m).
; *
; * r Result of doubling.
; * a Number to double in Montgomery form.
; * m Modulus (prime).
; */
_text SEGMENT READONLY PARA
sp_1024_mont_dbl_16 PROC
push r12
sub rsp, 128
mov rax, QWORD PTR [rdx]
mov r9, QWORD PTR [rdx+8]
mov r10, QWORD PTR [rdx+16]
mov r11, QWORD PTR [rdx+24]
add rax, QWORD PTR [rdx]
mov r12, 0
adc r9, QWORD PTR [rdx+8]
adc r10, QWORD PTR [rdx+16]
adc r11, QWORD PTR [rdx+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r9
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
mov rax, QWORD PTR [rdx+32]
mov r9, QWORD PTR [rdx+40]
mov r10, QWORD PTR [rdx+48]
mov r11, QWORD PTR [rdx+56]
adc rax, QWORD PTR [rdx+32]
adc r9, QWORD PTR [rdx+40]
adc r10, QWORD PTR [rdx+48]
adc r11, QWORD PTR [rdx+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r9
mov QWORD PTR [rcx+48], r10
mov QWORD PTR [rcx+56], r11
mov rax, QWORD PTR [rdx+64]
mov r9, QWORD PTR [rdx+72]
mov r10, QWORD PTR [rdx+80]
mov r11, QWORD PTR [rdx+88]
adc rax, QWORD PTR [rdx+64]
adc r9, QWORD PTR [rdx+72]
adc r10, QWORD PTR [rdx+80]
adc r11, QWORD PTR [rdx+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r9
mov QWORD PTR [rcx+80], r10
mov QWORD PTR [rcx+88], r11
mov rax, QWORD PTR [rdx+96]
mov r9, QWORD PTR [rdx+104]
mov r10, QWORD PTR [rdx+112]
mov r11, QWORD PTR [rdx+120]
adc rax, QWORD PTR [rdx+96]
adc r9, QWORD PTR [rdx+104]
adc r10, QWORD PTR [rdx+112]
adc r11, QWORD PTR [rdx+120]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r9
mov QWORD PTR [rcx+112], r10
mov QWORD PTR [rcx+120], r11
sbb r12, 0
sub r11, QWORD PTR [r8+120]
sbb r11, r11
not r11
or r12, r11
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
and r10, r12
and r11, r12
mov QWORD PTR [rsp], r10
mov QWORD PTR [rsp+8], r11
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+16], r10
mov QWORD PTR [rsp+24], r11
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+32], r10
mov QWORD PTR [rsp+40], r11
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+48], r10
mov QWORD PTR [rsp+56], r11
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+64], r10
mov QWORD PTR [rsp+72], r11
mov r10, QWORD PTR [r8+80]
mov r11, QWORD PTR [r8+88]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+80], r10
mov QWORD PTR [rsp+88], r11
mov r10, QWORD PTR [r8+96]
mov r11, QWORD PTR [r8+104]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+96], r10
mov QWORD PTR [rsp+104], r11
mov r10, QWORD PTR [r8+112]
mov r11, QWORD PTR [r8+120]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+112], r10
mov QWORD PTR [rsp+120], r11
mov rax, QWORD PTR [rcx]
mov r9, QWORD PTR [rcx+8]
sub rax, QWORD PTR [rsp]
sbb r9, QWORD PTR [rsp+8]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r9
mov rax, QWORD PTR [rcx+16]
mov r9, QWORD PTR [rcx+24]
sbb rax, QWORD PTR [rsp+16]
sbb r9, QWORD PTR [rsp+24]
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r9
mov rax, QWORD PTR [rcx+32]
mov r9, QWORD PTR [rcx+40]
sbb rax, QWORD PTR [rsp+32]
sbb r9, QWORD PTR [rsp+40]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r9
mov rax, QWORD PTR [rcx+48]
mov r9, QWORD PTR [rcx+56]
sbb rax, QWORD PTR [rsp+48]
sbb r9, QWORD PTR [rsp+56]
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r9
mov rax, QWORD PTR [rcx+64]
mov r9, QWORD PTR [rcx+72]
sbb rax, QWORD PTR [rsp+64]
sbb r9, QWORD PTR [rsp+72]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r9
mov rax, QWORD PTR [rcx+80]
mov r9, QWORD PTR [rcx+88]
sbb rax, QWORD PTR [rsp+80]
sbb r9, QWORD PTR [rsp+88]
mov QWORD PTR [rcx+80], rax
mov QWORD PTR [rcx+88], r9
mov rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [rcx+104]
sbb rax, QWORD PTR [rsp+96]
sbb r9, QWORD PTR [rsp+104]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r9
mov rax, QWORD PTR [rcx+112]
mov r9, QWORD PTR [rcx+120]
sbb rax, QWORD PTR [rsp+112]
sbb r9, QWORD PTR [rsp+120]
mov QWORD PTR [rcx+112], rax
mov QWORD PTR [rcx+120], r9
add rsp, 128
pop r12
ret
sp_1024_mont_dbl_16 ENDP
_text ENDS
; /* Triple a Montgomery form number (r = a + a + a % m).
; *
; * r Result of tripling.
; * a Number to triple in Montgomery form.
; * m Modulus (prime).
; *
; * Registers on entry: rcx = r, rdx = a, r8 = m.
; * Scratch: rax, r9, r10, r11, r12 (r12 is pushed/popped) plus 128 bytes
; * of stack that hold the masked copy of m.
; *
; * Done as two rounds of "add, then conditionally subtract m":
; * first r = a + a, then r = r + a.
; */
_text SEGMENT READONLY PARA
sp_1024_mont_tpl_16 PROC
push r12
; Reserve 16 words of stack scratch for (m & mask).
sub rsp, 128
; Round 1, add: r = a + a, carry chained through CF.
mov rax, QWORD PTR [rdx]
mov r9, QWORD PTR [rdx+8]
mov r10, QWORD PTR [rdx+16]
mov r11, QWORD PTR [rdx+24]
add rax, QWORD PTR [rdx]
; mov (not xor) is used to clear r12 because it leaves CF untouched.
mov r12, 0
adc r9, QWORD PTR [rdx+8]
adc r10, QWORD PTR [rdx+16]
adc r11, QWORD PTR [rdx+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r9
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
mov rax, QWORD PTR [rdx+32]
mov r9, QWORD PTR [rdx+40]
mov r10, QWORD PTR [rdx+48]
mov r11, QWORD PTR [rdx+56]
adc rax, QWORD PTR [rdx+32]
adc r9, QWORD PTR [rdx+40]
adc r10, QWORD PTR [rdx+48]
adc r11, QWORD PTR [rdx+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r9
mov QWORD PTR [rcx+48], r10
mov QWORD PTR [rcx+56], r11
mov rax, QWORD PTR [rdx+64]
mov r9, QWORD PTR [rdx+72]
mov r10, QWORD PTR [rdx+80]
mov r11, QWORD PTR [rdx+88]
adc rax, QWORD PTR [rdx+64]
adc r9, QWORD PTR [rdx+72]
adc r10, QWORD PTR [rdx+80]
adc r11, QWORD PTR [rdx+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r9
mov QWORD PTR [rcx+80], r10
mov QWORD PTR [rcx+88], r11
; Words 12..15; r11 ends up holding the top word of the sum.
mov rax, QWORD PTR [rdx+96]
mov r9, QWORD PTR [rdx+104]
mov r10, QWORD PTR [rdx+112]
mov r11, QWORD PTR [rdx+120]
adc rax, QWORD PTR [rdx+96]
adc r9, QWORD PTR [rdx+104]
adc r10, QWORD PTR [rdx+112]
adc r11, QWORD PTR [rdx+120]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r9
mov QWORD PTR [rcx+112], r10
mov QWORD PTR [rcx+120], r11
; r12 = 0 - CF: all ones when the 16-word addition carried out, else 0.
sbb r12, 0
; r11 = all ones when the top word of the sum is >= m[15] (no borrow),
; else 0. Only the most significant word is compared against m.
sub r11, QWORD PTR [r8+120]
sbb r11, r11
not r11
; mask (r12) = carry-out OR top-word-not-below-m[15].
or r12, r11
; Round 1: build (m & mask) on the stack.
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
and r10, r12
and r11, r12
mov QWORD PTR [rsp], r10
mov QWORD PTR [rsp+8], r11
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+16], r10
mov QWORD PTR [rsp+24], r11
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+32], r10
mov QWORD PTR [rsp+40], r11
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+48], r10
mov QWORD PTR [rsp+56], r11
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+64], r10
mov QWORD PTR [rsp+72], r11
mov r10, QWORD PTR [r8+80]
mov r11, QWORD PTR [r8+88]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+80], r10
mov QWORD PTR [rsp+88], r11
mov r10, QWORD PTR [r8+96]
mov r11, QWORD PTR [r8+104]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+96], r10
mov QWORD PTR [rsp+104], r11
mov r10, QWORD PTR [r8+112]
mov r11, QWORD PTR [r8+120]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+112], r10
mov QWORD PTR [rsp+120], r11
; Round 1, subtract: r -= (m & mask), borrow chained through CF.
mov rax, QWORD PTR [rcx]
mov r9, QWORD PTR [rcx+8]
sub rax, QWORD PTR [rsp]
sbb r9, QWORD PTR [rsp+8]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r9
mov rax, QWORD PTR [rcx+16]
mov r9, QWORD PTR [rcx+24]
sbb rax, QWORD PTR [rsp+16]
sbb r9, QWORD PTR [rsp+24]
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r9
mov rax, QWORD PTR [rcx+32]
mov r9, QWORD PTR [rcx+40]
sbb rax, QWORD PTR [rsp+32]
sbb r9, QWORD PTR [rsp+40]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r9
mov rax, QWORD PTR [rcx+48]
mov r9, QWORD PTR [rcx+56]
sbb rax, QWORD PTR [rsp+48]
sbb r9, QWORD PTR [rsp+56]
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r9
mov rax, QWORD PTR [rcx+64]
mov r9, QWORD PTR [rcx+72]
sbb rax, QWORD PTR [rsp+64]
sbb r9, QWORD PTR [rsp+72]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r9
mov rax, QWORD PTR [rcx+80]
mov r9, QWORD PTR [rcx+88]
sbb rax, QWORD PTR [rsp+80]
sbb r9, QWORD PTR [rsp+88]
mov QWORD PTR [rcx+80], rax
mov QWORD PTR [rcx+88], r9
mov rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [rcx+104]
sbb rax, QWORD PTR [rsp+96]
sbb r9, QWORD PTR [rsp+104]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r9
mov rax, QWORD PTR [rcx+112]
mov r9, QWORD PTR [rcx+120]
sbb rax, QWORD PTR [rsp+112]
sbb r9, QWORD PTR [rsp+120]
mov QWORD PTR [rcx+112], rax
mov QWORD PTR [rcx+120], r9
; Round 2, add: r = r + a (r is read back from memory, a from rdx).
mov rax, QWORD PTR [rcx]
mov r9, QWORD PTR [rcx+8]
mov r10, QWORD PTR [rcx+16]
mov r11, QWORD PTR [rcx+24]
add rax, QWORD PTR [rdx]
; Clear the mask register again without disturbing CF.
mov r12, 0
adc r9, QWORD PTR [rdx+8]
adc r10, QWORD PTR [rdx+16]
adc r11, QWORD PTR [rdx+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r9
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
mov rax, QWORD PTR [rcx+32]
mov r9, QWORD PTR [rcx+40]
mov r10, QWORD PTR [rcx+48]
mov r11, QWORD PTR [rcx+56]
adc rax, QWORD PTR [rdx+32]
adc r9, QWORD PTR [rdx+40]
adc r10, QWORD PTR [rdx+48]
adc r11, QWORD PTR [rdx+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r9
mov QWORD PTR [rcx+48], r10
mov QWORD PTR [rcx+56], r11
mov rax, QWORD PTR [rcx+64]
mov r9, QWORD PTR [rcx+72]
mov r10, QWORD PTR [rcx+80]
mov r11, QWORD PTR [rcx+88]
adc rax, QWORD PTR [rdx+64]
adc r9, QWORD PTR [rdx+72]
adc r10, QWORD PTR [rdx+80]
adc r11, QWORD PTR [rdx+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r9
mov QWORD PTR [rcx+80], r10
mov QWORD PTR [rcx+88], r11
mov rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [rcx+104]
mov r10, QWORD PTR [rcx+112]
mov r11, QWORD PTR [rcx+120]
adc rax, QWORD PTR [rdx+96]
adc r9, QWORD PTR [rdx+104]
adc r10, QWORD PTR [rdx+112]
adc r11, QWORD PTR [rdx+120]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r9
mov QWORD PTR [rcx+112], r10
mov QWORD PTR [rcx+120], r11
; Round 2 mask, derived exactly as in round 1:
; carry-out OR (top word of sum >= m[15]).
sbb r12, 0
sub r11, QWORD PTR [r8+120]
sbb r11, r11
not r11
or r12, r11
; Round 2: rebuild (m & mask) on the stack with the new mask.
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
and r10, r12
and r11, r12
mov QWORD PTR [rsp], r10
mov QWORD PTR [rsp+8], r11
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+16], r10
mov QWORD PTR [rsp+24], r11
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+32], r10
mov QWORD PTR [rsp+40], r11
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+48], r10
mov QWORD PTR [rsp+56], r11
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+64], r10
mov QWORD PTR [rsp+72], r11
mov r10, QWORD PTR [r8+80]
mov r11, QWORD PTR [r8+88]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+80], r10
mov QWORD PTR [rsp+88], r11
mov r10, QWORD PTR [r8+96]
mov r11, QWORD PTR [r8+104]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+96], r10
mov QWORD PTR [rsp+104], r11
mov r10, QWORD PTR [r8+112]
mov r11, QWORD PTR [r8+120]
and r10, r12
and r11, r12
mov QWORD PTR [rsp+112], r10
mov QWORD PTR [rsp+120], r11
; Round 2, subtract: r -= (m & mask).
mov rax, QWORD PTR [rcx]
mov r9, QWORD PTR [rcx+8]
sub rax, QWORD PTR [rsp]
sbb r9, QWORD PTR [rsp+8]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r9
mov rax, QWORD PTR [rcx+16]
mov r9, QWORD PTR [rcx+24]
sbb rax, QWORD PTR [rsp+16]
sbb r9, QWORD PTR [rsp+24]
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r9
mov rax, QWORD PTR [rcx+32]
mov r9, QWORD PTR [rcx+40]
sbb rax, QWORD PTR [rsp+32]
sbb r9, QWORD PTR [rsp+40]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r9
mov rax, QWORD PTR [rcx+48]
mov r9, QWORD PTR [rcx+56]
sbb rax, QWORD PTR [rsp+48]
sbb r9, QWORD PTR [rsp+56]
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r9
mov rax, QWORD PTR [rcx+64]
mov r9, QWORD PTR [rcx+72]
sbb rax, QWORD PTR [rsp+64]
sbb r9, QWORD PTR [rsp+72]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r9
mov rax, QWORD PTR [rcx+80]
mov r9, QWORD PTR [rcx+88]
sbb rax, QWORD PTR [rsp+80]
sbb r9, QWORD PTR [rsp+88]
mov QWORD PTR [rcx+80], rax
mov QWORD PTR [rcx+88], r9
mov rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [rcx+104]
sbb rax, QWORD PTR [rsp+96]
sbb r9, QWORD PTR [rsp+104]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r9
mov rax, QWORD PTR [rcx+112]
mov r9, QWORD PTR [rcx+120]
sbb rax, QWORD PTR [rsp+112]
sbb r9, QWORD PTR [rsp+120]
mov QWORD PTR [rcx+112], rax
mov QWORD PTR [rcx+120], r9
; Release the scratch area and restore the saved register.
add rsp, 128
pop r12
ret
sp_1024_mont_tpl_16 ENDP
_text ENDS
; /* Subtract two Montgomery form numbers (r = a - b % m).
; *
; * r Result of subtraction.
; * a Number to subtract from in Montgomery form.
; * b Number to subtract in Montgomery form.
; * m Modulus (prime).
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b, r9 = m.
; * Scratch: rax, r10, r11, r12, r13 (r12/r13 are pushed/popped) plus
; * 128 bytes of stack that hold the masked copy of m.
; */
_text SEGMENT READONLY PARA
sp_1024_mont_sub_16 PROC
push r12
push r13
; Reserve 16 words of stack scratch for (m & mask).
sub rsp, 128
; Pass 1: r = a - b, four words at a time, borrow chained through CF.
mov rax, QWORD PTR [rdx]
mov r10, QWORD PTR [rdx+8]
mov r11, QWORD PTR [rdx+16]
mov r12, QWORD PTR [rdx+24]
sub rax, QWORD PTR [r8]
; mov (not xor) is used to clear r13 because it leaves CF untouched.
mov r13, 0
sbb r10, QWORD PTR [r8+8]
sbb r11, QWORD PTR [r8+16]
sbb r12, QWORD PTR [r8+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r10
mov QWORD PTR [rcx+16], r11
mov QWORD PTR [rcx+24], r12
; Words 4..7.
mov rax, QWORD PTR [rdx+32]
mov r10, QWORD PTR [rdx+40]
mov r11, QWORD PTR [rdx+48]
mov r12, QWORD PTR [rdx+56]
sbb rax, QWORD PTR [r8+32]
sbb r10, QWORD PTR [r8+40]
sbb r11, QWORD PTR [r8+48]
sbb r12, QWORD PTR [r8+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r10
mov QWORD PTR [rcx+48], r11
mov QWORD PTR [rcx+56], r12
; Words 8..11.
mov rax, QWORD PTR [rdx+64]
mov r10, QWORD PTR [rdx+72]
mov r11, QWORD PTR [rdx+80]
mov r12, QWORD PTR [rdx+88]
sbb rax, QWORD PTR [r8+64]
sbb r10, QWORD PTR [r8+72]
sbb r11, QWORD PTR [r8+80]
sbb r12, QWORD PTR [r8+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r10
mov QWORD PTR [rcx+80], r11
mov QWORD PTR [rcx+88], r12
; Words 12..15.
mov rax, QWORD PTR [rdx+96]
mov r10, QWORD PTR [rdx+104]
mov r11, QWORD PTR [rdx+112]
mov r12, QWORD PTR [rdx+120]
sbb rax, QWORD PTR [r8+96]
sbb r10, QWORD PTR [r8+104]
sbb r11, QWORD PTR [r8+112]
sbb r12, QWORD PTR [r8+120]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r10
mov QWORD PTR [rcx+112], r11
mov QWORD PTR [rcx+120], r12
; mask (r13) = 0 - CF: all ones when the subtraction borrowed (a < b),
; else 0.
sbb r13, 0
; Build (m & mask) on the stack: either m itself or all zeros.
mov r11, QWORD PTR [r9]
mov r12, QWORD PTR [r9+8]
and r11, r13
and r12, r13
mov QWORD PTR [rsp], r11
mov QWORD PTR [rsp+8], r12
mov r11, QWORD PTR [r9+16]
mov r12, QWORD PTR [r9+24]
and r11, r13
and r12, r13
mov QWORD PTR [rsp+16], r11
mov QWORD PTR [rsp+24], r12
mov r11, QWORD PTR [r9+32]
mov r12, QWORD PTR [r9+40]
and r11, r13
and r12, r13
mov QWORD PTR [rsp+32], r11
mov QWORD PTR [rsp+40], r12
mov r11, QWORD PTR [r9+48]
mov r12, QWORD PTR [r9+56]
and r11, r13
and r12, r13
mov QWORD PTR [rsp+48], r11
mov QWORD PTR [rsp+56], r12
mov r11, QWORD PTR [r9+64]
mov r12, QWORD PTR [r9+72]
and r11, r13
and r12, r13
mov QWORD PTR [rsp+64], r11
mov QWORD PTR [rsp+72], r12
mov r11, QWORD PTR [r9+80]
mov r12, QWORD PTR [r9+88]
and r11, r13
and r12, r13
mov QWORD PTR [rsp+80], r11
mov QWORD PTR [rsp+88], r12
mov r11, QWORD PTR [r9+96]
mov r12, QWORD PTR [r9+104]
and r11, r13
and r12, r13
mov QWORD PTR [rsp+96], r11
mov QWORD PTR [rsp+104], r12
mov r11, QWORD PTR [r9+112]
mov r12, QWORD PTR [r9+120]
and r11, r13
and r12, r13
mov QWORD PTR [rsp+112], r11
mov QWORD PTR [rsp+120], r12
; Pass 2: r += (m & mask), two words at a time, carry chained through CF.
; The same instructions run whether or not the mask is set.
mov rax, QWORD PTR [rcx]
mov r10, QWORD PTR [rcx+8]
add rax, QWORD PTR [rsp]
adc r10, QWORD PTR [rsp+8]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r10
mov rax, QWORD PTR [rcx+16]
mov r10, QWORD PTR [rcx+24]
adc rax, QWORD PTR [rsp+16]
adc r10, QWORD PTR [rsp+24]
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r10
mov rax, QWORD PTR [rcx+32]
mov r10, QWORD PTR [rcx+40]
adc rax, QWORD PTR [rsp+32]
adc r10, QWORD PTR [rsp+40]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r10
mov rax, QWORD PTR [rcx+48]
mov r10, QWORD PTR [rcx+56]
adc rax, QWORD PTR [rsp+48]
adc r10, QWORD PTR [rsp+56]
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r10
mov rax, QWORD PTR [rcx+64]
mov r10, QWORD PTR [rcx+72]
adc rax, QWORD PTR [rsp+64]
adc r10, QWORD PTR [rsp+72]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r10
mov rax, QWORD PTR [rcx+80]
mov r10, QWORD PTR [rcx+88]
adc rax, QWORD PTR [rsp+80]
adc r10, QWORD PTR [rsp+88]
mov QWORD PTR [rcx+80], rax
mov QWORD PTR [rcx+88], r10
mov rax, QWORD PTR [rcx+96]
mov r10, QWORD PTR [rcx+104]
adc rax, QWORD PTR [rsp+96]
adc r10, QWORD PTR [rsp+104]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r10
mov rax, QWORD PTR [rcx+112]
mov r10, QWORD PTR [rcx+120]
adc rax, QWORD PTR [rsp+112]
adc r10, QWORD PTR [rsp+120]
mov QWORD PTR [rcx+112], rax
mov QWORD PTR [rcx+120], r10
; Release the scratch area and restore the saved registers.
add rsp, 128
pop r13
pop r12
ret
sp_1024_mont_sub_16 ENDP
_text ENDS
; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
; *
; * r Result of division by 2.
; * a Number to divide.
; * m Modulus (prime).
; *
; * Registers on entry: rcx = r, rdx = a, r8 = m.
; * Scratch: rax, r9, r10, r12, r13 (r12/r13 are pushed/popped) plus
; * 128 bytes of stack that hold the intermediate sum.
; *
; * Computes (a + (m if a is odd, else 0)) >> 1 over 1025 bits.
; */
_text SEGMENT READONLY PARA
sp_1024_div2_16 PROC
push r12
push r13
; Reserve 16 words of stack scratch for the sum a + (m & mask).
sub rsp, 128
; rax keeps a[0]; r12 = 0 will receive the carry out of the sum.
mov r13, QWORD PTR [rdx]
xor r12, r12
mov rax, r13
; mask (r13) = -(a[0] & 1): all ones when a is odd, else 0.
and r13, 1
neg r13
; Store (m & mask) on the stack, one word at a time.
mov r10, QWORD PTR [r8]
and r10, r13
mov QWORD PTR [rsp], r10
mov r10, QWORD PTR [r8+8]
and r10, r13
mov QWORD PTR [rsp+8], r10
mov r10, QWORD PTR [r8+16]
and r10, r13
mov QWORD PTR [rsp+16], r10
mov r10, QWORD PTR [r8+24]
and r10, r13
mov QWORD PTR [rsp+24], r10
mov r10, QWORD PTR [r8+32]
and r10, r13
mov QWORD PTR [rsp+32], r10
mov r10, QWORD PTR [r8+40]
and r10, r13
mov QWORD PTR [rsp+40], r10
mov r10, QWORD PTR [r8+48]
and r10, r13
mov QWORD PTR [rsp+48], r10
mov r10, QWORD PTR [r8+56]
and r10, r13
mov QWORD PTR [rsp+56], r10
mov r10, QWORD PTR [r8+64]
and r10, r13
mov QWORD PTR [rsp+64], r10
mov r10, QWORD PTR [r8+72]
and r10, r13
mov QWORD PTR [rsp+72], r10
mov r10, QWORD PTR [r8+80]
and r10, r13
mov QWORD PTR [rsp+80], r10
mov r10, QWORD PTR [r8+88]
and r10, r13
mov QWORD PTR [rsp+88], r10
mov r10, QWORD PTR [r8+96]
and r10, r13
mov QWORD PTR [rsp+96], r10
mov r10, QWORD PTR [r8+104]
and r10, r13
mov QWORD PTR [rsp+104], r10
mov r10, QWORD PTR [r8+112]
and r10, r13
mov QWORD PTR [rsp+112], r10
mov r10, QWORD PTR [r8+120]
and r10, r13
mov QWORD PTR [rsp+120], r10
; stack += a, carry chained through CF (the loads do not touch flags).
add QWORD PTR [rsp], rax
mov rax, QWORD PTR [rdx+8]
adc QWORD PTR [rsp+8], rax
mov rax, QWORD PTR [rdx+16]
adc QWORD PTR [rsp+16], rax
mov rax, QWORD PTR [rdx+24]
adc QWORD PTR [rsp+24], rax
mov rax, QWORD PTR [rdx+32]
adc QWORD PTR [rsp+32], rax
mov rax, QWORD PTR [rdx+40]
adc QWORD PTR [rsp+40], rax
mov rax, QWORD PTR [rdx+48]
adc QWORD PTR [rsp+48], rax
mov rax, QWORD PTR [rdx+56]
adc QWORD PTR [rsp+56], rax
mov rax, QWORD PTR [rdx+64]
adc QWORD PTR [rsp+64], rax
mov rax, QWORD PTR [rdx+72]
adc QWORD PTR [rsp+72], rax
mov rax, QWORD PTR [rdx+80]
adc QWORD PTR [rsp+80], rax
mov rax, QWORD PTR [rdx+88]
adc QWORD PTR [rsp+88], rax
mov rax, QWORD PTR [rdx+96]
adc QWORD PTR [rsp+96], rax
mov rax, QWORD PTR [rdx+104]
adc QWORD PTR [rsp+104], rax
mov rax, QWORD PTR [rdx+112]
adc QWORD PTR [rsp+112], rax
mov rax, QWORD PTR [rdx+120]
adc QWORD PTR [rsp+120], rax
; r12 = carry out of the 16-word sum (bit 1024 of the result).
adc r12, 0
; Shift the sum right by one bit into r. Each shrd shifts the current
; word right by 1 and fills its top bit from the low bit of the next
; higher word; rax and r9 alternate as current/next word.
mov rax, QWORD PTR [rsp]
mov r9, QWORD PTR [rsp+8]
shrd rax, r9, 1
mov QWORD PTR [rcx], rax
mov rax, QWORD PTR [rsp+16]
shrd r9, rax, 1
mov QWORD PTR [rcx+8], r9
mov r9, QWORD PTR [rsp+24]
shrd rax, r9, 1
mov QWORD PTR [rcx+16], rax
mov rax, QWORD PTR [rsp+32]
shrd r9, rax, 1
mov QWORD PTR [rcx+24], r9
mov r9, QWORD PTR [rsp+40]
shrd rax, r9, 1
mov QWORD PTR [rcx+32], rax
mov rax, QWORD PTR [rsp+48]
shrd r9, rax, 1
mov QWORD PTR [rcx+40], r9
mov r9, QWORD PTR [rsp+56]
shrd rax, r9, 1
mov QWORD PTR [rcx+48], rax
mov rax, QWORD PTR [rsp+64]
shrd r9, rax, 1
mov QWORD PTR [rcx+56], r9
mov r9, QWORD PTR [rsp+72]
shrd rax, r9, 1
mov QWORD PTR [rcx+64], rax
mov rax, QWORD PTR [rsp+80]
shrd r9, rax, 1
mov QWORD PTR [rcx+72], r9
mov r9, QWORD PTR [rsp+88]
shrd rax, r9, 1
mov QWORD PTR [rcx+80], rax
mov rax, QWORD PTR [rsp+96]
shrd r9, rax, 1
mov QWORD PTR [rcx+88], r9
mov r9, QWORD PTR [rsp+104]
shrd rax, r9, 1
mov QWORD PTR [rcx+96], rax
mov rax, QWORD PTR [rsp+112]
shrd r9, rax, 1
mov QWORD PTR [rcx+104], r9
mov r9, QWORD PTR [rsp+120]
shrd rax, r9, 1
mov QWORD PTR [rcx+112], rax
; The top word takes its new high bit from the saved carry in r12.
shrd r9, r12, 1
mov QWORD PTR [rcx+120], r9
; Release the scratch area and restore the saved registers.
add rsp, 128
pop r13
pop r12
ret
sp_1024_div2_16 ENDP
_text ENDS
; /* Sub b from a into r. (r = a - b)
; *
; * r A single precision integer.
; * a A single precision integer.
; * b A single precision integer.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b.
; * Scratch: r9, r10 (alternating so a store can overlap the next load).
; * On return rax is 0 when there was no borrow out of the top word, or
; * all ones when there was (a < b as unsigned 16-word values).
; */
_text SEGMENT READONLY PARA
sp_1024_sub_16 PROC
mov r9, QWORD PTR [rdx]
; Clear rax before the first sub so the borrow chain is not disturbed.
xor rax, rax
; Word 0 starts the borrow chain; the mov loads/stores in between do
; not modify flags, so CF flows from each sbb to the next.
sub r9, QWORD PTR [r8]
mov r10, QWORD PTR [rdx+8]
mov QWORD PTR [rcx], r9
sbb r10, QWORD PTR [r8+8]
mov r9, QWORD PTR [rdx+16]
mov QWORD PTR [rcx+8], r10
sbb r9, QWORD PTR [r8+16]
mov r10, QWORD PTR [rdx+24]
mov QWORD PTR [rcx+16], r9
sbb r10, QWORD PTR [r8+24]
mov r9, QWORD PTR [rdx+32]
mov QWORD PTR [rcx+24], r10
sbb r9, QWORD PTR [r8+32]
mov r10, QWORD PTR [rdx+40]
mov QWORD PTR [rcx+32], r9
sbb r10, QWORD PTR [r8+40]
mov r9, QWORD PTR [rdx+48]
mov QWORD PTR [rcx+40], r10
sbb r9, QWORD PTR [r8+48]
mov r10, QWORD PTR [rdx+56]
mov QWORD PTR [rcx+48], r9
sbb r10, QWORD PTR [r8+56]
mov r9, QWORD PTR [rdx+64]
mov QWORD PTR [rcx+56], r10
sbb r9, QWORD PTR [r8+64]
mov r10, QWORD PTR [rdx+72]
mov QWORD PTR [rcx+64], r9
sbb r10, QWORD PTR [r8+72]
mov r9, QWORD PTR [rdx+80]
mov QWORD PTR [rcx+72], r10
sbb r9, QWORD PTR [r8+80]
mov r10, QWORD PTR [rdx+88]
mov QWORD PTR [rcx+80], r9
sbb r10, QWORD PTR [r8+88]
mov r9, QWORD PTR [rdx+96]
mov QWORD PTR [rcx+88], r10
sbb r9, QWORD PTR [r8+96]
mov r10, QWORD PTR [rdx+104]
mov QWORD PTR [rcx+96], r9
sbb r10, QWORD PTR [r8+104]
mov r9, QWORD PTR [rdx+112]
mov QWORD PTR [rcx+104], r10
sbb r9, QWORD PTR [r8+112]
mov r10, QWORD PTR [rdx+120]
mov QWORD PTR [rcx+112], r9
sbb r10, QWORD PTR [r8+120]
mov QWORD PTR [rcx+120], r10
; rax = 0 - CF: 0 when no borrow, all ones when the subtraction borrowed.
sbb rax, 0
ret
sp_1024_sub_16 ENDP
_text ENDS
IFDEF HAVE_INTEL_AVX2
; /* Reduce the number back to 1024 bits using Montgomery reduction.
; *
; * a A single precision number to reduce in place.
; * m The single precision number representing the modulus.
; * mp The digit representing the negative inverse of m mod 2^n.
; *
; * Registers on entry: rcx = a, rdx = m, r8 = mp.
; * Uses mulx with the adcx/adox dual carry chains and pext.
; *
; * Register roles inside the loop:
; *   r9  = a + 64 bytes, advanced by 16 bytes per loop pass
; *   r10 = m
; *   r11 = remaining word count (16, stepping down by 2)
; *   r15, rdi, rsi, rbx = a[i..i+3], kept in registers as a sliding window
; *   r12, r13 = alternating accumulators for the higher words
; *   r14 = zero source for the carry fix-up
; *   rbp = carry passed from one word's pass into the next
; * All callee-saved registers used are pushed on entry and popped on exit.
; */
_text SEGMENT READONLY PARA
sp_1024_mont_reduce_avx2_16 PROC
push r12
push r13
push r14
push r15
push rdi
push rsi
push rbx
push rbp
; Move a and m out of rcx/rdx: mulx reads rdx implicitly and the loop
; uses rcx/rax as the mulx outputs.
mov r9, rcx
mov r10, rdx
; rbp is zeroed here and again just before the loop, with no use of it
; in between.
xor rbp, rbp
; i = 16
mov r11, 16
; Preload a[0..3] into the register window.
mov r15, QWORD PTR [r9]
mov rdi, QWORD PTR [r9+8]
mov rsi, QWORD PTR [r9+16]
mov rbx, QWORD PTR [r9+24]
; Bias the pointer by 8 words; loop offsets are relative to a + 64.
add r9, 64
xor rbp, rbp
; Each pass of the loop body handles two words of a (two copies of the
; same sequence, the second with every a[] offset 8 bytes higher).
L_1024_mont_loop_avx2_16:
; mu = a[i] * mp
mov rdx, r15
mov r12, r15
imul rdx, r8
; Zero r14; the xor also clears CF and OF to start both carry chains.
xor r14, r14
; For each word: the low product half joins the CF chain (adcx) and the
; high half joins the OF chain (adox) of the next word up.
; a[i+0] += m[0] * mu
mulx rcx, rax, QWORD PTR [r10]
mov r15, rdi
adcx r12, rax
adox r15, rcx
; a[i+1] += m[1] * mu
mulx rcx, rax, QWORD PTR [r10+8]
mov rdi, rsi
adcx r15, rax
adox rdi, rcx
; a[i+2] += m[2] * mu
mulx rcx, rax, QWORD PTR [r10+16]
mov rsi, rbx
adcx rdi, rax
adox rsi, rcx
; a[i+3] += m[3] * mu
mulx rcx, rax, QWORD PTR [r10+24]
mov rbx, QWORD PTR [r9+-32]
adcx rsi, rax
adox rbx, rcx
; a[i+4] += m[4] * mu
mulx rcx, rax, QWORD PTR [r10+32]
mov r13, QWORD PTR [r9+-24]
adcx rbx, rax
adox r13, rcx
; a[i+5] += m[5] * mu
mulx rcx, rax, QWORD PTR [r10+40]
mov r12, QWORD PTR [r9+-16]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-24], r13
; a[i+6] += m[6] * mu
mulx rcx, rax, QWORD PTR [r10+48]
mov r13, QWORD PTR [r9+-8]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-16], r12
; a[i+7] += m[7] * mu
mulx rcx, rax, QWORD PTR [r10+56]
mov r12, QWORD PTR [r9]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-8], r13
; a[i+8] += m[8] * mu
mulx rcx, rax, QWORD PTR [r10+64]
mov r13, QWORD PTR [r9+8]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9], r12
; a[i+9] += m[9] * mu
mulx rcx, rax, QWORD PTR [r10+72]
mov r12, QWORD PTR [r9+16]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+8], r13
; a[i+10] += m[10] * mu
mulx rcx, rax, QWORD PTR [r10+80]
mov r13, QWORD PTR [r9+24]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+16], r12
; a[i+11] += m[11] * mu
mulx rcx, rax, QWORD PTR [r10+88]
mov r12, QWORD PTR [r9+32]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+24], r13
; a[i+12] += m[12] * mu
mulx rcx, rax, QWORD PTR [r10+96]
mov r13, QWORD PTR [r9+40]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+32], r12
; a[i+13] += m[13] * mu
mulx rcx, rax, QWORD PTR [r10+104]
mov r12, QWORD PTR [r9+48]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+40], r13
; a[i+14] += m[14] * mu
mulx rcx, rax, QWORD PTR [r10+112]
mov r13, QWORD PTR [r9+56]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+48], r12
; a[i+15] += m[15] * mu
mulx rcx, rax, QWORD PTR [r10+120]
mov r12, QWORD PTR [r9+64]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+56], r13
; a[i+16] += previous carry (rbp) + CF; then rbp = OF + CF, the carry
; handed to the next word's pass.
adcx r12, rbp
mov rbp, r14
mov QWORD PTR [r9+64], r12
adox rbp, r14
adcx rbp, r14
; Second unrolled copy: same sequence for the next word (offsets + 8).
; mu = a[i] * mp
mov rdx, r15
mov r12, r15
imul rdx, r8
xor r14, r14
; a[i+0] += m[0] * mu
mulx rcx, rax, QWORD PTR [r10]
mov r15, rdi
adcx r12, rax
adox r15, rcx
; a[i+1] += m[1] * mu
mulx rcx, rax, QWORD PTR [r10+8]
mov rdi, rsi
adcx r15, rax
adox rdi, rcx
; a[i+2] += m[2] * mu
mulx rcx, rax, QWORD PTR [r10+16]
mov rsi, rbx
adcx rdi, rax
adox rsi, rcx
; a[i+3] += m[3] * mu
mulx rcx, rax, QWORD PTR [r10+24]
mov rbx, QWORD PTR [r9+-24]
adcx rsi, rax
adox rbx, rcx
; a[i+4] += m[4] * mu
mulx rcx, rax, QWORD PTR [r10+32]
mov r13, QWORD PTR [r9+-16]
adcx rbx, rax
adox r13, rcx
; a[i+5] += m[5] * mu
mulx rcx, rax, QWORD PTR [r10+40]
mov r12, QWORD PTR [r9+-8]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+-16], r13
; a[i+6] += m[6] * mu
mulx rcx, rax, QWORD PTR [r10+48]
mov r13, QWORD PTR [r9]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+-8], r12
; a[i+7] += m[7] * mu
mulx rcx, rax, QWORD PTR [r10+56]
mov r12, QWORD PTR [r9+8]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9], r13
; a[i+8] += m[8] * mu
mulx rcx, rax, QWORD PTR [r10+64]
mov r13, QWORD PTR [r9+16]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+8], r12
; a[i+9] += m[9] * mu
mulx rcx, rax, QWORD PTR [r10+72]
mov r12, QWORD PTR [r9+24]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+16], r13
; a[i+10] += m[10] * mu
mulx rcx, rax, QWORD PTR [r10+80]
mov r13, QWORD PTR [r9+32]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+24], r12
; a[i+11] += m[11] * mu
mulx rcx, rax, QWORD PTR [r10+88]
mov r12, QWORD PTR [r9+40]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+32], r13
; a[i+12] += m[12] * mu
mulx rcx, rax, QWORD PTR [r10+96]
mov r13, QWORD PTR [r9+48]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+40], r12
; a[i+13] += m[13] * mu
mulx rcx, rax, QWORD PTR [r10+104]
mov r12, QWORD PTR [r9+56]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+48], r13
; a[i+14] += m[14] * mu
mulx rcx, rax, QWORD PTR [r10+112]
mov r13, QWORD PTR [r9+64]
adcx r12, rax
adox r13, rcx
mov QWORD PTR [r9+56], r12
; a[i+15] += m[15] * mu
mulx rcx, rax, QWORD PTR [r10+120]
mov r12, QWORD PTR [r9+72]
adcx r13, rax
adox r12, rcx
mov QWORD PTR [r9+64], r13
; Top word: add the incoming carry and compute the outgoing carry in rbp.
adcx r12, rbp
mov rbp, r14
mov QWORD PTR [r9+72], r12
adox rbp, r14
adcx rbp, r14
; a += 2
add r9, 16
; i -= 2
sub r11, 2
jnz L_1024_mont_loop_avx2_16
; Undo the 64-byte bias: r9 now points at a[16], the reduced upper half.
sub r9, 64
; r12 holds the last top word written (a[31]); compare it with m[15].
sub r12, QWORD PTR [r10+120]
; r8 (mp is no longer needed) = &a[16], the source for the final subtract.
mov r8, r9
sbb r12, r12
; rbp = -carry: all ones when the final pass carried out, else 0.
neg rbp
; r12 = all ones when a[31] >= m[15] (no borrow), else 0.
not r12
; mask (rbp) = carry-out OR top-word-not-below-m[15].
or rbp, r12
; r9 = a: the result is written to a[0..15].
sub r9, 128
; Final step: a[0..15] = a[16..19 in r15/rdi/rsi/rbx, 20..31 from r8]
; minus the selected modulus. pext with an all-ones mask returns the
; word unchanged and with a zero mask returns 0, so it selects m or 0
; without branching and without touching the borrow flag.
mov rcx, QWORD PTR [r10]
mov rdx, r15
pext rcx, rcx, rbp
sub rdx, rcx
mov rcx, QWORD PTR [r10+8]
mov rax, rdi
pext rcx, rcx, rbp
mov QWORD PTR [r9], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+16]
mov rcx, rsi
pext rdx, rdx, rbp
mov QWORD PTR [r9+8], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+24]
mov rdx, rbx
pext rax, rax, rbp
mov QWORD PTR [r9+16], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+32]
mov rax, QWORD PTR [r8+32]
pext rcx, rcx, rbp
mov QWORD PTR [r9+24], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+40]
mov rcx, QWORD PTR [r8+40]
pext rdx, rdx, rbp
mov QWORD PTR [r9+32], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+48]
mov rdx, QWORD PTR [r8+48]
pext rax, rax, rbp
mov QWORD PTR [r9+40], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+56]
mov rax, QWORD PTR [r8+56]
pext rcx, rcx, rbp
mov QWORD PTR [r9+48], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+64]
mov rcx, QWORD PTR [r8+64]
pext rdx, rdx, rbp
mov QWORD PTR [r9+56], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+72]
mov rdx, QWORD PTR [r8+72]
pext rax, rax, rbp
mov QWORD PTR [r9+64], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+80]
mov rax, QWORD PTR [r8+80]
pext rcx, rcx, rbp
mov QWORD PTR [r9+72], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+88]
mov rcx, QWORD PTR [r8+88]
pext rdx, rdx, rbp
mov QWORD PTR [r9+80], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+96]
mov rdx, QWORD PTR [r8+96]
pext rax, rax, rbp
mov QWORD PTR [r9+88], rcx
sbb rdx, rax
mov rcx, QWORD PTR [r10+104]
mov rax, QWORD PTR [r8+104]
pext rcx, rcx, rbp
mov QWORD PTR [r9+96], rdx
sbb rax, rcx
mov rdx, QWORD PTR [r10+112]
mov rcx, QWORD PTR [r8+112]
pext rdx, rdx, rbp
mov QWORD PTR [r9+104], rax
sbb rcx, rdx
mov rax, QWORD PTR [r10+120]
mov rdx, QWORD PTR [r8+120]
pext rax, rax, rbp
mov QWORD PTR [r9+112], rcx
sbb rdx, rax
mov QWORD PTR [r9+120], rdx
; Restore the saved registers in reverse push order.
pop rbp
pop rbx
pop rsi
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_1024_mont_reduce_avx2_16 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Add two Montgomery form numbers (r = a + b % m).
; *
; * r Result of addition.
; * a First number to add in Montgomery form.
; * b Second number to add in Montgomery form.
; * m Modulus (prime).
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b, r9 = m.
; * Scratch: rax, r10, r11, r12, r13 (r12/r13 are pushed/popped).
; * Unlike the non-AVX2 routine, no stack scratch is used: the modulus
; * words are selected in registers with pext.
; */
_text SEGMENT READONLY PARA
sp_1024_mont_add_avx2_16 PROC
push r12
push r13
; Pass 1: r = a + b, four words at a time, carry chained through CF.
mov rax, QWORD PTR [rdx]
mov r10, QWORD PTR [rdx+8]
mov r11, QWORD PTR [rdx+16]
mov r12, QWORD PTR [rdx+24]
add rax, QWORD PTR [r8]
; mov (not xor) is used to clear r13 because it leaves CF untouched.
mov r13, 0
adc r10, QWORD PTR [r8+8]
adc r11, QWORD PTR [r8+16]
adc r12, QWORD PTR [r8+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r10
mov QWORD PTR [rcx+16], r11
mov QWORD PTR [rcx+24], r12
; Words 4..7.
mov rax, QWORD PTR [rdx+32]
mov r10, QWORD PTR [rdx+40]
mov r11, QWORD PTR [rdx+48]
mov r12, QWORD PTR [rdx+56]
adc rax, QWORD PTR [r8+32]
adc r10, QWORD PTR [r8+40]
adc r11, QWORD PTR [r8+48]
adc r12, QWORD PTR [r8+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r10
mov QWORD PTR [rcx+48], r11
mov QWORD PTR [rcx+56], r12
; Words 8..11.
mov rax, QWORD PTR [rdx+64]
mov r10, QWORD PTR [rdx+72]
mov r11, QWORD PTR [rdx+80]
mov r12, QWORD PTR [rdx+88]
adc rax, QWORD PTR [r8+64]
adc r10, QWORD PTR [r8+72]
adc r11, QWORD PTR [r8+80]
adc r12, QWORD PTR [r8+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r10
mov QWORD PTR [rcx+80], r11
mov QWORD PTR [rcx+88], r12
; Words 12..15; r12 ends up holding the top word of the sum.
mov rax, QWORD PTR [rdx+96]
mov r10, QWORD PTR [rdx+104]
mov r11, QWORD PTR [rdx+112]
mov r12, QWORD PTR [rdx+120]
adc rax, QWORD PTR [r8+96]
adc r10, QWORD PTR [r8+104]
adc r11, QWORD PTR [r8+112]
adc r12, QWORD PTR [r8+120]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r10
mov QWORD PTR [rcx+112], r11
mov QWORD PTR [rcx+120], r12
; r13 = 0 - CF: all ones when the 16-word addition carried out, else 0.
sbb r13, 0
; r12 = all ones when the top word of the sum is >= m[15] (no borrow),
; else 0. Only the most significant word is compared against m.
sub r12, QWORD PTR [r9+120]
sbb r12, r12
not r12
; mask (r13) = carry-out OR top-word-not-below-m[15].
or r13, r12
; Pass 2: r -= selected modulus, two words at a time. pext with an
; all-ones mask returns the word unchanged and with a zero mask returns
; 0; it does not modify flags, so the borrow chain stays intact.
mov r11, QWORD PTR [r9]
mov r12, QWORD PTR [r9+8]
mov rax, QWORD PTR [rcx]
mov r10, QWORD PTR [rcx+8]
pext r11, r11, r13
pext r12, r12, r13
sub rax, r11
sbb r10, r12
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r10
mov r11, QWORD PTR [r9+16]
mov r12, QWORD PTR [r9+24]
mov rax, QWORD PTR [rcx+16]
mov r10, QWORD PTR [rcx+24]
pext r11, r11, r13
pext r12, r12, r13
sbb rax, r11
sbb r10, r12
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r10
mov r11, QWORD PTR [r9+32]
mov r12, QWORD PTR [r9+40]
mov rax, QWORD PTR [rcx+32]
mov r10, QWORD PTR [rcx+40]
pext r11, r11, r13
pext r12, r12, r13
sbb rax, r11
sbb r10, r12
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r10
mov r11, QWORD PTR [r9+48]
mov r12, QWORD PTR [r9+56]
mov rax, QWORD PTR [rcx+48]
mov r10, QWORD PTR [rcx+56]
pext r11, r11, r13
pext r12, r12, r13
sbb rax, r11
sbb r10, r12
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r10
mov r11, QWORD PTR [r9+64]
mov r12, QWORD PTR [r9+72]
mov rax, QWORD PTR [rcx+64]
mov r10, QWORD PTR [rcx+72]
pext r11, r11, r13
pext r12, r12, r13
sbb rax, r11
sbb r10, r12
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r10
mov r11, QWORD PTR [r9+80]
mov r12, QWORD PTR [r9+88]
mov rax, QWORD PTR [rcx+80]
mov r10, QWORD PTR [rcx+88]
pext r11, r11, r13
pext r12, r12, r13
sbb rax, r11
sbb r10, r12
mov QWORD PTR [rcx+80], rax
mov QWORD PTR [rcx+88], r10
mov r11, QWORD PTR [r9+96]
mov r12, QWORD PTR [r9+104]
mov rax, QWORD PTR [rcx+96]
mov r10, QWORD PTR [rcx+104]
pext r11, r11, r13
pext r12, r12, r13
sbb rax, r11
sbb r10, r12
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r10
mov r11, QWORD PTR [r9+112]
mov r12, QWORD PTR [r9+120]
mov rax, QWORD PTR [rcx+112]
mov r10, QWORD PTR [rcx+120]
pext r11, r11, r13
pext r12, r12, r13
sbb rax, r11
sbb r10, r12
mov QWORD PTR [rcx+112], rax
mov QWORD PTR [rcx+120], r10
; Restore the saved registers.
pop r13
pop r12
ret
sp_1024_mont_add_avx2_16 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Double a Montgomery form number (r = a + a % m).
; *
; * r Result of doubling.
; * a Number to double in Montgomery form.
; * m Modulus (prime).
; *
; * Registers as used by the code below: rcx = r, rdx = a, r8 = m.
; * Sixteen 64-bit words (offsets 0..120) are processed.
; * r12 is saved and restored; rax, r9, r10, r11 and the flags are overwritten.
; *
; * Step 1: r = a + a as one add/adc carry chain.
; * Step 2: build an all-ones/zero mask in r12.
; * Step 3: r = r - (m AND mask) as one sub/sbb borrow chain.
; *
; * NOTE(review): the mask is set when the addition carried out OR when the
; * top word of the sum is >= the top word of m. Only the top word is compared,
; * so a sum whose top word equals m's top word is reduced even if the full
; * value is below m - confirm that callers tolerate this.
; */
_text SEGMENT READONLY PARA
sp_1024_mont_dbl_avx2_16 PROC
push r12
; Step 1, words 0-3: the only plain 'add' - it starts the carry chain.
mov rax, QWORD PTR [rdx]
mov r9, QWORD PTR [rdx+8]
mov r10, QWORD PTR [rdx+16]
mov r11, QWORD PTR [rdx+24]
add rax, QWORD PTR [rdx]
; r12 is cleared with mov (not xor) because the carry flag from the add
; above must survive until the following adc.
mov r12, 0
adc r9, QWORD PTR [rdx+8]
adc r10, QWORD PTR [rdx+16]
adc r11, QWORD PTR [rdx+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r9
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
; Words 4-7: loads and stores leave the carry flag intact between groups.
mov rax, QWORD PTR [rdx+32]
mov r9, QWORD PTR [rdx+40]
mov r10, QWORD PTR [rdx+48]
mov r11, QWORD PTR [rdx+56]
adc rax, QWORD PTR [rdx+32]
adc r9, QWORD PTR [rdx+40]
adc r10, QWORD PTR [rdx+48]
adc r11, QWORD PTR [rdx+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r9
mov QWORD PTR [rcx+48], r10
mov QWORD PTR [rcx+56], r11
; Words 8-11.
mov rax, QWORD PTR [rdx+64]
mov r9, QWORD PTR [rdx+72]
mov r10, QWORD PTR [rdx+80]
mov r11, QWORD PTR [rdx+88]
adc rax, QWORD PTR [rdx+64]
adc r9, QWORD PTR [rdx+72]
adc r10, QWORD PTR [rdx+80]
adc r11, QWORD PTR [rdx+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r9
mov QWORD PTR [rcx+80], r10
mov QWORD PTR [rcx+88], r11
; Words 12-15: r11 ends up holding the top word of the sum.
mov rax, QWORD PTR [rdx+96]
mov r9, QWORD PTR [rdx+104]
mov r10, QWORD PTR [rdx+112]
mov r11, QWORD PTR [rdx+120]
adc rax, QWORD PTR [rdx+96]
adc r9, QWORD PTR [rdx+104]
adc r10, QWORD PTR [rdx+112]
adc r11, QWORD PTR [rdx+120]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r9
mov QWORD PTR [rcx+112], r10
mov QWORD PTR [rcx+120], r11
; Step 2: r12 = 0 - carry, i.e. all ones when the addition carried out.
sbb r12, 0
; Compare the top word of the sum with the top word of m:
; sub borrows when sum_top < m_top, sbb turns that borrow into all ones,
; not inverts it, so r11 is all ones when sum_top >= m_top.
sub r11, QWORD PTR [r8+120]
sbb r11, r11
not r11
; Final mask: carry out OR top word not below the modulus top word.
or r12, r11
; Step 3, words 0-1: pext with an all-ones mask returns the word unchanged and
; with a zero mask returns 0, so r10/r11 become m[i] or 0. pext does not modify
; the flags, which lets the borrow run through the whole sub/sbb sequence.
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
mov rax, QWORD PTR [rcx]
mov r9, QWORD PTR [rcx+8]
pext r10, r10, r12
pext r11, r11, r12
sub rax, r10
sbb r9, r11
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r9
; Words 2-3.
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
mov rax, QWORD PTR [rcx+16]
mov r9, QWORD PTR [rcx+24]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r9
; Words 4-5.
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
mov rax, QWORD PTR [rcx+32]
mov r9, QWORD PTR [rcx+40]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r9
; Words 6-7.
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
mov rax, QWORD PTR [rcx+48]
mov r9, QWORD PTR [rcx+56]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r9
; Words 8-9.
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
mov rax, QWORD PTR [rcx+64]
mov r9, QWORD PTR [rcx+72]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r9
; Words 10-11.
mov r10, QWORD PTR [r8+80]
mov r11, QWORD PTR [r8+88]
mov rax, QWORD PTR [rcx+80]
mov r9, QWORD PTR [rcx+88]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+80], rax
mov QWORD PTR [rcx+88], r9
; Words 12-13.
mov r10, QWORD PTR [r8+96]
mov r11, QWORD PTR [r8+104]
mov rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [rcx+104]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r9
; Words 14-15: the final borrow is discarded.
mov r10, QWORD PTR [r8+112]
mov r11, QWORD PTR [r8+120]
mov rax, QWORD PTR [rcx+112]
mov r9, QWORD PTR [rcx+120]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+112], rax
mov QWORD PTR [rcx+120], r9
pop r12
ret
sp_1024_mont_dbl_avx2_16 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Triple a Montgomery form number (r = a + a + a % m).
; *
; * r Result of tripling.
; * a Number to triple in Montgomery form.
; * m Modulus (prime).
; *
; * Registers as used by the code below: rcx = r, rdx = a, r8 = m.
; * Sixteen 64-bit words (offsets 0..120) are processed.
; * r12 is saved and restored; rax, r9, r10, r11 and the flags are overwritten.
; *
; * Pass 1: r = a + a, then conditionally subtract m.
; * Pass 2: r = r + a, then conditionally subtract m.
; *
; * In both passes the subtraction mask is all ones when the addition carried
; * out OR when the top word of the sum is >= the top word of m.
; *
; * NOTE(review): pass 2 reads a again after r has been written, so if r and a
; * are the same buffer pass 2 adds the doubled value instead of the original -
; * confirm callers never pass r == a.
; * NOTE(review): only the top word is compared against m when building the
; * mask - confirm that callers tolerate a reduction when the top words are
; * equal.
; */
_text SEGMENT READONLY PARA
sp_1024_mont_tpl_avx2_16 PROC
push r12
; ---- Pass 1: r = a + a ----
; Words 0-3: the plain 'add' starts the carry chain.
mov rax, QWORD PTR [rdx]
mov r9, QWORD PTR [rdx+8]
mov r10, QWORD PTR [rdx+16]
mov r11, QWORD PTR [rdx+24]
add rax, QWORD PTR [rdx]
; Cleared with mov (not xor) so the carry flag from the add is preserved.
mov r12, 0
adc r9, QWORD PTR [rdx+8]
adc r10, QWORD PTR [rdx+16]
adc r11, QWORD PTR [rdx+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r9
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
; Words 4-7.
mov rax, QWORD PTR [rdx+32]
mov r9, QWORD PTR [rdx+40]
mov r10, QWORD PTR [rdx+48]
mov r11, QWORD PTR [rdx+56]
adc rax, QWORD PTR [rdx+32]
adc r9, QWORD PTR [rdx+40]
adc r10, QWORD PTR [rdx+48]
adc r11, QWORD PTR [rdx+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r9
mov QWORD PTR [rcx+48], r10
mov QWORD PTR [rcx+56], r11
; Words 8-11.
mov rax, QWORD PTR [rdx+64]
mov r9, QWORD PTR [rdx+72]
mov r10, QWORD PTR [rdx+80]
mov r11, QWORD PTR [rdx+88]
adc rax, QWORD PTR [rdx+64]
adc r9, QWORD PTR [rdx+72]
adc r10, QWORD PTR [rdx+80]
adc r11, QWORD PTR [rdx+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r9
mov QWORD PTR [rcx+80], r10
mov QWORD PTR [rcx+88], r11
; Words 12-15: r11 ends up holding the top word of the sum.
mov rax, QWORD PTR [rdx+96]
mov r9, QWORD PTR [rdx+104]
mov r10, QWORD PTR [rdx+112]
mov r11, QWORD PTR [rdx+120]
adc rax, QWORD PTR [rdx+96]
adc r9, QWORD PTR [rdx+104]
adc r10, QWORD PTR [rdx+112]
adc r11, QWORD PTR [rdx+120]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r9
mov QWORD PTR [rcx+112], r10
mov QWORD PTR [rcx+120], r11
; r12 = 0 - carry: all ones when the addition carried out.
sbb r12, 0
; r11 = all ones when sum_top >= m_top (borrow -> all ones -> inverted).
sub r11, QWORD PTR [r8+120]
sbb r11, r11
not r11
or r12, r11
; ---- Pass 1: r = r - (m AND mask) ----
; pext with an all-ones mask returns the word unchanged, with a zero mask it
; returns 0; it does not modify the flags, so the borrow chain is unbroken.
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
mov rax, QWORD PTR [rcx]
mov r9, QWORD PTR [rcx+8]
pext r10, r10, r12
pext r11, r11, r12
sub rax, r10
sbb r9, r11
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r9
; Words 2-3.
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
mov rax, QWORD PTR [rcx+16]
mov r9, QWORD PTR [rcx+24]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r9
; Words 4-5.
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
mov rax, QWORD PTR [rcx+32]
mov r9, QWORD PTR [rcx+40]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r9
; Words 6-7.
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
mov rax, QWORD PTR [rcx+48]
mov r9, QWORD PTR [rcx+56]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r9
; Words 8-9.
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
mov rax, QWORD PTR [rcx+64]
mov r9, QWORD PTR [rcx+72]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r9
; Words 10-11.
mov r10, QWORD PTR [r8+80]
mov r11, QWORD PTR [r8+88]
mov rax, QWORD PTR [rcx+80]
mov r9, QWORD PTR [rcx+88]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+80], rax
mov QWORD PTR [rcx+88], r9
; Words 12-13.
mov r10, QWORD PTR [r8+96]
mov r11, QWORD PTR [r8+104]
mov rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [rcx+104]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r9
; Words 14-15: the final borrow is discarded.
mov r10, QWORD PTR [r8+112]
mov r11, QWORD PTR [r8+120]
mov rax, QWORD PTR [rcx+112]
mov r9, QWORD PTR [rcx+120]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+112], rax
mov QWORD PTR [rcx+120], r9
; ---- Pass 2: r = r + a ----
; Same structure as pass 1, but the first operand now comes from r (rcx).
mov rax, QWORD PTR [rcx]
mov r9, QWORD PTR [rcx+8]
mov r10, QWORD PTR [rcx+16]
mov r11, QWORD PTR [rcx+24]
add rax, QWORD PTR [rdx]
; Reset the mask accumulator without touching the carry flag.
mov r12, 0
adc r9, QWORD PTR [rdx+8]
adc r10, QWORD PTR [rdx+16]
adc r11, QWORD PTR [rdx+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r9
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
; Words 4-7.
mov rax, QWORD PTR [rcx+32]
mov r9, QWORD PTR [rcx+40]
mov r10, QWORD PTR [rcx+48]
mov r11, QWORD PTR [rcx+56]
adc rax, QWORD PTR [rdx+32]
adc r9, QWORD PTR [rdx+40]
adc r10, QWORD PTR [rdx+48]
adc r11, QWORD PTR [rdx+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r9
mov QWORD PTR [rcx+48], r10
mov QWORD PTR [rcx+56], r11
; Words 8-11.
mov rax, QWORD PTR [rcx+64]
mov r9, QWORD PTR [rcx+72]
mov r10, QWORD PTR [rcx+80]
mov r11, QWORD PTR [rcx+88]
adc rax, QWORD PTR [rdx+64]
adc r9, QWORD PTR [rdx+72]
adc r10, QWORD PTR [rdx+80]
adc r11, QWORD PTR [rdx+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r9
mov QWORD PTR [rcx+80], r10
mov QWORD PTR [rcx+88], r11
; Words 12-15: r11 ends up holding the top word of the sum.
mov rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [rcx+104]
mov r10, QWORD PTR [rcx+112]
mov r11, QWORD PTR [rcx+120]
adc rax, QWORD PTR [rdx+96]
adc r9, QWORD PTR [rdx+104]
adc r10, QWORD PTR [rdx+112]
adc r11, QWORD PTR [rdx+120]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r9
mov QWORD PTR [rcx+112], r10
mov QWORD PTR [rcx+120], r11
; Rebuild the mask exactly as in pass 1.
sbb r12, 0
sub r11, QWORD PTR [r8+120]
sbb r11, r11
not r11
or r12, r11
; ---- Pass 2: r = r - (m AND mask) ----
mov r10, QWORD PTR [r8]
mov r11, QWORD PTR [r8+8]
mov rax, QWORD PTR [rcx]
mov r9, QWORD PTR [rcx+8]
pext r10, r10, r12
pext r11, r11, r12
sub rax, r10
sbb r9, r11
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r9
; Words 2-3.
mov r10, QWORD PTR [r8+16]
mov r11, QWORD PTR [r8+24]
mov rax, QWORD PTR [rcx+16]
mov r9, QWORD PTR [rcx+24]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r9
; Words 4-5.
mov r10, QWORD PTR [r8+32]
mov r11, QWORD PTR [r8+40]
mov rax, QWORD PTR [rcx+32]
mov r9, QWORD PTR [rcx+40]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r9
; Words 6-7.
mov r10, QWORD PTR [r8+48]
mov r11, QWORD PTR [r8+56]
mov rax, QWORD PTR [rcx+48]
mov r9, QWORD PTR [rcx+56]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r9
; Words 8-9.
mov r10, QWORD PTR [r8+64]
mov r11, QWORD PTR [r8+72]
mov rax, QWORD PTR [rcx+64]
mov r9, QWORD PTR [rcx+72]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r9
; Words 10-11.
mov r10, QWORD PTR [r8+80]
mov r11, QWORD PTR [r8+88]
mov rax, QWORD PTR [rcx+80]
mov r9, QWORD PTR [rcx+88]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+80], rax
mov QWORD PTR [rcx+88], r9
; Words 12-13.
mov r10, QWORD PTR [r8+96]
mov r11, QWORD PTR [r8+104]
mov rax, QWORD PTR [rcx+96]
mov r9, QWORD PTR [rcx+104]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r9
; Words 14-15: the final borrow is discarded.
mov r10, QWORD PTR [r8+112]
mov r11, QWORD PTR [r8+120]
mov rax, QWORD PTR [rcx+112]
mov r9, QWORD PTR [rcx+120]
pext r10, r10, r12
pext r11, r11, r12
sbb rax, r10
sbb r9, r11
mov QWORD PTR [rcx+112], rax
mov QWORD PTR [rcx+120], r9
pop r12
ret
sp_1024_mont_tpl_avx2_16 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Subtract two Montgomery form numbers (r = a - b % m).
; *
; * r Result of subtraction.
; * a Number to subtract from in Montgomery form.
; * b Number to subtract in Montgomery form.
; * m Modulus (prime).
; *
; * Registers as used by the code below: rcx = r, rdx = a, r8 = b, r9 = m.
; * Sixteen 64-bit words (offsets 0..120) are processed.
; * r12 and r13 are saved and restored; rax, r10, r11 and the flags are
; * overwritten.
; *
; * Step 1: r = a - b as one sub/sbb borrow chain.
; * Step 2: r13 = all ones when the subtraction borrowed out, else zero.
; * Step 3: r = r + (m AND r13) as one add/adc carry chain.
; */
_text SEGMENT READONLY PARA
sp_1024_mont_sub_avx2_16 PROC
push r12
push r13
; Step 1, words 0-3: the plain 'sub' starts the borrow chain.
mov rax, QWORD PTR [rdx]
mov r10, QWORD PTR [rdx+8]
mov r11, QWORD PTR [rdx+16]
mov r12, QWORD PTR [rdx+24]
sub rax, QWORD PTR [r8]
; Cleared with mov (not xor) so the borrow from the sub above is preserved.
mov r13, 0
sbb r10, QWORD PTR [r8+8]
sbb r11, QWORD PTR [r8+16]
sbb r12, QWORD PTR [r8+24]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r10
mov QWORD PTR [rcx+16], r11
mov QWORD PTR [rcx+24], r12
; Words 4-7: loads and stores leave the borrow flag intact between groups.
mov rax, QWORD PTR [rdx+32]
mov r10, QWORD PTR [rdx+40]
mov r11, QWORD PTR [rdx+48]
mov r12, QWORD PTR [rdx+56]
sbb rax, QWORD PTR [r8+32]
sbb r10, QWORD PTR [r8+40]
sbb r11, QWORD PTR [r8+48]
sbb r12, QWORD PTR [r8+56]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r10
mov QWORD PTR [rcx+48], r11
mov QWORD PTR [rcx+56], r12
; Words 8-11.
mov rax, QWORD PTR [rdx+64]
mov r10, QWORD PTR [rdx+72]
mov r11, QWORD PTR [rdx+80]
mov r12, QWORD PTR [rdx+88]
sbb rax, QWORD PTR [r8+64]
sbb r10, QWORD PTR [r8+72]
sbb r11, QWORD PTR [r8+80]
sbb r12, QWORD PTR [r8+88]
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r10
mov QWORD PTR [rcx+80], r11
mov QWORD PTR [rcx+88], r12
; Words 12-15.
mov rax, QWORD PTR [rdx+96]
mov r10, QWORD PTR [rdx+104]
mov r11, QWORD PTR [rdx+112]
mov r12, QWORD PTR [rdx+120]
sbb rax, QWORD PTR [r8+96]
sbb r10, QWORD PTR [r8+104]
sbb r11, QWORD PTR [r8+112]
sbb r12, QWORD PTR [r8+120]
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r10
mov QWORD PTR [rcx+112], r11
mov QWORD PTR [rcx+120], r12
; Step 2: r13 = 0 - borrow, i.e. all ones when a - b went below zero.
sbb r13, 0
; Step 3, words 0-1: pext with an all-ones mask returns the word unchanged and
; with a zero mask returns 0, so r11/r12 become m[i] or 0. pext does not modify
; the flags, which lets the carry run through the whole add/adc sequence.
mov r11, QWORD PTR [r9]
mov r12, QWORD PTR [r9+8]
mov rax, QWORD PTR [rcx]
mov r10, QWORD PTR [rcx+8]
pext r11, r11, r13
pext r12, r12, r13
add rax, r11
adc r10, r12
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r10
; Words 2-3.
mov r11, QWORD PTR [r9+16]
mov r12, QWORD PTR [r9+24]
mov rax, QWORD PTR [rcx+16]
mov r10, QWORD PTR [rcx+24]
pext r11, r11, r13
pext r12, r12, r13
adc rax, r11
adc r10, r12
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r10
; Words 4-5.
mov r11, QWORD PTR [r9+32]
mov r12, QWORD PTR [r9+40]
mov rax, QWORD PTR [rcx+32]
mov r10, QWORD PTR [rcx+40]
pext r11, r11, r13
pext r12, r12, r13
adc rax, r11
adc r10, r12
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r10
; Words 6-7.
mov r11, QWORD PTR [r9+48]
mov r12, QWORD PTR [r9+56]
mov rax, QWORD PTR [rcx+48]
mov r10, QWORD PTR [rcx+56]
pext r11, r11, r13
pext r12, r12, r13
adc rax, r11
adc r10, r12
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r10
; Words 8-9.
mov r11, QWORD PTR [r9+64]
mov r12, QWORD PTR [r9+72]
mov rax, QWORD PTR [rcx+64]
mov r10, QWORD PTR [rcx+72]
pext r11, r11, r13
pext r12, r12, r13
adc rax, r11
adc r10, r12
mov QWORD PTR [rcx+64], rax
mov QWORD PTR [rcx+72], r10
; Words 10-11.
mov r11, QWORD PTR [r9+80]
mov r12, QWORD PTR [r9+88]
mov rax, QWORD PTR [rcx+80]
mov r10, QWORD PTR [rcx+88]
pext r11, r11, r13
pext r12, r12, r13
adc rax, r11
adc r10, r12
mov QWORD PTR [rcx+80], rax
mov QWORD PTR [rcx+88], r10
; Words 12-13.
mov r11, QWORD PTR [r9+96]
mov r12, QWORD PTR [r9+104]
mov rax, QWORD PTR [rcx+96]
mov r10, QWORD PTR [rcx+104]
pext r11, r11, r13
pext r12, r12, r13
adc rax, r11
adc r10, r12
mov QWORD PTR [rcx+96], rax
mov QWORD PTR [rcx+104], r10
; Words 14-15: the final carry is discarded.
mov r11, QWORD PTR [r9+112]
mov r12, QWORD PTR [r9+120]
mov rax, QWORD PTR [rcx+112]
mov r10, QWORD PTR [rcx+120]
pext r11, r11, r13
pext r12, r12, r13
adc rax, r11
adc r10, r12
mov QWORD PTR [rcx+112], rax
mov QWORD PTR [rcx+120], r10
pop r13
pop r12
ret
sp_1024_mont_sub_avx2_16 ENDP
_text ENDS
ENDIF
IFDEF HAVE_INTEL_AVX2
; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
; *
; * r Result of division by 2.
; * a Number to divide.
; * m Modulus (prime).
; *
; * Registers as used by the code below: rcx = r, rdx = a, r8 = m.
; * Sixteen 64-bit words (offsets 0..120) are processed.
; * r12 and r13 are saved and restored; rax, r9, r10, r11 and the flags are
; * overwritten.
; *
; * Step 1: mask = all ones when a is odd, zero when a is even.
; * Step 2: r = a + (m AND mask); the carry out is kept in r12.
; * Step 3: shift the 1025-bit value (r12:r) right by one bit.
; */
_text SEGMENT READONLY PARA
sp_1024_div2_avx2_16 PROC
push r12
push r13
; Step 1: r13 = 0 - (a[0] AND 1).
mov r13, QWORD PTR [rdx]
; r12 will receive the carry out of the addition. The flags written by this
; xor are irrelevant: the first arithmetic op of the chain below is a plain add.
xor r12, r12
and r13, 1
neg r13
; Step 2, words 0-1: pext with an all-ones mask returns the word unchanged and
; with a zero mask returns 0, so rax/r9 become m[i] or 0. pext does not modify
; the flags, which lets the carry run through the whole add/adc sequence.
mov rax, QWORD PTR [r8]
mov r9, QWORD PTR [r8+8]
mov r10, QWORD PTR [rdx]
mov r11, QWORD PTR [rdx+8]
pext rax, rax, r13
pext r9, r9, r13
add r10, rax
adc r11, r9
mov QWORD PTR [rcx], r10
mov QWORD PTR [rcx+8], r11
; Words 2-3.
mov rax, QWORD PTR [r8+16]
mov r9, QWORD PTR [r8+24]
mov r10, QWORD PTR [rdx+16]
mov r11, QWORD PTR [rdx+24]
pext rax, rax, r13
pext r9, r9, r13
adc r10, rax
adc r11, r9
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
; Words 4-5.
mov rax, QWORD PTR [r8+32]
mov r9, QWORD PTR [r8+40]
mov r10, QWORD PTR [rdx+32]
mov r11, QWORD PTR [rdx+40]
pext rax, rax, r13
pext r9, r9, r13
adc r10, rax
adc r11, r9
mov QWORD PTR [rcx+32], r10
mov QWORD PTR [rcx+40], r11
; Words 6-7.
mov rax, QWORD PTR [r8+48]
mov r9, QWORD PTR [r8+56]
mov r10, QWORD PTR [rdx+48]
mov r11, QWORD PTR [rdx+56]
pext rax, rax, r13
pext r9, r9, r13
adc r10, rax
adc r11, r9
mov QWORD PTR [rcx+48], r10
mov QWORD PTR [rcx+56], r11
; Words 8-9.
mov rax, QWORD PTR [r8+64]
mov r9, QWORD PTR [r8+72]
mov r10, QWORD PTR [rdx+64]
mov r11, QWORD PTR [rdx+72]
pext rax, rax, r13
pext r9, r9, r13
adc r10, rax
adc r11, r9
mov QWORD PTR [rcx+64], r10
mov QWORD PTR [rcx+72], r11
; Words 10-11.
mov rax, QWORD PTR [r8+80]
mov r9, QWORD PTR [r8+88]
mov r10, QWORD PTR [rdx+80]
mov r11, QWORD PTR [rdx+88]
pext rax, rax, r13
pext r9, r9, r13
adc r10, rax
adc r11, r9
mov QWORD PTR [rcx+80], r10
mov QWORD PTR [rcx+88], r11
; Words 12-13.
mov rax, QWORD PTR [r8+96]
mov r9, QWORD PTR [r8+104]
mov r10, QWORD PTR [rdx+96]
mov r11, QWORD PTR [rdx+104]
pext rax, rax, r13
pext r9, r9, r13
adc r10, rax
adc r11, r9
mov QWORD PTR [rcx+96], r10
mov QWORD PTR [rcx+104], r11
; Words 14-15.
mov rax, QWORD PTR [r8+112]
mov r9, QWORD PTR [r8+120]
mov r10, QWORD PTR [rdx+112]
mov r11, QWORD PTR [rdx+120]
pext rax, rax, r13
pext r9, r9, r13
adc r10, rax
adc r11, r9
mov QWORD PTR [rcx+112], r10
mov QWORD PTR [rcx+120], r11
; r12 = carry out of the 1024-bit addition (bit 1024 of the sum).
adc r12, 0
; Step 3: shift right by one. r10 and r11 alternate as "current word" and
; "next higher word"; each shrd pulls bit 0 of the higher word into bit 63 of
; the current one before it is written back.
mov r10, QWORD PTR [rcx]
mov r11, QWORD PTR [rcx+8]
shrd r10, r11, 1
mov QWORD PTR [rcx], r10
mov r10, QWORD PTR [rcx+16]
shrd r11, r10, 1
mov QWORD PTR [rcx+8], r11
mov r11, QWORD PTR [rcx+24]
shrd r10, r11, 1
mov QWORD PTR [rcx+16], r10
mov r10, QWORD PTR [rcx+32]
shrd r11, r10, 1
mov QWORD PTR [rcx+24], r11
mov r11, QWORD PTR [rcx+40]
shrd r10, r11, 1
mov QWORD PTR [rcx+32], r10
mov r10, QWORD PTR [rcx+48]
shrd r11, r10, 1
mov QWORD PTR [rcx+40], r11
mov r11, QWORD PTR [rcx+56]
shrd r10, r11, 1
mov QWORD PTR [rcx+48], r10
mov r10, QWORD PTR [rcx+64]
shrd r11, r10, 1
mov QWORD PTR [rcx+56], r11
mov r11, QWORD PTR [rcx+72]
shrd r10, r11, 1
mov QWORD PTR [rcx+64], r10
mov r10, QWORD PTR [rcx+80]
shrd r11, r10, 1
mov QWORD PTR [rcx+72], r11
mov r11, QWORD PTR [rcx+88]
shrd r10, r11, 1
mov QWORD PTR [rcx+80], r10
mov r10, QWORD PTR [rcx+96]
shrd r11, r10, 1
mov QWORD PTR [rcx+88], r11
mov r11, QWORD PTR [rcx+104]
shrd r10, r11, 1
mov QWORD PTR [rcx+96], r10
mov r10, QWORD PTR [rcx+112]
shrd r11, r10, 1
mov QWORD PTR [rcx+104], r11
mov r11, QWORD PTR [rcx+120]
shrd r10, r11, 1
mov QWORD PTR [rcx+112], r10
; The top word takes the saved carry as its new bit 63.
shrd r11, r12, 1
mov QWORD PTR [rcx+120], r11
pop r13
pop r12
ret
sp_1024_div2_avx2_16 ENDP
_text ENDS
ENDIF
; /* Read big endian unsigned byte array into r.
; * Uses the bswap instruction.
; *
; * r A single precision integer.
; * size Maximum number of bytes to convert
; * a Byte array.
; * n Number of bytes in array to read.
; *
; * Registers as used by the code below: rcx = r, r8 = a, r9 = n.
; * The size argument is never read; the output length is fixed at 128 bytes
; * (r12 = r + 128) and every word not filled from a is set to zero.
; * r12 and r13 are saved and restored; rax, r10, r11, rcx, r8, r9 and the
; * flags are overwritten.
; *
; * The last byte of a is the least significant, so the input is consumed from
; * its end towards its start while r is written from word 0 upwards.
; *
; * NOTE(review): n is used as a full 64-bit value in r9 and nothing checks it
; * against 128 - presumably callers guarantee 0 <= n <= 128 with the upper
; * half of r9 clear; confirm against the C prototype.
; */
_text SEGMENT READONLY PARA
sp_1024_from_bin_bswap PROC
push r12
push r13
; r11 = one past the last input byte, r12 = one past the last output word,
; r13 = constant zero.
mov r11, r8
mov r12, rcx
add r11, r9
add r12, 128
xor r13, r13
jmp L_1024_from_bin_bswap_64_end
; 64 bytes per iteration while at least 64 input bytes remain.
L_1024_from_bin_bswap_64_start:
sub r11, 64
mov rax, QWORD PTR [r11+56]
mov r10, QWORD PTR [r11+48]
bswap rax
bswap r10
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r10
mov rax, QWORD PTR [r11+40]
mov r10, QWORD PTR [r11+32]
bswap rax
bswap r10
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r10
mov rax, QWORD PTR [r11+24]
mov r10, QWORD PTR [r11+16]
bswap rax
bswap r10
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r10
mov rax, QWORD PTR [r11+8]
mov r10, QWORD PTR [r11]
bswap rax
bswap r10
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r10
add rcx, 64
sub r9, 64
L_1024_from_bin_bswap_64_end:
; The byte count is compared as a signed value, so a negative count falls
; straight through both copy loops.
cmp r9, 63
jg L_1024_from_bin_bswap_64_start
jmp L_1024_from_bin_bswap_8_end
; 8 bytes per iteration while at least 8 input bytes remain.
L_1024_from_bin_bswap_8_start:
sub r11, 8
mov rax, QWORD PTR [r11]
bswap rax
mov QWORD PTR [rcx], rax
add rcx, 8
sub r9, 8
L_1024_from_bin_bswap_8_end:
cmp r9, 7
jg L_1024_from_bin_bswap_8_start
; 1..7 leading (most significant) bytes may be left at the start of a.
cmp r9, r13
je L_1024_from_bin_bswap_hi_end
mov r10, r13
mov rax, r13
; Accumulate them big endian into r10. rax was zeroed above, so writing al
; yields a zero-extended byte.
L_1024_from_bin_bswap_hi_start:
mov al, BYTE PTR [r8]
shl r10, 8
inc r8
add r10, rax
dec r9
jg L_1024_from_bin_bswap_hi_start
mov QWORD PTR [rcx], r10
add rcx, 8
L_1024_from_bin_bswap_hi_end:
; Zero the remaining output words. rcx and r12 are addresses, so they are
; compared with the unsigned conditions (jae/jb).
cmp rcx, r12
jae L_1024_from_bin_bswap_zero_end
L_1024_from_bin_bswap_zero_start:
mov QWORD PTR [rcx], r13
add rcx, 8
cmp rcx, r12
jb L_1024_from_bin_bswap_zero_start
L_1024_from_bin_bswap_zero_end:
pop r13
pop r12
ret
sp_1024_from_bin_bswap ENDP
_text ENDS
IFNDEF NO_MOVBE_SUPPORT
; /* Read big endian unsigned byte array into r.
; * Uses the movbe instruction which is an optional instruction.
; *
; * r A single precision integer.
; * size Maximum number of bytes to convert
; * a Byte array.
; * n Number of bytes in array to read.
; *
; * Registers as used by the code below: rcx = r, r8 = a, r9 = n.
; * The size argument is never read; the output length is fixed at 128 bytes
; * (r12 = r + 128) and every word not filled from a is set to zero.
; * r12 and r13 are saved and restored; rax, r10, r11, rcx, r8, r9 and the
; * flags are overwritten.
; *
; * The last byte of a is the least significant, so the input is consumed from
; * its end towards its start while r is written from word 0 upwards.
; *
; * NOTE(review): n is used as a full 64-bit value in r9 and nothing checks it
; * against 128 - presumably callers guarantee 0 <= n <= 128 with the upper
; * half of r9 clear; confirm against the C prototype.
; */
_text SEGMENT READONLY PARA
sp_1024_from_bin_movbe PROC
push r12
push r13
; r11 = one past the last input byte, r12 = one past the last output word,
; r13 = constant zero.
mov r11, r8
mov r12, rcx
add r11, r9
add r12, 128
xor r13, r13
jmp L_1024_from_bin_movbe_64_end
; 64 bytes per iteration while at least 64 input bytes remain; movbe loads and
; byte-swaps in one instruction.
L_1024_from_bin_movbe_64_start:
sub r11, 64
movbe rax, QWORD PTR [r11+56]
movbe r10, QWORD PTR [r11+48]
mov QWORD PTR [rcx], rax
mov QWORD PTR [rcx+8], r10
movbe rax, QWORD PTR [r11+40]
movbe r10, QWORD PTR [r11+32]
mov QWORD PTR [rcx+16], rax
mov QWORD PTR [rcx+24], r10
movbe rax, QWORD PTR [r11+24]
movbe r10, QWORD PTR [r11+16]
mov QWORD PTR [rcx+32], rax
mov QWORD PTR [rcx+40], r10
movbe rax, QWORD PTR [r11+8]
movbe r10, QWORD PTR [r11]
mov QWORD PTR [rcx+48], rax
mov QWORD PTR [rcx+56], r10
add rcx, 64
sub r9, 64
L_1024_from_bin_movbe_64_end:
; The byte count is compared as a signed value, so a negative count falls
; straight through both copy loops.
cmp r9, 63
jg L_1024_from_bin_movbe_64_start
jmp L_1024_from_bin_movbe_8_end
; 8 bytes per iteration while at least 8 input bytes remain.
L_1024_from_bin_movbe_8_start:
sub r11, 8
movbe rax, QWORD PTR [r11]
mov QWORD PTR [rcx], rax
add rcx, 8
sub r9, 8
L_1024_from_bin_movbe_8_end:
cmp r9, 7
jg L_1024_from_bin_movbe_8_start
; 1..7 leading (most significant) bytes may be left at the start of a.
cmp r9, r13
je L_1024_from_bin_movbe_hi_end
mov r10, r13
mov rax, r13
; Accumulate them big endian into r10. rax was zeroed above, so writing al
; yields a zero-extended byte.
L_1024_from_bin_movbe_hi_start:
mov al, BYTE PTR [r8]
shl r10, 8
inc r8
add r10, rax
dec r9
jg L_1024_from_bin_movbe_hi_start
mov QWORD PTR [rcx], r10
add rcx, 8
L_1024_from_bin_movbe_hi_end:
; Zero the remaining output words. rcx and r12 are addresses, so they are
; compared with the unsigned conditions (jae/jb).
cmp rcx, r12
jae L_1024_from_bin_movbe_zero_end
L_1024_from_bin_movbe_zero_start:
mov QWORD PTR [rcx], r13
add rcx, 8
cmp rcx, r12
jb L_1024_from_bin_movbe_zero_start
L_1024_from_bin_movbe_zero_end:
pop r13
pop r12
ret
sp_1024_from_bin_movbe ENDP
_text ENDS
ENDIF
ENDIF
END