/* sha256_asm
 *
 * Copyright (C) 2006-2023 wolfSSL Inc.
 *
 * This file is part of wolfSSL.
 *
 * wolfSSL is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * wolfSSL is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
 */

#ifdef WOLFSSL_USER_SETTINGS
#ifdef WOLFSSL_USER_SETTINGS_ASM
/*
 * user_settings_asm.h is a file generated by the script user_settings_asm.sh.
 * The script takes in a user_settings.h and produces user_settings_asm.h, which
 * is a stripped down version of user_settings.h containing only preprocessor
 * directives. This makes the header safe to include in assembly (.S) files.
 */
#include "user_settings_asm.h"
#else
/*
 * Note: if user_settings.h contains any C code (e.g. a typedef or function
 * prototype), including it here in an assembly (.S) file will cause an
 * assembler failure. See user_settings_asm.h above.
 */
#include "user_settings.h"
#endif /* WOLFSSL_USER_SETTINGS_ASM */
#endif /* WOLFSSL_USER_SETTINGS */
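/*
 * Illustrative sketch only (not taken from the wolfSSL sources): suppose a
 * hypothetical user_settings.h contained
 *
 *     #define WOLFSSL_SHA512
 *     typedef unsigned int my_word32;
 *
 * The typedef is C code and would break the assembler. Because
 * user_settings_asm.sh keeps only the preprocessor directives, the generated
 * user_settings_asm.h would reduce to
 *
 *     #define WOLFSSL_SHA512
 *
 * which is safe to include from this .S file.
 */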

#ifndef HAVE_INTEL_AVX1
#define HAVE_INTEL_AVX1
#endif /* HAVE_INTEL_AVX1 */
#ifndef NO_AVX2_SUPPORT
#define HAVE_INTEL_AVX2
#endif /* NO_AVX2_SUPPORT */

#ifdef WOLFSSL_X86_64_BUILD
#ifdef HAVE_INTEL_AVX1
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
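/* The 64 SHA-256 round constants K[0..63] from FIPS 180-4, laid out so that
 * vpaddd can add four consecutive constants to four message-schedule words
 * at a time. */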
L_avx1_sha256_k:
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc
.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967
.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
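/* pshufb mask: copies dwords 0 and 2 of the source into dwords 0 and 1 of the
 * result and zeroes the upper two dwords (0xff selector bytes produce zero). */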
L_avx1_sha256_shuf_00BA:
.quad 0xb0a090803020100, 0xffffffffffffffff
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
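/* pshufb mask: copies dwords 0 and 2 of the source into dwords 2 and 3 of the
 * result and zeroes the lower two dwords. */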
L_avx1_sha256_shuf_DC00:
.quad 0xffffffffffffffff, 0xb0a090803020100
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
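/* pshufb mask that reverses the byte order within each 32-bit word, used to
 * convert the big-endian message words of an input block to little-endian. */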
L_avx1_sha256_flip_mask:
.quad 0x405060700010203, 0xc0d0e0f08090a0b
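/* Transform_Sha256_AVX1
 * Processes a single 64-byte block with AVX1 instructions. Judging from the
 * code below, %rdi points at the SHA-256 state object: the eight 32-bit state
 * words are read from offsets 0-28 and added back at the end, and the message
 * block is read from offset 32. Returns 0 in %rax.
 */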
#ifndef __APPLE__
.text
.globl Transform_Sha256_AVX1
.type Transform_Sha256_AVX1,@function
.align 16
Transform_Sha256_AVX1:
#else
.section __TEXT,__text
.globl _Transform_Sha256_AVX1
.p2align 4
_Transform_Sha256_AVX1:
#endif /* __APPLE__ */
pushq %rbx
pushq %r12
pushq %r13
pushq %r14
pushq %r15
subq $0x40, %rsp
leaq 32(%rdi), %rax
vmovdqa L_avx1_sha256_flip_mask(%rip), %xmm13
vmovdqa L_avx1_sha256_shuf_00BA(%rip), %xmm11
vmovdqa L_avx1_sha256_shuf_DC00(%rip), %xmm12
movl (%rdi), %r8d
movl 4(%rdi), %r9d
movl 8(%rdi), %r10d
movl 12(%rdi), %r11d
movl 16(%rdi), %r12d
movl 20(%rdi), %r13d
movl 24(%rdi), %r14d
movl 28(%rdi), %r15d
# X0, X1, X2, X3 = W[0..15]
vmovdqu (%rax), %xmm0
vmovdqu 16(%rax), %xmm1
vpshufb %xmm13, %xmm0, %xmm0
vpshufb %xmm13, %xmm1, %xmm1
vmovdqu 32(%rax), %xmm2
vmovdqu 48(%rax), %xmm3
vpshufb %xmm13, %xmm2, %xmm2
vpshufb %xmm13, %xmm3, %xmm3
movl %r9d, %ebx
movl %r12d, %edx
xorl %r10d, %ebx
# set_w_k_xfer_4: 0
vpaddd 0+L_avx1_sha256_k(%rip), %xmm0, %xmm4
vpaddd 16+L_avx1_sha256_k(%rip), %xmm1, %xmm5
vmovdqu %xmm4, (%rsp)
vmovdqu %xmm5, 16(%rsp)
vpaddd 32+L_avx1_sha256_k(%rip), %xmm2, %xmm6
vpaddd 48+L_avx1_sha256_k(%rip), %xmm3, %xmm7
vmovdqu %xmm6, 32(%rsp)
vmovdqu %xmm7, 48(%rsp)
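# The 64 rounds below interleave two streams of work: the scalar SHA-256
# round function (the rorl/xorl/andl/addl sequences marked rnd_0/rnd_1,
# operating on the working variables a..h held in %r8d-%r15d) and the AVX
# message schedule (the vp* instructions marked msg_sched, which compute the
# next four W[t] words); each set_w_k_xfer_4 step adds the round constants K
# and stores W[t]+K[t] on the stack, where the scalar rounds read their
# per-round input (e.g. addl (%rsp), %r15d).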
# msg_sched: 0-3
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %xmm0, %xmm1, %xmm5
|
|
vpalignr $4, %xmm2, %xmm3, %xmm4
|
|
# rnd_0: 1 - 2
|
|
movl %r9d, %eax
|
|
movl %r13d, %ecx
|
|
addl (%rsp), %r15d
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
andl %r12d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
addl %ecx, %r15d
|
|
rorl $6, %edx
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
movl %r8d, %ecx
|
|
vpsrld $18, %xmm5, %xmm8
|
|
vpslld $14, %xmm5, %xmm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
vpor %xmm6, %xmm7, %xmm6
|
|
vpor %xmm8, %xmm9, %xmm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r8d, %ebx
|
|
movl %r12d, %ecx
|
|
addl 4(%rsp), %r14d
|
|
xorl %r13d, %ecx
|
|
vpsrld $3, %xmm5, %xmm9
|
|
vpxor %xmm6, %xmm8, %xmm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r11d, %edx
|
|
andl %r11d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r13d, %ecx
|
|
xorl %r11d, %edx
|
|
addl %ecx, %r14d
|
|
vpxor %xmm6, %xmm9, %xmm5
|
|
vpshufd $0xfa, %xmm3, %xmm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
movl %r15d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm0, %xmm4, %xmm4
|
|
# rnd_0: 1 - 3
|
|
movl %r15d, %eax
|
|
movl %r11d, %ecx
|
|
addl 8(%rsp), %r13d
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
andl %r10d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
addl %ecx, %r13d
|
|
vpxor %xmm6, %xmm7, %xmm6
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
movl %r14d, %ecx
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
movl %r14d, %ebx
|
|
movl %r10d, %ecx
|
|
addl 12(%rsp), %r12d
|
|
xorl %r11d, %ecx
|
|
vpsrlq $0x11, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r9d, %edx
|
|
andl %r9d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r11d, %ecx
|
|
xorl %r9d, %edx
|
|
addl %ecx, %r12d
|
|
vpsrld $10, %xmm6, %xmm9
|
|
vpxor %xmm8, %xmm7, %xmm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
movl %r13d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
vpxor %xmm9, %xmm8, %xmm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
vpaddd %xmm4, %xmm9, %xmm0
|
|
# msg_sched done: 0-3
|
|
# msg_sched: 4-7
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %xmm1, %xmm2, %xmm5
|
|
vpalignr $4, %xmm3, %xmm0, %xmm4
|
|
# rnd_0: 1 - 2
|
|
movl %r13d, %eax
|
|
movl %r9d, %ecx
|
|
addl 16(%rsp), %r11d
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
andl %r8d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
addl %ecx, %r11d
|
|
rorl $6, %edx
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
movl %r12d, %ecx
|
|
vpsrld $18, %xmm5, %xmm8
|
|
vpslld $14, %xmm5, %xmm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
vpor %xmm6, %xmm7, %xmm6
|
|
vpor %xmm8, %xmm9, %xmm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r12d, %ebx
|
|
movl %r8d, %ecx
|
|
addl 20(%rsp), %r10d
|
|
xorl %r9d, %ecx
|
|
vpsrld $3, %xmm5, %xmm9
|
|
vpxor %xmm6, %xmm8, %xmm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r15d, %edx
|
|
andl %r15d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r9d, %ecx
|
|
xorl %r15d, %edx
|
|
addl %ecx, %r10d
|
|
vpxor %xmm6, %xmm9, %xmm5
|
|
vpshufd $0xfa, %xmm0, %xmm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
movl %r11d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm1, %xmm4, %xmm4
|
|
# rnd_0: 1 - 3
|
|
movl %r11d, %eax
|
|
movl %r15d, %ecx
|
|
addl 24(%rsp), %r9d
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
andl %r14d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
addl %ecx, %r9d
|
|
vpxor %xmm6, %xmm7, %xmm6
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
movl %r10d, %ecx
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
movl %r10d, %ebx
|
|
movl %r14d, %ecx
|
|
addl 28(%rsp), %r8d
|
|
xorl %r15d, %ecx
|
|
vpsrlq $0x11, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r13d, %edx
|
|
andl %r13d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r15d, %ecx
|
|
xorl %r13d, %edx
|
|
addl %ecx, %r8d
|
|
vpsrld $10, %xmm6, %xmm9
|
|
vpxor %xmm8, %xmm7, %xmm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
movl %r9d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
vpxor %xmm9, %xmm8, %xmm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
vpaddd %xmm4, %xmm9, %xmm1
|
|
# msg_sched done: 4-7
|
|
# msg_sched: 8-11
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %xmm2, %xmm3, %xmm5
|
|
vpalignr $4, %xmm0, %xmm1, %xmm4
|
|
# rnd_0: 1 - 2
|
|
movl %r9d, %eax
|
|
movl %r13d, %ecx
|
|
addl 32(%rsp), %r15d
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
andl %r12d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
addl %ecx, %r15d
|
|
rorl $6, %edx
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
movl %r8d, %ecx
|
|
vpsrld $18, %xmm5, %xmm8
|
|
vpslld $14, %xmm5, %xmm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
vpor %xmm6, %xmm7, %xmm6
|
|
vpor %xmm8, %xmm9, %xmm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r8d, %ebx
|
|
movl %r12d, %ecx
|
|
addl 36(%rsp), %r14d
|
|
xorl %r13d, %ecx
|
|
vpsrld $3, %xmm5, %xmm9
|
|
vpxor %xmm6, %xmm8, %xmm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r11d, %edx
|
|
andl %r11d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r13d, %ecx
|
|
xorl %r11d, %edx
|
|
addl %ecx, %r14d
|
|
vpxor %xmm6, %xmm9, %xmm5
|
|
vpshufd $0xfa, %xmm1, %xmm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
movl %r15d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm2, %xmm4, %xmm4
|
|
# rnd_0: 1 - 3
|
|
movl %r15d, %eax
|
|
movl %r11d, %ecx
|
|
addl 40(%rsp), %r13d
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
andl %r10d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
addl %ecx, %r13d
|
|
vpxor %xmm6, %xmm7, %xmm6
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
movl %r14d, %ecx
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
movl %r14d, %ebx
|
|
movl %r10d, %ecx
|
|
addl 44(%rsp), %r12d
|
|
xorl %r11d, %ecx
|
|
vpsrlq $0x11, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r9d, %edx
|
|
andl %r9d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r11d, %ecx
|
|
xorl %r9d, %edx
|
|
addl %ecx, %r12d
|
|
vpsrld $10, %xmm6, %xmm9
|
|
vpxor %xmm8, %xmm7, %xmm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
movl %r13d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
vpxor %xmm9, %xmm8, %xmm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
vpaddd %xmm4, %xmm9, %xmm2
|
|
# msg_sched done: 8-11
|
|
# msg_sched: 12-15
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %xmm3, %xmm0, %xmm5
|
|
vpalignr $4, %xmm1, %xmm2, %xmm4
|
|
# rnd_0: 1 - 2
|
|
movl %r13d, %eax
|
|
movl %r9d, %ecx
|
|
addl 48(%rsp), %r11d
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
andl %r8d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
addl %ecx, %r11d
|
|
rorl $6, %edx
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
movl %r12d, %ecx
|
|
vpsrld $18, %xmm5, %xmm8
|
|
vpslld $14, %xmm5, %xmm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
vpor %xmm6, %xmm7, %xmm6
|
|
vpor %xmm8, %xmm9, %xmm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r12d, %ebx
|
|
movl %r8d, %ecx
|
|
addl 52(%rsp), %r10d
|
|
xorl %r9d, %ecx
|
|
vpsrld $3, %xmm5, %xmm9
|
|
vpxor %xmm6, %xmm8, %xmm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r15d, %edx
|
|
andl %r15d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r9d, %ecx
|
|
xorl %r15d, %edx
|
|
addl %ecx, %r10d
|
|
vpxor %xmm6, %xmm9, %xmm5
|
|
vpshufd $0xfa, %xmm2, %xmm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
movl %r11d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm3, %xmm4, %xmm4
|
|
# rnd_0: 1 - 3
|
|
movl %r11d, %eax
|
|
movl %r15d, %ecx
|
|
addl 56(%rsp), %r9d
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
andl %r14d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
addl %ecx, %r9d
|
|
vpxor %xmm6, %xmm7, %xmm6
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
movl %r10d, %ecx
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
movl %r10d, %ebx
|
|
movl %r14d, %ecx
|
|
addl 60(%rsp), %r8d
|
|
xorl %r15d, %ecx
|
|
vpsrlq $0x11, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r13d, %edx
|
|
andl %r13d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r15d, %ecx
|
|
xorl %r13d, %edx
|
|
addl %ecx, %r8d
|
|
vpsrld $10, %xmm6, %xmm9
|
|
vpxor %xmm8, %xmm7, %xmm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
movl %r9d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
vpxor %xmm9, %xmm8, %xmm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
vpaddd %xmm4, %xmm9, %xmm3
|
|
# msg_sched done: 12-15
|
|
# set_w_k_xfer_4: 4
|
|
vpaddd 64+L_avx1_sha256_k(%rip), %xmm0, %xmm4
|
|
vpaddd 80+L_avx1_sha256_k(%rip), %xmm1, %xmm5
|
|
vmovdqu %xmm4, (%rsp)
|
|
vmovdqu %xmm5, 16(%rsp)
|
|
vpaddd 96+L_avx1_sha256_k(%rip), %xmm2, %xmm6
|
|
vpaddd 112+L_avx1_sha256_k(%rip), %xmm3, %xmm7
|
|
vmovdqu %xmm6, 32(%rsp)
|
|
vmovdqu %xmm7, 48(%rsp)
|
|
# msg_sched: 0-3
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %xmm0, %xmm1, %xmm5
|
|
vpalignr $4, %xmm2, %xmm3, %xmm4
|
|
# rnd_0: 1 - 2
|
|
movl %r9d, %eax
|
|
movl %r13d, %ecx
|
|
addl (%rsp), %r15d
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
andl %r12d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
addl %ecx, %r15d
|
|
rorl $6, %edx
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
movl %r8d, %ecx
|
|
vpsrld $18, %xmm5, %xmm8
|
|
vpslld $14, %xmm5, %xmm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
vpor %xmm6, %xmm7, %xmm6
|
|
vpor %xmm8, %xmm9, %xmm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r8d, %ebx
|
|
movl %r12d, %ecx
|
|
addl 4(%rsp), %r14d
|
|
xorl %r13d, %ecx
|
|
vpsrld $3, %xmm5, %xmm9
|
|
vpxor %xmm6, %xmm8, %xmm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r11d, %edx
|
|
andl %r11d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r13d, %ecx
|
|
xorl %r11d, %edx
|
|
addl %ecx, %r14d
|
|
vpxor %xmm6, %xmm9, %xmm5
|
|
vpshufd $0xfa, %xmm3, %xmm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
movl %r15d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm0, %xmm4, %xmm4
|
|
# rnd_0: 1 - 3
|
|
movl %r15d, %eax
|
|
movl %r11d, %ecx
|
|
addl 8(%rsp), %r13d
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
andl %r10d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
addl %ecx, %r13d
|
|
vpxor %xmm6, %xmm7, %xmm6
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
movl %r14d, %ecx
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
movl %r14d, %ebx
|
|
movl %r10d, %ecx
|
|
addl 12(%rsp), %r12d
|
|
xorl %r11d, %ecx
|
|
vpsrlq $0x11, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r9d, %edx
|
|
andl %r9d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r11d, %ecx
|
|
xorl %r9d, %edx
|
|
addl %ecx, %r12d
|
|
vpsrld $10, %xmm6, %xmm9
|
|
vpxor %xmm8, %xmm7, %xmm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
movl %r13d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
vpxor %xmm9, %xmm8, %xmm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
vpaddd %xmm4, %xmm9, %xmm0
|
|
# msg_sched done: 0-3
|
|
# msg_sched: 4-7
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %xmm1, %xmm2, %xmm5
|
|
vpalignr $4, %xmm3, %xmm0, %xmm4
|
|
# rnd_0: 1 - 2
|
|
movl %r13d, %eax
|
|
movl %r9d, %ecx
|
|
addl 16(%rsp), %r11d
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
andl %r8d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
addl %ecx, %r11d
|
|
rorl $6, %edx
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
movl %r12d, %ecx
|
|
vpsrld $18, %xmm5, %xmm8
|
|
vpslld $14, %xmm5, %xmm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
vpor %xmm6, %xmm7, %xmm6
|
|
vpor %xmm8, %xmm9, %xmm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r12d, %ebx
|
|
movl %r8d, %ecx
|
|
addl 20(%rsp), %r10d
|
|
xorl %r9d, %ecx
|
|
vpsrld $3, %xmm5, %xmm9
|
|
vpxor %xmm6, %xmm8, %xmm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r15d, %edx
|
|
andl %r15d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r9d, %ecx
|
|
xorl %r15d, %edx
|
|
addl %ecx, %r10d
|
|
vpxor %xmm6, %xmm9, %xmm5
|
|
vpshufd $0xfa, %xmm0, %xmm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
movl %r11d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm1, %xmm4, %xmm4
|
|
# rnd_0: 1 - 3
|
|
movl %r11d, %eax
|
|
movl %r15d, %ecx
|
|
addl 24(%rsp), %r9d
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
andl %r14d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
addl %ecx, %r9d
|
|
vpxor %xmm6, %xmm7, %xmm6
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
movl %r10d, %ecx
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
movl %r10d, %ebx
|
|
movl %r14d, %ecx
|
|
addl 28(%rsp), %r8d
|
|
xorl %r15d, %ecx
|
|
vpsrlq $0x11, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r13d, %edx
|
|
andl %r13d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r15d, %ecx
|
|
xorl %r13d, %edx
|
|
addl %ecx, %r8d
|
|
vpsrld $10, %xmm6, %xmm9
|
|
vpxor %xmm8, %xmm7, %xmm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
movl %r9d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
vpxor %xmm9, %xmm8, %xmm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
vpaddd %xmm4, %xmm9, %xmm1
|
|
# msg_sched done: 4-7
|
|
# msg_sched: 8-11
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %xmm2, %xmm3, %xmm5
|
|
vpalignr $4, %xmm0, %xmm1, %xmm4
|
|
# rnd_0: 1 - 2
|
|
movl %r9d, %eax
|
|
movl %r13d, %ecx
|
|
addl 32(%rsp), %r15d
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
andl %r12d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
addl %ecx, %r15d
|
|
rorl $6, %edx
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
movl %r8d, %ecx
|
|
vpsrld $18, %xmm5, %xmm8
|
|
vpslld $14, %xmm5, %xmm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
vpor %xmm6, %xmm7, %xmm6
|
|
vpor %xmm8, %xmm9, %xmm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r8d, %ebx
|
|
movl %r12d, %ecx
|
|
addl 36(%rsp), %r14d
|
|
xorl %r13d, %ecx
|
|
vpsrld $3, %xmm5, %xmm9
|
|
vpxor %xmm6, %xmm8, %xmm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r11d, %edx
|
|
andl %r11d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r13d, %ecx
|
|
xorl %r11d, %edx
|
|
addl %ecx, %r14d
|
|
vpxor %xmm6, %xmm9, %xmm5
|
|
vpshufd $0xfa, %xmm1, %xmm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
movl %r15d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm2, %xmm4, %xmm4
|
|
# rnd_0: 1 - 3
|
|
movl %r15d, %eax
|
|
movl %r11d, %ecx
|
|
addl 40(%rsp), %r13d
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
andl %r10d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
addl %ecx, %r13d
|
|
vpxor %xmm6, %xmm7, %xmm6
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
movl %r14d, %ecx
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
movl %r14d, %ebx
|
|
movl %r10d, %ecx
|
|
addl 44(%rsp), %r12d
|
|
xorl %r11d, %ecx
|
|
vpsrlq $0x11, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r9d, %edx
|
|
andl %r9d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r11d, %ecx
|
|
xorl %r9d, %edx
|
|
addl %ecx, %r12d
|
|
vpsrld $10, %xmm6, %xmm9
|
|
vpxor %xmm8, %xmm7, %xmm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
movl %r13d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
vpxor %xmm9, %xmm8, %xmm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
vpaddd %xmm4, %xmm9, %xmm2
|
|
# msg_sched done: 8-11
|
|
# msg_sched: 12-15
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %xmm3, %xmm0, %xmm5
|
|
vpalignr $4, %xmm1, %xmm2, %xmm4
|
|
# rnd_0: 1 - 2
|
|
movl %r13d, %eax
|
|
movl %r9d, %ecx
|
|
addl 48(%rsp), %r11d
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
andl %r8d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
addl %ecx, %r11d
|
|
rorl $6, %edx
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
movl %r12d, %ecx
|
|
vpsrld $18, %xmm5, %xmm8
|
|
vpslld $14, %xmm5, %xmm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
vpor %xmm6, %xmm7, %xmm6
|
|
vpor %xmm8, %xmm9, %xmm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r12d, %ebx
|
|
movl %r8d, %ecx
|
|
addl 52(%rsp), %r10d
|
|
xorl %r9d, %ecx
|
|
vpsrld $3, %xmm5, %xmm9
|
|
vpxor %xmm6, %xmm8, %xmm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r15d, %edx
|
|
andl %r15d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r9d, %ecx
|
|
xorl %r15d, %edx
|
|
addl %ecx, %r10d
|
|
vpxor %xmm6, %xmm9, %xmm5
|
|
vpshufd $0xfa, %xmm2, %xmm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
movl %r11d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm3, %xmm4, %xmm4
|
|
# rnd_0: 1 - 3
|
|
movl %r11d, %eax
|
|
movl %r15d, %ecx
|
|
addl 56(%rsp), %r9d
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
andl %r14d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
addl %ecx, %r9d
|
|
vpxor %xmm6, %xmm7, %xmm6
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
movl %r10d, %ecx
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
movl %r10d, %ebx
|
|
movl %r14d, %ecx
|
|
addl 60(%rsp), %r8d
|
|
xorl %r15d, %ecx
|
|
vpsrlq $0x11, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r13d, %edx
|
|
andl %r13d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r15d, %ecx
|
|
xorl %r13d, %edx
|
|
addl %ecx, %r8d
|
|
vpsrld $10, %xmm6, %xmm9
|
|
vpxor %xmm8, %xmm7, %xmm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
movl %r9d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
vpxor %xmm9, %xmm8, %xmm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
vpaddd %xmm4, %xmm9, %xmm3
|
|
# msg_sched done: 12-15
|
|
# set_w_k_xfer_4: 8
|
|
vpaddd 128+L_avx1_sha256_k(%rip), %xmm0, %xmm4
|
|
vpaddd 144+L_avx1_sha256_k(%rip), %xmm1, %xmm5
|
|
vmovdqu %xmm4, (%rsp)
|
|
vmovdqu %xmm5, 16(%rsp)
|
|
vpaddd 160+L_avx1_sha256_k(%rip), %xmm2, %xmm6
|
|
vpaddd 176+L_avx1_sha256_k(%rip), %xmm3, %xmm7
|
|
vmovdqu %xmm6, 32(%rsp)
|
|
vmovdqu %xmm7, 48(%rsp)
|
|
# msg_sched: 0-3
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %xmm0, %xmm1, %xmm5
|
|
vpalignr $4, %xmm2, %xmm3, %xmm4
|
|
# rnd_0: 1 - 2
|
|
movl %r9d, %eax
|
|
movl %r13d, %ecx
|
|
addl (%rsp), %r15d
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
andl %r12d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
addl %ecx, %r15d
|
|
rorl $6, %edx
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
movl %r8d, %ecx
|
|
vpsrld $18, %xmm5, %xmm8
|
|
vpslld $14, %xmm5, %xmm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
vpor %xmm6, %xmm7, %xmm6
|
|
vpor %xmm8, %xmm9, %xmm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r8d, %ebx
|
|
movl %r12d, %ecx
|
|
addl 4(%rsp), %r14d
|
|
xorl %r13d, %ecx
|
|
vpsrld $3, %xmm5, %xmm9
|
|
vpxor %xmm6, %xmm8, %xmm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r11d, %edx
|
|
andl %r11d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r13d, %ecx
|
|
xorl %r11d, %edx
|
|
addl %ecx, %r14d
|
|
vpxor %xmm6, %xmm9, %xmm5
|
|
vpshufd $0xfa, %xmm3, %xmm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
movl %r15d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm0, %xmm4, %xmm4
|
|
# rnd_0: 1 - 3
|
|
movl %r15d, %eax
|
|
movl %r11d, %ecx
|
|
addl 8(%rsp), %r13d
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
andl %r10d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
addl %ecx, %r13d
|
|
vpxor %xmm6, %xmm7, %xmm6
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
movl %r14d, %ecx
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
movl %r14d, %ebx
|
|
movl %r10d, %ecx
|
|
addl 12(%rsp), %r12d
|
|
xorl %r11d, %ecx
|
|
vpsrlq $0x11, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r9d, %edx
|
|
andl %r9d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r11d, %ecx
|
|
xorl %r9d, %edx
|
|
addl %ecx, %r12d
|
|
vpsrld $10, %xmm6, %xmm9
|
|
vpxor %xmm8, %xmm7, %xmm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
movl %r13d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
vpxor %xmm9, %xmm8, %xmm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
vpaddd %xmm4, %xmm9, %xmm0
|
|
# msg_sched done: 0-3
|
|
# msg_sched: 4-7
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %xmm1, %xmm2, %xmm5
|
|
vpalignr $4, %xmm3, %xmm0, %xmm4
|
|
# rnd_0: 1 - 2
|
|
movl %r13d, %eax
|
|
movl %r9d, %ecx
|
|
addl 16(%rsp), %r11d
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
andl %r8d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
addl %ecx, %r11d
|
|
rorl $6, %edx
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
movl %r12d, %ecx
|
|
vpsrld $18, %xmm5, %xmm8
|
|
vpslld $14, %xmm5, %xmm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
vpor %xmm6, %xmm7, %xmm6
|
|
vpor %xmm8, %xmm9, %xmm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r12d, %ebx
|
|
movl %r8d, %ecx
|
|
addl 20(%rsp), %r10d
|
|
xorl %r9d, %ecx
|
|
vpsrld $3, %xmm5, %xmm9
|
|
vpxor %xmm6, %xmm8, %xmm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r15d, %edx
|
|
andl %r15d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r9d, %ecx
|
|
xorl %r15d, %edx
|
|
addl %ecx, %r10d
|
|
vpxor %xmm6, %xmm9, %xmm5
|
|
vpshufd $0xfa, %xmm0, %xmm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
movl %r11d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm1, %xmm4, %xmm4
|
|
# rnd_0: 1 - 3
|
|
movl %r11d, %eax
|
|
movl %r15d, %ecx
|
|
addl 24(%rsp), %r9d
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
andl %r14d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
addl %ecx, %r9d
|
|
vpxor %xmm6, %xmm7, %xmm6
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
movl %r10d, %ecx
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
movl %r10d, %ebx
|
|
movl %r14d, %ecx
|
|
addl 28(%rsp), %r8d
|
|
xorl %r15d, %ecx
|
|
vpsrlq $0x11, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r13d, %edx
|
|
andl %r13d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r15d, %ecx
|
|
xorl %r13d, %edx
|
|
addl %ecx, %r8d
|
|
vpsrld $10, %xmm6, %xmm9
|
|
vpxor %xmm8, %xmm7, %xmm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
movl %r9d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
vpxor %xmm9, %xmm8, %xmm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
vpaddd %xmm4, %xmm9, %xmm1
|
|
# msg_sched done: 4-7
|
|
# msg_sched: 8-11
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %xmm2, %xmm3, %xmm5
|
|
vpalignr $4, %xmm0, %xmm1, %xmm4
|
|
# rnd_0: 1 - 2
|
|
movl %r9d, %eax
|
|
movl %r13d, %ecx
|
|
addl 32(%rsp), %r15d
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
andl %r12d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
addl %ecx, %r15d
|
|
rorl $6, %edx
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
movl %r8d, %ecx
|
|
vpsrld $18, %xmm5, %xmm8
|
|
vpslld $14, %xmm5, %xmm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
vpor %xmm6, %xmm7, %xmm6
|
|
vpor %xmm8, %xmm9, %xmm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r8d, %ebx
|
|
movl %r12d, %ecx
|
|
addl 36(%rsp), %r14d
|
|
xorl %r13d, %ecx
|
|
vpsrld $3, %xmm5, %xmm9
|
|
vpxor %xmm6, %xmm8, %xmm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r11d, %edx
|
|
andl %r11d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r13d, %ecx
|
|
xorl %r11d, %edx
|
|
addl %ecx, %r14d
|
|
vpxor %xmm6, %xmm9, %xmm5
|
|
vpshufd $0xfa, %xmm1, %xmm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
movl %r15d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm2, %xmm4, %xmm4
|
|
# rnd_0: 1 - 3
|
|
movl %r15d, %eax
|
|
movl %r11d, %ecx
|
|
addl 40(%rsp), %r13d
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
andl %r10d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
addl %ecx, %r13d
|
|
vpxor %xmm6, %xmm7, %xmm6
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
movl %r14d, %ecx
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
movl %r14d, %ebx
|
|
movl %r10d, %ecx
|
|
addl 44(%rsp), %r12d
|
|
xorl %r11d, %ecx
|
|
vpsrlq $0x11, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r9d, %edx
|
|
andl %r9d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r11d, %ecx
|
|
xorl %r9d, %edx
|
|
addl %ecx, %r12d
|
|
vpsrld $10, %xmm6, %xmm9
|
|
vpxor %xmm8, %xmm7, %xmm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
movl %r13d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
vpxor %xmm9, %xmm8, %xmm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
vpaddd %xmm4, %xmm9, %xmm2
|
|
# msg_sched done: 8-11
|
|
# msg_sched: 12-15
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %xmm3, %xmm0, %xmm5
|
|
vpalignr $4, %xmm1, %xmm2, %xmm4
|
|
# rnd_0: 1 - 2
|
|
movl %r13d, %eax
|
|
movl %r9d, %ecx
|
|
addl 48(%rsp), %r11d
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
andl %r8d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
addl %ecx, %r11d
|
|
rorl $6, %edx
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
movl %r12d, %ecx
|
|
vpsrld $18, %xmm5, %xmm8
|
|
vpslld $14, %xmm5, %xmm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
vpor %xmm6, %xmm7, %xmm6
|
|
vpor %xmm8, %xmm9, %xmm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r12d, %ebx
|
|
movl %r8d, %ecx
|
|
addl 52(%rsp), %r10d
|
|
xorl %r9d, %ecx
|
|
vpsrld $3, %xmm5, %xmm9
|
|
vpxor %xmm6, %xmm8, %xmm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r15d, %edx
|
|
andl %r15d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r9d, %ecx
|
|
xorl %r15d, %edx
|
|
addl %ecx, %r10d
|
|
vpxor %xmm6, %xmm9, %xmm5
|
|
vpshufd $0xfa, %xmm2, %xmm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
movl %r11d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm3, %xmm4, %xmm4
|
|
# rnd_0: 1 - 3
|
|
movl %r11d, %eax
|
|
movl %r15d, %ecx
|
|
addl 56(%rsp), %r9d
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
andl %r14d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
addl %ecx, %r9d
|
|
vpxor %xmm6, %xmm7, %xmm6
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
movl %r10d, %ecx
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
movl %r10d, %ebx
|
|
movl %r14d, %ecx
|
|
addl 60(%rsp), %r8d
|
|
xorl %r15d, %ecx
|
|
vpsrlq $0x11, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r13d, %edx
|
|
andl %r13d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r15d, %ecx
|
|
xorl %r13d, %edx
|
|
addl %ecx, %r8d
|
|
vpsrld $10, %xmm6, %xmm9
|
|
vpxor %xmm8, %xmm7, %xmm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
movl %r9d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
vpxor %xmm9, %xmm8, %xmm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
vpaddd %xmm4, %xmm9, %xmm3
|
|
# msg_sched done: 12-15
|
|
# set_w_k_xfer_4: 12
|
|
vpaddd 192+L_avx1_sha256_k(%rip), %xmm0, %xmm4
|
|
vpaddd 208+L_avx1_sha256_k(%rip), %xmm1, %xmm5
|
|
vmovdqu %xmm4, (%rsp)
|
|
vmovdqu %xmm5, 16(%rsp)
|
|
vpaddd 224+L_avx1_sha256_k(%rip), %xmm2, %xmm6
|
|
vpaddd 240+L_avx1_sha256_k(%rip), %xmm3, %xmm7
|
|
vmovdqu %xmm6, 32(%rsp)
|
|
vmovdqu %xmm7, 48(%rsp)
|
|
# rnd_all_4: 0-3
|
|
addl (%rsp), %r15d
|
|
movl %r13d, %ecx
|
|
movl %r9d, %eax
|
|
xorl %r14d, %ecx
|
|
rorl $14, %edx
|
|
andl %r12d, %ecx
|
|
xorl %r12d, %edx
|
|
xorl %r14d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r15d
|
|
xorl %r12d, %edx
|
|
xorl %r8d, %eax
|
|
rorl $6, %edx
|
|
movl %r8d, %ecx
|
|
addl %edx, %r15d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
addl 4(%rsp), %r14d
|
|
movl %r12d, %ecx
|
|
movl %r8d, %ebx
|
|
xorl %r13d, %ecx
|
|
rorl $14, %edx
|
|
andl %r11d, %ecx
|
|
xorl %r11d, %edx
|
|
xorl %r13d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r14d
|
|
xorl %r11d, %edx
|
|
xorl %r15d, %ebx
|
|
rorl $6, %edx
|
|
movl %r15d, %ecx
|
|
addl %edx, %r14d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
addl 8(%rsp), %r13d
|
|
movl %r11d, %ecx
|
|
movl %r15d, %eax
|
|
xorl %r12d, %ecx
|
|
rorl $14, %edx
|
|
andl %r10d, %ecx
|
|
xorl %r10d, %edx
|
|
xorl %r12d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r13d
|
|
xorl %r10d, %edx
|
|
xorl %r14d, %eax
|
|
rorl $6, %edx
|
|
movl %r14d, %ecx
|
|
addl %edx, %r13d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
addl 12(%rsp), %r12d
|
|
movl %r10d, %ecx
|
|
movl %r14d, %ebx
|
|
xorl %r11d, %ecx
|
|
rorl $14, %edx
|
|
andl %r9d, %ecx
|
|
xorl %r9d, %edx
|
|
xorl %r11d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r12d
|
|
xorl %r9d, %edx
|
|
xorl %r13d, %ebx
|
|
rorl $6, %edx
|
|
movl %r13d, %ecx
|
|
addl %edx, %r12d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
# rnd_all_4: 1-4
|
|
addl 16(%rsp), %r11d
|
|
movl %r9d, %ecx
|
|
movl %r13d, %eax
|
|
xorl %r10d, %ecx
|
|
rorl $14, %edx
|
|
andl %r8d, %ecx
|
|
xorl %r8d, %edx
|
|
xorl %r10d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r11d
|
|
xorl %r8d, %edx
|
|
xorl %r12d, %eax
|
|
rorl $6, %edx
|
|
movl %r12d, %ecx
|
|
addl %edx, %r11d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
addl 20(%rsp), %r10d
|
|
movl %r8d, %ecx
|
|
movl %r12d, %ebx
|
|
xorl %r9d, %ecx
|
|
rorl $14, %edx
|
|
andl %r15d, %ecx
|
|
xorl %r15d, %edx
|
|
xorl %r9d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r10d
|
|
xorl %r15d, %edx
|
|
xorl %r11d, %ebx
|
|
rorl $6, %edx
|
|
movl %r11d, %ecx
|
|
addl %edx, %r10d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
addl 24(%rsp), %r9d
|
|
movl %r15d, %ecx
|
|
movl %r11d, %eax
|
|
xorl %r8d, %ecx
|
|
rorl $14, %edx
|
|
andl %r14d, %ecx
|
|
xorl %r14d, %edx
|
|
xorl %r8d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r9d
|
|
xorl %r14d, %edx
|
|
xorl %r10d, %eax
|
|
rorl $6, %edx
|
|
movl %r10d, %ecx
|
|
addl %edx, %r9d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
addl 28(%rsp), %r8d
|
|
movl %r14d, %ecx
|
|
movl %r10d, %ebx
|
|
xorl %r15d, %ecx
|
|
rorl $14, %edx
|
|
andl %r13d, %ecx
|
|
xorl %r13d, %edx
|
|
xorl %r15d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r8d
|
|
xorl %r13d, %edx
|
|
xorl %r9d, %ebx
|
|
rorl $6, %edx
|
|
movl %r9d, %ecx
|
|
addl %edx, %r8d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
# rnd_all_4: 2-5
|
|
addl 32(%rsp), %r15d
|
|
movl %r13d, %ecx
|
|
movl %r9d, %eax
|
|
xorl %r14d, %ecx
|
|
rorl $14, %edx
|
|
andl %r12d, %ecx
|
|
xorl %r12d, %edx
|
|
xorl %r14d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r15d
|
|
xorl %r12d, %edx
|
|
xorl %r8d, %eax
|
|
rorl $6, %edx
|
|
movl %r8d, %ecx
|
|
addl %edx, %r15d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
addl 36(%rsp), %r14d
|
|
movl %r12d, %ecx
|
|
movl %r8d, %ebx
|
|
xorl %r13d, %ecx
|
|
rorl $14, %edx
|
|
andl %r11d, %ecx
|
|
xorl %r11d, %edx
|
|
xorl %r13d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r14d
|
|
xorl %r11d, %edx
|
|
xorl %r15d, %ebx
|
|
rorl $6, %edx
|
|
movl %r15d, %ecx
|
|
addl %edx, %r14d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
addl 40(%rsp), %r13d
|
|
movl %r11d, %ecx
|
|
movl %r15d, %eax
|
|
xorl %r12d, %ecx
|
|
rorl $14, %edx
|
|
andl %r10d, %ecx
|
|
xorl %r10d, %edx
|
|
xorl %r12d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r13d
|
|
xorl %r10d, %edx
|
|
xorl %r14d, %eax
|
|
rorl $6, %edx
|
|
movl %r14d, %ecx
|
|
addl %edx, %r13d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
addl 44(%rsp), %r12d
|
|
movl %r10d, %ecx
|
|
movl %r14d, %ebx
|
|
xorl %r11d, %ecx
|
|
rorl $14, %edx
|
|
andl %r9d, %ecx
|
|
xorl %r9d, %edx
|
|
xorl %r11d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r12d
|
|
xorl %r9d, %edx
|
|
xorl %r13d, %ebx
|
|
rorl $6, %edx
|
|
movl %r13d, %ecx
|
|
addl %edx, %r12d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
# rnd_all_4: 3-6
|
|
addl 48(%rsp), %r11d
|
|
movl %r9d, %ecx
|
|
movl %r13d, %eax
|
|
xorl %r10d, %ecx
|
|
rorl $14, %edx
|
|
andl %r8d, %ecx
|
|
xorl %r8d, %edx
|
|
xorl %r10d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r11d
|
|
xorl %r8d, %edx
|
|
xorl %r12d, %eax
|
|
rorl $6, %edx
|
|
movl %r12d, %ecx
|
|
addl %edx, %r11d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
addl 52(%rsp), %r10d
|
|
movl %r8d, %ecx
|
|
movl %r12d, %ebx
|
|
xorl %r9d, %ecx
|
|
rorl $14, %edx
|
|
andl %r15d, %ecx
|
|
xorl %r15d, %edx
|
|
xorl %r9d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r10d
|
|
xorl %r15d, %edx
|
|
xorl %r11d, %ebx
|
|
rorl $6, %edx
|
|
movl %r11d, %ecx
|
|
addl %edx, %r10d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
addl 56(%rsp), %r9d
|
|
movl %r15d, %ecx
|
|
movl %r11d, %eax
|
|
xorl %r8d, %ecx
|
|
rorl $14, %edx
|
|
andl %r14d, %ecx
|
|
xorl %r14d, %edx
|
|
xorl %r8d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r9d
|
|
xorl %r14d, %edx
|
|
xorl %r10d, %eax
|
|
rorl $6, %edx
|
|
movl %r10d, %ecx
|
|
addl %edx, %r9d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
addl 60(%rsp), %r8d
|
|
movl %r14d, %ecx
|
|
movl %r10d, %ebx
|
|
xorl %r15d, %ecx
|
|
rorl $14, %edx
|
|
andl %r13d, %ecx
|
|
xorl %r13d, %edx
|
|
xorl %r15d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r8d
|
|
xorl %r13d, %edx
|
|
xorl %r9d, %ebx
|
|
rorl $6, %edx
|
|
movl %r9d, %ecx
|
|
addl %edx, %r8d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
addl %r8d, (%rdi)
addl %r9d, 4(%rdi)
addl %r10d, 8(%rdi)
addl %r11d, 12(%rdi)
addl %r12d, 16(%rdi)
addl %r13d, 20(%rdi)
addl %r14d, 24(%rdi)
addl %r15d, 28(%rdi)
xorq %rax, %rax
vzeroupper
addq $0x40, %rsp
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size Transform_Sha256_AVX1,.-Transform_Sha256_AVX1
#endif /* __APPLE__ */
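/* Transform_Sha256_AVX1_Len
 * Processes a sequence of 64-byte blocks with AVX1 instructions. Judging from
 * the code below, %rdi points at the SHA-256 state object (state words at
 * offsets 0-28), %rsi holds the data pointer (moved to %rbp) and %rdx the
 * length in bytes (moved to %rsi); the loop at L_sha256_len_avx1_start
 * consumes one 64-byte block per iteration.
 */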
|
|
#ifndef __APPLE__
.text
.globl Transform_Sha256_AVX1_Len
.type Transform_Sha256_AVX1_Len,@function
.align 16
Transform_Sha256_AVX1_Len:
#else
.section __TEXT,__text
.globl _Transform_Sha256_AVX1_Len
.p2align 4
_Transform_Sha256_AVX1_Len:
#endif /* __APPLE__ */
pushq %rbx
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbp
movq %rsi, %rbp
movq %rdx, %rsi
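# Register roles from here on (inferred from the loads/stores below):
# rdi = SHA-256 state, rbp = input message data, esi = remaining length in
# bytes, assumed by the caller to be a multiple of 64.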
subq $0x40, %rsp
vmovdqa L_avx1_sha256_flip_mask(%rip), %xmm13
vmovdqa L_avx1_sha256_shuf_00BA(%rip), %xmm11
vmovdqa L_avx1_sha256_shuf_DC00(%rip), %xmm12
movl (%rdi), %r8d
movl 4(%rdi), %r9d
movl 8(%rdi), %r10d
movl 12(%rdi), %r11d
movl 16(%rdi), %r12d
movl 20(%rdi), %r13d
movl 24(%rdi), %r14d
movl 28(%rdi), %r15d
# Start of loop processing a block
L_sha256_len_avx1_start:
# X0, X1, X2, X3 = W[0..15]
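# Load the 64-byte block and byte-swap each 32-bit word with the flip mask so
# the message words are read big-endian, as SHA-256 requires.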
vmovdqu (%rbp), %xmm0
vmovdqu 16(%rbp), %xmm1
vpshufb %xmm13, %xmm0, %xmm0
vpshufb %xmm13, %xmm1, %xmm1
vmovdqu 32(%rbp), %xmm2
vmovdqu 48(%rbp), %xmm3
vpshufb %xmm13, %xmm2, %xmm2
vpshufb %xmm13, %xmm3, %xmm3
movl %r9d, %ebx
movl %r12d, %edx
xorl %r10d, %ebx
# set_w_k_xfer_4: 0
vpaddd 0+L_avx1_sha256_k(%rip), %xmm0, %xmm4
vpaddd 16+L_avx1_sha256_k(%rip), %xmm1, %xmm5
vmovdqu %xmm4, (%rsp)
vmovdqu %xmm5, 16(%rsp)
vpaddd 32+L_avx1_sha256_k(%rip), %xmm2, %xmm6
vpaddd 48+L_avx1_sha256_k(%rip), %xmm3, %xmm7
vmovdqu %xmm6, 32(%rsp)
vmovdqu %xmm7, 48(%rsp)
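# W[0..15] + K[0..15] are now staged on the stack. Each msg_sched block below
# computes the next four schedule words with the sigma functions in xmm
# registers, interleaved with four scalar compression rounds.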
# msg_sched: 0-3
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %xmm0, %xmm1, %xmm5
|
|
vpalignr $4, %xmm2, %xmm3, %xmm4
|
|
# rnd_0: 1 - 2
|
|
movl %r9d, %eax
|
|
movl %r13d, %ecx
|
|
addl (%rsp), %r15d
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
andl %r12d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
addl %ecx, %r15d
|
|
rorl $6, %edx
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
movl %r8d, %ecx
|
|
vpsrld $18, %xmm5, %xmm8
|
|
vpslld $14, %xmm5, %xmm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
vpor %xmm6, %xmm7, %xmm6
|
|
vpor %xmm8, %xmm9, %xmm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r8d, %ebx
|
|
movl %r12d, %ecx
|
|
addl 4(%rsp), %r14d
|
|
xorl %r13d, %ecx
|
|
vpsrld $3, %xmm5, %xmm9
|
|
vpxor %xmm6, %xmm8, %xmm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r11d, %edx
|
|
andl %r11d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r13d, %ecx
|
|
xorl %r11d, %edx
|
|
addl %ecx, %r14d
|
|
vpxor %xmm6, %xmm9, %xmm5
|
|
vpshufd $0xfa, %xmm3, %xmm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
movl %r15d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm0, %xmm4, %xmm4
|
|
# rnd_0: 1 - 3
|
|
movl %r15d, %eax
|
|
movl %r11d, %ecx
|
|
addl 8(%rsp), %r13d
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
andl %r10d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
addl %ecx, %r13d
|
|
vpxor %xmm6, %xmm7, %xmm6
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
movl %r14d, %ecx
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
movl %r14d, %ebx
|
|
movl %r10d, %ecx
|
|
addl 12(%rsp), %r12d
|
|
xorl %r11d, %ecx
|
|
vpsrlq $0x11, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r9d, %edx
|
|
andl %r9d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r11d, %ecx
|
|
xorl %r9d, %edx
|
|
addl %ecx, %r12d
|
|
vpsrld $10, %xmm6, %xmm9
|
|
vpxor %xmm8, %xmm7, %xmm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
movl %r13d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
vpxor %xmm9, %xmm8, %xmm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
vpaddd %xmm4, %xmm9, %xmm0
|
|
# msg_sched done: 0-3
|
|
# msg_sched: 4-7
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %xmm1, %xmm2, %xmm5
|
|
vpalignr $4, %xmm3, %xmm0, %xmm4
|
|
# rnd_0: 1 - 2
|
|
movl %r13d, %eax
|
|
movl %r9d, %ecx
|
|
addl 16(%rsp), %r11d
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
andl %r8d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
addl %ecx, %r11d
|
|
rorl $6, %edx
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
movl %r12d, %ecx
|
|
vpsrld $18, %xmm5, %xmm8
|
|
vpslld $14, %xmm5, %xmm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
vpor %xmm6, %xmm7, %xmm6
|
|
vpor %xmm8, %xmm9, %xmm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r12d, %ebx
|
|
movl %r8d, %ecx
|
|
addl 20(%rsp), %r10d
|
|
xorl %r9d, %ecx
|
|
vpsrld $3, %xmm5, %xmm9
|
|
vpxor %xmm6, %xmm8, %xmm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r15d, %edx
|
|
andl %r15d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r9d, %ecx
|
|
xorl %r15d, %edx
|
|
addl %ecx, %r10d
|
|
vpxor %xmm6, %xmm9, %xmm5
|
|
vpshufd $0xfa, %xmm0, %xmm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
movl %r11d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm1, %xmm4, %xmm4
|
|
# rnd_0: 1 - 3
|
|
movl %r11d, %eax
|
|
movl %r15d, %ecx
|
|
addl 24(%rsp), %r9d
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
andl %r14d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
addl %ecx, %r9d
|
|
vpxor %xmm6, %xmm7, %xmm6
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
movl %r10d, %ecx
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
movl %r10d, %ebx
|
|
movl %r14d, %ecx
|
|
addl 28(%rsp), %r8d
|
|
xorl %r15d, %ecx
|
|
vpsrlq $0x11, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r13d, %edx
|
|
andl %r13d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r15d, %ecx
|
|
xorl %r13d, %edx
|
|
addl %ecx, %r8d
|
|
vpsrld $10, %xmm6, %xmm9
|
|
vpxor %xmm8, %xmm7, %xmm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
movl %r9d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
vpxor %xmm9, %xmm8, %xmm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
vpaddd %xmm4, %xmm9, %xmm1
|
|
# msg_sched done: 4-7
|
|
# msg_sched: 8-11
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %xmm2, %xmm3, %xmm5
|
|
vpalignr $4, %xmm0, %xmm1, %xmm4
|
|
# rnd_0: 1 - 2
|
|
movl %r9d, %eax
|
|
movl %r13d, %ecx
|
|
addl 32(%rsp), %r15d
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
andl %r12d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
addl %ecx, %r15d
|
|
rorl $6, %edx
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
movl %r8d, %ecx
|
|
vpsrld $18, %xmm5, %xmm8
|
|
vpslld $14, %xmm5, %xmm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
vpor %xmm6, %xmm7, %xmm6
|
|
vpor %xmm8, %xmm9, %xmm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r8d, %ebx
|
|
movl %r12d, %ecx
|
|
addl 36(%rsp), %r14d
|
|
xorl %r13d, %ecx
|
|
vpsrld $3, %xmm5, %xmm9
|
|
vpxor %xmm6, %xmm8, %xmm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r11d, %edx
|
|
andl %r11d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r13d, %ecx
|
|
xorl %r11d, %edx
|
|
addl %ecx, %r14d
|
|
vpxor %xmm6, %xmm9, %xmm5
|
|
vpshufd $0xfa, %xmm1, %xmm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
movl %r15d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm2, %xmm4, %xmm4
|
|
# rnd_0: 1 - 3
|
|
movl %r15d, %eax
|
|
movl %r11d, %ecx
|
|
addl 40(%rsp), %r13d
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
andl %r10d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
addl %ecx, %r13d
|
|
vpxor %xmm6, %xmm7, %xmm6
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
movl %r14d, %ecx
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
movl %r14d, %ebx
|
|
movl %r10d, %ecx
|
|
addl 44(%rsp), %r12d
|
|
xorl %r11d, %ecx
|
|
vpsrlq $0x11, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r9d, %edx
|
|
andl %r9d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r11d, %ecx
|
|
xorl %r9d, %edx
|
|
addl %ecx, %r12d
|
|
vpsrld $10, %xmm6, %xmm9
|
|
vpxor %xmm8, %xmm7, %xmm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
movl %r13d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
vpxor %xmm9, %xmm8, %xmm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
vpaddd %xmm4, %xmm9, %xmm2
|
|
# msg_sched done: 8-11
|
|
# msg_sched: 12-15
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %xmm3, %xmm0, %xmm5
|
|
vpalignr $4, %xmm1, %xmm2, %xmm4
|
|
# rnd_0: 1 - 2
|
|
movl %r13d, %eax
|
|
movl %r9d, %ecx
|
|
addl 48(%rsp), %r11d
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
andl %r8d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
addl %ecx, %r11d
|
|
rorl $6, %edx
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
movl %r12d, %ecx
|
|
vpsrld $18, %xmm5, %xmm8
|
|
vpslld $14, %xmm5, %xmm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
vpor %xmm6, %xmm7, %xmm6
|
|
vpor %xmm8, %xmm9, %xmm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r12d, %ebx
|
|
movl %r8d, %ecx
|
|
addl 52(%rsp), %r10d
|
|
xorl %r9d, %ecx
|
|
vpsrld $3, %xmm5, %xmm9
|
|
vpxor %xmm6, %xmm8, %xmm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r15d, %edx
|
|
andl %r15d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r9d, %ecx
|
|
xorl %r15d, %edx
|
|
addl %ecx, %r10d
|
|
vpxor %xmm6, %xmm9, %xmm5
|
|
vpshufd $0xfa, %xmm2, %xmm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
movl %r11d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm3, %xmm4, %xmm4
|
|
# rnd_0: 1 - 3
|
|
movl %r11d, %eax
|
|
movl %r15d, %ecx
|
|
addl 56(%rsp), %r9d
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
andl %r14d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
addl %ecx, %r9d
|
|
vpxor %xmm6, %xmm7, %xmm6
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
movl %r10d, %ecx
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
movl %r10d, %ebx
|
|
movl %r14d, %ecx
|
|
addl 60(%rsp), %r8d
|
|
xorl %r15d, %ecx
|
|
vpsrlq $0x11, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r13d, %edx
|
|
andl %r13d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r15d, %ecx
|
|
xorl %r13d, %edx
|
|
addl %ecx, %r8d
|
|
vpsrld $10, %xmm6, %xmm9
|
|
vpxor %xmm8, %xmm7, %xmm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
movl %r9d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
vpxor %xmm9, %xmm8, %xmm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
vpaddd %xmm4, %xmm9, %xmm3
|
|
# msg_sched done: 12-15
|
|
# set_w_k_xfer_4: 4
|
|
vpaddd 64+L_avx1_sha256_k(%rip), %xmm0, %xmm4
|
|
vpaddd 80+L_avx1_sha256_k(%rip), %xmm1, %xmm5
|
|
vmovdqu %xmm4, (%rsp)
|
|
vmovdqu %xmm5, 16(%rsp)
|
|
vpaddd 96+L_avx1_sha256_k(%rip), %xmm2, %xmm6
|
|
vpaddd 112+L_avx1_sha256_k(%rip), %xmm3, %xmm7
|
|
vmovdqu %xmm6, 32(%rsp)
|
|
vmovdqu %xmm7, 48(%rsp)
|
|
# msg_sched: 0-3
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %xmm0, %xmm1, %xmm5
|
|
vpalignr $4, %xmm2, %xmm3, %xmm4
|
|
# rnd_0: 1 - 2
|
|
movl %r9d, %eax
|
|
movl %r13d, %ecx
|
|
addl (%rsp), %r15d
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
andl %r12d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
addl %ecx, %r15d
|
|
rorl $6, %edx
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
movl %r8d, %ecx
|
|
vpsrld $18, %xmm5, %xmm8
|
|
vpslld $14, %xmm5, %xmm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
vpor %xmm6, %xmm7, %xmm6
|
|
vpor %xmm8, %xmm9, %xmm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r8d, %ebx
|
|
movl %r12d, %ecx
|
|
addl 4(%rsp), %r14d
|
|
xorl %r13d, %ecx
|
|
vpsrld $3, %xmm5, %xmm9
|
|
vpxor %xmm6, %xmm8, %xmm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r11d, %edx
|
|
andl %r11d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r13d, %ecx
|
|
xorl %r11d, %edx
|
|
addl %ecx, %r14d
|
|
vpxor %xmm6, %xmm9, %xmm5
|
|
vpshufd $0xfa, %xmm3, %xmm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
movl %r15d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm0, %xmm4, %xmm4
|
|
# rnd_0: 1 - 3
|
|
movl %r15d, %eax
|
|
movl %r11d, %ecx
|
|
addl 8(%rsp), %r13d
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
andl %r10d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
addl %ecx, %r13d
|
|
vpxor %xmm6, %xmm7, %xmm6
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
movl %r14d, %ecx
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
movl %r14d, %ebx
|
|
movl %r10d, %ecx
|
|
addl 12(%rsp), %r12d
|
|
xorl %r11d, %ecx
|
|
vpsrlq $0x11, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r9d, %edx
|
|
andl %r9d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r11d, %ecx
|
|
xorl %r9d, %edx
|
|
addl %ecx, %r12d
|
|
vpsrld $10, %xmm6, %xmm9
|
|
vpxor %xmm8, %xmm7, %xmm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
movl %r13d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
vpxor %xmm9, %xmm8, %xmm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
vpaddd %xmm4, %xmm9, %xmm0
|
|
# msg_sched done: 0-3
|
|
# msg_sched: 4-7
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %xmm1, %xmm2, %xmm5
|
|
vpalignr $4, %xmm3, %xmm0, %xmm4
|
|
# rnd_0: 1 - 2
|
|
movl %r13d, %eax
|
|
movl %r9d, %ecx
|
|
addl 16(%rsp), %r11d
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
andl %r8d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
addl %ecx, %r11d
|
|
rorl $6, %edx
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
movl %r12d, %ecx
|
|
vpsrld $18, %xmm5, %xmm8
|
|
vpslld $14, %xmm5, %xmm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
vpor %xmm6, %xmm7, %xmm6
|
|
vpor %xmm8, %xmm9, %xmm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r12d, %ebx
|
|
movl %r8d, %ecx
|
|
addl 20(%rsp), %r10d
|
|
xorl %r9d, %ecx
|
|
vpsrld $3, %xmm5, %xmm9
|
|
vpxor %xmm6, %xmm8, %xmm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r15d, %edx
|
|
andl %r15d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r9d, %ecx
|
|
xorl %r15d, %edx
|
|
addl %ecx, %r10d
|
|
vpxor %xmm6, %xmm9, %xmm5
|
|
vpshufd $0xfa, %xmm0, %xmm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
movl %r11d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm1, %xmm4, %xmm4
|
|
# rnd_0: 1 - 3
|
|
movl %r11d, %eax
|
|
movl %r15d, %ecx
|
|
addl 24(%rsp), %r9d
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
andl %r14d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
addl %ecx, %r9d
|
|
vpxor %xmm6, %xmm7, %xmm6
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
movl %r10d, %ecx
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
movl %r10d, %ebx
|
|
movl %r14d, %ecx
|
|
addl 28(%rsp), %r8d
|
|
xorl %r15d, %ecx
|
|
vpsrlq $0x11, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r13d, %edx
|
|
andl %r13d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r15d, %ecx
|
|
xorl %r13d, %edx
|
|
addl %ecx, %r8d
|
|
vpsrld $10, %xmm6, %xmm9
|
|
vpxor %xmm8, %xmm7, %xmm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
movl %r9d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
vpxor %xmm9, %xmm8, %xmm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
vpaddd %xmm4, %xmm9, %xmm1
|
|
# msg_sched done: 4-7
|
|
# msg_sched: 8-11
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %xmm2, %xmm3, %xmm5
|
|
vpalignr $4, %xmm0, %xmm1, %xmm4
|
|
# rnd_0: 1 - 2
|
|
movl %r9d, %eax
|
|
movl %r13d, %ecx
|
|
addl 32(%rsp), %r15d
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
andl %r12d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
addl %ecx, %r15d
|
|
rorl $6, %edx
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
movl %r8d, %ecx
|
|
vpsrld $18, %xmm5, %xmm8
|
|
vpslld $14, %xmm5, %xmm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
vpor %xmm6, %xmm7, %xmm6
|
|
vpor %xmm8, %xmm9, %xmm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r8d, %ebx
|
|
movl %r12d, %ecx
|
|
addl 36(%rsp), %r14d
|
|
xorl %r13d, %ecx
|
|
vpsrld $3, %xmm5, %xmm9
|
|
vpxor %xmm6, %xmm8, %xmm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r11d, %edx
|
|
andl %r11d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r13d, %ecx
|
|
xorl %r11d, %edx
|
|
addl %ecx, %r14d
|
|
vpxor %xmm6, %xmm9, %xmm5
|
|
vpshufd $0xfa, %xmm1, %xmm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
movl %r15d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm2, %xmm4, %xmm4
|
|
# rnd_0: 1 - 3
|
|
movl %r15d, %eax
|
|
movl %r11d, %ecx
|
|
addl 40(%rsp), %r13d
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
andl %r10d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
addl %ecx, %r13d
|
|
vpxor %xmm6, %xmm7, %xmm6
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
movl %r14d, %ecx
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
movl %r14d, %ebx
|
|
movl %r10d, %ecx
|
|
addl 44(%rsp), %r12d
|
|
xorl %r11d, %ecx
|
|
vpsrlq $0x11, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r9d, %edx
|
|
andl %r9d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r11d, %ecx
|
|
xorl %r9d, %edx
|
|
addl %ecx, %r12d
|
|
vpsrld $10, %xmm6, %xmm9
|
|
vpxor %xmm8, %xmm7, %xmm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
movl %r13d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
vpxor %xmm9, %xmm8, %xmm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
vpaddd %xmm4, %xmm9, %xmm2
|
|
# msg_sched done: 8-11
|
|
# msg_sched: 12-15
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %xmm3, %xmm0, %xmm5
|
|
vpalignr $4, %xmm1, %xmm2, %xmm4
|
|
# rnd_0: 1 - 2
|
|
movl %r13d, %eax
|
|
movl %r9d, %ecx
|
|
addl 48(%rsp), %r11d
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
andl %r8d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
addl %ecx, %r11d
|
|
rorl $6, %edx
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
movl %r12d, %ecx
|
|
vpsrld $18, %xmm5, %xmm8
|
|
vpslld $14, %xmm5, %xmm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
vpor %xmm6, %xmm7, %xmm6
|
|
vpor %xmm8, %xmm9, %xmm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r12d, %ebx
|
|
movl %r8d, %ecx
|
|
addl 52(%rsp), %r10d
|
|
xorl %r9d, %ecx
|
|
vpsrld $3, %xmm5, %xmm9
|
|
vpxor %xmm6, %xmm8, %xmm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r15d, %edx
|
|
andl %r15d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r9d, %ecx
|
|
xorl %r15d, %edx
|
|
addl %ecx, %r10d
|
|
vpxor %xmm6, %xmm9, %xmm5
|
|
vpshufd $0xfa, %xmm2, %xmm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
movl %r11d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm3, %xmm4, %xmm4
|
|
# rnd_0: 1 - 3
|
|
movl %r11d, %eax
|
|
movl %r15d, %ecx
|
|
addl 56(%rsp), %r9d
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
andl %r14d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
addl %ecx, %r9d
|
|
vpxor %xmm6, %xmm7, %xmm6
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
movl %r10d, %ecx
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
movl %r10d, %ebx
|
|
movl %r14d, %ecx
|
|
addl 60(%rsp), %r8d
|
|
xorl %r15d, %ecx
|
|
vpsrlq $0x11, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r13d, %edx
|
|
andl %r13d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r15d, %ecx
|
|
xorl %r13d, %edx
|
|
addl %ecx, %r8d
|
|
vpsrld $10, %xmm6, %xmm9
|
|
vpxor %xmm8, %xmm7, %xmm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
movl %r9d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
vpxor %xmm9, %xmm8, %xmm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
vpaddd %xmm4, %xmm9, %xmm3
|
|
# msg_sched done: 12-15
|
|
# set_w_k_xfer_4: 8
|
|
vpaddd 128+L_avx1_sha256_k(%rip), %xmm0, %xmm4
|
|
vpaddd 144+L_avx1_sha256_k(%rip), %xmm1, %xmm5
|
|
vmovdqu %xmm4, (%rsp)
|
|
vmovdqu %xmm5, 16(%rsp)
|
|
vpaddd 160+L_avx1_sha256_k(%rip), %xmm2, %xmm6
|
|
vpaddd 176+L_avx1_sha256_k(%rip), %xmm3, %xmm7
|
|
vmovdqu %xmm6, 32(%rsp)
|
|
vmovdqu %xmm7, 48(%rsp)
|
|
# msg_sched: 0-3
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %xmm0, %xmm1, %xmm5
|
|
vpalignr $4, %xmm2, %xmm3, %xmm4
|
|
# rnd_0: 1 - 2
|
|
movl %r9d, %eax
|
|
movl %r13d, %ecx
|
|
addl (%rsp), %r15d
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
andl %r12d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
addl %ecx, %r15d
|
|
rorl $6, %edx
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
movl %r8d, %ecx
|
|
vpsrld $18, %xmm5, %xmm8
|
|
vpslld $14, %xmm5, %xmm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
vpor %xmm6, %xmm7, %xmm6
|
|
vpor %xmm8, %xmm9, %xmm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r8d, %ebx
|
|
movl %r12d, %ecx
|
|
addl 4(%rsp), %r14d
|
|
xorl %r13d, %ecx
|
|
vpsrld $3, %xmm5, %xmm9
|
|
vpxor %xmm6, %xmm8, %xmm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r11d, %edx
|
|
andl %r11d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r13d, %ecx
|
|
xorl %r11d, %edx
|
|
addl %ecx, %r14d
|
|
vpxor %xmm6, %xmm9, %xmm5
|
|
vpshufd $0xfa, %xmm3, %xmm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
movl %r15d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm0, %xmm4, %xmm4
|
|
# rnd_0: 1 - 3
|
|
movl %r15d, %eax
|
|
movl %r11d, %ecx
|
|
addl 8(%rsp), %r13d
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
andl %r10d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
addl %ecx, %r13d
|
|
vpxor %xmm6, %xmm7, %xmm6
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
movl %r14d, %ecx
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
movl %r14d, %ebx
|
|
movl %r10d, %ecx
|
|
addl 12(%rsp), %r12d
|
|
xorl %r11d, %ecx
|
|
vpsrlq $0x11, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r9d, %edx
|
|
andl %r9d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r11d, %ecx
|
|
xorl %r9d, %edx
|
|
addl %ecx, %r12d
|
|
vpsrld $10, %xmm6, %xmm9
|
|
vpxor %xmm8, %xmm7, %xmm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
movl %r13d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
vpxor %xmm9, %xmm8, %xmm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
vpaddd %xmm4, %xmm9, %xmm0
|
|
# msg_sched done: 0-3
|
|
# msg_sched: 4-7
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %xmm1, %xmm2, %xmm5
|
|
vpalignr $4, %xmm3, %xmm0, %xmm4
|
|
# rnd_0: 1 - 2
|
|
movl %r13d, %eax
|
|
movl %r9d, %ecx
|
|
addl 16(%rsp), %r11d
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
andl %r8d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
addl %ecx, %r11d
|
|
rorl $6, %edx
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
movl %r12d, %ecx
|
|
vpsrld $18, %xmm5, %xmm8
|
|
vpslld $14, %xmm5, %xmm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
vpor %xmm6, %xmm7, %xmm6
|
|
vpor %xmm8, %xmm9, %xmm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r12d, %ebx
|
|
movl %r8d, %ecx
|
|
addl 20(%rsp), %r10d
|
|
xorl %r9d, %ecx
|
|
vpsrld $3, %xmm5, %xmm9
|
|
vpxor %xmm6, %xmm8, %xmm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r15d, %edx
|
|
andl %r15d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r9d, %ecx
|
|
xorl %r15d, %edx
|
|
addl %ecx, %r10d
|
|
vpxor %xmm6, %xmm9, %xmm5
|
|
vpshufd $0xfa, %xmm0, %xmm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
movl %r11d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm1, %xmm4, %xmm4
|
|
# rnd_0: 1 - 3
|
|
movl %r11d, %eax
|
|
movl %r15d, %ecx
|
|
addl 24(%rsp), %r9d
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
andl %r14d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
addl %ecx, %r9d
|
|
vpxor %xmm6, %xmm7, %xmm6
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
movl %r10d, %ecx
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
movl %r10d, %ebx
|
|
movl %r14d, %ecx
|
|
addl 28(%rsp), %r8d
|
|
xorl %r15d, %ecx
|
|
vpsrlq $0x11, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r13d, %edx
|
|
andl %r13d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r15d, %ecx
|
|
xorl %r13d, %edx
|
|
addl %ecx, %r8d
|
|
vpsrld $10, %xmm6, %xmm9
|
|
vpxor %xmm8, %xmm7, %xmm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
movl %r9d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
vpxor %xmm9, %xmm8, %xmm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
vpaddd %xmm4, %xmm9, %xmm1
|
|
# msg_sched done: 4-7
|
|
# msg_sched: 8-11
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %xmm2, %xmm3, %xmm5
|
|
vpalignr $4, %xmm0, %xmm1, %xmm4
|
|
# rnd_0: 1 - 2
|
|
movl %r9d, %eax
|
|
movl %r13d, %ecx
|
|
addl 32(%rsp), %r15d
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
andl %r12d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
addl %ecx, %r15d
|
|
rorl $6, %edx
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
movl %r8d, %ecx
|
|
vpsrld $18, %xmm5, %xmm8
|
|
vpslld $14, %xmm5, %xmm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
vpor %xmm6, %xmm7, %xmm6
|
|
vpor %xmm8, %xmm9, %xmm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r8d, %ebx
|
|
movl %r12d, %ecx
|
|
addl 36(%rsp), %r14d
|
|
xorl %r13d, %ecx
|
|
vpsrld $3, %xmm5, %xmm9
|
|
vpxor %xmm6, %xmm8, %xmm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r11d, %edx
|
|
andl %r11d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r13d, %ecx
|
|
xorl %r11d, %edx
|
|
addl %ecx, %r14d
|
|
vpxor %xmm6, %xmm9, %xmm5
|
|
vpshufd $0xfa, %xmm1, %xmm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
movl %r15d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm2, %xmm4, %xmm4
|
|
# rnd_0: 1 - 3
|
|
movl %r15d, %eax
|
|
movl %r11d, %ecx
|
|
addl 40(%rsp), %r13d
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
andl %r10d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
addl %ecx, %r13d
|
|
vpxor %xmm6, %xmm7, %xmm6
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
movl %r14d, %ecx
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
movl %r14d, %ebx
|
|
movl %r10d, %ecx
|
|
addl 44(%rsp), %r12d
|
|
xorl %r11d, %ecx
|
|
vpsrlq $0x11, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r9d, %edx
|
|
andl %r9d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r11d, %ecx
|
|
xorl %r9d, %edx
|
|
addl %ecx, %r12d
|
|
vpsrld $10, %xmm6, %xmm9
|
|
vpxor %xmm8, %xmm7, %xmm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
movl %r13d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
vpxor %xmm9, %xmm8, %xmm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
vpaddd %xmm4, %xmm9, %xmm2
|
|
# msg_sched done: 8-11
|
|
# msg_sched: 12-15
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %xmm3, %xmm0, %xmm5
|
|
vpalignr $4, %xmm1, %xmm2, %xmm4
|
|
# rnd_0: 1 - 2
|
|
movl %r13d, %eax
|
|
movl %r9d, %ecx
|
|
addl 48(%rsp), %r11d
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
andl %r8d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
addl %ecx, %r11d
|
|
rorl $6, %edx
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
movl %r12d, %ecx
|
|
vpsrld $18, %xmm5, %xmm8
|
|
vpslld $14, %xmm5, %xmm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
vpor %xmm6, %xmm7, %xmm6
|
|
vpor %xmm8, %xmm9, %xmm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r12d, %ebx
|
|
movl %r8d, %ecx
|
|
addl 52(%rsp), %r10d
|
|
xorl %r9d, %ecx
|
|
vpsrld $3, %xmm5, %xmm9
|
|
vpxor %xmm6, %xmm8, %xmm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r15d, %edx
|
|
andl %r15d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r9d, %ecx
|
|
xorl %r15d, %edx
|
|
addl %ecx, %r10d
|
|
vpxor %xmm6, %xmm9, %xmm5
|
|
vpshufd $0xfa, %xmm2, %xmm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
movl %r11d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm3, %xmm4, %xmm4
|
|
# rnd_0: 1 - 3
|
|
movl %r11d, %eax
|
|
movl %r15d, %ecx
|
|
addl 56(%rsp), %r9d
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
andl %r14d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
addl %ecx, %r9d
|
|
vpxor %xmm6, %xmm7, %xmm6
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
movl %r10d, %ecx
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
movl %r10d, %ebx
|
|
movl %r14d, %ecx
|
|
addl 60(%rsp), %r8d
|
|
xorl %r15d, %ecx
|
|
vpsrlq $0x11, %xmm6, %xmm8
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r13d, %edx
|
|
andl %r13d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r15d, %ecx
|
|
xorl %r13d, %edx
|
|
addl %ecx, %r8d
|
|
vpsrld $10, %xmm6, %xmm9
|
|
vpxor %xmm8, %xmm7, %xmm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
movl %r9d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
vpxor %xmm9, %xmm8, %xmm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
vpaddd %xmm4, %xmm9, %xmm3
|
|
# msg_sched done: 12-15
|
|
# set_w_k_xfer_4: 12
|
|
vpaddd 192+L_avx1_sha256_k(%rip), %xmm0, %xmm4
|
|
vpaddd 208+L_avx1_sha256_k(%rip), %xmm1, %xmm5
|
|
vmovdqu %xmm4, (%rsp)
|
|
vmovdqu %xmm5, 16(%rsp)
|
|
vpaddd 224+L_avx1_sha256_k(%rip), %xmm2, %xmm6
|
|
vpaddd 240+L_avx1_sha256_k(%rip), %xmm3, %xmm7
|
|
vmovdqu %xmm6, 32(%rsp)
|
|
vmovdqu %xmm7, 48(%rsp)
|
|
# rnd_all_4: 0-3
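# Rounds 48-63: the message schedule is complete, so only plain compression
# rounds remain, consuming the W+K values staged on the stack.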
|
|
addl (%rsp), %r15d
|
|
movl %r13d, %ecx
|
|
movl %r9d, %eax
|
|
xorl %r14d, %ecx
|
|
rorl $14, %edx
|
|
andl %r12d, %ecx
|
|
xorl %r12d, %edx
|
|
xorl %r14d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r15d
|
|
xorl %r12d, %edx
|
|
xorl %r8d, %eax
|
|
rorl $6, %edx
|
|
movl %r8d, %ecx
|
|
addl %edx, %r15d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
addl 4(%rsp), %r14d
|
|
movl %r12d, %ecx
|
|
movl %r8d, %ebx
|
|
xorl %r13d, %ecx
|
|
rorl $14, %edx
|
|
andl %r11d, %ecx
|
|
xorl %r11d, %edx
|
|
xorl %r13d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r14d
|
|
xorl %r11d, %edx
|
|
xorl %r15d, %ebx
|
|
rorl $6, %edx
|
|
movl %r15d, %ecx
|
|
addl %edx, %r14d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
addl 8(%rsp), %r13d
|
|
movl %r11d, %ecx
|
|
movl %r15d, %eax
|
|
xorl %r12d, %ecx
|
|
rorl $14, %edx
|
|
andl %r10d, %ecx
|
|
xorl %r10d, %edx
|
|
xorl %r12d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r13d
|
|
xorl %r10d, %edx
|
|
xorl %r14d, %eax
|
|
rorl $6, %edx
|
|
movl %r14d, %ecx
|
|
addl %edx, %r13d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
addl 12(%rsp), %r12d
|
|
movl %r10d, %ecx
|
|
movl %r14d, %ebx
|
|
xorl %r11d, %ecx
|
|
rorl $14, %edx
|
|
andl %r9d, %ecx
|
|
xorl %r9d, %edx
|
|
xorl %r11d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r12d
|
|
xorl %r9d, %edx
|
|
xorl %r13d, %ebx
|
|
rorl $6, %edx
|
|
movl %r13d, %ecx
|
|
addl %edx, %r12d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
# rnd_all_4: 1-4
|
|
addl 16(%rsp), %r11d
|
|
movl %r9d, %ecx
|
|
movl %r13d, %eax
|
|
xorl %r10d, %ecx
|
|
rorl $14, %edx
|
|
andl %r8d, %ecx
|
|
xorl %r8d, %edx
|
|
xorl %r10d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r11d
|
|
xorl %r8d, %edx
|
|
xorl %r12d, %eax
|
|
rorl $6, %edx
|
|
movl %r12d, %ecx
|
|
addl %edx, %r11d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
addl 20(%rsp), %r10d
|
|
movl %r8d, %ecx
|
|
movl %r12d, %ebx
|
|
xorl %r9d, %ecx
|
|
rorl $14, %edx
|
|
andl %r15d, %ecx
|
|
xorl %r15d, %edx
|
|
xorl %r9d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r10d
|
|
xorl %r15d, %edx
|
|
xorl %r11d, %ebx
|
|
rorl $6, %edx
|
|
movl %r11d, %ecx
|
|
addl %edx, %r10d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
addl 24(%rsp), %r9d
|
|
movl %r15d, %ecx
|
|
movl %r11d, %eax
|
|
xorl %r8d, %ecx
|
|
rorl $14, %edx
|
|
andl %r14d, %ecx
|
|
xorl %r14d, %edx
|
|
xorl %r8d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r9d
|
|
xorl %r14d, %edx
|
|
xorl %r10d, %eax
|
|
rorl $6, %edx
|
|
movl %r10d, %ecx
|
|
addl %edx, %r9d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
addl 28(%rsp), %r8d
|
|
movl %r14d, %ecx
|
|
movl %r10d, %ebx
|
|
xorl %r15d, %ecx
|
|
rorl $14, %edx
|
|
andl %r13d, %ecx
|
|
xorl %r13d, %edx
|
|
xorl %r15d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r8d
|
|
xorl %r13d, %edx
|
|
xorl %r9d, %ebx
|
|
rorl $6, %edx
|
|
movl %r9d, %ecx
|
|
addl %edx, %r8d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
# rnd_all_4: 2-5
|
|
addl 32(%rsp), %r15d
|
|
movl %r13d, %ecx
|
|
movl %r9d, %eax
|
|
xorl %r14d, %ecx
|
|
rorl $14, %edx
|
|
andl %r12d, %ecx
|
|
xorl %r12d, %edx
|
|
xorl %r14d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r15d
|
|
xorl %r12d, %edx
|
|
xorl %r8d, %eax
|
|
rorl $6, %edx
|
|
movl %r8d, %ecx
|
|
addl %edx, %r15d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
addl 36(%rsp), %r14d
|
|
movl %r12d, %ecx
|
|
movl %r8d, %ebx
|
|
xorl %r13d, %ecx
|
|
rorl $14, %edx
|
|
andl %r11d, %ecx
|
|
xorl %r11d, %edx
|
|
xorl %r13d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r14d
|
|
xorl %r11d, %edx
|
|
xorl %r15d, %ebx
|
|
rorl $6, %edx
|
|
movl %r15d, %ecx
|
|
addl %edx, %r14d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
addl 40(%rsp), %r13d
|
|
movl %r11d, %ecx
|
|
movl %r15d, %eax
|
|
xorl %r12d, %ecx
|
|
rorl $14, %edx
|
|
andl %r10d, %ecx
|
|
xorl %r10d, %edx
|
|
xorl %r12d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r13d
|
|
xorl %r10d, %edx
|
|
xorl %r14d, %eax
|
|
rorl $6, %edx
|
|
movl %r14d, %ecx
|
|
addl %edx, %r13d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
addl 44(%rsp), %r12d
|
|
movl %r10d, %ecx
|
|
movl %r14d, %ebx
|
|
xorl %r11d, %ecx
|
|
rorl $14, %edx
|
|
andl %r9d, %ecx
|
|
xorl %r9d, %edx
|
|
xorl %r11d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r12d
|
|
xorl %r9d, %edx
|
|
xorl %r13d, %ebx
|
|
rorl $6, %edx
|
|
movl %r13d, %ecx
|
|
addl %edx, %r12d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
# rnd_all_4: 3-6
|
|
addl 48(%rsp), %r11d
|
|
movl %r9d, %ecx
|
|
movl %r13d, %eax
|
|
xorl %r10d, %ecx
|
|
rorl $14, %edx
|
|
andl %r8d, %ecx
|
|
xorl %r8d, %edx
|
|
xorl %r10d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r11d
|
|
xorl %r8d, %edx
|
|
xorl %r12d, %eax
|
|
rorl $6, %edx
|
|
movl %r12d, %ecx
|
|
addl %edx, %r11d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
addl 52(%rsp), %r10d
|
|
movl %r8d, %ecx
|
|
movl %r12d, %ebx
|
|
xorl %r9d, %ecx
|
|
rorl $14, %edx
|
|
andl %r15d, %ecx
|
|
xorl %r15d, %edx
|
|
xorl %r9d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r10d
|
|
xorl %r15d, %edx
|
|
xorl %r11d, %ebx
|
|
rorl $6, %edx
|
|
movl %r11d, %ecx
|
|
addl %edx, %r10d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
addl 56(%rsp), %r9d
|
|
movl %r15d, %ecx
|
|
movl %r11d, %eax
|
|
xorl %r8d, %ecx
|
|
rorl $14, %edx
|
|
andl %r14d, %ecx
|
|
xorl %r14d, %edx
|
|
xorl %r8d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r9d
|
|
xorl %r14d, %edx
|
|
xorl %r10d, %eax
|
|
rorl $6, %edx
|
|
movl %r10d, %ecx
|
|
addl %edx, %r9d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
addl 60(%rsp), %r8d
|
|
movl %r14d, %ecx
|
|
movl %r10d, %ebx
|
|
xorl %r15d, %ecx
|
|
rorl $14, %edx
|
|
andl %r13d, %ecx
|
|
xorl %r13d, %edx
|
|
xorl %r15d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r8d
|
|
xorl %r13d, %edx
|
|
xorl %r9d, %ebx
|
|
rorl $6, %edx
|
|
movl %r9d, %ecx
|
|
addl %edx, %r8d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
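# Block complete: fold the previous state back in (Davies-Meyer), advance the
# data pointer by 64 bytes, store the updated digest, and loop while input
# remains.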
addl (%rdi), %r8d
addl 4(%rdi), %r9d
addl 8(%rdi), %r10d
addl 12(%rdi), %r11d
addl 16(%rdi), %r12d
addl 20(%rdi), %r13d
addl 24(%rdi), %r14d
addl 28(%rdi), %r15d
addq $0x40, %rbp
subl $0x40, %esi
movl %r8d, (%rdi)
movl %r9d, 4(%rdi)
movl %r10d, 8(%rdi)
movl %r11d, 12(%rdi)
movl %r12d, 16(%rdi)
movl %r13d, 20(%rdi)
movl %r14d, 24(%rdi)
movl %r15d, 28(%rdi)
jnz L_sha256_len_avx1_start
xorq %rax, %rax
vzeroupper
addq $0x40, %rsp
popq %rbp
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size Transform_Sha256_AVX1_Len,.-Transform_Sha256_AVX1_Len
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
L_avx1_rorx_sha256_k:
|
|
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
|
|
.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
|
|
.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
|
|
.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
|
|
.long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc
|
|
.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
|
|
.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
|
|
.long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967
|
|
.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
|
|
.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
|
|
.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
|
|
.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
|
|
.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
|
|
.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
|
|
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
|
|
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
|
|
#ifndef __APPLE__
|
|
.data
|
|
#else
|
|
.section __DATA,__data
|
|
#endif /* __APPLE__ */
|
|
#ifndef __APPLE__
|
|
.align 16
|
|
#else
|
|
.p2align 4
|
|
#endif /* __APPLE__ */
|
|
L_avx1_rorx_sha256_shuf_00BA:
|
|
.quad 0xb0a090803020100, 0xffffffffffffffff
|
|
#ifndef __APPLE__
|
|
.data
|
|
#else
|
|
.section __DATA,__data
|
|
#endif /* __APPLE__ */
|
|
#ifndef __APPLE__
|
|
.align 16
|
|
#else
|
|
.p2align 4
|
|
#endif /* __APPLE__ */
|
|
L_avx1_rorx_sha256_shuf_DC00:
|
|
.quad 0xffffffffffffffff, 0xb0a090803020100
|
|
#ifndef __APPLE__
|
|
.data
|
|
#else
|
|
.section __DATA,__data
|
|
#endif /* __APPLE__ */
|
|
#ifndef __APPLE__
|
|
.align 16
|
|
#else
|
|
.p2align 4
|
|
#endif /* __APPLE__ */
|
|
L_avx1_rorx_sha256_flip_mask:
|
|
.quad 0x405060700010203, 0xc0d0e0f08090a0b
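# (vpshufb mask that reverses the byte order of each 32-bit word, converting
# the little-endian input words to the big-endian order SHA-256 expects)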
|
|
#ifndef __APPLE__
|
|
.text
|
|
.globl Transform_Sha256_AVX1_RORX
|
|
.type Transform_Sha256_AVX1_RORX,@function
|
|
.align 16
|
|
Transform_Sha256_AVX1_RORX:
|
|
#else
|
|
.section __TEXT,__text
|
|
.globl _Transform_Sha256_AVX1_RORX
|
|
.p2align 4
|
|
_Transform_Sha256_AVX1_RORX:
|
|
#endif /* __APPLE__ */
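# This variant uses the BMI2 rorx instruction: rotates go straight to a
# destination register without touching the flags, saving moves in the
# Sigma0/Sigma1 computations compared with the rorl-based version above.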
|
|
pushq %rbx
|
|
pushq %r12
|
|
pushq %r13
|
|
pushq %r14
|
|
pushq %r15
|
|
subq $0x40, %rsp
|
|
vmovdqa L_avx1_rorx_sha256_flip_mask(%rip), %xmm13
|
|
vmovdqa L_avx1_rorx_sha256_shuf_00BA(%rip), %xmm11
|
|
vmovdqa L_avx1_rorx_sha256_shuf_DC00(%rip), %xmm12
|
|
leaq 32(%rdi), %rax
|
|
# X0, X1, X2, X3 = W[0..15]
|
|
vmovdqu (%rax), %xmm0
|
|
vmovdqu 16(%rax), %xmm1
|
|
vpshufb %xmm13, %xmm0, %xmm0
|
|
vpshufb %xmm13, %xmm1, %xmm1
|
|
vmovdqu 32(%rax), %xmm2
|
|
vmovdqu 48(%rax), %xmm3
|
|
vpshufb %xmm13, %xmm2, %xmm2
|
|
vpshufb %xmm13, %xmm3, %xmm3
|
|
movl (%rdi), %r8d
|
|
movl 4(%rdi), %r9d
|
|
movl 8(%rdi), %r10d
|
|
movl 12(%rdi), %r11d
|
|
movl 16(%rdi), %r12d
|
|
movl 20(%rdi), %r13d
|
|
movl 24(%rdi), %r14d
|
|
movl 28(%rdi), %r15d
|
|
# set_w_k_xfer_4: 0
|
|
vpaddd 0+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4
|
|
vpaddd 16+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5
|
|
vmovdqu %xmm4, (%rsp)
|
|
vmovdqu %xmm5, 16(%rsp)
|
|
vpaddd 32+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6
|
|
vpaddd 48+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7
|
|
vmovdqu %xmm6, 32(%rsp)
|
|
vmovdqu %xmm7, 48(%rsp)
|
|
movl %r9d, %ebx
|
|
rorxl $6, %r12d, %edx
|
|
xorl %r10d, %ebx
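# In the rounds below, Sigma1(e) is built from rorxl 6/11/25 of e and
# Sigma0(a) from rorxl 2/13/22 of a, xor-combined in edx/ecx.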
|
|
# msg_sched: 0-3
|
|
# rnd_0: 0 - 0
|
|
movl %r13d, %eax
|
|
rorxl $11, %r12d, %ecx
|
|
addl (%rsp), %r15d
|
|
vpalignr $4, %xmm2, %xmm3, %xmm4
|
|
vpalignr $4, %xmm0, %xmm1, %xmm5
|
|
# rnd_0: 1 - 2
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
andl %r12d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
xorl %r14d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
vpsrld $3, %xmm5, %xmm8
|
|
vpor %xmm6, %xmm7, %xmm7
|
|
# rnd_0: 5 - 7
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
andl %eax, %ebx
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
addl %ebx, %r15d
|
|
# rnd_1: 0 - 0
|
|
movl %r12d, %ebx
|
|
rorxl $11, %r11d, %ecx
|
|
addl 4(%rsp), %r14d
|
|
vpsrld $18, %xmm5, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
vpslld $14, %xmm5, %xmm5
|
|
# rnd_1: 2 - 2
|
|
andl %r11d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
vpxor %xmm5, %xmm7, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
xorl %r13d, %ebx
|
|
vpxor %xmm6, %xmm7, %xmm7
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
vpshufd $0xfa, %xmm3, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r8d, %ebx
|
|
addl %r14d, %r10d
|
|
vpxor %xmm8, %xmm7, %xmm5
|
|
# rnd_1: 6 - 6
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
andl %ebx, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
# rnd_1: 7 - 7
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
addl %eax, %r14d
|
|
# rnd_0: 0 - 0
|
|
movl %r11d, %eax
|
|
rorxl $11, %r10d, %ecx
|
|
addl 8(%rsp), %r13d
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm0, %xmm4, %xmm4
|
|
# rnd_0: 2 - 2
|
|
andl %r10d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
xorl %r12d, %eax
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
andl %eax, %ebx
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
addl %ebx, %r13d
|
|
# rnd_1: 0 - 0
|
|
movl %r10d, %ebx
|
|
rorxl $11, %r9d, %ecx
|
|
addl 12(%rsp), %r12d
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
vpsrld $10, %xmm6, %xmm9
|
|
# rnd_1: 2 - 2
|
|
andl %r9d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
xorl %r11d, %ebx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r14d, %ebx
|
|
addl %r12d, %r8d
|
|
vpxor %xmm6, %xmm9, %xmm9
|
|
# rnd_1: 6 - 6
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
andl %ebx, %eax
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
addl %eax, %r12d
|
|
vpaddd %xmm4, %xmm9, %xmm0
|
|
# msg_sched done: 0-3
|
|
# msg_sched: 4-7
|
|
# rnd_0: 0 - 0
|
|
movl %r9d, %eax
|
|
rorxl $11, %r8d, %ecx
|
|
addl 16(%rsp), %r11d
|
|
vpalignr $4, %xmm3, %xmm0, %xmm4
|
|
vpalignr $4, %xmm1, %xmm2, %xmm5
|
|
# rnd_0: 1 - 2
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
andl %r8d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
xorl %r10d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
vpsrld $3, %xmm5, %xmm8
|
|
vpor %xmm6, %xmm7, %xmm7
|
|
# rnd_0: 5 - 7
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
andl %eax, %ebx
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
addl %ebx, %r11d
|
|
# rnd_1: 0 - 0
|
|
movl %r8d, %ebx
|
|
rorxl $11, %r15d, %ecx
|
|
addl 20(%rsp), %r10d
|
|
vpsrld $18, %xmm5, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
vpslld $14, %xmm5, %xmm5
|
|
# rnd_1: 2 - 2
|
|
andl %r15d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
vpxor %xmm5, %xmm7, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
xorl %r9d, %ebx
|
|
vpxor %xmm6, %xmm7, %xmm7
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
vpshufd $0xfa, %xmm0, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r12d, %ebx
|
|
addl %r10d, %r14d
|
|
vpxor %xmm8, %xmm7, %xmm5
|
|
# rnd_1: 6 - 6
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
andl %ebx, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
# rnd_1: 7 - 7
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
addl %eax, %r10d
|
|
# rnd_0: 0 - 0
|
|
movl %r15d, %eax
|
|
rorxl $11, %r14d, %ecx
|
|
addl 24(%rsp), %r9d
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm1, %xmm4, %xmm4
|
|
# rnd_0: 2 - 2
|
|
andl %r14d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
xorl %r8d, %eax
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
andl %eax, %ebx
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
addl %ebx, %r9d
|
|
# rnd_1: 0 - 0
|
|
movl %r14d, %ebx
|
|
rorxl $11, %r13d, %ecx
|
|
addl 28(%rsp), %r8d
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
vpsrld $10, %xmm6, %xmm9
|
|
# rnd_1: 2 - 2
|
|
andl %r13d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
xorl %r15d, %ebx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r10d, %ebx
|
|
addl %r8d, %r12d
|
|
vpxor %xmm6, %xmm9, %xmm9
|
|
# rnd_1: 6 - 6
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
andl %ebx, %eax
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
addl %eax, %r8d
|
|
vpaddd %xmm4, %xmm9, %xmm1
|
|
# msg_sched done: 4-7
|
|
# msg_sched: 8-11
|
|
# rnd_0: 0 - 0
|
|
movl %r13d, %eax
|
|
rorxl $11, %r12d, %ecx
|
|
addl 32(%rsp), %r15d
|
|
vpalignr $4, %xmm0, %xmm1, %xmm4
|
|
vpalignr $4, %xmm2, %xmm3, %xmm5
|
|
# rnd_0: 1 - 2
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
andl %r12d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
xorl %r14d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
vpsrld $3, %xmm5, %xmm8
|
|
vpor %xmm6, %xmm7, %xmm7
|
|
# rnd_0: 5 - 7
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
andl %eax, %ebx
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
addl %ebx, %r15d
|
|
# rnd_1: 0 - 0
|
|
movl %r12d, %ebx
|
|
rorxl $11, %r11d, %ecx
|
|
addl 36(%rsp), %r14d
|
|
vpsrld $18, %xmm5, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
vpslld $14, %xmm5, %xmm5
|
|
# rnd_1: 2 - 2
|
|
andl %r11d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
vpxor %xmm5, %xmm7, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
xorl %r13d, %ebx
|
|
vpxor %xmm6, %xmm7, %xmm7
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
vpshufd $0xfa, %xmm1, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r8d, %ebx
|
|
addl %r14d, %r10d
|
|
vpxor %xmm8, %xmm7, %xmm5
|
|
# rnd_1: 6 - 6
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
andl %ebx, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
# rnd_1: 7 - 7
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
addl %eax, %r14d
|
|
# rnd_0: 0 - 0
|
|
movl %r11d, %eax
|
|
rorxl $11, %r10d, %ecx
|
|
addl 40(%rsp), %r13d
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm2, %xmm4, %xmm4
|
|
# rnd_0: 2 - 2
|
|
andl %r10d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
xorl %r12d, %eax
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
andl %eax, %ebx
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
addl %ebx, %r13d
|
|
# rnd_1: 0 - 0
|
|
movl %r10d, %ebx
|
|
rorxl $11, %r9d, %ecx
|
|
addl 44(%rsp), %r12d
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
vpsrld $10, %xmm6, %xmm9
|
|
# rnd_1: 2 - 2
|
|
andl %r9d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
xorl %r11d, %ebx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r14d, %ebx
|
|
addl %r12d, %r8d
|
|
vpxor %xmm6, %xmm9, %xmm9
|
|
# rnd_1: 6 - 6
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
andl %ebx, %eax
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
addl %eax, %r12d
|
|
vpaddd %xmm4, %xmm9, %xmm2
|
|
# msg_sched done: 8-11
|
|
# msg_sched: 12-15
|
|
# rnd_0: 0 - 0
|
|
movl %r9d, %eax
|
|
rorxl $11, %r8d, %ecx
|
|
addl 48(%rsp), %r11d
|
|
vpalignr $4, %xmm1, %xmm2, %xmm4
|
|
vpalignr $4, %xmm3, %xmm0, %xmm5
|
|
# rnd_0: 1 - 2
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
andl %r8d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
xorl %r10d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
vpsrld $3, %xmm5, %xmm8
|
|
vpor %xmm6, %xmm7, %xmm7
|
|
# rnd_0: 5 - 7
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
andl %eax, %ebx
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
addl %ebx, %r11d
|
|
# rnd_1: 0 - 0
|
|
movl %r8d, %ebx
|
|
rorxl $11, %r15d, %ecx
|
|
addl 52(%rsp), %r10d
|
|
vpsrld $18, %xmm5, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
vpslld $14, %xmm5, %xmm5
|
|
# rnd_1: 2 - 2
|
|
andl %r15d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
vpxor %xmm5, %xmm7, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
xorl %r9d, %ebx
|
|
vpxor %xmm6, %xmm7, %xmm7
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
vpshufd $0xfa, %xmm2, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r12d, %ebx
|
|
addl %r10d, %r14d
|
|
vpxor %xmm8, %xmm7, %xmm5
|
|
# rnd_1: 6 - 6
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
andl %ebx, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
# rnd_1: 7 - 7
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
addl %eax, %r10d
|
|
# rnd_0: 0 - 0
|
|
movl %r15d, %eax
|
|
rorxl $11, %r14d, %ecx
|
|
addl 56(%rsp), %r9d
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm3, %xmm4, %xmm4
|
|
# rnd_0: 2 - 2
|
|
andl %r14d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
xorl %r8d, %eax
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
andl %eax, %ebx
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
addl %ebx, %r9d
|
|
# rnd_1: 0 - 0
|
|
movl %r14d, %ebx
|
|
rorxl $11, %r13d, %ecx
|
|
addl 60(%rsp), %r8d
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
vpsrld $10, %xmm6, %xmm9
|
|
# rnd_1: 2 - 2
|
|
andl %r13d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
xorl %r15d, %ebx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r10d, %ebx
|
|
addl %r8d, %r12d
|
|
vpxor %xmm6, %xmm9, %xmm9
|
|
# rnd_1: 6 - 6
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
andl %ebx, %eax
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
addl %eax, %r8d
|
|
vpaddd %xmm4, %xmm9, %xmm3
|
|
# msg_sched done: 12-15
|
|
# set_w_k_xfer_4: 4
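# Same W+K staging as above, now for rounds 16-31 (K table offsets 64-112).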
vpaddd 64+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4
vpaddd 80+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5
vmovdqu %xmm4, (%rsp)
vmovdqu %xmm5, 16(%rsp)
vpaddd 96+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6
vpaddd 112+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7
vmovdqu %xmm6, 32(%rsp)
vmovdqu %xmm7, 48(%rsp)
# msg_sched: 0-3
|
|
# rnd_0: 0 - 0
|
|
movl %r13d, %eax
|
|
rorxl $11, %r12d, %ecx
|
|
addl (%rsp), %r15d
|
|
vpalignr $4, %xmm2, %xmm3, %xmm4
|
|
vpalignr $4, %xmm0, %xmm1, %xmm5
|
|
# rnd_0: 1 - 2
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
andl %r12d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
xorl %r14d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
vpsrld $3, %xmm5, %xmm8
|
|
vpor %xmm6, %xmm7, %xmm7
|
|
# rnd_0: 5 - 7
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
andl %eax, %ebx
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
addl %ebx, %r15d
|
|
# rnd_1: 0 - 0
|
|
movl %r12d, %ebx
|
|
rorxl $11, %r11d, %ecx
|
|
addl 4(%rsp), %r14d
|
|
vpsrld $18, %xmm5, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
vpslld $14, %xmm5, %xmm5
|
|
# rnd_1: 2 - 2
|
|
andl %r11d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
vpxor %xmm5, %xmm7, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
xorl %r13d, %ebx
|
|
vpxor %xmm6, %xmm7, %xmm7
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
vpshufd $0xfa, %xmm3, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r8d, %ebx
|
|
addl %r14d, %r10d
|
|
vpxor %xmm8, %xmm7, %xmm5
|
|
# rnd_1: 6 - 6
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
andl %ebx, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
# rnd_1: 7 - 7
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
addl %eax, %r14d
|
|
# rnd_0: 0 - 0
|
|
movl %r11d, %eax
|
|
rorxl $11, %r10d, %ecx
|
|
addl 8(%rsp), %r13d
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm0, %xmm4, %xmm4
|
|
# rnd_0: 2 - 2
|
|
andl %r10d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
xorl %r12d, %eax
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
andl %eax, %ebx
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
addl %ebx, %r13d
|
|
# rnd_1: 0 - 0
|
|
movl %r10d, %ebx
|
|
rorxl $11, %r9d, %ecx
|
|
addl 12(%rsp), %r12d
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
vpsrld $10, %xmm6, %xmm9
|
|
# rnd_1: 2 - 2
|
|
andl %r9d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
xorl %r11d, %ebx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r14d, %ebx
|
|
addl %r12d, %r8d
|
|
vpxor %xmm6, %xmm9, %xmm9
|
|
# rnd_1: 6 - 6
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
andl %ebx, %eax
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
addl %eax, %r12d
|
|
vpaddd %xmm4, %xmm9, %xmm0
|
|
# msg_sched done: 0-3
|
|
# msg_sched: 4-7
|
|
# rnd_0: 0 - 0
|
|
movl %r9d, %eax
|
|
rorxl $11, %r8d, %ecx
|
|
addl 16(%rsp), %r11d
|
|
vpalignr $4, %xmm3, %xmm0, %xmm4
|
|
vpalignr $4, %xmm1, %xmm2, %xmm5
|
|
# rnd_0: 1 - 2
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
andl %r8d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
xorl %r10d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
vpsrld $3, %xmm5, %xmm8
|
|
vpor %xmm6, %xmm7, %xmm7
|
|
# rnd_0: 5 - 7
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
andl %eax, %ebx
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
addl %ebx, %r11d
|
|
# rnd_1: 0 - 0
|
|
movl %r8d, %ebx
|
|
rorxl $11, %r15d, %ecx
|
|
addl 20(%rsp), %r10d
|
|
vpsrld $18, %xmm5, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
vpslld $14, %xmm5, %xmm5
|
|
# rnd_1: 2 - 2
|
|
andl %r15d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
vpxor %xmm5, %xmm7, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
xorl %r9d, %ebx
|
|
vpxor %xmm6, %xmm7, %xmm7
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
vpshufd $0xfa, %xmm0, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r12d, %ebx
|
|
addl %r10d, %r14d
|
|
vpxor %xmm8, %xmm7, %xmm5
|
|
# rnd_1: 6 - 6
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
andl %ebx, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
# rnd_1: 7 - 7
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
addl %eax, %r10d
|
|
# rnd_0: 0 - 0
|
|
movl %r15d, %eax
|
|
rorxl $11, %r14d, %ecx
|
|
addl 24(%rsp), %r9d
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm1, %xmm4, %xmm4
|
|
# rnd_0: 2 - 2
|
|
andl %r14d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
xorl %r8d, %eax
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
andl %eax, %ebx
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
addl %ebx, %r9d
|
|
# rnd_1: 0 - 0
|
|
movl %r14d, %ebx
|
|
rorxl $11, %r13d, %ecx
|
|
addl 28(%rsp), %r8d
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
vpsrld $10, %xmm6, %xmm9
|
|
# rnd_1: 2 - 2
|
|
andl %r13d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
xorl %r15d, %ebx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r10d, %ebx
|
|
addl %r8d, %r12d
|
|
vpxor %xmm6, %xmm9, %xmm9
|
|
# rnd_1: 6 - 6
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
andl %ebx, %eax
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
addl %eax, %r8d
|
|
vpaddd %xmm4, %xmm9, %xmm1
|
|
# msg_sched done: 4-7
|
|
# msg_sched: 8-11
|
|
# rnd_0: 0 - 0
|
|
movl %r13d, %eax
|
|
rorxl $11, %r12d, %ecx
|
|
addl 32(%rsp), %r15d
|
|
vpalignr $4, %xmm0, %xmm1, %xmm4
|
|
vpalignr $4, %xmm2, %xmm3, %xmm5
|
|
# rnd_0: 1 - 2
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
andl %r12d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
xorl %r14d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
vpsrld $3, %xmm5, %xmm8
|
|
vpor %xmm6, %xmm7, %xmm7
|
|
# rnd_0: 5 - 7
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
andl %eax, %ebx
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
addl %ebx, %r15d
|
|
# rnd_1: 0 - 0
|
|
movl %r12d, %ebx
|
|
rorxl $11, %r11d, %ecx
|
|
addl 36(%rsp), %r14d
|
|
vpsrld $18, %xmm5, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
vpslld $14, %xmm5, %xmm5
|
|
# rnd_1: 2 - 2
|
|
andl %r11d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
vpxor %xmm5, %xmm7, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
xorl %r13d, %ebx
|
|
vpxor %xmm6, %xmm7, %xmm7
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
vpshufd $0xfa, %xmm1, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r8d, %ebx
|
|
addl %r14d, %r10d
|
|
vpxor %xmm8, %xmm7, %xmm5
|
|
# rnd_1: 6 - 6
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
andl %ebx, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
# rnd_1: 7 - 7
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
addl %eax, %r14d
|
|
# rnd_0: 0 - 0
|
|
movl %r11d, %eax
|
|
rorxl $11, %r10d, %ecx
|
|
addl 40(%rsp), %r13d
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm2, %xmm4, %xmm4
|
|
# rnd_0: 2 - 2
|
|
andl %r10d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
xorl %r12d, %eax
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
andl %eax, %ebx
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
addl %ebx, %r13d
|
|
# rnd_1: 0 - 0
|
|
movl %r10d, %ebx
|
|
rorxl $11, %r9d, %ecx
|
|
addl 44(%rsp), %r12d
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
vpsrld $10, %xmm6, %xmm9
|
|
# rnd_1: 2 - 2
|
|
andl %r9d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
xorl %r11d, %ebx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r14d, %ebx
|
|
addl %r12d, %r8d
|
|
vpxor %xmm6, %xmm9, %xmm9
|
|
# rnd_1: 6 - 6
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
andl %ebx, %eax
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
addl %eax, %r12d
|
|
vpaddd %xmm4, %xmm9, %xmm2
|
|
# msg_sched done: 8-11
|
|
# msg_sched: 12-15
|
|
# rnd_0: 0 - 0
|
|
movl %r9d, %eax
|
|
rorxl $11, %r8d, %ecx
|
|
addl 48(%rsp), %r11d
|
|
vpalignr $4, %xmm1, %xmm2, %xmm4
|
|
vpalignr $4, %xmm3, %xmm0, %xmm5
|
|
# rnd_0: 1 - 2
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
andl %r8d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
xorl %r10d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
vpsrld $3, %xmm5, %xmm8
|
|
vpor %xmm6, %xmm7, %xmm7
|
|
# rnd_0: 5 - 7
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
andl %eax, %ebx
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
addl %ebx, %r11d
|
|
# rnd_1: 0 - 0
|
|
movl %r8d, %ebx
|
|
rorxl $11, %r15d, %ecx
|
|
addl 52(%rsp), %r10d
|
|
vpsrld $18, %xmm5, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
vpslld $14, %xmm5, %xmm5
|
|
# rnd_1: 2 - 2
|
|
andl %r15d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
vpxor %xmm5, %xmm7, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
xorl %r9d, %ebx
|
|
vpxor %xmm6, %xmm7, %xmm7
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
vpshufd $0xfa, %xmm2, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r12d, %ebx
|
|
addl %r10d, %r14d
|
|
vpxor %xmm8, %xmm7, %xmm5
|
|
# rnd_1: 6 - 6
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
andl %ebx, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
# rnd_1: 7 - 7
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
addl %eax, %r10d
|
|
# rnd_0: 0 - 0
|
|
movl %r15d, %eax
|
|
rorxl $11, %r14d, %ecx
|
|
addl 56(%rsp), %r9d
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm3, %xmm4, %xmm4
|
|
# rnd_0: 2 - 2
|
|
andl %r14d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
xorl %r8d, %eax
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
andl %eax, %ebx
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
addl %ebx, %r9d
|
|
# rnd_1: 0 - 0
|
|
movl %r14d, %ebx
|
|
rorxl $11, %r13d, %ecx
|
|
addl 60(%rsp), %r8d
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
vpsrld $10, %xmm6, %xmm9
|
|
# rnd_1: 2 - 2
|
|
andl %r13d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
xorl %r15d, %ebx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r10d, %ebx
|
|
addl %r8d, %r12d
|
|
vpxor %xmm6, %xmm9, %xmm9
|
|
# rnd_1: 6 - 6
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
andl %ebx, %eax
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
addl %eax, %r8d
|
|
vpaddd %xmm4, %xmm9, %xmm3
|
|
# msg_sched done: 12-15
|
|
# set_w_k_xfer_4: 8
|
|
vpaddd 128+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4
|
|
vpaddd 144+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5
|
|
vmovdqu %xmm4, (%rsp)
|
|
vmovdqu %xmm5, 16(%rsp)
|
|
vpaddd 160+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6
|
|
vpaddd 176+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7
|
|
vmovdqu %xmm6, 32(%rsp)
|
|
vmovdqu %xmm7, 48(%rsp)
|
|
# msg_sched: 0-3
|
|
# rnd_0: 0 - 0
|
|
movl %r13d, %eax
|
|
rorxl $11, %r12d, %ecx
|
|
addl (%rsp), %r15d
|
|
vpalignr $4, %xmm2, %xmm3, %xmm4
|
|
vpalignr $4, %xmm0, %xmm1, %xmm5
|
|
# rnd_0: 1 - 2
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
andl %r12d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
xorl %r14d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
vpsrld $3, %xmm5, %xmm8
|
|
vpor %xmm6, %xmm7, %xmm7
|
|
# rnd_0: 5 - 7
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
andl %eax, %ebx
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
addl %ebx, %r15d
|
|
# rnd_1: 0 - 0
|
|
movl %r12d, %ebx
|
|
rorxl $11, %r11d, %ecx
|
|
addl 4(%rsp), %r14d
|
|
vpsrld $18, %xmm5, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
vpslld $14, %xmm5, %xmm5
|
|
# rnd_1: 2 - 2
|
|
andl %r11d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
vpxor %xmm5, %xmm7, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
xorl %r13d, %ebx
|
|
vpxor %xmm6, %xmm7, %xmm7
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
vpshufd $0xfa, %xmm3, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r8d, %ebx
|
|
addl %r14d, %r10d
|
|
vpxor %xmm8, %xmm7, %xmm5
|
|
# rnd_1: 6 - 6
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
andl %ebx, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
# rnd_1: 7 - 7
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
addl %eax, %r14d
|
|
# rnd_0: 0 - 0
|
|
movl %r11d, %eax
|
|
rorxl $11, %r10d, %ecx
|
|
addl 8(%rsp), %r13d
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm0, %xmm4, %xmm4
|
|
# rnd_0: 2 - 2
|
|
andl %r10d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
xorl %r12d, %eax
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
andl %eax, %ebx
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
addl %ebx, %r13d
|
|
# rnd_1: 0 - 0
|
|
movl %r10d, %ebx
|
|
rorxl $11, %r9d, %ecx
|
|
addl 12(%rsp), %r12d
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
vpsrld $10, %xmm6, %xmm9
|
|
# rnd_1: 2 - 2
|
|
andl %r9d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
xorl %r11d, %ebx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r14d, %ebx
|
|
addl %r12d, %r8d
|
|
vpxor %xmm6, %xmm9, %xmm9
|
|
# rnd_1: 6 - 6
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
andl %ebx, %eax
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
addl %eax, %r12d
|
|
vpaddd %xmm4, %xmm9, %xmm0
|
|
# msg_sched done: 0-3
|
|
# msg_sched: 4-7
|
|
# rnd_0: 0 - 0
|
|
movl %r9d, %eax
|
|
rorxl $11, %r8d, %ecx
|
|
addl 16(%rsp), %r11d
|
|
vpalignr $4, %xmm3, %xmm0, %xmm4
|
|
vpalignr $4, %xmm1, %xmm2, %xmm5
|
|
# rnd_0: 1 - 2
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
andl %r8d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
xorl %r10d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
vpsrld $3, %xmm5, %xmm8
|
|
vpor %xmm6, %xmm7, %xmm7
|
|
# rnd_0: 5 - 7
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
andl %eax, %ebx
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
addl %ebx, %r11d
|
|
# rnd_1: 0 - 0
|
|
movl %r8d, %ebx
|
|
rorxl $11, %r15d, %ecx
|
|
addl 20(%rsp), %r10d
|
|
vpsrld $18, %xmm5, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
vpslld $14, %xmm5, %xmm5
|
|
# rnd_1: 2 - 2
|
|
andl %r15d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
vpxor %xmm5, %xmm7, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
xorl %r9d, %ebx
|
|
vpxor %xmm6, %xmm7, %xmm7
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
vpshufd $0xfa, %xmm0, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r12d, %ebx
|
|
addl %r10d, %r14d
|
|
vpxor %xmm8, %xmm7, %xmm5
|
|
# rnd_1: 6 - 6
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
andl %ebx, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
# rnd_1: 7 - 7
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
addl %eax, %r10d
|
|
# rnd_0: 0 - 0
|
|
movl %r15d, %eax
|
|
rorxl $11, %r14d, %ecx
|
|
addl 24(%rsp), %r9d
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm1, %xmm4, %xmm4
|
|
# rnd_0: 2 - 2
|
|
andl %r14d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
xorl %r8d, %eax
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
andl %eax, %ebx
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
addl %ebx, %r9d
|
|
# rnd_1: 0 - 0
|
|
movl %r14d, %ebx
|
|
rorxl $11, %r13d, %ecx
|
|
addl 28(%rsp), %r8d
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
vpsrld $10, %xmm6, %xmm9
|
|
# rnd_1: 2 - 2
|
|
andl %r13d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
xorl %r15d, %ebx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r10d, %ebx
|
|
addl %r8d, %r12d
|
|
vpxor %xmm6, %xmm9, %xmm9
|
|
# rnd_1: 6 - 6
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
andl %ebx, %eax
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
addl %eax, %r8d
|
|
vpaddd %xmm4, %xmm9, %xmm1
|
|
# msg_sched done: 4-7
|
|
# msg_sched: 8-11
|
|
# rnd_0: 0 - 0
|
|
movl %r13d, %eax
|
|
rorxl $11, %r12d, %ecx
|
|
addl 32(%rsp), %r15d
|
|
vpalignr $4, %xmm0, %xmm1, %xmm4
|
|
vpalignr $4, %xmm2, %xmm3, %xmm5
|
|
# rnd_0: 1 - 2
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
andl %r12d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
xorl %r14d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
vpsrld $3, %xmm5, %xmm8
|
|
vpor %xmm6, %xmm7, %xmm7
|
|
# rnd_0: 5 - 7
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
andl %eax, %ebx
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
addl %ebx, %r15d
|
|
# rnd_1: 0 - 0
|
|
movl %r12d, %ebx
|
|
rorxl $11, %r11d, %ecx
|
|
addl 36(%rsp), %r14d
|
|
vpsrld $18, %xmm5, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
vpslld $14, %xmm5, %xmm5
|
|
# rnd_1: 2 - 2
|
|
andl %r11d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
vpxor %xmm5, %xmm7, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
xorl %r13d, %ebx
|
|
vpxor %xmm6, %xmm7, %xmm7
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
vpshufd $0xfa, %xmm1, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r8d, %ebx
|
|
addl %r14d, %r10d
|
|
vpxor %xmm8, %xmm7, %xmm5
|
|
# rnd_1: 6 - 6
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
andl %ebx, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
# rnd_1: 7 - 7
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
addl %eax, %r14d
|
|
# rnd_0: 0 - 0
|
|
movl %r11d, %eax
|
|
rorxl $11, %r10d, %ecx
|
|
addl 40(%rsp), %r13d
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm2, %xmm4, %xmm4
|
|
# rnd_0: 2 - 2
|
|
andl %r10d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
xorl %r12d, %eax
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
andl %eax, %ebx
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
addl %ebx, %r13d
|
|
# rnd_1: 0 - 0
|
|
movl %r10d, %ebx
|
|
rorxl $11, %r9d, %ecx
|
|
addl 44(%rsp), %r12d
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
vpsrld $10, %xmm6, %xmm9
|
|
# rnd_1: 2 - 2
|
|
andl %r9d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
xorl %r11d, %ebx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r14d, %ebx
|
|
addl %r12d, %r8d
|
|
vpxor %xmm6, %xmm9, %xmm9
|
|
# rnd_1: 6 - 6
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
andl %ebx, %eax
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
addl %eax, %r12d
|
|
vpaddd %xmm4, %xmm9, %xmm2
|
|
# msg_sched done: 8-11
|
|
# msg_sched: 12-15
|
|
# rnd_0: 0 - 0
|
|
movl %r9d, %eax
|
|
rorxl $11, %r8d, %ecx
|
|
addl 48(%rsp), %r11d
|
|
vpalignr $4, %xmm1, %xmm2, %xmm4
|
|
vpalignr $4, %xmm3, %xmm0, %xmm5
|
|
# rnd_0: 1 - 2
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
andl %r8d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
xorl %r10d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
vpsrld $3, %xmm5, %xmm8
|
|
vpor %xmm6, %xmm7, %xmm7
|
|
# rnd_0: 5 - 7
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
andl %eax, %ebx
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
addl %ebx, %r11d
|
|
# rnd_1: 0 - 0
|
|
movl %r8d, %ebx
|
|
rorxl $11, %r15d, %ecx
|
|
addl 52(%rsp), %r10d
|
|
vpsrld $18, %xmm5, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
vpslld $14, %xmm5, %xmm5
|
|
# rnd_1: 2 - 2
|
|
andl %r15d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
vpxor %xmm5, %xmm7, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
xorl %r9d, %ebx
|
|
vpxor %xmm6, %xmm7, %xmm7
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
vpshufd $0xfa, %xmm2, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r12d, %ebx
|
|
addl %r10d, %r14d
|
|
vpxor %xmm8, %xmm7, %xmm5
|
|
# rnd_1: 6 - 6
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
andl %ebx, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
# rnd_1: 7 - 7
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
addl %eax, %r10d
|
|
# rnd_0: 0 - 0
|
|
movl %r15d, %eax
|
|
rorxl $11, %r14d, %ecx
|
|
addl 56(%rsp), %r9d
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm3, %xmm4, %xmm4
|
|
# rnd_0: 2 - 2
|
|
andl %r14d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
xorl %r8d, %eax
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
andl %eax, %ebx
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
addl %ebx, %r9d
|
|
# rnd_1: 0 - 0
|
|
movl %r14d, %ebx
|
|
rorxl $11, %r13d, %ecx
|
|
addl 60(%rsp), %r8d
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
vpsrld $10, %xmm6, %xmm9
|
|
# rnd_1: 2 - 2
|
|
andl %r13d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
xorl %r15d, %ebx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r10d, %ebx
|
|
addl %r8d, %r12d
|
|
vpxor %xmm6, %xmm9, %xmm9
|
|
# rnd_1: 6 - 6
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
andl %ebx, %eax
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
addl %eax, %r8d
|
|
vpaddd %xmm4, %xmm9, %xmm3
|
|
# msg_sched done: 12-15
# set_w_k_xfer_4: 12
vpaddd 192+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4
vpaddd 208+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5
vmovdqu %xmm4, (%rsp)
vmovdqu %xmm5, 16(%rsp)
vpaddd 224+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6
vpaddd 240+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7
vmovdqu %xmm6, 32(%rsp)
vmovdqu %xmm7, 48(%rsp)
xorl %eax, %eax
# rnd_all_4: 0-3
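# Rounds 48-63: no further message scheduling is needed, so the remaining
# rounds run unrolled four at a time straight from the W+K values staged
# on the stack.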
|
|
rorxl $6, %r12d, %edx
|
|
rorxl $11, %r12d, %ecx
|
|
addl %eax, %r8d
|
|
addl (%rsp), %r15d
|
|
movl %r13d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r12d, %eax
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
xorl %r14d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r15d
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
rorxl $11, %r11d, %ecx
|
|
addl %ebx, %r15d
|
|
addl 4(%rsp), %r14d
|
|
movl %r12d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r11d, %ebx
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
xorl %r13d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
xorl %ecx, %edx
|
|
movl %r8d, %ebx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r14d
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
rorxl $11, %r10d, %ecx
|
|
addl %eax, %r14d
|
|
addl 8(%rsp), %r13d
|
|
movl %r11d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r10d, %eax
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
xorl %r12d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r13d
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
rorxl $11, %r9d, %ecx
|
|
addl %ebx, %r13d
|
|
addl 12(%rsp), %r12d
|
|
movl %r10d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r9d, %ebx
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
xorl %r11d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
xorl %ecx, %edx
|
|
movl %r14d, %ebx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r12d
|
|
xorl %r14d, %eax
|
|
# rnd_all_4: 1-4
|
|
rorxl $6, %r8d, %edx
|
|
rorxl $11, %r8d, %ecx
|
|
addl %eax, %r12d
|
|
addl 16(%rsp), %r11d
|
|
movl %r9d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r8d, %eax
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
xorl %r10d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r11d
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
rorxl $11, %r15d, %ecx
|
|
addl %ebx, %r11d
|
|
addl 20(%rsp), %r10d
|
|
movl %r8d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r15d, %ebx
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
xorl %r9d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
xorl %ecx, %edx
|
|
movl %r12d, %ebx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r10d
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
rorxl $11, %r14d, %ecx
|
|
addl %eax, %r10d
|
|
addl 24(%rsp), %r9d
|
|
movl %r15d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r14d, %eax
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
xorl %r8d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r9d
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
rorxl $11, %r13d, %ecx
|
|
addl %ebx, %r9d
|
|
addl 28(%rsp), %r8d
|
|
movl %r14d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r13d, %ebx
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
xorl %r15d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
xorl %ecx, %edx
|
|
movl %r10d, %ebx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r8d
|
|
xorl %r10d, %eax
|
|
# rnd_all_4: 2-5
|
|
rorxl $6, %r12d, %edx
|
|
rorxl $11, %r12d, %ecx
|
|
addl %eax, %r8d
|
|
addl 32(%rsp), %r15d
|
|
movl %r13d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r12d, %eax
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
xorl %r14d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r15d
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
rorxl $11, %r11d, %ecx
|
|
addl %ebx, %r15d
|
|
addl 36(%rsp), %r14d
|
|
movl %r12d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r11d, %ebx
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
xorl %r13d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
xorl %ecx, %edx
|
|
movl %r8d, %ebx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r14d
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
rorxl $11, %r10d, %ecx
|
|
addl %eax, %r14d
|
|
addl 40(%rsp), %r13d
|
|
movl %r11d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r10d, %eax
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
xorl %r12d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r13d
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
rorxl $11, %r9d, %ecx
|
|
addl %ebx, %r13d
|
|
addl 44(%rsp), %r12d
|
|
movl %r10d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r9d, %ebx
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
xorl %r11d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
xorl %ecx, %edx
|
|
movl %r14d, %ebx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r12d
|
|
xorl %r14d, %eax
|
|
# rnd_all_4: 3-6
|
|
rorxl $6, %r8d, %edx
|
|
rorxl $11, %r8d, %ecx
|
|
addl %eax, %r12d
|
|
addl 48(%rsp), %r11d
|
|
movl %r9d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r8d, %eax
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
xorl %r10d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r11d
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
rorxl $11, %r15d, %ecx
|
|
addl %ebx, %r11d
|
|
addl 52(%rsp), %r10d
|
|
movl %r8d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r15d, %ebx
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
xorl %r9d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
xorl %ecx, %edx
|
|
movl %r12d, %ebx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r10d
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
rorxl $11, %r14d, %ecx
|
|
addl %eax, %r10d
|
|
addl 56(%rsp), %r9d
|
|
movl %r15d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r14d, %eax
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
xorl %r8d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r9d
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
rorxl $11, %r13d, %ecx
|
|
addl %ebx, %r9d
|
|
addl 60(%rsp), %r8d
|
|
movl %r14d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r13d, %ebx
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
xorl %r15d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
xorl %ecx, %edx
|
|
movl %r10d, %ebx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r8d
|
|
xorl %r10d, %eax
|
|
addl %eax, %r8d
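# Add the working registers back into the hash state at (%rdi) and return 0.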
addl %r8d, (%rdi)
addl %r9d, 4(%rdi)
addl %r10d, 8(%rdi)
addl %r11d, 12(%rdi)
addl %r12d, 16(%rdi)
addl %r13d, 20(%rdi)
addl %r14d, 24(%rdi)
addl %r15d, 28(%rdi)
xorq %rax, %rax
vzeroupper
addq $0x40, %rsp
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size Transform_Sha256_AVX1_RORX,.-Transform_Sha256_AVX1_RORX
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl Transform_Sha256_AVX1_RORX_Len
.type Transform_Sha256_AVX1_RORX_Len,@function
.align 16
Transform_Sha256_AVX1_RORX_Len:
#else
.section __TEXT,__text
.globl _Transform_Sha256_AVX1_RORX_Len
.p2align 4
_Transform_Sha256_AVX1_RORX_Len:
#endif /* __APPLE__ */
pushq %rbx
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbp
movq %rsi, %rbp
movq %rdx, %rsi
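# (Per the System V calling convention this routine appears to take the
# state in %rdi, the data in %rsi and the length in %rdx; after the moves
# above %rbp walks the input data and %rsi carries the remaining byte count
# for the block loop.)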
subq $0x40, %rsp
vmovdqa L_avx1_rorx_sha256_flip_mask(%rip), %xmm13
vmovdqa L_avx1_rorx_sha256_shuf_00BA(%rip), %xmm11
vmovdqa L_avx1_rorx_sha256_shuf_DC00(%rip), %xmm12
movl (%rdi), %r8d
movl 4(%rdi), %r9d
movl 8(%rdi), %r10d
movl 12(%rdi), %r11d
movl 16(%rdi), %r12d
movl 20(%rdi), %r13d
movl 24(%rdi), %r14d
movl 28(%rdi), %r15d
# Start of loop processing a block
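# Each pass of L_sha256_len_avx1_len_rorx_start hashes one 64-byte block
# read from (%rbp), repeating the schedule/round pattern used above.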
L_sha256_len_avx1_len_rorx_start:
# X0, X1, X2, X3 = W[0..15]
vmovdqu (%rbp), %xmm0
vmovdqu 16(%rbp), %xmm1
vpshufb %xmm13, %xmm0, %xmm0
vpshufb %xmm13, %xmm1, %xmm1
vmovdqu 32(%rbp), %xmm2
vmovdqu 48(%rbp), %xmm3
vpshufb %xmm13, %xmm2, %xmm2
vpshufb %xmm13, %xmm3, %xmm3
# set_w_k_xfer_4: 0
vpaddd 0+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4
vpaddd 16+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5
vmovdqu %xmm4, (%rsp)
vmovdqu %xmm5, 16(%rsp)
vpaddd 32+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6
vpaddd 48+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7
vmovdqu %xmm6, 32(%rsp)
vmovdqu %xmm7, 48(%rsp)
movl %r9d, %ebx
|
|
rorxl $6, %r12d, %edx
|
|
xorl %r10d, %ebx
|
|
# msg_sched: 0-3
|
|
# rnd_0: 0 - 0
|
|
movl %r13d, %eax
|
|
rorxl $11, %r12d, %ecx
|
|
addl (%rsp), %r15d
|
|
vpalignr $4, %xmm2, %xmm3, %xmm4
|
|
vpalignr $4, %xmm0, %xmm1, %xmm5
|
|
# rnd_0: 1 - 2
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
andl %r12d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
xorl %r14d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
vpsrld $3, %xmm5, %xmm8
|
|
vpor %xmm6, %xmm7, %xmm7
|
|
# rnd_0: 5 - 7
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
andl %eax, %ebx
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
addl %ebx, %r15d
|
|
# rnd_1: 0 - 0
|
|
movl %r12d, %ebx
|
|
rorxl $11, %r11d, %ecx
|
|
addl 4(%rsp), %r14d
|
|
vpsrld $18, %xmm5, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
vpslld $14, %xmm5, %xmm5
|
|
# rnd_1: 2 - 2
|
|
andl %r11d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
vpxor %xmm5, %xmm7, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
xorl %r13d, %ebx
|
|
vpxor %xmm6, %xmm7, %xmm7
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
vpshufd $0xfa, %xmm3, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r8d, %ebx
|
|
addl %r14d, %r10d
|
|
vpxor %xmm8, %xmm7, %xmm5
|
|
# rnd_1: 6 - 6
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
andl %ebx, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
# rnd_1: 7 - 7
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
addl %eax, %r14d
|
|
# rnd_0: 0 - 0
|
|
movl %r11d, %eax
|
|
rorxl $11, %r10d, %ecx
|
|
addl 8(%rsp), %r13d
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm0, %xmm4, %xmm4
|
|
# rnd_0: 2 - 2
|
|
andl %r10d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
xorl %r12d, %eax
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
andl %eax, %ebx
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
addl %ebx, %r13d
|
|
# rnd_1: 0 - 0
|
|
movl %r10d, %ebx
|
|
rorxl $11, %r9d, %ecx
|
|
addl 12(%rsp), %r12d
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
vpsrld $10, %xmm6, %xmm9
|
|
# rnd_1: 2 - 2
|
|
andl %r9d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
xorl %r11d, %ebx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r14d, %ebx
|
|
addl %r12d, %r8d
|
|
vpxor %xmm6, %xmm9, %xmm9
|
|
# rnd_1: 6 - 6
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
andl %ebx, %eax
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
addl %eax, %r12d
|
|
vpaddd %xmm4, %xmm9, %xmm0
|
|
# msg_sched done: 0-3
|
|
# msg_sched: 4-7
|
|
# rnd_0: 0 - 0
|
|
movl %r9d, %eax
|
|
rorxl $11, %r8d, %ecx
|
|
addl 16(%rsp), %r11d
|
|
vpalignr $4, %xmm3, %xmm0, %xmm4
|
|
vpalignr $4, %xmm1, %xmm2, %xmm5
|
|
# rnd_0: 1 - 2
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
andl %r8d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
xorl %r10d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
vpsrld $3, %xmm5, %xmm8
|
|
vpor %xmm6, %xmm7, %xmm7
|
|
# rnd_0: 5 - 7
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
andl %eax, %ebx
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
addl %ebx, %r11d
|
|
# rnd_1: 0 - 0
|
|
movl %r8d, %ebx
|
|
rorxl $11, %r15d, %ecx
|
|
addl 20(%rsp), %r10d
|
|
vpsrld $18, %xmm5, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
vpslld $14, %xmm5, %xmm5
|
|
# rnd_1: 2 - 2
|
|
andl %r15d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
vpxor %xmm5, %xmm7, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
xorl %r9d, %ebx
|
|
vpxor %xmm6, %xmm7, %xmm7
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
vpshufd $0xfa, %xmm0, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r12d, %ebx
|
|
addl %r10d, %r14d
|
|
vpxor %xmm8, %xmm7, %xmm5
|
|
# rnd_1: 6 - 6
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
andl %ebx, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
# rnd_1: 7 - 7
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
addl %eax, %r10d
|
|
# rnd_0: 0 - 0
|
|
movl %r15d, %eax
|
|
rorxl $11, %r14d, %ecx
|
|
addl 24(%rsp), %r9d
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm1, %xmm4, %xmm4
|
|
# rnd_0: 2 - 2
|
|
andl %r14d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
xorl %r8d, %eax
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
andl %eax, %ebx
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
addl %ebx, %r9d
|
|
# rnd_1: 0 - 0
|
|
movl %r14d, %ebx
|
|
rorxl $11, %r13d, %ecx
|
|
addl 28(%rsp), %r8d
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
vpsrld $10, %xmm6, %xmm9
|
|
# rnd_1: 2 - 2
|
|
andl %r13d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
xorl %r15d, %ebx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r10d, %ebx
|
|
addl %r8d, %r12d
|
|
vpxor %xmm6, %xmm9, %xmm9
|
|
# rnd_1: 6 - 6
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
andl %ebx, %eax
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
addl %eax, %r8d
|
|
vpaddd %xmm4, %xmm9, %xmm1
|
|
# msg_sched done: 4-7
|
|
# msg_sched: 8-11
|
|
# rnd_0: 0 - 0
|
|
movl %r13d, %eax
|
|
rorxl $11, %r12d, %ecx
|
|
addl 32(%rsp), %r15d
|
|
vpalignr $4, %xmm0, %xmm1, %xmm4
|
|
vpalignr $4, %xmm2, %xmm3, %xmm5
|
|
# rnd_0: 1 - 2
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
andl %r12d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
xorl %r14d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
vpsrld $3, %xmm5, %xmm8
|
|
vpor %xmm6, %xmm7, %xmm7
|
|
# rnd_0: 5 - 7
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
andl %eax, %ebx
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
addl %ebx, %r15d
|
|
# rnd_1: 0 - 0
|
|
movl %r12d, %ebx
|
|
rorxl $11, %r11d, %ecx
|
|
addl 36(%rsp), %r14d
|
|
vpsrld $18, %xmm5, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
vpslld $14, %xmm5, %xmm5
|
|
# rnd_1: 2 - 2
|
|
andl %r11d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
vpxor %xmm5, %xmm7, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
xorl %r13d, %ebx
|
|
vpxor %xmm6, %xmm7, %xmm7
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
vpshufd $0xfa, %xmm1, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r8d, %ebx
|
|
addl %r14d, %r10d
|
|
vpxor %xmm8, %xmm7, %xmm5
|
|
# rnd_1: 6 - 6
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
andl %ebx, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
# rnd_1: 7 - 7
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
addl %eax, %r14d
|
|
# rnd_0: 0 - 0
|
|
movl %r11d, %eax
|
|
rorxl $11, %r10d, %ecx
|
|
addl 40(%rsp), %r13d
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm2, %xmm4, %xmm4
|
|
# rnd_0: 2 - 2
|
|
andl %r10d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
xorl %r12d, %eax
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
andl %eax, %ebx
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
addl %ebx, %r13d
|
|
# rnd_1: 0 - 0
|
|
movl %r10d, %ebx
|
|
rorxl $11, %r9d, %ecx
|
|
addl 44(%rsp), %r12d
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
vpsrld $10, %xmm6, %xmm9
|
|
# rnd_1: 2 - 2
|
|
andl %r9d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
xorl %r11d, %ebx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r14d, %ebx
|
|
addl %r12d, %r8d
|
|
vpxor %xmm6, %xmm9, %xmm9
|
|
# rnd_1: 6 - 6
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
andl %ebx, %eax
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
addl %eax, %r12d
|
|
vpaddd %xmm4, %xmm9, %xmm2
|
|
# msg_sched done: 8-11
|
|
# msg_sched: 12-15
|
|
# rnd_0: 0 - 0
|
|
movl %r9d, %eax
|
|
rorxl $11, %r8d, %ecx
|
|
addl 48(%rsp), %r11d
|
|
vpalignr $4, %xmm1, %xmm2, %xmm4
|
|
vpalignr $4, %xmm3, %xmm0, %xmm5
|
|
# rnd_0: 1 - 2
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
andl %r8d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
xorl %r10d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
vpsrld $3, %xmm5, %xmm8
|
|
vpor %xmm6, %xmm7, %xmm7
|
|
# rnd_0: 5 - 7
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
andl %eax, %ebx
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
addl %ebx, %r11d
|
|
# rnd_1: 0 - 0
|
|
movl %r8d, %ebx
|
|
rorxl $11, %r15d, %ecx
|
|
addl 52(%rsp), %r10d
|
|
vpsrld $18, %xmm5, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
vpslld $14, %xmm5, %xmm5
|
|
# rnd_1: 2 - 2
|
|
andl %r15d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
vpxor %xmm5, %xmm7, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
xorl %r9d, %ebx
|
|
vpxor %xmm6, %xmm7, %xmm7
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
vpshufd $0xfa, %xmm2, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r12d, %ebx
|
|
addl %r10d, %r14d
|
|
vpxor %xmm8, %xmm7, %xmm5
|
|
# rnd_1: 6 - 6
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
andl %ebx, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
# rnd_1: 7 - 7
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
addl %eax, %r10d
|
|
# rnd_0: 0 - 0
|
|
movl %r15d, %eax
|
|
rorxl $11, %r14d, %ecx
|
|
addl 56(%rsp), %r9d
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm3, %xmm4, %xmm4
|
|
# rnd_0: 2 - 2
|
|
andl %r14d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
xorl %r8d, %eax
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
andl %eax, %ebx
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
addl %ebx, %r9d
|
|
# rnd_1: 0 - 0
|
|
movl %r14d, %ebx
|
|
rorxl $11, %r13d, %ecx
|
|
addl 60(%rsp), %r8d
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
vpsrld $10, %xmm6, %xmm9
|
|
# rnd_1: 2 - 2
|
|
andl %r13d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
xorl %r15d, %ebx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r10d, %ebx
|
|
addl %r8d, %r12d
|
|
vpxor %xmm6, %xmm9, %xmm9
|
|
# rnd_1: 6 - 6
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
andl %ebx, %eax
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
addl %eax, %r8d
|
|
vpaddd %xmm4, %xmm9, %xmm3
|
|
# msg_sched done: 12-15
|
|
# set_w_k_xfer_4: 4
|
|
vpaddd 64+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4
|
|
vpaddd 80+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5
|
|
vmovdqu %xmm4, (%rsp)
|
|
vmovdqu %xmm5, 16(%rsp)
|
|
vpaddd 96+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6
|
|
vpaddd 112+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7
|
|
vmovdqu %xmm6, 32(%rsp)
|
|
vmovdqu %xmm7, 48(%rsp)
|
|
# msg_sched: 0-3
|
|
# rnd_0: 0 - 0
|
|
movl %r13d, %eax
|
|
rorxl $11, %r12d, %ecx
|
|
addl (%rsp), %r15d
|
|
vpalignr $4, %xmm2, %xmm3, %xmm4
|
|
vpalignr $4, %xmm0, %xmm1, %xmm5
|
|
# rnd_0: 1 - 2
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
andl %r12d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
xorl %r14d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
vpsrld $3, %xmm5, %xmm8
|
|
vpor %xmm6, %xmm7, %xmm7
|
|
# rnd_0: 5 - 7
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
andl %eax, %ebx
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
addl %ebx, %r15d
|
|
# rnd_1: 0 - 0
|
|
movl %r12d, %ebx
|
|
rorxl $11, %r11d, %ecx
|
|
addl 4(%rsp), %r14d
|
|
vpsrld $18, %xmm5, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
vpslld $14, %xmm5, %xmm5
|
|
# rnd_1: 2 - 2
|
|
andl %r11d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
vpxor %xmm5, %xmm7, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
xorl %r13d, %ebx
|
|
vpxor %xmm6, %xmm7, %xmm7
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
vpshufd $0xfa, %xmm3, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r8d, %ebx
|
|
addl %r14d, %r10d
|
|
vpxor %xmm8, %xmm7, %xmm5
|
|
# rnd_1: 6 - 6
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
andl %ebx, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
# rnd_1: 7 - 7
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
addl %eax, %r14d
|
|
# rnd_0: 0 - 0
|
|
movl %r11d, %eax
|
|
rorxl $11, %r10d, %ecx
|
|
addl 8(%rsp), %r13d
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm0, %xmm4, %xmm4
|
|
# rnd_0: 2 - 2
|
|
andl %r10d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
xorl %r12d, %eax
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
andl %eax, %ebx
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
addl %ebx, %r13d
|
|
# rnd_1: 0 - 0
|
|
movl %r10d, %ebx
|
|
rorxl $11, %r9d, %ecx
|
|
addl 12(%rsp), %r12d
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
vpsrld $10, %xmm6, %xmm9
|
|
# rnd_1: 2 - 2
|
|
andl %r9d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
xorl %r11d, %ebx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r14d, %ebx
|
|
addl %r12d, %r8d
|
|
vpxor %xmm6, %xmm9, %xmm9
|
|
# rnd_1: 6 - 6
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
andl %ebx, %eax
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
addl %eax, %r12d
|
|
vpaddd %xmm4, %xmm9, %xmm0
|
|
# msg_sched done: 0-3
|
|
# msg_sched: 4-7
|
|
# rnd_0: 0 - 0
|
|
movl %r9d, %eax
|
|
rorxl $11, %r8d, %ecx
|
|
addl 16(%rsp), %r11d
|
|
vpalignr $4, %xmm3, %xmm0, %xmm4
|
|
vpalignr $4, %xmm1, %xmm2, %xmm5
|
|
# rnd_0: 1 - 2
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
andl %r8d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
xorl %r10d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
vpsrld $3, %xmm5, %xmm8
|
|
vpor %xmm6, %xmm7, %xmm7
|
|
# rnd_0: 5 - 7
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
andl %eax, %ebx
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
addl %ebx, %r11d
|
|
# rnd_1: 0 - 0
|
|
movl %r8d, %ebx
|
|
rorxl $11, %r15d, %ecx
|
|
addl 20(%rsp), %r10d
|
|
vpsrld $18, %xmm5, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
vpslld $14, %xmm5, %xmm5
|
|
# rnd_1: 2 - 2
|
|
andl %r15d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
vpxor %xmm5, %xmm7, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
xorl %r9d, %ebx
|
|
vpxor %xmm6, %xmm7, %xmm7
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
vpshufd $0xfa, %xmm0, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r12d, %ebx
|
|
addl %r10d, %r14d
|
|
vpxor %xmm8, %xmm7, %xmm5
|
|
# rnd_1: 6 - 6
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
andl %ebx, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
# rnd_1: 7 - 7
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
addl %eax, %r10d
|
|
# rnd_0: 0 - 0
|
|
movl %r15d, %eax
|
|
rorxl $11, %r14d, %ecx
|
|
addl 24(%rsp), %r9d
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm1, %xmm4, %xmm4
|
|
# rnd_0: 2 - 2
|
|
andl %r14d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
xorl %r8d, %eax
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
andl %eax, %ebx
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
addl %ebx, %r9d
|
|
# rnd_1: 0 - 0
|
|
movl %r14d, %ebx
|
|
rorxl $11, %r13d, %ecx
|
|
addl 28(%rsp), %r8d
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
vpsrld $10, %xmm6, %xmm9
|
|
# rnd_1: 2 - 2
|
|
andl %r13d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
xorl %r15d, %ebx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r10d, %ebx
|
|
addl %r8d, %r12d
|
|
vpxor %xmm6, %xmm9, %xmm9
|
|
# rnd_1: 6 - 6
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
andl %ebx, %eax
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
addl %eax, %r8d
|
|
vpaddd %xmm4, %xmm9, %xmm1
|
|
# msg_sched done: 4-7
|
|
# msg_sched: 8-11
|
|
# rnd_0: 0 - 0
|
|
movl %r13d, %eax
|
|
rorxl $11, %r12d, %ecx
|
|
addl 32(%rsp), %r15d
|
|
vpalignr $4, %xmm0, %xmm1, %xmm4
|
|
vpalignr $4, %xmm2, %xmm3, %xmm5
|
|
# rnd_0: 1 - 2
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
andl %r12d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
xorl %r14d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
vpsrld $3, %xmm5, %xmm8
|
|
vpor %xmm6, %xmm7, %xmm7
|
|
# rnd_0: 5 - 7
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
andl %eax, %ebx
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
addl %ebx, %r15d
|
|
# rnd_1: 0 - 0
|
|
movl %r12d, %ebx
|
|
rorxl $11, %r11d, %ecx
|
|
addl 36(%rsp), %r14d
|
|
vpsrld $18, %xmm5, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
vpslld $14, %xmm5, %xmm5
|
|
# rnd_1: 2 - 2
|
|
andl %r11d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
vpxor %xmm5, %xmm7, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
xorl %r13d, %ebx
|
|
vpxor %xmm6, %xmm7, %xmm7
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
vpshufd $0xfa, %xmm1, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r8d, %ebx
|
|
addl %r14d, %r10d
|
|
vpxor %xmm8, %xmm7, %xmm5
|
|
# rnd_1: 6 - 6
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
andl %ebx, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
# rnd_1: 7 - 7
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
addl %eax, %r14d
|
|
# rnd_0: 0 - 0
|
|
movl %r11d, %eax
|
|
rorxl $11, %r10d, %ecx
|
|
addl 40(%rsp), %r13d
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm2, %xmm4, %xmm4
|
|
# rnd_0: 2 - 2
|
|
andl %r10d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
xorl %r12d, %eax
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
andl %eax, %ebx
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
addl %ebx, %r13d
|
|
# rnd_1: 0 - 0
|
|
movl %r10d, %ebx
|
|
rorxl $11, %r9d, %ecx
|
|
addl 44(%rsp), %r12d
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
vpsrld $10, %xmm6, %xmm9
|
|
# rnd_1: 2 - 2
|
|
andl %r9d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
xorl %r11d, %ebx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r14d, %ebx
|
|
addl %r12d, %r8d
|
|
vpxor %xmm6, %xmm9, %xmm9
|
|
# rnd_1: 6 - 6
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
andl %ebx, %eax
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
addl %eax, %r12d
|
|
vpaddd %xmm4, %xmm9, %xmm2
|
|
# msg_sched done: 8-11
|
|
# msg_sched: 12-15
|
|
# rnd_0: 0 - 0
|
|
movl %r9d, %eax
|
|
rorxl $11, %r8d, %ecx
|
|
addl 48(%rsp), %r11d
|
|
vpalignr $4, %xmm1, %xmm2, %xmm4
|
|
vpalignr $4, %xmm3, %xmm0, %xmm5
|
|
# rnd_0: 1 - 2
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
andl %r8d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
xorl %r10d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
vpsrld $3, %xmm5, %xmm8
|
|
vpor %xmm6, %xmm7, %xmm7
|
|
# rnd_0: 5 - 7
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
andl %eax, %ebx
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
addl %ebx, %r11d
|
|
# rnd_1: 0 - 0
|
|
movl %r8d, %ebx
|
|
rorxl $11, %r15d, %ecx
|
|
addl 52(%rsp), %r10d
|
|
vpsrld $18, %xmm5, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
vpslld $14, %xmm5, %xmm5
|
|
# rnd_1: 2 - 2
|
|
andl %r15d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
vpxor %xmm5, %xmm7, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
xorl %r9d, %ebx
|
|
vpxor %xmm6, %xmm7, %xmm7
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
vpshufd $0xfa, %xmm2, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r12d, %ebx
|
|
addl %r10d, %r14d
|
|
vpxor %xmm8, %xmm7, %xmm5
|
|
# rnd_1: 6 - 6
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
andl %ebx, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
# rnd_1: 7 - 7
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
addl %eax, %r10d
|
|
# rnd_0: 0 - 0
|
|
movl %r15d, %eax
|
|
rorxl $11, %r14d, %ecx
|
|
addl 56(%rsp), %r9d
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm3, %xmm4, %xmm4
|
|
# rnd_0: 2 - 2
|
|
andl %r14d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
xorl %r8d, %eax
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
andl %eax, %ebx
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
addl %ebx, %r9d
|
|
# rnd_1: 0 - 0
|
|
movl %r14d, %ebx
|
|
rorxl $11, %r13d, %ecx
|
|
addl 60(%rsp), %r8d
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
vpsrld $10, %xmm6, %xmm9
|
|
# rnd_1: 2 - 2
|
|
andl %r13d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
xorl %r15d, %ebx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r10d, %ebx
|
|
addl %r8d, %r12d
|
|
vpxor %xmm6, %xmm9, %xmm9
|
|
# rnd_1: 6 - 6
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
andl %ebx, %eax
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
addl %eax, %r8d
|
|
vpaddd %xmm4, %xmm9, %xmm3
|
|
# msg_sched done: 12-15
|
|
# set_w_k_xfer_4: 8
|
|
vpaddd 128+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4
|
|
vpaddd 144+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5
|
|
vmovdqu %xmm4, (%rsp)
|
|
vmovdqu %xmm5, 16(%rsp)
|
|
vpaddd 160+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6
|
|
vpaddd 176+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7
|
|
vmovdqu %xmm6, 32(%rsp)
|
|
vmovdqu %xmm7, 48(%rsp)
|
|
# msg_sched: 0-3
|
|
# rnd_0: 0 - 0
|
|
movl %r13d, %eax
|
|
rorxl $11, %r12d, %ecx
|
|
addl (%rsp), %r15d
|
|
vpalignr $4, %xmm2, %xmm3, %xmm4
|
|
vpalignr $4, %xmm0, %xmm1, %xmm5
|
|
# rnd_0: 1 - 2
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
andl %r12d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
xorl %r14d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
vpsrld $3, %xmm5, %xmm8
|
|
vpor %xmm6, %xmm7, %xmm7
|
|
# rnd_0: 5 - 7
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
andl %eax, %ebx
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
addl %ebx, %r15d
|
|
# rnd_1: 0 - 0
|
|
movl %r12d, %ebx
|
|
rorxl $11, %r11d, %ecx
|
|
addl 4(%rsp), %r14d
|
|
vpsrld $18, %xmm5, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
vpslld $14, %xmm5, %xmm5
|
|
# rnd_1: 2 - 2
|
|
andl %r11d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
vpxor %xmm5, %xmm7, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
xorl %r13d, %ebx
|
|
vpxor %xmm6, %xmm7, %xmm7
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
vpshufd $0xfa, %xmm3, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r8d, %ebx
|
|
addl %r14d, %r10d
|
|
vpxor %xmm8, %xmm7, %xmm5
|
|
# rnd_1: 6 - 6
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
andl %ebx, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
# rnd_1: 7 - 7
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
addl %eax, %r14d
|
|
# rnd_0: 0 - 0
|
|
movl %r11d, %eax
|
|
rorxl $11, %r10d, %ecx
|
|
addl 8(%rsp), %r13d
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm0, %xmm4, %xmm4
|
|
# rnd_0: 2 - 2
|
|
andl %r10d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
xorl %r12d, %eax
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
andl %eax, %ebx
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
addl %ebx, %r13d
|
|
# rnd_1: 0 - 0
|
|
movl %r10d, %ebx
|
|
rorxl $11, %r9d, %ecx
|
|
addl 12(%rsp), %r12d
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
vpsrld $10, %xmm6, %xmm9
|
|
# rnd_1: 2 - 2
|
|
andl %r9d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
xorl %r11d, %ebx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r14d, %ebx
|
|
addl %r12d, %r8d
|
|
vpxor %xmm6, %xmm9, %xmm9
|
|
# rnd_1: 6 - 6
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
andl %ebx, %eax
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
addl %eax, %r12d
|
|
vpaddd %xmm4, %xmm9, %xmm0
|
|
# msg_sched done: 0-3
|
|
# msg_sched: 4-7
|
|
# rnd_0: 0 - 0
|
|
movl %r9d, %eax
|
|
rorxl $11, %r8d, %ecx
|
|
addl 16(%rsp), %r11d
|
|
vpalignr $4, %xmm3, %xmm0, %xmm4
|
|
vpalignr $4, %xmm1, %xmm2, %xmm5
|
|
# rnd_0: 1 - 2
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
andl %r8d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
xorl %r10d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
vpsrld $3, %xmm5, %xmm8
|
|
vpor %xmm6, %xmm7, %xmm7
|
|
# rnd_0: 5 - 7
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
andl %eax, %ebx
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
addl %ebx, %r11d
|
|
# rnd_1: 0 - 0
|
|
movl %r8d, %ebx
|
|
rorxl $11, %r15d, %ecx
|
|
addl 20(%rsp), %r10d
|
|
vpsrld $18, %xmm5, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
vpslld $14, %xmm5, %xmm5
|
|
# rnd_1: 2 - 2
|
|
andl %r15d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
vpxor %xmm5, %xmm7, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
xorl %r9d, %ebx
|
|
vpxor %xmm6, %xmm7, %xmm7
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
vpshufd $0xfa, %xmm0, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r12d, %ebx
|
|
addl %r10d, %r14d
|
|
vpxor %xmm8, %xmm7, %xmm5
|
|
# rnd_1: 6 - 6
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
andl %ebx, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
# rnd_1: 7 - 7
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
addl %eax, %r10d
|
|
# rnd_0: 0 - 0
|
|
movl %r15d, %eax
|
|
rorxl $11, %r14d, %ecx
|
|
addl 24(%rsp), %r9d
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm1, %xmm4, %xmm4
|
|
# rnd_0: 2 - 2
|
|
andl %r14d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
xorl %r8d, %eax
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
andl %eax, %ebx
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
addl %ebx, %r9d
|
|
# rnd_1: 0 - 0
|
|
movl %r14d, %ebx
|
|
rorxl $11, %r13d, %ecx
|
|
addl 28(%rsp), %r8d
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
vpsrld $10, %xmm6, %xmm9
|
|
# rnd_1: 2 - 2
|
|
andl %r13d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
xorl %r15d, %ebx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r10d, %ebx
|
|
addl %r8d, %r12d
|
|
vpxor %xmm6, %xmm9, %xmm9
|
|
# rnd_1: 6 - 6
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
andl %ebx, %eax
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
addl %eax, %r8d
|
|
vpaddd %xmm4, %xmm9, %xmm1
|
|
# msg_sched done: 4-7
|
|
# msg_sched: 8-11
|
|
# rnd_0: 0 - 0
|
|
movl %r13d, %eax
|
|
rorxl $11, %r12d, %ecx
|
|
addl 32(%rsp), %r15d
|
|
vpalignr $4, %xmm0, %xmm1, %xmm4
|
|
vpalignr $4, %xmm2, %xmm3, %xmm5
|
|
# rnd_0: 1 - 2
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
andl %r12d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
xorl %r14d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
vpsrld $3, %xmm5, %xmm8
|
|
vpor %xmm6, %xmm7, %xmm7
|
|
# rnd_0: 5 - 7
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
andl %eax, %ebx
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
addl %ebx, %r15d
|
|
# rnd_1: 0 - 0
|
|
movl %r12d, %ebx
|
|
rorxl $11, %r11d, %ecx
|
|
addl 36(%rsp), %r14d
|
|
vpsrld $18, %xmm5, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
vpslld $14, %xmm5, %xmm5
|
|
# rnd_1: 2 - 2
|
|
andl %r11d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
vpxor %xmm5, %xmm7, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
xorl %r13d, %ebx
|
|
vpxor %xmm6, %xmm7, %xmm7
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
vpshufd $0xfa, %xmm1, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r8d, %ebx
|
|
addl %r14d, %r10d
|
|
vpxor %xmm8, %xmm7, %xmm5
|
|
# rnd_1: 6 - 6
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
andl %ebx, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
# rnd_1: 7 - 7
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
addl %eax, %r14d
|
|
# rnd_0: 0 - 0
|
|
movl %r11d, %eax
|
|
rorxl $11, %r10d, %ecx
|
|
addl 40(%rsp), %r13d
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm2, %xmm4, %xmm4
|
|
# rnd_0: 2 - 2
|
|
andl %r10d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
xorl %r12d, %eax
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
andl %eax, %ebx
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
addl %ebx, %r13d
|
|
# rnd_1: 0 - 0
|
|
movl %r10d, %ebx
|
|
rorxl $11, %r9d, %ecx
|
|
addl 44(%rsp), %r12d
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
vpsrld $10, %xmm6, %xmm9
|
|
# rnd_1: 2 - 2
|
|
andl %r9d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
xorl %r11d, %ebx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r14d, %ebx
|
|
addl %r12d, %r8d
|
|
vpxor %xmm6, %xmm9, %xmm9
|
|
# rnd_1: 6 - 6
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
andl %ebx, %eax
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
addl %eax, %r12d
|
|
vpaddd %xmm4, %xmm9, %xmm2
|
|
# msg_sched done: 8-11
|
|
# msg_sched: 12-15
|
|
# rnd_0: 0 - 0
|
|
movl %r9d, %eax
|
|
rorxl $11, %r8d, %ecx
|
|
addl 48(%rsp), %r11d
|
|
vpalignr $4, %xmm1, %xmm2, %xmm4
|
|
vpalignr $4, %xmm3, %xmm0, %xmm5
|
|
# rnd_0: 1 - 2
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
andl %r8d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
vpsrld $7, %xmm5, %xmm6
|
|
vpslld $25, %xmm5, %xmm7
|
|
# rnd_0: 3 - 4
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
xorl %r10d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
vpsrld $3, %xmm5, %xmm8
|
|
vpor %xmm6, %xmm7, %xmm7
|
|
# rnd_0: 5 - 7
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
andl %eax, %ebx
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
addl %ebx, %r11d
|
|
# rnd_1: 0 - 0
|
|
movl %r8d, %ebx
|
|
rorxl $11, %r15d, %ecx
|
|
addl 52(%rsp), %r10d
|
|
vpsrld $18, %xmm5, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
vpslld $14, %xmm5, %xmm5
|
|
# rnd_1: 2 - 2
|
|
andl %r15d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
vpxor %xmm5, %xmm7, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
xorl %r9d, %ebx
|
|
vpxor %xmm6, %xmm7, %xmm7
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
vpshufd $0xfa, %xmm2, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r12d, %ebx
|
|
addl %r10d, %r14d
|
|
vpxor %xmm8, %xmm7, %xmm5
|
|
# rnd_1: 6 - 6
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
andl %ebx, %eax
|
|
vpsrld $10, %xmm6, %xmm8
|
|
# rnd_1: 7 - 7
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
addl %eax, %r10d
|
|
# rnd_0: 0 - 0
|
|
movl %r15d, %eax
|
|
rorxl $11, %r14d, %ecx
|
|
addl 56(%rsp), %r9d
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
vpaddd %xmm3, %xmm4, %xmm4
|
|
# rnd_0: 2 - 2
|
|
andl %r14d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
vpaddd %xmm5, %xmm4, %xmm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
xorl %r8d, %eax
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
vpxor %xmm6, %xmm8, %xmm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
vpshufb %xmm11, %xmm8, %xmm8
|
|
# rnd_0: 6 - 6
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
andl %eax, %ebx
|
|
vpaddd %xmm8, %xmm4, %xmm4
|
|
# rnd_0: 7 - 7
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
addl %ebx, %r9d
|
|
# rnd_1: 0 - 0
|
|
movl %r14d, %ebx
|
|
rorxl $11, %r13d, %ecx
|
|
addl 60(%rsp), %r8d
|
|
vpshufd $0x50, %xmm4, %xmm6
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
vpsrld $10, %xmm6, %xmm9
|
|
# rnd_1: 2 - 2
|
|
andl %r13d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
vpsrlq $19, %xmm6, %xmm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
xorl %r15d, %ebx
|
|
vpsrlq $0x11, %xmm6, %xmm6
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
vpxor %xmm7, %xmm6, %xmm6
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r10d, %ebx
|
|
addl %r8d, %r12d
|
|
vpxor %xmm6, %xmm9, %xmm9
|
|
# rnd_1: 6 - 6
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
andl %ebx, %eax
|
|
vpshufb %xmm12, %xmm9, %xmm9
|
|
# rnd_1: 7 - 7
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
addl %eax, %r8d
|
|
vpaddd %xmm4, %xmm9, %xmm3
|
|
# msg_sched done: 12-15
|
|
# set_w_k_xfer_4: 12
|
|
vpaddd 192+L_avx1_rorx_sha256_k(%rip), %xmm0, %xmm4
|
|
vpaddd 208+L_avx1_rorx_sha256_k(%rip), %xmm1, %xmm5
|
|
vmovdqu %xmm4, (%rsp)
|
|
vmovdqu %xmm5, 16(%rsp)
|
|
vpaddd 224+L_avx1_rorx_sha256_k(%rip), %xmm2, %xmm6
|
|
vpaddd 240+L_avx1_rorx_sha256_k(%rip), %xmm3, %xmm7
|
|
vmovdqu %xmm6, 32(%rsp)
|
|
vmovdqu %xmm7, 48(%rsp)
|
|
xorl %eax, %eax
|
|
xorl %ecx, %ecx
|
|
# rnd_all_4: 0-3
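# Final sixteen rounds: W[48..63] + K[48..63] is already on the stack from
# the transfer above, so no further message scheduling is needed and each
# rnd_all_4 group is four plain rorx-based rounds reading from n(%rsp).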
|
|
rorxl $6, %r12d, %edx
|
|
rorxl $11, %r12d, %ecx
|
|
addl %eax, %r8d
|
|
addl (%rsp), %r15d
|
|
movl %r13d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r12d, %eax
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
xorl %r14d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r15d
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
rorxl $11, %r11d, %ecx
|
|
addl %ebx, %r15d
|
|
addl 4(%rsp), %r14d
|
|
movl %r12d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r11d, %ebx
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
xorl %r13d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
xorl %ecx, %edx
|
|
movl %r8d, %ebx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r14d
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
rorxl $11, %r10d, %ecx
|
|
addl %eax, %r14d
|
|
addl 8(%rsp), %r13d
|
|
movl %r11d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r10d, %eax
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
xorl %r12d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r13d
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
rorxl $11, %r9d, %ecx
|
|
addl %ebx, %r13d
|
|
addl 12(%rsp), %r12d
|
|
movl %r10d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r9d, %ebx
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
xorl %r11d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
xorl %ecx, %edx
|
|
movl %r14d, %ebx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r12d
|
|
xorl %r14d, %eax
|
|
# rnd_all_4: 1-4
|
|
rorxl $6, %r8d, %edx
|
|
rorxl $11, %r8d, %ecx
|
|
addl %eax, %r12d
|
|
addl 16(%rsp), %r11d
|
|
movl %r9d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r8d, %eax
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
xorl %r10d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r11d
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
rorxl $11, %r15d, %ecx
|
|
addl %ebx, %r11d
|
|
addl 20(%rsp), %r10d
|
|
movl %r8d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r15d, %ebx
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
xorl %r9d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
xorl %ecx, %edx
|
|
movl %r12d, %ebx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r10d
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
rorxl $11, %r14d, %ecx
|
|
addl %eax, %r10d
|
|
addl 24(%rsp), %r9d
|
|
movl %r15d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r14d, %eax
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
xorl %r8d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r9d
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
rorxl $11, %r13d, %ecx
|
|
addl %ebx, %r9d
|
|
addl 28(%rsp), %r8d
|
|
movl %r14d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r13d, %ebx
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
xorl %r15d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
xorl %ecx, %edx
|
|
movl %r10d, %ebx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r8d
|
|
xorl %r10d, %eax
|
|
# rnd_all_4: 2-5
|
|
rorxl $6, %r12d, %edx
|
|
rorxl $11, %r12d, %ecx
|
|
addl %eax, %r8d
|
|
addl 32(%rsp), %r15d
|
|
movl %r13d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r12d, %eax
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
xorl %r14d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r15d
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
rorxl $11, %r11d, %ecx
|
|
addl %ebx, %r15d
|
|
addl 36(%rsp), %r14d
|
|
movl %r12d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r11d, %ebx
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
xorl %r13d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
xorl %ecx, %edx
|
|
movl %r8d, %ebx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r14d
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
rorxl $11, %r10d, %ecx
|
|
addl %eax, %r14d
|
|
addl 40(%rsp), %r13d
|
|
movl %r11d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r10d, %eax
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
xorl %r12d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r13d
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
rorxl $11, %r9d, %ecx
|
|
addl %ebx, %r13d
|
|
addl 44(%rsp), %r12d
|
|
movl %r10d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r9d, %ebx
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
xorl %r11d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
xorl %ecx, %edx
|
|
movl %r14d, %ebx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r12d
|
|
xorl %r14d, %eax
|
|
# rnd_all_4: 3-6
|
|
rorxl $6, %r8d, %edx
|
|
rorxl $11, %r8d, %ecx
|
|
addl %eax, %r12d
|
|
addl 48(%rsp), %r11d
|
|
movl %r9d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r8d, %eax
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
xorl %r10d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r11d
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
rorxl $11, %r15d, %ecx
|
|
addl %ebx, %r11d
|
|
addl 52(%rsp), %r10d
|
|
movl %r8d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r15d, %ebx
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
xorl %r9d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
xorl %ecx, %edx
|
|
movl %r12d, %ebx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r10d
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
rorxl $11, %r14d, %ecx
|
|
addl %eax, %r10d
|
|
addl 56(%rsp), %r9d
|
|
movl %r15d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r14d, %eax
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
xorl %r8d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r9d
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
rorxl $11, %r13d, %ecx
|
|
addl %ebx, %r9d
|
|
addl 60(%rsp), %r8d
|
|
movl %r14d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r13d, %ebx
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
xorl %r15d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
xorl %ecx, %edx
|
|
movl %r10d, %ebx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r8d
|
|
xorl %r10d, %eax
|
|
addl %eax, %r8d
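# End of the 64 rounds. The lines below perform the usual feed-forward: the
# previous digest at (%rdi) is added into the working variables and the sums
# are written back as the new digest for this block.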
|
|
addl (%rdi), %r8d
addl 4(%rdi), %r9d
addl 8(%rdi), %r10d
addl 12(%rdi), %r11d
addl 16(%rdi), %r12d
addl 20(%rdi), %r13d
addl 24(%rdi), %r14d
addl 28(%rdi), %r15d
addq $0x40, %rbp
subl $0x40, %esi
movl %r8d, (%rdi)
movl %r9d, 4(%rdi)
movl %r10d, 8(%rdi)
movl %r11d, 12(%rdi)
movl %r12d, 16(%rdi)
movl %r13d, 20(%rdi)
movl %r14d, 24(%rdi)
movl %r15d, 28(%rdi)
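# Block bookkeeping happened above: %rbp was advanced 64 bytes to the next
# block and the remaining length in %esi reduced by 64. The jnz below repeats
# while length remains, then %rax is cleared for the zero return and
# vzeroupper ends the AVX region before the callee-saved registers are
# restored.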
jnz L_sha256_len_avx1_len_rorx_start
xorq %rax, %rax
vzeroupper
addq $0x40, %rsp
popq %rbp
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size Transform_Sha256_AVX1_RORX_Len,.-Transform_Sha256_AVX1_RORX_Len
#endif /* __APPLE__ */
#endif /* HAVE_INTEL_AVX1 */
#ifdef HAVE_INTEL_AVX2
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
L_avx2_sha256_k:
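# Each row of four K constants below is stored twice so that a single 256-bit
# vpaddd applies the same constants to both 128-bit lanes of a ymm register;
# the two lanes can then carry independent W data.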
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc
.long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc
.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967
.long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967
.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 32
#else
.p2align 5
#endif /* __APPLE__ */
L_avx2_sha256_shuf_00BA:
.quad 0xb0a090803020100, 0xffffffffffffffff
.quad 0xb0a090803020100, 0xffffffffffffffff
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 32
#else
.p2align 5
#endif /* __APPLE__ */
L_avx2_sha256_shuf_DC00:
.quad 0xffffffffffffffff, 0xb0a090803020100
.quad 0xffffffffffffffff, 0xb0a090803020100
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 32
#else
.p2align 5
#endif /* __APPLE__ */
L_avx2_sha256_flip_mask:
.quad 0x405060700010203, 0xc0d0e0f08090a0b
.quad 0x405060700010203, 0xc0d0e0f08090a0b
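# L_avx2_sha256_flip_mask byte-swaps each 32-bit word of the big-endian
# message; the shuf_00BA / shuf_DC00 masks appear to compact the two sigma1
# results produced per schedule step into the low and high word pairs of the
# destination vector (the 0xff mask bytes make vpshufb zero the unused
# lanes).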
#ifndef __APPLE__
.text
.globl Transform_Sha256_AVX2
.type Transform_Sha256_AVX2,@function
.align 16
Transform_Sha256_AVX2:
#else
.section __TEXT,__text
.globl _Transform_Sha256_AVX2
.p2align 4
_Transform_Sha256_AVX2:
#endif /* __APPLE__ */
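# Prologue sketch: only the callee-saved GPRs are pushed, 0x200 bytes of
# stack hold the sixteen 32-byte W+K rows, and %rax is pointed 32 bytes past
# %rdi, which appears to be where the buffered message block lives relative
# to the eight digest words in the Sha256 structure.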
pushq %rbx
pushq %r12
pushq %r13
pushq %r14
pushq %r15
subq $0x200, %rsp
leaq 32(%rdi), %rax
vmovdqa L_avx2_sha256_flip_mask(%rip), %xmm13
vmovdqa L_avx2_sha256_shuf_00BA(%rip), %ymm11
vmovdqa L_avx2_sha256_shuf_DC00(%rip), %ymm12
movl (%rdi), %r8d
movl 4(%rdi), %r9d
movl 8(%rdi), %r10d
movl 12(%rdi), %r11d
movl 16(%rdi), %r12d
movl 20(%rdi), %r13d
movl 24(%rdi), %r14d
movl 28(%rdi), %r15d
# X0, X1, X2, X3 = W[0..15]
|
|
vmovdqu (%rax), %xmm0
|
|
vmovdqu 16(%rax), %xmm1
|
|
vpshufb %xmm13, %xmm0, %xmm0
|
|
vpshufb %xmm13, %xmm1, %xmm1
|
|
vmovdqu 32(%rax), %xmm2
|
|
vmovdqu 48(%rax), %xmm3
|
|
vpshufb %xmm13, %xmm2, %xmm2
|
|
vpshufb %xmm13, %xmm3, %xmm3
|
|
movl %r9d, %ebx
|
|
movl %r12d, %edx
|
|
xorl %r10d, %ebx
|
|
# set_w_k_xfer_4: 0
|
|
vpaddd 0+L_avx2_sha256_k(%rip), %ymm0, %ymm4
|
|
vpaddd 32+L_avx2_sha256_k(%rip), %ymm1, %ymm5
|
|
vmovdqu %ymm4, (%rsp)
|
|
vmovdqu %ymm5, 32(%rsp)
|
|
vpaddd 64+L_avx2_sha256_k(%rip), %ymm2, %ymm4
|
|
vpaddd 96+L_avx2_sha256_k(%rip), %ymm3, %ymm5
|
|
vmovdqu %ymm4, 64(%rsp)
|
|
vmovdqu %ymm5, 96(%rsp)
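# The W+K rows are stored 32 bytes apart. Only the low 128-bit lane of each
# ymm holds this block's data (the 16-byte loads above leave the upper lane
# zero), so the rounds read offsets 0-12, 32-44, and so on, skipping the
# upper-lane half of each row.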
|
|
# msg_sched: 0-3
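# In this transform the scalar rounds use plain rotates instead of rorx: the
# rorl $14 / $5 / $6 chain on %edx evaluates Sigma1(e) = ROTR6 ^ ROTR11 ^
# ROTR25 (6 + 5 = 11, 6 + 5 + 14 = 25), and the rorl $9 / $11 / $2 chain on
# %ecx evaluates Sigma0(a) = ROTR2 ^ ROTR13 ^ ROTR22 in the same way.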
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %ymm0, %ymm1, %ymm5
|
|
vpalignr $4, %ymm2, %ymm3, %ymm4
|
|
# rnd_0: 1 - 2
|
|
movl %r9d, %eax
|
|
movl %r13d, %ecx
|
|
addl (%rsp), %r15d
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
andl %r12d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
addl %ecx, %r15d
|
|
rorl $6, %edx
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
movl %r8d, %ecx
|
|
vpsrld $18, %ymm5, %ymm8
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
vpor %ymm6, %ymm7, %ymm6
|
|
vpor %ymm8, %ymm9, %ymm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r8d, %ebx
|
|
movl %r12d, %ecx
|
|
addl 4(%rsp), %r14d
|
|
xorl %r13d, %ecx
|
|
vpsrld $3, %ymm5, %ymm9
|
|
vpxor %ymm6, %ymm8, %ymm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r11d, %edx
|
|
andl %r11d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r13d, %ecx
|
|
xorl %r11d, %edx
|
|
addl %ecx, %r14d
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
vpshufd $0xfa, %ymm3, %ymm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
movl %r15d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
vpsrld $10, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %ymm6, %ymm6
|
|
vpaddd %ymm0, %ymm4, %ymm4
|
|
# rnd_0: 1 - 3
|
|
movl %r15d, %eax
|
|
movl %r11d, %ecx
|
|
addl 8(%rsp), %r13d
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
andl %r10d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
addl %ecx, %r13d
|
|
vpxor %ymm6, %ymm7, %ymm6
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
movl %r14d, %ecx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_1: 1 - 1
|
|
movl %r14d, %ebx
|
|
movl %r10d, %ecx
|
|
addl 12(%rsp), %r12d
|
|
xorl %r11d, %ecx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r9d, %edx
|
|
andl %r9d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r11d, %ecx
|
|
xorl %r9d, %edx
|
|
addl %ecx, %r12d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
vpxor %ymm8, %ymm7, %ymm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
movl %r13d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
vpxor %ymm9, %ymm8, %ymm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
vpaddd %ymm4, %ymm9, %ymm0
|
|
# msg_sched done: 0-3
|
|
# msg_sched: 8-11
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %ymm1, %ymm2, %ymm5
|
|
vpalignr $4, %ymm3, %ymm0, %ymm4
|
|
# rnd_0: 1 - 2
|
|
movl %r13d, %eax
|
|
movl %r9d, %ecx
|
|
addl 32(%rsp), %r11d
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
andl %r8d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
addl %ecx, %r11d
|
|
rorl $6, %edx
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
movl %r12d, %ecx
|
|
vpsrld $18, %ymm5, %ymm8
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
vpor %ymm6, %ymm7, %ymm6
|
|
vpor %ymm8, %ymm9, %ymm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r12d, %ebx
|
|
movl %r8d, %ecx
|
|
addl 36(%rsp), %r10d
|
|
xorl %r9d, %ecx
|
|
vpsrld $3, %ymm5, %ymm9
|
|
vpxor %ymm6, %ymm8, %ymm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r15d, %edx
|
|
andl %r15d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r9d, %ecx
|
|
xorl %r15d, %edx
|
|
addl %ecx, %r10d
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
vpshufd $0xfa, %ymm0, %ymm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
movl %r11d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
vpsrld $10, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %ymm6, %ymm6
|
|
vpaddd %ymm1, %ymm4, %ymm4
|
|
# rnd_0: 1 - 3
|
|
movl %r11d, %eax
|
|
movl %r15d, %ecx
|
|
addl 40(%rsp), %r9d
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
andl %r14d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
addl %ecx, %r9d
|
|
vpxor %ymm6, %ymm7, %ymm6
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
movl %r10d, %ecx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_1: 1 - 1
|
|
movl %r10d, %ebx
|
|
movl %r14d, %ecx
|
|
addl 44(%rsp), %r8d
|
|
xorl %r15d, %ecx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r13d, %edx
|
|
andl %r13d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r15d, %ecx
|
|
xorl %r13d, %edx
|
|
addl %ecx, %r8d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
vpxor %ymm8, %ymm7, %ymm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
movl %r9d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
vpxor %ymm9, %ymm8, %ymm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
vpaddd %ymm4, %ymm9, %ymm1
|
|
# msg_sched done: 8-11
|
|
# msg_sched: 16-19
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %ymm2, %ymm3, %ymm5
|
|
vpalignr $4, %ymm0, %ymm1, %ymm4
|
|
# rnd_0: 1 - 2
|
|
movl %r9d, %eax
|
|
movl %r13d, %ecx
|
|
addl 64(%rsp), %r15d
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
andl %r12d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
addl %ecx, %r15d
|
|
rorl $6, %edx
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
movl %r8d, %ecx
|
|
vpsrld $18, %ymm5, %ymm8
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
vpor %ymm6, %ymm7, %ymm6
|
|
vpor %ymm8, %ymm9, %ymm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r8d, %ebx
|
|
movl %r12d, %ecx
|
|
addl 68(%rsp), %r14d
|
|
xorl %r13d, %ecx
|
|
vpsrld $3, %ymm5, %ymm9
|
|
vpxor %ymm6, %ymm8, %ymm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r11d, %edx
|
|
andl %r11d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r13d, %ecx
|
|
xorl %r11d, %edx
|
|
addl %ecx, %r14d
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
vpshufd $0xfa, %ymm1, %ymm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
movl %r15d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
vpsrld $10, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %ymm6, %ymm6
|
|
vpaddd %ymm2, %ymm4, %ymm4
|
|
# rnd_0: 1 - 3
|
|
movl %r15d, %eax
|
|
movl %r11d, %ecx
|
|
addl 72(%rsp), %r13d
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
andl %r10d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
addl %ecx, %r13d
|
|
vpxor %ymm6, %ymm7, %ymm6
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
movl %r14d, %ecx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_1: 1 - 1
|
|
movl %r14d, %ebx
|
|
movl %r10d, %ecx
|
|
addl 76(%rsp), %r12d
|
|
xorl %r11d, %ecx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r9d, %edx
|
|
andl %r9d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r11d, %ecx
|
|
xorl %r9d, %edx
|
|
addl %ecx, %r12d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
vpxor %ymm8, %ymm7, %ymm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
movl %r13d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
vpxor %ymm9, %ymm8, %ymm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
vpaddd %ymm4, %ymm9, %ymm2
|
|
# msg_sched done: 16-19
|
|
# msg_sched: 24-27
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %ymm3, %ymm0, %ymm5
|
|
vpalignr $4, %ymm1, %ymm2, %ymm4
|
|
# rnd_0: 1 - 2
|
|
movl %r13d, %eax
|
|
movl %r9d, %ecx
|
|
addl 96(%rsp), %r11d
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
andl %r8d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
addl %ecx, %r11d
|
|
rorl $6, %edx
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
movl %r12d, %ecx
|
|
vpsrld $18, %ymm5, %ymm8
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
vpor %ymm6, %ymm7, %ymm6
|
|
vpor %ymm8, %ymm9, %ymm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r12d, %ebx
|
|
movl %r8d, %ecx
|
|
addl 100(%rsp), %r10d
|
|
xorl %r9d, %ecx
|
|
vpsrld $3, %ymm5, %ymm9
|
|
vpxor %ymm6, %ymm8, %ymm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r15d, %edx
|
|
andl %r15d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r9d, %ecx
|
|
xorl %r15d, %edx
|
|
addl %ecx, %r10d
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
vpshufd $0xfa, %ymm2, %ymm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
movl %r11d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
vpsrld $10, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %ymm6, %ymm6
|
|
vpaddd %ymm3, %ymm4, %ymm4
|
|
# rnd_0: 1 - 3
|
|
movl %r11d, %eax
|
|
movl %r15d, %ecx
|
|
addl 104(%rsp), %r9d
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
andl %r14d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
addl %ecx, %r9d
|
|
vpxor %ymm6, %ymm7, %ymm6
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
movl %r10d, %ecx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_1: 1 - 1
|
|
movl %r10d, %ebx
|
|
movl %r14d, %ecx
|
|
addl 108(%rsp), %r8d
|
|
xorl %r15d, %ecx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r13d, %edx
|
|
andl %r13d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r15d, %ecx
|
|
xorl %r13d, %edx
|
|
addl %ecx, %r8d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
vpxor %ymm8, %ymm7, %ymm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
movl %r9d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
vpxor %ymm9, %ymm8, %ymm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
vpaddd %ymm4, %ymm9, %ymm3
|
|
# msg_sched done: 24-27
|
|
# set_w_k_xfer_4: 4
|
|
vpaddd 128+L_avx2_sha256_k(%rip), %ymm0, %ymm4
|
|
vpaddd 160+L_avx2_sha256_k(%rip), %ymm1, %ymm5
|
|
vmovdqu %ymm4, 128(%rsp)
|
|
vmovdqu %ymm5, 160(%rsp)
|
|
vpaddd 192+L_avx2_sha256_k(%rip), %ymm2, %ymm4
|
|
vpaddd 224+L_avx2_sha256_k(%rip), %ymm3, %ymm5
|
|
vmovdqu %ymm4, 192(%rsp)
|
|
vmovdqu %ymm5, 224(%rsp)
|
|
# msg_sched: 32-35
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %ymm0, %ymm1, %ymm5
|
|
vpalignr $4, %ymm2, %ymm3, %ymm4
|
|
# rnd_0: 1 - 2
|
|
movl %r9d, %eax
|
|
movl %r13d, %ecx
|
|
addl 128(%rsp), %r15d
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
andl %r12d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
addl %ecx, %r15d
|
|
rorl $6, %edx
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
movl %r8d, %ecx
|
|
vpsrld $18, %ymm5, %ymm8
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
vpor %ymm6, %ymm7, %ymm6
|
|
vpor %ymm8, %ymm9, %ymm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r8d, %ebx
|
|
movl %r12d, %ecx
|
|
addl 132(%rsp), %r14d
|
|
xorl %r13d, %ecx
|
|
vpsrld $3, %ymm5, %ymm9
|
|
vpxor %ymm6, %ymm8, %ymm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r11d, %edx
|
|
andl %r11d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r13d, %ecx
|
|
xorl %r11d, %edx
|
|
addl %ecx, %r14d
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
vpshufd $0xfa, %ymm3, %ymm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
movl %r15d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
vpsrld $10, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %ymm6, %ymm6
|
|
vpaddd %ymm0, %ymm4, %ymm4
|
|
# rnd_0: 1 - 3
|
|
movl %r15d, %eax
|
|
movl %r11d, %ecx
|
|
addl 136(%rsp), %r13d
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
andl %r10d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
addl %ecx, %r13d
|
|
vpxor %ymm6, %ymm7, %ymm6
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
movl %r14d, %ecx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_1: 1 - 1
|
|
movl %r14d, %ebx
|
|
movl %r10d, %ecx
|
|
addl 140(%rsp), %r12d
|
|
xorl %r11d, %ecx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r9d, %edx
|
|
andl %r9d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r11d, %ecx
|
|
xorl %r9d, %edx
|
|
addl %ecx, %r12d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
vpxor %ymm8, %ymm7, %ymm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
movl %r13d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
vpxor %ymm9, %ymm8, %ymm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
vpaddd %ymm4, %ymm9, %ymm0
|
|
# msg_sched done: 32-35
|
|
# msg_sched: 40-43
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %ymm1, %ymm2, %ymm5
|
|
vpalignr $4, %ymm3, %ymm0, %ymm4
|
|
# rnd_0: 1 - 2
|
|
movl %r13d, %eax
|
|
movl %r9d, %ecx
|
|
addl 160(%rsp), %r11d
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
andl %r8d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
addl %ecx, %r11d
|
|
rorl $6, %edx
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
movl %r12d, %ecx
|
|
vpsrld $18, %ymm5, %ymm8
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
vpor %ymm6, %ymm7, %ymm6
|
|
vpor %ymm8, %ymm9, %ymm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r12d, %ebx
|
|
movl %r8d, %ecx
|
|
addl 164(%rsp), %r10d
|
|
xorl %r9d, %ecx
|
|
vpsrld $3, %ymm5, %ymm9
|
|
vpxor %ymm6, %ymm8, %ymm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r15d, %edx
|
|
andl %r15d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r9d, %ecx
|
|
xorl %r15d, %edx
|
|
addl %ecx, %r10d
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
vpshufd $0xfa, %ymm0, %ymm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
movl %r11d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
vpsrld $10, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %ymm6, %ymm6
|
|
vpaddd %ymm1, %ymm4, %ymm4
|
|
# rnd_0: 1 - 3
|
|
movl %r11d, %eax
|
|
movl %r15d, %ecx
|
|
addl 168(%rsp), %r9d
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
andl %r14d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
addl %ecx, %r9d
|
|
vpxor %ymm6, %ymm7, %ymm6
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
movl %r10d, %ecx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_1: 1 - 1
|
|
movl %r10d, %ebx
|
|
movl %r14d, %ecx
|
|
addl 172(%rsp), %r8d
|
|
xorl %r15d, %ecx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r13d, %edx
|
|
andl %r13d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r15d, %ecx
|
|
xorl %r13d, %edx
|
|
addl %ecx, %r8d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
vpxor %ymm8, %ymm7, %ymm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
movl %r9d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
vpxor %ymm9, %ymm8, %ymm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
vpaddd %ymm4, %ymm9, %ymm1
|
|
# msg_sched done: 40-43
|
|
# msg_sched: 48-51
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %ymm2, %ymm3, %ymm5
|
|
vpalignr $4, %ymm0, %ymm1, %ymm4
|
|
# rnd_0: 1 - 2
|
|
movl %r9d, %eax
|
|
movl %r13d, %ecx
|
|
addl 192(%rsp), %r15d
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
andl %r12d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
addl %ecx, %r15d
|
|
rorl $6, %edx
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
movl %r8d, %ecx
|
|
vpsrld $18, %ymm5, %ymm8
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
vpor %ymm6, %ymm7, %ymm6
|
|
vpor %ymm8, %ymm9, %ymm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r8d, %ebx
|
|
movl %r12d, %ecx
|
|
addl 196(%rsp), %r14d
|
|
xorl %r13d, %ecx
|
|
vpsrld $3, %ymm5, %ymm9
|
|
vpxor %ymm6, %ymm8, %ymm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r11d, %edx
|
|
andl %r11d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r13d, %ecx
|
|
xorl %r11d, %edx
|
|
addl %ecx, %r14d
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
vpshufd $0xfa, %ymm1, %ymm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
movl %r15d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
vpsrld $10, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %ymm6, %ymm6
|
|
vpaddd %ymm2, %ymm4, %ymm4
|
|
# rnd_0: 1 - 3
|
|
movl %r15d, %eax
|
|
movl %r11d, %ecx
|
|
addl 200(%rsp), %r13d
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
andl %r10d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
addl %ecx, %r13d
|
|
vpxor %ymm6, %ymm7, %ymm6
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
movl %r14d, %ecx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_1: 1 - 1
|
|
movl %r14d, %ebx
|
|
movl %r10d, %ecx
|
|
addl 204(%rsp), %r12d
|
|
xorl %r11d, %ecx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r9d, %edx
|
|
andl %r9d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r11d, %ecx
|
|
xorl %r9d, %edx
|
|
addl %ecx, %r12d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
vpxor %ymm8, %ymm7, %ymm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
movl %r13d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
vpxor %ymm9, %ymm8, %ymm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
vpaddd %ymm4, %ymm9, %ymm2
|
|
# msg_sched done: 48-51
|
|
# msg_sched: 56-59
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %ymm3, %ymm0, %ymm5
|
|
vpalignr $4, %ymm1, %ymm2, %ymm4
|
|
# rnd_0: 1 - 2
|
|
movl %r13d, %eax
|
|
movl %r9d, %ecx
|
|
addl 224(%rsp), %r11d
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
andl %r8d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
addl %ecx, %r11d
|
|
rorl $6, %edx
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
movl %r12d, %ecx
|
|
vpsrld $18, %ymm5, %ymm8
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
vpor %ymm6, %ymm7, %ymm6
|
|
vpor %ymm8, %ymm9, %ymm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r12d, %ebx
|
|
movl %r8d, %ecx
|
|
addl 228(%rsp), %r10d
|
|
xorl %r9d, %ecx
|
|
vpsrld $3, %ymm5, %ymm9
|
|
vpxor %ymm6, %ymm8, %ymm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r15d, %edx
|
|
andl %r15d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r9d, %ecx
|
|
xorl %r15d, %edx
|
|
addl %ecx, %r10d
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
vpshufd $0xfa, %ymm2, %ymm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
movl %r11d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
vpsrld $10, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %ymm6, %ymm6
|
|
vpaddd %ymm3, %ymm4, %ymm4
|
|
# rnd_0: 1 - 3
|
|
movl %r11d, %eax
|
|
movl %r15d, %ecx
|
|
addl 232(%rsp), %r9d
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
andl %r14d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
addl %ecx, %r9d
|
|
vpxor %ymm6, %ymm7, %ymm6
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
movl %r10d, %ecx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_1: 1 - 1
|
|
movl %r10d, %ebx
|
|
movl %r14d, %ecx
|
|
addl 236(%rsp), %r8d
|
|
xorl %r15d, %ecx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r13d, %edx
|
|
andl %r13d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r15d, %ecx
|
|
xorl %r13d, %edx
|
|
addl %ecx, %r8d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
vpxor %ymm8, %ymm7, %ymm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
movl %r9d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
vpxor %ymm9, %ymm8, %ymm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
vpaddd %ymm4, %ymm9, %ymm3
|
|
# msg_sched done: 56-59
|
|
# set_w_k_xfer_4: 8
|
|
vpaddd 256+L_avx2_sha256_k(%rip), %ymm0, %ymm4
|
|
vpaddd 288+L_avx2_sha256_k(%rip), %ymm1, %ymm5
|
|
vmovdqu %ymm4, 256(%rsp)
|
|
vmovdqu %ymm5, 288(%rsp)
|
|
vpaddd 320+L_avx2_sha256_k(%rip), %ymm2, %ymm4
|
|
vpaddd 352+L_avx2_sha256_k(%rip), %ymm3, %ymm5
|
|
vmovdqu %ymm4, 320(%rsp)
|
|
vmovdqu %ymm5, 352(%rsp)
|
|
# msg_sched: 64-67
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %ymm0, %ymm1, %ymm5
|
|
vpalignr $4, %ymm2, %ymm3, %ymm4
|
|
# rnd_0: 1 - 2
|
|
movl %r9d, %eax
|
|
movl %r13d, %ecx
|
|
addl 256(%rsp), %r15d
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
andl %r12d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
addl %ecx, %r15d
|
|
rorl $6, %edx
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
movl %r8d, %ecx
|
|
vpsrld $18, %ymm5, %ymm8
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
vpor %ymm6, %ymm7, %ymm6
|
|
vpor %ymm8, %ymm9, %ymm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r8d, %ebx
|
|
movl %r12d, %ecx
|
|
addl 260(%rsp), %r14d
|
|
xorl %r13d, %ecx
|
|
vpsrld $3, %ymm5, %ymm9
|
|
vpxor %ymm6, %ymm8, %ymm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r11d, %edx
|
|
andl %r11d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r13d, %ecx
|
|
xorl %r11d, %edx
|
|
addl %ecx, %r14d
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
vpshufd $0xfa, %ymm3, %ymm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
movl %r15d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
vpsrld $10, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %ymm6, %ymm6
|
|
vpaddd %ymm0, %ymm4, %ymm4
|
|
# rnd_0: 1 - 3
|
|
movl %r15d, %eax
|
|
movl %r11d, %ecx
|
|
addl 264(%rsp), %r13d
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
andl %r10d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
addl %ecx, %r13d
|
|
vpxor %ymm6, %ymm7, %ymm6
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
movl %r14d, %ecx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_1: 1 - 1
|
|
movl %r14d, %ebx
|
|
movl %r10d, %ecx
|
|
addl 268(%rsp), %r12d
|
|
xorl %r11d, %ecx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r9d, %edx
|
|
andl %r9d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r11d, %ecx
|
|
xorl %r9d, %edx
|
|
addl %ecx, %r12d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
vpxor %ymm8, %ymm7, %ymm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
movl %r13d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
vpxor %ymm9, %ymm8, %ymm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
vpaddd %ymm4, %ymm9, %ymm0
|
|
# msg_sched done: 64-67
|
|
# msg_sched: 72-75
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %ymm1, %ymm2, %ymm5
|
|
vpalignr $4, %ymm3, %ymm0, %ymm4
|
|
# rnd_0: 1 - 2
|
|
movl %r13d, %eax
|
|
movl %r9d, %ecx
|
|
addl 288(%rsp), %r11d
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
andl %r8d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
addl %ecx, %r11d
|
|
rorl $6, %edx
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
movl %r12d, %ecx
|
|
vpsrld $18, %ymm5, %ymm8
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
vpor %ymm6, %ymm7, %ymm6
|
|
vpor %ymm8, %ymm9, %ymm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r12d, %ebx
|
|
movl %r8d, %ecx
|
|
addl 292(%rsp), %r10d
|
|
xorl %r9d, %ecx
|
|
vpsrld $3, %ymm5, %ymm9
|
|
vpxor %ymm6, %ymm8, %ymm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r15d, %edx
|
|
andl %r15d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r9d, %ecx
|
|
xorl %r15d, %edx
|
|
addl %ecx, %r10d
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
vpshufd $0xfa, %ymm0, %ymm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
movl %r11d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
vpsrld $10, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %ymm6, %ymm6
|
|
vpaddd %ymm1, %ymm4, %ymm4
|
|
# rnd_0: 1 - 3
|
|
movl %r11d, %eax
|
|
movl %r15d, %ecx
|
|
addl 296(%rsp), %r9d
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
andl %r14d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
addl %ecx, %r9d
|
|
vpxor %ymm6, %ymm7, %ymm6
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
movl %r10d, %ecx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_1: 1 - 1
|
|
movl %r10d, %ebx
|
|
movl %r14d, %ecx
|
|
addl 300(%rsp), %r8d
|
|
xorl %r15d, %ecx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r13d, %edx
|
|
andl %r13d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r15d, %ecx
|
|
xorl %r13d, %edx
|
|
addl %ecx, %r8d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
vpxor %ymm8, %ymm7, %ymm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
movl %r9d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
vpxor %ymm9, %ymm8, %ymm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
vpaddd %ymm4, %ymm9, %ymm1
|
|
# msg_sched done: 72-75
|
|
# msg_sched: 80-83
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %ymm2, %ymm3, %ymm5
|
|
vpalignr $4, %ymm0, %ymm1, %ymm4
|
|
# rnd_0: 1 - 2
|
|
movl %r9d, %eax
|
|
movl %r13d, %ecx
|
|
addl 320(%rsp), %r15d
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
andl %r12d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
addl %ecx, %r15d
|
|
rorl $6, %edx
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
movl %r8d, %ecx
|
|
vpsrld $18, %ymm5, %ymm8
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
vpor %ymm6, %ymm7, %ymm6
|
|
vpor %ymm8, %ymm9, %ymm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r8d, %ebx
|
|
movl %r12d, %ecx
|
|
addl 324(%rsp), %r14d
|
|
xorl %r13d, %ecx
|
|
vpsrld $3, %ymm5, %ymm9
|
|
vpxor %ymm6, %ymm8, %ymm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r11d, %edx
|
|
andl %r11d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r13d, %ecx
|
|
xorl %r11d, %edx
|
|
addl %ecx, %r14d
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
vpshufd $0xfa, %ymm1, %ymm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
movl %r15d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
vpsrld $10, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %ymm6, %ymm6
|
|
vpaddd %ymm2, %ymm4, %ymm4
|
|
# rnd_0: 1 - 3
|
|
movl %r15d, %eax
|
|
movl %r11d, %ecx
|
|
addl 328(%rsp), %r13d
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
andl %r10d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
addl %ecx, %r13d
|
|
vpxor %ymm6, %ymm7, %ymm6
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
movl %r14d, %ecx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_1: 1 - 1
|
|
movl %r14d, %ebx
|
|
movl %r10d, %ecx
|
|
addl 332(%rsp), %r12d
|
|
xorl %r11d, %ecx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r9d, %edx
|
|
andl %r9d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r11d, %ecx
|
|
xorl %r9d, %edx
|
|
addl %ecx, %r12d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
vpxor %ymm8, %ymm7, %ymm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
movl %r13d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
vpxor %ymm9, %ymm8, %ymm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
vpaddd %ymm4, %ymm9, %ymm2
|
|
# msg_sched done: 80-83
|
|
# msg_sched: 88-91
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %ymm3, %ymm0, %ymm5
|
|
vpalignr $4, %ymm1, %ymm2, %ymm4
|
|
# rnd_0: 1 - 2
|
|
movl %r13d, %eax
|
|
movl %r9d, %ecx
|
|
addl 352(%rsp), %r11d
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
andl %r8d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
addl %ecx, %r11d
|
|
rorl $6, %edx
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
movl %r12d, %ecx
|
|
vpsrld $18, %ymm5, %ymm8
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
vpor %ymm6, %ymm7, %ymm6
|
|
vpor %ymm8, %ymm9, %ymm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r12d, %ebx
|
|
movl %r8d, %ecx
|
|
addl 356(%rsp), %r10d
|
|
xorl %r9d, %ecx
|
|
vpsrld $3, %ymm5, %ymm9
|
|
vpxor %ymm6, %ymm8, %ymm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r15d, %edx
|
|
andl %r15d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r9d, %ecx
|
|
xorl %r15d, %edx
|
|
addl %ecx, %r10d
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
vpshufd $0xfa, %ymm2, %ymm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
movl %r11d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
vpsrld $10, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %ymm6, %ymm6
|
|
vpaddd %ymm3, %ymm4, %ymm4
|
|
# rnd_0: 1 - 3
|
|
movl %r11d, %eax
|
|
movl %r15d, %ecx
|
|
addl 360(%rsp), %r9d
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
andl %r14d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
addl %ecx, %r9d
|
|
vpxor %ymm6, %ymm7, %ymm6
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
movl %r10d, %ecx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_1: 1 - 1
|
|
movl %r10d, %ebx
|
|
movl %r14d, %ecx
|
|
addl 364(%rsp), %r8d
|
|
xorl %r15d, %ecx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r13d, %edx
|
|
andl %r13d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r15d, %ecx
|
|
xorl %r13d, %edx
|
|
addl %ecx, %r8d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
vpxor %ymm8, %ymm7, %ymm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
movl %r9d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
vpxor %ymm9, %ymm8, %ymm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
vpaddd %ymm4, %ymm9, %ymm3
|
|
# msg_sched done: 88-91
|
|
# set_w_k_xfer_4: 12
|
|
vpaddd 384+L_avx2_sha256_k(%rip), %ymm0, %ymm4
|
|
vpaddd 416+L_avx2_sha256_k(%rip), %ymm1, %ymm5
|
|
vmovdqu %ymm4, 384(%rsp)
|
|
vmovdqu %ymm5, 416(%rsp)
|
|
vpaddd 448+L_avx2_sha256_k(%rip), %ymm2, %ymm4
|
|
vpaddd 480+L_avx2_sha256_k(%rip), %ymm3, %ymm5
|
|
vmovdqu %ymm4, 448(%rsp)
|
|
vmovdqu %ymm5, 480(%rsp)
|
|
# rnd_all_4: 24-27
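# The final 16 rounds need no further message scheduling, so they run
# as straight scalar code, four rounds at a time, consuming the
# W[t] + K[t] values already spilled to the stack.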
|
|
addl 384(%rsp), %r15d
|
|
movl %r13d, %ecx
|
|
movl %r9d, %eax
|
|
xorl %r14d, %ecx
|
|
rorl $14, %edx
|
|
andl %r12d, %ecx
|
|
xorl %r12d, %edx
|
|
xorl %r14d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r15d
|
|
xorl %r12d, %edx
|
|
xorl %r8d, %eax
|
|
rorl $6, %edx
|
|
movl %r8d, %ecx
|
|
addl %edx, %r15d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
addl 388(%rsp), %r14d
|
|
movl %r12d, %ecx
|
|
movl %r8d, %ebx
|
|
xorl %r13d, %ecx
|
|
rorl $14, %edx
|
|
andl %r11d, %ecx
|
|
xorl %r11d, %edx
|
|
xorl %r13d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r14d
|
|
xorl %r11d, %edx
|
|
xorl %r15d, %ebx
|
|
rorl $6, %edx
|
|
movl %r15d, %ecx
|
|
addl %edx, %r14d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
addl 392(%rsp), %r13d
|
|
movl %r11d, %ecx
|
|
movl %r15d, %eax
|
|
xorl %r12d, %ecx
|
|
rorl $14, %edx
|
|
andl %r10d, %ecx
|
|
xorl %r10d, %edx
|
|
xorl %r12d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r13d
|
|
xorl %r10d, %edx
|
|
xorl %r14d, %eax
|
|
rorl $6, %edx
|
|
movl %r14d, %ecx
|
|
addl %edx, %r13d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
addl 396(%rsp), %r12d
|
|
movl %r10d, %ecx
|
|
movl %r14d, %ebx
|
|
xorl %r11d, %ecx
|
|
rorl $14, %edx
|
|
andl %r9d, %ecx
|
|
xorl %r9d, %edx
|
|
xorl %r11d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r12d
|
|
xorl %r9d, %edx
|
|
xorl %r13d, %ebx
|
|
rorl $6, %edx
|
|
movl %r13d, %ecx
|
|
addl %edx, %r12d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
# rnd_all_4: 26-29
|
|
addl 416(%rsp), %r11d
|
|
movl %r9d, %ecx
|
|
movl %r13d, %eax
|
|
xorl %r10d, %ecx
|
|
rorl $14, %edx
|
|
andl %r8d, %ecx
|
|
xorl %r8d, %edx
|
|
xorl %r10d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r11d
|
|
xorl %r8d, %edx
|
|
xorl %r12d, %eax
|
|
rorl $6, %edx
|
|
movl %r12d, %ecx
|
|
addl %edx, %r11d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
addl 420(%rsp), %r10d
|
|
movl %r8d, %ecx
|
|
movl %r12d, %ebx
|
|
xorl %r9d, %ecx
|
|
rorl $14, %edx
|
|
andl %r15d, %ecx
|
|
xorl %r15d, %edx
|
|
xorl %r9d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r10d
|
|
xorl %r15d, %edx
|
|
xorl %r11d, %ebx
|
|
rorl $6, %edx
|
|
movl %r11d, %ecx
|
|
addl %edx, %r10d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
addl 424(%rsp), %r9d
|
|
movl %r15d, %ecx
|
|
movl %r11d, %eax
|
|
xorl %r8d, %ecx
|
|
rorl $14, %edx
|
|
andl %r14d, %ecx
|
|
xorl %r14d, %edx
|
|
xorl %r8d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r9d
|
|
xorl %r14d, %edx
|
|
xorl %r10d, %eax
|
|
rorl $6, %edx
|
|
movl %r10d, %ecx
|
|
addl %edx, %r9d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
addl 428(%rsp), %r8d
|
|
movl %r14d, %ecx
|
|
movl %r10d, %ebx
|
|
xorl %r15d, %ecx
|
|
rorl $14, %edx
|
|
andl %r13d, %ecx
|
|
xorl %r13d, %edx
|
|
xorl %r15d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r8d
|
|
xorl %r13d, %edx
|
|
xorl %r9d, %ebx
|
|
rorl $6, %edx
|
|
movl %r9d, %ecx
|
|
addl %edx, %r8d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
# rnd_all_4: 28-31
|
|
addl 448(%rsp), %r15d
|
|
movl %r13d, %ecx
|
|
movl %r9d, %eax
|
|
xorl %r14d, %ecx
|
|
rorl $14, %edx
|
|
andl %r12d, %ecx
|
|
xorl %r12d, %edx
|
|
xorl %r14d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r15d
|
|
xorl %r12d, %edx
|
|
xorl %r8d, %eax
|
|
rorl $6, %edx
|
|
movl %r8d, %ecx
|
|
addl %edx, %r15d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
addl 452(%rsp), %r14d
|
|
movl %r12d, %ecx
|
|
movl %r8d, %ebx
|
|
xorl %r13d, %ecx
|
|
rorl $14, %edx
|
|
andl %r11d, %ecx
|
|
xorl %r11d, %edx
|
|
xorl %r13d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r14d
|
|
xorl %r11d, %edx
|
|
xorl %r15d, %ebx
|
|
rorl $6, %edx
|
|
movl %r15d, %ecx
|
|
addl %edx, %r14d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
addl 456(%rsp), %r13d
|
|
movl %r11d, %ecx
|
|
movl %r15d, %eax
|
|
xorl %r12d, %ecx
|
|
rorl $14, %edx
|
|
andl %r10d, %ecx
|
|
xorl %r10d, %edx
|
|
xorl %r12d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r13d
|
|
xorl %r10d, %edx
|
|
xorl %r14d, %eax
|
|
rorl $6, %edx
|
|
movl %r14d, %ecx
|
|
addl %edx, %r13d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
addl 460(%rsp), %r12d
|
|
movl %r10d, %ecx
|
|
movl %r14d, %ebx
|
|
xorl %r11d, %ecx
|
|
rorl $14, %edx
|
|
andl %r9d, %ecx
|
|
xorl %r9d, %edx
|
|
xorl %r11d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r12d
|
|
xorl %r9d, %edx
|
|
xorl %r13d, %ebx
|
|
rorl $6, %edx
|
|
movl %r13d, %ecx
|
|
addl %edx, %r12d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
# rnd_all_4: 30-33
|
|
addl 480(%rsp), %r11d
|
|
movl %r9d, %ecx
|
|
movl %r13d, %eax
|
|
xorl %r10d, %ecx
|
|
rorl $14, %edx
|
|
andl %r8d, %ecx
|
|
xorl %r8d, %edx
|
|
xorl %r10d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r11d
|
|
xorl %r8d, %edx
|
|
xorl %r12d, %eax
|
|
rorl $6, %edx
|
|
movl %r12d, %ecx
|
|
addl %edx, %r11d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
addl 484(%rsp), %r10d
|
|
movl %r8d, %ecx
|
|
movl %r12d, %ebx
|
|
xorl %r9d, %ecx
|
|
rorl $14, %edx
|
|
andl %r15d, %ecx
|
|
xorl %r15d, %edx
|
|
xorl %r9d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r10d
|
|
xorl %r15d, %edx
|
|
xorl %r11d, %ebx
|
|
rorl $6, %edx
|
|
movl %r11d, %ecx
|
|
addl %edx, %r10d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
addl 488(%rsp), %r9d
|
|
movl %r15d, %ecx
|
|
movl %r11d, %eax
|
|
xorl %r8d, %ecx
|
|
rorl $14, %edx
|
|
andl %r14d, %ecx
|
|
xorl %r14d, %edx
|
|
xorl %r8d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r9d
|
|
xorl %r14d, %edx
|
|
xorl %r10d, %eax
|
|
rorl $6, %edx
|
|
movl %r10d, %ecx
|
|
addl %edx, %r9d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
addl 492(%rsp), %r8d
|
|
movl %r14d, %ecx
|
|
movl %r10d, %ebx
|
|
xorl %r15d, %ecx
|
|
rorl $14, %edx
|
|
andl %r13d, %ecx
|
|
xorl %r13d, %edx
|
|
xorl %r15d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r8d
|
|
xorl %r13d, %edx
|
|
xorl %r9d, %ebx
|
|
rorl $6, %edx
|
|
movl %r9d, %ecx
|
|
addl %edx, %r8d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
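# Feed-forward: add the working variables a..h back into the hash
# state at (%rdi).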
|
|
addl %r8d, (%rdi)
|
|
addl %r9d, 4(%rdi)
|
|
addl %r10d, 8(%rdi)
|
|
addl %r11d, 12(%rdi)
|
|
addl %r12d, 16(%rdi)
|
|
addl %r13d, 20(%rdi)
|
|
addl %r14d, 24(%rdi)
|
|
addl %r15d, 28(%rdi)
|
|
xorq %rax, %rax
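# Return 0 in %rax; the vzeroupper below avoids AVX/SSE transition
# penalties once control returns to code that may use legacy SSE.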
|
|
vzeroupper
|
|
addq $0x200, %rsp
|
|
popq %r15
|
|
popq %r14
|
|
popq %r13
|
|
popq %r12
|
|
popq %rbx
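# repz retq: a 2-byte return, commonly used so that a ret which is a
# branch target does not trip the branch predictor on some older AMD
# cores.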
|
|
repz retq
#ifndef __APPLE__
.size Transform_Sha256_AVX2,.-Transform_Sha256_AVX2
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl Transform_Sha256_AVX2_Len
.type Transform_Sha256_AVX2_Len,@function
.align 16
Transform_Sha256_AVX2_Len:
#else
.section __TEXT,__text
.globl _Transform_Sha256_AVX2_Len
.p2align 4
_Transform_Sha256_AVX2_Len:
#endif /* __APPLE__ */
pushq %rbx
|
|
pushq %r12
|
|
pushq %r13
|
|
pushq %r14
|
|
pushq %r15
|
|
pushq %rbp
|
|
movq %rsi, %rbp
|
|
movq %rdx, %rsi
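# Arguments (System V): %rdi = SHA-256 state, %rsi = message data
# (saved in %rbp), %rdx = length in bytes (moved to %rsi).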
|
|
subq $0x200, %rsp
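# If the length contains an odd 64-byte block (bit 6 set), copy that
# block into the state buffer at 32(%rdi) and run the one-block
# transform first, so the loop below can always consume two blocks
# per iteration.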
|
|
testb $0x40, %sil
|
|
je L_sha256_len_avx2_block
|
|
vmovdqu (%rbp), %ymm0
|
|
vmovdqu 32(%rbp), %ymm1
|
|
vmovups %ymm0, 32(%rdi)
|
|
vmovups %ymm1, 64(%rdi)
|
|
#ifndef __APPLE__
|
|
call Transform_Sha256_AVX2@plt
|
|
#else
|
|
call _Transform_Sha256_AVX2
|
|
#endif /* __APPLE__ */
|
|
addq $0x40, %rbp
|
|
subl $0x40, %esi
|
|
jz L_sha256_len_avx2_done
|
|
L_sha256_len_avx2_block:
|
|
vmovdqa L_avx2_sha256_flip_mask(%rip), %ymm13
|
|
vmovdqa L_avx2_sha256_shuf_00BA(%rip), %ymm11
|
|
vmovdqa L_avx2_sha256_shuf_DC00(%rip), %ymm12
|
|
movl (%rdi), %r8d
|
|
movl 4(%rdi), %r9d
|
|
movl 8(%rdi), %r10d
|
|
movl 12(%rdi), %r11d
|
|
movl 16(%rdi), %r12d
|
|
movl 20(%rdi), %r13d
|
|
movl 24(%rdi), %r14d
|
|
movl 28(%rdi), %r15d
|
|
# Start of loop processing two blocks
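# Each iteration schedules and compresses two 64-byte blocks: the
# vector unit builds both blocks' schedules at once (one block per
# 128-bit lane), and the scalar rounds then consume the spilled
# W[t] + K[t] values for each block in turn.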
|
|
L_sha256_len_avx2_start:
|
|
# X0, X1, X2, X3 = W[0..15]
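# vinserti128 packs the words of the first block into the low lane
# and the words of the second block (at 64(%rbp) onward) into the
# high lane of each ymm register before the byte-order flip.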
|
|
vmovdqu (%rbp), %xmm0
|
|
vmovdqu 16(%rbp), %xmm1
|
|
vmovdqu 64(%rbp), %xmm4
|
|
vmovdqu 80(%rbp), %xmm5
|
|
vinserti128 $0x01, %xmm4, %ymm0, %ymm0
|
|
vinserti128 $0x01, %xmm5, %ymm1, %ymm1
|
|
vpshufb %ymm13, %ymm0, %ymm0
|
|
vpshufb %ymm13, %ymm1, %ymm1
|
|
vmovdqu 32(%rbp), %xmm2
|
|
vmovdqu 48(%rbp), %xmm3
|
|
vmovdqu 96(%rbp), %xmm6
|
|
vmovdqu 112(%rbp), %xmm7
|
|
vinserti128 $0x01, %xmm6, %ymm2, %ymm2
|
|
vinserti128 $0x01, %xmm7, %ymm3, %ymm3
|
|
vpshufb %ymm13, %ymm2, %ymm2
|
|
vpshufb %ymm13, %ymm3, %ymm3
|
|
movl %r9d, %ebx
|
|
movl %r12d, %edx
|
|
xorl %r10d, %ebx
|
|
# set_w_k_xfer_4: 0
|
|
vpaddd 0+L_avx2_sha256_k(%rip), %ymm0, %ymm4
|
|
vpaddd 32+L_avx2_sha256_k(%rip), %ymm1, %ymm5
|
|
vmovdqu %ymm4, (%rsp)
|
|
vmovdqu %ymm5, 32(%rsp)
|
|
vpaddd 64+L_avx2_sha256_k(%rip), %ymm2, %ymm4
|
|
vpaddd 96+L_avx2_sha256_k(%rip), %ymm3, %ymm5
|
|
vmovdqu %ymm4, 64(%rsp)
|
|
vmovdqu %ymm5, 96(%rsp)
|
|
# msg_sched: 0-3
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %ymm0, %ymm1, %ymm5
|
|
vpalignr $4, %ymm2, %ymm3, %ymm4
|
|
# rnd_0: 1 - 2
|
|
movl %r9d, %eax
|
|
movl %r13d, %ecx
|
|
addl (%rsp), %r15d
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
andl %r12d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
addl %ecx, %r15d
|
|
rorl $6, %edx
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
movl %r8d, %ecx
|
|
vpsrld $18, %ymm5, %ymm8
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
vpor %ymm6, %ymm7, %ymm6
|
|
vpor %ymm8, %ymm9, %ymm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r8d, %ebx
|
|
movl %r12d, %ecx
|
|
addl 4(%rsp), %r14d
|
|
xorl %r13d, %ecx
|
|
vpsrld $3, %ymm5, %ymm9
|
|
vpxor %ymm6, %ymm8, %ymm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r11d, %edx
|
|
andl %r11d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r13d, %ecx
|
|
xorl %r11d, %edx
|
|
addl %ecx, %r14d
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
vpshufd $0xfa, %ymm3, %ymm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
movl %r15d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
vpsrld $10, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %ymm6, %ymm6
|
|
vpaddd %ymm0, %ymm4, %ymm4
|
|
# rnd_0: 1 - 3
|
|
movl %r15d, %eax
|
|
movl %r11d, %ecx
|
|
addl 8(%rsp), %r13d
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
andl %r10d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
addl %ecx, %r13d
|
|
vpxor %ymm6, %ymm7, %ymm6
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
movl %r14d, %ecx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_1: 1 - 1
|
|
movl %r14d, %ebx
|
|
movl %r10d, %ecx
|
|
addl 12(%rsp), %r12d
|
|
xorl %r11d, %ecx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r9d, %edx
|
|
andl %r9d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r11d, %ecx
|
|
xorl %r9d, %edx
|
|
addl %ecx, %r12d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
vpxor %ymm8, %ymm7, %ymm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
movl %r13d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
vpxor %ymm9, %ymm8, %ymm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
vpaddd %ymm4, %ymm9, %ymm0
|
|
# msg_sched done: 0-3
|
|
# msg_sched: 8-11
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %ymm1, %ymm2, %ymm5
|
|
vpalignr $4, %ymm3, %ymm0, %ymm4
|
|
# rnd_0: 1 - 2
|
|
movl %r13d, %eax
|
|
movl %r9d, %ecx
|
|
addl 32(%rsp), %r11d
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
andl %r8d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
addl %ecx, %r11d
|
|
rorl $6, %edx
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
movl %r12d, %ecx
|
|
vpsrld $18, %ymm5, %ymm8
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
vpor %ymm6, %ymm7, %ymm6
|
|
vpor %ymm8, %ymm9, %ymm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r12d, %ebx
|
|
movl %r8d, %ecx
|
|
addl 36(%rsp), %r10d
|
|
xorl %r9d, %ecx
|
|
vpsrld $3, %ymm5, %ymm9
|
|
vpxor %ymm6, %ymm8, %ymm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r15d, %edx
|
|
andl %r15d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r9d, %ecx
|
|
xorl %r15d, %edx
|
|
addl %ecx, %r10d
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
vpshufd $0xfa, %ymm0, %ymm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
movl %r11d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
vpsrld $10, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %ymm6, %ymm6
|
|
vpaddd %ymm1, %ymm4, %ymm4
|
|
# rnd_0: 1 - 3
|
|
movl %r11d, %eax
|
|
movl %r15d, %ecx
|
|
addl 40(%rsp), %r9d
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
andl %r14d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
addl %ecx, %r9d
|
|
vpxor %ymm6, %ymm7, %ymm6
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
movl %r10d, %ecx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_1: 1 - 1
|
|
movl %r10d, %ebx
|
|
movl %r14d, %ecx
|
|
addl 44(%rsp), %r8d
|
|
xorl %r15d, %ecx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r13d, %edx
|
|
andl %r13d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r15d, %ecx
|
|
xorl %r13d, %edx
|
|
addl %ecx, %r8d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
vpxor %ymm8, %ymm7, %ymm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
movl %r9d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
vpxor %ymm9, %ymm8, %ymm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
vpaddd %ymm4, %ymm9, %ymm1
# msg_sched done: 8-11
# msg_sched: 16-19
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %ymm2, %ymm3, %ymm5
|
|
vpalignr $4, %ymm0, %ymm1, %ymm4
|
|
# rnd_0: 1 - 2
|
|
movl %r9d, %eax
|
|
movl %r13d, %ecx
|
|
addl 64(%rsp), %r15d
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
andl %r12d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
addl %ecx, %r15d
|
|
rorl $6, %edx
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
movl %r8d, %ecx
|
|
vpsrld $18, %ymm5, %ymm8
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
vpor %ymm6, %ymm7, %ymm6
|
|
vpor %ymm8, %ymm9, %ymm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r8d, %ebx
|
|
movl %r12d, %ecx
|
|
addl 68(%rsp), %r14d
|
|
xorl %r13d, %ecx
|
|
vpsrld $3, %ymm5, %ymm9
|
|
vpxor %ymm6, %ymm8, %ymm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r11d, %edx
|
|
andl %r11d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r13d, %ecx
|
|
xorl %r11d, %edx
|
|
addl %ecx, %r14d
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
vpshufd $0xfa, %ymm1, %ymm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
movl %r15d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
vpsrld $10, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %ymm6, %ymm6
|
|
vpaddd %ymm2, %ymm4, %ymm4
|
|
# rnd_0: 1 - 3
|
|
movl %r15d, %eax
|
|
movl %r11d, %ecx
|
|
addl 72(%rsp), %r13d
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
andl %r10d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
addl %ecx, %r13d
|
|
vpxor %ymm6, %ymm7, %ymm6
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
movl %r14d, %ecx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_1: 1 - 1
|
|
movl %r14d, %ebx
|
|
movl %r10d, %ecx
|
|
addl 76(%rsp), %r12d
|
|
xorl %r11d, %ecx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r9d, %edx
|
|
andl %r9d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r11d, %ecx
|
|
xorl %r9d, %edx
|
|
addl %ecx, %r12d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
vpxor %ymm8, %ymm7, %ymm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
movl %r13d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
vpxor %ymm9, %ymm8, %ymm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
vpaddd %ymm4, %ymm9, %ymm2
|
|
# msg_sched done: 16-19
# msg_sched: 24-27
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %ymm3, %ymm0, %ymm5
|
|
vpalignr $4, %ymm1, %ymm2, %ymm4
|
|
# rnd_0: 1 - 2
|
|
movl %r13d, %eax
|
|
movl %r9d, %ecx
|
|
addl 96(%rsp), %r11d
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
andl %r8d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
addl %ecx, %r11d
|
|
rorl $6, %edx
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
movl %r12d, %ecx
|
|
vpsrld $18, %ymm5, %ymm8
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
vpor %ymm6, %ymm7, %ymm6
|
|
vpor %ymm8, %ymm9, %ymm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r12d, %ebx
|
|
movl %r8d, %ecx
|
|
addl 100(%rsp), %r10d
|
|
xorl %r9d, %ecx
|
|
vpsrld $3, %ymm5, %ymm9
|
|
vpxor %ymm6, %ymm8, %ymm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r15d, %edx
|
|
andl %r15d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r9d, %ecx
|
|
xorl %r15d, %edx
|
|
addl %ecx, %r10d
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
vpshufd $0xfa, %ymm2, %ymm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
movl %r11d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
vpsrld $10, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %ymm6, %ymm6
|
|
vpaddd %ymm3, %ymm4, %ymm4
|
|
# rnd_0: 1 - 3
|
|
movl %r11d, %eax
|
|
movl %r15d, %ecx
|
|
addl 104(%rsp), %r9d
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
andl %r14d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
addl %ecx, %r9d
|
|
vpxor %ymm6, %ymm7, %ymm6
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
movl %r10d, %ecx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_1: 1 - 1
|
|
movl %r10d, %ebx
|
|
movl %r14d, %ecx
|
|
addl 108(%rsp), %r8d
|
|
xorl %r15d, %ecx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r13d, %edx
|
|
andl %r13d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r15d, %ecx
|
|
xorl %r13d, %edx
|
|
addl %ecx, %r8d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
vpxor %ymm8, %ymm7, %ymm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
movl %r9d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
vpxor %ymm9, %ymm8, %ymm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
vpaddd %ymm4, %ymm9, %ymm3
# msg_sched done: 24-27
# set_w_k_xfer_4: 4
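# Annotation: set_w_k_xfer_4 adds the round constants K[t] from
# L_avx2_sha256_k to the schedule words just produced in ymm0-ymm3 and
# spills the W[t]+K[t] sums to the stack, so each scalar round below needs
# only a single addl from (%rsp).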
vpaddd 128+L_avx2_sha256_k(%rip), %ymm0, %ymm4
vpaddd 160+L_avx2_sha256_k(%rip), %ymm1, %ymm5
vmovdqu %ymm4, 128(%rsp)
vmovdqu %ymm5, 160(%rsp)
vpaddd 192+L_avx2_sha256_k(%rip), %ymm2, %ymm4
vpaddd 224+L_avx2_sha256_k(%rip), %ymm3, %ymm5
vmovdqu %ymm4, 192(%rsp)
vmovdqu %ymm5, 224(%rsp)
# msg_sched: 32-35
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %ymm0, %ymm1, %ymm5
|
|
vpalignr $4, %ymm2, %ymm3, %ymm4
|
|
# rnd_0: 1 - 2
|
|
movl %r9d, %eax
|
|
movl %r13d, %ecx
|
|
addl 128(%rsp), %r15d
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
andl %r12d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
addl %ecx, %r15d
|
|
rorl $6, %edx
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
movl %r8d, %ecx
|
|
vpsrld $18, %ymm5, %ymm8
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
vpor %ymm6, %ymm7, %ymm6
|
|
vpor %ymm8, %ymm9, %ymm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r8d, %ebx
|
|
movl %r12d, %ecx
|
|
addl 132(%rsp), %r14d
|
|
xorl %r13d, %ecx
|
|
vpsrld $3, %ymm5, %ymm9
|
|
vpxor %ymm6, %ymm8, %ymm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r11d, %edx
|
|
andl %r11d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r13d, %ecx
|
|
xorl %r11d, %edx
|
|
addl %ecx, %r14d
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
vpshufd $0xfa, %ymm3, %ymm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
movl %r15d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
vpsrld $10, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %ymm6, %ymm6
|
|
vpaddd %ymm0, %ymm4, %ymm4
|
|
# rnd_0: 1 - 3
|
|
movl %r15d, %eax
|
|
movl %r11d, %ecx
|
|
addl 136(%rsp), %r13d
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
andl %r10d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
addl %ecx, %r13d
|
|
vpxor %ymm6, %ymm7, %ymm6
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
movl %r14d, %ecx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_1: 1 - 1
|
|
movl %r14d, %ebx
|
|
movl %r10d, %ecx
|
|
addl 140(%rsp), %r12d
|
|
xorl %r11d, %ecx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r9d, %edx
|
|
andl %r9d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r11d, %ecx
|
|
xorl %r9d, %edx
|
|
addl %ecx, %r12d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
vpxor %ymm8, %ymm7, %ymm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
movl %r13d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
vpxor %ymm9, %ymm8, %ymm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
vpaddd %ymm4, %ymm9, %ymm0
|
|
# msg_sched done: 32-35
# msg_sched: 40-43
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %ymm1, %ymm2, %ymm5
|
|
vpalignr $4, %ymm3, %ymm0, %ymm4
|
|
# rnd_0: 1 - 2
|
|
movl %r13d, %eax
|
|
movl %r9d, %ecx
|
|
addl 160(%rsp), %r11d
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
andl %r8d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
addl %ecx, %r11d
|
|
rorl $6, %edx
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
movl %r12d, %ecx
|
|
vpsrld $18, %ymm5, %ymm8
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
vpor %ymm6, %ymm7, %ymm6
|
|
vpor %ymm8, %ymm9, %ymm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r12d, %ebx
|
|
movl %r8d, %ecx
|
|
addl 164(%rsp), %r10d
|
|
xorl %r9d, %ecx
|
|
vpsrld $3, %ymm5, %ymm9
|
|
vpxor %ymm6, %ymm8, %ymm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r15d, %edx
|
|
andl %r15d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r9d, %ecx
|
|
xorl %r15d, %edx
|
|
addl %ecx, %r10d
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
vpshufd $0xfa, %ymm0, %ymm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
movl %r11d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
vpsrld $10, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %ymm6, %ymm6
|
|
vpaddd %ymm1, %ymm4, %ymm4
|
|
# rnd_0: 1 - 3
|
|
movl %r11d, %eax
|
|
movl %r15d, %ecx
|
|
addl 168(%rsp), %r9d
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
andl %r14d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
addl %ecx, %r9d
|
|
vpxor %ymm6, %ymm7, %ymm6
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
movl %r10d, %ecx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_1: 1 - 1
|
|
movl %r10d, %ebx
|
|
movl %r14d, %ecx
|
|
addl 172(%rsp), %r8d
|
|
xorl %r15d, %ecx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r13d, %edx
|
|
andl %r13d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r15d, %ecx
|
|
xorl %r13d, %edx
|
|
addl %ecx, %r8d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
vpxor %ymm8, %ymm7, %ymm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
movl %r9d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
vpxor %ymm9, %ymm8, %ymm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
vpaddd %ymm4, %ymm9, %ymm1
|
|
# msg_sched done: 40-43
# msg_sched: 48-51
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %ymm2, %ymm3, %ymm5
|
|
vpalignr $4, %ymm0, %ymm1, %ymm4
|
|
# rnd_0: 1 - 2
|
|
movl %r9d, %eax
|
|
movl %r13d, %ecx
|
|
addl 192(%rsp), %r15d
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
andl %r12d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
addl %ecx, %r15d
|
|
rorl $6, %edx
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
movl %r8d, %ecx
|
|
vpsrld $18, %ymm5, %ymm8
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
vpor %ymm6, %ymm7, %ymm6
|
|
vpor %ymm8, %ymm9, %ymm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r8d, %ebx
|
|
movl %r12d, %ecx
|
|
addl 196(%rsp), %r14d
|
|
xorl %r13d, %ecx
|
|
vpsrld $3, %ymm5, %ymm9
|
|
vpxor %ymm6, %ymm8, %ymm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r11d, %edx
|
|
andl %r11d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r13d, %ecx
|
|
xorl %r11d, %edx
|
|
addl %ecx, %r14d
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
vpshufd $0xfa, %ymm1, %ymm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
movl %r15d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
vpsrld $10, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %ymm6, %ymm6
|
|
vpaddd %ymm2, %ymm4, %ymm4
|
|
# rnd_0: 1 - 3
|
|
movl %r15d, %eax
|
|
movl %r11d, %ecx
|
|
addl 200(%rsp), %r13d
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
andl %r10d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
addl %ecx, %r13d
|
|
vpxor %ymm6, %ymm7, %ymm6
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
movl %r14d, %ecx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_1: 1 - 1
|
|
movl %r14d, %ebx
|
|
movl %r10d, %ecx
|
|
addl 204(%rsp), %r12d
|
|
xorl %r11d, %ecx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r9d, %edx
|
|
andl %r9d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r11d, %ecx
|
|
xorl %r9d, %edx
|
|
addl %ecx, %r12d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
vpxor %ymm8, %ymm7, %ymm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
movl %r13d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
vpxor %ymm9, %ymm8, %ymm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
vpaddd %ymm4, %ymm9, %ymm2
|
|
# msg_sched done: 48-51
# msg_sched: 56-59
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %ymm3, %ymm0, %ymm5
|
|
vpalignr $4, %ymm1, %ymm2, %ymm4
|
|
# rnd_0: 1 - 2
|
|
movl %r13d, %eax
|
|
movl %r9d, %ecx
|
|
addl 224(%rsp), %r11d
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
andl %r8d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
addl %ecx, %r11d
|
|
rorl $6, %edx
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
movl %r12d, %ecx
|
|
vpsrld $18, %ymm5, %ymm8
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
vpor %ymm6, %ymm7, %ymm6
|
|
vpor %ymm8, %ymm9, %ymm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r12d, %ebx
|
|
movl %r8d, %ecx
|
|
addl 228(%rsp), %r10d
|
|
xorl %r9d, %ecx
|
|
vpsrld $3, %ymm5, %ymm9
|
|
vpxor %ymm6, %ymm8, %ymm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r15d, %edx
|
|
andl %r15d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r9d, %ecx
|
|
xorl %r15d, %edx
|
|
addl %ecx, %r10d
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
vpshufd $0xfa, %ymm2, %ymm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
movl %r11d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
vpsrld $10, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %ymm6, %ymm6
|
|
vpaddd %ymm3, %ymm4, %ymm4
|
|
# rnd_0: 1 - 3
|
|
movl %r11d, %eax
|
|
movl %r15d, %ecx
|
|
addl 232(%rsp), %r9d
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
andl %r14d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
addl %ecx, %r9d
|
|
vpxor %ymm6, %ymm7, %ymm6
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
movl %r10d, %ecx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_1: 1 - 1
|
|
movl %r10d, %ebx
|
|
movl %r14d, %ecx
|
|
addl 236(%rsp), %r8d
|
|
xorl %r15d, %ecx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r13d, %edx
|
|
andl %r13d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r15d, %ecx
|
|
xorl %r13d, %edx
|
|
addl %ecx, %r8d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
vpxor %ymm8, %ymm7, %ymm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
movl %r9d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
vpxor %ymm9, %ymm8, %ymm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
vpaddd %ymm4, %ymm9, %ymm3
# msg_sched done: 56-59
# set_w_k_xfer_4: 8
vpaddd 256+L_avx2_sha256_k(%rip), %ymm0, %ymm4
vpaddd 288+L_avx2_sha256_k(%rip), %ymm1, %ymm5
vmovdqu %ymm4, 256(%rsp)
vmovdqu %ymm5, 288(%rsp)
vpaddd 320+L_avx2_sha256_k(%rip), %ymm2, %ymm4
vpaddd 352+L_avx2_sha256_k(%rip), %ymm3, %ymm5
vmovdqu %ymm4, 320(%rsp)
vmovdqu %ymm5, 352(%rsp)
# msg_sched: 64-67
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %ymm0, %ymm1, %ymm5
|
|
vpalignr $4, %ymm2, %ymm3, %ymm4
|
|
# rnd_0: 1 - 2
|
|
movl %r9d, %eax
|
|
movl %r13d, %ecx
|
|
addl 256(%rsp), %r15d
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
andl %r12d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
addl %ecx, %r15d
|
|
rorl $6, %edx
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
movl %r8d, %ecx
|
|
vpsrld $18, %ymm5, %ymm8
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
vpor %ymm6, %ymm7, %ymm6
|
|
vpor %ymm8, %ymm9, %ymm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r8d, %ebx
|
|
movl %r12d, %ecx
|
|
addl 260(%rsp), %r14d
|
|
xorl %r13d, %ecx
|
|
vpsrld $3, %ymm5, %ymm9
|
|
vpxor %ymm6, %ymm8, %ymm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r11d, %edx
|
|
andl %r11d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r13d, %ecx
|
|
xorl %r11d, %edx
|
|
addl %ecx, %r14d
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
vpshufd $0xfa, %ymm3, %ymm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
movl %r15d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
vpsrld $10, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %ymm6, %ymm6
|
|
vpaddd %ymm0, %ymm4, %ymm4
|
|
# rnd_0: 1 - 3
|
|
movl %r15d, %eax
|
|
movl %r11d, %ecx
|
|
addl 264(%rsp), %r13d
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
andl %r10d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
addl %ecx, %r13d
|
|
vpxor %ymm6, %ymm7, %ymm6
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
movl %r14d, %ecx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_1: 1 - 1
|
|
movl %r14d, %ebx
|
|
movl %r10d, %ecx
|
|
addl 268(%rsp), %r12d
|
|
xorl %r11d, %ecx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r9d, %edx
|
|
andl %r9d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r11d, %ecx
|
|
xorl %r9d, %edx
|
|
addl %ecx, %r12d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
vpxor %ymm8, %ymm7, %ymm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
movl %r13d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
vpxor %ymm9, %ymm8, %ymm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
vpaddd %ymm4, %ymm9, %ymm0
|
|
# msg_sched done: 64-67
# msg_sched: 72-75
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %ymm1, %ymm2, %ymm5
|
|
vpalignr $4, %ymm3, %ymm0, %ymm4
|
|
# rnd_0: 1 - 2
|
|
movl %r13d, %eax
|
|
movl %r9d, %ecx
|
|
addl 288(%rsp), %r11d
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
andl %r8d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
addl %ecx, %r11d
|
|
rorl $6, %edx
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
movl %r12d, %ecx
|
|
vpsrld $18, %ymm5, %ymm8
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
vpor %ymm6, %ymm7, %ymm6
|
|
vpor %ymm8, %ymm9, %ymm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r12d, %ebx
|
|
movl %r8d, %ecx
|
|
addl 292(%rsp), %r10d
|
|
xorl %r9d, %ecx
|
|
vpsrld $3, %ymm5, %ymm9
|
|
vpxor %ymm6, %ymm8, %ymm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r15d, %edx
|
|
andl %r15d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r9d, %ecx
|
|
xorl %r15d, %edx
|
|
addl %ecx, %r10d
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
vpshufd $0xfa, %ymm0, %ymm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
movl %r11d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
vpsrld $10, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %ymm6, %ymm6
|
|
vpaddd %ymm1, %ymm4, %ymm4
|
|
# rnd_0: 1 - 3
|
|
movl %r11d, %eax
|
|
movl %r15d, %ecx
|
|
addl 296(%rsp), %r9d
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
andl %r14d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
addl %ecx, %r9d
|
|
vpxor %ymm6, %ymm7, %ymm6
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
movl %r10d, %ecx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_1: 1 - 1
|
|
movl %r10d, %ebx
|
|
movl %r14d, %ecx
|
|
addl 300(%rsp), %r8d
|
|
xorl %r15d, %ecx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r13d, %edx
|
|
andl %r13d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r15d, %ecx
|
|
xorl %r13d, %edx
|
|
addl %ecx, %r8d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
vpxor %ymm8, %ymm7, %ymm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
movl %r9d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
vpxor %ymm9, %ymm8, %ymm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
vpaddd %ymm4, %ymm9, %ymm1
|
|
# msg_sched done: 72-75
# msg_sched: 80-83
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %ymm2, %ymm3, %ymm5
|
|
vpalignr $4, %ymm0, %ymm1, %ymm4
|
|
# rnd_0: 1 - 2
|
|
movl %r9d, %eax
|
|
movl %r13d, %ecx
|
|
addl 320(%rsp), %r15d
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
andl %r12d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r14d, %ecx
|
|
xorl %r12d, %edx
|
|
addl %ecx, %r15d
|
|
rorl $6, %edx
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
movl %r8d, %ecx
|
|
vpsrld $18, %ymm5, %ymm8
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
vpor %ymm6, %ymm7, %ymm6
|
|
vpor %ymm8, %ymm9, %ymm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r8d, %ebx
|
|
movl %r12d, %ecx
|
|
addl 324(%rsp), %r14d
|
|
xorl %r13d, %ecx
|
|
vpsrld $3, %ymm5, %ymm9
|
|
vpxor %ymm6, %ymm8, %ymm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r11d, %edx
|
|
andl %r11d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r13d, %ecx
|
|
xorl %r11d, %edx
|
|
addl %ecx, %r14d
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
vpshufd $0xfa, %ymm1, %ymm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
movl %r15d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
vpsrld $10, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %ymm6, %ymm6
|
|
vpaddd %ymm2, %ymm4, %ymm4
|
|
# rnd_0: 1 - 3
|
|
movl %r15d, %eax
|
|
movl %r11d, %ecx
|
|
addl 328(%rsp), %r13d
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
andl %r10d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r12d, %ecx
|
|
xorl %r10d, %edx
|
|
addl %ecx, %r13d
|
|
vpxor %ymm6, %ymm7, %ymm6
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
movl %r14d, %ecx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_1: 1 - 1
|
|
movl %r14d, %ebx
|
|
movl %r10d, %ecx
|
|
addl 332(%rsp), %r12d
|
|
xorl %r11d, %ecx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r9d, %edx
|
|
andl %r9d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r11d, %ecx
|
|
xorl %r9d, %edx
|
|
addl %ecx, %r12d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
vpxor %ymm8, %ymm7, %ymm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
movl %r13d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
vpxor %ymm9, %ymm8, %ymm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
vpaddd %ymm4, %ymm9, %ymm2
|
|
# msg_sched done: 80-83
# msg_sched: 88-91
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpalignr $4, %ymm3, %ymm0, %ymm5
|
|
vpalignr $4, %ymm1, %ymm2, %ymm4
|
|
# rnd_0: 1 - 2
|
|
movl %r13d, %eax
|
|
movl %r9d, %ecx
|
|
addl 352(%rsp), %r11d
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
andl %r8d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 3 - 4
|
|
rorl $5, %edx
|
|
xorl %r10d, %ecx
|
|
xorl %r8d, %edx
|
|
addl %ecx, %r11d
|
|
rorl $6, %edx
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
movl %r12d, %ecx
|
|
vpsrld $18, %ymm5, %ymm8
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 5 - 6
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
vpor %ymm6, %ymm7, %ymm6
|
|
vpor %ymm8, %ymm9, %ymm8
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
# rnd_1: 0 - 1
|
|
rorl $14, %edx
|
|
movl %r12d, %ebx
|
|
movl %r8d, %ecx
|
|
addl 356(%rsp), %r10d
|
|
xorl %r9d, %ecx
|
|
vpsrld $3, %ymm5, %ymm9
|
|
vpxor %ymm6, %ymm8, %ymm6
|
|
# rnd_1: 2 - 3
|
|
xorl %r15d, %edx
|
|
andl %r15d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r9d, %ecx
|
|
xorl %r15d, %edx
|
|
addl %ecx, %r10d
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
vpshufd $0xfa, %ymm2, %ymm6
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
movl %r11d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
vpsrld $10, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 6 - 7
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
# rnd_0: 0 - 0
|
|
rorl $14, %edx
|
|
vpsrlq $0x11, %ymm6, %ymm6
|
|
vpaddd %ymm3, %ymm4, %ymm4
|
|
# rnd_0: 1 - 3
|
|
movl %r11d, %eax
|
|
movl %r15d, %ecx
|
|
addl 360(%rsp), %r9d
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
andl %r14d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r8d, %ecx
|
|
xorl %r14d, %edx
|
|
addl %ecx, %r9d
|
|
vpxor %ymm6, %ymm7, %ymm6
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 4 - 4
|
|
rorl $6, %edx
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
movl %r10d, %ecx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 5 - 5
|
|
andl %eax, %ebx
|
|
rorl $9, %ecx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
# rnd_1: 0 - 0
|
|
rorl $14, %edx
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_1: 1 - 1
|
|
movl %r10d, %ebx
|
|
movl %r14d, %ecx
|
|
addl 364(%rsp), %r8d
|
|
xorl %r15d, %ecx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 2 - 3
|
|
xorl %r13d, %edx
|
|
andl %r13d, %ecx
|
|
rorl $5, %edx
|
|
xorl %r15d, %ecx
|
|
xorl %r13d, %edx
|
|
addl %ecx, %r8d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
vpxor %ymm8, %ymm7, %ymm8
|
|
# rnd_1: 4 - 5
|
|
rorl $6, %edx
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
movl %r9d, %ecx
|
|
andl %ebx, %eax
|
|
rorl $9, %ecx
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
vpxor %ymm9, %ymm8, %ymm9
|
|
# rnd_1: 6 - 6
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 7 - 7
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
vpaddd %ymm4, %ymm9, %ymm3
# msg_sched done: 88-91
# set_w_k_xfer_4: 12
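# Annotation: last of the constant-transfer steps; the message schedule is
# now complete, so only plain rounds (rnd_all_4) follow from here.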
vpaddd 384+L_avx2_sha256_k(%rip), %ymm0, %ymm4
vpaddd 416+L_avx2_sha256_k(%rip), %ymm1, %ymm5
vmovdqu %ymm4, 384(%rsp)
vmovdqu %ymm5, 416(%rsp)
vpaddd 448+L_avx2_sha256_k(%rip), %ymm2, %ymm4
vpaddd 480+L_avx2_sha256_k(%rip), %ymm3, %ymm5
vmovdqu %ymm4, 448(%rsp)
vmovdqu %ymm5, 480(%rsp)
# rnd_all_4: 24-27
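# Annotation: each scalar round is the FIPS 180-4 compression step
#   T1 = h + Sigma1(e) + Ch(e,f,g) + (W[t] + K[t])   (loaded from the stack)
#   T2 = Sigma0(a) + Maj(a,b,c)
#   Sigma1(e) = ROTR(e,6) ^ ROTR(e,11) ^ ROTR(e,25)
#   Sigma0(a) = ROTR(a,2) ^ ROTR(a,13) ^ ROTR(a,22)
# The rorl $14/$5/$6 chain on %edx builds Sigma1, the rorl $9/$11/$2 chain
# on %ecx builds Sigma0, and the andl/xorl sequences on %ecx and %eax/%ebx
# fold in Ch and Maj.  The working variables a..h stay in %r8d-%r15d and
# rotate by renaming from round to round instead of being moved.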
addl 384(%rsp), %r15d
|
|
movl %r13d, %ecx
|
|
movl %r9d, %eax
|
|
xorl %r14d, %ecx
|
|
rorl $14, %edx
|
|
andl %r12d, %ecx
|
|
xorl %r12d, %edx
|
|
xorl %r14d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r15d
|
|
xorl %r12d, %edx
|
|
xorl %r8d, %eax
|
|
rorl $6, %edx
|
|
movl %r8d, %ecx
|
|
addl %edx, %r15d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
addl 388(%rsp), %r14d
|
|
movl %r12d, %ecx
|
|
movl %r8d, %ebx
|
|
xorl %r13d, %ecx
|
|
rorl $14, %edx
|
|
andl %r11d, %ecx
|
|
xorl %r11d, %edx
|
|
xorl %r13d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r14d
|
|
xorl %r11d, %edx
|
|
xorl %r15d, %ebx
|
|
rorl $6, %edx
|
|
movl %r15d, %ecx
|
|
addl %edx, %r14d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
addl 392(%rsp), %r13d
|
|
movl %r11d, %ecx
|
|
movl %r15d, %eax
|
|
xorl %r12d, %ecx
|
|
rorl $14, %edx
|
|
andl %r10d, %ecx
|
|
xorl %r10d, %edx
|
|
xorl %r12d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r13d
|
|
xorl %r10d, %edx
|
|
xorl %r14d, %eax
|
|
rorl $6, %edx
|
|
movl %r14d, %ecx
|
|
addl %edx, %r13d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
addl 396(%rsp), %r12d
|
|
movl %r10d, %ecx
|
|
movl %r14d, %ebx
|
|
xorl %r11d, %ecx
|
|
rorl $14, %edx
|
|
andl %r9d, %ecx
|
|
xorl %r9d, %edx
|
|
xorl %r11d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r12d
|
|
xorl %r9d, %edx
|
|
xorl %r13d, %ebx
|
|
rorl $6, %edx
|
|
movl %r13d, %ecx
|
|
addl %edx, %r12d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
# rnd_all_4: 26-29
addl 416(%rsp), %r11d
|
|
movl %r9d, %ecx
|
|
movl %r13d, %eax
|
|
xorl %r10d, %ecx
|
|
rorl $14, %edx
|
|
andl %r8d, %ecx
|
|
xorl %r8d, %edx
|
|
xorl %r10d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r11d
|
|
xorl %r8d, %edx
|
|
xorl %r12d, %eax
|
|
rorl $6, %edx
|
|
movl %r12d, %ecx
|
|
addl %edx, %r11d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
addl 420(%rsp), %r10d
|
|
movl %r8d, %ecx
|
|
movl %r12d, %ebx
|
|
xorl %r9d, %ecx
|
|
rorl $14, %edx
|
|
andl %r15d, %ecx
|
|
xorl %r15d, %edx
|
|
xorl %r9d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r10d
|
|
xorl %r15d, %edx
|
|
xorl %r11d, %ebx
|
|
rorl $6, %edx
|
|
movl %r11d, %ecx
|
|
addl %edx, %r10d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
addl 424(%rsp), %r9d
|
|
movl %r15d, %ecx
|
|
movl %r11d, %eax
|
|
xorl %r8d, %ecx
|
|
rorl $14, %edx
|
|
andl %r14d, %ecx
|
|
xorl %r14d, %edx
|
|
xorl %r8d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r9d
|
|
xorl %r14d, %edx
|
|
xorl %r10d, %eax
|
|
rorl $6, %edx
|
|
movl %r10d, %ecx
|
|
addl %edx, %r9d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
addl 428(%rsp), %r8d
|
|
movl %r14d, %ecx
|
|
movl %r10d, %ebx
|
|
xorl %r15d, %ecx
|
|
rorl $14, %edx
|
|
andl %r13d, %ecx
|
|
xorl %r13d, %edx
|
|
xorl %r15d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r8d
|
|
xorl %r13d, %edx
|
|
xorl %r9d, %ebx
|
|
rorl $6, %edx
|
|
movl %r9d, %ecx
|
|
addl %edx, %r8d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
# rnd_all_4: 28-31
addl 448(%rsp), %r15d
|
|
movl %r13d, %ecx
|
|
movl %r9d, %eax
|
|
xorl %r14d, %ecx
|
|
rorl $14, %edx
|
|
andl %r12d, %ecx
|
|
xorl %r12d, %edx
|
|
xorl %r14d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r15d
|
|
xorl %r12d, %edx
|
|
xorl %r8d, %eax
|
|
rorl $6, %edx
|
|
movl %r8d, %ecx
|
|
addl %edx, %r15d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
addl 452(%rsp), %r14d
|
|
movl %r12d, %ecx
|
|
movl %r8d, %ebx
|
|
xorl %r13d, %ecx
|
|
rorl $14, %edx
|
|
andl %r11d, %ecx
|
|
xorl %r11d, %edx
|
|
xorl %r13d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r14d
|
|
xorl %r11d, %edx
|
|
xorl %r15d, %ebx
|
|
rorl $6, %edx
|
|
movl %r15d, %ecx
|
|
addl %edx, %r14d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
addl 456(%rsp), %r13d
|
|
movl %r11d, %ecx
|
|
movl %r15d, %eax
|
|
xorl %r12d, %ecx
|
|
rorl $14, %edx
|
|
andl %r10d, %ecx
|
|
xorl %r10d, %edx
|
|
xorl %r12d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r13d
|
|
xorl %r10d, %edx
|
|
xorl %r14d, %eax
|
|
rorl $6, %edx
|
|
movl %r14d, %ecx
|
|
addl %edx, %r13d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
addl 460(%rsp), %r12d
|
|
movl %r10d, %ecx
|
|
movl %r14d, %ebx
|
|
xorl %r11d, %ecx
|
|
rorl $14, %edx
|
|
andl %r9d, %ecx
|
|
xorl %r9d, %edx
|
|
xorl %r11d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r12d
|
|
xorl %r9d, %edx
|
|
xorl %r13d, %ebx
|
|
rorl $6, %edx
|
|
movl %r13d, %ecx
|
|
addl %edx, %r12d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
# rnd_all_4: 30-33
addl 480(%rsp), %r11d
|
|
movl %r9d, %ecx
|
|
movl %r13d, %eax
|
|
xorl %r10d, %ecx
|
|
rorl $14, %edx
|
|
andl %r8d, %ecx
|
|
xorl %r8d, %edx
|
|
xorl %r10d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r11d
|
|
xorl %r8d, %edx
|
|
xorl %r12d, %eax
|
|
rorl $6, %edx
|
|
movl %r12d, %ecx
|
|
addl %edx, %r11d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
addl 484(%rsp), %r10d
|
|
movl %r8d, %ecx
|
|
movl %r12d, %ebx
|
|
xorl %r9d, %ecx
|
|
rorl $14, %edx
|
|
andl %r15d, %ecx
|
|
xorl %r15d, %edx
|
|
xorl %r9d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r10d
|
|
xorl %r15d, %edx
|
|
xorl %r11d, %ebx
|
|
rorl $6, %edx
|
|
movl %r11d, %ecx
|
|
addl %edx, %r10d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
addl 488(%rsp), %r9d
|
|
movl %r15d, %ecx
|
|
movl %r11d, %eax
|
|
xorl %r8d, %ecx
|
|
rorl $14, %edx
|
|
andl %r14d, %ecx
|
|
xorl %r14d, %edx
|
|
xorl %r8d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r9d
|
|
xorl %r14d, %edx
|
|
xorl %r10d, %eax
|
|
rorl $6, %edx
|
|
movl %r10d, %ecx
|
|
addl %edx, %r9d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
addl 492(%rsp), %r8d
|
|
movl %r14d, %ecx
|
|
movl %r10d, %ebx
|
|
xorl %r15d, %ecx
|
|
rorl $14, %edx
|
|
andl %r13d, %ecx
|
|
xorl %r13d, %edx
|
|
xorl %r15d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r8d
|
|
xorl %r13d, %edx
|
|
xorl %r9d, %ebx
|
|
rorl $6, %edx
|
|
movl %r9d, %ecx
|
|
addl %edx, %r8d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
addl %ecx, %r8d
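# Annotation: all 64 rounds for the first block of this pass are done; fold
# the working variables back into the hash state at (%rdi) and store the
# updated digest, which then seeds the rounds for the second block.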
addl (%rdi), %r8d
addl 4(%rdi), %r9d
addl 8(%rdi), %r10d
addl 12(%rdi), %r11d
addl 16(%rdi), %r12d
addl 20(%rdi), %r13d
addl 24(%rdi), %r14d
addl 28(%rdi), %r15d
movl %r8d, (%rdi)
movl %r9d, 4(%rdi)
movl %r10d, 8(%rdi)
movl %r11d, 12(%rdi)
movl %r12d, 16(%rdi)
movl %r13d, 20(%rdi)
movl %r14d, 24(%rdi)
movl %r15d, 28(%rdi)
movl %r9d, %ebx
movl %r12d, %edx
xorl %r10d, %ebx
# rnd_all_4: 1-4
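# Annotation: rounds for the second block of the pass.  Its W[t]+K[t]
# values are already on the stack in the other 16-byte half of each
# 32-byte slot (offsets 16-28, 48-60, ... instead of the 0-12, 32-44, ...
# used for the first block), so no further message scheduling is needed;
# %ebx and %edx were pre-seeded above for the Maj and Sigma1 chains.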
addl 16(%rsp), %r15d
|
|
movl %r13d, %ecx
|
|
movl %r9d, %eax
|
|
xorl %r14d, %ecx
|
|
rorl $14, %edx
|
|
andl %r12d, %ecx
|
|
xorl %r12d, %edx
|
|
xorl %r14d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r15d
|
|
xorl %r12d, %edx
|
|
xorl %r8d, %eax
|
|
rorl $6, %edx
|
|
movl %r8d, %ecx
|
|
addl %edx, %r15d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
addl 20(%rsp), %r14d
|
|
movl %r12d, %ecx
|
|
movl %r8d, %ebx
|
|
xorl %r13d, %ecx
|
|
rorl $14, %edx
|
|
andl %r11d, %ecx
|
|
xorl %r11d, %edx
|
|
xorl %r13d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r14d
|
|
xorl %r11d, %edx
|
|
xorl %r15d, %ebx
|
|
rorl $6, %edx
|
|
movl %r15d, %ecx
|
|
addl %edx, %r14d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
addl 24(%rsp), %r13d
|
|
movl %r11d, %ecx
|
|
movl %r15d, %eax
|
|
xorl %r12d, %ecx
|
|
rorl $14, %edx
|
|
andl %r10d, %ecx
|
|
xorl %r10d, %edx
|
|
xorl %r12d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r13d
|
|
xorl %r10d, %edx
|
|
xorl %r14d, %eax
|
|
rorl $6, %edx
|
|
movl %r14d, %ecx
|
|
addl %edx, %r13d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
addl 28(%rsp), %r12d
|
|
movl %r10d, %ecx
|
|
movl %r14d, %ebx
|
|
xorl %r11d, %ecx
|
|
rorl $14, %edx
|
|
andl %r9d, %ecx
|
|
xorl %r9d, %edx
|
|
xorl %r11d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r12d
|
|
xorl %r9d, %edx
|
|
xorl %r13d, %ebx
|
|
rorl $6, %edx
|
|
movl %r13d, %ecx
|
|
addl %edx, %r12d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
# rnd_all_4: 3-6
addl 48(%rsp), %r11d
|
|
movl %r9d, %ecx
|
|
movl %r13d, %eax
|
|
xorl %r10d, %ecx
|
|
rorl $14, %edx
|
|
andl %r8d, %ecx
|
|
xorl %r8d, %edx
|
|
xorl %r10d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r11d
|
|
xorl %r8d, %edx
|
|
xorl %r12d, %eax
|
|
rorl $6, %edx
|
|
movl %r12d, %ecx
|
|
addl %edx, %r11d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
addl 52(%rsp), %r10d
|
|
movl %r8d, %ecx
|
|
movl %r12d, %ebx
|
|
xorl %r9d, %ecx
|
|
rorl $14, %edx
|
|
andl %r15d, %ecx
|
|
xorl %r15d, %edx
|
|
xorl %r9d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r10d
|
|
xorl %r15d, %edx
|
|
xorl %r11d, %ebx
|
|
rorl $6, %edx
|
|
movl %r11d, %ecx
|
|
addl %edx, %r10d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
addl 56(%rsp), %r9d
|
|
movl %r15d, %ecx
|
|
movl %r11d, %eax
|
|
xorl %r8d, %ecx
|
|
rorl $14, %edx
|
|
andl %r14d, %ecx
|
|
xorl %r14d, %edx
|
|
xorl %r8d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r9d
|
|
xorl %r14d, %edx
|
|
xorl %r10d, %eax
|
|
rorl $6, %edx
|
|
movl %r10d, %ecx
|
|
addl %edx, %r9d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
addl 60(%rsp), %r8d
|
|
movl %r14d, %ecx
|
|
movl %r10d, %ebx
|
|
xorl %r15d, %ecx
|
|
rorl $14, %edx
|
|
andl %r13d, %ecx
|
|
xorl %r13d, %edx
|
|
xorl %r15d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r8d
|
|
xorl %r13d, %edx
|
|
xorl %r9d, %ebx
|
|
rorl $6, %edx
|
|
movl %r9d, %ecx
|
|
addl %edx, %r8d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
# rnd_all_4: 5-8
addl 80(%rsp), %r15d
|
|
movl %r13d, %ecx
|
|
movl %r9d, %eax
|
|
xorl %r14d, %ecx
|
|
rorl $14, %edx
|
|
andl %r12d, %ecx
|
|
xorl %r12d, %edx
|
|
xorl %r14d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r15d
|
|
xorl %r12d, %edx
|
|
xorl %r8d, %eax
|
|
rorl $6, %edx
|
|
movl %r8d, %ecx
|
|
addl %edx, %r15d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
addl 84(%rsp), %r14d
|
|
movl %r12d, %ecx
|
|
movl %r8d, %ebx
|
|
xorl %r13d, %ecx
|
|
rorl $14, %edx
|
|
andl %r11d, %ecx
|
|
xorl %r11d, %edx
|
|
xorl %r13d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r14d
|
|
xorl %r11d, %edx
|
|
xorl %r15d, %ebx
|
|
rorl $6, %edx
|
|
movl %r15d, %ecx
|
|
addl %edx, %r14d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
addl 88(%rsp), %r13d
|
|
movl %r11d, %ecx
|
|
movl %r15d, %eax
|
|
xorl %r12d, %ecx
|
|
rorl $14, %edx
|
|
andl %r10d, %ecx
|
|
xorl %r10d, %edx
|
|
xorl %r12d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r13d
|
|
xorl %r10d, %edx
|
|
xorl %r14d, %eax
|
|
rorl $6, %edx
|
|
movl %r14d, %ecx
|
|
addl %edx, %r13d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
addl 92(%rsp), %r12d
|
|
movl %r10d, %ecx
|
|
movl %r14d, %ebx
|
|
xorl %r11d, %ecx
|
|
rorl $14, %edx
|
|
andl %r9d, %ecx
|
|
xorl %r9d, %edx
|
|
xorl %r11d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r12d
|
|
xorl %r9d, %edx
|
|
xorl %r13d, %ebx
|
|
rorl $6, %edx
|
|
movl %r13d, %ecx
|
|
addl %edx, %r12d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
# rnd_all_4: 7-10
addl 112(%rsp), %r11d
|
|
movl %r9d, %ecx
|
|
movl %r13d, %eax
|
|
xorl %r10d, %ecx
|
|
rorl $14, %edx
|
|
andl %r8d, %ecx
|
|
xorl %r8d, %edx
|
|
xorl %r10d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r11d
|
|
xorl %r8d, %edx
|
|
xorl %r12d, %eax
|
|
rorl $6, %edx
|
|
movl %r12d, %ecx
|
|
addl %edx, %r11d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
addl 116(%rsp), %r10d
|
|
movl %r8d, %ecx
|
|
movl %r12d, %ebx
|
|
xorl %r9d, %ecx
|
|
rorl $14, %edx
|
|
andl %r15d, %ecx
|
|
xorl %r15d, %edx
|
|
xorl %r9d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r10d
|
|
xorl %r15d, %edx
|
|
xorl %r11d, %ebx
|
|
rorl $6, %edx
|
|
movl %r11d, %ecx
|
|
addl %edx, %r10d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
addl 120(%rsp), %r9d
|
|
movl %r15d, %ecx
|
|
movl %r11d, %eax
|
|
xorl %r8d, %ecx
|
|
rorl $14, %edx
|
|
andl %r14d, %ecx
|
|
xorl %r14d, %edx
|
|
xorl %r8d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r9d
|
|
xorl %r14d, %edx
|
|
xorl %r10d, %eax
|
|
rorl $6, %edx
|
|
movl %r10d, %ecx
|
|
addl %edx, %r9d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
addl 124(%rsp), %r8d
|
|
movl %r14d, %ecx
|
|
movl %r10d, %ebx
|
|
xorl %r15d, %ecx
|
|
rorl $14, %edx
|
|
andl %r13d, %ecx
|
|
xorl %r13d, %edx
|
|
xorl %r15d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r8d
|
|
xorl %r13d, %edx
|
|
xorl %r9d, %ebx
|
|
rorl $6, %edx
|
|
movl %r9d, %ecx
|
|
addl %edx, %r8d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
# rnd_all_4: 9-12
addl 144(%rsp), %r15d
|
|
movl %r13d, %ecx
|
|
movl %r9d, %eax
|
|
xorl %r14d, %ecx
|
|
rorl $14, %edx
|
|
andl %r12d, %ecx
|
|
xorl %r12d, %edx
|
|
xorl %r14d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r15d
|
|
xorl %r12d, %edx
|
|
xorl %r8d, %eax
|
|
rorl $6, %edx
|
|
movl %r8d, %ecx
|
|
addl %edx, %r15d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
addl 148(%rsp), %r14d
|
|
movl %r12d, %ecx
|
|
movl %r8d, %ebx
|
|
xorl %r13d, %ecx
|
|
rorl $14, %edx
|
|
andl %r11d, %ecx
|
|
xorl %r11d, %edx
|
|
xorl %r13d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r14d
|
|
xorl %r11d, %edx
|
|
xorl %r15d, %ebx
|
|
rorl $6, %edx
|
|
movl %r15d, %ecx
|
|
addl %edx, %r14d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
addl 152(%rsp), %r13d
|
|
movl %r11d, %ecx
|
|
movl %r15d, %eax
|
|
xorl %r12d, %ecx
|
|
rorl $14, %edx
|
|
andl %r10d, %ecx
|
|
xorl %r10d, %edx
|
|
xorl %r12d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r13d
|
|
xorl %r10d, %edx
|
|
xorl %r14d, %eax
|
|
rorl $6, %edx
|
|
movl %r14d, %ecx
|
|
addl %edx, %r13d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
addl 156(%rsp), %r12d
|
|
movl %r10d, %ecx
|
|
movl %r14d, %ebx
|
|
xorl %r11d, %ecx
|
|
rorl $14, %edx
|
|
andl %r9d, %ecx
|
|
xorl %r9d, %edx
|
|
xorl %r11d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r12d
|
|
xorl %r9d, %edx
|
|
xorl %r13d, %ebx
|
|
rorl $6, %edx
|
|
movl %r13d, %ecx
|
|
addl %edx, %r12d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
# rnd_all_4: 11-14
|
|
addl 176(%rsp), %r11d
|
|
movl %r9d, %ecx
|
|
movl %r13d, %eax
|
|
xorl %r10d, %ecx
|
|
rorl $14, %edx
|
|
andl %r8d, %ecx
|
|
xorl %r8d, %edx
|
|
xorl %r10d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r11d
|
|
xorl %r8d, %edx
|
|
xorl %r12d, %eax
|
|
rorl $6, %edx
|
|
movl %r12d, %ecx
|
|
addl %edx, %r11d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
addl 180(%rsp), %r10d
|
|
movl %r8d, %ecx
|
|
movl %r12d, %ebx
|
|
xorl %r9d, %ecx
|
|
rorl $14, %edx
|
|
andl %r15d, %ecx
|
|
xorl %r15d, %edx
|
|
xorl %r9d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r10d
|
|
xorl %r15d, %edx
|
|
xorl %r11d, %ebx
|
|
rorl $6, %edx
|
|
movl %r11d, %ecx
|
|
addl %edx, %r10d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
addl 184(%rsp), %r9d
|
|
movl %r15d, %ecx
|
|
movl %r11d, %eax
|
|
xorl %r8d, %ecx
|
|
rorl $14, %edx
|
|
andl %r14d, %ecx
|
|
xorl %r14d, %edx
|
|
xorl %r8d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r9d
|
|
xorl %r14d, %edx
|
|
xorl %r10d, %eax
|
|
rorl $6, %edx
|
|
movl %r10d, %ecx
|
|
addl %edx, %r9d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
addl 188(%rsp), %r8d
|
|
movl %r14d, %ecx
|
|
movl %r10d, %ebx
|
|
xorl %r15d, %ecx
|
|
rorl $14, %edx
|
|
andl %r13d, %ecx
|
|
xorl %r13d, %edx
|
|
xorl %r15d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r8d
|
|
xorl %r13d, %edx
|
|
xorl %r9d, %ebx
|
|
rorl $6, %edx
|
|
movl %r9d, %ecx
|
|
addl %edx, %r8d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
# rnd_all_4: 13-16
|
|
addl 208(%rsp), %r15d
|
|
movl %r13d, %ecx
|
|
movl %r9d, %eax
|
|
xorl %r14d, %ecx
|
|
rorl $14, %edx
|
|
andl %r12d, %ecx
|
|
xorl %r12d, %edx
|
|
xorl %r14d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r15d
|
|
xorl %r12d, %edx
|
|
xorl %r8d, %eax
|
|
rorl $6, %edx
|
|
movl %r8d, %ecx
|
|
addl %edx, %r15d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
addl 212(%rsp), %r14d
|
|
movl %r12d, %ecx
|
|
movl %r8d, %ebx
|
|
xorl %r13d, %ecx
|
|
rorl $14, %edx
|
|
andl %r11d, %ecx
|
|
xorl %r11d, %edx
|
|
xorl %r13d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r14d
|
|
xorl %r11d, %edx
|
|
xorl %r15d, %ebx
|
|
rorl $6, %edx
|
|
movl %r15d, %ecx
|
|
addl %edx, %r14d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
addl 216(%rsp), %r13d
|
|
movl %r11d, %ecx
|
|
movl %r15d, %eax
|
|
xorl %r12d, %ecx
|
|
rorl $14, %edx
|
|
andl %r10d, %ecx
|
|
xorl %r10d, %edx
|
|
xorl %r12d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r13d
|
|
xorl %r10d, %edx
|
|
xorl %r14d, %eax
|
|
rorl $6, %edx
|
|
movl %r14d, %ecx
|
|
addl %edx, %r13d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
addl 220(%rsp), %r12d
|
|
movl %r10d, %ecx
|
|
movl %r14d, %ebx
|
|
xorl %r11d, %ecx
|
|
rorl $14, %edx
|
|
andl %r9d, %ecx
|
|
xorl %r9d, %edx
|
|
xorl %r11d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r12d
|
|
xorl %r9d, %edx
|
|
xorl %r13d, %ebx
|
|
rorl $6, %edx
|
|
movl %r13d, %ecx
|
|
addl %edx, %r12d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
# rnd_all_4: 15-18
|
|
addl 240(%rsp), %r11d
|
|
movl %r9d, %ecx
|
|
movl %r13d, %eax
|
|
xorl %r10d, %ecx
|
|
rorl $14, %edx
|
|
andl %r8d, %ecx
|
|
xorl %r8d, %edx
|
|
xorl %r10d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r11d
|
|
xorl %r8d, %edx
|
|
xorl %r12d, %eax
|
|
rorl $6, %edx
|
|
movl %r12d, %ecx
|
|
addl %edx, %r11d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
addl 244(%rsp), %r10d
|
|
movl %r8d, %ecx
|
|
movl %r12d, %ebx
|
|
xorl %r9d, %ecx
|
|
rorl $14, %edx
|
|
andl %r15d, %ecx
|
|
xorl %r15d, %edx
|
|
xorl %r9d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r10d
|
|
xorl %r15d, %edx
|
|
xorl %r11d, %ebx
|
|
rorl $6, %edx
|
|
movl %r11d, %ecx
|
|
addl %edx, %r10d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
addl 248(%rsp), %r9d
|
|
movl %r15d, %ecx
|
|
movl %r11d, %eax
|
|
xorl %r8d, %ecx
|
|
rorl $14, %edx
|
|
andl %r14d, %ecx
|
|
xorl %r14d, %edx
|
|
xorl %r8d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r9d
|
|
xorl %r14d, %edx
|
|
xorl %r10d, %eax
|
|
rorl $6, %edx
|
|
movl %r10d, %ecx
|
|
addl %edx, %r9d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
addl 252(%rsp), %r8d
|
|
movl %r14d, %ecx
|
|
movl %r10d, %ebx
|
|
xorl %r15d, %ecx
|
|
rorl $14, %edx
|
|
andl %r13d, %ecx
|
|
xorl %r13d, %edx
|
|
xorl %r15d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r8d
|
|
xorl %r13d, %edx
|
|
xorl %r9d, %ebx
|
|
rorl $6, %edx
|
|
movl %r9d, %ecx
|
|
addl %edx, %r8d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
# rnd_all_4: 17-20
|
|
addl 272(%rsp), %r15d
|
|
movl %r13d, %ecx
|
|
movl %r9d, %eax
|
|
xorl %r14d, %ecx
|
|
rorl $14, %edx
|
|
andl %r12d, %ecx
|
|
xorl %r12d, %edx
|
|
xorl %r14d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r15d
|
|
xorl %r12d, %edx
|
|
xorl %r8d, %eax
|
|
rorl $6, %edx
|
|
movl %r8d, %ecx
|
|
addl %edx, %r15d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
addl 276(%rsp), %r14d
|
|
movl %r12d, %ecx
|
|
movl %r8d, %ebx
|
|
xorl %r13d, %ecx
|
|
rorl $14, %edx
|
|
andl %r11d, %ecx
|
|
xorl %r11d, %edx
|
|
xorl %r13d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r14d
|
|
xorl %r11d, %edx
|
|
xorl %r15d, %ebx
|
|
rorl $6, %edx
|
|
movl %r15d, %ecx
|
|
addl %edx, %r14d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
addl 280(%rsp), %r13d
|
|
movl %r11d, %ecx
|
|
movl %r15d, %eax
|
|
xorl %r12d, %ecx
|
|
rorl $14, %edx
|
|
andl %r10d, %ecx
|
|
xorl %r10d, %edx
|
|
xorl %r12d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r13d
|
|
xorl %r10d, %edx
|
|
xorl %r14d, %eax
|
|
rorl $6, %edx
|
|
movl %r14d, %ecx
|
|
addl %edx, %r13d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
addl 284(%rsp), %r12d
|
|
movl %r10d, %ecx
|
|
movl %r14d, %ebx
|
|
xorl %r11d, %ecx
|
|
rorl $14, %edx
|
|
andl %r9d, %ecx
|
|
xorl %r9d, %edx
|
|
xorl %r11d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r12d
|
|
xorl %r9d, %edx
|
|
xorl %r13d, %ebx
|
|
rorl $6, %edx
|
|
movl %r13d, %ecx
|
|
addl %edx, %r12d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
# rnd_all_4: 19-22
|
|
addl 304(%rsp), %r11d
|
|
movl %r9d, %ecx
|
|
movl %r13d, %eax
|
|
xorl %r10d, %ecx
|
|
rorl $14, %edx
|
|
andl %r8d, %ecx
|
|
xorl %r8d, %edx
|
|
xorl %r10d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r11d
|
|
xorl %r8d, %edx
|
|
xorl %r12d, %eax
|
|
rorl $6, %edx
|
|
movl %r12d, %ecx
|
|
addl %edx, %r11d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
addl 308(%rsp), %r10d
|
|
movl %r8d, %ecx
|
|
movl %r12d, %ebx
|
|
xorl %r9d, %ecx
|
|
rorl $14, %edx
|
|
andl %r15d, %ecx
|
|
xorl %r15d, %edx
|
|
xorl %r9d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r10d
|
|
xorl %r15d, %edx
|
|
xorl %r11d, %ebx
|
|
rorl $6, %edx
|
|
movl %r11d, %ecx
|
|
addl %edx, %r10d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
addl 312(%rsp), %r9d
|
|
movl %r15d, %ecx
|
|
movl %r11d, %eax
|
|
xorl %r8d, %ecx
|
|
rorl $14, %edx
|
|
andl %r14d, %ecx
|
|
xorl %r14d, %edx
|
|
xorl %r8d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r9d
|
|
xorl %r14d, %edx
|
|
xorl %r10d, %eax
|
|
rorl $6, %edx
|
|
movl %r10d, %ecx
|
|
addl %edx, %r9d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
addl 316(%rsp), %r8d
|
|
movl %r14d, %ecx
|
|
movl %r10d, %ebx
|
|
xorl %r15d, %ecx
|
|
rorl $14, %edx
|
|
andl %r13d, %ecx
|
|
xorl %r13d, %edx
|
|
xorl %r15d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r8d
|
|
xorl %r13d, %edx
|
|
xorl %r9d, %ebx
|
|
rorl $6, %edx
|
|
movl %r9d, %ecx
|
|
addl %edx, %r8d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
# rnd_all_4: 21-24
|
|
addl 336(%rsp), %r15d
|
|
movl %r13d, %ecx
|
|
movl %r9d, %eax
|
|
xorl %r14d, %ecx
|
|
rorl $14, %edx
|
|
andl %r12d, %ecx
|
|
xorl %r12d, %edx
|
|
xorl %r14d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r15d
|
|
xorl %r12d, %edx
|
|
xorl %r8d, %eax
|
|
rorl $6, %edx
|
|
movl %r8d, %ecx
|
|
addl %edx, %r15d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
addl 340(%rsp), %r14d
|
|
movl %r12d, %ecx
|
|
movl %r8d, %ebx
|
|
xorl %r13d, %ecx
|
|
rorl $14, %edx
|
|
andl %r11d, %ecx
|
|
xorl %r11d, %edx
|
|
xorl %r13d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r14d
|
|
xorl %r11d, %edx
|
|
xorl %r15d, %ebx
|
|
rorl $6, %edx
|
|
movl %r15d, %ecx
|
|
addl %edx, %r14d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
addl 344(%rsp), %r13d
|
|
movl %r11d, %ecx
|
|
movl %r15d, %eax
|
|
xorl %r12d, %ecx
|
|
rorl $14, %edx
|
|
andl %r10d, %ecx
|
|
xorl %r10d, %edx
|
|
xorl %r12d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r13d
|
|
xorl %r10d, %edx
|
|
xorl %r14d, %eax
|
|
rorl $6, %edx
|
|
movl %r14d, %ecx
|
|
addl %edx, %r13d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
addl 348(%rsp), %r12d
|
|
movl %r10d, %ecx
|
|
movl %r14d, %ebx
|
|
xorl %r11d, %ecx
|
|
rorl $14, %edx
|
|
andl %r9d, %ecx
|
|
xorl %r9d, %edx
|
|
xorl %r11d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r12d
|
|
xorl %r9d, %edx
|
|
xorl %r13d, %ebx
|
|
rorl $6, %edx
|
|
movl %r13d, %ecx
|
|
addl %edx, %r12d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
# rnd_all_4: 23-26
|
|
addl 368(%rsp), %r11d
|
|
movl %r9d, %ecx
|
|
movl %r13d, %eax
|
|
xorl %r10d, %ecx
|
|
rorl $14, %edx
|
|
andl %r8d, %ecx
|
|
xorl %r8d, %edx
|
|
xorl %r10d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r11d
|
|
xorl %r8d, %edx
|
|
xorl %r12d, %eax
|
|
rorl $6, %edx
|
|
movl %r12d, %ecx
|
|
addl %edx, %r11d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
addl 372(%rsp), %r10d
|
|
movl %r8d, %ecx
|
|
movl %r12d, %ebx
|
|
xorl %r9d, %ecx
|
|
rorl $14, %edx
|
|
andl %r15d, %ecx
|
|
xorl %r15d, %edx
|
|
xorl %r9d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r10d
|
|
xorl %r15d, %edx
|
|
xorl %r11d, %ebx
|
|
rorl $6, %edx
|
|
movl %r11d, %ecx
|
|
addl %edx, %r10d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
addl 376(%rsp), %r9d
|
|
movl %r15d, %ecx
|
|
movl %r11d, %eax
|
|
xorl %r8d, %ecx
|
|
rorl $14, %edx
|
|
andl %r14d, %ecx
|
|
xorl %r14d, %edx
|
|
xorl %r8d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r9d
|
|
xorl %r14d, %edx
|
|
xorl %r10d, %eax
|
|
rorl $6, %edx
|
|
movl %r10d, %ecx
|
|
addl %edx, %r9d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
addl 380(%rsp), %r8d
|
|
movl %r14d, %ecx
|
|
movl %r10d, %ebx
|
|
xorl %r15d, %ecx
|
|
rorl $14, %edx
|
|
andl %r13d, %ecx
|
|
xorl %r13d, %edx
|
|
xorl %r15d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r8d
|
|
xorl %r13d, %edx
|
|
xorl %r9d, %ebx
|
|
rorl $6, %edx
|
|
movl %r9d, %ecx
|
|
addl %edx, %r8d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
# rnd_all_4: 25-28
|
|
addl 400(%rsp), %r15d
|
|
movl %r13d, %ecx
|
|
movl %r9d, %eax
|
|
xorl %r14d, %ecx
|
|
rorl $14, %edx
|
|
andl %r12d, %ecx
|
|
xorl %r12d, %edx
|
|
xorl %r14d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r15d
|
|
xorl %r12d, %edx
|
|
xorl %r8d, %eax
|
|
rorl $6, %edx
|
|
movl %r8d, %ecx
|
|
addl %edx, %r15d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
addl 404(%rsp), %r14d
|
|
movl %r12d, %ecx
|
|
movl %r8d, %ebx
|
|
xorl %r13d, %ecx
|
|
rorl $14, %edx
|
|
andl %r11d, %ecx
|
|
xorl %r11d, %edx
|
|
xorl %r13d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r14d
|
|
xorl %r11d, %edx
|
|
xorl %r15d, %ebx
|
|
rorl $6, %edx
|
|
movl %r15d, %ecx
|
|
addl %edx, %r14d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
addl 408(%rsp), %r13d
|
|
movl %r11d, %ecx
|
|
movl %r15d, %eax
|
|
xorl %r12d, %ecx
|
|
rorl $14, %edx
|
|
andl %r10d, %ecx
|
|
xorl %r10d, %edx
|
|
xorl %r12d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r13d
|
|
xorl %r10d, %edx
|
|
xorl %r14d, %eax
|
|
rorl $6, %edx
|
|
movl %r14d, %ecx
|
|
addl %edx, %r13d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
addl 412(%rsp), %r12d
|
|
movl %r10d, %ecx
|
|
movl %r14d, %ebx
|
|
xorl %r11d, %ecx
|
|
rorl $14, %edx
|
|
andl %r9d, %ecx
|
|
xorl %r9d, %edx
|
|
xorl %r11d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r12d
|
|
xorl %r9d, %edx
|
|
xorl %r13d, %ebx
|
|
rorl $6, %edx
|
|
movl %r13d, %ecx
|
|
addl %edx, %r12d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
# rnd_all_4: 27-30
|
|
addl 432(%rsp), %r11d
|
|
movl %r9d, %ecx
|
|
movl %r13d, %eax
|
|
xorl %r10d, %ecx
|
|
rorl $14, %edx
|
|
andl %r8d, %ecx
|
|
xorl %r8d, %edx
|
|
xorl %r10d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r11d
|
|
xorl %r8d, %edx
|
|
xorl %r12d, %eax
|
|
rorl $6, %edx
|
|
movl %r12d, %ecx
|
|
addl %edx, %r11d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
addl 436(%rsp), %r10d
|
|
movl %r8d, %ecx
|
|
movl %r12d, %ebx
|
|
xorl %r9d, %ecx
|
|
rorl $14, %edx
|
|
andl %r15d, %ecx
|
|
xorl %r15d, %edx
|
|
xorl %r9d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r10d
|
|
xorl %r15d, %edx
|
|
xorl %r11d, %ebx
|
|
rorl $6, %edx
|
|
movl %r11d, %ecx
|
|
addl %edx, %r10d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
addl 440(%rsp), %r9d
|
|
movl %r15d, %ecx
|
|
movl %r11d, %eax
|
|
xorl %r8d, %ecx
|
|
rorl $14, %edx
|
|
andl %r14d, %ecx
|
|
xorl %r14d, %edx
|
|
xorl %r8d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r9d
|
|
xorl %r14d, %edx
|
|
xorl %r10d, %eax
|
|
rorl $6, %edx
|
|
movl %r10d, %ecx
|
|
addl %edx, %r9d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
addl 444(%rsp), %r8d
|
|
movl %r14d, %ecx
|
|
movl %r10d, %ebx
|
|
xorl %r15d, %ecx
|
|
rorl $14, %edx
|
|
andl %r13d, %ecx
|
|
xorl %r13d, %edx
|
|
xorl %r15d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r8d
|
|
xorl %r13d, %edx
|
|
xorl %r9d, %ebx
|
|
rorl $6, %edx
|
|
movl %r9d, %ecx
|
|
addl %edx, %r8d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
|
|
# rnd_all_4: 29-32
|
|
addl 464(%rsp), %r15d
|
|
movl %r13d, %ecx
|
|
movl %r9d, %eax
|
|
xorl %r14d, %ecx
|
|
rorl $14, %edx
|
|
andl %r12d, %ecx
|
|
xorl %r12d, %edx
|
|
xorl %r14d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r15d
|
|
xorl %r12d, %edx
|
|
xorl %r8d, %eax
|
|
rorl $6, %edx
|
|
movl %r8d, %ecx
|
|
addl %edx, %r15d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r8d, %ecx
|
|
xorl %r9d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %ecx
|
|
addl %ebx, %r15d
|
|
rorl $2, %ecx
|
|
movl %r11d, %edx
|
|
addl %ecx, %r15d
|
|
addl 468(%rsp), %r14d
|
|
movl %r12d, %ecx
|
|
movl %r8d, %ebx
|
|
xorl %r13d, %ecx
|
|
rorl $14, %edx
|
|
andl %r11d, %ecx
|
|
xorl %r11d, %edx
|
|
xorl %r13d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r14d
|
|
xorl %r11d, %edx
|
|
xorl %r15d, %ebx
|
|
rorl $6, %edx
|
|
movl %r15d, %ecx
|
|
addl %edx, %r14d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r15d, %ecx
|
|
xorl %r8d, %eax
|
|
rorl $11, %ecx
|
|
addl %r14d, %r10d
|
|
xorl %r15d, %ecx
|
|
addl %eax, %r14d
|
|
rorl $2, %ecx
|
|
movl %r10d, %edx
|
|
addl %ecx, %r14d
|
|
addl 472(%rsp), %r13d
|
|
movl %r11d, %ecx
|
|
movl %r15d, %eax
|
|
xorl %r12d, %ecx
|
|
rorl $14, %edx
|
|
andl %r10d, %ecx
|
|
xorl %r10d, %edx
|
|
xorl %r12d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r13d
|
|
xorl %r10d, %edx
|
|
xorl %r14d, %eax
|
|
rorl $6, %edx
|
|
movl %r14d, %ecx
|
|
addl %edx, %r13d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r14d, %ecx
|
|
xorl %r15d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %ecx
|
|
addl %ebx, %r13d
|
|
rorl $2, %ecx
|
|
movl %r9d, %edx
|
|
addl %ecx, %r13d
|
|
addl 476(%rsp), %r12d
|
|
movl %r10d, %ecx
|
|
movl %r14d, %ebx
|
|
xorl %r11d, %ecx
|
|
rorl $14, %edx
|
|
andl %r9d, %ecx
|
|
xorl %r9d, %edx
|
|
xorl %r11d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r12d
|
|
xorl %r9d, %edx
|
|
xorl %r13d, %ebx
|
|
rorl $6, %edx
|
|
movl %r13d, %ecx
|
|
addl %edx, %r12d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r13d, %ecx
|
|
xorl %r14d, %eax
|
|
rorl $11, %ecx
|
|
addl %r12d, %r8d
|
|
xorl %r13d, %ecx
|
|
addl %eax, %r12d
|
|
rorl $2, %ecx
|
|
movl %r8d, %edx
|
|
addl %ecx, %r12d
|
|
# rnd_all_4: 31-34
|
|
addl 496(%rsp), %r11d
|
|
movl %r9d, %ecx
|
|
movl %r13d, %eax
|
|
xorl %r10d, %ecx
|
|
rorl $14, %edx
|
|
andl %r8d, %ecx
|
|
xorl %r8d, %edx
|
|
xorl %r10d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r11d
|
|
xorl %r8d, %edx
|
|
xorl %r12d, %eax
|
|
rorl $6, %edx
|
|
movl %r12d, %ecx
|
|
addl %edx, %r11d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r12d, %ecx
|
|
xorl %r13d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %ecx
|
|
addl %ebx, %r11d
|
|
rorl $2, %ecx
|
|
movl %r15d, %edx
|
|
addl %ecx, %r11d
|
|
addl 500(%rsp), %r10d
|
|
movl %r8d, %ecx
|
|
movl %r12d, %ebx
|
|
xorl %r9d, %ecx
|
|
rorl $14, %edx
|
|
andl %r15d, %ecx
|
|
xorl %r15d, %edx
|
|
xorl %r9d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r10d
|
|
xorl %r15d, %edx
|
|
xorl %r11d, %ebx
|
|
rorl $6, %edx
|
|
movl %r11d, %ecx
|
|
addl %edx, %r10d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r11d, %ecx
|
|
xorl %r12d, %eax
|
|
rorl $11, %ecx
|
|
addl %r10d, %r14d
|
|
xorl %r11d, %ecx
|
|
addl %eax, %r10d
|
|
rorl $2, %ecx
|
|
movl %r14d, %edx
|
|
addl %ecx, %r10d
|
|
addl 504(%rsp), %r9d
|
|
movl %r15d, %ecx
|
|
movl %r11d, %eax
|
|
xorl %r8d, %ecx
|
|
rorl $14, %edx
|
|
andl %r14d, %ecx
|
|
xorl %r14d, %edx
|
|
xorl %r8d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r9d
|
|
xorl %r14d, %edx
|
|
xorl %r10d, %eax
|
|
rorl $6, %edx
|
|
movl %r10d, %ecx
|
|
addl %edx, %r9d
|
|
rorl $9, %ecx
|
|
andl %eax, %ebx
|
|
xorl %r10d, %ecx
|
|
xorl %r11d, %ebx
|
|
rorl $11, %ecx
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %ecx
|
|
addl %ebx, %r9d
|
|
rorl $2, %ecx
|
|
movl %r13d, %edx
|
|
addl %ecx, %r9d
|
|
addl 508(%rsp), %r8d
|
|
movl %r14d, %ecx
|
|
movl %r10d, %ebx
|
|
xorl %r15d, %ecx
|
|
rorl $14, %edx
|
|
andl %r13d, %ecx
|
|
xorl %r13d, %edx
|
|
xorl %r15d, %ecx
|
|
rorl $5, %edx
|
|
addl %ecx, %r8d
|
|
xorl %r13d, %edx
|
|
xorl %r9d, %ebx
|
|
rorl $6, %edx
|
|
movl %r9d, %ecx
|
|
addl %edx, %r8d
|
|
rorl $9, %ecx
|
|
andl %ebx, %eax
|
|
xorl %r9d, %ecx
|
|
xorl %r10d, %eax
|
|
rorl $11, %ecx
|
|
addl %r8d, %r12d
|
|
xorl %r9d, %ecx
|
|
addl %eax, %r8d
|
|
rorl $2, %ecx
|
|
movl %r12d, %edx
|
|
addl %ecx, %r8d
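# Final round done: add the previous hash state at (%rdi) into the working
# variables r8d-r15d and write the result back, then advance the data pointer
# and decrement the remaining length by 0x80 (two 64-byte blocks are consumed
# per iteration of L_sha256_len_avx2_start); loop while length is non-zero.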
addl (%rdi), %r8d
addl 4(%rdi), %r9d
addl 8(%rdi), %r10d
addl 12(%rdi), %r11d
addl 16(%rdi), %r12d
addl 20(%rdi), %r13d
addl 24(%rdi), %r14d
addl 28(%rdi), %r15d
addq $0x80, %rbp
subl $0x80, %esi
movl %r8d, (%rdi)
movl %r9d, 4(%rdi)
movl %r10d, 8(%rdi)
movl %r11d, 12(%rdi)
movl %r12d, 16(%rdi)
movl %r13d, 20(%rdi)
movl %r14d, 24(%rdi)
movl %r15d, 28(%rdi)
jnz L_sha256_len_avx2_start
L_sha256_len_avx2_done:
xorq %rax, %rax
vzeroupper
addq $0x200, %rsp
popq %rbp
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size Transform_Sha256_AVX2_Len,.-Transform_Sha256_AVX2_Len
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
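# SHA-256 round constants K[0..63] for the AVX2/RORX code path.  Each row of
# four constants is stored twice so that a single 32-byte vpaddd can apply the
# same constants to both 128-bit lanes of a ymm register.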
L_avx2_rorx_sha256_k:
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc
.long 0xe49b69c1,0xefbe4786,0xfc19dc6,0x240ca1cc
.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967
.long 0xc6e00bf3,0xd5a79147,0x6ca6351,0x14292967
.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 32
#else
.p2align 5
#endif /* __APPLE__ */
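# vpshufb mask that byte-swaps each 32-bit word, converting the big-endian
# message words of the input block into host (little-endian) order.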
L_avx2_rorx_sha256_flip_mask:
.quad 0x405060700010203, 0xc0d0e0f08090a0b
.quad 0x405060700010203, 0xc0d0e0f08090a0b
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 32
#else
.p2align 5
#endif /* __APPLE__ */
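# vpshufb masks used while expanding the message schedule: shuf_00BA gathers
# dwords 0 and 2 of the source into the low half of each lane and zeroes the
# rest (0xff indices select zero); shuf_DC00 places the same dwords into the
# high half instead.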
L_avx2_rorx_sha256_shuf_00BA:
.quad 0xb0a090803020100, 0xffffffffffffffff
.quad 0xb0a090803020100, 0xffffffffffffffff
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 32
#else
.p2align 5
#endif /* __APPLE__ */
L_avx2_rorx_sha256_shuf_DC00:
.quad 0xffffffffffffffff, 0xb0a090803020100
.quad 0xffffffffffffffff, 0xb0a090803020100
#ifndef __APPLE__
.text
.globl Transform_Sha256_AVX2_RORX
.type Transform_Sha256_AVX2_RORX,@function
.align 16
Transform_Sha256_AVX2_RORX:
#else
.section __TEXT,__text
.globl _Transform_Sha256_AVX2_RORX
.p2align 4
_Transform_Sha256_AVX2_RORX:
#endif /* __APPLE__ */
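# Transform_Sha256_AVX2_RORX processes a single 64-byte block, using BMI2
# rorxl for the non-destructive Sigma rotations.  The eight 32-bit state words
# are read from (%rdi) and the block data from 32(%rdi).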
pushq %rbx
pushq %r12
pushq %r13
pushq %r14
pushq %r15
subq $0x200, %rsp
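# 0x200 bytes of scratch space hold the expanded message schedule with the
# round constants already added; the scalar rounds below pick the W[t] + K[t]
# values back up from these stack slots.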
leaq 32(%rdi), %rax
vmovdqa L_avx2_rorx_sha256_flip_mask(%rip), %xmm13
vmovdqa L_avx2_rorx_sha256_shuf_00BA(%rip), %ymm11
vmovdqa L_avx2_rorx_sha256_shuf_DC00(%rip), %ymm12
# X0, X1, X2, X3 = W[0..15]
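# Load the block 16 bytes at a time, byte-swap each word with the flip mask,
# add the first 16 round constants, and park W[0..15] + K[0..15] on the stack
# for rounds 0-15.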
vmovdqu (%rax), %xmm0
vmovdqu 16(%rax), %xmm1
vpshufb %xmm13, %xmm0, %xmm0
vpshufb %xmm13, %xmm1, %xmm1
vpaddd 0+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4
vpaddd 32+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm5
vmovdqu %ymm4, (%rsp)
vmovdqu %ymm5, 32(%rsp)
vmovdqu 32(%rax), %xmm2
vmovdqu 48(%rax), %xmm3
vpshufb %xmm13, %xmm2, %xmm2
vpshufb %xmm13, %xmm3, %xmm3
vpaddd 64+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4
vpaddd 96+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm5
vmovdqu %ymm4, 64(%rsp)
vmovdqu %ymm5, 96(%rsp)
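# Load the current hash state a..h into r8d-r15d as the working variables.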
movl (%rdi), %r8d
movl 4(%rdi), %r9d
movl 8(%rdi), %r10d
movl 12(%rdi), %r11d
movl 16(%rdi), %r12d
movl 20(%rdi), %r13d
movl 24(%rdi), %r14d
movl 28(%rdi), %r15d
movl %r9d, %ebx
rorxl $6, %r12d, %edx
xorl %r10d, %ebx
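# Main body: two rounds at a time (rnd_0 / rnd_1), with the numbered scalar
# sub-steps 0-7 interleaved against the AVX2 message-schedule update.  The
# vector code computes sigma0/sigma1 of the previous schedule words with
# shift/or rotations, packs the results via the shuf masks, adds the next
# round constants, and stores the new W + K group to the stack for the
# upcoming rounds.  rorxl $6/$11/$25 and $2/$13/$22 produce the Sigma1 and
# Sigma0 rotations without clobbering their source registers.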
|
|
# rnd_0: 0 - 0
|
|
movl %r13d, %eax
|
|
rorxl $11, %r12d, %ecx
|
|
addl (%rsp), %r15d
|
|
vpalignr $4, %ymm0, %ymm1, %ymm5
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
vpalignr $4, %ymm2, %ymm3, %ymm4
|
|
# rnd_0: 2 - 2
|
|
andl %r12d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
xorl %r14d, %eax
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
vpsrld $18, %ymm5, %ymm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 6 - 6
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
andl %eax, %ebx
|
|
vpor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 7 - 7
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
addl %ebx, %r15d
|
|
vpor %ymm9, %ymm8, %ymm8
|
|
# rnd_1: 0 - 0
|
|
movl %r12d, %ebx
|
|
rorxl $11, %r11d, %ecx
|
|
addl 4(%rsp), %r14d
|
|
vpsrld $3, %ymm5, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
vpxor %ymm8, %ymm6, %ymm6
|
|
# rnd_1: 2 - 2
|
|
andl %r11d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
vpshufd $0xfa, %ymm3, %ymm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
xorl %r13d, %ebx
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
vpsrld $10, %ymm7, %ymm8
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r14d, %r10d
|
|
movl %r8d, %ebx
|
|
vpsrlq $19, %ymm7, %ymm6
|
|
# rnd_1: 6 - 6
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
andl %ebx, %eax
|
|
vpsrlq $0x11, %ymm7, %ymm7
|
|
# rnd_1: 7 - 7
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
addl %eax, %r14d
|
|
vpaddd %ymm0, %ymm4, %ymm4
|
|
# rnd_0: 0 - 0
|
|
movl %r11d, %eax
|
|
rorxl $11, %r10d, %ecx
|
|
addl 8(%rsp), %r13d
|
|
vpxor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 2 - 2
|
|
andl %r10d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
xorl %r12d, %eax
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_0: 6 - 6
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
andl %eax, %ebx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
# rnd_0: 7 - 7
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
addl %ebx, %r13d
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 0 - 0
|
|
movl %r10d, %ebx
|
|
rorxl $11, %r9d, %ecx
|
|
addl 12(%rsp), %r12d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
vpxor %ymm7, %ymm8, %ymm8
|
|
# rnd_1: 2 - 2
|
|
andl %r9d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
vpxor %ymm8, %ymm9, %ymm9
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
xorl %r11d, %ebx
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
vpaddd %ymm4, %ymm9, %ymm0
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r12d, %r8d
|
|
movl %r14d, %ebx
|
|
vpaddd 128+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4
|
|
# rnd_1: 6 - 6
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
andl %ebx, %eax
|
|
# rnd_1: 7 - 7
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
addl %eax, %r12d
|
|
vmovdqu %ymm4, 128(%rsp)
|
|
# rnd_0: 0 - 0
|
|
movl %r9d, %eax
|
|
rorxl $11, %r8d, %ecx
|
|
addl 32(%rsp), %r11d
|
|
vpalignr $4, %ymm1, %ymm2, %ymm5
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
vpalignr $4, %ymm3, %ymm0, %ymm4
|
|
# rnd_0: 2 - 2
|
|
andl %r8d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
xorl %r10d, %eax
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
vpsrld $18, %ymm5, %ymm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 6 - 6
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
andl %eax, %ebx
|
|
vpor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 7 - 7
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
addl %ebx, %r11d
|
|
vpor %ymm9, %ymm8, %ymm8
|
|
# rnd_1: 0 - 0
|
|
movl %r8d, %ebx
|
|
rorxl $11, %r15d, %ecx
|
|
addl 36(%rsp), %r10d
|
|
vpsrld $3, %ymm5, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
vpxor %ymm8, %ymm6, %ymm6
|
|
# rnd_1: 2 - 2
|
|
andl %r15d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
vpshufd $0xfa, %ymm0, %ymm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
xorl %r9d, %ebx
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
vpsrld $10, %ymm7, %ymm8
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r10d, %r14d
|
|
movl %r12d, %ebx
|
|
vpsrlq $19, %ymm7, %ymm6
|
|
# rnd_1: 6 - 6
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
andl %ebx, %eax
|
|
vpsrlq $0x11, %ymm7, %ymm7
|
|
# rnd_1: 7 - 7
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
addl %eax, %r10d
|
|
vpaddd %ymm1, %ymm4, %ymm4
|
|
# rnd_0: 0 - 0
|
|
movl %r15d, %eax
|
|
rorxl $11, %r14d, %ecx
|
|
addl 40(%rsp), %r9d
|
|
vpxor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 2 - 2
|
|
andl %r14d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
xorl %r8d, %eax
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_0: 6 - 6
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
andl %eax, %ebx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
# rnd_0: 7 - 7
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
addl %ebx, %r9d
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 0 - 0
|
|
movl %r14d, %ebx
|
|
rorxl $11, %r13d, %ecx
|
|
addl 44(%rsp), %r8d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
vpxor %ymm7, %ymm8, %ymm8
|
|
# rnd_1: 2 - 2
|
|
andl %r13d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
vpxor %ymm8, %ymm9, %ymm9
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
xorl %r15d, %ebx
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
vpaddd %ymm4, %ymm9, %ymm1
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r8d, %r12d
|
|
movl %r10d, %ebx
|
|
vpaddd 160+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4
|
|
# rnd_1: 6 - 6
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
andl %ebx, %eax
|
|
# rnd_1: 7 - 7
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
addl %eax, %r8d
|
|
vmovdqu %ymm4, 160(%rsp)
|
|
# rnd_0: 0 - 0
|
|
movl %r13d, %eax
|
|
rorxl $11, %r12d, %ecx
|
|
addl 64(%rsp), %r15d
|
|
vpalignr $4, %ymm2, %ymm3, %ymm5
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
vpalignr $4, %ymm0, %ymm1, %ymm4
|
|
# rnd_0: 2 - 2
|
|
andl %r12d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
xorl %r14d, %eax
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
vpsrld $18, %ymm5, %ymm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 6 - 6
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
andl %eax, %ebx
|
|
vpor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 7 - 7
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
addl %ebx, %r15d
|
|
vpor %ymm9, %ymm8, %ymm8
|
|
# rnd_1: 0 - 0
|
|
movl %r12d, %ebx
|
|
rorxl $11, %r11d, %ecx
|
|
addl 68(%rsp), %r14d
|
|
vpsrld $3, %ymm5, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
vpxor %ymm8, %ymm6, %ymm6
|
|
# rnd_1: 2 - 2
|
|
andl %r11d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
vpshufd $0xfa, %ymm1, %ymm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
xorl %r13d, %ebx
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
vpsrld $10, %ymm7, %ymm8
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r14d, %r10d
|
|
movl %r8d, %ebx
|
|
vpsrlq $19, %ymm7, %ymm6
|
|
# rnd_1: 6 - 6
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
andl %ebx, %eax
|
|
vpsrlq $0x11, %ymm7, %ymm7
|
|
# rnd_1: 7 - 7
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
addl %eax, %r14d
|
|
vpaddd %ymm2, %ymm4, %ymm4
|
|
# rnd_0: 0 - 0
|
|
movl %r11d, %eax
|
|
rorxl $11, %r10d, %ecx
|
|
addl 72(%rsp), %r13d
|
|
vpxor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 2 - 2
|
|
andl %r10d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
xorl %r12d, %eax
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_0: 6 - 6
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
andl %eax, %ebx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
# rnd_0: 7 - 7
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
addl %ebx, %r13d
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 0 - 0
|
|
movl %r10d, %ebx
|
|
rorxl $11, %r9d, %ecx
|
|
addl 76(%rsp), %r12d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
vpxor %ymm7, %ymm8, %ymm8
|
|
# rnd_1: 2 - 2
|
|
andl %r9d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
vpxor %ymm8, %ymm9, %ymm9
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
xorl %r11d, %ebx
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
vpaddd %ymm4, %ymm9, %ymm2
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r12d, %r8d
|
|
movl %r14d, %ebx
|
|
vpaddd 192+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4
|
|
# rnd_1: 6 - 6
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
andl %ebx, %eax
|
|
# rnd_1: 7 - 7
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
addl %eax, %r12d
|
|
vmovdqu %ymm4, 192(%rsp)
|
|
# rnd_0: 0 - 0
|
|
movl %r9d, %eax
|
|
rorxl $11, %r8d, %ecx
|
|
addl 96(%rsp), %r11d
|
|
vpalignr $4, %ymm3, %ymm0, %ymm5
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
vpalignr $4, %ymm1, %ymm2, %ymm4
|
|
# rnd_0: 2 - 2
|
|
andl %r8d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
xorl %r10d, %eax
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
vpsrld $18, %ymm5, %ymm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 6 - 6
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
andl %eax, %ebx
|
|
vpor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 7 - 7
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
addl %ebx, %r11d
|
|
vpor %ymm9, %ymm8, %ymm8
|
|
# rnd_1: 0 - 0
|
|
movl %r8d, %ebx
|
|
rorxl $11, %r15d, %ecx
|
|
addl 100(%rsp), %r10d
|
|
vpsrld $3, %ymm5, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
vpxor %ymm8, %ymm6, %ymm6
|
|
# rnd_1: 2 - 2
|
|
andl %r15d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
vpshufd $0xfa, %ymm2, %ymm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
xorl %r9d, %ebx
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
vpsrld $10, %ymm7, %ymm8
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r10d, %r14d
|
|
movl %r12d, %ebx
|
|
vpsrlq $19, %ymm7, %ymm6
|
|
# rnd_1: 6 - 6
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
andl %ebx, %eax
|
|
vpsrlq $0x11, %ymm7, %ymm7
|
|
# rnd_1: 7 - 7
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
addl %eax, %r10d
|
|
vpaddd %ymm3, %ymm4, %ymm4
|
|
# rnd_0: 0 - 0
|
|
movl %r15d, %eax
|
|
rorxl $11, %r14d, %ecx
|
|
addl 104(%rsp), %r9d
|
|
vpxor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 2 - 2
|
|
andl %r14d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
xorl %r8d, %eax
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_0: 6 - 6
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
andl %eax, %ebx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
# rnd_0: 7 - 7
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
addl %ebx, %r9d
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 0 - 0
|
|
movl %r14d, %ebx
|
|
rorxl $11, %r13d, %ecx
|
|
addl 108(%rsp), %r8d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
vpxor %ymm7, %ymm8, %ymm8
|
|
# rnd_1: 2 - 2
|
|
andl %r13d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
vpxor %ymm8, %ymm9, %ymm9
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
xorl %r15d, %ebx
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
vpaddd %ymm4, %ymm9, %ymm3
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r8d, %r12d
|
|
movl %r10d, %ebx
|
|
vpaddd 224+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4
|
|
# rnd_1: 6 - 6
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
andl %ebx, %eax
|
|
# rnd_1: 7 - 7
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
addl %eax, %r8d
|
|
vmovdqu %ymm4, 224(%rsp)
|
|
# rnd_0: 0 - 0
|
|
movl %r13d, %eax
|
|
rorxl $11, %r12d, %ecx
|
|
addl 128(%rsp), %r15d
|
|
vpalignr $4, %ymm0, %ymm1, %ymm5
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
vpalignr $4, %ymm2, %ymm3, %ymm4
|
|
# rnd_0: 2 - 2
|
|
andl %r12d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
xorl %r14d, %eax
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
vpsrld $18, %ymm5, %ymm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 6 - 6
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
andl %eax, %ebx
|
|
vpor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 7 - 7
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
addl %ebx, %r15d
|
|
vpor %ymm9, %ymm8, %ymm8
|
|
# rnd_1: 0 - 0
|
|
movl %r12d, %ebx
|
|
rorxl $11, %r11d, %ecx
|
|
addl 132(%rsp), %r14d
|
|
vpsrld $3, %ymm5, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
vpxor %ymm8, %ymm6, %ymm6
|
|
# rnd_1: 2 - 2
|
|
andl %r11d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
vpshufd $0xfa, %ymm3, %ymm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
xorl %r13d, %ebx
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
vpsrld $10, %ymm7, %ymm8
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r14d, %r10d
|
|
movl %r8d, %ebx
|
|
vpsrlq $19, %ymm7, %ymm6
|
|
# rnd_1: 6 - 6
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
andl %ebx, %eax
|
|
vpsrlq $0x11, %ymm7, %ymm7
|
|
# rnd_1: 7 - 7
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
addl %eax, %r14d
|
|
vpaddd %ymm0, %ymm4, %ymm4
|
|
# rnd_0: 0 - 0
|
|
movl %r11d, %eax
|
|
rorxl $11, %r10d, %ecx
|
|
addl 136(%rsp), %r13d
|
|
vpxor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 2 - 2
|
|
andl %r10d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
xorl %r12d, %eax
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_0: 6 - 6
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
andl %eax, %ebx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
# rnd_0: 7 - 7
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
addl %ebx, %r13d
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 0 - 0
|
|
movl %r10d, %ebx
|
|
rorxl $11, %r9d, %ecx
|
|
addl 140(%rsp), %r12d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
vpxor %ymm7, %ymm8, %ymm8
|
|
# rnd_1: 2 - 2
|
|
andl %r9d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
vpxor %ymm8, %ymm9, %ymm9
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
xorl %r11d, %ebx
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
vpaddd %ymm4, %ymm9, %ymm0
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r12d, %r8d
|
|
movl %r14d, %ebx
|
|
vpaddd 256+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4
|
|
# rnd_1: 6 - 6
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
andl %ebx, %eax
|
|
# rnd_1: 7 - 7
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
addl %eax, %r12d
|
|
vmovdqu %ymm4, 256(%rsp)
|
|
# rnd_0: 0 - 0
|
|
movl %r9d, %eax
|
|
rorxl $11, %r8d, %ecx
|
|
addl 160(%rsp), %r11d
|
|
vpalignr $4, %ymm1, %ymm2, %ymm5
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
vpalignr $4, %ymm3, %ymm0, %ymm4
|
|
# rnd_0: 2 - 2
|
|
andl %r8d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
xorl %r10d, %eax
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
vpsrld $18, %ymm5, %ymm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 6 - 6
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
andl %eax, %ebx
|
|
vpor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 7 - 7
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
addl %ebx, %r11d
|
|
vpor %ymm9, %ymm8, %ymm8
|
|
# rnd_1: 0 - 0
|
|
movl %r8d, %ebx
|
|
rorxl $11, %r15d, %ecx
|
|
addl 164(%rsp), %r10d
|
|
vpsrld $3, %ymm5, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
vpxor %ymm8, %ymm6, %ymm6
|
|
# rnd_1: 2 - 2
|
|
andl %r15d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
vpshufd $0xfa, %ymm0, %ymm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
xorl %r9d, %ebx
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
vpsrld $10, %ymm7, %ymm8
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r10d, %r14d
|
|
movl %r12d, %ebx
|
|
vpsrlq $19, %ymm7, %ymm6
|
|
# rnd_1: 6 - 6
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
andl %ebx, %eax
|
|
vpsrlq $0x11, %ymm7, %ymm7
|
|
# rnd_1: 7 - 7
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
addl %eax, %r10d
|
|
vpaddd %ymm1, %ymm4, %ymm4
|
|
# rnd_0: 0 - 0
|
|
movl %r15d, %eax
|
|
rorxl $11, %r14d, %ecx
|
|
addl 168(%rsp), %r9d
|
|
vpxor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 2 - 2
|
|
andl %r14d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
xorl %r8d, %eax
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_0: 6 - 6
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
andl %eax, %ebx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
# rnd_0: 7 - 7
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
addl %ebx, %r9d
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 0 - 0
|
|
movl %r14d, %ebx
|
|
rorxl $11, %r13d, %ecx
|
|
addl 172(%rsp), %r8d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
vpxor %ymm7, %ymm8, %ymm8
|
|
# rnd_1: 2 - 2
|
|
andl %r13d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
vpxor %ymm8, %ymm9, %ymm9
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
xorl %r15d, %ebx
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
vpaddd %ymm4, %ymm9, %ymm1
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r8d, %r12d
|
|
movl %r10d, %ebx
|
|
vpaddd 288+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4
|
|
# rnd_1: 6 - 6
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
andl %ebx, %eax
|
|
# rnd_1: 7 - 7
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
addl %eax, %r8d
|
|
vmovdqu %ymm4, 288(%rsp)
|
|
# rnd_0: 0 - 0
|
|
movl %r13d, %eax
|
|
rorxl $11, %r12d, %ecx
|
|
addl 192(%rsp), %r15d
|
|
vpalignr $4, %ymm2, %ymm3, %ymm5
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
vpalignr $4, %ymm0, %ymm1, %ymm4
|
|
# rnd_0: 2 - 2
|
|
andl %r12d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
xorl %r14d, %eax
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
vpsrld $18, %ymm5, %ymm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 6 - 6
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
andl %eax, %ebx
|
|
vpor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 7 - 7
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
addl %ebx, %r15d
|
|
vpor %ymm9, %ymm8, %ymm8
|
|
# rnd_1: 0 - 0
|
|
movl %r12d, %ebx
|
|
rorxl $11, %r11d, %ecx
|
|
addl 196(%rsp), %r14d
|
|
vpsrld $3, %ymm5, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
vpxor %ymm8, %ymm6, %ymm6
|
|
# rnd_1: 2 - 2
|
|
andl %r11d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
vpshufd $0xfa, %ymm1, %ymm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
xorl %r13d, %ebx
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
vpsrld $10, %ymm7, %ymm8
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r14d, %r10d
|
|
movl %r8d, %ebx
|
|
vpsrlq $19, %ymm7, %ymm6
|
|
# rnd_1: 6 - 6
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
andl %ebx, %eax
|
|
vpsrlq $0x11, %ymm7, %ymm7
|
|
# rnd_1: 7 - 7
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
addl %eax, %r14d
|
|
vpaddd %ymm2, %ymm4, %ymm4
|
|
# rnd_0: 0 - 0
|
|
movl %r11d, %eax
|
|
rorxl $11, %r10d, %ecx
|
|
addl 200(%rsp), %r13d
|
|
vpxor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 2 - 2
|
|
andl %r10d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
xorl %r12d, %eax
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_0: 6 - 6
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
andl %eax, %ebx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
# rnd_0: 7 - 7
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
addl %ebx, %r13d
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 0 - 0
|
|
movl %r10d, %ebx
|
|
rorxl $11, %r9d, %ecx
|
|
addl 204(%rsp), %r12d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
vpxor %ymm7, %ymm8, %ymm8
|
|
# rnd_1: 2 - 2
|
|
andl %r9d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
vpxor %ymm8, %ymm9, %ymm9
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
xorl %r11d, %ebx
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
vpaddd %ymm4, %ymm9, %ymm2
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r12d, %r8d
|
|
movl %r14d, %ebx
|
|
vpaddd 320+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4
|
|
# rnd_1: 6 - 6
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
andl %ebx, %eax
|
|
# rnd_1: 7 - 7
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
addl %eax, %r12d
|
|
vmovdqu %ymm4, 320(%rsp)
|
|
# rnd_0: 0 - 0
|
|
movl %r9d, %eax
|
|
rorxl $11, %r8d, %ecx
|
|
addl 224(%rsp), %r11d
|
|
vpalignr $4, %ymm3, %ymm0, %ymm5
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
vpalignr $4, %ymm1, %ymm2, %ymm4
|
|
# rnd_0: 2 - 2
|
|
andl %r8d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
xorl %r10d, %eax
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
vpsrld $18, %ymm5, %ymm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 6 - 6
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
andl %eax, %ebx
|
|
vpor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 7 - 7
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
addl %ebx, %r11d
|
|
vpor %ymm9, %ymm8, %ymm8
|
|
# rnd_1: 0 - 0
|
|
movl %r8d, %ebx
|
|
rorxl $11, %r15d, %ecx
|
|
addl 228(%rsp), %r10d
|
|
vpsrld $3, %ymm5, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
vpxor %ymm8, %ymm6, %ymm6
|
|
# rnd_1: 2 - 2
|
|
andl %r15d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
vpshufd $0xfa, %ymm2, %ymm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
xorl %r9d, %ebx
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
vpsrld $10, %ymm7, %ymm8
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r10d, %r14d
|
|
movl %r12d, %ebx
|
|
vpsrlq $19, %ymm7, %ymm6
|
|
# rnd_1: 6 - 6
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
andl %ebx, %eax
|
|
vpsrlq $0x11, %ymm7, %ymm7
|
|
# rnd_1: 7 - 7
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
addl %eax, %r10d
|
|
vpaddd %ymm3, %ymm4, %ymm4
|
|
# rnd_0: 0 - 0
|
|
movl %r15d, %eax
|
|
rorxl $11, %r14d, %ecx
|
|
addl 232(%rsp), %r9d
|
|
vpxor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 2 - 2
|
|
andl %r14d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
xorl %r8d, %eax
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_0: 6 - 6
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
andl %eax, %ebx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
# rnd_0: 7 - 7
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
addl %ebx, %r9d
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 0 - 0
|
|
movl %r14d, %ebx
|
|
rorxl $11, %r13d, %ecx
|
|
addl 236(%rsp), %r8d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
vpxor %ymm7, %ymm8, %ymm8
|
|
# rnd_1: 2 - 2
|
|
andl %r13d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
vpxor %ymm8, %ymm9, %ymm9
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
xorl %r15d, %ebx
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
vpaddd %ymm4, %ymm9, %ymm3
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r8d, %r12d
|
|
movl %r10d, %ebx
|
|
vpaddd 352+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4
|
|
# rnd_1: 6 - 6
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
andl %ebx, %eax
|
|
# rnd_1: 7 - 7
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
addl %eax, %r8d
|
|
vmovdqu %ymm4, 352(%rsp)
|
|
# rnd_0: 0 - 0
|
|
movl %r13d, %eax
|
|
rorxl $11, %r12d, %ecx
|
|
addl 256(%rsp), %r15d
|
|
vpalignr $4, %ymm0, %ymm1, %ymm5
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
vpalignr $4, %ymm2, %ymm3, %ymm4
|
|
# rnd_0: 2 - 2
|
|
andl %r12d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
xorl %r14d, %eax
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
vpsrld $18, %ymm5, %ymm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 6 - 6
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
andl %eax, %ebx
|
|
vpor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 7 - 7
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
addl %ebx, %r15d
|
|
vpor %ymm9, %ymm8, %ymm8
|
|
# rnd_1: 0 - 0
|
|
movl %r12d, %ebx
|
|
rorxl $11, %r11d, %ecx
|
|
addl 260(%rsp), %r14d
|
|
vpsrld $3, %ymm5, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
vpxor %ymm8, %ymm6, %ymm6
|
|
# rnd_1: 2 - 2
|
|
andl %r11d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
vpshufd $0xfa, %ymm3, %ymm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
xorl %r13d, %ebx
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
vpsrld $10, %ymm7, %ymm8
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r14d, %r10d
|
|
movl %r8d, %ebx
|
|
vpsrlq $19, %ymm7, %ymm6
|
|
# rnd_1: 6 - 6
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
andl %ebx, %eax
|
|
vpsrlq $0x11, %ymm7, %ymm7
|
|
# rnd_1: 7 - 7
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
addl %eax, %r14d
|
|
vpaddd %ymm0, %ymm4, %ymm4
|
|
# rnd_0: 0 - 0
|
|
movl %r11d, %eax
|
|
rorxl $11, %r10d, %ecx
|
|
addl 264(%rsp), %r13d
|
|
vpxor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 2 - 2
|
|
andl %r10d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
xorl %r12d, %eax
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_0: 6 - 6
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
andl %eax, %ebx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
# rnd_0: 7 - 7
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
addl %ebx, %r13d
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 0 - 0
|
|
movl %r10d, %ebx
|
|
rorxl $11, %r9d, %ecx
|
|
addl 268(%rsp), %r12d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
vpxor %ymm7, %ymm8, %ymm8
|
|
# rnd_1: 2 - 2
|
|
andl %r9d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
vpxor %ymm8, %ymm9, %ymm9
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
xorl %r11d, %ebx
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
vpaddd %ymm4, %ymm9, %ymm0
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r12d, %r8d
|
|
movl %r14d, %ebx
|
|
vpaddd 384+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4
|
|
# rnd_1: 6 - 6
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
andl %ebx, %eax
|
|
# rnd_1: 7 - 7
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
addl %eax, %r12d
|
|
vmovdqu %ymm4, 384(%rsp)
|
|
# rnd_0: 0 - 0
|
|
movl %r9d, %eax
|
|
rorxl $11, %r8d, %ecx
|
|
addl 288(%rsp), %r11d
|
|
vpalignr $4, %ymm1, %ymm2, %ymm5
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
vpalignr $4, %ymm3, %ymm0, %ymm4
|
|
# rnd_0: 2 - 2
|
|
andl %r8d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
xorl %r10d, %eax
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
vpsrld $18, %ymm5, %ymm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 6 - 6
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
andl %eax, %ebx
|
|
vpor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 7 - 7
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
addl %ebx, %r11d
|
|
vpor %ymm9, %ymm8, %ymm8
|
|
# rnd_1: 0 - 0
|
|
movl %r8d, %ebx
|
|
rorxl $11, %r15d, %ecx
|
|
addl 292(%rsp), %r10d
|
|
vpsrld $3, %ymm5, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
vpxor %ymm8, %ymm6, %ymm6
|
|
# rnd_1: 2 - 2
|
|
andl %r15d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
vpshufd $0xfa, %ymm0, %ymm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
xorl %r9d, %ebx
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
vpsrld $10, %ymm7, %ymm8
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r10d, %r14d
|
|
movl %r12d, %ebx
|
|
vpsrlq $19, %ymm7, %ymm6
|
|
# rnd_1: 6 - 6
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
andl %ebx, %eax
|
|
vpsrlq $0x11, %ymm7, %ymm7
|
|
# rnd_1: 7 - 7
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
addl %eax, %r10d
|
|
vpaddd %ymm1, %ymm4, %ymm4
|
|
# rnd_0: 0 - 0
|
|
movl %r15d, %eax
|
|
rorxl $11, %r14d, %ecx
|
|
addl 296(%rsp), %r9d
|
|
vpxor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 2 - 2
|
|
andl %r14d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
xorl %r8d, %eax
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_0: 6 - 6
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
andl %eax, %ebx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
# rnd_0: 7 - 7
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
addl %ebx, %r9d
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 0 - 0
|
|
movl %r14d, %ebx
|
|
rorxl $11, %r13d, %ecx
|
|
addl 300(%rsp), %r8d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
vpxor %ymm7, %ymm8, %ymm8
|
|
# rnd_1: 2 - 2
|
|
andl %r13d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
vpxor %ymm8, %ymm9, %ymm9
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
xorl %r15d, %ebx
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
vpaddd %ymm4, %ymm9, %ymm1
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r8d, %r12d
|
|
movl %r10d, %ebx
|
|
vpaddd 416+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4
|
|
# rnd_1: 6 - 6
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
andl %ebx, %eax
|
|
# rnd_1: 7 - 7
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
addl %eax, %r8d
|
|
vmovdqu %ymm4, 416(%rsp)
|
|
# rnd_0: 0 - 0
|
|
movl %r13d, %eax
|
|
rorxl $11, %r12d, %ecx
|
|
addl 320(%rsp), %r15d
|
|
vpalignr $4, %ymm2, %ymm3, %ymm5
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
vpalignr $4, %ymm0, %ymm1, %ymm4
|
|
# rnd_0: 2 - 2
|
|
andl %r12d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
xorl %r14d, %eax
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
vpsrld $18, %ymm5, %ymm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 6 - 6
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
andl %eax, %ebx
|
|
vpor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 7 - 7
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
addl %ebx, %r15d
|
|
vpor %ymm9, %ymm8, %ymm8
|
|
# rnd_1: 0 - 0
|
|
movl %r12d, %ebx
|
|
rorxl $11, %r11d, %ecx
|
|
addl 324(%rsp), %r14d
|
|
vpsrld $3, %ymm5, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
vpxor %ymm8, %ymm6, %ymm6
|
|
# rnd_1: 2 - 2
|
|
andl %r11d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
vpshufd $0xfa, %ymm1, %ymm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
xorl %r13d, %ebx
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
vpsrld $10, %ymm7, %ymm8
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r14d, %r10d
|
|
movl %r8d, %ebx
|
|
vpsrlq $19, %ymm7, %ymm6
|
|
# rnd_1: 6 - 6
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
andl %ebx, %eax
|
|
vpsrlq $0x11, %ymm7, %ymm7
|
|
# rnd_1: 7 - 7
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
addl %eax, %r14d
|
|
vpaddd %ymm2, %ymm4, %ymm4
|
|
# rnd_0: 0 - 0
|
|
movl %r11d, %eax
|
|
rorxl $11, %r10d, %ecx
|
|
addl 328(%rsp), %r13d
|
|
vpxor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 2 - 2
|
|
andl %r10d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
xorl %r12d, %eax
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_0: 6 - 6
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
andl %eax, %ebx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
# rnd_0: 7 - 7
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
addl %ebx, %r13d
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 0 - 0
|
|
movl %r10d, %ebx
|
|
rorxl $11, %r9d, %ecx
|
|
addl 332(%rsp), %r12d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
vpxor %ymm7, %ymm8, %ymm8
|
|
# rnd_1: 2 - 2
|
|
andl %r9d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
vpxor %ymm8, %ymm9, %ymm9
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
xorl %r11d, %ebx
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
vpaddd %ymm4, %ymm9, %ymm2
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r12d, %r8d
|
|
movl %r14d, %ebx
|
|
vpaddd 448+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4
|
|
# rnd_1: 6 - 6
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
andl %ebx, %eax
|
|
# rnd_1: 7 - 7
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
addl %eax, %r12d
|
|
vmovdqu %ymm4, 448(%rsp)
|
|
# rnd_0: 0 - 0
|
|
movl %r9d, %eax
|
|
rorxl $11, %r8d, %ecx
|
|
addl 352(%rsp), %r11d
|
|
vpalignr $4, %ymm3, %ymm0, %ymm5
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
vpalignr $4, %ymm1, %ymm2, %ymm4
|
|
# rnd_0: 2 - 2
|
|
andl %r8d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
xorl %r10d, %eax
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
vpsrld $18, %ymm5, %ymm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 6 - 6
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
andl %eax, %ebx
|
|
vpor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 7 - 7
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
addl %ebx, %r11d
|
|
vpor %ymm9, %ymm8, %ymm8
|
|
# rnd_1: 0 - 0
|
|
movl %r8d, %ebx
|
|
rorxl $11, %r15d, %ecx
|
|
addl 356(%rsp), %r10d
|
|
vpsrld $3, %ymm5, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
vpxor %ymm8, %ymm6, %ymm6
|
|
# rnd_1: 2 - 2
|
|
andl %r15d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
vpshufd $0xfa, %ymm2, %ymm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
xorl %r9d, %ebx
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
vpsrld $10, %ymm7, %ymm8
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r10d, %r14d
|
|
movl %r12d, %ebx
|
|
vpsrlq $19, %ymm7, %ymm6
|
|
# rnd_1: 6 - 6
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
andl %ebx, %eax
|
|
vpsrlq $0x11, %ymm7, %ymm7
|
|
# rnd_1: 7 - 7
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
addl %eax, %r10d
|
|
vpaddd %ymm3, %ymm4, %ymm4
|
|
# rnd_0: 0 - 0
|
|
movl %r15d, %eax
|
|
rorxl $11, %r14d, %ecx
|
|
addl 360(%rsp), %r9d
|
|
vpxor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 2 - 2
|
|
andl %r14d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
xorl %r8d, %eax
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_0: 6 - 6
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
andl %eax, %ebx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
# rnd_0: 7 - 7
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
addl %ebx, %r9d
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 0 - 0
|
|
movl %r14d, %ebx
|
|
rorxl $11, %r13d, %ecx
|
|
addl 364(%rsp), %r8d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
vpxor %ymm7, %ymm8, %ymm8
|
|
# rnd_1: 2 - 2
|
|
andl %r13d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
vpxor %ymm8, %ymm9, %ymm9
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
xorl %r15d, %ebx
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
vpaddd %ymm4, %ymm9, %ymm3
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r8d, %r12d
|
|
movl %r10d, %ebx
|
|
vpaddd 480+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4
|
|
# rnd_1: 6 - 6
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
andl %ebx, %eax
|
|
# rnd_1: 7 - 7
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
addl %eax, %r8d
vmovdqu %ymm4, 480(%rsp)
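# The last W[i]+K[i] chunk is now stored at 480(%rsp); the full schedule
# for this pass is on the stack, so the sixteen rounds below are pure
# scalar work with no further vector message expansion.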
xorl %eax, %eax
|
|
xorl %ecx, %ecx
|
|
rorxl $6, %r12d, %edx
|
|
rorxl $11, %r12d, %ecx
|
|
leal (%r8,%rax,1), %r8d
|
|
addl 384(%rsp), %r15d
|
|
movl %r13d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r12d, %eax
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
xorl %r14d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r15d
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
rorxl $11, %r11d, %ecx
|
|
addl %ebx, %r15d
|
|
addl 388(%rsp), %r14d
|
|
movl %r12d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r11d, %ebx
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
xorl %r13d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
xorl %ecx, %edx
|
|
movl %r8d, %ebx
|
|
leal (%r10,%r14,1), %r10d
|
|
xorl %r15d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r14d
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
rorxl $11, %r10d, %ecx
|
|
leal (%r14,%rax,1), %r14d
|
|
addl 392(%rsp), %r13d
|
|
movl %r11d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r10d, %eax
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
xorl %r12d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r13d
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
rorxl $11, %r9d, %ecx
|
|
addl %ebx, %r13d
|
|
addl 396(%rsp), %r12d
|
|
movl %r10d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r9d, %ebx
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
xorl %r11d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
xorl %ecx, %edx
|
|
movl %r14d, %ebx
|
|
leal (%r8,%r12,1), %r8d
|
|
xorl %r13d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r12d
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
rorxl $11, %r8d, %ecx
|
|
leal (%r12,%rax,1), %r12d
|
|
addl 416(%rsp), %r11d
|
|
movl %r9d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r8d, %eax
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
xorl %r10d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r11d
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
rorxl $11, %r15d, %ecx
|
|
addl %ebx, %r11d
|
|
addl 420(%rsp), %r10d
|
|
movl %r8d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r15d, %ebx
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
xorl %r9d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
xorl %ecx, %edx
|
|
movl %r12d, %ebx
|
|
leal (%r14,%r10,1), %r14d
|
|
xorl %r11d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r10d
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
rorxl $11, %r14d, %ecx
|
|
leal (%r10,%rax,1), %r10d
|
|
addl 424(%rsp), %r9d
|
|
movl %r15d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r14d, %eax
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
xorl %r8d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r9d
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
rorxl $11, %r13d, %ecx
|
|
addl %ebx, %r9d
|
|
addl 428(%rsp), %r8d
|
|
movl %r14d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r13d, %ebx
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
xorl %r15d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
xorl %ecx, %edx
|
|
movl %r10d, %ebx
|
|
leal (%r12,%r8,1), %r12d
|
|
xorl %r9d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r8d
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
rorxl $11, %r12d, %ecx
|
|
leal (%r8,%rax,1), %r8d
|
|
addl 448(%rsp), %r15d
|
|
movl %r13d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r12d, %eax
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
xorl %r14d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r15d
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
rorxl $11, %r11d, %ecx
|
|
addl %ebx, %r15d
|
|
addl 452(%rsp), %r14d
|
|
movl %r12d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r11d, %ebx
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
xorl %r13d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
xorl %ecx, %edx
|
|
movl %r8d, %ebx
|
|
leal (%r10,%r14,1), %r10d
|
|
xorl %r15d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r14d
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
rorxl $11, %r10d, %ecx
|
|
leal (%r14,%rax,1), %r14d
|
|
addl 456(%rsp), %r13d
|
|
movl %r11d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r10d, %eax
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
xorl %r12d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r13d
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
rorxl $11, %r9d, %ecx
|
|
addl %ebx, %r13d
|
|
addl 460(%rsp), %r12d
|
|
movl %r10d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r9d, %ebx
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
xorl %r11d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
xorl %ecx, %edx
|
|
movl %r14d, %ebx
|
|
leal (%r8,%r12,1), %r8d
|
|
xorl %r13d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r12d
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
rorxl $11, %r8d, %ecx
|
|
leal (%r12,%rax,1), %r12d
|
|
addl 480(%rsp), %r11d
|
|
movl %r9d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r8d, %eax
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
xorl %r10d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r11d
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
rorxl $11, %r15d, %ecx
|
|
addl %ebx, %r11d
|
|
addl 484(%rsp), %r10d
|
|
movl %r8d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r15d, %ebx
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
xorl %r9d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
xorl %ecx, %edx
|
|
movl %r12d, %ebx
|
|
leal (%r14,%r10,1), %r14d
|
|
xorl %r11d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r10d
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
rorxl $11, %r14d, %ecx
|
|
leal (%r10,%rax,1), %r10d
|
|
addl 488(%rsp), %r9d
|
|
movl %r15d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r14d, %eax
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
xorl %r8d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r9d
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
rorxl $11, %r13d, %ecx
|
|
addl %ebx, %r9d
|
|
addl 492(%rsp), %r8d
|
|
movl %r14d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r13d, %ebx
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
xorl %r15d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
xorl %ecx, %edx
|
|
movl %r10d, %ebx
|
|
leal (%r12,%r8,1), %r12d
|
|
xorl %r9d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r8d
|
|
xorl %r10d, %eax
|
|
addl %eax, %r8d
|
|
addl %r8d, (%rdi)
addl %r9d, 4(%rdi)
addl %r10d, 8(%rdi)
addl %r11d, 12(%rdi)
addl %r12d, 16(%rdi)
addl %r13d, 20(%rdi)
addl %r14d, 24(%rdi)
addl %r15d, 28(%rdi)
xorq %rax, %rax
vzeroupper
addq $0x200, %rsp
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size Transform_Sha256_AVX2_RORX,.-Transform_Sha256_AVX2_RORX
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl Transform_Sha256_AVX2_RORX_Len
.type Transform_Sha256_AVX2_RORX_Len,@function
.align 16
Transform_Sha256_AVX2_RORX_Len:
#else
.section __TEXT,__text
.globl _Transform_Sha256_AVX2_RORX_Len
.p2align 4
_Transform_Sha256_AVX2_RORX_Len:
#endif /* __APPLE__ */
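# Transform_Sha256_AVX2_RORX_Len: as used below, %rdi points at the hash
# state (eight 32-bit words at offset 0), %rsi at the message data and
# %rdx holds the remaining length in bytes, consumed one or two 64-byte
# blocks at a time. The prologue keeps the data pointer in %rbp, the
# length in %esi and reserves 0x200 bytes of stack for the precomputed
# W[i]+K[i] words.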
pushq %rbx
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbp
movq %rsi, %rbp
movq %rdx, %rsi
subq $0x200, %rsp
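# When the length is an odd number of 64-byte blocks (bit 0x40 set), the
# first block is copied to 32(%rdi) and hashed with the single-block
# Transform_Sha256_AVX2_RORX, so that the two-block loop below always
# sees a whole number of 128-byte pairs.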
testb $0x40, %sil
je L_sha256_len_avx2_rorx_block
vmovdqu (%rbp), %ymm0
vmovdqu 32(%rbp), %ymm1
vmovups %ymm0, 32(%rdi)
vmovups %ymm1, 64(%rdi)
#ifndef __APPLE__
call Transform_Sha256_AVX2_RORX@plt
#else
call _Transform_Sha256_AVX2_RORX
#endif /* __APPLE__ */
addq $0x40, %rbp
subl $0x40, %esi
jz L_sha256_len_avx2_rorx_done
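# The eight state words a..h are loaded into %r8d..%r15d and stay in
# those registers throughout the rounds; %ymm13 holds the byte-swap mask
# and %ymm11/%ymm12 the shuffle masks used by the message expansion.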
L_sha256_len_avx2_rorx_block:
vmovdqa L_avx2_rorx_sha256_flip_mask(%rip), %ymm13
vmovdqa L_avx2_rorx_sha256_shuf_00BA(%rip), %ymm11
vmovdqa L_avx2_rorx_sha256_shuf_DC00(%rip), %ymm12
movl (%rdi), %r8d
movl 4(%rdi), %r9d
movl 8(%rdi), %r10d
movl 12(%rdi), %r11d
movl 16(%rdi), %r12d
movl 20(%rdi), %r13d
movl 24(%rdi), %r14d
movl 28(%rdi), %r15d
# Start of loop processing two blocks
L_sha256_len_avx2_rorx_start:
# X0, X1, X2, X3 = W[0..15]
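# The 16 words of the first block are loaded into the low 128-bit lanes
# of %ymm0-%ymm3 and the corresponding words of the second block (64
# bytes later) into the high lanes via vinserti128; both are byte-swapped
# with %ymm13, the round constants are added, and the W[i]+K[i] sums are
# spilled to the stack for the scalar rounds.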
vmovdqu (%rbp), %xmm0
vmovdqu 16(%rbp), %xmm1
vinserti128 $0x01, 64(%rbp), %ymm0, %ymm0
vinserti128 $0x01, 80(%rbp), %ymm1, %ymm1
vpshufb %ymm13, %ymm0, %ymm0
vpshufb %ymm13, %ymm1, %ymm1
vpaddd 0+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4
vpaddd 32+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm5
vmovdqu %ymm4, (%rsp)
vmovdqu %ymm5, 32(%rsp)
vmovdqu 32(%rbp), %xmm2
vmovdqu 48(%rbp), %xmm3
vinserti128 $0x01, 96(%rbp), %ymm2, %ymm2
vinserti128 $0x01, 112(%rbp), %ymm3, %ymm3
vpshufb %ymm13, %ymm2, %ymm2
vpshufb %ymm13, %ymm3, %ymm3
vpaddd 64+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4
vpaddd 96+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm5
vmovdqu %ymm4, 64(%rsp)
vmovdqu %ymm5, 96(%rsp)
movl %r9d, %ebx
rorxl $6, %r12d, %edx
xorl %r10d, %ebx
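# Rough sketch of one rnd_0/rnd_1 pair below (the generated code reorders
# the steps for instruction scheduling):
#   T1 = h + Sigma1(e) + Ch(e,f,g) + (W[i]+K[i] read from the stack)
#   T2 = Sigma0(a) + Maj(a,b,c)
#   e' = d + T1, a' = T1 + T2; the remaining words shift down by rotating
#   register roles rather than moving data.
# Sigma1(e) = ROTR(e,6)^ROTR(e,11)^ROTR(e,25) and
# Sigma0(a) = ROTR(a,2)^ROTR(a,13)^ROTR(a,22), each formed from three
# rorxl results; Ch(e,f,g) = ((f^g)&e)^g and Maj(a,b,c) =
# (((a^b)&(b^c))^b), with the a^b/b^c terms carried in %eax/%ebx across
# rounds. The interleaved vpalignr/vpsrld/vpxor/vpshufb work expands the
# message schedule W[16..63] for both blocks and stores W+K back to the
# stack.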
# rnd_0: 0 - 0
|
|
movl %r13d, %eax
|
|
rorxl $11, %r12d, %ecx
|
|
addl (%rsp), %r15d
|
|
vpalignr $4, %ymm0, %ymm1, %ymm5
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
vpalignr $4, %ymm2, %ymm3, %ymm4
|
|
# rnd_0: 2 - 2
|
|
andl %r12d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
xorl %r14d, %eax
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
vpsrld $18, %ymm5, %ymm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 6 - 6
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
andl %eax, %ebx
|
|
vpor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 7 - 7
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
addl %ebx, %r15d
|
|
vpor %ymm9, %ymm8, %ymm8
|
|
# rnd_1: 0 - 0
|
|
movl %r12d, %ebx
|
|
rorxl $11, %r11d, %ecx
|
|
addl 4(%rsp), %r14d
|
|
vpsrld $3, %ymm5, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
vpxor %ymm8, %ymm6, %ymm6
|
|
# rnd_1: 2 - 2
|
|
andl %r11d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
vpshufd $0xfa, %ymm3, %ymm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
xorl %r13d, %ebx
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
vpsrld $10, %ymm7, %ymm8
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r14d, %r10d
|
|
movl %r8d, %ebx
|
|
vpsrlq $19, %ymm7, %ymm6
|
|
# rnd_1: 6 - 6
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
andl %ebx, %eax
|
|
vpsrlq $0x11, %ymm7, %ymm7
|
|
# rnd_1: 7 - 7
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
addl %eax, %r14d
|
|
vpaddd %ymm0, %ymm4, %ymm4
|
|
# rnd_0: 0 - 0
|
|
movl %r11d, %eax
|
|
rorxl $11, %r10d, %ecx
|
|
addl 8(%rsp), %r13d
|
|
vpxor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 2 - 2
|
|
andl %r10d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
xorl %r12d, %eax
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_0: 6 - 6
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
andl %eax, %ebx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
# rnd_0: 7 - 7
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
addl %ebx, %r13d
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 0 - 0
|
|
movl %r10d, %ebx
|
|
rorxl $11, %r9d, %ecx
|
|
addl 12(%rsp), %r12d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
vpxor %ymm7, %ymm8, %ymm8
|
|
# rnd_1: 2 - 2
|
|
andl %r9d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
vpxor %ymm8, %ymm9, %ymm9
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
xorl %r11d, %ebx
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
vpaddd %ymm4, %ymm9, %ymm0
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r12d, %r8d
|
|
movl %r14d, %ebx
|
|
vpaddd 128+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4
|
|
# rnd_1: 6 - 6
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
andl %ebx, %eax
|
|
# rnd_1: 7 - 7
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
addl %eax, %r12d
|
|
vmovdqu %ymm4, 128(%rsp)
|
|
# rnd_0: 0 - 0
|
|
movl %r9d, %eax
|
|
rorxl $11, %r8d, %ecx
|
|
addl 32(%rsp), %r11d
|
|
vpalignr $4, %ymm1, %ymm2, %ymm5
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
vpalignr $4, %ymm3, %ymm0, %ymm4
|
|
# rnd_0: 2 - 2
|
|
andl %r8d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
xorl %r10d, %eax
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
vpsrld $18, %ymm5, %ymm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 6 - 6
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
andl %eax, %ebx
|
|
vpor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 7 - 7
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
addl %ebx, %r11d
|
|
vpor %ymm9, %ymm8, %ymm8
|
|
# rnd_1: 0 - 0
|
|
movl %r8d, %ebx
|
|
rorxl $11, %r15d, %ecx
|
|
addl 36(%rsp), %r10d
|
|
vpsrld $3, %ymm5, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
vpxor %ymm8, %ymm6, %ymm6
|
|
# rnd_1: 2 - 2
|
|
andl %r15d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
vpshufd $0xfa, %ymm0, %ymm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
xorl %r9d, %ebx
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
vpsrld $10, %ymm7, %ymm8
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r10d, %r14d
|
|
movl %r12d, %ebx
|
|
vpsrlq $19, %ymm7, %ymm6
|
|
# rnd_1: 6 - 6
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
andl %ebx, %eax
|
|
vpsrlq $0x11, %ymm7, %ymm7
|
|
# rnd_1: 7 - 7
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
addl %eax, %r10d
|
|
vpaddd %ymm1, %ymm4, %ymm4
|
|
# rnd_0: 0 - 0
|
|
movl %r15d, %eax
|
|
rorxl $11, %r14d, %ecx
|
|
addl 40(%rsp), %r9d
|
|
vpxor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 2 - 2
|
|
andl %r14d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
xorl %r8d, %eax
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_0: 6 - 6
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
andl %eax, %ebx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
# rnd_0: 7 - 7
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
addl %ebx, %r9d
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 0 - 0
|
|
movl %r14d, %ebx
|
|
rorxl $11, %r13d, %ecx
|
|
addl 44(%rsp), %r8d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
vpxor %ymm7, %ymm8, %ymm8
|
|
# rnd_1: 2 - 2
|
|
andl %r13d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
vpxor %ymm8, %ymm9, %ymm9
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
xorl %r15d, %ebx
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
vpaddd %ymm4, %ymm9, %ymm1
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r8d, %r12d
|
|
movl %r10d, %ebx
|
|
vpaddd 160+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4
|
|
# rnd_1: 6 - 6
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
andl %ebx, %eax
|
|
# rnd_1: 7 - 7
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
addl %eax, %r8d
|
|
vmovdqu %ymm4, 160(%rsp)
|
|
# rnd_0: 0 - 0
|
|
movl %r13d, %eax
|
|
rorxl $11, %r12d, %ecx
|
|
addl 64(%rsp), %r15d
|
|
vpalignr $4, %ymm2, %ymm3, %ymm5
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
vpalignr $4, %ymm0, %ymm1, %ymm4
|
|
# rnd_0: 2 - 2
|
|
andl %r12d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
xorl %r14d, %eax
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
vpsrld $18, %ymm5, %ymm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 6 - 6
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
andl %eax, %ebx
|
|
vpor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 7 - 7
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
addl %ebx, %r15d
|
|
vpor %ymm9, %ymm8, %ymm8
|
|
# rnd_1: 0 - 0
|
|
movl %r12d, %ebx
|
|
rorxl $11, %r11d, %ecx
|
|
addl 68(%rsp), %r14d
|
|
vpsrld $3, %ymm5, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
vpxor %ymm8, %ymm6, %ymm6
|
|
# rnd_1: 2 - 2
|
|
andl %r11d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
vpshufd $0xfa, %ymm1, %ymm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
xorl %r13d, %ebx
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
vpsrld $10, %ymm7, %ymm8
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r14d, %r10d
|
|
movl %r8d, %ebx
|
|
vpsrlq $19, %ymm7, %ymm6
|
|
# rnd_1: 6 - 6
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
andl %ebx, %eax
|
|
vpsrlq $0x11, %ymm7, %ymm7
|
|
# rnd_1: 7 - 7
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
addl %eax, %r14d
|
|
vpaddd %ymm2, %ymm4, %ymm4
|
|
# rnd_0: 0 - 0
|
|
movl %r11d, %eax
|
|
rorxl $11, %r10d, %ecx
|
|
addl 72(%rsp), %r13d
|
|
vpxor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 2 - 2
|
|
andl %r10d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
xorl %r12d, %eax
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_0: 6 - 6
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
andl %eax, %ebx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
# rnd_0: 7 - 7
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
addl %ebx, %r13d
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 0 - 0
|
|
movl %r10d, %ebx
|
|
rorxl $11, %r9d, %ecx
|
|
addl 76(%rsp), %r12d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
vpxor %ymm7, %ymm8, %ymm8
|
|
# rnd_1: 2 - 2
|
|
andl %r9d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
vpxor %ymm8, %ymm9, %ymm9
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
xorl %r11d, %ebx
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
vpaddd %ymm4, %ymm9, %ymm2
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r12d, %r8d
|
|
movl %r14d, %ebx
|
|
vpaddd 192+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4
|
|
# rnd_1: 6 - 6
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
andl %ebx, %eax
|
|
# rnd_1: 7 - 7
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
addl %eax, %r12d
|
|
vmovdqu %ymm4, 192(%rsp)
|
|
# rnd_0: 0 - 0
|
|
movl %r9d, %eax
|
|
rorxl $11, %r8d, %ecx
|
|
addl 96(%rsp), %r11d
|
|
vpalignr $4, %ymm3, %ymm0, %ymm5
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
vpalignr $4, %ymm1, %ymm2, %ymm4
|
|
# rnd_0: 2 - 2
|
|
andl %r8d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
xorl %r10d, %eax
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
vpsrld $18, %ymm5, %ymm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 6 - 6
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
andl %eax, %ebx
|
|
vpor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 7 - 7
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
addl %ebx, %r11d
|
|
vpor %ymm9, %ymm8, %ymm8
|
|
# rnd_1: 0 - 0
|
|
movl %r8d, %ebx
|
|
rorxl $11, %r15d, %ecx
|
|
addl 100(%rsp), %r10d
|
|
vpsrld $3, %ymm5, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
vpxor %ymm8, %ymm6, %ymm6
|
|
# rnd_1: 2 - 2
|
|
andl %r15d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
vpshufd $0xfa, %ymm2, %ymm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
xorl %r9d, %ebx
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
vpsrld $10, %ymm7, %ymm8
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r10d, %r14d
|
|
movl %r12d, %ebx
|
|
vpsrlq $19, %ymm7, %ymm6
|
|
# rnd_1: 6 - 6
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
andl %ebx, %eax
|
|
vpsrlq $0x11, %ymm7, %ymm7
|
|
# rnd_1: 7 - 7
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
addl %eax, %r10d
|
|
vpaddd %ymm3, %ymm4, %ymm4
|
|
# rnd_0: 0 - 0
|
|
movl %r15d, %eax
|
|
rorxl $11, %r14d, %ecx
|
|
addl 104(%rsp), %r9d
|
|
vpxor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 2 - 2
|
|
andl %r14d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
xorl %r8d, %eax
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_0: 6 - 6
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
andl %eax, %ebx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
# rnd_0: 7 - 7
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
addl %ebx, %r9d
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 0 - 0
|
|
movl %r14d, %ebx
|
|
rorxl $11, %r13d, %ecx
|
|
addl 108(%rsp), %r8d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
vpxor %ymm7, %ymm8, %ymm8
|
|
# rnd_1: 2 - 2
|
|
andl %r13d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
vpxor %ymm8, %ymm9, %ymm9
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
xorl %r15d, %ebx
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
vpaddd %ymm4, %ymm9, %ymm3
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r8d, %r12d
|
|
movl %r10d, %ebx
|
|
vpaddd 224+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4
|
|
# rnd_1: 6 - 6
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
andl %ebx, %eax
|
|
# rnd_1: 7 - 7
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
addl %eax, %r8d
|
|
vmovdqu %ymm4, 224(%rsp)
|
|
# rnd_0: 0 - 0
|
|
movl %r13d, %eax
|
|
rorxl $11, %r12d, %ecx
|
|
addl 128(%rsp), %r15d
|
|
vpalignr $4, %ymm0, %ymm1, %ymm5
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
vpalignr $4, %ymm2, %ymm3, %ymm4
|
|
# rnd_0: 2 - 2
|
|
andl %r12d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
xorl %r14d, %eax
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
vpsrld $18, %ymm5, %ymm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 6 - 6
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
andl %eax, %ebx
|
|
vpor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 7 - 7
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
addl %ebx, %r15d
|
|
vpor %ymm9, %ymm8, %ymm8
|
|
# rnd_1: 0 - 0
|
|
movl %r12d, %ebx
|
|
rorxl $11, %r11d, %ecx
|
|
addl 132(%rsp), %r14d
|
|
vpsrld $3, %ymm5, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
vpxor %ymm8, %ymm6, %ymm6
|
|
# rnd_1: 2 - 2
|
|
andl %r11d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
vpshufd $0xfa, %ymm3, %ymm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
xorl %r13d, %ebx
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
vpsrld $10, %ymm7, %ymm8
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r14d, %r10d
|
|
movl %r8d, %ebx
|
|
vpsrlq $19, %ymm7, %ymm6
|
|
# rnd_1: 6 - 6
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
andl %ebx, %eax
|
|
vpsrlq $0x11, %ymm7, %ymm7
|
|
# rnd_1: 7 - 7
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
addl %eax, %r14d
|
|
vpaddd %ymm0, %ymm4, %ymm4
|
|
# rnd_0: 0 - 0
|
|
movl %r11d, %eax
|
|
rorxl $11, %r10d, %ecx
|
|
addl 136(%rsp), %r13d
|
|
vpxor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 2 - 2
|
|
andl %r10d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
xorl %r12d, %eax
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_0: 6 - 6
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
andl %eax, %ebx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
# rnd_0: 7 - 7
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
addl %ebx, %r13d
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 0 - 0
|
|
movl %r10d, %ebx
|
|
rorxl $11, %r9d, %ecx
|
|
addl 140(%rsp), %r12d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
vpxor %ymm7, %ymm8, %ymm8
|
|
# rnd_1: 2 - 2
|
|
andl %r9d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
vpxor %ymm8, %ymm9, %ymm9
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
xorl %r11d, %ebx
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
vpaddd %ymm4, %ymm9, %ymm0
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r12d, %r8d
|
|
movl %r14d, %ebx
|
|
vpaddd 256+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4
|
|
# rnd_1: 6 - 6
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
andl %ebx, %eax
|
|
# rnd_1: 7 - 7
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
addl %eax, %r12d
|
|
vmovdqu %ymm4, 256(%rsp)
|
|
# rnd_0: 0 - 0
|
|
movl %r9d, %eax
|
|
rorxl $11, %r8d, %ecx
|
|
addl 160(%rsp), %r11d
|
|
vpalignr $4, %ymm1, %ymm2, %ymm5
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
vpalignr $4, %ymm3, %ymm0, %ymm4
|
|
# rnd_0: 2 - 2
|
|
andl %r8d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
xorl %r10d, %eax
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
vpsrld $18, %ymm5, %ymm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 6 - 6
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
andl %eax, %ebx
|
|
vpor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 7 - 7
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
addl %ebx, %r11d
|
|
vpor %ymm9, %ymm8, %ymm8
|
|
# rnd_1: 0 - 0
|
|
movl %r8d, %ebx
|
|
rorxl $11, %r15d, %ecx
|
|
addl 164(%rsp), %r10d
|
|
vpsrld $3, %ymm5, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
vpxor %ymm8, %ymm6, %ymm6
|
|
# rnd_1: 2 - 2
|
|
andl %r15d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
vpshufd $0xfa, %ymm0, %ymm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
xorl %r9d, %ebx
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
vpsrld $10, %ymm7, %ymm8
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r10d, %r14d
|
|
movl %r12d, %ebx
|
|
vpsrlq $19, %ymm7, %ymm6
|
|
# rnd_1: 6 - 6
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
andl %ebx, %eax
|
|
vpsrlq $0x11, %ymm7, %ymm7
|
|
# rnd_1: 7 - 7
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
addl %eax, %r10d
|
|
vpaddd %ymm1, %ymm4, %ymm4
|
|
# rnd_0: 0 - 0
|
|
movl %r15d, %eax
|
|
rorxl $11, %r14d, %ecx
|
|
addl 168(%rsp), %r9d
|
|
vpxor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 2 - 2
|
|
andl %r14d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
xorl %r8d, %eax
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_0: 6 - 6
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
andl %eax, %ebx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
# rnd_0: 7 - 7
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
addl %ebx, %r9d
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 0 - 0
|
|
movl %r14d, %ebx
|
|
rorxl $11, %r13d, %ecx
|
|
addl 172(%rsp), %r8d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
vpxor %ymm7, %ymm8, %ymm8
|
|
# rnd_1: 2 - 2
|
|
andl %r13d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
vpxor %ymm8, %ymm9, %ymm9
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
xorl %r15d, %ebx
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
vpaddd %ymm4, %ymm9, %ymm1
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r8d, %r12d
|
|
movl %r10d, %ebx
|
|
vpaddd 288+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4
|
|
# rnd_1: 6 - 6
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
andl %ebx, %eax
|
|
# rnd_1: 7 - 7
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
addl %eax, %r8d
|
|
vmovdqu %ymm4, 288(%rsp)
|
|
# rnd_0: 0 - 0
|
|
movl %r13d, %eax
|
|
rorxl $11, %r12d, %ecx
|
|
addl 192(%rsp), %r15d
|
|
vpalignr $4, %ymm2, %ymm3, %ymm5
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
vpalignr $4, %ymm0, %ymm1, %ymm4
|
|
# rnd_0: 2 - 2
|
|
andl %r12d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
xorl %r14d, %eax
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
vpsrld $18, %ymm5, %ymm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 6 - 6
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
andl %eax, %ebx
|
|
vpor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 7 - 7
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
addl %ebx, %r15d
|
|
vpor %ymm9, %ymm8, %ymm8
|
|
# rnd_1: 0 - 0
|
|
movl %r12d, %ebx
|
|
rorxl $11, %r11d, %ecx
|
|
addl 196(%rsp), %r14d
|
|
vpsrld $3, %ymm5, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
vpxor %ymm8, %ymm6, %ymm6
|
|
# rnd_1: 2 - 2
|
|
andl %r11d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
vpshufd $0xfa, %ymm1, %ymm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
xorl %r13d, %ebx
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
vpsrld $10, %ymm7, %ymm8
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r14d, %r10d
|
|
movl %r8d, %ebx
|
|
vpsrlq $19, %ymm7, %ymm6
|
|
# rnd_1: 6 - 6
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
andl %ebx, %eax
|
|
vpsrlq $0x11, %ymm7, %ymm7
|
|
# rnd_1: 7 - 7
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
addl %eax, %r14d
|
|
vpaddd %ymm2, %ymm4, %ymm4
|
|
# rnd_0: 0 - 0
|
|
movl %r11d, %eax
|
|
rorxl $11, %r10d, %ecx
|
|
addl 200(%rsp), %r13d
|
|
vpxor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 2 - 2
|
|
andl %r10d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
xorl %r12d, %eax
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_0: 6 - 6
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
andl %eax, %ebx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
# rnd_0: 7 - 7
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
addl %ebx, %r13d
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 0 - 0
|
|
movl %r10d, %ebx
|
|
rorxl $11, %r9d, %ecx
|
|
addl 204(%rsp), %r12d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
vpxor %ymm7, %ymm8, %ymm8
|
|
# rnd_1: 2 - 2
|
|
andl %r9d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
vpxor %ymm8, %ymm9, %ymm9
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
xorl %r11d, %ebx
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
vpaddd %ymm4, %ymm9, %ymm2
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r12d, %r8d
|
|
movl %r14d, %ebx
|
|
vpaddd 320+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4
|
|
# rnd_1: 6 - 6
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
andl %ebx, %eax
|
|
# rnd_1: 7 - 7
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
addl %eax, %r12d
|
|
vmovdqu %ymm4, 320(%rsp)
|
|
# rnd_0: 0 - 0
|
|
movl %r9d, %eax
|
|
rorxl $11, %r8d, %ecx
|
|
addl 224(%rsp), %r11d
|
|
vpalignr $4, %ymm3, %ymm0, %ymm5
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
vpalignr $4, %ymm1, %ymm2, %ymm4
|
|
# rnd_0: 2 - 2
|
|
andl %r8d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
xorl %r10d, %eax
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
vpsrld $18, %ymm5, %ymm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 6 - 6
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
andl %eax, %ebx
|
|
vpor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 7 - 7
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
addl %ebx, %r11d
|
|
vpor %ymm9, %ymm8, %ymm8
|
|
# rnd_1: 0 - 0
|
|
movl %r8d, %ebx
|
|
rorxl $11, %r15d, %ecx
|
|
addl 228(%rsp), %r10d
|
|
vpsrld $3, %ymm5, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
vpxor %ymm8, %ymm6, %ymm6
|
|
# rnd_1: 2 - 2
|
|
andl %r15d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
vpshufd $0xfa, %ymm2, %ymm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
xorl %r9d, %ebx
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
vpsrld $10, %ymm7, %ymm8
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r10d, %r14d
|
|
movl %r12d, %ebx
|
|
vpsrlq $19, %ymm7, %ymm6
|
|
# rnd_1: 6 - 6
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
andl %ebx, %eax
|
|
vpsrlq $0x11, %ymm7, %ymm7
|
|
# rnd_1: 7 - 7
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
addl %eax, %r10d
|
|
vpaddd %ymm3, %ymm4, %ymm4
|
|
# rnd_0: 0 - 0
|
|
movl %r15d, %eax
|
|
rorxl $11, %r14d, %ecx
|
|
addl 232(%rsp), %r9d
|
|
vpxor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 2 - 2
|
|
andl %r14d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
xorl %r8d, %eax
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_0: 6 - 6
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
andl %eax, %ebx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
# rnd_0: 7 - 7
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
addl %ebx, %r9d
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 0 - 0
|
|
movl %r14d, %ebx
|
|
rorxl $11, %r13d, %ecx
|
|
addl 236(%rsp), %r8d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
vpxor %ymm7, %ymm8, %ymm8
|
|
# rnd_1: 2 - 2
|
|
andl %r13d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
vpxor %ymm8, %ymm9, %ymm9
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
xorl %r15d, %ebx
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
vpaddd %ymm4, %ymm9, %ymm3
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r8d, %r12d
|
|
movl %r10d, %ebx
|
|
vpaddd 352+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4
|
|
# rnd_1: 6 - 6
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
andl %ebx, %eax
|
|
# rnd_1: 7 - 7
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
addl %eax, %r8d
|
|
vmovdqu %ymm4, 352(%rsp)
|
|
# rnd_0: 0 - 0
|
|
movl %r13d, %eax
|
|
rorxl $11, %r12d, %ecx
|
|
addl 256(%rsp), %r15d
|
|
vpalignr $4, %ymm0, %ymm1, %ymm5
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
vpalignr $4, %ymm2, %ymm3, %ymm4
|
|
# rnd_0: 2 - 2
|
|
andl %r12d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
xorl %r14d, %eax
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
vpsrld $18, %ymm5, %ymm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 6 - 6
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
andl %eax, %ebx
|
|
vpor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 7 - 7
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
addl %ebx, %r15d
|
|
vpor %ymm9, %ymm8, %ymm8
|
|
# rnd_1: 0 - 0
|
|
movl %r12d, %ebx
|
|
rorxl $11, %r11d, %ecx
|
|
addl 260(%rsp), %r14d
|
|
vpsrld $3, %ymm5, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
vpxor %ymm8, %ymm6, %ymm6
|
|
# rnd_1: 2 - 2
|
|
andl %r11d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
vpshufd $0xfa, %ymm3, %ymm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
xorl %r13d, %ebx
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
vpsrld $10, %ymm7, %ymm8
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r14d, %r10d
|
|
movl %r8d, %ebx
|
|
vpsrlq $19, %ymm7, %ymm6
|
|
# rnd_1: 6 - 6
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
andl %ebx, %eax
|
|
vpsrlq $0x11, %ymm7, %ymm7
|
|
# rnd_1: 7 - 7
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
addl %eax, %r14d
|
|
vpaddd %ymm0, %ymm4, %ymm4
|
|
# rnd_0: 0 - 0
|
|
movl %r11d, %eax
|
|
rorxl $11, %r10d, %ecx
|
|
addl 264(%rsp), %r13d
|
|
vpxor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 2 - 2
|
|
andl %r10d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
xorl %r12d, %eax
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_0: 6 - 6
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
andl %eax, %ebx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
# rnd_0: 7 - 7
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
addl %ebx, %r13d
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 0 - 0
|
|
movl %r10d, %ebx
|
|
rorxl $11, %r9d, %ecx
|
|
addl 268(%rsp), %r12d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
vpxor %ymm7, %ymm8, %ymm8
|
|
# rnd_1: 2 - 2
|
|
andl %r9d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
vpxor %ymm8, %ymm9, %ymm9
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
xorl %r11d, %ebx
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
vpaddd %ymm4, %ymm9, %ymm0
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r12d, %r8d
|
|
movl %r14d, %ebx
|
|
vpaddd 384+L_avx2_rorx_sha256_k(%rip), %ymm0, %ymm4
|
|
# rnd_1: 6 - 6
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
andl %ebx, %eax
|
|
# rnd_1: 7 - 7
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
addl %eax, %r12d
|
|
vmovdqu %ymm4, 384(%rsp)
|
|
# rnd_0: 0 - 0
|
|
movl %r9d, %eax
|
|
rorxl $11, %r8d, %ecx
|
|
addl 288(%rsp), %r11d
|
|
vpalignr $4, %ymm1, %ymm2, %ymm5
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
vpalignr $4, %ymm3, %ymm0, %ymm4
|
|
# rnd_0: 2 - 2
|
|
andl %r8d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
xorl %r10d, %eax
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
vpsrld $18, %ymm5, %ymm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 6 - 6
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
andl %eax, %ebx
|
|
vpor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 7 - 7
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
addl %ebx, %r11d
|
|
vpor %ymm9, %ymm8, %ymm8
|
|
# rnd_1: 0 - 0
|
|
movl %r8d, %ebx
|
|
rorxl $11, %r15d, %ecx
|
|
addl 292(%rsp), %r10d
|
|
vpsrld $3, %ymm5, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
vpxor %ymm8, %ymm6, %ymm6
|
|
# rnd_1: 2 - 2
|
|
andl %r15d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
vpshufd $0xfa, %ymm0, %ymm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
xorl %r9d, %ebx
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
vpsrld $10, %ymm7, %ymm8
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r10d, %r14d
|
|
movl %r12d, %ebx
|
|
vpsrlq $19, %ymm7, %ymm6
|
|
# rnd_1: 6 - 6
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
andl %ebx, %eax
|
|
vpsrlq $0x11, %ymm7, %ymm7
|
|
# rnd_1: 7 - 7
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
addl %eax, %r10d
|
|
vpaddd %ymm1, %ymm4, %ymm4
|
|
# rnd_0: 0 - 0
|
|
movl %r15d, %eax
|
|
rorxl $11, %r14d, %ecx
|
|
addl 296(%rsp), %r9d
|
|
vpxor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 2 - 2
|
|
andl %r14d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
xorl %r8d, %eax
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_0: 6 - 6
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
andl %eax, %ebx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
# rnd_0: 7 - 7
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
addl %ebx, %r9d
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 0 - 0
|
|
movl %r14d, %ebx
|
|
rorxl $11, %r13d, %ecx
|
|
addl 300(%rsp), %r8d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
vpxor %ymm7, %ymm8, %ymm8
|
|
# rnd_1: 2 - 2
|
|
andl %r13d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
vpxor %ymm8, %ymm9, %ymm9
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
xorl %r15d, %ebx
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
vpaddd %ymm4, %ymm9, %ymm1
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r8d, %r12d
|
|
movl %r10d, %ebx
|
|
vpaddd 416+L_avx2_rorx_sha256_k(%rip), %ymm1, %ymm4
|
|
# rnd_1: 6 - 6
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
andl %ebx, %eax
|
|
# rnd_1: 7 - 7
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
addl %eax, %r8d
|
|
vmovdqu %ymm4, 416(%rsp)
|
|
# rnd_0: 0 - 0
|
|
movl %r13d, %eax
|
|
rorxl $11, %r12d, %ecx
|
|
addl 320(%rsp), %r15d
|
|
vpalignr $4, %ymm2, %ymm3, %ymm5
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
vpalignr $4, %ymm0, %ymm1, %ymm4
|
|
# rnd_0: 2 - 2
|
|
andl %r12d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
xorl %r14d, %eax
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
vpsrld $18, %ymm5, %ymm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 6 - 6
|
|
xorl %r8d, %eax
|
|
addl %edx, %r15d
|
|
andl %eax, %ebx
|
|
vpor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 7 - 7
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
addl %ebx, %r15d
|
|
vpor %ymm9, %ymm8, %ymm8
|
|
# rnd_1: 0 - 0
|
|
movl %r12d, %ebx
|
|
rorxl $11, %r11d, %ecx
|
|
addl 324(%rsp), %r14d
|
|
vpsrld $3, %ymm5, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
vpxor %ymm8, %ymm6, %ymm6
|
|
# rnd_1: 2 - 2
|
|
andl %r11d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
vpshufd $0xfa, %ymm1, %ymm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
xorl %r13d, %ebx
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
vpsrld $10, %ymm7, %ymm8
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r14d, %r10d
|
|
movl %r8d, %ebx
|
|
vpsrlq $19, %ymm7, %ymm6
|
|
# rnd_1: 6 - 6
|
|
xorl %r15d, %ebx
|
|
addl %edx, %r14d
|
|
andl %ebx, %eax
|
|
vpsrlq $0x11, %ymm7, %ymm7
|
|
# rnd_1: 7 - 7
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
addl %eax, %r14d
|
|
vpaddd %ymm2, %ymm4, %ymm4
|
|
# rnd_0: 0 - 0
|
|
movl %r11d, %eax
|
|
rorxl $11, %r10d, %ecx
|
|
addl 328(%rsp), %r13d
|
|
vpxor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 2 - 2
|
|
andl %r10d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
xorl %r12d, %eax
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_0: 6 - 6
|
|
xorl %r14d, %eax
|
|
addl %edx, %r13d
|
|
andl %eax, %ebx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
# rnd_0: 7 - 7
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
addl %ebx, %r13d
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 0 - 0
|
|
movl %r10d, %ebx
|
|
rorxl $11, %r9d, %ecx
|
|
addl 332(%rsp), %r12d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
vpxor %ymm7, %ymm8, %ymm8
|
|
# rnd_1: 2 - 2
|
|
andl %r9d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
vpxor %ymm8, %ymm9, %ymm9
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
xorl %r11d, %ebx
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
vpaddd %ymm4, %ymm9, %ymm2
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r12d, %r8d
|
|
movl %r14d, %ebx
|
|
vpaddd 448+L_avx2_rorx_sha256_k(%rip), %ymm2, %ymm4
|
|
# rnd_1: 6 - 6
|
|
xorl %r13d, %ebx
|
|
addl %edx, %r12d
|
|
andl %ebx, %eax
|
|
# rnd_1: 7 - 7
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
addl %eax, %r12d
|
|
vmovdqu %ymm4, 448(%rsp)
|
|
# rnd_0: 0 - 0
|
|
movl %r9d, %eax
|
|
rorxl $11, %r8d, %ecx
|
|
addl 352(%rsp), %r11d
|
|
vpalignr $4, %ymm3, %ymm0, %ymm5
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
vpalignr $4, %ymm1, %ymm2, %ymm4
|
|
# rnd_0: 2 - 2
|
|
andl %r8d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
vpsrld $7, %ymm5, %ymm6
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
xorl %r10d, %eax
|
|
vpslld $25, %ymm5, %ymm7
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
vpsrld $18, %ymm5, %ymm8
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
vpslld $14, %ymm5, %ymm9
|
|
# rnd_0: 6 - 6
|
|
xorl %r12d, %eax
|
|
addl %edx, %r11d
|
|
andl %eax, %ebx
|
|
vpor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 7 - 7
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
addl %ebx, %r11d
|
|
vpor %ymm9, %ymm8, %ymm8
|
|
# rnd_1: 0 - 0
|
|
movl %r8d, %ebx
|
|
rorxl $11, %r15d, %ecx
|
|
addl 356(%rsp), %r10d
|
|
vpsrld $3, %ymm5, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
vpxor %ymm8, %ymm6, %ymm6
|
|
# rnd_1: 2 - 2
|
|
andl %r15d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
vpshufd $0xfa, %ymm2, %ymm7
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
xorl %r9d, %ebx
|
|
vpxor %ymm6, %ymm9, %ymm5
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
vpsrld $10, %ymm7, %ymm8
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r10d, %r14d
|
|
movl %r12d, %ebx
|
|
vpsrlq $19, %ymm7, %ymm6
|
|
# rnd_1: 6 - 6
|
|
xorl %r11d, %ebx
|
|
addl %edx, %r10d
|
|
andl %ebx, %eax
|
|
vpsrlq $0x11, %ymm7, %ymm7
|
|
# rnd_1: 7 - 7
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
addl %eax, %r10d
|
|
vpaddd %ymm3, %ymm4, %ymm4
|
|
# rnd_0: 0 - 0
|
|
movl %r15d, %eax
|
|
rorxl $11, %r14d, %ecx
|
|
addl 360(%rsp), %r9d
|
|
vpxor %ymm7, %ymm6, %ymm6
|
|
# rnd_0: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
vpxor %ymm6, %ymm8, %ymm8
|
|
# rnd_0: 2 - 2
|
|
andl %r14d, %eax
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
vpaddd %ymm5, %ymm4, %ymm4
|
|
# rnd_0: 3 - 3
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
xorl %r8d, %eax
|
|
vpshufb %ymm11, %ymm8, %ymm8
|
|
# rnd_0: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
vpaddd %ymm8, %ymm4, %ymm4
|
|
# rnd_0: 5 - 5
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
vpshufd $0x50, %ymm4, %ymm6
|
|
# rnd_0: 6 - 6
|
|
xorl %r10d, %eax
|
|
addl %edx, %r9d
|
|
andl %eax, %ebx
|
|
vpsrlq $0x11, %ymm6, %ymm8
|
|
# rnd_0: 7 - 7
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
addl %ebx, %r9d
|
|
vpsrlq $19, %ymm6, %ymm7
|
|
# rnd_1: 0 - 0
|
|
movl %r14d, %ebx
|
|
rorxl $11, %r13d, %ecx
|
|
addl 364(%rsp), %r8d
|
|
vpsrld $10, %ymm6, %ymm9
|
|
# rnd_1: 1 - 1
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
vpxor %ymm7, %ymm8, %ymm8
|
|
# rnd_1: 2 - 2
|
|
andl %r13d, %ebx
|
|
xorl %ecx, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
vpxor %ymm8, %ymm9, %ymm9
|
|
# rnd_1: 3 - 3
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
xorl %r15d, %ebx
|
|
vpshufb %ymm12, %ymm9, %ymm9
|
|
# rnd_1: 4 - 4
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
vpaddd %ymm4, %ymm9, %ymm3
|
|
# rnd_1: 5 - 5
|
|
xorl %ecx, %edx
|
|
addl %r8d, %r12d
|
|
movl %r10d, %ebx
|
|
vpaddd 480+L_avx2_rorx_sha256_k(%rip), %ymm3, %ymm4
|
|
# rnd_1: 6 - 6
|
|
xorl %r9d, %ebx
|
|
addl %edx, %r8d
|
|
andl %ebx, %eax
|
|
# rnd_1: 7 - 7
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
addl %eax, %r8d
|
|
vmovdqu %ymm4, 480(%rsp)
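# Message schedule complete: W+K for all 64 rounds of both 64-byte blocks now sits in the
# sixteen 32-byte stack slots, so the remaining rounds are purely scalar and read the
# values back with addl N(%rsp).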
xorl %eax, %eax
|
|
xorl %ecx, %ecx
|
|
rorxl $6, %r12d, %edx
|
|
rorxl $11, %r12d, %ecx
|
|
leal (%r8,%rax,1), %r8d
|
|
addl 384(%rsp), %r15d
|
|
movl %r13d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r12d, %eax
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
xorl %r14d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r15d
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
rorxl $11, %r11d, %ecx
|
|
addl %ebx, %r15d
|
|
addl 388(%rsp), %r14d
|
|
movl %r12d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r11d, %ebx
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
xorl %r13d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
xorl %ecx, %edx
|
|
movl %r8d, %ebx
|
|
leal (%r10,%r14,1), %r10d
|
|
xorl %r15d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r14d
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
rorxl $11, %r10d, %ecx
|
|
leal (%r14,%rax,1), %r14d
|
|
addl 392(%rsp), %r13d
|
|
movl %r11d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r10d, %eax
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
xorl %r12d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r13d
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
rorxl $11, %r9d, %ecx
|
|
addl %ebx, %r13d
|
|
addl 396(%rsp), %r12d
|
|
movl %r10d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r9d, %ebx
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
xorl %r11d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
xorl %ecx, %edx
|
|
movl %r14d, %ebx
|
|
leal (%r8,%r12,1), %r8d
|
|
xorl %r13d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r12d
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
rorxl $11, %r8d, %ecx
|
|
leal (%r12,%rax,1), %r12d
|
|
addl 416(%rsp), %r11d
|
|
movl %r9d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r8d, %eax
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
xorl %r10d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r11d
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
rorxl $11, %r15d, %ecx
|
|
addl %ebx, %r11d
|
|
addl 420(%rsp), %r10d
|
|
movl %r8d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r15d, %ebx
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
xorl %r9d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
xorl %ecx, %edx
|
|
movl %r12d, %ebx
|
|
leal (%r14,%r10,1), %r14d
|
|
xorl %r11d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r10d
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
rorxl $11, %r14d, %ecx
|
|
leal (%r10,%rax,1), %r10d
|
|
addl 424(%rsp), %r9d
|
|
movl %r15d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r14d, %eax
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
xorl %r8d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r9d
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
rorxl $11, %r13d, %ecx
|
|
addl %ebx, %r9d
|
|
addl 428(%rsp), %r8d
|
|
movl %r14d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r13d, %ebx
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
xorl %r15d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
xorl %ecx, %edx
|
|
movl %r10d, %ebx
|
|
leal (%r12,%r8,1), %r12d
|
|
xorl %r9d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r8d
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
rorxl $11, %r12d, %ecx
|
|
leal (%r8,%rax,1), %r8d
|
|
addl 448(%rsp), %r15d
|
|
movl %r13d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r12d, %eax
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
xorl %r14d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r15d
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
rorxl $11, %r11d, %ecx
|
|
addl %ebx, %r15d
|
|
addl 452(%rsp), %r14d
|
|
movl %r12d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r11d, %ebx
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
xorl %r13d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
xorl %ecx, %edx
|
|
movl %r8d, %ebx
|
|
leal (%r10,%r14,1), %r10d
|
|
xorl %r15d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r14d
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
rorxl $11, %r10d, %ecx
|
|
leal (%r14,%rax,1), %r14d
|
|
addl 456(%rsp), %r13d
|
|
movl %r11d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r10d, %eax
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
xorl %r12d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r13d
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
rorxl $11, %r9d, %ecx
|
|
addl %ebx, %r13d
|
|
addl 460(%rsp), %r12d
|
|
movl %r10d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r9d, %ebx
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
xorl %r11d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
xorl %ecx, %edx
|
|
movl %r14d, %ebx
|
|
leal (%r8,%r12,1), %r8d
|
|
xorl %r13d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r12d
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
rorxl $11, %r8d, %ecx
|
|
leal (%r12,%rax,1), %r12d
|
|
addl 480(%rsp), %r11d
|
|
movl %r9d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r8d, %eax
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
xorl %r10d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r11d
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
rorxl $11, %r15d, %ecx
|
|
addl %ebx, %r11d
|
|
addl 484(%rsp), %r10d
|
|
movl %r8d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r15d, %ebx
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
xorl %r9d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
xorl %ecx, %edx
|
|
movl %r12d, %ebx
|
|
leal (%r14,%r10,1), %r14d
|
|
xorl %r11d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r10d
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
rorxl $11, %r14d, %ecx
|
|
leal (%r10,%rax,1), %r10d
|
|
addl 488(%rsp), %r9d
|
|
movl %r15d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r14d, %eax
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
xorl %r8d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r9d
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
rorxl $11, %r13d, %ecx
|
|
addl %ebx, %r9d
|
|
addl 492(%rsp), %r8d
|
|
movl %r14d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r13d, %ebx
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
xorl %r15d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
xorl %ecx, %edx
|
|
movl %r10d, %ebx
|
|
leal (%r12,%r8,1), %r12d
|
|
xorl %r9d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r8d
|
|
xorl %r10d, %eax
|
|
addl %eax, %r8d
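# The 64 rounds of the first block are done: fold the working registers into the hash
# state at (%rdi) before starting on the second block.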
xorl %ecx, %ecx
addl (%rdi), %r8d
addl 4(%rdi), %r9d
addl 8(%rdi), %r10d
addl 12(%rdi), %r11d
addl 16(%rdi), %r12d
addl 20(%rdi), %r13d
addl 24(%rdi), %r14d
addl 28(%rdi), %r15d
movl %r8d, (%rdi)
movl %r9d, 4(%rdi)
movl %r10d, 8(%rdi)
movl %r11d, 12(%rdi)
movl %r12d, 16(%rdi)
movl %r13d, 20(%rdi)
movl %r14d, 24(%rdi)
movl %r15d, 28(%rdi)
# Second block: its W+K values sit in the upper 16 bytes of each 32-byte stack slot, so
# the rounds below use offsets 16, 48, 80, ...
movl %r9d, %ebx
xorl %eax, %eax
xorl %r10d, %ebx
rorxl $6, %r12d, %edx
rorxl $11, %r12d, %ecx
leal (%r8,%rax,1), %r8d
addl 16(%rsp), %r15d
|
|
movl %r13d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r12d, %eax
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
xorl %r14d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r15d
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
rorxl $11, %r11d, %ecx
|
|
addl %ebx, %r15d
|
|
addl 20(%rsp), %r14d
|
|
movl %r12d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r11d, %ebx
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
xorl %r13d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
xorl %ecx, %edx
|
|
movl %r8d, %ebx
|
|
leal (%r10,%r14,1), %r10d
|
|
xorl %r15d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r14d
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
rorxl $11, %r10d, %ecx
|
|
leal (%r14,%rax,1), %r14d
|
|
addl 24(%rsp), %r13d
|
|
movl %r11d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r10d, %eax
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
xorl %r12d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r13d
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
rorxl $11, %r9d, %ecx
|
|
addl %ebx, %r13d
|
|
addl 28(%rsp), %r12d
|
|
movl %r10d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r9d, %ebx
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
xorl %r11d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
xorl %ecx, %edx
|
|
movl %r14d, %ebx
|
|
leal (%r8,%r12,1), %r8d
|
|
xorl %r13d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r12d
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
rorxl $11, %r8d, %ecx
|
|
leal (%r12,%rax,1), %r12d
|
|
addl 48(%rsp), %r11d
|
|
movl %r9d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r8d, %eax
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
xorl %r10d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r11d
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
rorxl $11, %r15d, %ecx
|
|
addl %ebx, %r11d
|
|
addl 52(%rsp), %r10d
|
|
movl %r8d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r15d, %ebx
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
xorl %r9d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
xorl %ecx, %edx
|
|
movl %r12d, %ebx
|
|
leal (%r14,%r10,1), %r14d
|
|
xorl %r11d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r10d
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
rorxl $11, %r14d, %ecx
|
|
leal (%r10,%rax,1), %r10d
|
|
addl 56(%rsp), %r9d
|
|
movl %r15d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r14d, %eax
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
xorl %r8d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r9d
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
rorxl $11, %r13d, %ecx
|
|
addl %ebx, %r9d
|
|
addl 60(%rsp), %r8d
|
|
movl %r14d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r13d, %ebx
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
xorl %r15d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
xorl %ecx, %edx
|
|
movl %r10d, %ebx
|
|
leal (%r12,%r8,1), %r12d
|
|
xorl %r9d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r8d
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
rorxl $11, %r12d, %ecx
|
|
leal (%r8,%rax,1), %r8d
|
|
addl 80(%rsp), %r15d
|
|
movl %r13d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r12d, %eax
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
xorl %r14d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r15d
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
rorxl $11, %r11d, %ecx
|
|
addl %ebx, %r15d
|
|
addl 84(%rsp), %r14d
|
|
movl %r12d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r11d, %ebx
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
xorl %r13d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
xorl %ecx, %edx
|
|
movl %r8d, %ebx
|
|
leal (%r10,%r14,1), %r10d
|
|
xorl %r15d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r14d
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
rorxl $11, %r10d, %ecx
|
|
leal (%r14,%rax,1), %r14d
|
|
addl 88(%rsp), %r13d
|
|
movl %r11d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r10d, %eax
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
xorl %r12d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r13d
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
rorxl $11, %r9d, %ecx
|
|
addl %ebx, %r13d
|
|
addl 92(%rsp), %r12d
|
|
movl %r10d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r9d, %ebx
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
xorl %r11d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
xorl %ecx, %edx
|
|
movl %r14d, %ebx
|
|
leal (%r8,%r12,1), %r8d
|
|
xorl %r13d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r12d
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
rorxl $11, %r8d, %ecx
|
|
leal (%r12,%rax,1), %r12d
|
|
addl 112(%rsp), %r11d
|
|
movl %r9d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r8d, %eax
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
xorl %r10d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r11d
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
rorxl $11, %r15d, %ecx
|
|
addl %ebx, %r11d
|
|
addl 116(%rsp), %r10d
|
|
movl %r8d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r15d, %ebx
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
xorl %r9d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
xorl %ecx, %edx
|
|
movl %r12d, %ebx
|
|
leal (%r14,%r10,1), %r14d
|
|
xorl %r11d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r10d
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
rorxl $11, %r14d, %ecx
|
|
leal (%r10,%rax,1), %r10d
|
|
addl 120(%rsp), %r9d
|
|
movl %r15d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r14d, %eax
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
xorl %r8d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r9d
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
rorxl $11, %r13d, %ecx
|
|
addl %ebx, %r9d
|
|
addl 124(%rsp), %r8d
|
|
movl %r14d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r13d, %ebx
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
xorl %r15d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
xorl %ecx, %edx
|
|
movl %r10d, %ebx
|
|
leal (%r12,%r8,1), %r12d
|
|
xorl %r9d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r8d
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
rorxl $11, %r12d, %ecx
|
|
leal (%r8,%rax,1), %r8d
|
|
addl 144(%rsp), %r15d
|
|
movl %r13d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r12d, %eax
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
xorl %r14d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r15d
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
rorxl $11, %r11d, %ecx
|
|
addl %ebx, %r15d
|
|
addl 148(%rsp), %r14d
|
|
movl %r12d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r11d, %ebx
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
xorl %r13d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
xorl %ecx, %edx
|
|
movl %r8d, %ebx
|
|
leal (%r10,%r14,1), %r10d
|
|
xorl %r15d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r14d
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
rorxl $11, %r10d, %ecx
|
|
leal (%r14,%rax,1), %r14d
|
|
addl 152(%rsp), %r13d
|
|
movl %r11d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r10d, %eax
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
xorl %r12d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r13d
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
rorxl $11, %r9d, %ecx
|
|
addl %ebx, %r13d
|
|
addl 156(%rsp), %r12d
|
|
movl %r10d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r9d, %ebx
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
xorl %r11d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
xorl %ecx, %edx
|
|
movl %r14d, %ebx
|
|
leal (%r8,%r12,1), %r8d
|
|
xorl %r13d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r12d
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
rorxl $11, %r8d, %ecx
|
|
leal (%r12,%rax,1), %r12d
|
|
addl 176(%rsp), %r11d
|
|
movl %r9d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r8d, %eax
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
xorl %r10d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r11d
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
rorxl $11, %r15d, %ecx
|
|
addl %ebx, %r11d
|
|
addl 180(%rsp), %r10d
|
|
movl %r8d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r15d, %ebx
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
xorl %r9d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
xorl %ecx, %edx
|
|
movl %r12d, %ebx
|
|
leal (%r14,%r10,1), %r14d
|
|
xorl %r11d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r10d
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
rorxl $11, %r14d, %ecx
|
|
leal (%r10,%rax,1), %r10d
|
|
addl 184(%rsp), %r9d
|
|
movl %r15d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r14d, %eax
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
xorl %r8d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r9d
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
rorxl $11, %r13d, %ecx
|
|
addl %ebx, %r9d
|
|
addl 188(%rsp), %r8d
|
|
movl %r14d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r13d, %ebx
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
xorl %r15d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
xorl %ecx, %edx
|
|
movl %r10d, %ebx
|
|
leal (%r12,%r8,1), %r12d
|
|
xorl %r9d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r8d
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
rorxl $11, %r12d, %ecx
|
|
leal (%r8,%rax,1), %r8d
|
|
addl 208(%rsp), %r15d
|
|
movl %r13d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r12d, %eax
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
xorl %r14d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r15d
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
rorxl $11, %r11d, %ecx
|
|
addl %ebx, %r15d
|
|
addl 212(%rsp), %r14d
|
|
movl %r12d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r11d, %ebx
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
xorl %r13d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
xorl %ecx, %edx
|
|
movl %r8d, %ebx
|
|
leal (%r10,%r14,1), %r10d
|
|
xorl %r15d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r14d
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
rorxl $11, %r10d, %ecx
|
|
leal (%r14,%rax,1), %r14d
|
|
addl 216(%rsp), %r13d
|
|
movl %r11d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r10d, %eax
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
xorl %r12d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r13d
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
rorxl $11, %r9d, %ecx
|
|
addl %ebx, %r13d
|
|
addl 220(%rsp), %r12d
|
|
movl %r10d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r9d, %ebx
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
xorl %r11d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
xorl %ecx, %edx
|
|
movl %r14d, %ebx
|
|
leal (%r8,%r12,1), %r8d
|
|
xorl %r13d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r12d
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
rorxl $11, %r8d, %ecx
|
|
leal (%r12,%rax,1), %r12d
|
|
addl 240(%rsp), %r11d
|
|
movl %r9d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r8d, %eax
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
xorl %r10d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r11d
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
rorxl $11, %r15d, %ecx
|
|
addl %ebx, %r11d
|
|
addl 244(%rsp), %r10d
|
|
movl %r8d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r15d, %ebx
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
xorl %r9d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
xorl %ecx, %edx
|
|
movl %r12d, %ebx
|
|
leal (%r14,%r10,1), %r14d
|
|
xorl %r11d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r10d
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
rorxl $11, %r14d, %ecx
|
|
leal (%r10,%rax,1), %r10d
|
|
addl 248(%rsp), %r9d
|
|
movl %r15d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r14d, %eax
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
xorl %r8d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r9d
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
rorxl $11, %r13d, %ecx
|
|
addl %ebx, %r9d
|
|
addl 252(%rsp), %r8d
|
|
movl %r14d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r13d, %ebx
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
xorl %r15d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
xorl %ecx, %edx
|
|
movl %r10d, %ebx
|
|
leal (%r12,%r8,1), %r12d
|
|
xorl %r9d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r8d
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
rorxl $11, %r12d, %ecx
|
|
leal (%r8,%rax,1), %r8d
|
|
addl 272(%rsp), %r15d
|
|
movl %r13d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r12d, %eax
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
xorl %r14d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r15d
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
rorxl $11, %r11d, %ecx
|
|
addl %ebx, %r15d
|
|
addl 276(%rsp), %r14d
|
|
movl %r12d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r11d, %ebx
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
xorl %r13d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
xorl %ecx, %edx
|
|
movl %r8d, %ebx
|
|
leal (%r10,%r14,1), %r10d
|
|
xorl %r15d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r14d
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
rorxl $11, %r10d, %ecx
|
|
leal (%r14,%rax,1), %r14d
|
|
addl 280(%rsp), %r13d
|
|
movl %r11d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r10d, %eax
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
xorl %r12d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r13d
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
rorxl $11, %r9d, %ecx
|
|
addl %ebx, %r13d
|
|
addl 284(%rsp), %r12d
|
|
movl %r10d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r9d, %ebx
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
xorl %r11d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
xorl %ecx, %edx
|
|
movl %r14d, %ebx
|
|
leal (%r8,%r12,1), %r8d
|
|
xorl %r13d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r12d
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
rorxl $11, %r8d, %ecx
|
|
leal (%r12,%rax,1), %r12d
|
|
addl 304(%rsp), %r11d
|
|
movl %r9d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r8d, %eax
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
xorl %r10d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r11d
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
rorxl $11, %r15d, %ecx
|
|
addl %ebx, %r11d
|
|
addl 308(%rsp), %r10d
|
|
movl %r8d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r15d, %ebx
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
xorl %r9d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
xorl %ecx, %edx
|
|
movl %r12d, %ebx
|
|
leal (%r14,%r10,1), %r14d
|
|
xorl %r11d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r10d
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
rorxl $11, %r14d, %ecx
|
|
leal (%r10,%rax,1), %r10d
|
|
addl 312(%rsp), %r9d
|
|
movl %r15d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r14d, %eax
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
xorl %r8d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r9d
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
rorxl $11, %r13d, %ecx
|
|
addl %ebx, %r9d
|
|
addl 316(%rsp), %r8d
|
|
movl %r14d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r13d, %ebx
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
xorl %r15d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
xorl %ecx, %edx
|
|
movl %r10d, %ebx
|
|
leal (%r12,%r8,1), %r12d
|
|
xorl %r9d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r8d
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
rorxl $11, %r12d, %ecx
|
|
leal (%r8,%rax,1), %r8d
|
|
addl 336(%rsp), %r15d
|
|
movl %r13d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r12d, %eax
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
xorl %r14d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r15d
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
rorxl $11, %r11d, %ecx
|
|
addl %ebx, %r15d
|
|
addl 340(%rsp), %r14d
|
|
movl %r12d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r11d, %ebx
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
xorl %r13d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
xorl %ecx, %edx
|
|
movl %r8d, %ebx
|
|
leal (%r10,%r14,1), %r10d
|
|
xorl %r15d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r14d
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
rorxl $11, %r10d, %ecx
|
|
leal (%r14,%rax,1), %r14d
|
|
addl 344(%rsp), %r13d
|
|
movl %r11d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r10d, %eax
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
xorl %r12d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r13d
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
rorxl $11, %r9d, %ecx
|
|
addl %ebx, %r13d
|
|
addl 348(%rsp), %r12d
|
|
movl %r10d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r9d, %ebx
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
xorl %r11d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
xorl %ecx, %edx
|
|
movl %r14d, %ebx
|
|
leal (%r8,%r12,1), %r8d
|
|
xorl %r13d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r12d
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
rorxl $11, %r8d, %ecx
|
|
leal (%r12,%rax,1), %r12d
|
|
addl 368(%rsp), %r11d
|
|
movl %r9d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r8d, %eax
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
xorl %r10d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r11d
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
rorxl $11, %r15d, %ecx
|
|
addl %ebx, %r11d
|
|
addl 372(%rsp), %r10d
|
|
movl %r8d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r15d, %ebx
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
xorl %r9d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
xorl %ecx, %edx
|
|
movl %r12d, %ebx
|
|
leal (%r14,%r10,1), %r14d
|
|
xorl %r11d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r10d
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
rorxl $11, %r14d, %ecx
|
|
leal (%r10,%rax,1), %r10d
|
|
addl 376(%rsp), %r9d
|
|
movl %r15d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r14d, %eax
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
xorl %r8d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r9d
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
rorxl $11, %r13d, %ecx
|
|
addl %ebx, %r9d
|
|
addl 380(%rsp), %r8d
|
|
movl %r14d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r13d, %ebx
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
xorl %r15d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
xorl %ecx, %edx
|
|
movl %r10d, %ebx
|
|
leal (%r12,%r8,1), %r12d
|
|
xorl %r9d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r8d
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
rorxl $11, %r12d, %ecx
|
|
leal (%r8,%rax,1), %r8d
|
|
addl 400(%rsp), %r15d
|
|
movl %r13d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r14d, %eax
|
|
rorxl $25, %r12d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r12d, %eax
|
|
addl %edx, %r15d
|
|
rorxl $2, %r8d, %edx
|
|
rorxl $13, %r8d, %ecx
|
|
xorl %r14d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r8d, %edx
|
|
addl %eax, %r15d
|
|
xorl %ecx, %edx
|
|
movl %r9d, %eax
|
|
addl %r15d, %r11d
|
|
xorl %r8d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r15d
|
|
xorl %r9d, %ebx
|
|
rorxl $6, %r11d, %edx
|
|
rorxl $11, %r11d, %ecx
|
|
addl %ebx, %r15d
|
|
addl 404(%rsp), %r14d
|
|
movl %r12d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r13d, %ebx
|
|
rorxl $25, %r11d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r11d, %ebx
|
|
addl %edx, %r14d
|
|
rorxl $2, %r15d, %edx
|
|
rorxl $13, %r15d, %ecx
|
|
xorl %r13d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r15d, %edx
|
|
addl %ebx, %r14d
|
|
xorl %ecx, %edx
|
|
movl %r8d, %ebx
|
|
leal (%r10,%r14,1), %r10d
|
|
xorl %r15d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r14d
|
|
xorl %r8d, %eax
|
|
rorxl $6, %r10d, %edx
|
|
rorxl $11, %r10d, %ecx
|
|
leal (%r14,%rax,1), %r14d
|
|
addl 408(%rsp), %r13d
|
|
movl %r11d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r12d, %eax
|
|
rorxl $25, %r10d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r10d, %eax
|
|
addl %edx, %r13d
|
|
rorxl $2, %r14d, %edx
|
|
rorxl $13, %r14d, %ecx
|
|
xorl %r12d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r14d, %edx
|
|
addl %eax, %r13d
|
|
xorl %ecx, %edx
|
|
movl %r15d, %eax
|
|
addl %r13d, %r9d
|
|
xorl %r14d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r13d
|
|
xorl %r15d, %ebx
|
|
rorxl $6, %r9d, %edx
|
|
rorxl $11, %r9d, %ecx
|
|
addl %ebx, %r13d
|
|
addl 412(%rsp), %r12d
|
|
movl %r10d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r11d, %ebx
|
|
rorxl $25, %r9d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r9d, %ebx
|
|
addl %edx, %r12d
|
|
rorxl $2, %r13d, %edx
|
|
rorxl $13, %r13d, %ecx
|
|
xorl %r11d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r13d, %edx
|
|
addl %ebx, %r12d
|
|
xorl %ecx, %edx
|
|
movl %r14d, %ebx
|
|
leal (%r8,%r12,1), %r8d
|
|
xorl %r13d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r12d
|
|
xorl %r14d, %eax
|
|
rorxl $6, %r8d, %edx
|
|
rorxl $11, %r8d, %ecx
|
|
leal (%r12,%rax,1), %r12d
|
|
addl 432(%rsp), %r11d
|
|
movl %r9d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r10d, %eax
|
|
rorxl $25, %r8d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r8d, %eax
|
|
addl %edx, %r11d
|
|
rorxl $2, %r12d, %edx
|
|
rorxl $13, %r12d, %ecx
|
|
xorl %r10d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r12d, %edx
|
|
addl %eax, %r11d
|
|
xorl %ecx, %edx
|
|
movl %r13d, %eax
|
|
addl %r11d, %r15d
|
|
xorl %r12d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r11d
|
|
xorl %r13d, %ebx
|
|
rorxl $6, %r15d, %edx
|
|
rorxl $11, %r15d, %ecx
|
|
addl %ebx, %r11d
|
|
addl 436(%rsp), %r10d
|
|
movl %r8d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r9d, %ebx
|
|
rorxl $25, %r15d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r15d, %ebx
|
|
addl %edx, %r10d
|
|
rorxl $2, %r11d, %edx
|
|
rorxl $13, %r11d, %ecx
|
|
xorl %r9d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r11d, %edx
|
|
addl %ebx, %r10d
|
|
xorl %ecx, %edx
|
|
movl %r12d, %ebx
|
|
leal (%r14,%r10,1), %r14d
|
|
xorl %r11d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r10d
|
|
xorl %r12d, %eax
|
|
rorxl $6, %r14d, %edx
|
|
rorxl $11, %r14d, %ecx
|
|
leal (%r10,%rax,1), %r10d
|
|
addl 440(%rsp), %r9d
|
|
movl %r15d, %eax
|
|
xorl %edx, %ecx
|
|
xorl %r8d, %eax
|
|
rorxl $25, %r14d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r14d, %eax
|
|
addl %edx, %r9d
|
|
rorxl $2, %r10d, %edx
|
|
rorxl $13, %r10d, %ecx
|
|
xorl %r8d, %eax
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r10d, %edx
|
|
addl %eax, %r9d
|
|
xorl %ecx, %edx
|
|
movl %r11d, %eax
|
|
addl %r9d, %r13d
|
|
xorl %r10d, %eax
|
|
andl %eax, %ebx
|
|
addl %edx, %r9d
|
|
xorl %r11d, %ebx
|
|
rorxl $6, %r13d, %edx
|
|
rorxl $11, %r13d, %ecx
|
|
addl %ebx, %r9d
|
|
addl 444(%rsp), %r8d
|
|
movl %r14d, %ebx
|
|
xorl %edx, %ecx
|
|
xorl %r15d, %ebx
|
|
rorxl $25, %r13d, %edx
|
|
xorl %ecx, %edx
|
|
andl %r13d, %ebx
|
|
addl %edx, %r8d
|
|
rorxl $2, %r9d, %edx
|
|
rorxl $13, %r9d, %ecx
|
|
xorl %r15d, %ebx
|
|
xorl %edx, %ecx
|
|
rorxl $22, %r9d, %edx
|
|
addl %ebx, %r8d
|
|
xorl %ecx, %edx
|
|
movl %r10d, %ebx
|
|
leal (%r12,%r8,1), %r12d
|
|
xorl %r9d, %ebx
|
|
andl %ebx, %eax
|
|
addl %edx, %r8d
|
|
xorl %r10d, %eax
|
|
rorxl $6, %r12d, %edx
|
|
rorxl $11, %r12d, %ecx
|
|
leal (%r8,%rax,1), %r8d
|
|
addl 464(%rsp), %r15d
movl %r13d, %eax
xorl %edx, %ecx
xorl %r14d, %eax
rorxl $25, %r12d, %edx
xorl %ecx, %edx
andl %r12d, %eax
addl %edx, %r15d
rorxl $2, %r8d, %edx
rorxl $13, %r8d, %ecx
xorl %r14d, %eax
xorl %edx, %ecx
rorxl $22, %r8d, %edx
addl %eax, %r15d
xorl %ecx, %edx
movl %r9d, %eax
addl %r15d, %r11d
xorl %r8d, %eax
andl %eax, %ebx
addl %edx, %r15d
xorl %r9d, %ebx
rorxl $6, %r11d, %edx
rorxl $11, %r11d, %ecx
addl %ebx, %r15d
addl 468(%rsp), %r14d
movl %r12d, %ebx
xorl %edx, %ecx
xorl %r13d, %ebx
rorxl $25, %r11d, %edx
xorl %ecx, %edx
andl %r11d, %ebx
addl %edx, %r14d
rorxl $2, %r15d, %edx
rorxl $13, %r15d, %ecx
xorl %r13d, %ebx
xorl %edx, %ecx
rorxl $22, %r15d, %edx
addl %ebx, %r14d
xorl %ecx, %edx
movl %r8d, %ebx
leal (%r10,%r14,1), %r10d
xorl %r15d, %ebx
andl %ebx, %eax
addl %edx, %r14d
xorl %r8d, %eax
rorxl $6, %r10d, %edx
rorxl $11, %r10d, %ecx
leal (%r14,%rax,1), %r14d
addl 472(%rsp), %r13d
movl %r11d, %eax
xorl %edx, %ecx
xorl %r12d, %eax
rorxl $25, %r10d, %edx
xorl %ecx, %edx
andl %r10d, %eax
addl %edx, %r13d
rorxl $2, %r14d, %edx
rorxl $13, %r14d, %ecx
xorl %r12d, %eax
xorl %edx, %ecx
rorxl $22, %r14d, %edx
addl %eax, %r13d
xorl %ecx, %edx
movl %r15d, %eax
addl %r13d, %r9d
xorl %r14d, %eax
andl %eax, %ebx
addl %edx, %r13d
xorl %r15d, %ebx
rorxl $6, %r9d, %edx
rorxl $11, %r9d, %ecx
addl %ebx, %r13d
addl 476(%rsp), %r12d
movl %r10d, %ebx
xorl %edx, %ecx
xorl %r11d, %ebx
rorxl $25, %r9d, %edx
xorl %ecx, %edx
andl %r9d, %ebx
addl %edx, %r12d
rorxl $2, %r13d, %edx
rorxl $13, %r13d, %ecx
xorl %r11d, %ebx
xorl %edx, %ecx
rorxl $22, %r13d, %edx
addl %ebx, %r12d
xorl %ecx, %edx
movl %r14d, %ebx
leal (%r8,%r12,1), %r8d
xorl %r13d, %ebx
andl %ebx, %eax
addl %edx, %r12d
xorl %r14d, %eax
rorxl $6, %r8d, %edx
rorxl $11, %r8d, %ecx
leal (%r12,%rax,1), %r12d
addl 496(%rsp), %r11d
movl %r9d, %eax
xorl %edx, %ecx
xorl %r10d, %eax
rorxl $25, %r8d, %edx
xorl %ecx, %edx
andl %r8d, %eax
addl %edx, %r11d
rorxl $2, %r12d, %edx
rorxl $13, %r12d, %ecx
xorl %r10d, %eax
xorl %edx, %ecx
rorxl $22, %r12d, %edx
addl %eax, %r11d
xorl %ecx, %edx
movl %r13d, %eax
addl %r11d, %r15d
xorl %r12d, %eax
andl %eax, %ebx
addl %edx, %r11d
xorl %r13d, %ebx
rorxl $6, %r15d, %edx
rorxl $11, %r15d, %ecx
addl %ebx, %r11d
addl 500(%rsp), %r10d
movl %r8d, %ebx
xorl %edx, %ecx
xorl %r9d, %ebx
rorxl $25, %r15d, %edx
xorl %ecx, %edx
andl %r15d, %ebx
addl %edx, %r10d
rorxl $2, %r11d, %edx
rorxl $13, %r11d, %ecx
xorl %r9d, %ebx
xorl %edx, %ecx
rorxl $22, %r11d, %edx
addl %ebx, %r10d
xorl %ecx, %edx
movl %r12d, %ebx
leal (%r14,%r10,1), %r14d
xorl %r11d, %ebx
andl %ebx, %eax
addl %edx, %r10d
xorl %r12d, %eax
rorxl $6, %r14d, %edx
rorxl $11, %r14d, %ecx
leal (%r10,%rax,1), %r10d
addl 504(%rsp), %r9d
movl %r15d, %eax
xorl %edx, %ecx
xorl %r8d, %eax
rorxl $25, %r14d, %edx
xorl %ecx, %edx
andl %r14d, %eax
addl %edx, %r9d
rorxl $2, %r10d, %edx
rorxl $13, %r10d, %ecx
xorl %r8d, %eax
xorl %edx, %ecx
rorxl $22, %r10d, %edx
addl %eax, %r9d
xorl %ecx, %edx
movl %r11d, %eax
addl %r9d, %r13d
xorl %r10d, %eax
andl %eax, %ebx
addl %edx, %r9d
xorl %r11d, %ebx
rorxl $6, %r13d, %edx
rorxl $11, %r13d, %ecx
addl %ebx, %r9d
addl 508(%rsp), %r8d
movl %r14d, %ebx
xorl %edx, %ecx
xorl %r15d, %ebx
rorxl $25, %r13d, %edx
xorl %ecx, %edx
andl %r13d, %ebx
addl %edx, %r8d
rorxl $2, %r9d, %edx
rorxl $13, %r9d, %ecx
xorl %r15d, %ebx
xorl %edx, %ecx
rorxl $22, %r9d, %edx
addl %ebx, %r8d
xorl %ecx, %edx
movl %r10d, %ebx
leal (%r12,%r8,1), %r12d
xorl %r9d, %ebx
andl %ebx, %eax
addl %edx, %r8d
xorl %r10d, %eax
addl %eax, %r8d
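/* All rounds for this iteration are done. Advance the message pointer past
 * the 0x80 (128) bytes consumed: two 64-byte blocks per pass. */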
addq $0x80, %rbp
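/* Add the working variables a..h (%r8d..%r15d) into the previous hash
 * state held in the SHA-256 context at (%rdi). */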
addl (%rdi), %r8d
addl 4(%rdi), %r9d
addl 8(%rdi), %r10d
addl 12(%rdi), %r11d
addl 16(%rdi), %r12d
addl 20(%rdi), %r13d
addl 24(%rdi), %r14d
addl 28(%rdi), %r15d
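/* Reduce the remaining length by the 128 bytes just processed; this also
 * sets the flags tested by the jnz below. */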
subl $0x80, %esi
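/* Write the updated digest words h[0..7] back to the context. */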
movl %r8d, (%rdi)
movl %r9d, 4(%rdi)
movl %r10d, 8(%rdi)
movl %r11d, 12(%rdi)
movl %r12d, 16(%rdi)
movl %r13d, 20(%rdi)
movl %r14d, 24(%rdi)
movl %r15d, 28(%rdi)
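/* Loop while input remains: the movl stores above do not modify flags, so
 * jnz still tests the result of the subl on the length. */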
jnz L_sha256_len_avx2_rorx_start
L_sha256_len_avx2_rorx_done:
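/* Epilogue: return 0, clear the upper YMM state to avoid AVX/SSE
 * transition penalties, release the 0x200-byte stack area and restore the
 * callee-saved registers. */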
xorq %rax, %rax
vzeroupper
addq $0x200, %rsp
popq %rbp
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size Transform_Sha256_AVX2_RORX_Len,.-Transform_Sha256_AVX2_RORX_Len
#endif /* __APPLE__ */
#endif /* HAVE_INTEL_AVX2 */
#endif /* WOLFSSL_X86_64_BUILD */

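/* Mark the stack as non-executable for GNU/Linux ELF builds. */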
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif