Get Mac OS X working with the x86_64 assembly files

This commit is contained in:
Sean Parkinson
2019-02-15 15:08:47 +10:00
parent 64cb07557d
commit 16f31cf8c6
8 changed files with 1690 additions and 97 deletions

View File

@ -608,10 +608,12 @@
#define AESNI_ALIGN 16
#endif
#ifndef _MSC_VER
#define XASM_LINK(f) asm(f)
#else
#ifdef _MSC_VER
#define XASM_LINK(f)
#elif defined(__APPLE__)
#define XASM_LINK(f) asm("_" f)
#else
#define XASM_LINK(f) asm(f)
#endif /* _MSC_VER */
static int checkAESNI = 0;

View File

@ -36,8 +36,13 @@ AES_CBC_encrypt (const unsigned char *in,
const unsigned char *KS,
int nr)
*/
#ifndef __APPLE__
.globl AES_CBC_encrypt
AES_CBC_encrypt:
#else
.globl _AES_CBC_encrypt
_AES_CBC_encrypt:
#endif
# parameter 1: %rdi
# parameter 2: %rsi
# parameter 3: %rdx
@ -96,8 +101,13 @@ AES_CBC_decrypt_by4 (const unsigned char *in,
const unsigned char *KS,
int nr)
*/
#ifndef __APPLE__
.globl AES_CBC_decrypt_by4
AES_CBC_decrypt_by4:
#else
.globl _AES_CBC_decrypt_by4
_AES_CBC_decrypt_by4:
#endif
# parameter 1: %rdi
# parameter 2: %rsi
# parameter 3: %rdx
@ -272,8 +282,13 @@ AES_CBC_decrypt_by6 (const unsigned char *in,
const unsigned char *KS,
int nr)
*/
#ifndef __APPLE__
.globl AES_CBC_decrypt_by6
AES_CBC_decrypt_by6:
#else
.globl _AES_CBC_decrypt_by6
_AES_CBC_decrypt_by6:
#endif
# parameter 1: %rdi - in
# parameter 2: %rsi - out
# parameter 3: %rdx - ivec
@ -495,8 +510,13 @@ AES_CBC_decrypt_by8 (const unsigned char *in,
const unsigned char *KS,
int nr)
*/
#ifndef __APPLE__
.globl AES_CBC_decrypt_by8
AES_CBC_decrypt_by8:
#else
.globl _AES_CBC_decrypt_by8
_AES_CBC_decrypt_by8:
#endif
# parameter 1: %rdi - in
# parameter 2: %rsi - out
# parameter 3: %rdx - ivec
@ -746,8 +766,13 @@ AES_ECB_encrypt (const unsigned char *in,
const unsigned char *KS,
int nr)
*/
#ifndef __APPLE__
.globl AES_ECB_encrypt
AES_ECB_encrypt:
#else
.globl _AES_ECB_encrypt
_AES_ECB_encrypt:
#endif
# parameter 1: %rdi
# parameter 2: %rsi
# parameter 3: %rdx
@ -905,8 +930,13 @@ AES_ECB_decrypt (const unsigned char *in,
const unsigned char *KS,
int nr)
*/
#ifndef __APPLE__
.globl AES_ECB_decrypt
AES_ECB_decrypt:
#else
.globl _AES_ECB_decrypt
_AES_ECB_decrypt:
#endif
# parameter 1: %rdi
# parameter 2: %rsi
# parameter 3: %rdx
@ -1065,8 +1095,13 @@ void AES_128_Key_Expansion(const unsigned char* userkey,
unsigned char* key_schedule);
*/
.align 16,0x90
#ifndef __APPLE__
.globl AES_128_Key_Expansion
AES_128_Key_Expansion:
#else
.globl _AES_128_Key_Expansion
_AES_128_Key_Expansion:
#endif
# parameter 1: %rdi
# parameter 2: %rsi
movl $10, 240(%rsi)
@ -1125,8 +1160,13 @@ ret
void AES_192_Key_Expansion (const unsigned char *userkey,
unsigned char *key)
*/
#ifndef __APPLE__
.globl AES_192_Key_Expansion
AES_192_Key_Expansion:
#else
.globl _AES_192_Key_Expansion
_AES_192_Key_Expansion:
#endif
# parameter 1: %rdi
# parameter 2: %rsi
@ -1211,8 +1251,13 @@ ret
void AES_256_Key_Expansion (const unsigned char *userkey,
unsigned char *key)
*/
#ifndef __APPLE__
.globl AES_256_Key_Expansion
AES_256_Key_Expansion:
#else
.globl _AES_256_Key_Expansion
_AES_256_Key_Expansion:
#endif
# parameter 1: %rdi
# parameter 2: %rsi

View File

@ -26,43 +26,150 @@
#define HAVE_INTEL_AVX2
#endif /* HAVE_INTEL_AVX2 */
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_aes_gcm_one:
.quad 0x0, 0x1
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_aes_gcm_two:
.quad 0x0, 0x2
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_aes_gcm_three:
.quad 0x0, 0x3
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_aes_gcm_four:
.quad 0x0, 0x4
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_aes_gcm_five:
.quad 0x0, 0x5
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_aes_gcm_six:
.quad 0x0, 0x6
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_aes_gcm_seven:
.quad 0x0, 0x7
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_aes_gcm_eight:
.quad 0x0, 0x8
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_aes_gcm_bswap_epi64:
.quad 0x1020304050607, 0x8090a0b0c0d0e0f
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_aes_gcm_bswap_mask:
.quad 0x8090a0b0c0d0e0f, 0x1020304050607
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_aes_gcm_mod2_128:
.quad 0x1, 0xc200000000000000
#ifndef __APPLE__
.text
.globl AES_GCM_encrypt
.type AES_GCM_encrypt,@function
.align 4
AES_GCM_encrypt:
#else
.section __TEXT,__text
.globl _AES_GCM_encrypt
.p2align 2
_AES_GCM_encrypt:
#endif /* __APPLE__ */
pushq %r13
pushq %r12
pushq %rbx
@ -70,11 +177,11 @@ AES_GCM_encrypt:
pushq %r15
movq %rdx, %r12
movq %rcx, %rax
movq 48(%rsp), %r11
movq 56(%rsp), %rbx
movq 64(%rsp), %r14
movl 48(%rsp), %r11d
movl 56(%rsp), %ebx
movl 64(%rsp), %r14d
movq 72(%rsp), %r15
movq 80(%rsp), %r10
movl 80(%rsp), %r10d
subq $0xa0, %rsp
pxor %xmm4, %xmm4
pxor %xmm6, %xmm6
@ -1902,11 +2009,21 @@ L_AES_GCM_encrypt_store_tag_done:
popq %r12
popq %r13
repz retq
#ifndef __APPLE__
.size AES_GCM_encrypt,.-AES_GCM_encrypt
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl AES_GCM_decrypt
.type AES_GCM_decrypt,@function
.align 4
AES_GCM_decrypt:
#else
.section __TEXT,__text
.globl _AES_GCM_decrypt
.p2align 2
_AES_GCM_decrypt:
#endif /* __APPLE__ */
pushq %r13
pushq %r12
pushq %rbx
@ -1915,11 +2032,11 @@ AES_GCM_decrypt:
pushq %rbp
movq %rdx, %r12
movq %rcx, %rax
movq 56(%rsp), %r11
movq 64(%rsp), %rbx
movq 72(%rsp), %r14
movl 56(%rsp), %r11d
movl 64(%rsp), %ebx
movl 72(%rsp), %r14d
movq 80(%rsp), %r15
movq 88(%rsp), %r10
movl 88(%rsp), %r10d
movq 96(%rsp), %rbp
subq $0xa8, %rsp
pxor %xmm4, %xmm4
@ -3298,45 +3415,154 @@ L_AES_GCM_decrypt_cmp_tag_done:
popq %r12
popq %r13
repz retq
#ifndef __APPLE__
.size AES_GCM_decrypt,.-AES_GCM_decrypt
#endif /* __APPLE__ */
#ifdef HAVE_INTEL_AVX1
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx1_aes_gcm_one:
.quad 0x0, 0x1
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx1_aes_gcm_two:
.quad 0x0, 0x2
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx1_aes_gcm_three:
.quad 0x0, 0x3
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx1_aes_gcm_four:
.quad 0x0, 0x4
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx1_aes_gcm_five:
.quad 0x0, 0x5
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx1_aes_gcm_six:
.quad 0x0, 0x6
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx1_aes_gcm_seven:
.quad 0x0, 0x7
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx1_aes_gcm_eight:
.quad 0x0, 0x8
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx1_aes_gcm_bswap_epi64:
.quad 0x1020304050607, 0x8090a0b0c0d0e0f
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx1_aes_gcm_bswap_mask:
.quad 0x8090a0b0c0d0e0f, 0x1020304050607
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx1_aes_gcm_mod2_128:
.quad 0x1, 0xc200000000000000
#ifndef __APPLE__
.text
.globl AES_GCM_encrypt_avx1
.type AES_GCM_encrypt_avx1,@function
.align 4
AES_GCM_encrypt_avx1:
#else
.section __TEXT,__text
.globl _AES_GCM_encrypt_avx1
.p2align 2
_AES_GCM_encrypt_avx1:
#endif /* __APPLE__ */
pushq %r13
pushq %r12
pushq %rbx
@ -3344,11 +3570,11 @@ AES_GCM_encrypt_avx1:
pushq %r15
movq %rdx, %r12
movq %rcx, %rax
movq 48(%rsp), %r11
movq 56(%rsp), %rbx
movq 64(%rsp), %r14
movl 48(%rsp), %r11d
movl 56(%rsp), %ebx
movl 64(%rsp), %r14d
movq 72(%rsp), %r15
movq 80(%rsp), %r10
movl 80(%rsp), %r10d
subq $0xa0, %rsp
vpxor %xmm4, %xmm4, %xmm4
vpxor %xmm6, %xmm6, %xmm6
@ -4902,11 +5128,21 @@ L_AES_GCM_encrypt_avx1_store_tag_done:
popq %r12
popq %r13
repz retq
#ifndef __APPLE__
.size AES_GCM_encrypt_avx1,.-AES_GCM_encrypt_avx1
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl AES_GCM_decrypt_avx1
.type AES_GCM_decrypt_avx1,@function
.align 4
AES_GCM_decrypt_avx1:
#else
.section __TEXT,__text
.globl _AES_GCM_decrypt_avx1
.p2align 2
_AES_GCM_decrypt_avx1:
#endif /* __APPLE__ */
pushq %r13
pushq %r12
pushq %rbx
@ -4915,11 +5151,11 @@ AES_GCM_decrypt_avx1:
pushq %rbp
movq %rdx, %r12
movq %rcx, %rax
movq 56(%rsp), %r11
movq 64(%rsp), %rbx
movq 72(%rsp), %r14
movl 56(%rsp), %r11d
movl 64(%rsp), %ebx
movl 72(%rsp), %r14d
movq 80(%rsp), %r15
movq 88(%rsp), %r10
movl 88(%rsp), %r10d
movq 96(%rsp), %rbp
subq $0xa8, %rsp
vpxor %xmm4, %xmm4, %xmm4
@ -6066,49 +6302,167 @@ L_AES_GCM_decrypt_avx1_cmp_tag_done:
popq %r12
popq %r13
repz retq
#ifndef __APPLE__
.size AES_GCM_decrypt_avx1,.-AES_GCM_decrypt_avx1
#endif /* __APPLE__ */
#endif /* HAVE_INTEL_AVX1 */
#ifdef HAVE_INTEL_AVX2
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx2_aes_gcm_one:
.quad 0x0, 0x1
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx2_aes_gcm_two:
.quad 0x0, 0x2
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx2_aes_gcm_three:
.quad 0x0, 0x3
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx2_aes_gcm_four:
.quad 0x0, 0x4
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx2_aes_gcm_five:
.quad 0x0, 0x5
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx2_aes_gcm_six:
.quad 0x0, 0x6
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx2_aes_gcm_seven:
.quad 0x0, 0x7
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx2_aes_gcm_eight:
.quad 0x0, 0x8
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx2_aes_gcm_bswap_one:
.quad 0x0, 0x100000000000000
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx2_aes_gcm_bswap_epi64:
.quad 0x1020304050607, 0x8090a0b0c0d0e0f
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx2_aes_gcm_bswap_mask:
.quad 0x8090a0b0c0d0e0f, 0x1020304050607
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx2_aes_gcm_mod2_128:
.quad 0x1, 0xc200000000000000
#ifndef __APPLE__
.text
.globl AES_GCM_encrypt_avx2
.type AES_GCM_encrypt_avx2,@function
.align 4
AES_GCM_encrypt_avx2:
#else
.section __TEXT,__text
.globl _AES_GCM_encrypt_avx2
.p2align 2
_AES_GCM_encrypt_avx2:
#endif /* __APPLE__ */
pushq %r13
pushq %r12
pushq %r15
@ -6118,12 +6472,12 @@ AES_GCM_encrypt_avx2:
movq %rcx, %rax
movq %r8, %r15
movq %rsi, %r8
movq %r9, %r10
movq 48(%rsp), %r11
movq 56(%rsp), %rbx
movq 64(%rsp), %r14
movl %r9d, %r10d
movl 48(%rsp), %r11d
movl 56(%rsp), %ebx
movl 64(%rsp), %r14d
movq 72(%rsp), %rsi
movq 80(%rsp), %r9
movl 80(%rsp), %r9d
subq $0xa0, %rsp
vpxor %xmm4, %xmm4, %xmm4
vpxor %xmm6, %xmm6, %xmm6
@ -7402,11 +7756,21 @@ L_AES_GCM_encrypt_avx2_store_tag_done:
popq %r12
popq %r13
repz retq
#ifndef __APPLE__
.size AES_GCM_encrypt_avx2,.-AES_GCM_encrypt_avx2
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl AES_GCM_decrypt_avx2
.type AES_GCM_decrypt_avx2,@function
.align 4
AES_GCM_decrypt_avx2:
#else
.section __TEXT,__text
.globl _AES_GCM_decrypt_avx2
.p2align 2
_AES_GCM_decrypt_avx2:
#endif /* __APPLE__ */
pushq %r13
pushq %r12
pushq %r14
@ -7417,12 +7781,12 @@ AES_GCM_decrypt_avx2:
movq %rcx, %rax
movq %r8, %r14
movq %rsi, %r8
movq %r9, %r10
movq 56(%rsp), %r11
movq 64(%rsp), %rbx
movq 72(%rsp), %r15
movl %r9d, %r10d
movl 56(%rsp), %r11d
movl 64(%rsp), %ebx
movl 72(%rsp), %r15d
movq 80(%rsp), %rsi
movq 88(%rsp), %r9
movl 88(%rsp), %r9d
movq 96(%rsp), %rbp
subq $0xa8, %rsp
vpxor %xmm4, %xmm4, %xmm4
@ -8363,5 +8727,7 @@ L_AES_GCM_decrypt_avx2_cmp_tag_done:
popq %r12
popq %r13
repz retq
#ifndef __APPLE__
.size AES_GCM_decrypt_avx2,.-AES_GCM_decrypt_avx2
#endif /* __APPLE__ */
#endif /* HAVE_INTEL_AVX2 */

View File

@ -26,10 +26,18 @@
#define HAVE_INTEL_AVX2
#endif /* HAVE_INTEL_AVX2 */
#ifndef __APPLE__
.text
.globl chacha_encrypt_x64
.type chacha_encrypt_x64,@function
.align 4
chacha_encrypt_x64:
#else
.section __TEXT,__text
.globl _chacha_encrypt_x64
.p2align 2
_chacha_encrypt_x64:
#endif /* __APPLE__ */
pushq %rbx
pushq %rbp
pushq %r12
@ -440,24 +448,70 @@ L_chacha_x64_done:
popq %rbp
popq %rbx
repz retq
#ifndef __APPLE__
.size chacha_encrypt_x64,.-chacha_encrypt_x64
#endif /* __APPLE__ */
#ifdef HAVE_INTEL_AVX1
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_chacha20_avx1_rotl8:
.quad 0x605040702010003, 0xe0d0c0f0a09080b
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_chacha20_avx1_rotl16:
.quad 0x504070601000302, 0xd0c0f0e09080b0a
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_chacha20_avx1_add:
.quad 0x100000000, 0x300000002
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_chacha20_avx1_four:
.quad 0x400000004, 0x400000004
#ifndef __APPLE__
.text
.globl chacha_encrypt_avx1
.type chacha_encrypt_avx1,@function
.align 4
chacha_encrypt_avx1:
#else
.section __TEXT,__text
.globl _chacha_encrypt_avx1
.p2align 2
_chacha_encrypt_avx1:
#endif /* __APPLE__ */
subq $0x190, %rsp
movq %rsp, %r9
leaq 256(%rsp), %r10
@ -943,29 +997,75 @@ L_chacha20_avx1_partial_end64:
L_chacha20_avx1_partial_done:
addq $0x190, %rsp
repz retq
#ifndef __APPLE__
.size chacha_encrypt_avx1,.-chacha_encrypt_avx1
#endif /* __APPLE__ */
#endif /* HAVE_INTEL_AVX1 */
#ifdef HAVE_INTEL_AVX2
.align 32
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 32
#else
.p2align 5
#endif /* __APPLE__ */
L_chacha20_avx2_rotl8:
.quad 0x605040702010003, 0xe0d0c0f0a09080b
.quad 0x605040702010003, 0xe0d0c0f0a09080b
.align 32
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 32
#else
.p2align 5
#endif /* __APPLE__ */
L_chacha20_avx2_rotl16:
.quad 0x504070601000302, 0xd0c0f0e09080b0a
.quad 0x504070601000302, 0xd0c0f0e09080b0a
.align 32
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 32
#else
.p2align 5
#endif /* __APPLE__ */
L_chacha20_avx2_add:
.quad 0x100000000, 0x300000002
.quad 0x500000004, 0x700000006
.align 32
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 32
#else
.p2align 5
#endif /* __APPLE__ */
L_chacha20_avx2_eight:
.quad 0x800000008, 0x800000008
.quad 0x800000008, 0x800000008
#ifndef __APPLE__
.text
.globl chacha_encrypt_avx2
.type chacha_encrypt_avx2,@function
.align 4
chacha_encrypt_avx2:
#else
.section __TEXT,__text
.globl _chacha_encrypt_avx2
.p2align 2
_chacha_encrypt_avx2:
#endif /* __APPLE__ */
subq $0x310, %rsp
movq %rsp, %r9
leaq 512(%rsp), %r10
@ -1307,8 +1407,14 @@ L_chacha20_avx2_done256:
shl $3, %eax
addl %eax, 48(%rdi)
L_chacha20_avx2_end256:
#ifndef __APPLE__
callq chacha_encrypt_avx1@plt
#else
callq _chacha_encrypt_avx1
#endif /* __APPLE__ */
addq $0x310, %rsp
repz retq
#ifndef __APPLE__
.size chacha_encrypt_avx2,.-chacha_encrypt_avx2
#endif /* __APPLE__ */
#endif /* HAVE_INTEL_AVX2 */

View File

@ -27,10 +27,18 @@
#endif /* HAVE_INTEL_AVX2 */
#ifdef HAVE_INTEL_AVX1
#ifndef __APPLE__
.text
.globl poly1305_setkey_avx
.type poly1305_setkey_avx,@function
.align 4
poly1305_setkey_avx:
#else
.section __TEXT,__text
.globl _poly1305_setkey_avx
.p2align 2
_poly1305_setkey_avx:
#endif /* __APPLE__ */
movabsq $0xffffffc0fffffff, %r10
movabsq $0xffffffc0ffffffc, %r11
movq (%rsi), %rdx
@ -76,11 +84,21 @@ poly1305_setkey_avx:
movq %r9, 608(%rdi)
movb $0x01, 616(%rdi)
repz retq
#ifndef __APPLE__
.size poly1305_setkey_avx,.-poly1305_setkey_avx
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl poly1305_block_avx
.type poly1305_block_avx,@function
.align 4
poly1305_block_avx:
#else
.section __TEXT,__text
.globl _poly1305_block_avx
.p2align 2
_poly1305_block_avx:
#endif /* __APPLE__ */
pushq %r15
pushq %rbx
pushq %r12
@ -150,11 +168,21 @@ poly1305_block_avx:
popq %rbx
popq %r15
repz retq
#ifndef __APPLE__
.size poly1305_block_avx,.-poly1305_block_avx
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl poly1305_blocks_avx
.type poly1305_blocks_avx,@function
.align 4
poly1305_blocks_avx:
#else
.section __TEXT,__text
.globl _poly1305_blocks_avx
.p2align 2
_poly1305_blocks_avx:
#endif /* __APPLE__ */
pushq %r15
pushq %rbx
pushq %r12
@ -228,11 +256,21 @@ L_poly1305_avx_blocks_start:
popq %rbx
popq %r15
repz retq
#ifndef __APPLE__
.size poly1305_blocks_avx,.-poly1305_blocks_avx
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl poly1305_final_avx
.type poly1305_final_avx,@function
.align 4
poly1305_final_avx:
#else
.section __TEXT,__text
.globl _poly1305_final_avx
.p2align 2
_poly1305_final_avx:
#endif /* __APPLE__ */
pushq %rbx
pushq %r12
movq %rsi, %rbx
@ -249,7 +287,11 @@ L_poly1305_avx_final_cmp_rem:
jl L_poly1305_avx_final_zero_rem
movb $0x00, 616(%rdi)
leaq 480(%rdi), %rsi
#ifndef __APPLE__
callq poly1305_block_avx@plt
#else
callq _poly1305_block_avx
#endif /* __APPLE__ */
L_poly1305_avx_final_no_more:
movq 24(%rdi), %rax
movq 32(%rdi), %rdx
@ -295,13 +337,23 @@ L_poly1305_avx_final_no_more:
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size poly1305_final_avx,.-poly1305_final_avx
#endif /* __APPLE__ */
#endif /* HAVE_INTEL_AVX1 */
#ifdef HAVE_INTEL_AVX2
#ifndef __APPLE__
.text
.globl poly1305_calc_powers_avx2
.type poly1305_calc_powers_avx2,@function
.align 4
poly1305_calc_powers_avx2:
#else
.section __TEXT,__text
.globl _poly1305_calc_powers_avx2
.p2align 2
_poly1305_calc_powers_avx2:
#endif /* __APPLE__ */
pushq %r12
pushq %r13
pushq %r14
@ -554,12 +606,26 @@ poly1305_calc_powers_avx2:
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size poly1305_calc_powers_avx2,.-poly1305_calc_powers_avx2
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl poly1305_setkey_avx2
.type poly1305_setkey_avx2,@function
.align 4
poly1305_setkey_avx2:
#else
.section __TEXT,__text
.globl _poly1305_setkey_avx2
.p2align 2
_poly1305_setkey_avx2:
#endif /* __APPLE__ */
#ifndef __APPLE__
callq poly1305_setkey_avx@plt
#else
callq _poly1305_setkey_avx
#endif /* __APPLE__ */
vpxor %ymm0, %ymm0, %ymm0
vmovdqu %ymm0, 64(%rdi)
vmovdqu %ymm0, 96(%rdi)
@ -569,19 +635,47 @@ poly1305_setkey_avx2:
movq $0x00, 608(%rdi)
movw $0x00, 616(%rdi)
repz retq
#ifndef __APPLE__
.size poly1305_setkey_avx2,.-poly1305_setkey_avx2
.align 32
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 32
#else
.p2align 5
#endif /* __APPLE__ */
L_poly1305_avx2_blocks_mask:
.quad 0x3ffffff, 0x3ffffff
.quad 0x3ffffff, 0x3ffffff
.align 32
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 32
#else
.p2align 5
#endif /* __APPLE__ */
L_poly1305_avx2_blocks_hibit:
.quad 0x1000000, 0x1000000
.quad 0x1000000, 0x1000000
#ifndef __APPLE__
.text
.globl poly1305_blocks_avx2
.type poly1305_blocks_avx2,@function
.align 4
poly1305_blocks_avx2:
#else
.section __TEXT,__text
.globl _poly1305_blocks_avx2
.p2align 2
_poly1305_blocks_avx2:
#endif /* __APPLE__ */
pushq %r12
pushq %rbx
subq $0x140, %rsp
@ -927,11 +1021,21 @@ L_poly1305_avx2_blocks_complete:
popq %rbx
popq %r12
repz retq
#ifndef __APPLE__
.size poly1305_blocks_avx2,.-poly1305_blocks_avx2
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl poly1305_final_avx2
.type poly1305_final_avx2,@function
.align 4
poly1305_final_avx2:
#else
.section __TEXT,__text
.globl _poly1305_final_avx2
.p2align 2
_poly1305_final_avx2:
#endif /* __APPLE__ */
movb $0x01, 616(%rdi)
movb 617(%rdi), %cl
cmpb $0x00, %cl
@ -939,7 +1043,11 @@ poly1305_final_avx2:
pushq %rsi
movq $0x40, %rdx
xorq %rsi, %rsi
#ifndef __APPLE__
callq poly1305_blocks_avx2@plt
#else
callq _poly1305_blocks_avx2
#endif /* __APPLE__ */
popq %rsi
L_poly1305_avx2_final_done_blocks_X4:
movq 608(%rdi), %rax
@ -952,7 +1060,11 @@ L_poly1305_avx2_final_done_blocks_X4:
pushq %rsi
movq %rcx, %rdx
leaq 480(%rdi), %rsi
#ifndef __APPLE__
callq poly1305_blocks_avx@plt
#else
callq _poly1305_blocks_avx
#endif /* __APPLE__ */
popq %rsi
popq %rax
popq %rcx
@ -968,7 +1080,11 @@ L_poly1305_avx2_final_start_copy:
L_poly1305_avx2_final_cmp_copy:
cmp %rcx, %rax
jne L_poly1305_avx2_final_start_copy
#ifndef __APPLE__
callq poly1305_final_avx@plt
#else
callq _poly1305_final_avx
#endif /* __APPLE__ */
vpxor %ymm0, %ymm0, %ymm0
vmovdqu %ymm0, 64(%rdi)
vmovdqu %ymm0, 96(%rdi)
@ -982,5 +1098,7 @@ L_poly1305_avx2_final_cmp_copy:
movq $0x00, 608(%rdi)
movw $0x00, 616(%rdi)
repz retq
#ifndef __APPLE__
.size poly1305_final_avx2,.-poly1305_final_avx2
#endif /* __APPLE__ */
#endif /* HAVE_INTEL_AVX2 */

View File

@ -27,6 +27,11 @@
#endif /* HAVE_INTEL_AVX2 */
#ifdef HAVE_INTEL_AVX1
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
L_avx1_sha256_k:
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
@ -44,19 +49,54 @@ L_avx1_sha256_k:
.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx1_sha256_shuf_00BA:
.quad 0xb0a090803020100, 0xffffffffffffffff
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx1_sha256_shuf_DC00:
.quad 0xffffffffffffffff, 0xb0a090803020100
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx1_sha256_flip_mask:
.quad 0x405060700010203, 0xc0d0e0f08090a0b
#ifndef __APPLE__
.text
.globl Transform_Sha256_AVX1
.type Transform_Sha256_AVX1,@function
.align 4
Transform_Sha256_AVX1:
#else
.section __TEXT,__text
.globl _Transform_Sha256_AVX1
.p2align 2
_Transform_Sha256_AVX1:
#endif /* __APPLE__ */
pushq %rbx
pushq %r12
pushq %r13
@ -2432,11 +2472,21 @@ Transform_Sha256_AVX1:
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size Transform_Sha256_AVX1,.-Transform_Sha256_AVX1
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl Transform_Sha256_AVX1_Len
.type Transform_Sha256_AVX1_Len,@function
.align 4
Transform_Sha256_AVX1_Len:
#else
.section __TEXT,__text
.globl _Transform_Sha256_AVX1_Len
.p2align 2
_Transform_Sha256_AVX1_Len:
#endif /* __APPLE__ */
pushq %rbx
pushq %r12
pushq %r13
@ -4828,7 +4878,14 @@ L_sha256_len_avx1_start:
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size Transform_Sha256_AVX1_Len,.-Transform_Sha256_AVX1_Len
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
L_avx1_rorx_sha256_k:
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
@ -4846,19 +4903,54 @@ L_avx1_rorx_sha256_k:
.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx1_rorx_sha256_shuf_00BA:
.quad 0xb0a090803020100, 0xffffffffffffffff
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx1_rorx_sha256_shuf_DC00:
.quad 0xffffffffffffffff, 0xb0a090803020100
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx1_rorx_sha256_flip_mask:
.quad 0x405060700010203, 0xc0d0e0f08090a0b
#ifndef __APPLE__
.text
.globl Transform_Sha256_AVX1_RORX
.type Transform_Sha256_AVX1_RORX,@function
.align 4
Transform_Sha256_AVX1_RORX:
#else
.section __TEXT,__text
.globl _Transform_Sha256_AVX1_RORX
.p2align 2
_Transform_Sha256_AVX1_RORX:
#endif /* __APPLE__ */
pushq %rbx
pushq %r12
pushq %r13
@ -7192,11 +7284,21 @@ Transform_Sha256_AVX1_RORX:
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size Transform_Sha256_AVX1_RORX,.-Transform_Sha256_AVX1_RORX
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl Transform_Sha256_AVX1_RORX_Len
.type Transform_Sha256_AVX1_RORX_Len,@function
.align 4
Transform_Sha256_AVX1_RORX_Len:
#else
.section __TEXT,__text
.globl _Transform_Sha256_AVX1_RORX_Len
.p2align 2
_Transform_Sha256_AVX1_RORX_Len:
#endif /* __APPLE__ */
pushq %rbx
pushq %r12
pushq %r13
@ -9547,9 +9649,16 @@ L_sha256_len_avx1_len_rorx_start:
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size Transform_Sha256_AVX1_RORX_Len,.-Transform_Sha256_AVX1_RORX_Len
#endif /* __APPLE__ */
#endif /* HAVE_INTEL_AVX1 */
#ifdef HAVE_INTEL_AVX2
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
L_avx2_sha256_k:
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
@ -9583,22 +9692,57 @@ L_avx2_sha256_k:
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.align 32
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 32
#else
.p2align 5
#endif /* __APPLE__ */
L_avx2_sha256_shuf_00BA:
.quad 0xb0a090803020100, 0xffffffffffffffff
.quad 0xb0a090803020100, 0xffffffffffffffff
.align 32
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 32
#else
.p2align 5
#endif /* __APPLE__ */
L_avx2_sha256_shuf_DC00:
.quad 0xffffffffffffffff, 0xb0a090803020100
.quad 0xffffffffffffffff, 0xb0a090803020100
.align 32
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 32
#else
.p2align 5
#endif /* __APPLE__ */
L_avx2_sha256_flip_mask:
.quad 0x405060700010203, 0xc0d0e0f08090a0b
.quad 0x405060700010203, 0xc0d0e0f08090a0b
#ifndef __APPLE__
.text
.globl Transform_Sha256_AVX2
.type Transform_Sha256_AVX2,@function
.align 4
Transform_Sha256_AVX2:
#else
.section __TEXT,__text
.globl _Transform_Sha256_AVX2
.p2align 2
_Transform_Sha256_AVX2:
#endif /* __APPLE__ */
pushq %rbx
pushq %r12
pushq %r13
@ -11974,11 +12118,21 @@ Transform_Sha256_AVX2:
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size Transform_Sha256_AVX2,.-Transform_Sha256_AVX2
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl Transform_Sha256_AVX2_Len
.type Transform_Sha256_AVX2_Len,@function
.align 4
Transform_Sha256_AVX2_Len:
#else
.section __TEXT,__text
.globl _Transform_Sha256_AVX2_Len
.p2align 2
_Transform_Sha256_AVX2_Len:
#endif /* __APPLE__ */
pushq %rbx
pushq %r12
pushq %r13
@ -11994,7 +12148,11 @@ Transform_Sha256_AVX2_Len:
vmovdqu 32(%rbp), %ymm1
vmovups %ymm0, 32(%rdi)
vmovups %ymm1, 64(%rdi)
#ifndef __APPLE__
call Transform_Sha256_AVX2@plt
#else
call _Transform_Sha256_AVX2
#endif /* __APPLE__ */
addq $0x40, %rbp
subl $0x40, %esi
jz L_sha256_len_avx2_done
@ -16089,7 +16247,14 @@ L_sha256_len_avx2_done:
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size Transform_Sha256_AVX2_Len,.-Transform_Sha256_AVX2_Len
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
L_avx2_rorx_sha256_k:
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
@ -16123,22 +16288,57 @@ L_avx2_rorx_sha256_k:
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.align 32
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 32
#else
.p2align 5
#endif /* __APPLE__ */
L_avx2_rorx_sha256_flip_mask:
.quad 0x405060700010203, 0xc0d0e0f08090a0b
.quad 0x405060700010203, 0xc0d0e0f08090a0b
.align 32
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 32
#else
.p2align 5
#endif /* __APPLE__ */
L_avx2_rorx_sha256_shuf_00BA:
.quad 0xb0a090803020100, 0xffffffffffffffff
.quad 0xb0a090803020100, 0xffffffffffffffff
.align 32
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 32
#else
.p2align 5
#endif /* __APPLE__ */
L_avx2_rorx_sha256_shuf_DC00:
.quad 0xffffffffffffffff, 0xb0a090803020100
.quad 0xffffffffffffffff, 0xb0a090803020100
#ifndef __APPLE__
.text
.globl Transform_Sha256_AVX2_RORX
.type Transform_Sha256_AVX2_RORX,@function
.align 4
Transform_Sha256_AVX2_RORX:
#else
.section __TEXT,__text
.globl _Transform_Sha256_AVX2_RORX
.p2align 2
_Transform_Sha256_AVX2_RORX:
#endif /* __APPLE__ */
pushq %rbx
pushq %r12
pushq %r13
@ -18489,11 +18689,21 @@ Transform_Sha256_AVX2_RORX:
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size Transform_Sha256_AVX2_RORX,.-Transform_Sha256_AVX2_RORX
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl Transform_Sha256_AVX2_RORX_Len
.type Transform_Sha256_AVX2_RORX_Len,@function
.align 4
Transform_Sha256_AVX2_RORX_Len:
#else
.section __TEXT,__text
.globl _Transform_Sha256_AVX2_RORX_Len
.p2align 2
_Transform_Sha256_AVX2_RORX_Len:
#endif /* __APPLE__ */
pushq %rbx
pushq %r12
pushq %r13
@ -18509,7 +18719,11 @@ Transform_Sha256_AVX2_RORX_Len:
vmovdqu 32(%rbp), %ymm1
vmovups %ymm0, 32(%rdi)
vmovups %ymm1, 64(%rdi)
#ifndef __APPLE__
call Transform_Sha256_AVX2_RORX@plt
#else
call _Transform_Sha256_AVX2_RORX
#endif /* __APPLE__ */
addq $0x40, %rbp
subl $0x40, %esi
jz L_sha256_len_avx2_rorx_done
@ -22433,5 +22647,7 @@ L_sha256_len_avx2_rorx_done:
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size Transform_Sha256_AVX2_RORX_Len,.-Transform_Sha256_AVX2_RORX_Len
#endif /* __APPLE__ */
#endif /* HAVE_INTEL_AVX2 */

View File

@ -27,7 +27,16 @@
#endif /* HAVE_INTEL_AVX2 */
#ifdef HAVE_INTEL_AVX1
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx1_sha512_k:
.quad 0x428a2f98d728ae22,0x7137449123ef65cd
.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
@ -69,13 +78,30 @@ L_avx1_sha512_k:
.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
.align 16
/* Data section + 16-byte alignment, ELF vs Mach-O spelling (.p2align 4 = 16). */
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
/* vpshufb mask (one xmm lane): byte indices 7..0 then 15..8 reverse the
 * bytes of each 64-bit word, converting big-endian SHA-512 message words
 * to little-endian register order. */
L_avx1_sha512_flip_mask:
.quad 0x1020304050607, 0x8090a0b0c0d0e0f
#ifndef __APPLE__
.text
.globl Transform_Sha512_AVX1
.type Transform_Sha512_AVX1,@function
.align 4
Transform_Sha512_AVX1:
#else
.section __TEXT,__text
.globl _Transform_Sha512_AVX1
.p2align 2
_Transform_Sha512_AVX1:
#endif /* __APPLE__ */
pushq %rbx
pushq %r12
pushq %r13
@ -1335,11 +1361,21 @@ L_sha256_len_avx1_start:
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size Transform_Sha512_AVX1,.-Transform_Sha512_AVX1
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl Transform_Sha512_AVX1_Len
.type Transform_Sha512_AVX1_Len,@function
.align 4
Transform_Sha512_AVX1_Len:
#else
.section __TEXT,__text
.globl _Transform_Sha512_AVX1_Len
.p2align 2
_Transform_Sha512_AVX1_Len:
#endif /* __APPLE__ */
pushq %rbx
pushq %r12
pushq %r13
@ -2618,8 +2654,19 @@ L_sha512_len_avx1_start:
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size Transform_Sha512_AVX1_Len,.-Transform_Sha512_AVX1_Len
.align 16
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx1_rorx_sha512_k:
.quad 0x428a2f98d728ae22,0x7137449123ef65cd
.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
@ -2661,13 +2708,30 @@ L_avx1_rorx_sha512_k:
.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
.align 16
/* Data section + 16-byte alignment, ELF vs Mach-O spelling (.p2align 4 = 16). */
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
/* vpshufb mask: reverses the bytes of each 64-bit word (big-endian SHA-512
 * input -> little-endian), same constant as L_avx1_sha512_flip_mask but
 * private to the RORX variant. */
L_avx1_rorx_sha512_flip_mask:
.quad 0x1020304050607, 0x8090a0b0c0d0e0f
#ifndef __APPLE__
.text
.globl Transform_Sha512_AVX1_RORX
.type Transform_Sha512_AVX1_RORX,@function
.align 4
Transform_Sha512_AVX1_RORX:
#else
.section __TEXT,__text
.globl _Transform_Sha512_AVX1_RORX
.p2align 2
_Transform_Sha512_AVX1_RORX:
#endif /* __APPLE__ */
pushq %rbx
pushq %r12
pushq %r13
@ -3856,11 +3920,21 @@ L_sha256_len_avx1_rorx_start:
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size Transform_Sha512_AVX1_RORX,.-Transform_Sha512_AVX1_RORX
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl Transform_Sha512_AVX1_RORX_Len
.type Transform_Sha512_AVX1_RORX_Len,@function
.align 4
Transform_Sha512_AVX1_RORX_Len:
#else
.section __TEXT,__text
.globl _Transform_Sha512_AVX1_RORX_Len
.p2align 2
_Transform_Sha512_AVX1_RORX_Len:
#endif /* __APPLE__ */
pushq %rbx
pushq %r12
pushq %r13
@ -5084,10 +5158,21 @@ L_sha512_len_avx1_rorx_start:
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size Transform_Sha512_AVX1_RORX_Len,.-Transform_Sha512_AVX1_RORX_Len
#endif /* __APPLE__ */
#endif /* HAVE_INTEL_AVX1 */
#ifdef HAVE_INTEL_AVX2
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx2_sha512_k:
.quad 0x428a2f98d728ae22,0x7137449123ef65cd
.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
@ -5129,7 +5214,16 @@ L_avx2_sha512_k:
.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx2_sha512_k_2:
.quad 0x428a2f98d728ae22,0x7137449123ef65cd
.quad 0x428a2f98d728ae22,0x7137449123ef65cd
@ -5211,17 +5305,43 @@ L_avx2_sha512_k_2:
.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
.align 8
/* Data section + 8-byte alignment, ELF vs Mach-O spelling (.p2align 3 = 8). */
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 8
#else
.p2align 3
#endif /* __APPLE__ */
/* Address 1024 bytes (128 quads) into L_avx2_sha512_k_2 — presumably the
 * loop-termination sentinel for the AVX2 round loop; confirm against the
 * round code (elided in this view).  Stored as data rather than computed
 * so it works identically under ELF and Mach-O relocation rules. */
L_avx2_sha512_k_2_end:
.quad 1024+L_avx2_sha512_k_2
.align 32
/* Data section + 32-byte alignment, ELF vs Mach-O spelling (.p2align 5 = 32). */
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 32
#else
.p2align 5
#endif /* __APPLE__ */
/* vpshufb mask: reverses the bytes of each 64-bit word (big-endian SHA-512
 * message -> little-endian).  Duplicated to cover both 128-bit ymm lanes. */
L_avx2_sha512_flip_mask:
.quad 0x1020304050607, 0x8090a0b0c0d0e0f
.quad 0x1020304050607, 0x8090a0b0c0d0e0f
#ifndef __APPLE__
.text
.globl Transform_Sha512_AVX2
.type Transform_Sha512_AVX2,@function
.align 4
Transform_Sha512_AVX2:
#else
.section __TEXT,__text
.globl _Transform_Sha512_AVX2
.p2align 2
_Transform_Sha512_AVX2:
#endif /* __APPLE__ */
pushq %rbx
pushq %r12
pushq %r13
@ -6269,11 +6389,21 @@ L_sha256_avx2_start:
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size Transform_Sha512_AVX2,.-Transform_Sha512_AVX2
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl Transform_Sha512_AVX2_Len
.type Transform_Sha512_AVX2_Len,@function
.align 4
Transform_Sha512_AVX2_Len:
#else
.section __TEXT,__text
.globl _Transform_Sha512_AVX2_Len
.p2align 2
_Transform_Sha512_AVX2_Len:
#endif /* __APPLE__ */
pushq %rbx
pushq %r12
pushq %r13
@ -6292,7 +6422,11 @@ Transform_Sha512_AVX2_Len:
vmovups %ymm1, 96(%rdi)
vmovups %ymm2, 128(%rdi)
vmovups %ymm3, 160(%rdi)
#ifndef __APPLE__
call Transform_Sha512_AVX2@plt
#else
call _Transform_Sha512_AVX2
#endif /* __APPLE__ */
addq $0x80, 224(%rdi)
subl $0x80, %ebp
jz L_sha512_len_avx2_done
@ -7881,8 +8015,19 @@ L_sha512_len_avx2_done:
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size Transform_Sha512_AVX2_Len,.-Transform_Sha512_AVX2_Len
.align 16
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx2_rorx_sha512_k:
.quad 0x428a2f98d728ae22,0x7137449123ef65cd
.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
@ -7924,7 +8069,16 @@ L_avx2_rorx_sha512_k:
.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
.align 16
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 16
#else
.p2align 4
#endif /* __APPLE__ */
L_avx2_rorx_sha512_k_2:
.quad 0x428a2f98d728ae22,0x7137449123ef65cd
.quad 0x428a2f98d728ae22,0x7137449123ef65cd
@ -8006,17 +8160,43 @@ L_avx2_rorx_sha512_k_2:
.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
.align 8
/* Data section + 8-byte alignment, ELF vs Mach-O spelling (.p2align 3 = 8). */
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 8
#else
.p2align 3
#endif /* __APPLE__ */
/* Address 1024 bytes (128 quads) into L_avx2_rorx_sha512_k_2 — presumably
 * the loop-termination sentinel for the RORX round loop; confirm against
 * the round code (elided in this view). */
L_avx2_rorx_sha512_k_2_end:
.quad 1024+L_avx2_rorx_sha512_k_2
.align 32
/* Data section + 32-byte alignment, ELF vs Mach-O spelling (.p2align 5 = 32). */
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 32
#else
.p2align 5
#endif /* __APPLE__ */
/* vpshufb mask: reverses the bytes of each 64-bit word (big-endian SHA-512
 * message -> little-endian), duplicated across both ymm lanes; private copy
 * for the AVX2+RORX variant. */
L_avx2_rorx_sha512_flip_mask:
.quad 0x1020304050607, 0x8090a0b0c0d0e0f
.quad 0x1020304050607, 0x8090a0b0c0d0e0f
#ifndef __APPLE__
.text
.globl Transform_Sha512_AVX2_RORX
.type Transform_Sha512_AVX2_RORX,@function
.align 4
Transform_Sha512_AVX2_RORX:
#else
.section __TEXT,__text
.globl _Transform_Sha512_AVX2_RORX
.p2align 2
_Transform_Sha512_AVX2_RORX:
#endif /* __APPLE__ */
pushq %rbx
pushq %r12
pushq %r13
@ -9006,11 +9186,21 @@ L_sha256_len_avx2_rorx_start:
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size Transform_Sha512_AVX2_RORX,.-Transform_Sha512_AVX2_RORX
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl Transform_Sha512_AVX2_RORX_Len
.type Transform_Sha512_AVX2_RORX_Len,@function
.align 4
Transform_Sha512_AVX2_RORX_Len:
#else
.section __TEXT,__text
.globl _Transform_Sha512_AVX2_RORX_Len
.p2align 2
_Transform_Sha512_AVX2_RORX_Len:
#endif /* __APPLE__ */
pushq %rbx
pushq %r12
pushq %r13
@ -9029,7 +9219,11 @@ Transform_Sha512_AVX2_RORX_Len:
vmovups %ymm1, 96(%rdi)
vmovups %ymm2, 128(%rdi)
vmovups %ymm3, 160(%rdi)
#ifndef __APPLE__
call Transform_Sha512_AVX2_RORX@plt
#else
call _Transform_Sha512_AVX2_RORX
#endif /* __APPLE__ */
pop %rsi
addq $0x80, 224(%rdi)
subl $0x80, %esi
@ -10541,5 +10735,7 @@ L_sha512_len_avx2_rorx_done:
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size Transform_Sha512_AVX2_RORX_Len,.-Transform_Sha512_AVX2_RORX_Len
#endif /* __APPLE__ */
#endif /* HAVE_INTEL_AVX2 */

File diff suppressed because it is too large Load Diff