ARM ASM: add fpu directive to assembly files

This commit is contained in:
Sean Parkinson
2022-09-21 11:57:01 +10:00
parent 2578f2c8f2
commit 1b9656f72d
4 changed files with 14 additions and 4 deletions

View File

@ -1541,6 +1541,7 @@ L_SHA256_transform_neon_len_k:
.word 0xc67178f2 .word 0xc67178f2
.text .text
.align 2 .align 2
.fpu neon
.globl Transform_Sha256_Len .globl Transform_Sha256_Len
.type Transform_Sha256_Len, %function .type Transform_Sha256_Len, %function
Transform_Sha256_Len: Transform_Sha256_Len:
@ -1558,7 +1559,8 @@ Transform_Sha256_Len:
# Start of loop processing a block # Start of loop processing a block
L_SHA256_transform_neon_len_begin: L_SHA256_transform_neon_len_begin:
# Load W # Load W
vldm.32 r1!, {d0-d7} vld1.8 {d0-d3}, [r1]!
vld1.8 {d4-d7}, [r1]!
#ifndef WOLFSSL_ARM_ARCH_NEON_64BIT #ifndef WOLFSSL_ARM_ARCH_NEON_64BIT
vrev32.8 q0, q0 vrev32.8 q0, q0
vrev32.8 q1, q1 vrev32.8 q1, q1

View File

@ -1561,7 +1561,8 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"\n" "\n"
"L_SHA256_transform_neon_len_begin_%=: \n\t" "L_SHA256_transform_neon_len_begin_%=: \n\t"
/* Load W */ /* Load W */
"vldm.32 %[data]!, {d0-d7}\n\t" "vld1.8 {d0-d3}, [%[data]]!\n\t"
"vld1.8 {d4-d7}, [%[data]]!\n\t"
#ifndef WOLFSSL_ARM_ARCH_NEON_64BIT #ifndef WOLFSSL_ARM_ARCH_NEON_64BIT
"vrev32.8 q0, q0\n\t" "vrev32.8 q0, q0\n\t"
"vrev32.8 q1, q1\n\t" "vrev32.8 q1, q1\n\t"

View File

@ -4209,6 +4209,7 @@ L_SHA512_transform_neon_len_k:
.word 0x6c44198c .word 0x6c44198c
.text .text
.align 2 .align 2
.fpu neon
.globl Transform_Sha512_Len .globl Transform_Sha512_Len
.type Transform_Sha512_Len, %function .type Transform_Sha512_Len, %function
Transform_Sha512_Len: Transform_Sha512_Len:
@ -4218,7 +4219,10 @@ Transform_Sha512_Len:
# Start of loop processing a block # Start of loop processing a block
L_SHA512_transform_neon_len_begin: L_SHA512_transform_neon_len_begin:
# Load W # Load W
vldm.64 r1!, {d16-d31} vld1.8 {q8, q9}, [r1]!
vld1.8 {q10, q11}, [r1]!
vld1.8 {q12, q13}, [r1]!
vld1.8 {q14, q15}, [r1]!
#ifndef WOLFSSL_ARM_ARCH_NEON_64BIT #ifndef WOLFSSL_ARM_ARCH_NEON_64BIT
vrev64.8 q8, q8 vrev64.8 q8, q8
vrev64.8 q9, q9 vrev64.8 q9, q9

View File

@ -3669,7 +3669,10 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
"\n" "\n"
"L_SHA512_transform_neon_len_begin_%=: \n\t" "L_SHA512_transform_neon_len_begin_%=: \n\t"
/* Load W */ /* Load W */
"vldm.64 %[data]!, {d16-d31}\n\t" "vld1.8 {q8-q9}, [%[data]]!\n\t"
"vld1.8 {q10-q11}, [%[data]]!\n\t"
"vld1.8 {q12-q13}, [%[data]]!\n\t"
"vld1.8 {q14-q15}, [%[data]]!\n\t"
#ifndef WOLFSSL_ARM_ARCH_NEON_64BIT #ifndef WOLFSSL_ARM_ARCH_NEON_64BIT
"vrev64.8 q8, q8\n\t" "vrev64.8 q8, q8\n\t"
"vrev64.8 q9, q9\n\t" "vrev64.8 q9, q9\n\t"