ARM ASM: add fpu directive to assembly files

This commit is contained in:
Sean Parkinson
2022-09-21 11:57:01 +10:00
parent 2578f2c8f2
commit 1b9656f72d
4 changed files with 14 additions and 4 deletions

View File

@@ -1541,6 +1541,7 @@ L_SHA256_transform_neon_len_k:
 .word 0xc67178f2
 .text
 .align 2
+.fpu neon
 .globl Transform_Sha256_Len
 .type Transform_Sha256_Len, %function
 Transform_Sha256_Len:
@@ -1558,7 +1559,8 @@ Transform_Sha256_Len:
 # Start of loop processing a block
 L_SHA256_transform_neon_len_begin:
 # Load W
-vldm.32 r1!, {d0-d7}
+vld1.8 {d0-d3}, [r1]!
+vld1.8 {d4-d7}, [r1]!
 #ifndef WOLFSSL_ARM_ARCH_NEON_64BIT
 vrev32.8 q0, q0
 vrev32.8 q1, q1

View File

@@ -1561,7 +1561,8 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
 "\n"
 "L_SHA256_transform_neon_len_begin_%=: \n\t"
 /* Load W */
-"vldm.32 %[data]!, {d0-d7}\n\t"
+"vld1.8 {d0-d3}, [%[data]]!\n\t"
+"vld1.8 {d4-d7}, [%[data]]!\n\t"
 #ifndef WOLFSSL_ARM_ARCH_NEON_64BIT
 "vrev32.8 q0, q0\n\t"
 "vrev32.8 q1, q1\n\t"

View File

@@ -4209,6 +4209,7 @@ L_SHA512_transform_neon_len_k:
 .word 0x6c44198c
 .text
 .align 2
+.fpu neon
 .globl Transform_Sha512_Len
 .type Transform_Sha512_Len, %function
 Transform_Sha512_Len:
@@ -4218,7 +4219,10 @@ Transform_Sha512_Len:
 # Start of loop processing a block
 L_SHA512_transform_neon_len_begin:
 # Load W
-vldm.64 r1!, {d16-d31}
+vld1.8 {q8, q9}, [r1]!
+vld1.8 {q10, q11}, [r1]!
+vld1.8 {q12, q13}, [r1]!
+vld1.8 {q14, q15}, [r1]!
 #ifndef WOLFSSL_ARM_ARCH_NEON_64BIT
 vrev64.8 q8, q8
 vrev64.8 q9, q9

View File

@@ -3669,7 +3669,10 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
 "\n"
 "L_SHA512_transform_neon_len_begin_%=: \n\t"
 /* Load W */
-"vldm.64 %[data]!, {d16-d31}\n\t"
+"vld1.8 {q8-q9}, [%[data]]!\n\t"
+"vld1.8 {q10-q11}, [%[data]]!\n\t"
+"vld1.8 {q12-q13}, [%[data]]!\n\t"
+"vld1.8 {q14-q15}, [%[data]]!\n\t"
 #ifndef WOLFSSL_ARM_ARCH_NEON_64BIT
 "vrev64.8 q8, q8\n\t"
 "vrev64.8 q9, q9\n\t"