diff --git a/wolfcrypt/src/port/arm/armv8-32-sha256-asm.S b/wolfcrypt/src/port/arm/armv8-32-sha256-asm.S index e705558a9..6814bdd4d 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha256-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-sha256-asm.S @@ -1541,6 +1541,7 @@ L_SHA256_transform_neon_len_k: .word 0xc67178f2 .text .align 2 + .fpu neon .globl Transform_Sha256_Len .type Transform_Sha256_Len, %function Transform_Sha256_Len: @@ -1558,7 +1559,8 @@ Transform_Sha256_Len: # Start of loop processing a block L_SHA256_transform_neon_len_begin: # Load W - vldm.32 r1!, {d0-d7} + vld1.8 {d0-d3}, [r1]! + vld1.8 {d4-d7}, [r1]! #ifndef WOLFSSL_ARM_ARCH_NEON_64BIT vrev32.8 q0, q0 vrev32.8 q1, q1 diff --git a/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c index 22b1331fa..e211af660 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c @@ -1561,7 +1561,8 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) "\n" "L_SHA256_transform_neon_len_begin_%=: \n\t" /* Load W */ - "vldm.32 %[data]!, {d0-d7}\n\t" + "vld1.8 {d0-d3}, [%[data]]!\n\t" + "vld1.8 {d4-d7}, [%[data]]!\n\t" #ifndef WOLFSSL_ARM_ARCH_NEON_64BIT "vrev32.8 q0, q0\n\t" "vrev32.8 q1, q1\n\t" diff --git a/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S b/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S index 7d4dcdc26..0dd0c6b36 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S @@ -4209,6 +4209,7 @@ L_SHA512_transform_neon_len_k: .word 0x6c44198c .text .align 2 + .fpu neon .globl Transform_Sha512_Len .type Transform_Sha512_Len, %function Transform_Sha512_Len: @@ -4218,7 +4219,10 @@ Transform_Sha512_Len: # Start of loop processing a block L_SHA512_transform_neon_len_begin: # Load W - vldm.64 r1!, {d16-d31} + vld1.8 {q8, q9}, [r1]! + vld1.8 {q10, q11}, [r1]! + vld1.8 {q12, q13}, [r1]! + vld1.8 {q14, q15}, [r1]! #ifndef WOLFSSL_ARM_ARCH_NEON_64BIT vrev64.8 q8, q8 vrev64.8 q9, q9 diff --git a/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c index f08e72796..ef1b2b1d4 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c @@ -3669,7 +3669,10 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "\n" "L_SHA512_transform_neon_len_begin_%=: \n\t" /* Load W */ - "vldm.64 %[data]!, {d16-d31}\n\t" + "vld1.8 {q8-q9}, [%[data]]!\n\t" + "vld1.8 {q10-q11}, [%[data]]!\n\t" + "vld1.8 {q12-q13}, [%[data]]!\n\t" + "vld1.8 {q14-q15}, [%[data]]!\n\t" #ifndef WOLFSSL_ARM_ARCH_NEON_64BIT "vrev64.8 q8, q8\n\t" "vrev64.8 q9, q9\n\t"