Mirror of https://github.com/wolfSSL/wolfssl.git (synced 2025-08-01 03:34:39 +02:00)
ARM32 ASM: vrev not always available
Provide alternative assembly instructions to vrev when WOLFSSL_ARM_ARCH_NO_VREV is defined.
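The fallback works because a vrev byte reversal can be composed from shift/shift-right-and-insert pairs: vshl.i16/vsri.i16 by #8 swap the bytes inside each 16-bit lane, vshl.i32/vsri.i32 by #16 then swap the 16-bit halves of each 32-bit lane (equivalent to vrev32.8), and the SHA-512 path adds vshl.i64/vsri.i64 by #32 to swap the 32-bit halves of each 64-bit lane (equivalent to vrev64.8). The scalar C model below only illustrates that equivalence and is not part of the commit; the helper names rev32_model/rev64_model are made up for this sketch.

/* Scalar model of the vrev fallback; illustration only, not part of the
 * commit. rev32_model/rev64_model are hypothetical names. */
#include <stdint.h>
#include <stdio.h>

/* vshl.i16/vsri.i16 #8: swap the bytes in each 16-bit half; then
 * vshl.i32/vsri.i32 #16: swap the 16-bit halves. Net effect: a 32-bit
 * byte reverse, what vrev32.8 does to every word of a q register. */
static uint32_t rev32_model(uint32_t w)
{
    uint32_t t = ((w << 8) & 0xFF00FF00u) | ((w >> 8) & 0x00FF00FFu);
    return (t << 16) | (t >> 16);
}

/* SHA-512 path: after the 32-bit byte reverse, vshl.i64/vsri.i64 #32 swap
 * the 32-bit halves, completing a 64-bit byte reverse like vrev64.8. */
static uint64_t rev64_model(uint64_t d)
{
    uint64_t lo = rev32_model((uint32_t)d);
    uint64_t hi = rev32_model((uint32_t)(d >> 32));
    return (lo << 32) | hi;
}

int main(void)
{
    printf("%08x\n", rev32_model(0x11223344u));    /* expect 44332211 */
    printf("%016llx\n",
           (unsigned long long)rev64_model(0x1122334455667788ull));
                                                   /* expect 8877665544332211 */
    return 0;
}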
@@ -1,6 +1,6 @@
 /* armv8-32-sha256-asm
  *
- * Copyright (C) 2006-2021 wolfSSL Inc.
+ * Copyright (C) 2006-2022 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -1559,10 +1559,29 @@ Transform_Sha256_Len:
 L_SHA256_transform_neon_len_begin:
     # Load W
     vldm.32 r1!, {d0-d7}
+#ifndef WOLFSSL_ARM_ARCH_NO_VREV
     vrev32.8 q0, q0
     vrev32.8 q1, q1
     vrev32.8 q2, q2
     vrev32.8 q3, q3
+#else
+    vshl.i16 q4, q0, #8
+    vshl.i16 q5, q1, #8
+    vsri.i16 q4, q0, #8
+    vsri.i16 q5, q1, #8
+    vshl.i32 q0, q4, #16
+    vshl.i32 q1, q5, #16
+    vsri.i32 q0, q4, #16
+    vsri.i32 q1, q5, #16
+    vshl.i16 q4, q2, #8
+    vshl.i16 q5, q3, #8
+    vsri.i16 q4, q2, #8
+    vsri.i16 q5, q3, #8
+    vshl.i32 q2, q4, #16
+    vshl.i32 q3, q5, #16
+    vsri.i32 q2, q4, #16
+    vsri.i32 q3, q5, #16
+#endif /* WOLFSSL_ARM_ARCH_NO_VREV */
     str r1, [sp, #4]
     mov lr, #3
     # Start of 16 rounds
@@ -1,6 +1,6 @@
 /* armv8-32-sha256-asm
  *
- * Copyright (C) 2006-2021 wolfSSL Inc.
+ * Copyright (C) 2006-2022 wolfSSL Inc.
  *
  * This file is part of wolfSSL.
  *
@@ -1554,10 +1554,29 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len)
         "L_SHA256_transform_neon_len_begin_%=: \n\t"
         /* Load W */
         "vldm.32 %[data]!, {d0-d7}\n\t"
+#ifndef WOLFSSL_ARM_ARCH_NO_VREV
         "vrev32.8 q0, q0\n\t"
         "vrev32.8 q1, q1\n\t"
         "vrev32.8 q2, q2\n\t"
         "vrev32.8 q3, q3\n\t"
+#else
+        "vshl.i16 q4, q0, #8\n\t"
+        "vshl.i16 q5, q1, #8\n\t"
+        "vsri.i16 q4, q0, #8\n\t"
+        "vsri.i16 q5, q1, #8\n\t"
+        "vshl.i32 q0, q4, #16\n\t"
+        "vshl.i32 q1, q5, #16\n\t"
+        "vsri.i32 q0, q4, #16\n\t"
+        "vsri.i32 q1, q5, #16\n\t"
+        "vshl.i16 q4, q2, #8\n\t"
+        "vshl.i16 q5, q3, #8\n\t"
+        "vsri.i16 q4, q2, #8\n\t"
+        "vsri.i16 q5, q3, #8\n\t"
+        "vshl.i32 q2, q4, #16\n\t"
+        "vshl.i32 q3, q5, #16\n\t"
+        "vsri.i32 q2, q4, #16\n\t"
+        "vsri.i32 q3, q5, #16\n\t"
+#endif /* WOLFSSL_ARM_ARCH_NO_VREV */
         "str %[data], [sp, #4]\n\t"
         "mov lr, #3\n\t"
         /* Start of 16 rounds */
@@ -4219,6 +4219,7 @@ Transform_Sha512_Len:
 L_SHA512_transform_neon_len_begin:
     # Load W
     vldm.64 r1!, {d16-d31}
+#ifndef WOLFSSL_ARM_ARCH_NO_VREV
     vrev64.8 q8, q8
     vrev64.8 q9, q9
     vrev64.8 q10, q10
@@ -4227,6 +4228,56 @@ L_SHA512_transform_neon_len_begin:
     vrev64.8 q13, q13
     vrev64.8 q14, q14
     vrev64.8 q15, q15
+#else
+    vshl.i16 q4, q8, #8
+    vshl.i16 q5, q9, #8
+    vsri.i16 q4, q8, #8
+    vsri.i16 q5, q9, #8
+    vshl.i32 q6, q4, #16
+    vshl.i32 q7, q5, #16
+    vsri.i32 q6, q4, #16
+    vsri.i32 q7, q5, #16
+    vshl.i64 q8, q6, #32
+    vshl.i64 q9, q7, #32
+    vsri.i64 q8, q6, #32
+    vsri.i64 q9, q7, #32
+    vshl.i16 q4, q10, #8
+    vshl.i16 q5, q11, #8
+    vsri.i16 q4, q10, #8
+    vsri.i16 q5, q11, #8
+    vshl.i32 q6, q4, #16
+    vshl.i32 q7, q5, #16
+    vsri.i32 q6, q4, #16
+    vsri.i32 q7, q5, #16
+    vshl.i64 q10, q6, #32
+    vshl.i64 q11, q7, #32
+    vsri.i64 q10, q6, #32
+    vsri.i64 q11, q7, #32
+    vshl.i16 q4, q12, #8
+    vshl.i16 q5, q13, #8
+    vsri.i16 q4, q12, #8
+    vsri.i16 q5, q13, #8
+    vshl.i32 q6, q4, #16
+    vshl.i32 q7, q5, #16
+    vsri.i32 q6, q4, #16
+    vsri.i32 q7, q5, #16
+    vshl.i64 q12, q6, #32
+    vshl.i64 q13, q7, #32
+    vsri.i64 q12, q6, #32
+    vsri.i64 q13, q7, #32
+    vshl.i16 q4, q14, #8
+    vshl.i16 q5, q15, #8
+    vsri.i16 q4, q14, #8
+    vsri.i16 q5, q15, #8
+    vshl.i32 q6, q4, #16
+    vshl.i32 q7, q5, #16
+    vsri.i32 q6, q4, #16
+    vsri.i32 q7, q5, #16
+    vshl.i64 q14, q6, #32
+    vshl.i64 q15, q7, #32
+    vsri.i64 q14, q6, #32
+    vsri.i64 q15, q7, #32
+#endif /* WOLFSSL_ARM_ARCH_NO_VREV */
     adr r3, L_SHA512_transform_neon_len_k
     mov r12, #4
     # Start of 16 rounds
@@ -3662,6 +3662,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
         "L_SHA512_transform_neon_len_begin_%=: \n\t"
         /* Load W */
        "vldm.64 %[data]!, {d16-d31}\n\t"
+#ifndef WOLFSSL_ARM_ARCH_NO_VREV
         "vrev64.8 q8, q8\n\t"
         "vrev64.8 q9, q9\n\t"
         "vrev64.8 q10, q10\n\t"
@@ -3670,6 +3671,56 @@ void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
         "vrev64.8 q13, q13\n\t"
         "vrev64.8 q14, q14\n\t"
         "vrev64.8 q15, q15\n\t"
+#else
+        "vshl.i16 q4, q8, #8\n\t"
+        "vshl.i16 q5, q9, #8\n\t"
+        "vsri.i16 q4, q8, #8\n\t"
+        "vsri.i16 q5, q9, #8\n\t"
+        "vshl.i32 q6, q4, #16\n\t"
+        "vshl.i32 q7, q5, #16\n\t"
+        "vsri.i32 q6, q4, #16\n\t"
+        "vsri.i32 q7, q5, #16\n\t"
+        "vshl.i64 q8, q6, #32\n\t"
+        "vshl.i64 q9, q7, #32\n\t"
+        "vsri.i64 q8, q6, #32\n\t"
+        "vsri.i64 q9, q7, #32\n\t"
+        "vshl.i16 q4, q10, #8\n\t"
+        "vshl.i16 q5, q11, #8\n\t"
+        "vsri.i16 q4, q10, #8\n\t"
+        "vsri.i16 q5, q11, #8\n\t"
+        "vshl.i32 q6, q4, #16\n\t"
+        "vshl.i32 q7, q5, #16\n\t"
+        "vsri.i32 q6, q4, #16\n\t"
+        "vsri.i32 q7, q5, #16\n\t"
+        "vshl.i64 q10, q6, #32\n\t"
+        "vshl.i64 q11, q7, #32\n\t"
+        "vsri.i64 q10, q6, #32\n\t"
+        "vsri.i64 q11, q7, #32\n\t"
+        "vshl.i16 q4, q12, #8\n\t"
+        "vshl.i16 q5, q13, #8\n\t"
+        "vsri.i16 q4, q12, #8\n\t"
+        "vsri.i16 q5, q13, #8\n\t"
+        "vshl.i32 q6, q4, #16\n\t"
+        "vshl.i32 q7, q5, #16\n\t"
+        "vsri.i32 q6, q4, #16\n\t"
+        "vsri.i32 q7, q5, #16\n\t"
+        "vshl.i64 q12, q6, #32\n\t"
+        "vshl.i64 q13, q7, #32\n\t"
+        "vsri.i64 q12, q6, #32\n\t"
+        "vsri.i64 q13, q7, #32\n\t"
+        "vshl.i16 q4, q14, #8\n\t"
+        "vshl.i16 q5, q15, #8\n\t"
+        "vsri.i16 q4, q14, #8\n\t"
+        "vsri.i16 q5, q15, #8\n\t"
+        "vshl.i32 q6, q4, #16\n\t"
+        "vshl.i32 q7, q5, #16\n\t"
+        "vsri.i32 q6, q4, #16\n\t"
+        "vsri.i32 q7, q5, #16\n\t"
+        "vshl.i64 q14, q6, #32\n\t"
+        "vshl.i64 q15, q7, #32\n\t"
+        "vsri.i64 q14, q6, #32\n\t"
+        "vsri.i64 q15, q7, #32\n\t"
+#endif /* WOLFSSL_ARM_ARCH_NO_VREV */
         "mov r3, %[L_SHA512_transform_neon_len_k]\n\t"
         "mov r12, #4\n\t"
         /* Start of 16 rounds */