mirror of
https://github.com/wolfSSL/wolfssl.git
synced 2025-07-29 18:27:29 +02:00
ARM64 assembly - x18 cannot be used
Fix the Curve25519/Ed25519, SHA-512 and SP code so that it does not use x18, which is reserved as the platform register on some AArch64 targets.
This commit is contained in:
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -23,8 +23,6 @@
|
||||
* cd ../scripts
|
||||
* ruby ./sha2/sha512.rb arm64 ../wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.S
|
||||
*/
|
||||
|
||||
#ifdef WOLFSSL_ARMASM
|
||||
#ifdef __aarch64__
|
||||
.text
|
||||
.section .rodata
|
||||
@ -127,16 +125,16 @@ Transform_Sha512_Len:
|
||||
stp x29, x30, [sp, #-128]!
|
||||
add x29, sp, #0
|
||||
str x17, [x29, #16]
|
||||
stp x18, x19, [x29, #24]
|
||||
stp x20, x21, [x29, #40]
|
||||
stp x22, x23, [x29, #56]
|
||||
stp x24, x25, [x29, #72]
|
||||
str x26, [x29, #88]
|
||||
str x19, [x29, #24]
|
||||
stp x20, x21, [x29, #32]
|
||||
stp x22, x23, [x29, #48]
|
||||
stp x24, x25, [x29, #64]
|
||||
stp x26, x27, [x29, #80]
|
||||
stp d8, d9, [x29, #96]
|
||||
stp d10, d11, [x29, #112]
|
||||
adr x3, L_SHA512_transform_neon_len_k
|
||||
adr x26, L_SHA512_transform_neon_len_ror8
|
||||
ld1 {v11.16b}, [x26]
|
||||
adr x27, L_SHA512_transform_neon_len_ror8
|
||||
ld1 {v11.16b}, [x27]
|
||||
# Load digest into working vars
|
||||
ldp x4, x5, [x0]
|
||||
ldp x6, x7, [x0, #16]
|
||||
@ -147,26 +145,26 @@ L_sha512_len_neon_begin:
|
||||
# Load W
|
||||
# Copy digest to add in at end
|
||||
ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x1], #0x40
|
||||
mov x18, x4
|
||||
mov x19, x4
|
||||
ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x1], #0x40
|
||||
mov x19, x5
|
||||
mov x20, x5
|
||||
rev64 v0.16b, v0.16b
|
||||
mov x20, x6
|
||||
mov x21, x6
|
||||
rev64 v1.16b, v1.16b
|
||||
mov x21, x7
|
||||
mov x22, x7
|
||||
rev64 v2.16b, v2.16b
|
||||
mov x22, x8
|
||||
mov x23, x8
|
||||
rev64 v3.16b, v3.16b
|
||||
mov x23, x9
|
||||
mov x24, x9
|
||||
rev64 v4.16b, v4.16b
|
||||
mov x24, x10
|
||||
mov x25, x10
|
||||
rev64 v5.16b, v5.16b
|
||||
mov x25, x11
|
||||
mov x26, x11
|
||||
rev64 v6.16b, v6.16b
|
||||
rev64 v7.16b, v7.16b
|
||||
# Pre-calc: b ^ c
|
||||
eor x16, x5, x6
|
||||
mov x26, #4
|
||||
mov x27, #4
|
||||
# Start of 16 rounds
|
||||
L_sha512_len_neon_start:
|
||||
# Round 0
|
||||
@ -665,7 +663,7 @@ L_sha512_len_neon_start:
|
||||
add v7.2d, v7.2d, v9.2d
|
||||
add x8, x8, x4
|
||||
add x4, x4, x14
|
||||
subs x26, x26, #1
|
||||
subs x27, x27, #1
|
||||
bne L_sha512_len_neon_start
|
||||
# Round 0
|
||||
mov x13, v0.d[0]
|
||||
@ -1019,14 +1017,14 @@ L_sha512_len_neon_start:
|
||||
add x14, x14, x17
|
||||
add x8, x8, x4
|
||||
add x4, x4, x14
|
||||
add x11, x11, x25
|
||||
add x10, x10, x24
|
||||
add x9, x9, x23
|
||||
add x8, x8, x22
|
||||
add x7, x7, x21
|
||||
add x6, x6, x20
|
||||
add x5, x5, x19
|
||||
add x4, x4, x18
|
||||
add x11, x11, x26
|
||||
add x10, x10, x25
|
||||
add x9, x9, x24
|
||||
add x8, x8, x23
|
||||
add x7, x7, x22
|
||||
add x6, x6, x21
|
||||
add x5, x5, x20
|
||||
add x4, x4, x19
|
||||
adr x3, L_SHA512_transform_neon_len_k
|
||||
subs w2, w2, #0x80
|
||||
bne L_sha512_len_neon_begin
|
||||
@ -1035,15 +1033,14 @@ L_sha512_len_neon_start:
|
||||
stp x8, x9, [x0, #32]
|
||||
stp x10, x11, [x0, #48]
|
||||
ldr x17, [x29, #16]
|
||||
ldp x18, x19, [x29, #24]
|
||||
ldp x20, x21, [x29, #40]
|
||||
ldp x22, x23, [x29, #56]
|
||||
ldp x24, x25, [x29, #72]
|
||||
ldr x26, [x29, #88]
|
||||
ldr x19, [x29, #24]
|
||||
ldp x20, x21, [x29, #32]
|
||||
ldp x22, x23, [x29, #48]
|
||||
ldp x24, x25, [x29, #64]
|
||||
ldp x26, x27, [x29, #80]
|
||||
ldp d8, d9, [x29, #96]
|
||||
ldp d10, d11, [x29, #112]
|
||||
ldp x29, x30, [sp], #0x80
|
||||
ret
|
||||
.size Transform_Sha512_Len,.-Transform_Sha512_Len
|
||||
#endif /* __aarch64__ */
|
||||
#endif /* WOLFSSL_ARMASM */
|
||||
|
@ -24,16 +24,7 @@
|
||||
* ruby ./sha2/sha512.rb arm64 ../wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.c
|
||||
*/
|
||||
#ifdef __aarch64__
|
||||
|
||||
#include <stdint.h>
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
|
||||
#ifdef WOLFSSL_ARMASM
|
||||
|
||||
#include <wolfssl/wolfcrypt/sha512.h>
|
||||
|
||||
static const uint64_t L_SHA512_transform_neon_len_k[] = {
|
||||
@ -130,8 +121,8 @@ void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
|
||||
"stp x29, x30, [sp, #-16]!\n\t"
|
||||
"add x29, sp, #0\n\t"
|
||||
"adr x3, %[L_SHA512_transform_neon_len_k]\n\t"
|
||||
"adr x26, %[L_SHA512_transform_neon_len_ror8]\n\t"
|
||||
"ld1 {v11.16b}, [x26]\n\t"
|
||||
"adr x27, %[L_SHA512_transform_neon_len_ror8]\n\t"
|
||||
"ld1 {v11.16b}, [x27]\n\t"
|
||||
/* Load digest into working vars */
|
||||
"ldp x4, x5, [%x[sha512]]\n\t"
|
||||
"ldp x6, x7, [%x[sha512], #16]\n\t"
|
||||
@ -143,26 +134,26 @@ void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
|
||||
/* Load W */
|
||||
/* Copy digest to add in at end */
|
||||
"ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[data]], #0x40\n\t"
|
||||
"mov x18, x4\n\t"
|
||||
"mov x19, x4\n\t"
|
||||
"ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[data]], #0x40\n\t"
|
||||
"mov x19, x5\n\t"
|
||||
"mov x20, x5\n\t"
|
||||
"rev64 v0.16b, v0.16b\n\t"
|
||||
"mov x20, x6\n\t"
|
||||
"mov x21, x6\n\t"
|
||||
"rev64 v1.16b, v1.16b\n\t"
|
||||
"mov x21, x7\n\t"
|
||||
"mov x22, x7\n\t"
|
||||
"rev64 v2.16b, v2.16b\n\t"
|
||||
"mov x22, x8\n\t"
|
||||
"mov x23, x8\n\t"
|
||||
"rev64 v3.16b, v3.16b\n\t"
|
||||
"mov x23, x9\n\t"
|
||||
"mov x24, x9\n\t"
|
||||
"rev64 v4.16b, v4.16b\n\t"
|
||||
"mov x24, x10\n\t"
|
||||
"mov x25, x10\n\t"
|
||||
"rev64 v5.16b, v5.16b\n\t"
|
||||
"mov x25, x11\n\t"
|
||||
"mov x26, x11\n\t"
|
||||
"rev64 v6.16b, v6.16b\n\t"
|
||||
"rev64 v7.16b, v7.16b\n\t"
|
||||
/* Pre-calc: b ^ c */
|
||||
"eor x16, x5, x6\n\t"
|
||||
"mov x26, #4\n\t"
|
||||
"mov x27, #4\n\t"
|
||||
/* Start of 16 rounds */
|
||||
"\n"
|
||||
"L_sha512_len_neon_start_%=: \n\t"
|
||||
@ -662,7 +653,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
|
||||
"add v7.2d, v7.2d, v9.2d\n\t"
|
||||
"add x8, x8, x4\n\t"
|
||||
"add x4, x4, x14\n\t"
|
||||
"subs x26, x26, #1\n\t"
|
||||
"subs x27, x27, #1\n\t"
|
||||
"bne L_sha512_len_neon_start_%=\n\t"
|
||||
/* Round 0 */
|
||||
"mov x13, v0.d[0]\n\t"
|
||||
@ -1016,14 +1007,14 @@ void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
|
||||
"add x14, x14, x17\n\t"
|
||||
"add x8, x8, x4\n\t"
|
||||
"add x4, x4, x14\n\t"
|
||||
"add x11, x11, x25\n\t"
|
||||
"add x10, x10, x24\n\t"
|
||||
"add x9, x9, x23\n\t"
|
||||
"add x8, x8, x22\n\t"
|
||||
"add x7, x7, x21\n\t"
|
||||
"add x6, x6, x20\n\t"
|
||||
"add x5, x5, x19\n\t"
|
||||
"add x4, x4, x18\n\t"
|
||||
"add x11, x11, x26\n\t"
|
||||
"add x10, x10, x25\n\t"
|
||||
"add x9, x9, x24\n\t"
|
||||
"add x8, x8, x23\n\t"
|
||||
"add x7, x7, x22\n\t"
|
||||
"add x6, x6, x21\n\t"
|
||||
"add x5, x5, x20\n\t"
|
||||
"add x4, x4, x19\n\t"
|
||||
"adr x3, %[L_SHA512_transform_neon_len_k]\n\t"
|
||||
"subs %w[len], %w[len], #0x80\n\t"
|
||||
"bne L_sha512_len_neon_begin_%=\n\t"
|
||||
@ -1034,9 +1025,8 @@ void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
|
||||
"ldp x29, x30, [sp], #16\n\t"
|
||||
: [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len)
|
||||
: [L_SHA512_transform_neon_len_k] "S" (L_SHA512_transform_neon_len_k), [L_SHA512_transform_neon_len_ror8] "S" (L_SHA512_transform_neon_len_ror8)
|
||||
: "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11"
|
||||
: "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11"
|
||||
);
|
||||
}
|
||||
|
||||
#endif /* WOLFSSL_ARMASM */
|
||||
#endif /* __aarch64__ */
|
||||
|
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user