Merge pull request #5643 from SparkiDev/gen_arm32_fixup

ARM32 assembly code: fixed scripts
This commit is contained in:
David Garske
2022-09-29 10:20:06 -07:00
committed by GitHub
7 changed files with 18804 additions and 1266 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -108,24 +108,81 @@ Transform_Sha256_Len:
sub sp, sp, #0xc0 sub sp, sp, #0xc0
adr r3, L_SHA256_transform_len_k adr r3, L_SHA256_transform_len_k
# Copy digest to add in at end # Copy digest to add in at end
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r12, [r0] ldr r12, [r0]
ldr lr, [r0, #4] ldr lr, [r0, #4]
ldrd r4, r5, [r0, #8] #else
ldrd r6, r7, [r0, #16] ldrd r12, lr, [r0]
ldrd r8, r9, [r0, #24] #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r4, [r0, #8]
ldr r5, [r0, #12]
#else
ldrd r4, r5, [r0, #8]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r6, [r0, #16]
ldr r7, [r0, #20]
#else
ldrd r6, r7, [r0, #16]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r8, [r0, #24]
ldr r9, [r0, #28]
#else
ldrd r8, r9, [r0, #24]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r12, [sp, #64] str r12, [sp, #64]
str lr, [sp, #68] str lr, [sp, #68]
strd r4, r5, [sp, #72] #else
strd r6, r7, [sp, #80] strd r12, lr, [sp, #64]
strd r8, r9, [sp, #88] #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r4, [sp, #72]
str r5, [sp, #76]
#else
strd r4, r5, [sp, #72]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r6, [sp, #80]
str r7, [sp, #84]
#else
strd r6, r7, [sp, #80]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r8, [sp, #88]
str r9, [sp, #92]
#else
strd r8, r9, [sp, #88]
#endif
# Start of loop processing a block # Start of loop processing a block
L_SHA256_transform_len_begin: L_SHA256_transform_len_begin:
# Load, Reverse and Store W - 64 bytes # Load, Reverse and Store W - 64 bytes
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r12, [r1] ldr r12, [r1]
ldr lr, [r1, #4] ldr lr, [r1, #4]
ldrd r4, r5, [r1, #8] #else
ldrd r6, r7, [r1, #16] ldrd r12, lr, [r1]
ldrd r8, r9, [r1, #24] #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r4, [r1, #8]
ldr r5, [r1, #12]
#else
ldrd r4, r5, [r1, #8]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r6, [r1, #16]
ldr r7, [r1, #20]
#else
ldrd r6, r7, [r1, #16]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r8, [r1, #24]
ldr r9, [r1, #28]
#else
ldrd r8, r9, [r1, #24]
#endif
rev r12, r12 rev r12, r12
rev lr, lr rev lr, lr
rev r4, r4 rev r4, r4
@ -134,16 +191,54 @@ L_SHA256_transform_len_begin:
rev r7, r7 rev r7, r7
rev r8, r8 rev r8, r8
rev r9, r9 rev r9, r9
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r12, [sp] str r12, [sp]
str lr, [sp, #4] str lr, [sp, #4]
strd r4, r5, [sp, #8] #else
strd r6, r7, [sp, #16] strd r12, lr, [sp]
strd r8, r9, [sp, #24] #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r4, [sp, #8]
str r5, [sp, #12]
#else
strd r4, r5, [sp, #8]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r6, [sp, #16]
str r7, [sp, #20]
#else
strd r6, r7, [sp, #16]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r8, [sp, #24]
str r9, [sp, #28]
#else
strd r8, r9, [sp, #24]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r12, [r1, #32] ldr r12, [r1, #32]
ldr lr, [r1, #36] ldr lr, [r1, #36]
ldrd r4, r5, [r1, #40] #else
ldrd r6, r7, [r1, #48] ldrd r12, lr, [r1, #32]
ldrd r8, r9, [r1, #56] #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r4, [r1, #40]
ldr r5, [r1, #44]
#else
ldrd r4, r5, [r1, #40]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r6, [r1, #48]
ldr r7, [r1, #52]
#else
ldrd r6, r7, [r1, #48]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r8, [r1, #56]
ldr r9, [r1, #60]
#else
ldrd r8, r9, [r1, #56]
#endif
rev r12, r12 rev r12, r12
rev lr, lr rev lr, lr
rev r4, r4 rev r4, r4
@ -152,11 +247,30 @@ L_SHA256_transform_len_begin:
rev r7, r7 rev r7, r7
rev r8, r8 rev r8, r8
rev r9, r9 rev r9, r9
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r12, [sp, #32] str r12, [sp, #32]
str lr, [sp, #36] str lr, [sp, #36]
strd r4, r5, [sp, #40] #else
strd r6, r7, [sp, #48] strd r12, lr, [sp, #32]
strd r8, r9, [sp, #56] #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r4, [sp, #40]
str r5, [sp, #44]
#else
strd r4, r5, [sp, #40]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r6, [sp, #48]
str r7, [sp, #52]
#else
strd r6, r7, [sp, #48]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r8, [sp, #56]
str r9, [sp, #60]
#else
strd r8, r9, [sp, #56]
#endif
ldr r9, [r0, #4] ldr r9, [r0, #4]
ldr r12, [r0, #8] ldr r12, [r0, #8]
eor r9, r9, r12 eor r9, r9, r12
@ -1431,36 +1545,110 @@ L_SHA256_transform_len_start:
str r6, [r0, #16] str r6, [r0, #16]
str r7, [r0] str r7, [r0]
# Add in digest from start # Add in digest from start
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r12, [r0] ldr r12, [r0]
ldr lr, [r0, #4] ldr lr, [r0, #4]
ldrd r4, r5, [r0, #8] #else
ldrd r6, r7, [sp, #64] ldrd r12, lr, [r0]
ldrd r8, r9, [sp, #72] #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r4, [r0, #8]
ldr r5, [r0, #12]
#else
ldrd r4, r5, [r0, #8]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r6, [sp, #64]
ldr r7, [sp, #68]
#else
ldrd r6, r7, [sp, #64]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r8, [sp, #72]
ldr r9, [sp, #76]
#else
ldrd r8, r9, [sp, #72]
#endif
add r12, r12, r6 add r12, r12, r6
add lr, lr, r7 add lr, lr, r7
add r4, r4, r8 add r4, r4, r8
add r5, r5, r9 add r5, r5, r9
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r12, [r0] str r12, [r0]
str lr, [r0, #4] str lr, [r0, #4]
strd r4, r5, [r0, #8] #else
strd r12, lr, [r0]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r4, [r0, #8]
str r5, [r0, #12]
#else
strd r4, r5, [r0, #8]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r12, [sp, #64] str r12, [sp, #64]
str lr, [sp, #68] str lr, [sp, #68]
strd r4, r5, [sp, #72] #else
strd r12, lr, [sp, #64]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r4, [sp, #72]
str r5, [sp, #76]
#else
strd r4, r5, [sp, #72]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r12, [r0, #16] ldr r12, [r0, #16]
ldr lr, [r0, #20] ldr lr, [r0, #20]
ldrd r4, r5, [r0, #24] #else
ldrd r6, r7, [sp, #80] ldrd r12, lr, [r0, #16]
ldrd r8, r9, [sp, #88] #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r4, [r0, #24]
ldr r5, [r0, #28]
#else
ldrd r4, r5, [r0, #24]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r6, [sp, #80]
ldr r7, [sp, #84]
#else
ldrd r6, r7, [sp, #80]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r8, [sp, #88]
ldr r9, [sp, #92]
#else
ldrd r8, r9, [sp, #88]
#endif
add r12, r12, r6 add r12, r12, r6
add lr, lr, r7 add lr, lr, r7
add r4, r4, r8 add r4, r4, r8
add r5, r5, r9 add r5, r5, r9
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r12, [r0, #16] str r12, [r0, #16]
str lr, [r0, #20] str lr, [r0, #20]
strd r4, r5, [r0, #24] #else
strd r12, lr, [r0, #16]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r4, [r0, #24]
str r5, [r0, #28]
#else
strd r4, r5, [r0, #24]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r12, [sp, #80] str r12, [sp, #80]
str lr, [sp, #84] str lr, [sp, #84]
strd r4, r5, [sp, #88] #else
strd r12, lr, [sp, #80]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r4, [sp, #88]
str r5, [sp, #92]
#else
strd r4, r5, [sp, #88]
#endif
subs r2, r2, #0x40 subs r2, r2, #0x40
sub r3, r3, #0xc0 sub r3, r3, #0xc0
add r1, r1, #0x40 add r1, r1, #0x40
@ -1548,14 +1736,39 @@ Transform_Sha256_Len:
push {r4, r5, r6, r7, r8, r9, r10, lr} push {r4, r5, r6, r7, r8, r9, r10, lr}
vpush {d8-d11} vpush {d8-d11}
sub sp, sp, #24 sub sp, sp, #24
strd r0, r1, [sp] #if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r0, [sp]
str r1, [sp, #4]
#else
strd r0, r1, [sp]
#endif
str r2, [sp, #8] str r2, [sp, #8]
adr r12, L_SHA256_transform_neon_len_k adr r12, L_SHA256_transform_neon_len_k
# Load digest into registers # Load digest into registers
ldrd r2, r3, [r0] #if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldrd r4, r5, [r0, #8] ldr r2, [r0]
ldrd r6, r7, [r0, #16] ldr r3, [r0, #4]
ldrd r8, r9, [r0, #24] #else
ldrd r2, r3, [r0]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r4, [r0, #8]
ldr r5, [r0, #12]
#else
ldrd r4, r5, [r0, #8]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r6, [r0, #16]
ldr r7, [r0, #20]
#else
ldrd r6, r7, [r0, #16]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r8, [r0, #24]
ldr r9, [r0, #28]
#else
ldrd r8, r9, [r0, #24]
#endif
# Start of loop processing a block # Start of loop processing a block
L_SHA256_transform_neon_len_begin: L_SHA256_transform_neon_len_begin:
# Load W # Load W
@ -2481,22 +2694,62 @@ L_SHA256_transform_neon_len_start:
add r2, r2, r1 add r2, r2, r1
ldr r10, [sp] ldr r10, [sp]
# Add in digest from start # Add in digest from start
ldrd r0, r1, [r10] #if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r0, [r10]
ldr r1, [r10, #4]
#else
ldrd r0, r1, [r10]
#endif
add r2, r2, r0 add r2, r2, r0
add r3, r3, r1 add r3, r3, r1
strd r2, r3, [r10] #if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldrd r0, r1, [r10, #8] str r2, [r10]
str r3, [r10, #4]
#else
strd r2, r3, [r10]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r0, [r10, #8]
ldr r1, [r10, #12]
#else
ldrd r0, r1, [r10, #8]
#endif
add r4, r4, r0 add r4, r4, r0
add r5, r5, r1 add r5, r5, r1
strd r4, r5, [r10, #8] #if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldrd r0, r1, [r10, #16] str r4, [r10, #8]
str r5, [r10, #12]
#else
strd r4, r5, [r10, #8]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r0, [r10, #16]
ldr r1, [r10, #20]
#else
ldrd r0, r1, [r10, #16]
#endif
add r6, r6, r0 add r6, r6, r0
add r7, r7, r1 add r7, r7, r1
strd r6, r7, [r10, #16] #if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldrd r0, r1, [r10, #24] str r6, [r10, #16]
str r7, [r10, #20]
#else
strd r6, r7, [r10, #16]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r0, [r10, #24]
ldr r1, [r10, #28]
#else
ldrd r0, r1, [r10, #24]
#endif
add r8, r8, r0 add r8, r8, r0
add r9, r9, r1 add r9, r9, r1
strd r8, r9, [r10, #24] #if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r8, [r10, #24]
str r9, [r10, #28]
#else
strd r8, r9, [r10, #24]
#endif
ldr r10, [sp, #8] ldr r10, [sp, #8]
ldr r1, [sp, #4] ldr r1, [sp, #4]
subs r10, r10, #0x40 subs r10, r10, #0x40

View File

@ -115,22 +115,82 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"sub sp, sp, #0xc0\n\t" "sub sp, sp, #0xc0\n\t"
"mov r3, %[L_SHA256_transform_len_k]\n\t" "mov r3, %[L_SHA256_transform_len_k]\n\t"
/* Copy digest to add in at end */ /* Copy digest to add in at end */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r12, [%[sha256]]\n\t"
"ldr lr, [%[sha256], #4]\n\t"
#else
"ldrd r12, lr, [%[sha256]]\n\t" "ldrd r12, lr, [%[sha256]]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r4, [%[sha256], #8]\n\t"
"ldr r5, [%[sha256], #12]\n\t"
#else
"ldrd r4, r5, [%[sha256], #8]\n\t" "ldrd r4, r5, [%[sha256], #8]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r6, [%[sha256], #16]\n\t"
"ldr r7, [%[sha256], #20]\n\t"
#else
"ldrd r6, r7, [%[sha256], #16]\n\t" "ldrd r6, r7, [%[sha256], #16]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r8, [%[sha256], #24]\n\t"
"ldr r9, [%[sha256], #28]\n\t"
#else
"ldrd r8, r9, [%[sha256], #24]\n\t" "ldrd r8, r9, [%[sha256], #24]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r12, [sp, #64]\n\t"
"str lr, [sp, #68]\n\t"
#else
"strd r12, lr, [sp, #64]\n\t" "strd r12, lr, [sp, #64]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r4, [sp, #72]\n\t"
"str r5, [sp, #76]\n\t"
#else
"strd r4, r5, [sp, #72]\n\t" "strd r4, r5, [sp, #72]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r6, [sp, #80]\n\t"
"str r7, [sp, #84]\n\t"
#else
"strd r6, r7, [sp, #80]\n\t" "strd r6, r7, [sp, #80]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r8, [sp, #88]\n\t"
"str r9, [sp, #92]\n\t"
#else
"strd r8, r9, [sp, #88]\n\t" "strd r8, r9, [sp, #88]\n\t"
#endif
/* Start of loop processing a block */ /* Start of loop processing a block */
"\n" "\n"
"L_SHA256_transform_len_begin_%=: \n\t" "L_SHA256_transform_len_begin_%=: \n\t"
/* Load, Reverse and Store W - 64 bytes */ /* Load, Reverse and Store W - 64 bytes */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r12, [%[data]]\n\t"
"ldr lr, [%[data], #4]\n\t"
#else
"ldrd r12, lr, [%[data]]\n\t" "ldrd r12, lr, [%[data]]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r4, [%[data], #8]\n\t"
"ldr r5, [%[data], #12]\n\t"
#else
"ldrd r4, r5, [%[data], #8]\n\t" "ldrd r4, r5, [%[data], #8]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r6, [%[data], #16]\n\t"
"ldr r7, [%[data], #20]\n\t"
#else
"ldrd r6, r7, [%[data], #16]\n\t" "ldrd r6, r7, [%[data], #16]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r8, [%[data], #24]\n\t"
"ldr r9, [%[data], #28]\n\t"
#else
"ldrd r8, r9, [%[data], #24]\n\t" "ldrd r8, r9, [%[data], #24]\n\t"
#endif
"rev r12, r12\n\t" "rev r12, r12\n\t"
"rev lr, lr\n\t" "rev lr, lr\n\t"
"rev r4, r4\n\t" "rev r4, r4\n\t"
@ -139,14 +199,54 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"rev r7, r7\n\t" "rev r7, r7\n\t"
"rev r8, r8\n\t" "rev r8, r8\n\t"
"rev r9, r9\n\t" "rev r9, r9\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r12, [sp]\n\t"
"str lr, [sp, #4]\n\t"
#else
"strd r12, lr, [sp]\n\t" "strd r12, lr, [sp]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r4, [sp, #8]\n\t"
"str r5, [sp, #12]\n\t"
#else
"strd r4, r5, [sp, #8]\n\t" "strd r4, r5, [sp, #8]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r6, [sp, #16]\n\t"
"str r7, [sp, #20]\n\t"
#else
"strd r6, r7, [sp, #16]\n\t" "strd r6, r7, [sp, #16]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r8, [sp, #24]\n\t"
"str r9, [sp, #28]\n\t"
#else
"strd r8, r9, [sp, #24]\n\t" "strd r8, r9, [sp, #24]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r12, [%[data], #32]\n\t"
"ldr lr, [%[data], #36]\n\t"
#else
"ldrd r12, lr, [%[data], #32]\n\t" "ldrd r12, lr, [%[data], #32]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r4, [%[data], #40]\n\t"
"ldr r5, [%[data], #44]\n\t"
#else
"ldrd r4, r5, [%[data], #40]\n\t" "ldrd r4, r5, [%[data], #40]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r6, [%[data], #48]\n\t"
"ldr r7, [%[data], #52]\n\t"
#else
"ldrd r6, r7, [%[data], #48]\n\t" "ldrd r6, r7, [%[data], #48]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r8, [%[data], #56]\n\t"
"ldr r9, [%[data], #60]\n\t"
#else
"ldrd r8, r9, [%[data], #56]\n\t" "ldrd r8, r9, [%[data], #56]\n\t"
#endif
"rev r12, r12\n\t" "rev r12, r12\n\t"
"rev lr, lr\n\t" "rev lr, lr\n\t"
"rev r4, r4\n\t" "rev r4, r4\n\t"
@ -155,10 +255,30 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"rev r7, r7\n\t" "rev r7, r7\n\t"
"rev r8, r8\n\t" "rev r8, r8\n\t"
"rev r9, r9\n\t" "rev r9, r9\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r12, [sp, #32]\n\t"
"str lr, [sp, #36]\n\t"
#else
"strd r12, lr, [sp, #32]\n\t" "strd r12, lr, [sp, #32]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r4, [sp, #40]\n\t"
"str r5, [sp, #44]\n\t"
#else
"strd r4, r5, [sp, #40]\n\t" "strd r4, r5, [sp, #40]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r6, [sp, #48]\n\t"
"str r7, [sp, #52]\n\t"
#else
"strd r6, r7, [sp, #48]\n\t" "strd r6, r7, [sp, #48]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r8, [sp, #56]\n\t"
"str r9, [sp, #60]\n\t"
#else
"strd r8, r9, [sp, #56]\n\t" "strd r8, r9, [sp, #56]\n\t"
#endif
"ldr r9, [%[sha256], #4]\n\t" "ldr r9, [%[sha256], #4]\n\t"
"ldr r12, [%[sha256], #8]\n\t" "ldr r12, [%[sha256], #8]\n\t"
"eor r9, r9, r12\n\t" "eor r9, r9, r12\n\t"
@ -1434,30 +1554,110 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"str r6, [%[sha256], #16]\n\t" "str r6, [%[sha256], #16]\n\t"
"str r7, [%[sha256]]\n\t" "str r7, [%[sha256]]\n\t"
/* Add in digest from start */ /* Add in digest from start */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r12, [%[sha256]]\n\t"
"ldr lr, [%[sha256], #4]\n\t"
#else
"ldrd r12, lr, [%[sha256]]\n\t" "ldrd r12, lr, [%[sha256]]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r4, [%[sha256], #8]\n\t"
"ldr r5, [%[sha256], #12]\n\t"
#else
"ldrd r4, r5, [%[sha256], #8]\n\t" "ldrd r4, r5, [%[sha256], #8]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r6, [sp, #64]\n\t"
"ldr r7, [sp, #68]\n\t"
#else
"ldrd r6, r7, [sp, #64]\n\t" "ldrd r6, r7, [sp, #64]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r8, [sp, #72]\n\t"
"ldr r9, [sp, #76]\n\t"
#else
"ldrd r8, r9, [sp, #72]\n\t" "ldrd r8, r9, [sp, #72]\n\t"
#endif
"add r12, r12, r6\n\t" "add r12, r12, r6\n\t"
"add lr, lr, r7\n\t" "add lr, lr, r7\n\t"
"add r4, r4, r8\n\t" "add r4, r4, r8\n\t"
"add r5, r5, r9\n\t" "add r5, r5, r9\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r12, [%[sha256]]\n\t"
"str lr, [%[sha256], #4]\n\t"
#else
"strd r12, lr, [%[sha256]]\n\t" "strd r12, lr, [%[sha256]]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r4, [%[sha256], #8]\n\t"
"str r5, [%[sha256], #12]\n\t"
#else
"strd r4, r5, [%[sha256], #8]\n\t" "strd r4, r5, [%[sha256], #8]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r12, [sp, #64]\n\t"
"str lr, [sp, #68]\n\t"
#else
"strd r12, lr, [sp, #64]\n\t" "strd r12, lr, [sp, #64]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r4, [sp, #72]\n\t"
"str r5, [sp, #76]\n\t"
#else
"strd r4, r5, [sp, #72]\n\t" "strd r4, r5, [sp, #72]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r12, [%[sha256], #16]\n\t"
"ldr lr, [%[sha256], #20]\n\t"
#else
"ldrd r12, lr, [%[sha256], #16]\n\t" "ldrd r12, lr, [%[sha256], #16]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r4, [%[sha256], #24]\n\t"
"ldr r5, [%[sha256], #28]\n\t"
#else
"ldrd r4, r5, [%[sha256], #24]\n\t" "ldrd r4, r5, [%[sha256], #24]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r6, [sp, #80]\n\t"
"ldr r7, [sp, #84]\n\t"
#else
"ldrd r6, r7, [sp, #80]\n\t" "ldrd r6, r7, [sp, #80]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r8, [sp, #88]\n\t"
"ldr r9, [sp, #92]\n\t"
#else
"ldrd r8, r9, [sp, #88]\n\t" "ldrd r8, r9, [sp, #88]\n\t"
#endif
"add r12, r12, r6\n\t" "add r12, r12, r6\n\t"
"add lr, lr, r7\n\t" "add lr, lr, r7\n\t"
"add r4, r4, r8\n\t" "add r4, r4, r8\n\t"
"add r5, r5, r9\n\t" "add r5, r5, r9\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r12, [%[sha256], #16]\n\t"
"str lr, [%[sha256], #20]\n\t"
#else
"strd r12, lr, [%[sha256], #16]\n\t" "strd r12, lr, [%[sha256], #16]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r4, [%[sha256], #24]\n\t"
"str r5, [%[sha256], #28]\n\t"
#else
"strd r4, r5, [%[sha256], #24]\n\t" "strd r4, r5, [%[sha256], #24]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r12, [sp, #80]\n\t"
"str lr, [sp, #84]\n\t"
#else
"strd r12, lr, [sp, #80]\n\t" "strd r12, lr, [sp, #80]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r4, [sp, #88]\n\t"
"str r5, [sp, #92]\n\t"
#else
"strd r4, r5, [sp, #88]\n\t" "strd r4, r5, [sp, #88]\n\t"
#endif
"subs %[len], %[len], #0x40\n\t" "subs %[len], %[len], #0x40\n\t"
"sub r3, r3, #0xc0\n\t" "sub r3, r3, #0xc0\n\t"
"add %[data], %[data], #0x40\n\t" "add %[data], %[data], #0x40\n\t"
@ -1549,14 +1749,39 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
__asm__ __volatile__ ( __asm__ __volatile__ (
"sub sp, sp, #24\n\t" "sub sp, sp, #24\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str %[sha256], [sp]\n\t"
"str %[data], [sp, #4]\n\t"
#else
"strd %[sha256], %[data], [sp]\n\t" "strd %[sha256], %[data], [sp]\n\t"
#endif
"str %[len], [sp, #8]\n\t" "str %[len], [sp, #8]\n\t"
"mov r12, %[L_SHA256_transform_neon_len_k]\n\t" "mov r12, %[L_SHA256_transform_neon_len_k]\n\t"
/* Load digest into registers */ /* Load digest into registers */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr %[len], [%[sha256]]\n\t"
"ldr r3, [%[sha256], #4]\n\t"
#else
"ldrd %[len], r3, [%[sha256]]\n\t" "ldrd %[len], r3, [%[sha256]]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r4, [%[sha256], #8]\n\t"
"ldr r5, [%[sha256], #12]\n\t"
#else
"ldrd r4, r5, [%[sha256], #8]\n\t" "ldrd r4, r5, [%[sha256], #8]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r6, [%[sha256], #16]\n\t"
"ldr r7, [%[sha256], #20]\n\t"
#else
"ldrd r6, r7, [%[sha256], #16]\n\t" "ldrd r6, r7, [%[sha256], #16]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r8, [%[sha256], #24]\n\t"
"ldr r9, [%[sha256], #28]\n\t"
#else
"ldrd r8, r9, [%[sha256], #24]\n\t" "ldrd r8, r9, [%[sha256], #24]\n\t"
#endif
/* Start of loop processing a block */ /* Start of loop processing a block */
"\n" "\n"
"L_SHA256_transform_neon_len_begin_%=: \n\t" "L_SHA256_transform_neon_len_begin_%=: \n\t"
@ -2484,22 +2709,62 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"add %[len], %[len], %[data]\n\t" "add %[len], %[len], %[data]\n\t"
"ldr r10, [sp]\n\t" "ldr r10, [sp]\n\t"
/* Add in digest from start */ /* Add in digest from start */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr %[sha256], [r10]\n\t"
"ldr %[data], [r10, #4]\n\t"
#else
"ldrd %[sha256], %[data], [r10]\n\t" "ldrd %[sha256], %[data], [r10]\n\t"
#endif
"add %[len], %[len], %[sha256]\n\t" "add %[len], %[len], %[sha256]\n\t"
"add r3, r3, %[data]\n\t" "add r3, r3, %[data]\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str %[len], [r10]\n\t"
"str r3, [r10, #4]\n\t"
#else
"strd %[len], r3, [r10]\n\t" "strd %[len], r3, [r10]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr %[sha256], [r10, #8]\n\t"
"ldr %[data], [r10, #12]\n\t"
#else
"ldrd %[sha256], %[data], [r10, #8]\n\t" "ldrd %[sha256], %[data], [r10, #8]\n\t"
#endif
"add r4, r4, %[sha256]\n\t" "add r4, r4, %[sha256]\n\t"
"add r5, r5, %[data]\n\t" "add r5, r5, %[data]\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r4, [r10, #8]\n\t"
"str r5, [r10, #12]\n\t"
#else
"strd r4, r5, [r10, #8]\n\t" "strd r4, r5, [r10, #8]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr %[sha256], [r10, #16]\n\t"
"ldr %[data], [r10, #20]\n\t"
#else
"ldrd %[sha256], %[data], [r10, #16]\n\t" "ldrd %[sha256], %[data], [r10, #16]\n\t"
#endif
"add r6, r6, %[sha256]\n\t" "add r6, r6, %[sha256]\n\t"
"add r7, r7, %[data]\n\t" "add r7, r7, %[data]\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r6, [r10, #16]\n\t"
"str r7, [r10, #20]\n\t"
#else
"strd r6, r7, [r10, #16]\n\t" "strd r6, r7, [r10, #16]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr %[sha256], [r10, #24]\n\t"
"ldr %[data], [r10, #28]\n\t"
#else
"ldrd %[sha256], %[data], [r10, #24]\n\t" "ldrd %[sha256], %[data], [r10, #24]\n\t"
#endif
"add r8, r8, %[sha256]\n\t" "add r8, r8, %[sha256]\n\t"
"add r9, r9, %[data]\n\t" "add r9, r9, %[data]\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r8, [r10, #24]\n\t"
"str r9, [r10, #28]\n\t"
#else
"strd r8, r9, [r10, #24]\n\t" "strd r8, r9, [r10, #24]\n\t"
#endif
"ldr r10, [sp, #8]\n\t" "ldr r10, [sp, #8]\n\t"
"ldr %[data], [sp, #4]\n\t" "ldr %[data], [sp, #4]\n\t"
"subs r10, r10, #0x40\n\t" "subs r10, r10, #0x40\n\t"

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff