Merge pull request #5643 from SparkiDev/gen_arm32_fixup

ARM32 assembly code: fixed scripts
This commit is contained in:
David Garske
2022-09-29 10:20:06 -07:00
committed by GitHub
7 changed files with 18804 additions and 1266 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -108,24 +108,81 @@ Transform_Sha256_Len:
sub sp, sp, #0xc0
adr r3, L_SHA256_transform_len_k
# Copy digest to add in at end
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r12, [r0]
ldr lr, [r0, #4]
ldrd r4, r5, [r0, #8]
ldrd r6, r7, [r0, #16]
ldrd r8, r9, [r0, #24]
#else
ldrd r12, lr, [r0]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r4, [r0, #8]
ldr r5, [r0, #12]
#else
ldrd r4, r5, [r0, #8]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r6, [r0, #16]
ldr r7, [r0, #20]
#else
ldrd r6, r7, [r0, #16]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r8, [r0, #24]
ldr r9, [r0, #28]
#else
ldrd r8, r9, [r0, #24]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r12, [sp, #64]
str lr, [sp, #68]
strd r4, r5, [sp, #72]
strd r6, r7, [sp, #80]
strd r8, r9, [sp, #88]
#else
strd r12, lr, [sp, #64]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r4, [sp, #72]
str r5, [sp, #76]
#else
strd r4, r5, [sp, #72]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r6, [sp, #80]
str r7, [sp, #84]
#else
strd r6, r7, [sp, #80]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r8, [sp, #88]
str r9, [sp, #92]
#else
strd r8, r9, [sp, #88]
#endif
# Start of loop processing a block
L_SHA256_transform_len_begin:
# Load, Reverse and Store W - 64 bytes
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r12, [r1]
ldr lr, [r1, #4]
ldrd r4, r5, [r1, #8]
ldrd r6, r7, [r1, #16]
ldrd r8, r9, [r1, #24]
#else
ldrd r12, lr, [r1]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r4, [r1, #8]
ldr r5, [r1, #12]
#else
ldrd r4, r5, [r1, #8]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r6, [r1, #16]
ldr r7, [r1, #20]
#else
ldrd r6, r7, [r1, #16]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r8, [r1, #24]
ldr r9, [r1, #28]
#else
ldrd r8, r9, [r1, #24]
#endif
rev r12, r12
rev lr, lr
rev r4, r4
@@ -134,16 +191,54 @@ L_SHA256_transform_len_begin:
rev r7, r7
rev r8, r8
rev r9, r9
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r12, [sp]
str lr, [sp, #4]
strd r4, r5, [sp, #8]
strd r6, r7, [sp, #16]
strd r8, r9, [sp, #24]
#else
strd r12, lr, [sp]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r4, [sp, #8]
str r5, [sp, #12]
#else
strd r4, r5, [sp, #8]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r6, [sp, #16]
str r7, [sp, #20]
#else
strd r6, r7, [sp, #16]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r8, [sp, #24]
str r9, [sp, #28]
#else
strd r8, r9, [sp, #24]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r12, [r1, #32]
ldr lr, [r1, #36]
ldrd r4, r5, [r1, #40]
ldrd r6, r7, [r1, #48]
ldrd r8, r9, [r1, #56]
#else
ldrd r12, lr, [r1, #32]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r4, [r1, #40]
ldr r5, [r1, #44]
#else
ldrd r4, r5, [r1, #40]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r6, [r1, #48]
ldr r7, [r1, #52]
#else
ldrd r6, r7, [r1, #48]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r8, [r1, #56]
ldr r9, [r1, #60]
#else
ldrd r8, r9, [r1, #56]
#endif
rev r12, r12
rev lr, lr
rev r4, r4
@@ -152,11 +247,30 @@ L_SHA256_transform_len_begin:
rev r7, r7
rev r8, r8
rev r9, r9
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r12, [sp, #32]
str lr, [sp, #36]
strd r4, r5, [sp, #40]
strd r6, r7, [sp, #48]
strd r8, r9, [sp, #56]
#else
strd r12, lr, [sp, #32]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r4, [sp, #40]
str r5, [sp, #44]
#else
strd r4, r5, [sp, #40]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r6, [sp, #48]
str r7, [sp, #52]
#else
strd r6, r7, [sp, #48]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r8, [sp, #56]
str r9, [sp, #60]
#else
strd r8, r9, [sp, #56]
#endif
ldr r9, [r0, #4]
ldr r12, [r0, #8]
eor r9, r9, r12
@@ -1431,36 +1545,110 @@ L_SHA256_transform_len_start:
str r6, [r0, #16]
str r7, [r0]
# Add in digest from start
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r12, [r0]
ldr lr, [r0, #4]
ldrd r4, r5, [r0, #8]
ldrd r6, r7, [sp, #64]
ldrd r8, r9, [sp, #72]
#else
ldrd r12, lr, [r0]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r4, [r0, #8]
ldr r5, [r0, #12]
#else
ldrd r4, r5, [r0, #8]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r6, [sp, #64]
ldr r7, [sp, #68]
#else
ldrd r6, r7, [sp, #64]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r8, [sp, #72]
ldr r9, [sp, #76]
#else
ldrd r8, r9, [sp, #72]
#endif
add r12, r12, r6
add lr, lr, r7
add r4, r4, r8
add r5, r5, r9
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r12, [r0]
str lr, [r0, #4]
strd r4, r5, [r0, #8]
#else
strd r12, lr, [r0]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r4, [r0, #8]
str r5, [r0, #12]
#else
strd r4, r5, [r0, #8]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r12, [sp, #64]
str lr, [sp, #68]
strd r4, r5, [sp, #72]
#else
strd r12, lr, [sp, #64]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r4, [sp, #72]
str r5, [sp, #76]
#else
strd r4, r5, [sp, #72]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r12, [r0, #16]
ldr lr, [r0, #20]
ldrd r4, r5, [r0, #24]
ldrd r6, r7, [sp, #80]
ldrd r8, r9, [sp, #88]
#else
ldrd r12, lr, [r0, #16]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r4, [r0, #24]
ldr r5, [r0, #28]
#else
ldrd r4, r5, [r0, #24]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r6, [sp, #80]
ldr r7, [sp, #84]
#else
ldrd r6, r7, [sp, #80]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r8, [sp, #88]
ldr r9, [sp, #92]
#else
ldrd r8, r9, [sp, #88]
#endif
add r12, r12, r6
add lr, lr, r7
add r4, r4, r8
add r5, r5, r9
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r12, [r0, #16]
str lr, [r0, #20]
strd r4, r5, [r0, #24]
#else
strd r12, lr, [r0, #16]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r4, [r0, #24]
str r5, [r0, #28]
#else
strd r4, r5, [r0, #24]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r12, [sp, #80]
str lr, [sp, #84]
strd r4, r5, [sp, #88]
#else
strd r12, lr, [sp, #80]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r4, [sp, #88]
str r5, [sp, #92]
#else
strd r4, r5, [sp, #88]
#endif
subs r2, r2, #0x40
sub r3, r3, #0xc0
add r1, r1, #0x40
@@ -1548,14 +1736,39 @@ Transform_Sha256_Len:
push {r4, r5, r6, r7, r8, r9, r10, lr}
vpush {d8-d11}
sub sp, sp, #24
strd r0, r1, [sp]
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r0, [sp]
str r1, [sp, #4]
#else
strd r0, r1, [sp]
#endif
str r2, [sp, #8]
adr r12, L_SHA256_transform_neon_len_k
# Load digest into registers
ldrd r2, r3, [r0]
ldrd r4, r5, [r0, #8]
ldrd r6, r7, [r0, #16]
ldrd r8, r9, [r0, #24]
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r2, [r0]
ldr r3, [r0, #4]
#else
ldrd r2, r3, [r0]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r4, [r0, #8]
ldr r5, [r0, #12]
#else
ldrd r4, r5, [r0, #8]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r6, [r0, #16]
ldr r7, [r0, #20]
#else
ldrd r6, r7, [r0, #16]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r8, [r0, #24]
ldr r9, [r0, #28]
#else
ldrd r8, r9, [r0, #24]
#endif
# Start of loop processing a block
L_SHA256_transform_neon_len_begin:
# Load W
@@ -2481,22 +2694,62 @@ L_SHA256_transform_neon_len_start:
add r2, r2, r1
ldr r10, [sp]
# Add in digest from start
ldrd r0, r1, [r10]
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r0, [r10]
ldr r1, [r10, #4]
#else
ldrd r0, r1, [r10]
#endif
add r2, r2, r0
add r3, r3, r1
strd r2, r3, [r10]
ldrd r0, r1, [r10, #8]
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r2, [r10]
str r3, [r10, #4]
#else
strd r2, r3, [r10]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r0, [r10, #8]
ldr r1, [r10, #12]
#else
ldrd r0, r1, [r10, #8]
#endif
add r4, r4, r0
add r5, r5, r1
strd r4, r5, [r10, #8]
ldrd r0, r1, [r10, #16]
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r4, [r10, #8]
str r5, [r10, #12]
#else
strd r4, r5, [r10, #8]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r0, [r10, #16]
ldr r1, [r10, #20]
#else
ldrd r0, r1, [r10, #16]
#endif
add r6, r6, r0
add r7, r7, r1
strd r6, r7, [r10, #16]
ldrd r0, r1, [r10, #24]
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r6, [r10, #16]
str r7, [r10, #20]
#else
strd r6, r7, [r10, #16]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
ldr r0, [r10, #24]
ldr r1, [r10, #28]
#else
ldrd r0, r1, [r10, #24]
#endif
add r8, r8, r0
add r9, r9, r1
strd r8, r9, [r10, #24]
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
str r8, [r10, #24]
str r9, [r10, #28]
#else
strd r8, r9, [r10, #24]
#endif
ldr r10, [sp, #8]
ldr r1, [sp, #4]
subs r10, r10, #0x40

View File

@@ -115,22 +115,82 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"sub sp, sp, #0xc0\n\t"
"mov r3, %[L_SHA256_transform_len_k]\n\t"
/* Copy digest to add in at end */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r12, [%[sha256]]\n\t"
"ldr lr, [%[sha256], #4]\n\t"
#else
"ldrd r12, lr, [%[sha256]]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r4, [%[sha256], #8]\n\t"
"ldr r5, [%[sha256], #12]\n\t"
#else
"ldrd r4, r5, [%[sha256], #8]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r6, [%[sha256], #16]\n\t"
"ldr r7, [%[sha256], #20]\n\t"
#else
"ldrd r6, r7, [%[sha256], #16]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r8, [%[sha256], #24]\n\t"
"ldr r9, [%[sha256], #28]\n\t"
#else
"ldrd r8, r9, [%[sha256], #24]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r12, [sp, #64]\n\t"
"str lr, [sp, #68]\n\t"
#else
"strd r12, lr, [sp, #64]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r4, [sp, #72]\n\t"
"str r5, [sp, #76]\n\t"
#else
"strd r4, r5, [sp, #72]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r6, [sp, #80]\n\t"
"str r7, [sp, #84]\n\t"
#else
"strd r6, r7, [sp, #80]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r8, [sp, #88]\n\t"
"str r9, [sp, #92]\n\t"
#else
"strd r8, r9, [sp, #88]\n\t"
#endif
/* Start of loop processing a block */
"\n"
"L_SHA256_transform_len_begin_%=: \n\t"
/* Load, Reverse and Store W - 64 bytes */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r12, [%[data]]\n\t"
"ldr lr, [%[data], #4]\n\t"
#else
"ldrd r12, lr, [%[data]]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r4, [%[data], #8]\n\t"
"ldr r5, [%[data], #12]\n\t"
#else
"ldrd r4, r5, [%[data], #8]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r6, [%[data], #16]\n\t"
"ldr r7, [%[data], #20]\n\t"
#else
"ldrd r6, r7, [%[data], #16]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r8, [%[data], #24]\n\t"
"ldr r9, [%[data], #28]\n\t"
#else
"ldrd r8, r9, [%[data], #24]\n\t"
#endif
"rev r12, r12\n\t"
"rev lr, lr\n\t"
"rev r4, r4\n\t"
@@ -139,14 +199,54 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"rev r7, r7\n\t"
"rev r8, r8\n\t"
"rev r9, r9\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r12, [sp]\n\t"
"str lr, [sp, #4]\n\t"
#else
"strd r12, lr, [sp]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r4, [sp, #8]\n\t"
"str r5, [sp, #12]\n\t"
#else
"strd r4, r5, [sp, #8]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r6, [sp, #16]\n\t"
"str r7, [sp, #20]\n\t"
#else
"strd r6, r7, [sp, #16]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r8, [sp, #24]\n\t"
"str r9, [sp, #28]\n\t"
#else
"strd r8, r9, [sp, #24]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r12, [%[data], #32]\n\t"
"ldr lr, [%[data], #36]\n\t"
#else
"ldrd r12, lr, [%[data], #32]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r4, [%[data], #40]\n\t"
"ldr r5, [%[data], #44]\n\t"
#else
"ldrd r4, r5, [%[data], #40]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r6, [%[data], #48]\n\t"
"ldr r7, [%[data], #52]\n\t"
#else
"ldrd r6, r7, [%[data], #48]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r8, [%[data], #56]\n\t"
"ldr r9, [%[data], #60]\n\t"
#else
"ldrd r8, r9, [%[data], #56]\n\t"
#endif
"rev r12, r12\n\t"
"rev lr, lr\n\t"
"rev r4, r4\n\t"
@@ -155,10 +255,30 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"rev r7, r7\n\t"
"rev r8, r8\n\t"
"rev r9, r9\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r12, [sp, #32]\n\t"
"str lr, [sp, #36]\n\t"
#else
"strd r12, lr, [sp, #32]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r4, [sp, #40]\n\t"
"str r5, [sp, #44]\n\t"
#else
"strd r4, r5, [sp, #40]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r6, [sp, #48]\n\t"
"str r7, [sp, #52]\n\t"
#else
"strd r6, r7, [sp, #48]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r8, [sp, #56]\n\t"
"str r9, [sp, #60]\n\t"
#else
"strd r8, r9, [sp, #56]\n\t"
#endif
"ldr r9, [%[sha256], #4]\n\t"
"ldr r12, [%[sha256], #8]\n\t"
"eor r9, r9, r12\n\t"
@@ -1434,30 +1554,110 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"str r6, [%[sha256], #16]\n\t"
"str r7, [%[sha256]]\n\t"
/* Add in digest from start */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r12, [%[sha256]]\n\t"
"ldr lr, [%[sha256], #4]\n\t"
#else
"ldrd r12, lr, [%[sha256]]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r4, [%[sha256], #8]\n\t"
"ldr r5, [%[sha256], #12]\n\t"
#else
"ldrd r4, r5, [%[sha256], #8]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r6, [sp, #64]\n\t"
"ldr r7, [sp, #68]\n\t"
#else
"ldrd r6, r7, [sp, #64]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r8, [sp, #72]\n\t"
"ldr r9, [sp, #76]\n\t"
#else
"ldrd r8, r9, [sp, #72]\n\t"
#endif
"add r12, r12, r6\n\t"
"add lr, lr, r7\n\t"
"add r4, r4, r8\n\t"
"add r5, r5, r9\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r12, [%[sha256]]\n\t"
"str lr, [%[sha256], #4]\n\t"
#else
"strd r12, lr, [%[sha256]]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r4, [%[sha256], #8]\n\t"
"str r5, [%[sha256], #12]\n\t"
#else
"strd r4, r5, [%[sha256], #8]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r12, [sp, #64]\n\t"
"str lr, [sp, #68]\n\t"
#else
"strd r12, lr, [sp, #64]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r4, [sp, #72]\n\t"
"str r5, [sp, #76]\n\t"
#else
"strd r4, r5, [sp, #72]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r12, [%[sha256], #16]\n\t"
"ldr lr, [%[sha256], #20]\n\t"
#else
"ldrd r12, lr, [%[sha256], #16]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r4, [%[sha256], #24]\n\t"
"ldr r5, [%[sha256], #28]\n\t"
#else
"ldrd r4, r5, [%[sha256], #24]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r6, [sp, #80]\n\t"
"ldr r7, [sp, #84]\n\t"
#else
"ldrd r6, r7, [sp, #80]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r8, [sp, #88]\n\t"
"ldr r9, [sp, #92]\n\t"
#else
"ldrd r8, r9, [sp, #88]\n\t"
#endif
"add r12, r12, r6\n\t"
"add lr, lr, r7\n\t"
"add r4, r4, r8\n\t"
"add r5, r5, r9\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r12, [%[sha256], #16]\n\t"
"str lr, [%[sha256], #20]\n\t"
#else
"strd r12, lr, [%[sha256], #16]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r4, [%[sha256], #24]\n\t"
"str r5, [%[sha256], #28]\n\t"
#else
"strd r4, r5, [%[sha256], #24]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r12, [sp, #80]\n\t"
"str lr, [sp, #84]\n\t"
#else
"strd r12, lr, [sp, #80]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r4, [sp, #88]\n\t"
"str r5, [sp, #92]\n\t"
#else
"strd r4, r5, [sp, #88]\n\t"
#endif
"subs %[len], %[len], #0x40\n\t"
"sub r3, r3, #0xc0\n\t"
"add %[data], %[data], #0x40\n\t"
@@ -1549,14 +1749,39 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
__asm__ __volatile__ (
"sub sp, sp, #24\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str %[sha256], [sp]\n\t"
"str %[data], [sp, #4]\n\t"
#else
"strd %[sha256], %[data], [sp]\n\t"
#endif
"str %[len], [sp, #8]\n\t"
"mov r12, %[L_SHA256_transform_neon_len_k]\n\t"
/* Load digest into registers */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr %[len], [%[sha256]]\n\t"
"ldr r3, [%[sha256], #4]\n\t"
#else
"ldrd %[len], r3, [%[sha256]]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r4, [%[sha256], #8]\n\t"
"ldr r5, [%[sha256], #12]\n\t"
#else
"ldrd r4, r5, [%[sha256], #8]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r6, [%[sha256], #16]\n\t"
"ldr r7, [%[sha256], #20]\n\t"
#else
"ldrd r6, r7, [%[sha256], #16]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr r8, [%[sha256], #24]\n\t"
"ldr r9, [%[sha256], #28]\n\t"
#else
"ldrd r8, r9, [%[sha256], #24]\n\t"
#endif
/* Start of loop processing a block */
"\n"
"L_SHA256_transform_neon_len_begin_%=: \n\t"
@@ -2484,22 +2709,62 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"add %[len], %[len], %[data]\n\t"
"ldr r10, [sp]\n\t"
/* Add in digest from start */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr %[sha256], [r10]\n\t"
"ldr %[data], [r10, #4]\n\t"
#else
"ldrd %[sha256], %[data], [r10]\n\t"
#endif
"add %[len], %[len], %[sha256]\n\t"
"add r3, r3, %[data]\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str %[len], [r10]\n\t"
"str r3, [r10, #4]\n\t"
#else
"strd %[len], r3, [r10]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr %[sha256], [r10, #8]\n\t"
"ldr %[data], [r10, #12]\n\t"
#else
"ldrd %[sha256], %[data], [r10, #8]\n\t"
#endif
"add r4, r4, %[sha256]\n\t"
"add r5, r5, %[data]\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r4, [r10, #8]\n\t"
"str r5, [r10, #12]\n\t"
#else
"strd r4, r5, [r10, #8]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr %[sha256], [r10, #16]\n\t"
"ldr %[data], [r10, #20]\n\t"
#else
"ldrd %[sha256], %[data], [r10, #16]\n\t"
#endif
"add r6, r6, %[sha256]\n\t"
"add r7, r7, %[data]\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r6, [r10, #16]\n\t"
"str r7, [r10, #20]\n\t"
#else
"strd r6, r7, [r10, #16]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"ldr %[sha256], [r10, #24]\n\t"
"ldr %[data], [r10, #28]\n\t"
#else
"ldrd %[sha256], %[data], [r10, #24]\n\t"
#endif
"add r8, r8, %[sha256]\n\t"
"add r9, r9, %[data]\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
"str r8, [r10, #24]\n\t"
"str r9, [r10, #28]\n\t"
#else
"strd r8, r9, [r10, #24]\n\t"
#endif
"ldr r10, [sp, #8]\n\t"
"ldr %[data], [sp, #4]\n\t"
"subs r10, r10, #0x40\n\t"

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff