Improve SHA-512 and Curve25519

This commit is contained in:
Sean Parkinson
2019-05-30 08:49:39 +10:00
parent 173163d1a3
commit 33d27a391a
4 changed files with 1177 additions and 2015 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -19,6 +19,10 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/ */
/* Generated using (from wolfssl):
* cd ../scripts
* ruby ./sha2/sha512.rb arm64 ../wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.S
*/
#ifdef __aarch64__ #ifdef __aarch64__
.text .text
.section .rodata .section .rodata
@ -109,8 +113,8 @@ L_SHA512_transform_neon_len_k:
.text .text
.section .rodata .section .rodata
.type L_SHA512_transform_neon_len_ror8, %object .type L_SHA512_transform_neon_len_ror8, %object
.align 4
.size L_SHA512_transform_neon_len_ror8, 16 .size L_SHA512_transform_neon_len_ror8, 16
.align 4
L_SHA512_transform_neon_len_ror8: L_SHA512_transform_neon_len_ror8:
.xword 0x7060504030201, 0x80f0e0d0c0b0a09 .xword 0x7060504030201, 0x80f0e0d0c0b0a09
.text .text
@ -169,14 +173,10 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ror x12, x8, #14 ror x12, x8, #14
ror x14, x4, #28 ror x14, x4, #28
ror x13, x8, #18 eor x12, x12, x8, ror 18
ror x15, x4, #34 eor x14, x14, x4, ror 34
eor x12, x13, x12 eor x12, x12, x8, ror 41
eor x14, x15, x14 eor x15, x14, x4, ror 39
ror x13, x8, #41
ror x15, x4, #39
eor x12, x13, x12
eor x15, x15, x14
add x11, x11, x12 add x11, x11, x12
eor x17, x4, x5 eor x17, x4, x5
eor x12, x9, x10 eor x12, x9, x10
@ -195,43 +195,39 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ext v10.16b, v0.16b, v1.16b, #8 ext v10.16b, v0.16b, v1.16b, #8
ror x12, x7, #14 ror x12, x7, #14
ext v9.16b, v4.16b, v5.16b, #8
ror x14, x11, #28
add v0.2d, v0.2d, v9.2d
ror x13, x7, #18
shl v8.2d, v7.2d, #45 shl v8.2d, v7.2d, #45
ror x15, x11, #34 ror x14, x11, #28
sri v8.2d, v7.2d, #19 sri v8.2d, v7.2d, #19
eor x12, x13, x12 eor x12, x12, x7, ror 18
shl v9.2d, v7.2d, #3 shl v9.2d, v7.2d, #3
eor x14, x15, x14 eor x14, x14, x11, ror 34
sri v9.2d, v7.2d, #61 sri v9.2d, v7.2d, #61
ror x13, x7, #41 eor x12, x12, x7, ror 41
eor v9.16b, v9.16b, v8.16b eor v9.16b, v9.16b, v8.16b
ror x15, x11, #39 eor x15, x14, x11, ror 39
ushr v8.2d, v7.2d, #6 ushr v8.2d, v7.2d, #6
eor x12, x13, x12
eor v9.16b, v9.16b, v8.16b
eor x15, x15, x14
add v0.2d, v0.2d, v9.2d
add x10, x10, x12 add x10, x10, x12
shl v8.2d, v10.2d, #63 eor v9.16b, v9.16b, v8.16b
eor x16, x11, x4 eor x16, x11, x4
sri v8.2d, v10.2d, #1
eor x12, x8, x9
tbl v9.16b, { v10.16b }, v11.16b
and x17, x16, x17
eor v9.16b, v9.16b, v8.16b
and x12, x12, x7
ushr v10.2d, v10.2d, #7
add x10, x10, x18
eor v9.16b, v9.16b, v10.16b
eor x12, x12, x9
add v0.2d, v0.2d, v9.2d add v0.2d, v0.2d, v9.2d
eor x12, x8, x9
ext v9.16b, v4.16b, v5.16b, #8
and x17, x16, x17
add v0.2d, v0.2d, v9.2d
and x12, x12, x7
shl v8.2d, v10.2d, #63
add x10, x10, x18
sri v8.2d, v10.2d, #1
eor x12, x12, x9
tbl v9.16b, {v10.16b}, v11.16b
add x10, x10, x19 add x10, x10, x19
eor v9.16b, v9.16b, v8.16b
eor x17, x17, x4 eor x17, x17, x4
ushr v10.2d, v10.2d, #7
add x10, x10, x12 add x10, x10, x12
eor v9.16b, v9.16b, v10.16b
add x15, x15, x17 add x15, x15, x17
add v0.2d, v0.2d, v9.2d
add x6, x6, x10 add x6, x6, x10
add x10, x10, x15 add x10, x10, x15
# Round 2 # Round 2
@ -239,14 +235,10 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ror x12, x6, #14 ror x12, x6, #14
ror x14, x10, #28 ror x14, x10, #28
ror x13, x6, #18 eor x12, x12, x6, ror 18
ror x15, x10, #34 eor x14, x14, x10, ror 34
eor x12, x13, x12 eor x12, x12, x6, ror 41
eor x14, x15, x14 eor x15, x14, x10, ror 39
ror x13, x6, #41
ror x15, x10, #39
eor x12, x13, x12
eor x15, x15, x14
add x9, x9, x12 add x9, x9, x12
eor x17, x10, x11 eor x17, x10, x11
eor x12, x7, x8 eor x12, x7, x8
@ -265,43 +257,39 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ext v10.16b, v1.16b, v2.16b, #8 ext v10.16b, v1.16b, v2.16b, #8
ror x12, x5, #14 ror x12, x5, #14
ext v9.16b, v5.16b, v6.16b, #8
ror x14, x9, #28
add v1.2d, v1.2d, v9.2d
ror x13, x5, #18
shl v8.2d, v0.2d, #45 shl v8.2d, v0.2d, #45
ror x15, x9, #34 ror x14, x9, #28
sri v8.2d, v0.2d, #19 sri v8.2d, v0.2d, #19
eor x12, x13, x12 eor x12, x12, x5, ror 18
shl v9.2d, v0.2d, #3 shl v9.2d, v0.2d, #3
eor x14, x15, x14 eor x14, x14, x9, ror 34
sri v9.2d, v0.2d, #61 sri v9.2d, v0.2d, #61
ror x13, x5, #41 eor x12, x12, x5, ror 41
eor v9.16b, v9.16b, v8.16b eor v9.16b, v9.16b, v8.16b
ror x15, x9, #39 eor x15, x14, x9, ror 39
ushr v8.2d, v0.2d, #6 ushr v8.2d, v0.2d, #6
eor x12, x13, x12
eor v9.16b, v9.16b, v8.16b
eor x15, x15, x14
add v1.2d, v1.2d, v9.2d
add x8, x8, x12 add x8, x8, x12
shl v8.2d, v10.2d, #63 eor v9.16b, v9.16b, v8.16b
eor x16, x9, x10 eor x16, x9, x10
sri v8.2d, v10.2d, #1
eor x12, x6, x7
tbl v9.16b, { v10.16b }, v11.16b
and x17, x16, x17
eor v9.16b, v9.16b, v8.16b
and x12, x12, x5
ushr v10.2d, v10.2d, #7
add x8, x8, x18
eor v9.16b, v9.16b, v10.16b
eor x12, x12, x7
add v1.2d, v1.2d, v9.2d add v1.2d, v1.2d, v9.2d
eor x12, x6, x7
ext v9.16b, v5.16b, v6.16b, #8
and x17, x16, x17
add v1.2d, v1.2d, v9.2d
and x12, x12, x5
shl v8.2d, v10.2d, #63
add x8, x8, x18
sri v8.2d, v10.2d, #1
eor x12, x12, x7
tbl v9.16b, {v10.16b}, v11.16b
add x8, x8, x19 add x8, x8, x19
eor v9.16b, v9.16b, v8.16b
eor x17, x17, x10 eor x17, x17, x10
ushr v10.2d, v10.2d, #7
add x8, x8, x12 add x8, x8, x12
eor v9.16b, v9.16b, v10.16b
add x15, x15, x17 add x15, x15, x17
add v1.2d, v1.2d, v9.2d
add x4, x4, x8 add x4, x4, x8
add x8, x8, x15 add x8, x8, x15
# Round 4 # Round 4
@ -309,14 +297,10 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ror x12, x4, #14 ror x12, x4, #14
ror x14, x8, #28 ror x14, x8, #28
ror x13, x4, #18 eor x12, x12, x4, ror 18
ror x15, x8, #34 eor x14, x14, x8, ror 34
eor x12, x13, x12 eor x12, x12, x4, ror 41
eor x14, x15, x14 eor x15, x14, x8, ror 39
ror x13, x4, #41
ror x15, x8, #39
eor x12, x13, x12
eor x15, x15, x14
add x7, x7, x12 add x7, x7, x12
eor x17, x8, x9 eor x17, x8, x9
eor x12, x5, x6 eor x12, x5, x6
@ -335,43 +319,39 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ext v10.16b, v2.16b, v3.16b, #8 ext v10.16b, v2.16b, v3.16b, #8
ror x12, x11, #14 ror x12, x11, #14
ext v9.16b, v6.16b, v7.16b, #8
ror x14, x7, #28
add v2.2d, v2.2d, v9.2d
ror x13, x11, #18
shl v8.2d, v1.2d, #45 shl v8.2d, v1.2d, #45
ror x15, x7, #34 ror x14, x7, #28
sri v8.2d, v1.2d, #19 sri v8.2d, v1.2d, #19
eor x12, x13, x12 eor x12, x12, x11, ror 18
shl v9.2d, v1.2d, #3 shl v9.2d, v1.2d, #3
eor x14, x15, x14 eor x14, x14, x7, ror 34
sri v9.2d, v1.2d, #61 sri v9.2d, v1.2d, #61
ror x13, x11, #41 eor x12, x12, x11, ror 41
eor v9.16b, v9.16b, v8.16b eor v9.16b, v9.16b, v8.16b
ror x15, x7, #39 eor x15, x14, x7, ror 39
ushr v8.2d, v1.2d, #6 ushr v8.2d, v1.2d, #6
eor x12, x13, x12
eor v9.16b, v9.16b, v8.16b
eor x15, x15, x14
add v2.2d, v2.2d, v9.2d
add x6, x6, x12 add x6, x6, x12
shl v8.2d, v10.2d, #63 eor v9.16b, v9.16b, v8.16b
eor x16, x7, x8 eor x16, x7, x8
sri v8.2d, v10.2d, #1
eor x12, x4, x5
tbl v9.16b, { v10.16b }, v11.16b
and x17, x16, x17
eor v9.16b, v9.16b, v8.16b
and x12, x12, x11
ushr v10.2d, v10.2d, #7
add x6, x6, x18
eor v9.16b, v9.16b, v10.16b
eor x12, x12, x5
add v2.2d, v2.2d, v9.2d add v2.2d, v2.2d, v9.2d
eor x12, x4, x5
ext v9.16b, v6.16b, v7.16b, #8
and x17, x16, x17
add v2.2d, v2.2d, v9.2d
and x12, x12, x11
shl v8.2d, v10.2d, #63
add x6, x6, x18
sri v8.2d, v10.2d, #1
eor x12, x12, x5
tbl v9.16b, {v10.16b}, v11.16b
add x6, x6, x19 add x6, x6, x19
eor v9.16b, v9.16b, v8.16b
eor x17, x17, x8 eor x17, x17, x8
ushr v10.2d, v10.2d, #7
add x6, x6, x12 add x6, x6, x12
eor v9.16b, v9.16b, v10.16b
add x15, x15, x17 add x15, x15, x17
add v2.2d, v2.2d, v9.2d
add x10, x10, x6 add x10, x10, x6
add x6, x6, x15 add x6, x6, x15
# Round 6 # Round 6
@ -379,14 +359,10 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ror x12, x10, #14 ror x12, x10, #14
ror x14, x6, #28 ror x14, x6, #28
ror x13, x10, #18 eor x12, x12, x10, ror 18
ror x15, x6, #34 eor x14, x14, x6, ror 34
eor x12, x13, x12 eor x12, x12, x10, ror 41
eor x14, x15, x14 eor x15, x14, x6, ror 39
ror x13, x10, #41
ror x15, x6, #39
eor x12, x13, x12
eor x15, x15, x14
add x5, x5, x12 add x5, x5, x12
eor x17, x6, x7 eor x17, x6, x7
eor x12, x11, x4 eor x12, x11, x4
@ -405,43 +381,39 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ext v10.16b, v3.16b, v4.16b, #8 ext v10.16b, v3.16b, v4.16b, #8
ror x12, x9, #14 ror x12, x9, #14
ext v9.16b, v7.16b, v0.16b, #8
ror x14, x5, #28
add v3.2d, v3.2d, v9.2d
ror x13, x9, #18
shl v8.2d, v2.2d, #45 shl v8.2d, v2.2d, #45
ror x15, x5, #34 ror x14, x5, #28
sri v8.2d, v2.2d, #19 sri v8.2d, v2.2d, #19
eor x12, x13, x12 eor x12, x12, x9, ror 18
shl v9.2d, v2.2d, #3 shl v9.2d, v2.2d, #3
eor x14, x15, x14 eor x14, x14, x5, ror 34
sri v9.2d, v2.2d, #61 sri v9.2d, v2.2d, #61
ror x13, x9, #41 eor x12, x12, x9, ror 41
eor v9.16b, v9.16b, v8.16b eor v9.16b, v9.16b, v8.16b
ror x15, x5, #39 eor x15, x14, x5, ror 39
ushr v8.2d, v2.2d, #6 ushr v8.2d, v2.2d, #6
eor x12, x13, x12
eor v9.16b, v9.16b, v8.16b
eor x15, x15, x14
add v3.2d, v3.2d, v9.2d
add x4, x4, x12 add x4, x4, x12
shl v8.2d, v10.2d, #63 eor v9.16b, v9.16b, v8.16b
eor x16, x5, x6 eor x16, x5, x6
sri v8.2d, v10.2d, #1
eor x12, x10, x11
tbl v9.16b, { v10.16b }, v11.16b
and x17, x16, x17
eor v9.16b, v9.16b, v8.16b
and x12, x12, x9
ushr v10.2d, v10.2d, #7
add x4, x4, x18
eor v9.16b, v9.16b, v10.16b
eor x12, x12, x11
add v3.2d, v3.2d, v9.2d add v3.2d, v3.2d, v9.2d
eor x12, x10, x11
ext v9.16b, v7.16b, v0.16b, #8
and x17, x16, x17
add v3.2d, v3.2d, v9.2d
and x12, x12, x9
shl v8.2d, v10.2d, #63
add x4, x4, x18
sri v8.2d, v10.2d, #1
eor x12, x12, x11
tbl v9.16b, {v10.16b}, v11.16b
add x4, x4, x19 add x4, x4, x19
eor v9.16b, v9.16b, v8.16b
eor x17, x17, x6 eor x17, x17, x6
ushr v10.2d, v10.2d, #7
add x4, x4, x12 add x4, x4, x12
eor v9.16b, v9.16b, v10.16b
add x15, x15, x17 add x15, x15, x17
add v3.2d, v3.2d, v9.2d
add x8, x8, x4 add x8, x8, x4
add x4, x4, x15 add x4, x4, x15
# Round 8 # Round 8
@ -449,14 +421,10 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ror x12, x8, #14 ror x12, x8, #14
ror x14, x4, #28 ror x14, x4, #28
ror x13, x8, #18 eor x12, x12, x8, ror 18
ror x15, x4, #34 eor x14, x14, x4, ror 34
eor x12, x13, x12 eor x12, x12, x8, ror 41
eor x14, x15, x14 eor x15, x14, x4, ror 39
ror x13, x8, #41
ror x15, x4, #39
eor x12, x13, x12
eor x15, x15, x14
add x11, x11, x12 add x11, x11, x12
eor x17, x4, x5 eor x17, x4, x5
eor x12, x9, x10 eor x12, x9, x10
@ -475,43 +443,39 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ext v10.16b, v4.16b, v5.16b, #8 ext v10.16b, v4.16b, v5.16b, #8
ror x12, x7, #14 ror x12, x7, #14
ext v9.16b, v0.16b, v1.16b, #8
ror x14, x11, #28
add v4.2d, v4.2d, v9.2d
ror x13, x7, #18
shl v8.2d, v3.2d, #45 shl v8.2d, v3.2d, #45
ror x15, x11, #34 ror x14, x11, #28
sri v8.2d, v3.2d, #19 sri v8.2d, v3.2d, #19
eor x12, x13, x12 eor x12, x12, x7, ror 18
shl v9.2d, v3.2d, #3 shl v9.2d, v3.2d, #3
eor x14, x15, x14 eor x14, x14, x11, ror 34
sri v9.2d, v3.2d, #61 sri v9.2d, v3.2d, #61
ror x13, x7, #41 eor x12, x12, x7, ror 41
eor v9.16b, v9.16b, v8.16b eor v9.16b, v9.16b, v8.16b
ror x15, x11, #39 eor x15, x14, x11, ror 39
ushr v8.2d, v3.2d, #6 ushr v8.2d, v3.2d, #6
eor x12, x13, x12
eor v9.16b, v9.16b, v8.16b
eor x15, x15, x14
add v4.2d, v4.2d, v9.2d
add x10, x10, x12 add x10, x10, x12
shl v8.2d, v10.2d, #63 eor v9.16b, v9.16b, v8.16b
eor x16, x11, x4 eor x16, x11, x4
sri v8.2d, v10.2d, #1
eor x12, x8, x9
tbl v9.16b, { v10.16b }, v11.16b
and x17, x16, x17
eor v9.16b, v9.16b, v8.16b
and x12, x12, x7
ushr v10.2d, v10.2d, #7
add x10, x10, x18
eor v9.16b, v9.16b, v10.16b
eor x12, x12, x9
add v4.2d, v4.2d, v9.2d add v4.2d, v4.2d, v9.2d
eor x12, x8, x9
ext v9.16b, v0.16b, v1.16b, #8
and x17, x16, x17
add v4.2d, v4.2d, v9.2d
and x12, x12, x7
shl v8.2d, v10.2d, #63
add x10, x10, x18
sri v8.2d, v10.2d, #1
eor x12, x12, x9
tbl v9.16b, {v10.16b}, v11.16b
add x10, x10, x19 add x10, x10, x19
eor v9.16b, v9.16b, v8.16b
eor x17, x17, x4 eor x17, x17, x4
ushr v10.2d, v10.2d, #7
add x10, x10, x12 add x10, x10, x12
eor v9.16b, v9.16b, v10.16b
add x15, x15, x17 add x15, x15, x17
add v4.2d, v4.2d, v9.2d
add x6, x6, x10 add x6, x6, x10
add x10, x10, x15 add x10, x10, x15
# Round 10 # Round 10
@ -519,14 +483,10 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ror x12, x6, #14 ror x12, x6, #14
ror x14, x10, #28 ror x14, x10, #28
ror x13, x6, #18 eor x12, x12, x6, ror 18
ror x15, x10, #34 eor x14, x14, x10, ror 34
eor x12, x13, x12 eor x12, x12, x6, ror 41
eor x14, x15, x14 eor x15, x14, x10, ror 39
ror x13, x6, #41
ror x15, x10, #39
eor x12, x13, x12
eor x15, x15, x14
add x9, x9, x12 add x9, x9, x12
eor x17, x10, x11 eor x17, x10, x11
eor x12, x7, x8 eor x12, x7, x8
@ -545,43 +505,39 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ext v10.16b, v5.16b, v6.16b, #8 ext v10.16b, v5.16b, v6.16b, #8
ror x12, x5, #14 ror x12, x5, #14
ext v9.16b, v1.16b, v2.16b, #8
ror x14, x9, #28
add v5.2d, v5.2d, v9.2d
ror x13, x5, #18
shl v8.2d, v4.2d, #45 shl v8.2d, v4.2d, #45
ror x15, x9, #34 ror x14, x9, #28
sri v8.2d, v4.2d, #19 sri v8.2d, v4.2d, #19
eor x12, x13, x12 eor x12, x12, x5, ror 18
shl v9.2d, v4.2d, #3 shl v9.2d, v4.2d, #3
eor x14, x15, x14 eor x14, x14, x9, ror 34
sri v9.2d, v4.2d, #61 sri v9.2d, v4.2d, #61
ror x13, x5, #41 eor x12, x12, x5, ror 41
eor v9.16b, v9.16b, v8.16b eor v9.16b, v9.16b, v8.16b
ror x15, x9, #39 eor x15, x14, x9, ror 39
ushr v8.2d, v4.2d, #6 ushr v8.2d, v4.2d, #6
eor x12, x13, x12
eor v9.16b, v9.16b, v8.16b
eor x15, x15, x14
add v5.2d, v5.2d, v9.2d
add x8, x8, x12 add x8, x8, x12
shl v8.2d, v10.2d, #63 eor v9.16b, v9.16b, v8.16b
eor x16, x9, x10 eor x16, x9, x10
sri v8.2d, v10.2d, #1
eor x12, x6, x7
tbl v9.16b, { v10.16b }, v11.16b
and x17, x16, x17
eor v9.16b, v9.16b, v8.16b
and x12, x12, x5
ushr v10.2d, v10.2d, #7
add x8, x8, x18
eor v9.16b, v9.16b, v10.16b
eor x12, x12, x7
add v5.2d, v5.2d, v9.2d add v5.2d, v5.2d, v9.2d
eor x12, x6, x7
ext v9.16b, v1.16b, v2.16b, #8
and x17, x16, x17
add v5.2d, v5.2d, v9.2d
and x12, x12, x5
shl v8.2d, v10.2d, #63
add x8, x8, x18
sri v8.2d, v10.2d, #1
eor x12, x12, x7
tbl v9.16b, {v10.16b}, v11.16b
add x8, x8, x19 add x8, x8, x19
eor v9.16b, v9.16b, v8.16b
eor x17, x17, x10 eor x17, x17, x10
ushr v10.2d, v10.2d, #7
add x8, x8, x12 add x8, x8, x12
eor v9.16b, v9.16b, v10.16b
add x15, x15, x17 add x15, x15, x17
add v5.2d, v5.2d, v9.2d
add x4, x4, x8 add x4, x4, x8
add x8, x8, x15 add x8, x8, x15
# Round 12 # Round 12
@ -589,14 +545,10 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ror x12, x4, #14 ror x12, x4, #14
ror x14, x8, #28 ror x14, x8, #28
ror x13, x4, #18 eor x12, x12, x4, ror 18
ror x15, x8, #34 eor x14, x14, x8, ror 34
eor x12, x13, x12 eor x12, x12, x4, ror 41
eor x14, x15, x14 eor x15, x14, x8, ror 39
ror x13, x4, #41
ror x15, x8, #39
eor x12, x13, x12
eor x15, x15, x14
add x7, x7, x12 add x7, x7, x12
eor x17, x8, x9 eor x17, x8, x9
eor x12, x5, x6 eor x12, x5, x6
@ -615,43 +567,39 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ext v10.16b, v6.16b, v7.16b, #8 ext v10.16b, v6.16b, v7.16b, #8
ror x12, x11, #14 ror x12, x11, #14
ext v9.16b, v2.16b, v3.16b, #8
ror x14, x7, #28
add v6.2d, v6.2d, v9.2d
ror x13, x11, #18
shl v8.2d, v5.2d, #45 shl v8.2d, v5.2d, #45
ror x15, x7, #34 ror x14, x7, #28
sri v8.2d, v5.2d, #19 sri v8.2d, v5.2d, #19
eor x12, x13, x12 eor x12, x12, x11, ror 18
shl v9.2d, v5.2d, #3 shl v9.2d, v5.2d, #3
eor x14, x15, x14 eor x14, x14, x7, ror 34
sri v9.2d, v5.2d, #61 sri v9.2d, v5.2d, #61
ror x13, x11, #41 eor x12, x12, x11, ror 41
eor v9.16b, v9.16b, v8.16b eor v9.16b, v9.16b, v8.16b
ror x15, x7, #39 eor x15, x14, x7, ror 39
ushr v8.2d, v5.2d, #6 ushr v8.2d, v5.2d, #6
eor x12, x13, x12
eor v9.16b, v9.16b, v8.16b
eor x15, x15, x14
add v6.2d, v6.2d, v9.2d
add x6, x6, x12 add x6, x6, x12
shl v8.2d, v10.2d, #63 eor v9.16b, v9.16b, v8.16b
eor x16, x7, x8 eor x16, x7, x8
sri v8.2d, v10.2d, #1
eor x12, x4, x5
tbl v9.16b, { v10.16b }, v11.16b
and x17, x16, x17
eor v9.16b, v9.16b, v8.16b
and x12, x12, x11
ushr v10.2d, v10.2d, #7
add x6, x6, x18
eor v9.16b, v9.16b, v10.16b
eor x12, x12, x5
add v6.2d, v6.2d, v9.2d add v6.2d, v6.2d, v9.2d
eor x12, x4, x5
ext v9.16b, v2.16b, v3.16b, #8
and x17, x16, x17
add v6.2d, v6.2d, v9.2d
and x12, x12, x11
shl v8.2d, v10.2d, #63
add x6, x6, x18
sri v8.2d, v10.2d, #1
eor x12, x12, x5
tbl v9.16b, {v10.16b}, v11.16b
add x6, x6, x19 add x6, x6, x19
eor v9.16b, v9.16b, v8.16b
eor x17, x17, x8 eor x17, x17, x8
ushr v10.2d, v10.2d, #7
add x6, x6, x12 add x6, x6, x12
eor v9.16b, v9.16b, v10.16b
add x15, x15, x17 add x15, x15, x17
add v6.2d, v6.2d, v9.2d
add x10, x10, x6 add x10, x10, x6
add x6, x6, x15 add x6, x6, x15
# Round 14 # Round 14
@ -659,14 +607,10 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ror x12, x10, #14 ror x12, x10, #14
ror x14, x6, #28 ror x14, x6, #28
ror x13, x10, #18 eor x12, x12, x10, ror 18
ror x15, x6, #34 eor x14, x14, x6, ror 34
eor x12, x13, x12 eor x12, x12, x10, ror 41
eor x14, x15, x14 eor x15, x14, x6, ror 39
ror x13, x10, #41
ror x15, x6, #39
eor x12, x13, x12
eor x15, x15, x14
add x5, x5, x12 add x5, x5, x12
eor x17, x6, x7 eor x17, x6, x7
eor x12, x11, x4 eor x12, x11, x4
@ -685,43 +629,39 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ext v10.16b, v7.16b, v0.16b, #8 ext v10.16b, v7.16b, v0.16b, #8
ror x12, x9, #14 ror x12, x9, #14
ext v9.16b, v3.16b, v4.16b, #8
ror x14, x5, #28
add v7.2d, v7.2d, v9.2d
ror x13, x9, #18
shl v8.2d, v6.2d, #45 shl v8.2d, v6.2d, #45
ror x15, x5, #34 ror x14, x5, #28
sri v8.2d, v6.2d, #19 sri v8.2d, v6.2d, #19
eor x12, x13, x12 eor x12, x12, x9, ror 18
shl v9.2d, v6.2d, #3 shl v9.2d, v6.2d, #3
eor x14, x15, x14 eor x14, x14, x5, ror 34
sri v9.2d, v6.2d, #61 sri v9.2d, v6.2d, #61
ror x13, x9, #41 eor x12, x12, x9, ror 41
eor v9.16b, v9.16b, v8.16b eor v9.16b, v9.16b, v8.16b
ror x15, x5, #39 eor x15, x14, x5, ror 39
ushr v8.2d, v6.2d, #6 ushr v8.2d, v6.2d, #6
eor x12, x13, x12
eor v9.16b, v9.16b, v8.16b
eor x15, x15, x14
add v7.2d, v7.2d, v9.2d
add x4, x4, x12 add x4, x4, x12
shl v8.2d, v10.2d, #63 eor v9.16b, v9.16b, v8.16b
eor x16, x5, x6 eor x16, x5, x6
sri v8.2d, v10.2d, #1
eor x12, x10, x11
tbl v9.16b, { v10.16b }, v11.16b
and x17, x16, x17
eor v9.16b, v9.16b, v8.16b
and x12, x12, x9
ushr v10.2d, v10.2d, #7
add x4, x4, x18
eor v9.16b, v9.16b, v10.16b
eor x12, x12, x11
add v7.2d, v7.2d, v9.2d add v7.2d, v7.2d, v9.2d
eor x12, x10, x11
ext v9.16b, v3.16b, v4.16b, #8
and x17, x16, x17
add v7.2d, v7.2d, v9.2d
and x12, x12, x9
shl v8.2d, v10.2d, #63
add x4, x4, x18
sri v8.2d, v10.2d, #1
eor x12, x12, x11
tbl v9.16b, {v10.16b}, v11.16b
add x4, x4, x19 add x4, x4, x19
eor v9.16b, v9.16b, v8.16b
eor x17, x17, x6 eor x17, x17, x6
ushr v10.2d, v10.2d, #7
add x4, x4, x12 add x4, x4, x12
eor v9.16b, v9.16b, v10.16b
add x15, x15, x17 add x15, x15, x17
add v7.2d, v7.2d, v9.2d
add x8, x8, x4 add x8, x8, x4
add x4, x4, x15 add x4, x4, x15
subs x28, x28, #1 subs x28, x28, #1
@ -731,14 +671,10 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ror x12, x8, #14 ror x12, x8, #14
ror x14, x4, #28 ror x14, x4, #28
ror x13, x8, #18 eor x12, x12, x8, ror 18
ror x15, x4, #34 eor x14, x14, x4, ror 34
eor x12, x13, x12 eor x12, x12, x8, ror 41
eor x14, x15, x14 eor x15, x14, x4, ror 39
ror x13, x8, #41
ror x15, x4, #39
eor x12, x13, x12
eor x15, x15, x14
add x11, x11, x12 add x11, x11, x12
eor x17, x4, x5 eor x17, x4, x5
eor x12, x9, x10 eor x12, x9, x10
@ -757,14 +693,10 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ror x12, x7, #14 ror x12, x7, #14
ror x14, x11, #28 ror x14, x11, #28
ror x13, x7, #18 eor x12, x12, x7, ror 18
ror x15, x11, #34 eor x14, x14, x11, ror 34
eor x12, x13, x12 eor x12, x12, x7, ror 41
eor x14, x15, x14 eor x15, x14, x11, ror 39
ror x13, x7, #41
ror x15, x11, #39
eor x12, x13, x12
eor x15, x15, x14
add x10, x10, x12 add x10, x10, x12
eor x16, x11, x4 eor x16, x11, x4
eor x12, x8, x9 eor x12, x8, x9
@ -783,14 +715,10 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ror x12, x6, #14 ror x12, x6, #14
ror x14, x10, #28 ror x14, x10, #28
ror x13, x6, #18 eor x12, x12, x6, ror 18
ror x15, x10, #34 eor x14, x14, x10, ror 34
eor x12, x13, x12 eor x12, x12, x6, ror 41
eor x14, x15, x14 eor x15, x14, x10, ror 39
ror x13, x6, #41
ror x15, x10, #39
eor x12, x13, x12
eor x15, x15, x14
add x9, x9, x12 add x9, x9, x12
eor x17, x10, x11 eor x17, x10, x11
eor x12, x7, x8 eor x12, x7, x8
@ -809,14 +737,10 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ror x12, x5, #14 ror x12, x5, #14
ror x14, x9, #28 ror x14, x9, #28
ror x13, x5, #18 eor x12, x12, x5, ror 18
ror x15, x9, #34 eor x14, x14, x9, ror 34
eor x12, x13, x12 eor x12, x12, x5, ror 41
eor x14, x15, x14 eor x15, x14, x9, ror 39
ror x13, x5, #41
ror x15, x9, #39
eor x12, x13, x12
eor x15, x15, x14
add x8, x8, x12 add x8, x8, x12
eor x16, x9, x10 eor x16, x9, x10
eor x12, x6, x7 eor x12, x6, x7
@ -835,14 +759,10 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ror x12, x4, #14 ror x12, x4, #14
ror x14, x8, #28 ror x14, x8, #28
ror x13, x4, #18 eor x12, x12, x4, ror 18
ror x15, x8, #34 eor x14, x14, x8, ror 34
eor x12, x13, x12 eor x12, x12, x4, ror 41
eor x14, x15, x14 eor x15, x14, x8, ror 39
ror x13, x4, #41
ror x15, x8, #39
eor x12, x13, x12
eor x15, x15, x14
add x7, x7, x12 add x7, x7, x12
eor x17, x8, x9 eor x17, x8, x9
eor x12, x5, x6 eor x12, x5, x6
@ -861,14 +781,10 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ror x12, x11, #14 ror x12, x11, #14
ror x14, x7, #28 ror x14, x7, #28
ror x13, x11, #18 eor x12, x12, x11, ror 18
ror x15, x7, #34 eor x14, x14, x7, ror 34
eor x12, x13, x12 eor x12, x12, x11, ror 41
eor x14, x15, x14 eor x15, x14, x7, ror 39
ror x13, x11, #41
ror x15, x7, #39
eor x12, x13, x12
eor x15, x15, x14
add x6, x6, x12 add x6, x6, x12
eor x16, x7, x8 eor x16, x7, x8
eor x12, x4, x5 eor x12, x4, x5
@ -887,14 +803,10 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ror x12, x10, #14 ror x12, x10, #14
ror x14, x6, #28 ror x14, x6, #28
ror x13, x10, #18 eor x12, x12, x10, ror 18
ror x15, x6, #34 eor x14, x14, x6, ror 34
eor x12, x13, x12 eor x12, x12, x10, ror 41
eor x14, x15, x14 eor x15, x14, x6, ror 39
ror x13, x10, #41
ror x15, x6, #39
eor x12, x13, x12
eor x15, x15, x14
add x5, x5, x12 add x5, x5, x12
eor x17, x6, x7 eor x17, x6, x7
eor x12, x11, x4 eor x12, x11, x4
@ -913,14 +825,10 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ror x12, x9, #14 ror x12, x9, #14
ror x14, x5, #28 ror x14, x5, #28
ror x13, x9, #18 eor x12, x12, x9, ror 18
ror x15, x5, #34 eor x14, x14, x5, ror 34
eor x12, x13, x12 eor x12, x12, x9, ror 41
eor x14, x15, x14 eor x15, x14, x5, ror 39
ror x13, x9, #41
ror x15, x5, #39
eor x12, x13, x12
eor x15, x15, x14
add x4, x4, x12 add x4, x4, x12
eor x16, x5, x6 eor x16, x5, x6
eor x12, x10, x11 eor x12, x10, x11
@ -939,14 +847,10 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ror x12, x8, #14 ror x12, x8, #14
ror x14, x4, #28 ror x14, x4, #28
ror x13, x8, #18 eor x12, x12, x8, ror 18
ror x15, x4, #34 eor x14, x14, x4, ror 34
eor x12, x13, x12 eor x12, x12, x8, ror 41
eor x14, x15, x14 eor x15, x14, x4, ror 39
ror x13, x8, #41
ror x15, x4, #39
eor x12, x13, x12
eor x15, x15, x14
add x11, x11, x12 add x11, x11, x12
eor x17, x4, x5 eor x17, x4, x5
eor x12, x9, x10 eor x12, x9, x10
@ -965,14 +869,10 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ror x12, x7, #14 ror x12, x7, #14
ror x14, x11, #28 ror x14, x11, #28
ror x13, x7, #18 eor x12, x12, x7, ror 18
ror x15, x11, #34 eor x14, x14, x11, ror 34
eor x12, x13, x12 eor x12, x12, x7, ror 41
eor x14, x15, x14 eor x15, x14, x11, ror 39
ror x13, x7, #41
ror x15, x11, #39
eor x12, x13, x12
eor x15, x15, x14
add x10, x10, x12 add x10, x10, x12
eor x16, x11, x4 eor x16, x11, x4
eor x12, x8, x9 eor x12, x8, x9
@ -991,14 +891,10 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ror x12, x6, #14 ror x12, x6, #14
ror x14, x10, #28 ror x14, x10, #28
ror x13, x6, #18 eor x12, x12, x6, ror 18
ror x15, x10, #34 eor x14, x14, x10, ror 34
eor x12, x13, x12 eor x12, x12, x6, ror 41
eor x14, x15, x14 eor x15, x14, x10, ror 39
ror x13, x6, #41
ror x15, x10, #39
eor x12, x13, x12
eor x15, x15, x14
add x9, x9, x12 add x9, x9, x12
eor x17, x10, x11 eor x17, x10, x11
eor x12, x7, x8 eor x12, x7, x8
@ -1017,14 +913,10 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ror x12, x5, #14 ror x12, x5, #14
ror x14, x9, #28 ror x14, x9, #28
ror x13, x5, #18 eor x12, x12, x5, ror 18
ror x15, x9, #34 eor x14, x14, x9, ror 34
eor x12, x13, x12 eor x12, x12, x5, ror 41
eor x14, x15, x14 eor x15, x14, x9, ror 39
ror x13, x5, #41
ror x15, x9, #39
eor x12, x13, x12
eor x15, x15, x14
add x8, x8, x12 add x8, x8, x12
eor x16, x9, x10 eor x16, x9, x10
eor x12, x6, x7 eor x12, x6, x7
@ -1043,14 +935,10 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ror x12, x4, #14 ror x12, x4, #14
ror x14, x8, #28 ror x14, x8, #28
ror x13, x4, #18 eor x12, x12, x4, ror 18
ror x15, x8, #34 eor x14, x14, x8, ror 34
eor x12, x13, x12 eor x12, x12, x4, ror 41
eor x14, x15, x14 eor x15, x14, x8, ror 39
ror x13, x4, #41
ror x15, x8, #39
eor x12, x13, x12
eor x15, x15, x14
add x7, x7, x12 add x7, x7, x12
eor x17, x8, x9 eor x17, x8, x9
eor x12, x5, x6 eor x12, x5, x6
@ -1069,14 +957,10 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ror x12, x11, #14 ror x12, x11, #14
ror x14, x7, #28 ror x14, x7, #28
ror x13, x11, #18 eor x12, x12, x11, ror 18
ror x15, x7, #34 eor x14, x14, x7, ror 34
eor x12, x13, x12 eor x12, x12, x11, ror 41
eor x14, x15, x14 eor x15, x14, x7, ror 39
ror x13, x11, #41
ror x15, x7, #39
eor x12, x13, x12
eor x15, x15, x14
add x6, x6, x12 add x6, x6, x12
eor x16, x7, x8 eor x16, x7, x8
eor x12, x4, x5 eor x12, x4, x5
@ -1095,14 +979,10 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ror x12, x10, #14 ror x12, x10, #14
ror x14, x6, #28 ror x14, x6, #28
ror x13, x10, #18 eor x12, x12, x10, ror 18
ror x15, x6, #34 eor x14, x14, x6, ror 34
eor x12, x13, x12 eor x12, x12, x10, ror 41
eor x14, x15, x14 eor x15, x14, x6, ror 39
ror x13, x10, #41
ror x15, x6, #39
eor x12, x13, x12
eor x15, x15, x14
add x5, x5, x12 add x5, x5, x12
eor x17, x6, x7 eor x17, x6, x7
eor x12, x11, x4 eor x12, x11, x4
@ -1121,14 +1001,10 @@ L_sha512_len_neon_start:
ldr x19, [x3], #8 ldr x19, [x3], #8
ror x12, x9, #14 ror x12, x9, #14
ror x14, x5, #28 ror x14, x5, #28
ror x13, x9, #18 eor x12, x12, x9, ror 18
ror x15, x5, #34 eor x14, x14, x5, ror 34
eor x12, x13, x12 eor x12, x12, x9, ror 41
eor x14, x15, x14 eor x15, x14, x5, ror 39
ror x13, x9, #41
ror x15, x5, #39
eor x12, x13, x12
eor x15, x15, x14
add x4, x4, x12 add x4, x4, x12
eor x16, x5, x6 eor x16, x5, x6
eor x12, x10, x11 eor x12, x10, x11
@ -1165,8 +1041,8 @@ L_sha512_len_neon_start:
ldp x24, x25, [x29, #72] ldp x24, x25, [x29, #72]
ldp x26, x27, [x29, #88] ldp x26, x27, [x29, #88]
ldr x28, [x29, #104] ldr x28, [x29, #104]
ldp d8, d9, [sp, #112] ldp d8, d9, [x29, #112]
ldp d10, d11, [sp, #128] ldp d10, d11, [x29, #128]
ldp x29, x30, [sp], #0x90 ldp x29, x30, [sp], #0x90
ret ret
.size Transform_Sha512_Len,.-Transform_Sha512_Len .size Transform_Sha512_Len,.-Transform_Sha512_Len

View File

@ -19,6 +19,10 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/ */
/* Generated using (from wolfssl):
* cd ../scripts
* ruby ./sha2/sha512.rb arm64 ../wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.c
*/
#ifdef __aarch64__ #ifdef __aarch64__
#include <stdint.h> #include <stdint.h>
#include <wolfssl/wolfcrypt/sha512.h> #include <wolfssl/wolfcrypt/sha512.h>
@ -120,18 +124,18 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"adr x28, %[L_SHA512_transform_neon_len_ror8]\n\t" "adr x28, %[L_SHA512_transform_neon_len_ror8]\n\t"
"ld1 {v11.16b}, [x28]\n\t" "ld1 {v11.16b}, [x28]\n\t"
/* Load digest into working vars */ /* Load digest into working vars */
"ldp x4, x5, [%[sha512]]\n\t" "ldp x4, x5, [%x[sha512]]\n\t"
"ldp x6, x7, [%[sha512], #16]\n\t" "ldp x6, x7, [%x[sha512], #16]\n\t"
"ldp x8, x9, [%[sha512], #32]\n\t" "ldp x8, x9, [%x[sha512], #32]\n\t"
"ldp x10, x11, [%[sha512], #48]\n\t" "ldp x10, x11, [%x[sha512], #48]\n\t"
/* Start of loop processing a block */ /* Start of loop processing a block */
"\n" "\n"
"L_sha512_len_neon_begin_%=: \n\t" "L_sha512_len_neon_begin_%=: \n\t"
/* Load W */ /* Load W */
/* Copy digest to add in at end */ /* Copy digest to add in at end */
"ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%[data]], #0x40\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[data]], #0x40\n\t"
"mov x20, x4\n\t" "mov x20, x4\n\t"
"ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%[data]], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[data]], #0x40\n\t"
"mov x21, x5\n\t" "mov x21, x5\n\t"
"rev64 v0.16b, v0.16b\n\t" "rev64 v0.16b, v0.16b\n\t"
"mov x22, x6\n\t" "mov x22, x6\n\t"
@ -158,14 +162,10 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ror x12, x8, #14\n\t" "ror x12, x8, #14\n\t"
"ror x14, x4, #28\n\t" "ror x14, x4, #28\n\t"
"ror x13, x8, #18\n\t" "eor x12, x12, x8, ror 18\n\t"
"ror x15, x4, #34\n\t" "eor x14, x14, x4, ror 34\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x8, ror 41\n\t"
"eor x14, x15, x14\n\t" "eor x15, x14, x4, ror 39\n\t"
"ror x13, x8, #41\n\t"
"ror x15, x4, #39\n\t"
"eor x12, x13, x12\n\t"
"eor x15, x15, x14\n\t"
"add x11, x11, x12\n\t" "add x11, x11, x12\n\t"
"eor x17, x4, x5\n\t" "eor x17, x4, x5\n\t"
"eor x12, x9, x10\n\t" "eor x12, x9, x10\n\t"
@ -184,43 +184,39 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ext v10.16b, v0.16b, v1.16b, #8\n\t" "ext v10.16b, v0.16b, v1.16b, #8\n\t"
"ror x12, x7, #14\n\t" "ror x12, x7, #14\n\t"
"ext v9.16b, v4.16b, v5.16b, #8\n\t"
"ror x14, x11, #28\n\t"
"add v0.2d, v0.2d, v9.2d\n\t"
"ror x13, x7, #18\n\t"
"shl v8.2d, v7.2d, #45\n\t" "shl v8.2d, v7.2d, #45\n\t"
"ror x15, x11, #34\n\t" "ror x14, x11, #28\n\t"
"sri v8.2d, v7.2d, #19\n\t" "sri v8.2d, v7.2d, #19\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x7, ror 18\n\t"
"shl v9.2d, v7.2d, #3\n\t" "shl v9.2d, v7.2d, #3\n\t"
"eor x14, x15, x14\n\t" "eor x14, x14, x11, ror 34\n\t"
"sri v9.2d, v7.2d, #61\n\t" "sri v9.2d, v7.2d, #61\n\t"
"ror x13, x7, #41\n\t" "eor x12, x12, x7, ror 41\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t" "eor v9.16b, v9.16b, v8.16b\n\t"
"ror x15, x11, #39\n\t" "eor x15, x14, x11, ror 39\n\t"
"ushr v8.2d, v7.2d, #6\n\t" "ushr v8.2d, v7.2d, #6\n\t"
"eor x12, x13, x12\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t"
"eor x15, x15, x14\n\t"
"add v0.2d, v0.2d, v9.2d\n\t"
"add x10, x10, x12\n\t" "add x10, x10, x12\n\t"
"shl v8.2d, v10.2d, #63\n\t" "eor v9.16b, v9.16b, v8.16b\n\t"
"eor x16, x11, x4\n\t" "eor x16, x11, x4\n\t"
"sri v8.2d, v10.2d, #1\n\t"
"eor x12, x8, x9\n\t"
"tbl v9.16b, { v10.16b }, v11.16b\n\t"
"and x17, x16, x17\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t"
"and x12, x12, x7\n\t"
"ushr v10.2d, v10.2d, #7\n\t"
"add x10, x10, x18\n\t"
"eor v9.16b, v9.16b, v10.16b\n\t"
"eor x12, x12, x9\n\t"
"add v0.2d, v0.2d, v9.2d\n\t" "add v0.2d, v0.2d, v9.2d\n\t"
"eor x12, x8, x9\n\t"
"ext v9.16b, v4.16b, v5.16b, #8\n\t"
"and x17, x16, x17\n\t"
"add v0.2d, v0.2d, v9.2d\n\t"
"and x12, x12, x7\n\t"
"shl v8.2d, v10.2d, #63\n\t"
"add x10, x10, x18\n\t"
"sri v8.2d, v10.2d, #1\n\t"
"eor x12, x12, x9\n\t"
"tbl v9.16b, {v10.16b}, v11.16b\n\t"
"add x10, x10, x19\n\t" "add x10, x10, x19\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t"
"eor x17, x17, x4\n\t" "eor x17, x17, x4\n\t"
"ushr v10.2d, v10.2d, #7\n\t"
"add x10, x10, x12\n\t" "add x10, x10, x12\n\t"
"eor v9.16b, v9.16b, v10.16b\n\t"
"add x15, x15, x17\n\t" "add x15, x15, x17\n\t"
"add v0.2d, v0.2d, v9.2d\n\t"
"add x6, x6, x10\n\t" "add x6, x6, x10\n\t"
"add x10, x10, x15\n\t" "add x10, x10, x15\n\t"
/* Round 2 */ /* Round 2 */
@ -228,14 +224,10 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ror x12, x6, #14\n\t" "ror x12, x6, #14\n\t"
"ror x14, x10, #28\n\t" "ror x14, x10, #28\n\t"
"ror x13, x6, #18\n\t" "eor x12, x12, x6, ror 18\n\t"
"ror x15, x10, #34\n\t" "eor x14, x14, x10, ror 34\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x6, ror 41\n\t"
"eor x14, x15, x14\n\t" "eor x15, x14, x10, ror 39\n\t"
"ror x13, x6, #41\n\t"
"ror x15, x10, #39\n\t"
"eor x12, x13, x12\n\t"
"eor x15, x15, x14\n\t"
"add x9, x9, x12\n\t" "add x9, x9, x12\n\t"
"eor x17, x10, x11\n\t" "eor x17, x10, x11\n\t"
"eor x12, x7, x8\n\t" "eor x12, x7, x8\n\t"
@ -254,43 +246,39 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ext v10.16b, v1.16b, v2.16b, #8\n\t" "ext v10.16b, v1.16b, v2.16b, #8\n\t"
"ror x12, x5, #14\n\t" "ror x12, x5, #14\n\t"
"ext v9.16b, v5.16b, v6.16b, #8\n\t"
"ror x14, x9, #28\n\t"
"add v1.2d, v1.2d, v9.2d\n\t"
"ror x13, x5, #18\n\t"
"shl v8.2d, v0.2d, #45\n\t" "shl v8.2d, v0.2d, #45\n\t"
"ror x15, x9, #34\n\t" "ror x14, x9, #28\n\t"
"sri v8.2d, v0.2d, #19\n\t" "sri v8.2d, v0.2d, #19\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x5, ror 18\n\t"
"shl v9.2d, v0.2d, #3\n\t" "shl v9.2d, v0.2d, #3\n\t"
"eor x14, x15, x14\n\t" "eor x14, x14, x9, ror 34\n\t"
"sri v9.2d, v0.2d, #61\n\t" "sri v9.2d, v0.2d, #61\n\t"
"ror x13, x5, #41\n\t" "eor x12, x12, x5, ror 41\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t" "eor v9.16b, v9.16b, v8.16b\n\t"
"ror x15, x9, #39\n\t" "eor x15, x14, x9, ror 39\n\t"
"ushr v8.2d, v0.2d, #6\n\t" "ushr v8.2d, v0.2d, #6\n\t"
"eor x12, x13, x12\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t"
"eor x15, x15, x14\n\t"
"add v1.2d, v1.2d, v9.2d\n\t"
"add x8, x8, x12\n\t" "add x8, x8, x12\n\t"
"shl v8.2d, v10.2d, #63\n\t" "eor v9.16b, v9.16b, v8.16b\n\t"
"eor x16, x9, x10\n\t" "eor x16, x9, x10\n\t"
"sri v8.2d, v10.2d, #1\n\t"
"eor x12, x6, x7\n\t"
"tbl v9.16b, { v10.16b }, v11.16b\n\t"
"and x17, x16, x17\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t"
"and x12, x12, x5\n\t"
"ushr v10.2d, v10.2d, #7\n\t"
"add x8, x8, x18\n\t"
"eor v9.16b, v9.16b, v10.16b\n\t"
"eor x12, x12, x7\n\t"
"add v1.2d, v1.2d, v9.2d\n\t" "add v1.2d, v1.2d, v9.2d\n\t"
"eor x12, x6, x7\n\t"
"ext v9.16b, v5.16b, v6.16b, #8\n\t"
"and x17, x16, x17\n\t"
"add v1.2d, v1.2d, v9.2d\n\t"
"and x12, x12, x5\n\t"
"shl v8.2d, v10.2d, #63\n\t"
"add x8, x8, x18\n\t"
"sri v8.2d, v10.2d, #1\n\t"
"eor x12, x12, x7\n\t"
"tbl v9.16b, {v10.16b}, v11.16b\n\t"
"add x8, x8, x19\n\t" "add x8, x8, x19\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t"
"eor x17, x17, x10\n\t" "eor x17, x17, x10\n\t"
"ushr v10.2d, v10.2d, #7\n\t"
"add x8, x8, x12\n\t" "add x8, x8, x12\n\t"
"eor v9.16b, v9.16b, v10.16b\n\t"
"add x15, x15, x17\n\t" "add x15, x15, x17\n\t"
"add v1.2d, v1.2d, v9.2d\n\t"
"add x4, x4, x8\n\t" "add x4, x4, x8\n\t"
"add x8, x8, x15\n\t" "add x8, x8, x15\n\t"
/* Round 4 */ /* Round 4 */
@ -298,14 +286,10 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ror x12, x4, #14\n\t" "ror x12, x4, #14\n\t"
"ror x14, x8, #28\n\t" "ror x14, x8, #28\n\t"
"ror x13, x4, #18\n\t" "eor x12, x12, x4, ror 18\n\t"
"ror x15, x8, #34\n\t" "eor x14, x14, x8, ror 34\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x4, ror 41\n\t"
"eor x14, x15, x14\n\t" "eor x15, x14, x8, ror 39\n\t"
"ror x13, x4, #41\n\t"
"ror x15, x8, #39\n\t"
"eor x12, x13, x12\n\t"
"eor x15, x15, x14\n\t"
"add x7, x7, x12\n\t" "add x7, x7, x12\n\t"
"eor x17, x8, x9\n\t" "eor x17, x8, x9\n\t"
"eor x12, x5, x6\n\t" "eor x12, x5, x6\n\t"
@ -324,43 +308,39 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ext v10.16b, v2.16b, v3.16b, #8\n\t" "ext v10.16b, v2.16b, v3.16b, #8\n\t"
"ror x12, x11, #14\n\t" "ror x12, x11, #14\n\t"
"ext v9.16b, v6.16b, v7.16b, #8\n\t"
"ror x14, x7, #28\n\t"
"add v2.2d, v2.2d, v9.2d\n\t"
"ror x13, x11, #18\n\t"
"shl v8.2d, v1.2d, #45\n\t" "shl v8.2d, v1.2d, #45\n\t"
"ror x15, x7, #34\n\t" "ror x14, x7, #28\n\t"
"sri v8.2d, v1.2d, #19\n\t" "sri v8.2d, v1.2d, #19\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x11, ror 18\n\t"
"shl v9.2d, v1.2d, #3\n\t" "shl v9.2d, v1.2d, #3\n\t"
"eor x14, x15, x14\n\t" "eor x14, x14, x7, ror 34\n\t"
"sri v9.2d, v1.2d, #61\n\t" "sri v9.2d, v1.2d, #61\n\t"
"ror x13, x11, #41\n\t" "eor x12, x12, x11, ror 41\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t" "eor v9.16b, v9.16b, v8.16b\n\t"
"ror x15, x7, #39\n\t" "eor x15, x14, x7, ror 39\n\t"
"ushr v8.2d, v1.2d, #6\n\t" "ushr v8.2d, v1.2d, #6\n\t"
"eor x12, x13, x12\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t"
"eor x15, x15, x14\n\t"
"add v2.2d, v2.2d, v9.2d\n\t"
"add x6, x6, x12\n\t" "add x6, x6, x12\n\t"
"shl v8.2d, v10.2d, #63\n\t" "eor v9.16b, v9.16b, v8.16b\n\t"
"eor x16, x7, x8\n\t" "eor x16, x7, x8\n\t"
"sri v8.2d, v10.2d, #1\n\t"
"eor x12, x4, x5\n\t"
"tbl v9.16b, { v10.16b }, v11.16b\n\t"
"and x17, x16, x17\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t"
"and x12, x12, x11\n\t"
"ushr v10.2d, v10.2d, #7\n\t"
"add x6, x6, x18\n\t"
"eor v9.16b, v9.16b, v10.16b\n\t"
"eor x12, x12, x5\n\t"
"add v2.2d, v2.2d, v9.2d\n\t" "add v2.2d, v2.2d, v9.2d\n\t"
"eor x12, x4, x5\n\t"
"ext v9.16b, v6.16b, v7.16b, #8\n\t"
"and x17, x16, x17\n\t"
"add v2.2d, v2.2d, v9.2d\n\t"
"and x12, x12, x11\n\t"
"shl v8.2d, v10.2d, #63\n\t"
"add x6, x6, x18\n\t"
"sri v8.2d, v10.2d, #1\n\t"
"eor x12, x12, x5\n\t"
"tbl v9.16b, {v10.16b}, v11.16b\n\t"
"add x6, x6, x19\n\t" "add x6, x6, x19\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t"
"eor x17, x17, x8\n\t" "eor x17, x17, x8\n\t"
"ushr v10.2d, v10.2d, #7\n\t"
"add x6, x6, x12\n\t" "add x6, x6, x12\n\t"
"eor v9.16b, v9.16b, v10.16b\n\t"
"add x15, x15, x17\n\t" "add x15, x15, x17\n\t"
"add v2.2d, v2.2d, v9.2d\n\t"
"add x10, x10, x6\n\t" "add x10, x10, x6\n\t"
"add x6, x6, x15\n\t" "add x6, x6, x15\n\t"
/* Round 6 */ /* Round 6 */
@ -368,14 +348,10 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ror x12, x10, #14\n\t" "ror x12, x10, #14\n\t"
"ror x14, x6, #28\n\t" "ror x14, x6, #28\n\t"
"ror x13, x10, #18\n\t" "eor x12, x12, x10, ror 18\n\t"
"ror x15, x6, #34\n\t" "eor x14, x14, x6, ror 34\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x10, ror 41\n\t"
"eor x14, x15, x14\n\t" "eor x15, x14, x6, ror 39\n\t"
"ror x13, x10, #41\n\t"
"ror x15, x6, #39\n\t"
"eor x12, x13, x12\n\t"
"eor x15, x15, x14\n\t"
"add x5, x5, x12\n\t" "add x5, x5, x12\n\t"
"eor x17, x6, x7\n\t" "eor x17, x6, x7\n\t"
"eor x12, x11, x4\n\t" "eor x12, x11, x4\n\t"
@ -394,43 +370,39 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ext v10.16b, v3.16b, v4.16b, #8\n\t" "ext v10.16b, v3.16b, v4.16b, #8\n\t"
"ror x12, x9, #14\n\t" "ror x12, x9, #14\n\t"
"ext v9.16b, v7.16b, v0.16b, #8\n\t"
"ror x14, x5, #28\n\t"
"add v3.2d, v3.2d, v9.2d\n\t"
"ror x13, x9, #18\n\t"
"shl v8.2d, v2.2d, #45\n\t" "shl v8.2d, v2.2d, #45\n\t"
"ror x15, x5, #34\n\t" "ror x14, x5, #28\n\t"
"sri v8.2d, v2.2d, #19\n\t" "sri v8.2d, v2.2d, #19\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x9, ror 18\n\t"
"shl v9.2d, v2.2d, #3\n\t" "shl v9.2d, v2.2d, #3\n\t"
"eor x14, x15, x14\n\t" "eor x14, x14, x5, ror 34\n\t"
"sri v9.2d, v2.2d, #61\n\t" "sri v9.2d, v2.2d, #61\n\t"
"ror x13, x9, #41\n\t" "eor x12, x12, x9, ror 41\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t" "eor v9.16b, v9.16b, v8.16b\n\t"
"ror x15, x5, #39\n\t" "eor x15, x14, x5, ror 39\n\t"
"ushr v8.2d, v2.2d, #6\n\t" "ushr v8.2d, v2.2d, #6\n\t"
"eor x12, x13, x12\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t"
"eor x15, x15, x14\n\t"
"add v3.2d, v3.2d, v9.2d\n\t"
"add x4, x4, x12\n\t" "add x4, x4, x12\n\t"
"shl v8.2d, v10.2d, #63\n\t" "eor v9.16b, v9.16b, v8.16b\n\t"
"eor x16, x5, x6\n\t" "eor x16, x5, x6\n\t"
"sri v8.2d, v10.2d, #1\n\t"
"eor x12, x10, x11\n\t"
"tbl v9.16b, { v10.16b }, v11.16b\n\t"
"and x17, x16, x17\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t"
"and x12, x12, x9\n\t"
"ushr v10.2d, v10.2d, #7\n\t"
"add x4, x4, x18\n\t"
"eor v9.16b, v9.16b, v10.16b\n\t"
"eor x12, x12, x11\n\t"
"add v3.2d, v3.2d, v9.2d\n\t" "add v3.2d, v3.2d, v9.2d\n\t"
"eor x12, x10, x11\n\t"
"ext v9.16b, v7.16b, v0.16b, #8\n\t"
"and x17, x16, x17\n\t"
"add v3.2d, v3.2d, v9.2d\n\t"
"and x12, x12, x9\n\t"
"shl v8.2d, v10.2d, #63\n\t"
"add x4, x4, x18\n\t"
"sri v8.2d, v10.2d, #1\n\t"
"eor x12, x12, x11\n\t"
"tbl v9.16b, {v10.16b}, v11.16b\n\t"
"add x4, x4, x19\n\t" "add x4, x4, x19\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t"
"eor x17, x17, x6\n\t" "eor x17, x17, x6\n\t"
"ushr v10.2d, v10.2d, #7\n\t"
"add x4, x4, x12\n\t" "add x4, x4, x12\n\t"
"eor v9.16b, v9.16b, v10.16b\n\t"
"add x15, x15, x17\n\t" "add x15, x15, x17\n\t"
"add v3.2d, v3.2d, v9.2d\n\t"
"add x8, x8, x4\n\t" "add x8, x8, x4\n\t"
"add x4, x4, x15\n\t" "add x4, x4, x15\n\t"
/* Round 8 */ /* Round 8 */
@ -438,14 +410,10 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ror x12, x8, #14\n\t" "ror x12, x8, #14\n\t"
"ror x14, x4, #28\n\t" "ror x14, x4, #28\n\t"
"ror x13, x8, #18\n\t" "eor x12, x12, x8, ror 18\n\t"
"ror x15, x4, #34\n\t" "eor x14, x14, x4, ror 34\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x8, ror 41\n\t"
"eor x14, x15, x14\n\t" "eor x15, x14, x4, ror 39\n\t"
"ror x13, x8, #41\n\t"
"ror x15, x4, #39\n\t"
"eor x12, x13, x12\n\t"
"eor x15, x15, x14\n\t"
"add x11, x11, x12\n\t" "add x11, x11, x12\n\t"
"eor x17, x4, x5\n\t" "eor x17, x4, x5\n\t"
"eor x12, x9, x10\n\t" "eor x12, x9, x10\n\t"
@ -464,43 +432,39 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ext v10.16b, v4.16b, v5.16b, #8\n\t" "ext v10.16b, v4.16b, v5.16b, #8\n\t"
"ror x12, x7, #14\n\t" "ror x12, x7, #14\n\t"
"ext v9.16b, v0.16b, v1.16b, #8\n\t"
"ror x14, x11, #28\n\t"
"add v4.2d, v4.2d, v9.2d\n\t"
"ror x13, x7, #18\n\t"
"shl v8.2d, v3.2d, #45\n\t" "shl v8.2d, v3.2d, #45\n\t"
"ror x15, x11, #34\n\t" "ror x14, x11, #28\n\t"
"sri v8.2d, v3.2d, #19\n\t" "sri v8.2d, v3.2d, #19\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x7, ror 18\n\t"
"shl v9.2d, v3.2d, #3\n\t" "shl v9.2d, v3.2d, #3\n\t"
"eor x14, x15, x14\n\t" "eor x14, x14, x11, ror 34\n\t"
"sri v9.2d, v3.2d, #61\n\t" "sri v9.2d, v3.2d, #61\n\t"
"ror x13, x7, #41\n\t" "eor x12, x12, x7, ror 41\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t" "eor v9.16b, v9.16b, v8.16b\n\t"
"ror x15, x11, #39\n\t" "eor x15, x14, x11, ror 39\n\t"
"ushr v8.2d, v3.2d, #6\n\t" "ushr v8.2d, v3.2d, #6\n\t"
"eor x12, x13, x12\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t"
"eor x15, x15, x14\n\t"
"add v4.2d, v4.2d, v9.2d\n\t"
"add x10, x10, x12\n\t" "add x10, x10, x12\n\t"
"shl v8.2d, v10.2d, #63\n\t" "eor v9.16b, v9.16b, v8.16b\n\t"
"eor x16, x11, x4\n\t" "eor x16, x11, x4\n\t"
"sri v8.2d, v10.2d, #1\n\t"
"eor x12, x8, x9\n\t"
"tbl v9.16b, { v10.16b }, v11.16b\n\t"
"and x17, x16, x17\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t"
"and x12, x12, x7\n\t"
"ushr v10.2d, v10.2d, #7\n\t"
"add x10, x10, x18\n\t"
"eor v9.16b, v9.16b, v10.16b\n\t"
"eor x12, x12, x9\n\t"
"add v4.2d, v4.2d, v9.2d\n\t" "add v4.2d, v4.2d, v9.2d\n\t"
"eor x12, x8, x9\n\t"
"ext v9.16b, v0.16b, v1.16b, #8\n\t"
"and x17, x16, x17\n\t"
"add v4.2d, v4.2d, v9.2d\n\t"
"and x12, x12, x7\n\t"
"shl v8.2d, v10.2d, #63\n\t"
"add x10, x10, x18\n\t"
"sri v8.2d, v10.2d, #1\n\t"
"eor x12, x12, x9\n\t"
"tbl v9.16b, {v10.16b}, v11.16b\n\t"
"add x10, x10, x19\n\t" "add x10, x10, x19\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t"
"eor x17, x17, x4\n\t" "eor x17, x17, x4\n\t"
"ushr v10.2d, v10.2d, #7\n\t"
"add x10, x10, x12\n\t" "add x10, x10, x12\n\t"
"eor v9.16b, v9.16b, v10.16b\n\t"
"add x15, x15, x17\n\t" "add x15, x15, x17\n\t"
"add v4.2d, v4.2d, v9.2d\n\t"
"add x6, x6, x10\n\t" "add x6, x6, x10\n\t"
"add x10, x10, x15\n\t" "add x10, x10, x15\n\t"
/* Round 10 */ /* Round 10 */
@ -508,14 +472,10 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ror x12, x6, #14\n\t" "ror x12, x6, #14\n\t"
"ror x14, x10, #28\n\t" "ror x14, x10, #28\n\t"
"ror x13, x6, #18\n\t" "eor x12, x12, x6, ror 18\n\t"
"ror x15, x10, #34\n\t" "eor x14, x14, x10, ror 34\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x6, ror 41\n\t"
"eor x14, x15, x14\n\t" "eor x15, x14, x10, ror 39\n\t"
"ror x13, x6, #41\n\t"
"ror x15, x10, #39\n\t"
"eor x12, x13, x12\n\t"
"eor x15, x15, x14\n\t"
"add x9, x9, x12\n\t" "add x9, x9, x12\n\t"
"eor x17, x10, x11\n\t" "eor x17, x10, x11\n\t"
"eor x12, x7, x8\n\t" "eor x12, x7, x8\n\t"
@ -534,43 +494,39 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ext v10.16b, v5.16b, v6.16b, #8\n\t" "ext v10.16b, v5.16b, v6.16b, #8\n\t"
"ror x12, x5, #14\n\t" "ror x12, x5, #14\n\t"
"ext v9.16b, v1.16b, v2.16b, #8\n\t"
"ror x14, x9, #28\n\t"
"add v5.2d, v5.2d, v9.2d\n\t"
"ror x13, x5, #18\n\t"
"shl v8.2d, v4.2d, #45\n\t" "shl v8.2d, v4.2d, #45\n\t"
"ror x15, x9, #34\n\t" "ror x14, x9, #28\n\t"
"sri v8.2d, v4.2d, #19\n\t" "sri v8.2d, v4.2d, #19\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x5, ror 18\n\t"
"shl v9.2d, v4.2d, #3\n\t" "shl v9.2d, v4.2d, #3\n\t"
"eor x14, x15, x14\n\t" "eor x14, x14, x9, ror 34\n\t"
"sri v9.2d, v4.2d, #61\n\t" "sri v9.2d, v4.2d, #61\n\t"
"ror x13, x5, #41\n\t" "eor x12, x12, x5, ror 41\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t" "eor v9.16b, v9.16b, v8.16b\n\t"
"ror x15, x9, #39\n\t" "eor x15, x14, x9, ror 39\n\t"
"ushr v8.2d, v4.2d, #6\n\t" "ushr v8.2d, v4.2d, #6\n\t"
"eor x12, x13, x12\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t"
"eor x15, x15, x14\n\t"
"add v5.2d, v5.2d, v9.2d\n\t"
"add x8, x8, x12\n\t" "add x8, x8, x12\n\t"
"shl v8.2d, v10.2d, #63\n\t" "eor v9.16b, v9.16b, v8.16b\n\t"
"eor x16, x9, x10\n\t" "eor x16, x9, x10\n\t"
"sri v8.2d, v10.2d, #1\n\t"
"eor x12, x6, x7\n\t"
"tbl v9.16b, { v10.16b }, v11.16b\n\t"
"and x17, x16, x17\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t"
"and x12, x12, x5\n\t"
"ushr v10.2d, v10.2d, #7\n\t"
"add x8, x8, x18\n\t"
"eor v9.16b, v9.16b, v10.16b\n\t"
"eor x12, x12, x7\n\t"
"add v5.2d, v5.2d, v9.2d\n\t" "add v5.2d, v5.2d, v9.2d\n\t"
"eor x12, x6, x7\n\t"
"ext v9.16b, v1.16b, v2.16b, #8\n\t"
"and x17, x16, x17\n\t"
"add v5.2d, v5.2d, v9.2d\n\t"
"and x12, x12, x5\n\t"
"shl v8.2d, v10.2d, #63\n\t"
"add x8, x8, x18\n\t"
"sri v8.2d, v10.2d, #1\n\t"
"eor x12, x12, x7\n\t"
"tbl v9.16b, {v10.16b}, v11.16b\n\t"
"add x8, x8, x19\n\t" "add x8, x8, x19\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t"
"eor x17, x17, x10\n\t" "eor x17, x17, x10\n\t"
"ushr v10.2d, v10.2d, #7\n\t"
"add x8, x8, x12\n\t" "add x8, x8, x12\n\t"
"eor v9.16b, v9.16b, v10.16b\n\t"
"add x15, x15, x17\n\t" "add x15, x15, x17\n\t"
"add v5.2d, v5.2d, v9.2d\n\t"
"add x4, x4, x8\n\t" "add x4, x4, x8\n\t"
"add x8, x8, x15\n\t" "add x8, x8, x15\n\t"
/* Round 12 */ /* Round 12 */
@ -578,14 +534,10 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ror x12, x4, #14\n\t" "ror x12, x4, #14\n\t"
"ror x14, x8, #28\n\t" "ror x14, x8, #28\n\t"
"ror x13, x4, #18\n\t" "eor x12, x12, x4, ror 18\n\t"
"ror x15, x8, #34\n\t" "eor x14, x14, x8, ror 34\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x4, ror 41\n\t"
"eor x14, x15, x14\n\t" "eor x15, x14, x8, ror 39\n\t"
"ror x13, x4, #41\n\t"
"ror x15, x8, #39\n\t"
"eor x12, x13, x12\n\t"
"eor x15, x15, x14\n\t"
"add x7, x7, x12\n\t" "add x7, x7, x12\n\t"
"eor x17, x8, x9\n\t" "eor x17, x8, x9\n\t"
"eor x12, x5, x6\n\t" "eor x12, x5, x6\n\t"
@ -604,43 +556,39 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ext v10.16b, v6.16b, v7.16b, #8\n\t" "ext v10.16b, v6.16b, v7.16b, #8\n\t"
"ror x12, x11, #14\n\t" "ror x12, x11, #14\n\t"
"ext v9.16b, v2.16b, v3.16b, #8\n\t"
"ror x14, x7, #28\n\t"
"add v6.2d, v6.2d, v9.2d\n\t"
"ror x13, x11, #18\n\t"
"shl v8.2d, v5.2d, #45\n\t" "shl v8.2d, v5.2d, #45\n\t"
"ror x15, x7, #34\n\t" "ror x14, x7, #28\n\t"
"sri v8.2d, v5.2d, #19\n\t" "sri v8.2d, v5.2d, #19\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x11, ror 18\n\t"
"shl v9.2d, v5.2d, #3\n\t" "shl v9.2d, v5.2d, #3\n\t"
"eor x14, x15, x14\n\t" "eor x14, x14, x7, ror 34\n\t"
"sri v9.2d, v5.2d, #61\n\t" "sri v9.2d, v5.2d, #61\n\t"
"ror x13, x11, #41\n\t" "eor x12, x12, x11, ror 41\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t" "eor v9.16b, v9.16b, v8.16b\n\t"
"ror x15, x7, #39\n\t" "eor x15, x14, x7, ror 39\n\t"
"ushr v8.2d, v5.2d, #6\n\t" "ushr v8.2d, v5.2d, #6\n\t"
"eor x12, x13, x12\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t"
"eor x15, x15, x14\n\t"
"add v6.2d, v6.2d, v9.2d\n\t"
"add x6, x6, x12\n\t" "add x6, x6, x12\n\t"
"shl v8.2d, v10.2d, #63\n\t" "eor v9.16b, v9.16b, v8.16b\n\t"
"eor x16, x7, x8\n\t" "eor x16, x7, x8\n\t"
"sri v8.2d, v10.2d, #1\n\t"
"eor x12, x4, x5\n\t"
"tbl v9.16b, { v10.16b }, v11.16b\n\t"
"and x17, x16, x17\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t"
"and x12, x12, x11\n\t"
"ushr v10.2d, v10.2d, #7\n\t"
"add x6, x6, x18\n\t"
"eor v9.16b, v9.16b, v10.16b\n\t"
"eor x12, x12, x5\n\t"
"add v6.2d, v6.2d, v9.2d\n\t" "add v6.2d, v6.2d, v9.2d\n\t"
"eor x12, x4, x5\n\t"
"ext v9.16b, v2.16b, v3.16b, #8\n\t"
"and x17, x16, x17\n\t"
"add v6.2d, v6.2d, v9.2d\n\t"
"and x12, x12, x11\n\t"
"shl v8.2d, v10.2d, #63\n\t"
"add x6, x6, x18\n\t"
"sri v8.2d, v10.2d, #1\n\t"
"eor x12, x12, x5\n\t"
"tbl v9.16b, {v10.16b}, v11.16b\n\t"
"add x6, x6, x19\n\t" "add x6, x6, x19\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t"
"eor x17, x17, x8\n\t" "eor x17, x17, x8\n\t"
"ushr v10.2d, v10.2d, #7\n\t"
"add x6, x6, x12\n\t" "add x6, x6, x12\n\t"
"eor v9.16b, v9.16b, v10.16b\n\t"
"add x15, x15, x17\n\t" "add x15, x15, x17\n\t"
"add v6.2d, v6.2d, v9.2d\n\t"
"add x10, x10, x6\n\t" "add x10, x10, x6\n\t"
"add x6, x6, x15\n\t" "add x6, x6, x15\n\t"
/* Round 14 */ /* Round 14 */
@ -648,14 +596,10 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ror x12, x10, #14\n\t" "ror x12, x10, #14\n\t"
"ror x14, x6, #28\n\t" "ror x14, x6, #28\n\t"
"ror x13, x10, #18\n\t" "eor x12, x12, x10, ror 18\n\t"
"ror x15, x6, #34\n\t" "eor x14, x14, x6, ror 34\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x10, ror 41\n\t"
"eor x14, x15, x14\n\t" "eor x15, x14, x6, ror 39\n\t"
"ror x13, x10, #41\n\t"
"ror x15, x6, #39\n\t"
"eor x12, x13, x12\n\t"
"eor x15, x15, x14\n\t"
"add x5, x5, x12\n\t" "add x5, x5, x12\n\t"
"eor x17, x6, x7\n\t" "eor x17, x6, x7\n\t"
"eor x12, x11, x4\n\t" "eor x12, x11, x4\n\t"
@ -674,43 +618,39 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ext v10.16b, v7.16b, v0.16b, #8\n\t" "ext v10.16b, v7.16b, v0.16b, #8\n\t"
"ror x12, x9, #14\n\t" "ror x12, x9, #14\n\t"
"ext v9.16b, v3.16b, v4.16b, #8\n\t"
"ror x14, x5, #28\n\t"
"add v7.2d, v7.2d, v9.2d\n\t"
"ror x13, x9, #18\n\t"
"shl v8.2d, v6.2d, #45\n\t" "shl v8.2d, v6.2d, #45\n\t"
"ror x15, x5, #34\n\t" "ror x14, x5, #28\n\t"
"sri v8.2d, v6.2d, #19\n\t" "sri v8.2d, v6.2d, #19\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x9, ror 18\n\t"
"shl v9.2d, v6.2d, #3\n\t" "shl v9.2d, v6.2d, #3\n\t"
"eor x14, x15, x14\n\t" "eor x14, x14, x5, ror 34\n\t"
"sri v9.2d, v6.2d, #61\n\t" "sri v9.2d, v6.2d, #61\n\t"
"ror x13, x9, #41\n\t" "eor x12, x12, x9, ror 41\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t" "eor v9.16b, v9.16b, v8.16b\n\t"
"ror x15, x5, #39\n\t" "eor x15, x14, x5, ror 39\n\t"
"ushr v8.2d, v6.2d, #6\n\t" "ushr v8.2d, v6.2d, #6\n\t"
"eor x12, x13, x12\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t"
"eor x15, x15, x14\n\t"
"add v7.2d, v7.2d, v9.2d\n\t"
"add x4, x4, x12\n\t" "add x4, x4, x12\n\t"
"shl v8.2d, v10.2d, #63\n\t" "eor v9.16b, v9.16b, v8.16b\n\t"
"eor x16, x5, x6\n\t" "eor x16, x5, x6\n\t"
"sri v8.2d, v10.2d, #1\n\t"
"eor x12, x10, x11\n\t"
"tbl v9.16b, { v10.16b }, v11.16b\n\t"
"and x17, x16, x17\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t"
"and x12, x12, x9\n\t"
"ushr v10.2d, v10.2d, #7\n\t"
"add x4, x4, x18\n\t"
"eor v9.16b, v9.16b, v10.16b\n\t"
"eor x12, x12, x11\n\t"
"add v7.2d, v7.2d, v9.2d\n\t" "add v7.2d, v7.2d, v9.2d\n\t"
"eor x12, x10, x11\n\t"
"ext v9.16b, v3.16b, v4.16b, #8\n\t"
"and x17, x16, x17\n\t"
"add v7.2d, v7.2d, v9.2d\n\t"
"and x12, x12, x9\n\t"
"shl v8.2d, v10.2d, #63\n\t"
"add x4, x4, x18\n\t"
"sri v8.2d, v10.2d, #1\n\t"
"eor x12, x12, x11\n\t"
"tbl v9.16b, {v10.16b}, v11.16b\n\t"
"add x4, x4, x19\n\t" "add x4, x4, x19\n\t"
"eor v9.16b, v9.16b, v8.16b\n\t"
"eor x17, x17, x6\n\t" "eor x17, x17, x6\n\t"
"ushr v10.2d, v10.2d, #7\n\t"
"add x4, x4, x12\n\t" "add x4, x4, x12\n\t"
"eor v9.16b, v9.16b, v10.16b\n\t"
"add x15, x15, x17\n\t" "add x15, x15, x17\n\t"
"add v7.2d, v7.2d, v9.2d\n\t"
"add x8, x8, x4\n\t" "add x8, x8, x4\n\t"
"add x4, x4, x15\n\t" "add x4, x4, x15\n\t"
"subs x28, x28, #1\n\t" "subs x28, x28, #1\n\t"
@ -720,14 +660,10 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ror x12, x8, #14\n\t" "ror x12, x8, #14\n\t"
"ror x14, x4, #28\n\t" "ror x14, x4, #28\n\t"
"ror x13, x8, #18\n\t" "eor x12, x12, x8, ror 18\n\t"
"ror x15, x4, #34\n\t" "eor x14, x14, x4, ror 34\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x8, ror 41\n\t"
"eor x14, x15, x14\n\t" "eor x15, x14, x4, ror 39\n\t"
"ror x13, x8, #41\n\t"
"ror x15, x4, #39\n\t"
"eor x12, x13, x12\n\t"
"eor x15, x15, x14\n\t"
"add x11, x11, x12\n\t" "add x11, x11, x12\n\t"
"eor x17, x4, x5\n\t" "eor x17, x4, x5\n\t"
"eor x12, x9, x10\n\t" "eor x12, x9, x10\n\t"
@ -746,14 +682,10 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ror x12, x7, #14\n\t" "ror x12, x7, #14\n\t"
"ror x14, x11, #28\n\t" "ror x14, x11, #28\n\t"
"ror x13, x7, #18\n\t" "eor x12, x12, x7, ror 18\n\t"
"ror x15, x11, #34\n\t" "eor x14, x14, x11, ror 34\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x7, ror 41\n\t"
"eor x14, x15, x14\n\t" "eor x15, x14, x11, ror 39\n\t"
"ror x13, x7, #41\n\t"
"ror x15, x11, #39\n\t"
"eor x12, x13, x12\n\t"
"eor x15, x15, x14\n\t"
"add x10, x10, x12\n\t" "add x10, x10, x12\n\t"
"eor x16, x11, x4\n\t" "eor x16, x11, x4\n\t"
"eor x12, x8, x9\n\t" "eor x12, x8, x9\n\t"
@ -772,14 +704,10 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ror x12, x6, #14\n\t" "ror x12, x6, #14\n\t"
"ror x14, x10, #28\n\t" "ror x14, x10, #28\n\t"
"ror x13, x6, #18\n\t" "eor x12, x12, x6, ror 18\n\t"
"ror x15, x10, #34\n\t" "eor x14, x14, x10, ror 34\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x6, ror 41\n\t"
"eor x14, x15, x14\n\t" "eor x15, x14, x10, ror 39\n\t"
"ror x13, x6, #41\n\t"
"ror x15, x10, #39\n\t"
"eor x12, x13, x12\n\t"
"eor x15, x15, x14\n\t"
"add x9, x9, x12\n\t" "add x9, x9, x12\n\t"
"eor x17, x10, x11\n\t" "eor x17, x10, x11\n\t"
"eor x12, x7, x8\n\t" "eor x12, x7, x8\n\t"
@ -798,14 +726,10 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ror x12, x5, #14\n\t" "ror x12, x5, #14\n\t"
"ror x14, x9, #28\n\t" "ror x14, x9, #28\n\t"
"ror x13, x5, #18\n\t" "eor x12, x12, x5, ror 18\n\t"
"ror x15, x9, #34\n\t" "eor x14, x14, x9, ror 34\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x5, ror 41\n\t"
"eor x14, x15, x14\n\t" "eor x15, x14, x9, ror 39\n\t"
"ror x13, x5, #41\n\t"
"ror x15, x9, #39\n\t"
"eor x12, x13, x12\n\t"
"eor x15, x15, x14\n\t"
"add x8, x8, x12\n\t" "add x8, x8, x12\n\t"
"eor x16, x9, x10\n\t" "eor x16, x9, x10\n\t"
"eor x12, x6, x7\n\t" "eor x12, x6, x7\n\t"
@ -824,14 +748,10 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ror x12, x4, #14\n\t" "ror x12, x4, #14\n\t"
"ror x14, x8, #28\n\t" "ror x14, x8, #28\n\t"
"ror x13, x4, #18\n\t" "eor x12, x12, x4, ror 18\n\t"
"ror x15, x8, #34\n\t" "eor x14, x14, x8, ror 34\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x4, ror 41\n\t"
"eor x14, x15, x14\n\t" "eor x15, x14, x8, ror 39\n\t"
"ror x13, x4, #41\n\t"
"ror x15, x8, #39\n\t"
"eor x12, x13, x12\n\t"
"eor x15, x15, x14\n\t"
"add x7, x7, x12\n\t" "add x7, x7, x12\n\t"
"eor x17, x8, x9\n\t" "eor x17, x8, x9\n\t"
"eor x12, x5, x6\n\t" "eor x12, x5, x6\n\t"
@ -850,14 +770,10 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ror x12, x11, #14\n\t" "ror x12, x11, #14\n\t"
"ror x14, x7, #28\n\t" "ror x14, x7, #28\n\t"
"ror x13, x11, #18\n\t" "eor x12, x12, x11, ror 18\n\t"
"ror x15, x7, #34\n\t" "eor x14, x14, x7, ror 34\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x11, ror 41\n\t"
"eor x14, x15, x14\n\t" "eor x15, x14, x7, ror 39\n\t"
"ror x13, x11, #41\n\t"
"ror x15, x7, #39\n\t"
"eor x12, x13, x12\n\t"
"eor x15, x15, x14\n\t"
"add x6, x6, x12\n\t" "add x6, x6, x12\n\t"
"eor x16, x7, x8\n\t" "eor x16, x7, x8\n\t"
"eor x12, x4, x5\n\t" "eor x12, x4, x5\n\t"
@ -876,14 +792,10 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ror x12, x10, #14\n\t" "ror x12, x10, #14\n\t"
"ror x14, x6, #28\n\t" "ror x14, x6, #28\n\t"
"ror x13, x10, #18\n\t" "eor x12, x12, x10, ror 18\n\t"
"ror x15, x6, #34\n\t" "eor x14, x14, x6, ror 34\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x10, ror 41\n\t"
"eor x14, x15, x14\n\t" "eor x15, x14, x6, ror 39\n\t"
"ror x13, x10, #41\n\t"
"ror x15, x6, #39\n\t"
"eor x12, x13, x12\n\t"
"eor x15, x15, x14\n\t"
"add x5, x5, x12\n\t" "add x5, x5, x12\n\t"
"eor x17, x6, x7\n\t" "eor x17, x6, x7\n\t"
"eor x12, x11, x4\n\t" "eor x12, x11, x4\n\t"
@ -902,14 +814,10 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ror x12, x9, #14\n\t" "ror x12, x9, #14\n\t"
"ror x14, x5, #28\n\t" "ror x14, x5, #28\n\t"
"ror x13, x9, #18\n\t" "eor x12, x12, x9, ror 18\n\t"
"ror x15, x5, #34\n\t" "eor x14, x14, x5, ror 34\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x9, ror 41\n\t"
"eor x14, x15, x14\n\t" "eor x15, x14, x5, ror 39\n\t"
"ror x13, x9, #41\n\t"
"ror x15, x5, #39\n\t"
"eor x12, x13, x12\n\t"
"eor x15, x15, x14\n\t"
"add x4, x4, x12\n\t" "add x4, x4, x12\n\t"
"eor x16, x5, x6\n\t" "eor x16, x5, x6\n\t"
"eor x12, x10, x11\n\t" "eor x12, x10, x11\n\t"
@ -928,14 +836,10 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ror x12, x8, #14\n\t" "ror x12, x8, #14\n\t"
"ror x14, x4, #28\n\t" "ror x14, x4, #28\n\t"
"ror x13, x8, #18\n\t" "eor x12, x12, x8, ror 18\n\t"
"ror x15, x4, #34\n\t" "eor x14, x14, x4, ror 34\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x8, ror 41\n\t"
"eor x14, x15, x14\n\t" "eor x15, x14, x4, ror 39\n\t"
"ror x13, x8, #41\n\t"
"ror x15, x4, #39\n\t"
"eor x12, x13, x12\n\t"
"eor x15, x15, x14\n\t"
"add x11, x11, x12\n\t" "add x11, x11, x12\n\t"
"eor x17, x4, x5\n\t" "eor x17, x4, x5\n\t"
"eor x12, x9, x10\n\t" "eor x12, x9, x10\n\t"
@ -954,14 +858,10 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ror x12, x7, #14\n\t" "ror x12, x7, #14\n\t"
"ror x14, x11, #28\n\t" "ror x14, x11, #28\n\t"
"ror x13, x7, #18\n\t" "eor x12, x12, x7, ror 18\n\t"
"ror x15, x11, #34\n\t" "eor x14, x14, x11, ror 34\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x7, ror 41\n\t"
"eor x14, x15, x14\n\t" "eor x15, x14, x11, ror 39\n\t"
"ror x13, x7, #41\n\t"
"ror x15, x11, #39\n\t"
"eor x12, x13, x12\n\t"
"eor x15, x15, x14\n\t"
"add x10, x10, x12\n\t" "add x10, x10, x12\n\t"
"eor x16, x11, x4\n\t" "eor x16, x11, x4\n\t"
"eor x12, x8, x9\n\t" "eor x12, x8, x9\n\t"
@ -980,14 +880,10 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ror x12, x6, #14\n\t" "ror x12, x6, #14\n\t"
"ror x14, x10, #28\n\t" "ror x14, x10, #28\n\t"
"ror x13, x6, #18\n\t" "eor x12, x12, x6, ror 18\n\t"
"ror x15, x10, #34\n\t" "eor x14, x14, x10, ror 34\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x6, ror 41\n\t"
"eor x14, x15, x14\n\t" "eor x15, x14, x10, ror 39\n\t"
"ror x13, x6, #41\n\t"
"ror x15, x10, #39\n\t"
"eor x12, x13, x12\n\t"
"eor x15, x15, x14\n\t"
"add x9, x9, x12\n\t" "add x9, x9, x12\n\t"
"eor x17, x10, x11\n\t" "eor x17, x10, x11\n\t"
"eor x12, x7, x8\n\t" "eor x12, x7, x8\n\t"
@ -1006,14 +902,10 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ror x12, x5, #14\n\t" "ror x12, x5, #14\n\t"
"ror x14, x9, #28\n\t" "ror x14, x9, #28\n\t"
"ror x13, x5, #18\n\t" "eor x12, x12, x5, ror 18\n\t"
"ror x15, x9, #34\n\t" "eor x14, x14, x9, ror 34\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x5, ror 41\n\t"
"eor x14, x15, x14\n\t" "eor x15, x14, x9, ror 39\n\t"
"ror x13, x5, #41\n\t"
"ror x15, x9, #39\n\t"
"eor x12, x13, x12\n\t"
"eor x15, x15, x14\n\t"
"add x8, x8, x12\n\t" "add x8, x8, x12\n\t"
"eor x16, x9, x10\n\t" "eor x16, x9, x10\n\t"
"eor x12, x6, x7\n\t" "eor x12, x6, x7\n\t"
@ -1032,14 +924,10 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ror x12, x4, #14\n\t" "ror x12, x4, #14\n\t"
"ror x14, x8, #28\n\t" "ror x14, x8, #28\n\t"
"ror x13, x4, #18\n\t" "eor x12, x12, x4, ror 18\n\t"
"ror x15, x8, #34\n\t" "eor x14, x14, x8, ror 34\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x4, ror 41\n\t"
"eor x14, x15, x14\n\t" "eor x15, x14, x8, ror 39\n\t"
"ror x13, x4, #41\n\t"
"ror x15, x8, #39\n\t"
"eor x12, x13, x12\n\t"
"eor x15, x15, x14\n\t"
"add x7, x7, x12\n\t" "add x7, x7, x12\n\t"
"eor x17, x8, x9\n\t" "eor x17, x8, x9\n\t"
"eor x12, x5, x6\n\t" "eor x12, x5, x6\n\t"
@ -1058,14 +946,10 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ror x12, x11, #14\n\t" "ror x12, x11, #14\n\t"
"ror x14, x7, #28\n\t" "ror x14, x7, #28\n\t"
"ror x13, x11, #18\n\t" "eor x12, x12, x11, ror 18\n\t"
"ror x15, x7, #34\n\t" "eor x14, x14, x7, ror 34\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x11, ror 41\n\t"
"eor x14, x15, x14\n\t" "eor x15, x14, x7, ror 39\n\t"
"ror x13, x11, #41\n\t"
"ror x15, x7, #39\n\t"
"eor x12, x13, x12\n\t"
"eor x15, x15, x14\n\t"
"add x6, x6, x12\n\t" "add x6, x6, x12\n\t"
"eor x16, x7, x8\n\t" "eor x16, x7, x8\n\t"
"eor x12, x4, x5\n\t" "eor x12, x4, x5\n\t"
@ -1084,14 +968,10 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ror x12, x10, #14\n\t" "ror x12, x10, #14\n\t"
"ror x14, x6, #28\n\t" "ror x14, x6, #28\n\t"
"ror x13, x10, #18\n\t" "eor x12, x12, x10, ror 18\n\t"
"ror x15, x6, #34\n\t" "eor x14, x14, x6, ror 34\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x10, ror 41\n\t"
"eor x14, x15, x14\n\t" "eor x15, x14, x6, ror 39\n\t"
"ror x13, x10, #41\n\t"
"ror x15, x6, #39\n\t"
"eor x12, x13, x12\n\t"
"eor x15, x15, x14\n\t"
"add x5, x5, x12\n\t" "add x5, x5, x12\n\t"
"eor x17, x6, x7\n\t" "eor x17, x6, x7\n\t"
"eor x12, x11, x4\n\t" "eor x12, x11, x4\n\t"
@ -1110,14 +990,10 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"ldr x19, [x3], #8\n\t" "ldr x19, [x3], #8\n\t"
"ror x12, x9, #14\n\t" "ror x12, x9, #14\n\t"
"ror x14, x5, #28\n\t" "ror x14, x5, #28\n\t"
"ror x13, x9, #18\n\t" "eor x12, x12, x9, ror 18\n\t"
"ror x15, x5, #34\n\t" "eor x14, x14, x5, ror 34\n\t"
"eor x12, x13, x12\n\t" "eor x12, x12, x9, ror 41\n\t"
"eor x14, x15, x14\n\t" "eor x15, x14, x5, ror 39\n\t"
"ror x13, x9, #41\n\t"
"ror x15, x5, #39\n\t"
"eor x12, x13, x12\n\t"
"eor x15, x15, x14\n\t"
"add x4, x4, x12\n\t" "add x4, x4, x12\n\t"
"eor x16, x5, x6\n\t" "eor x16, x5, x6\n\t"
"eor x12, x10, x11\n\t" "eor x12, x10, x11\n\t"
@ -1140,17 +1016,17 @@ int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
"add x5, x5, x21\n\t" "add x5, x5, x21\n\t"
"add x4, x4, x20\n\t" "add x4, x4, x20\n\t"
"adr x3, %[L_SHA512_transform_neon_len_k]\n\t" "adr x3, %[L_SHA512_transform_neon_len_k]\n\t"
"subs %[len], %[len], #0x80\n\t" "subs %w[len], %w[len], #0x80\n\t"
"bne L_sha512_len_neon_begin_%=\n\t" "bne L_sha512_len_neon_begin_%=\n\t"
"stp x4, x5, [%[sha512]]\n\t" "stp x4, x5, [%x[sha512]]\n\t"
"stp x6, x7, [%[sha512], #16]\n\t" "stp x6, x7, [%x[sha512], #16]\n\t"
"stp x8, x9, [%[sha512], #32]\n\t" "stp x8, x9, [%x[sha512], #32]\n\t"
"stp x10, x11, [%[sha512], #48]\n\t" "stp x10, x11, [%x[sha512], #48]\n\t"
"eor x0, x0, x0\n\t" "eor x0, x0, x0\n\t"
"ldp x29, x30, [sp], #16\n\t" "ldp x29, x30, [sp], #16\n\t"
: [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len) : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len)
: [L_SHA512_transform_neon_len_k] "S" (L_SHA512_transform_neon_len_k), [L_SHA512_transform_neon_len_ror8] "S" (L_SHA512_transform_neon_len_ror8) : [L_SHA512_transform_neon_len_k] "S" (L_SHA512_transform_neon_len_k), [L_SHA512_transform_neon_len_ror8] "S" (L_SHA512_transform_neon_len_ror8)
: "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10" : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11"
); );
return (uint32_t)(size_t)sha512; return (uint32_t)(size_t)sha512;
} }