SHA-256 AArch64: fix alignments on loads and stores

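The input buffer may be unaligned, so it must be loaded with byte-element (.16b) loads, which only require byte alignment.
Fix the other loads and stores so that their alignment is consistent as well.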
This commit is contained in:
Sean Parkinson
2024-02-02 12:35:39 +10:00
parent f9bf96d9ba
commit c8b0aac144

View File

@ -281,7 +281,7 @@ static WC_INLINE void Sha256Transform(wc_Sha256* sha256, const byte* data,
"CBZ w8, 2f \n" "CBZ w8, 2f \n"
"#load in message and schedule updates \n" "#load in message and schedule updates \n"
"LD1 {v0.2d-v3.2d}, [%[dataIn]], #64 \n" "LD1 {v0.16b-v3.16b}, [%[dataIn]], #64 \n"
"MOV v14.16b, v12.16b \n" "MOV v14.16b, v12.16b \n"
"MOV v15.16b, v13.16b \n" "MOV v15.16b, v13.16b \n"
"REV32 v0.16b, v0.16b \n" "REV32 v0.16b, v0.16b \n"
@ -291,7 +291,7 @@ static WC_INLINE void Sha256Transform(wc_Sha256* sha256, const byte* data,
"B 1b \n" /* do another block */ "B 1b \n" /* do another block */
"2:\n" "2:\n"
"STP q12, q13, %[out] \n" "ST1 {v12.2d-v13.2d}, %[out] \n"
: [out] "=m" (sha256->digest), "=m" (sha256->buffer), "=r" (numBlocks), : [out] "=m" (sha256->digest), "=m" (sha256->buffer), "=r" (numBlocks),
"=r" (data), "=r" (k) "=r" (data), "=r" (k)
@ -378,7 +378,7 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash)
"MOV v16.16b, v20.16b \n" "MOV v16.16b, v20.16b \n"
"MOV v17.16b, v21.16b \n" "MOV v17.16b, v21.16b \n"
"LD1 {v22.16b-v25.16b}, [%[k]], #64 \n" "LD1 {v22.4s-v25.4s}, [%[k]], #64 \n"
"SHA256SU0 v4.4s, v1.4s \n" "SHA256SU0 v4.4s, v1.4s \n"
"ADD v0.4s, v0.4s, v22.4s \n" "ADD v0.4s, v0.4s, v22.4s \n"
"MOV v6.16b, v2.16b \n" "MOV v6.16b, v2.16b \n"
@ -411,7 +411,7 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash)
"SHA256H q16, q17, v3.4s \n" "SHA256H q16, q17, v3.4s \n"
"SHA256H2 q17, q18, v3.4s \n" "SHA256H2 q17, q18, v3.4s \n"
"LD1 {v22.16b-v25.16b}, [%[k]], #64 \n" "LD1 {v22.4s-v25.4s}, [%[k]], #64 \n"
"SHA256SU0 v8.4s, v5.4s \n" "SHA256SU0 v8.4s, v5.4s \n"
"ADD v4.4s, v4.4s, v22.4s \n" "ADD v4.4s, v4.4s, v22.4s \n"
"MOV v18.16b, v16.16b \n" "MOV v18.16b, v16.16b \n"
@ -444,7 +444,7 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash)
"SHA256H q16, q17, v7.4s \n" "SHA256H q16, q17, v7.4s \n"
"SHA256H2 q17, q18, v7.4s \n" "SHA256H2 q17, q18, v7.4s \n"
"LD1 {v22.16b-v25.16b}, [%[k]], #64 \n" "LD1 {v22.4s-v25.4s}, [%[k]], #64 \n"
"SHA256SU0 v12.4s, v9.4s \n" "SHA256SU0 v12.4s, v9.4s \n"
"ADD v8.4s, v8.4s, v22.4s \n" "ADD v8.4s, v8.4s, v22.4s \n"
"MOV v18.16b, v16.16b \n" "MOV v18.16b, v16.16b \n"
@ -475,7 +475,7 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash)
"SHA256H q16, q17, v11.4s \n" "SHA256H q16, q17, v11.4s \n"
"SHA256H2 q17, q18, v11.4s \n" "SHA256H2 q17, q18, v11.4s \n"
"LD1 {v22.16b-v25.16b}, [%[k]] \n" "LD1 {v22.4s-v25.4s}, [%[k]] \n"
"ADD v12.4s, v12.4s, v22.4s \n" "ADD v12.4s, v12.4s, v22.4s \n"
"MOV v18.16b, v16.16b \n" "MOV v18.16b, v16.16b \n"
"SHA256H q16, q17, v12.4s \n" "SHA256H q16, q17, v12.4s \n"
@ -499,7 +499,7 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash)
"#Add working vars back into digest state \n" "#Add working vars back into digest state \n"
"ADD v16.4s, v16.4s, v20.4s \n" "ADD v16.4s, v16.4s, v20.4s \n"
"ADD v17.4s, v17.4s, v21.4s \n" "ADD v17.4s, v17.4s, v21.4s \n"
"STP q16, q17, %[out] \n" "ST1 {v16.2d-v17.2d}, %[out] \n"
: [out] "=m" (sha256->digest), [k] "+r" (k) : [out] "=m" (sha256->digest), [k] "+r" (k)
: [digest] "m" (sha256->digest), : [digest] "m" (sha256->digest),
@ -549,7 +549,7 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash)
"MOV v16.16b, v20.16b \n" "MOV v16.16b, v20.16b \n"
"MOV v17.16b, v21.16b \n" "MOV v17.16b, v21.16b \n"
"LD1 {v22.16b-v25.16b}, [%[k]], #64 \n" "LD1 {v22.4s-v25.4s}, [%[k]], #64 \n"
"SHA256SU0 v4.4s, v1.4s \n" "SHA256SU0 v4.4s, v1.4s \n"
"ADD v0.4s, v0.4s, v22.4s \n" "ADD v0.4s, v0.4s, v22.4s \n"
"MOV v6.16b, v2.16b \n" "MOV v6.16b, v2.16b \n"
@ -582,7 +582,7 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash)
"SHA256H q16, q17, v3.4s \n" "SHA256H q16, q17, v3.4s \n"
"SHA256H2 q17, q18, v3.4s \n" "SHA256H2 q17, q18, v3.4s \n"
"LD1 {v22.16b-v25.16b}, [%[k]], #64 \n" "LD1 {v22.4s-v25.4s}, [%[k]], #64 \n"
"SHA256SU0 v8.4s, v5.4s \n" "SHA256SU0 v8.4s, v5.4s \n"
"ADD v4.4s, v4.4s, v22.4s \n" "ADD v4.4s, v4.4s, v22.4s \n"
"MOV v18.16b, v16.16b \n" "MOV v18.16b, v16.16b \n"
@ -615,7 +615,7 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash)
"SHA256H q16, q17, v7.4s \n" "SHA256H q16, q17, v7.4s \n"
"SHA256H2 q17, q18, v7.4s \n" "SHA256H2 q17, q18, v7.4s \n"
"LD1 {v22.16b-v25.16b}, [%[k]], #64 \n" "LD1 {v22.4s-v25.4s}, [%[k]], #64 \n"
"SHA256SU0 v12.4s, v9.4s \n" "SHA256SU0 v12.4s, v9.4s \n"
"ADD v8.4s, v8.4s, v22.4s \n" "ADD v8.4s, v8.4s, v22.4s \n"
"MOV v18.16b, v16.16b \n" "MOV v18.16b, v16.16b \n"
@ -646,7 +646,7 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash)
"SHA256H q16, q17, v11.4s \n" "SHA256H q16, q17, v11.4s \n"
"SHA256H2 q17, q18, v11.4s \n" "SHA256H2 q17, q18, v11.4s \n"
"LD1 {v22.16b-v25.16b}, [%[k]] \n" "LD1 {v22.4s-v25.4s}, [%[k]] \n"
"ADD v12.4s, v12.4s, v22.4s \n" "ADD v12.4s, v12.4s, v22.4s \n"
"MOV v18.16b, v16.16b \n" "MOV v18.16b, v16.16b \n"
"SHA256H q16, q17, v12.4s \n" "SHA256H q16, q17, v12.4s \n"