From c8b0aac144821f8707598aa95fbc8291154a338c Mon Sep 17 00:00:00 2001 From: Sean Parkinson Date: Fri, 2 Feb 2024 12:35:39 +1000 Subject: [PATCH] SHA-256 Aarch64: fix alignments on loads and stores Input buffer must be loaded with a byte alignment. Fix other loads and stores to be consistent. --- wolfcrypt/src/port/arm/armv8-sha256.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/wolfcrypt/src/port/arm/armv8-sha256.c b/wolfcrypt/src/port/arm/armv8-sha256.c index 2fa9adfb3..e65e2104e 100644 --- a/wolfcrypt/src/port/arm/armv8-sha256.c +++ b/wolfcrypt/src/port/arm/armv8-sha256.c @@ -281,7 +281,7 @@ static WC_INLINE void Sha256Transform(wc_Sha256* sha256, const byte* data, "CBZ w8, 2f \n" "#load in message and schedule updates \n" - "LD1 {v0.2d-v3.2d}, [%[dataIn]], #64 \n" + "LD1 {v0.16b-v3.16b}, [%[dataIn]], #64 \n" "MOV v14.16b, v12.16b \n" "MOV v15.16b, v13.16b \n" "REV32 v0.16b, v0.16b \n" @@ -291,7 +291,7 @@ static WC_INLINE void Sha256Transform(wc_Sha256* sha256, const byte* data, "B 1b \n" /* do another block */ "2:\n" - "STP q12, q13, %[out] \n" + "ST1 {v12.2d-v13.2d}, %[out] \n" : [out] "=m" (sha256->digest), "=m" (sha256->buffer), "=r" (numBlocks), "=r" (data), "=r" (k) @@ -378,7 +378,7 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash) "MOV v16.16b, v20.16b \n" "MOV v17.16b, v21.16b \n" - "LD1 {v22.16b-v25.16b}, [%[k]], #64 \n" + "LD1 {v22.4s-v25.4s}, [%[k]], #64 \n" "SHA256SU0 v4.4s, v1.4s \n" "ADD v0.4s, v0.4s, v22.4s \n" "MOV v6.16b, v2.16b \n" @@ -411,7 +411,7 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash) "SHA256H q16, q17, v3.4s \n" "SHA256H2 q17, q18, v3.4s \n" - "LD1 {v22.16b-v25.16b}, [%[k]], #64 \n" + "LD1 {v22.4s-v25.4s}, [%[k]], #64 \n" "SHA256SU0 v8.4s, v5.4s \n" "ADD v4.4s, v4.4s, v22.4s \n" "MOV v18.16b, v16.16b \n" @@ -444,7 +444,7 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash) "SHA256H q16, q17, v7.4s \n" "SHA256H2 q17, q18, v7.4s \n" - "LD1 {v22.16b-v25.16b}, [%[k]], #64 \n" + "LD1 {v22.4s-v25.4s}, [%[k]], #64 \n" "SHA256SU0 v12.4s, v9.4s \n" "ADD v8.4s, v8.4s, v22.4s \n" "MOV v18.16b, v16.16b \n" @@ -475,7 +475,7 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash) "SHA256H q16, q17, v11.4s \n" "SHA256H2 q17, q18, v11.4s \n" - "LD1 {v22.16b-v25.16b}, [%[k]] \n" + "LD1 {v22.4s-v25.4s}, [%[k]] \n" "ADD v12.4s, v12.4s, v22.4s \n" "MOV v18.16b, v16.16b \n" "SHA256H q16, q17, v12.4s \n" @@ -499,7 +499,7 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash) "#Add working vars back into digest state \n" "ADD v16.4s, v16.4s, v20.4s \n" "ADD v17.4s, v17.4s, v21.4s \n" - "STP q16, q17, %[out] \n" + "ST1 {v16.2d-v17.2d}, %[out] \n" : [out] "=m" (sha256->digest), [k] "+r" (k) : [digest] "m" (sha256->digest), @@ -549,7 +549,7 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash) "MOV v16.16b, v20.16b \n" "MOV v17.16b, v21.16b \n" - "LD1 {v22.16b-v25.16b}, [%[k]], #64 \n" + "LD1 {v22.4s-v25.4s}, [%[k]], #64 \n" "SHA256SU0 v4.4s, v1.4s \n" "ADD v0.4s, v0.4s, v22.4s \n" "MOV v6.16b, v2.16b \n" @@ -582,7 +582,7 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash) "SHA256H q16, q17, v3.4s \n" "SHA256H2 q17, q18, v3.4s \n" - "LD1 {v22.16b-v25.16b}, [%[k]], #64 \n" + "LD1 {v22.4s-v25.4s}, [%[k]], #64 \n" "SHA256SU0 v8.4s, v5.4s \n" "ADD v4.4s, v4.4s, v22.4s \n" "MOV v18.16b, v16.16b \n" @@ -615,7 +615,7 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash) "SHA256H q16, q17, v7.4s \n" "SHA256H2 q17, q18, v7.4s \n" - "LD1 {v22.16b-v25.16b}, [%[k]], #64 \n" + "LD1 {v22.4s-v25.4s}, [%[k]], #64 \n" "SHA256SU0 v12.4s, v9.4s \n" "ADD v8.4s, v8.4s, v22.4s \n" "MOV v18.16b, v16.16b \n" @@ -646,7 +646,7 @@ static WC_INLINE int Sha256Final(wc_Sha256* sha256, byte* hash) "SHA256H q16, q17, v11.4s \n" "SHA256H2 q17, q18, v11.4s \n" - "LD1 {v22.16b-v25.16b}, [%[k]] \n" + "LD1 {v22.4s-v25.4s}, [%[k]] \n" "ADD v12.4s, v12.4s, v22.4s \n" "MOV v18.16b, v16.16b \n" "SHA256H q16, q17, v12.4s \n"