diff --git a/wolfcrypt/src/port/arm/armv8-aes.c b/wolfcrypt/src/port/arm/armv8-aes.c index c95334219..e59bd2571 100644 --- a/wolfcrypt/src/port/arm/armv8-aes.c +++ b/wolfcrypt/src/port/arm/armv8-aes.c @@ -476,6 +476,8 @@ int wc_InitAes_h(Aes* aes, void* h) /* do as many block size ops as possible */ if (numBlocks > 0) { + word32* key = aes->key; + word32* reg = aes->reg; /* AESE exor's input with round key shift rows of exor'ed result @@ -487,10 +489,10 @@ int wc_InitAes_h(Aes* aes, void* h) case 10: /* AES 128 BLOCK */ __asm__ __volatile__ ( "MOV w11, %w[blocks] \n" - "LD1 {v1.2d-v4.2d}, %[Key], #64 \n" - "LD1 {v5.2d-v8.2d}, %[Key], #64 \n" - "LD1 {v9.2d-v11.2d},%[Key], #48 \n" - "LD1 {v0.2d}, %[reg] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "LD1 {v9.2d-v11.2d},[%[Key]], #48 \n" + "LD1 {v0.2d}, [%[reg]] \n" "LD1 {v12.2d}, [%[input]], #16 \n" "1:\n" @@ -525,11 +527,11 @@ int wc_InitAes_h(Aes* aes, void* h) "2:\n" "#store current counter value at the end \n" - "ST1 {v0.2d}, %[regOut] \n" + "ST1 {v0.2d}, [%[regOut]] \n" - :[out] "=r" (out), [regOut] "=m" (aes->reg) - :"0" (out), [Key] "m" (aes->key), [input] "r" (in), - [blocks] "r" (numBlocks), [reg] "m" (aes->reg) + :[out] "=r" (out), [regOut] "=r" (reg), "=r" (in) + :"0" (out), [Key] "r" (key), [input] "2" (in), + [blocks] "r" (numBlocks), [reg] "1" (reg) : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13" ); @@ -584,8 +586,8 @@ int wc_InitAes_h(Aes* aes, void* h) "ST1 {v0.2d}, %[regOut] \n" - :[out] "=r" (out), [regOut] "=m" (aes->reg) - :"0" (out), [Key] "m" (aes->key), [input] "r" (in), + :[out] "=r" (out), [regOut] "=m" (aes->reg), "=r" (in) + :"0" (out), [Key] "m" (aes->key), [input] "2" (in), [blocks] "r" (numBlocks), [reg] "m" (aes->reg) : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14" @@ -646,8 +648,8 @@ int wc_InitAes_h(Aes* aes, void* h) "ST1 {v0.2d}, %[regOut] \n" - :[out] "=r" (out), [regOut] "=m" (aes->reg) - :"0" (out), [Key] "m" (aes->key), [input] "r" (in), + :[out] "=r" (out), [regOut] "=m" (aes->reg), "=r" (in) + :"0" (out), [Key] "m" (aes->key), [input] "2" (in), [blocks] "r" (numBlocks), [reg] "m" (aes->reg) : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14","v15", @@ -675,14 +677,17 @@ int wc_InitAes_h(Aes* aes, void* h) /* do as many block size ops as possible */ if (numBlocks > 0) { + word32* key = aes->key; + word32* reg = aes->reg; + switch(aes->rounds) { case 10: /* AES 128 BLOCK */ __asm__ __volatile__ ( "MOV w11, %w[blocks] \n" - "LD1 {v1.2d-v4.2d}, %[Key], #64 \n" - "LD1 {v5.2d-v8.2d}, %[Key], #64 \n" - "LD1 {v9.2d-v11.2d},%[Key], #48 \n" - "LD1 {v13.2d}, %[reg] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "LD1 {v9.2d-v11.2d},[%[Key]], #48 \n" + "LD1 {v13.2d}, [%[reg]] \n" "1:\n" "LD1 {v0.2d}, [%[input]], #16 \n" @@ -718,11 +723,11 @@ int wc_InitAes_h(Aes* aes, void* h) "2: \n" "#store current counter value at the end \n" - "ST1 {v13.2d}, %[regOut] \n" + "ST1 {v13.2d}, [%[regOut]] \n" - :[out] "=r" (out), [regOut] "=m" (aes->reg) - :"0" (out), [Key] "m" (aes->key), [input] "r" (in), - [blocks] "r" (numBlocks), [reg] "m" (aes->reg) + :[out] "=r" (out), [regOut] "=r" (reg), "=r" (in) + :"0" (out), [Key] "r" (key), [input] "2" (in), + [blocks] "r" (numBlocks), [reg] "1" (reg) : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13" ); @@ -731,11 +736,11 @@ int wc_InitAes_h(Aes* aes, void* h) case 12: /* AES 192 BLOCK */ __asm__ __volatile__ ( "MOV w11, %w[blocks] \n" - "LD1 {v1.2d-v4.2d}, %[Key], #64 \n" - "LD1 {v5.2d-v8.2d}, %[Key], #64 \n" - "LD1 {v9.2d-v12.2d},%[Key], #64 \n" - "LD1 {v13.16b}, %[Key], #16 \n" - "LD1 {v15.2d}, %[reg] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "LD1 {v9.2d-v12.2d},[%[Key]], #64 \n" + "LD1 {v13.16b}, [%[Key]], #16 \n" + "LD1 {v15.2d}, [%[reg]] \n" "LD1 {v0.2d}, [%[input]], #16 \n" "1: \n" @@ -776,11 +781,11 @@ int wc_InitAes_h(Aes* aes, void* h) "2:\n" "#store current counter value at the end \n" - "ST1 {v15.2d}, %[regOut] \n" + "ST1 {v15.2d}, [%[regOut]] \n" - :[out] "=r" (out), [regOut] "=m" (aes->reg) - :"0" (out), [Key] "m" (aes->key), [input] "r" (in), - [blocks] "r" (numBlocks), [reg] "m" (aes->reg) + :[out] "=r" (out), [regOut] "=r" (reg), "=r" (in) + :"0" (out), [Key] "r" (key), [input] "2" (in), + [blocks] "r" (numBlocks), [reg] "1" (reg) : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15" ); @@ -789,11 +794,11 @@ int wc_InitAes_h(Aes* aes, void* h) case 14: /* AES 256 BLOCK */ __asm__ __volatile__ ( "MOV w11, %w[blocks] \n" - "LD1 {v1.2d-v4.2d}, %[Key], #64 \n" - "LD1 {v5.2d-v8.2d}, %[Key], #64 \n" - "LD1 {v9.2d-v12.2d}, %[Key], #64 \n" - "LD1 {v13.2d-v15.2d}, %[Key], #48 \n" - "LD1 {v17.2d}, %[reg] \n" + "LD1 {v1.2d-v4.2d}, [%[Key]], #64 \n" + "LD1 {v5.2d-v8.2d}, [%[Key]], #64 \n" + "LD1 {v9.2d-v12.2d}, [%[Key]], #64 \n" + "LD1 {v13.2d-v15.2d}, [%[Key]], #48 \n" + "LD1 {v17.2d}, [%[reg]] \n" "LD1 {v0.2d}, [%[input]], #16 \n" "1: \n" @@ -838,11 +843,11 @@ int wc_InitAes_h(Aes* aes, void* h) "2:\n" "#store current counter value at the end \n" - "ST1 {v17.2d}, %[regOut] \n" + "ST1 {v17.2d}, [%[regOut]] \n" - :[out] "=r" (out), [regOut] "=m" (aes->reg) - :"0" (out), [Key] "m" (aes->key), [input] "r" (in), - [blocks] "r" (numBlocks), [reg] "m" (aes->reg) + :[out] "=r" (out), [regOut] "=r" (reg), "=r" (in) + :"0" (out), [Key] "r" (key), [input] "2" (in), + [blocks] "r" (numBlocks), [reg] "1" (reg) : "cc", "memory", "w11", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14","v15", "v16", "v17" @@ -2039,7 +2044,8 @@ static int Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, ,[inX] "4" (xPt), [inY] "m" (aes->H) : "cc", "w11", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14" - ,"v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24" + ,"v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", + "v24","v25","v26","v27","v28","v29","v30","v31" ); } @@ -2473,7 +2479,8 @@ static int Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, [ctr] "2" (iCtr) , [h] "m" (aes->H) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10","v11","v12","v13","v14", - "v15", "v16", "v17","v18", "v19", "v20","v21","v22","v23","v24" + "v15", "v16", "v17","v18", "v19", "v20","v21","v22","v23", + "v24","v25","v26","v27","v28","v29","v30","v31" ); @@ -4061,7 +4068,8 @@ static void GMULT(byte* X, byte* Y) : [xOut] "=r" (X), [yOut] "=r" (Y) : [x] "0" (X), [y] "1" (Y) - : + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6" ,"q7", "q8", + "q9", "q10", "q11" ,"q12", "q13", "q14", "q15" ); } @@ -4597,7 +4605,7 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) "ST1 {v0.16b}, [%[out]] \n" : [out] "=r" (pt) : [h] "0" (pt) - : "cc", "memory" + : "cc", "memory", "v0" ); } #else @@ -4610,7 +4618,7 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) "VST1.32 {q0}, [%[out]] \n" : [out] "=r" (pt) : [h] "0" (pt) - : "cc", "memory" + : "cc", "memory", "q0" ); } #endif diff --git a/wolfcrypt/src/port/arm/armv8-sha256.c b/wolfcrypt/src/port/arm/armv8-sha256.c index cceb8c865..fdf2634bf 100644 --- a/wolfcrypt/src/port/arm/armv8-sha256.c +++ b/wolfcrypt/src/port/arm/armv8-sha256.c @@ -133,6 +133,8 @@ int wc_Sha256Update(Sha256* sha256, const byte* data, word32 len) numBlocks = (len + sha256->buffLen)/SHA256_BLOCK_SIZE; if (numBlocks > 0) { + word32* k = (word32*)K; + /* get leftover amount after blocks */ add = (len + sha256->buffLen) - numBlocks * SHA256_BLOCK_SIZE; __asm__ volatile ( @@ -300,12 +302,14 @@ int wc_Sha256Update(Sha256* sha256, const byte* data, word32 len) "STP q12, q13, %[out] \n" : [out] "=m" (sha256->digest), "=m" (sha256->buffer), "=r" (numBlocks), - "=r" (data) - : [k] "r" (K), [digest] "m" (sha256->digest), [buffer] "m" (sha256->buffer), + "=r" (data), "=r" (k) + : [k] "4" (k), [digest] "m" (sha256->digest), [buffer] "m" (sha256->buffer), [blocks] "2" (numBlocks), [dataIn] "3" (data) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", - "v15", "w8" + "v15", "v16", "v17", "v18", "v19", "v20", "v21", + "v22", "v23", "v24", "v25", "v26", "v27", "v28", + "v29", "v30", "v31", "w8" ); AddLength(sha256, SHA256_BLOCK_SIZE * numBlocks); @@ -488,6 +492,7 @@ int wc_Sha256Final(Sha256* sha256, byte* hash) [buffer] "m" (sha256->buffer) : "cc", "memory", "v0", "v1", "v2", "v3", "v8", "v9", "v10", "v11" , "v12", "v13", "v14", "v15", "v16", "v17", "v18" + , "v19", "v20", "v21", "v22", "v23", "v24", "v25" ); sha256->buffLen = 0; @@ -510,7 +515,7 @@ int wc_Sha256Final(Sha256* sha256, byte* hash) "ST1 {v0.2d-v3.2d}, %[out] \n" : [out] "=m" (sha256->buffer) : [in] "m" (sha256->buffer) - : "cc", "memory" + : "cc", "memory", "v0", "v1", "v2", "v3" ); #endif /* ! length ordering dependent on digest endian type ! */ @@ -666,7 +671,8 @@ int wc_Sha256Final(Sha256* sha256, byte* hash) "0" (hash) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", - "v15", "v16", "v17", "v18" + "v15", "v16", "v17", "v18", "v19", "v20", "v21", + "v22", "v23", "v24", "v25" ); return wc_InitSha256(sha256); /* reset state */ @@ -1119,7 +1125,7 @@ int wc_Sha256Final(Sha256* sha256, byte* hash) "VST1.32 {q3}, [%[out]] \n" : [out] "=r" (bufPt) : [in] "0" (bufPt) - : "cc", "memory" + : "cc", "memory", "q0", "q1", "q2", "q3" ); #endif /* ! length ordering dependent on digest endian type ! */