Merge pull request #593 from JacobBarthelmeh/ARMv8

ARMv8: sanity checks
toddouska
2016-10-12 11:23:27 -07:00
committed by GitHub
3 changed files with 395 additions and 356 deletions

View File

@@ -120,7 +120,6 @@
 #include <sys/syscall.h>
 #include <unistd.h>
-#define HAVE_GET_CYCLES
 static word64 begin_cycles;
 static word64 total_cycles;
 static int cycles = -1;
@@ -2534,7 +2533,7 @@ void bench_ed25519KeySign(void)
 #endif /* _WIN32 */
-#ifdef HAVE_GET_CYCLES
+#if defined(HAVE_GET_CYCLES)
 static INLINE word64 get_intel_cycles(void)
 {
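
Note: for a single macro the two forms are interchangeable; `#if defined(...)` is generally preferred because it composes with other conditions. A minimal illustrative sketch (the `_WIN32` pairing is hypothetical, not from this diff):

    #ifdef HAVE_GET_CYCLES                           /* tests one macro only */
    #endif

    #if defined(HAVE_GET_CYCLES) && !defined(_WIN32) /* combines cleanly */
    #endif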

View File

@@ -2511,10 +2511,10 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
 {
     /* sanity checks */
     if (aes == NULL || (iv == NULL && ivSz > 0) ||
-        (authTag == NULL && authTagSz > 0) ||
-        (authIn == NULL && authInSz > 0) ||
+        (authTag == NULL) ||
+        (authIn == NULL) ||
         (in == NULL && sz > 0) ||
-        (out == NULL && authTag == NULL)) {
+        (out == NULL && sz > 0)) {
         WOLFSSL_MSG("a NULL parameter passed in when size is larger than 0");
         return BAD_FUNC_ARG;
     }
@@ -2571,10 +2571,10 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
     /* sanity checks */
     if (aes == NULL || (iv == NULL && ivSz > 0) ||
-        (authTag == NULL && authTagSz > 0) ||
-        (authIn == NULL && authInSz > 0) ||
+        (authTag == NULL) ||
+        (authIn == NULL) ||
         (in == NULL && sz > 0) ||
-        (out == NULL && authTag == NULL)) {
+        (out == NULL && sz > 0)) {
         WOLFSSL_MSG("a NULL parameter passed in when size is larger than 0");
         return BAD_FUNC_ARG;
     }
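
Effect of the tightened checks: `authTag` and `authIn` must now always be non-NULL, and `out` is validated against `sz` rather than against `authTag`. A hedged caller sketch (key/buffer values are placeholders; API per <wolfssl/wolfcrypt/aes.h>):

    Aes  aes;
    byte key[16] = {0}, iv[12] = {0}, aad[4] = {0};
    byte plain[16] = {0}, cipher[16];
    int  ret;

    wc_AesGcmSetKey(&aes, key, sizeof(key));

    /* authTag == NULL is now rejected even when authTagSz == 0 */
    ret = wc_AesGcmEncrypt(&aes, cipher, plain, sizeof(plain),
                           iv, sizeof(iv), NULL, 0, aad, sizeof(aad));
    /* ret == BAD_FUNC_ARG */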
@@ -2827,6 +2827,12 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
     if (partial != 0) {
         IncrementGcmCounter(ctr);
         wc_AesEncrypt(aes, ctr, scratch);
+
+        /* check if pointer is null after main AES-GCM blocks
+         * helps static analysis */
+        if (p == NULL || c == NULL) {
+            return BAD_STATE_E;
+        }
         xorbuf(scratch, c, partial);
         XMEMCPY(p, scratch, partial);
     }
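
For context, an assumption about code not shown in this diff: `p` and `c` are presumably the output/input cursors set from the caller's buffers, so after the entry sanity checks they can only be NULL when `sz == 0`, which also forces `partial == 0`. The re-check is therefore unreachable in practice; it exists so a static analyzer can prove the `xorbuf`/`XMEMCPY` dereferences safe.

    /* assumed setup earlier in wc_AesGcmDecrypt (hypothetical sketch) */
    byte*       p = out; /* plaintext cursor, advanced one AES block at a time */
    const byte* c = in;  /* ciphertext cursor, advanced in step with p */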
@@ -4157,10 +4163,10 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
     /* sanity checks */
     if (aes == NULL || (iv == NULL && ivSz > 0) ||
-        (authTag == NULL && authTagSz > 0) ||
-        (authIn == NULL && authInSz > 0) ||
+        (authTag == NULL) ||
+        (authIn == NULL) ||
         (in == NULL && sz > 0) ||
-        (out == NULL && authTag == NULL)) {
+        (out == NULL && sz > 0)) {
         WOLFSSL_MSG("a NULL parameter passed in when size is larger than 0");
         return BAD_FUNC_ARG;
     }
@@ -4234,6 +4240,16 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
     byte scratch[AES_BLOCK_SIZE];

     ctr = counter ;
+
+    /* sanity checks */
+    if (aes == NULL || (iv == NULL && ivSz > 0) ||
+        (authTag == NULL) ||
+        (authIn == NULL) ||
+        (in == NULL && sz > 0) ||
+        (out == NULL && sz > 0)) {
+        WOLFSSL_MSG("a NULL parameter passed in when size is larger than 0");
+        return BAD_FUNC_ARG;
+    }

     XMEMSET(initialCounter, 0, AES_BLOCK_SIZE);
     if (ivSz == NONCE_SZ) {
         XMEMCPY(initialCounter, iv, ivSz);
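
Previously this ARMv8 decrypt path reached `XMEMCPY(initialCounter, iv, ivSz)` with no validation; it now fails fast. A hedged sketch reusing the buffers above:

    byte plain2[16], tag[16] = {0};

    /* iv == NULL with ivSz > 0 is caught before the iv copy */
    ret = wc_AesGcmDecrypt(&aes, plain2, cipher, sizeof(cipher),
                           NULL, 12, tag, sizeof(tag), aad, sizeof(aad));
    /* ret == BAD_FUNC_ARG */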
@@ -4270,6 +4286,12 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
     if (partial != 0) {
         IncrementGcmCounter(ctr);
         wc_AesEncrypt(aes, ctr, scratch);
+
+        /* check if pointer is null after main AES-GCM blocks
+         * helps static analysis */
+        if (p == NULL || c == NULL) {
+            return BAD_STATE_E;
+        }
         xorbuf(scratch, c, partial);
         XMEMCPY(p, scratch, partial);
     }

View File

@@ -80,6 +80,10 @@ int wc_InitSha256(Sha256* sha256)
 {
     int ret = 0;

+    if (sha256 == NULL) {
+        return BAD_FUNC_ARG;
+    }
+
     sha256->digest[0] = 0x6A09E667L;
     sha256->digest[1] = 0xBB67AE85L;
     sha256->digest[2] = 0x3C6EF372L;
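
With the guard, a NULL context returns a defined error instead of dereferencing `sha256->digest`. A minimal sketch:

    if (wc_InitSha256(NULL) == BAD_FUNC_ARG) {
        /* rejected as expected */
    }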
@@ -116,199 +120,206 @@ int wc_Sha256Update(Sha256* sha256, const byte* data, word32 len)
         return BAD_FUNC_ARG;
     }

-    /* fill leftover buffer with data */
+    /* only perform actions if a buffer is passed in */
+    if (len > 0) {
+        /* fill leftover buffer with data */
         add = min(len, SHA256_BLOCK_SIZE - sha256->buffLen);
         XMEMCPY((byte*)(sha256->buffer) + sha256->buffLen, data, add);
         sha256->buffLen += add;
         data += add;
         len -= add;

         /* number of blocks in a row to complete */
         numBlocks = (len + sha256->buffLen)/SHA256_BLOCK_SIZE;

         if (numBlocks > 0) {
             /* get leftover amount after blocks */
             add = (len + sha256->buffLen) - numBlocks * SHA256_BLOCK_SIZE;

             __asm__ volatile (
             "#load leftover data\n"
             "LD1 {v0.2d-v3.2d}, %[buffer] \n"

             "#load current digest\n"
             "LD1 {v12.2d-v13.2d}, %[digest] \n"
             "MOV w8, %w[blocks] \n"
             "REV32 v0.16b, v0.16b \n"
             "REV32 v1.16b, v1.16b \n"
             "REV32 v2.16b, v2.16b \n"
             "REV32 v3.16b, v3.16b \n"

             "#load K values in \n"
             "LD1 {v16.4s-v19.4s}, [%[k]], #64 \n"
             "LD1 {v20.4s-v23.4s}, [%[k]], #64 \n"
             "MOV v14.16b, v12.16b \n" /* store digest for add at the end */
             "MOV v15.16b, v13.16b \n"
             "LD1 {v24.4s-v27.4s}, [%[k]], #64 \n"
             "LD1 {v28.4s-v31.4s}, [%[k]], #64 \n"

             /* beginning of SHA256 block operation */
             "sha256Start:\n"
             /* Round 1 */
             "MOV v4.16b, v0.16b \n"
             "ADD v0.4s, v0.4s, v16.4s \n"
             "MOV v11.16b, v12.16b \n"
             "SHA256H q12, q13, v0.4s \n"
             "SHA256H2 q13, q11, v0.4s \n"
             /* Round 2 */
             "SHA256SU0 v4.4s, v1.4s \n"
             "ADD v0.4s, v1.4s, v17.4s \n"
             "MOV v11.16b, v12.16b \n"
             "SHA256SU1 v4.4s, v2.4s, v3.4s \n"
             "SHA256H q12, q13, v0.4s \n"
             "SHA256H2 q13, q11, v0.4s \n"
             /* Round 3 */
             "SHA256SU0 v1.4s, v2.4s \n"
             "ADD v0.4s, v2.4s, v18.4s \n"
             "MOV v11.16b, v12.16b \n"
             "SHA256SU1 v1.4s, v3.4s, v4.4s \n"
             "SHA256H q12, q13, v0.4s \n"
             "SHA256H2 q13, q11, v0.4s \n"
             /* Round 4 */
             "SHA256SU0 v2.4s, v3.4s \n"
             "ADD v0.4s, v3.4s, v19.4s \n"
             "MOV v11.16b, v12.16b \n"
             "SHA256SU1 v2.4s, v4.4s, v1.4s \n"
             "SHA256H q12, q13, v0.4s \n"
             "SHA256H2 q13, q11, v0.4s \n"
             /* Round 5 */
             "SHA256SU0 v3.4s, v4.4s \n"
             "ADD v0.4s, v4.4s, v20.4s \n"
             "MOV v11.16b, v12.16b \n"
             "SHA256SU1 v3.4s, v1.4s, v2.4s \n"
             "SHA256H q12, q13, v0.4s \n"
             "SHA256H2 q13, q11, v0.4s \n"
             /* Round 6 */
             "SHA256SU0 v4.4s, v1.4s \n"
             "ADD v0.4s, v1.4s, v21.4s \n"
             "MOV v11.16b, v12.16b \n"
             "SHA256SU1 v4.4s, v2.4s, v3.4s \n"
             "SHA256H q12, q13, v0.4s \n"
             "SHA256H2 q13, q11, v0.4s \n"
             /* Round 7 */
             "SHA256SU0 v1.4s, v2.4s \n"
             "ADD v0.4s, v2.4s, v22.4s \n"
             "MOV v11.16b, v12.16b \n"
             "SHA256SU1 v1.4s, v3.4s, v4.4s \n"
             "SHA256H q12, q13, v0.4s \n"
             "SHA256H2 q13, q11, v0.4s \n"
             /* Round 8 */
             "SHA256SU0 v2.4s, v3.4s \n"
             "ADD v0.4s, v3.4s, v23.4s \n"
             "MOV v11.16b, v12.16b \n"
             "SHA256SU1 v2.4s, v4.4s, v1.4s \n"
             "SHA256H q12, q13, v0.4s \n"
             "SHA256H2 q13, q11, v0.4s \n"
             /* Round 9 */
             "SHA256SU0 v3.4s, v4.4s \n"
             "ADD v0.4s, v4.4s, v24.4s \n"
             "MOV v11.16b, v12.16b \n"
             "SHA256SU1 v3.4s, v1.4s, v2.4s \n"
             "SHA256H q12, q13, v0.4s \n"
             "SHA256H2 q13, q11, v0.4s \n"
             /* Round 10 */
             "SHA256SU0 v4.4s, v1.4s \n"
             "ADD v0.4s, v1.4s, v25.4s \n"
             "MOV v11.16b, v12.16b \n"
             "SHA256SU1 v4.4s, v2.4s, v3.4s \n"
             "SHA256H q12, q13, v0.4s \n"
             "SHA256H2 q13, q11, v0.4s \n"
             /* Round 11 */
             "SHA256SU0 v1.4s, v2.4s \n"
             "ADD v0.4s, v2.4s, v26.4s \n"
             "MOV v11.16b, v12.16b \n"
             "SHA256SU1 v1.4s, v3.4s, v4.4s \n"
             "SHA256H q12, q13, v0.4s \n"
             "SHA256H2 q13, q11, v0.4s \n"
             /* Round 12 */
             "SHA256SU0 v2.4s, v3.4s \n"
             "ADD v0.4s, v3.4s, v27.4s \n"
             "MOV v11.16b, v12.16b \n"
             "SHA256SU1 v2.4s, v4.4s, v1.4s \n"
             "SHA256H q12, q13, v0.4s \n"
             "SHA256H2 q13, q11, v0.4s \n"
             /* Round 13 */
             "SHA256SU0 v3.4s, v4.4s \n"
             "ADD v0.4s, v4.4s, v28.4s \n"
             "MOV v11.16b, v12.16b \n"
             "SHA256SU1 v3.4s, v1.4s, v2.4s \n"
             "SHA256H q12, q13, v0.4s \n"
             "SHA256H2 q13, q11, v0.4s \n"
             /* Round 14 */
             "ADD v0.4s, v1.4s, v29.4s \n"
             "MOV v11.16b, v12.16b \n"
             "SHA256H q12, q13, v0.4s \n"
             "SHA256H2 q13, q11, v0.4s \n"
             /* Round 15 */
             "ADD v0.4s, v2.4s, v30.4s \n"
             "MOV v11.16b, v12.16b \n"
             "SHA256H q12, q13, v0.4s \n"
             "SHA256H2 q13, q11, v0.4s \n"
             /* Round 16 */
             "ADD v0.4s, v3.4s, v31.4s \n"
             "MOV v11.16b, v12.16b \n"
             "SHA256H q12, q13, v0.4s \n"
             "SHA256H2 q13, q11, v0.4s \n"

             "#Add working vars back into digest state \n"
             "SUB w8, w8, #1 \n"
             "ADD v12.4s, v12.4s, v14.4s \n"
             "ADD v13.4s, v13.4s, v15.4s \n"

             "#check if more blocks should be done\n"
             "CBZ w8, sha256End \n"

             "#load in message and schedule updates \n"
             "LD1 {v0.2d-v3.2d}, [%[dataIn]], #64 \n"
             "MOV v14.16b, v12.16b \n"
             "MOV v15.16b, v13.16b \n"
             "REV32 v0.16b, v0.16b \n"
             "REV32 v1.16b, v1.16b \n"
             "REV32 v2.16b, v2.16b \n"
             "REV32 v3.16b, v3.16b \n"
             "B sha256Start \n" /* do another block */

             "sha256End:\n"
             "STP q12, q13, %[out] \n"

             : [out] "=m" (sha256->digest), "=m" (sha256->buffer), "=r" (numBlocks),
               "=r" (data)
             : [k] "r" (K), [digest] "m" (sha256->digest), [buffer] "m" (sha256->buffer),
               [blocks] "2" (numBlocks), [dataIn] "3" (data)
             : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
               "v8", "v9", "v10", "v11", "v12", "v13", "v14",
               "v15", "w8"
             );

             AddLength(sha256, SHA256_BLOCK_SIZE * numBlocks);

             /* copy over any remaining data leftover */
             XMEMCPY(sha256->buffer, data, add);
             sha256->buffLen = add;
         }
+    }
+
+    /* account for possibility of not being used if len == 0 */
+    (void)add;
+    (void)numBlocks;

     return 0;
 }
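
The `if (len > 0)` wrapper makes a zero-length update an explicit no-op: no `XMEMCPY` from a possibly-NULL `data` pointer, and the assembly is never entered with zero blocks. A hedged sketch, assuming the entry check permits `data == NULL` when `len == 0`:

    Sha256 sha256;
    wc_InitSha256(&sha256);
    wc_Sha256Update(&sha256, NULL, 0); /* returns 0; state unchanged */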
@@ -672,217 +683,224 @@ int wc_Sha256Update(Sha256* sha256, const byte* data, word32 len)
         return BAD_FUNC_ARG;
     }

-    /* fill leftover buffer with data */
+    /* only perform actions if a buffer is passed in */
+    if (len > 0) {
+        /* fill leftover buffer with data */
         add = min(len, SHA256_BLOCK_SIZE - sha256->buffLen);
         XMEMCPY((byte*)(sha256->buffer) + sha256->buffLen, data, add);
         sha256->buffLen += add;
         data += add;
         len -= add;

         /* number of blocks in a row to complete */
         numBlocks = (len + sha256->buffLen)/SHA256_BLOCK_SIZE;

         if (numBlocks > 0) {
             word32* bufPt = sha256->buffer;
             word32* digPt = sha256->digest;

             /* get leftover amount after blocks */
             add = (len + sha256->buffLen) - numBlocks * SHA256_BLOCK_SIZE;

             __asm__ volatile (
             "#load leftover data\n"
             "VLDM %[buffer]!, {q0-q3} \n"

             "#load current digest\n"
             "VLDM %[digest], {q12-q13} \n"
             "MOV r8, %r[blocks] \n"
             "VREV32.8 q0, q0 \n"
             "VREV32.8 q1, q1 \n"
             "VREV32.8 q2, q2 \n"
             "VREV32.8 q3, q3 \n"
             "VLDM %[k]! ,{q5-q8} \n"
             "VLDM %[k]! ,{q9}\n"
             "VMOV.32 q14, q12 \n" /* store digest for add at the end */
             "VMOV.32 q15, q13 \n"

             /* beginning of SHA256 block operation */
             "sha256Start:\n"
             /* Round 1 */
             "VMOV.32 q4, q0 \n"
             "VADD.i32 q0, q0, q5 \n"
             "VMOV.32 q11, q12 \n"
             "SHA256H.32 q12, q13, q0 \n"
             "SHA256H2.32 q13, q11, q0 \n"
             /* Round 2 */
             "SHA256SU0.32 q4, q1 \n"
             "VADD.i32 q0, q1, q6 \n"
             "VMOV.32 q11, q12 \n"
             "SHA256SU1.32 q4, q2, q3 \n"
             "SHA256H.32 q12, q13, q0 \n"
             "SHA256H2.32 q13, q11, q0 \n"
             /* Round 3 */
             "SHA256SU0.32 q1, q2 \n"
             "VADD.i32 q0, q2, q7 \n"
             "VMOV.32 q11, q12 \n"
             "SHA256SU1.32 q1, q3, q4 \n"
             "SHA256H.32 q12, q13, q0 \n"
             "SHA256H2.32 q13, q11, q0 \n"
             /* Round 4 */
             "SHA256SU0.32 q2, q3 \n"
             "VADD.i32 q0, q3, q8 \n"
             "VMOV.32 q11, q12 \n"
             "SHA256SU1.32 q2, q4, q1 \n"
             "SHA256H.32 q12, q13, q0 \n"
             "SHA256H2.32 q13, q11, q0 \n"
             /* Round 5 */
             "SHA256SU0.32 q3, q4 \n"
             "VADD.i32 q0, q4, q9 \n"
             "VMOV.32 q11, q12 \n"
             "SHA256SU1.32 q3, q1, q2 \n"
             "SHA256H.32 q12, q13, q0 \n"
             "SHA256H2.32 q13, q11, q0 \n"
             /* Round 6 */
             "VLD1.32 {q10}, [%[k]]! \n"
             "SHA256SU0.32 q4, q1 \n"
             "VADD.i32 q0, q1, q10 \n"
             "VMOV.32 q11, q12 \n"
             "SHA256SU1.32 q4, q2, q3 \n"
             "SHA256H.32 q12, q13, q0 \n"
             "SHA256H2.32 q13, q11, q0 \n"
             /* Round 7 */
             "VLD1.32 {q10}, [%[k]]! \n"
             "SHA256SU0.32 q1, q2 \n"
             "VADD.i32 q0, q2, q10 \n"
             "VMOV.32 q11, q12 \n"
             "SHA256SU1.32 q1, q3, q4 \n"
             "SHA256H.32 q12, q13, q0 \n"
             "SHA256H2.32 q13, q11, q0 \n"
             /* Round 8 */
             "VLD1.32 {q10}, [%[k]]! \n"
             "SHA256SU0.32 q2, q3 \n"
             "VADD.i32 q0, q3, q10 \n"
             "VMOV.32 q11, q12 \n"
             "SHA256SU1.32 q2, q4, q1 \n"
             "SHA256H.32 q12, q13, q0 \n"
             "SHA256H2.32 q13, q11, q0 \n"
             /* Round 9 */
             "VLD1.32 {q10}, [%[k]]! \n"
             "SHA256SU0.32 q3, q4 \n"
             "VADD.i32 q0, q4, q10 \n"
             "VMOV.32 q11, q12 \n"
             "SHA256SU1.32 q3, q1, q2 \n"
             "SHA256H.32 q12, q13, q0 \n"
             "SHA256H2.32 q13, q11, q0 \n"
             /* Round 10 */
             "VLD1.32 {q10}, [%[k]]! \n"
             "SHA256SU0.32 q4, q1 \n"
             "VADD.i32 q0, q1, q10 \n"
             "VMOV.32 q11, q12 \n"
             "SHA256SU1.32 q4, q2, q3 \n"
             "SHA256H.32 q12, q13, q0 \n"
             "SHA256H2.32 q13, q11, q0 \n"
             /* Round 11 */
             "VLD1.32 {q10}, [%[k]]! \n"
             "SHA256SU0.32 q1, q2 \n"
             "VADD.i32 q0, q2, q10 \n"
             "VMOV.32 q11, q12 \n"
             "SHA256SU1.32 q1, q3, q4 \n"
             "SHA256H.32 q12, q13, q0 \n"
             "SHA256H2.32 q13, q11, q0 \n"
             /* Round 12 */
             "VLD1.32 {q10}, [%[k]]! \n"
             "SHA256SU0.32 q2, q3 \n"
             "VADD.i32 q0, q3, q10 \n"
             "VMOV.32 q11, q12 \n"
             "SHA256SU1.32 q2, q4, q1 \n"
             "SHA256H.32 q12, q13, q0 \n"
             "SHA256H2.32 q13, q11, q0 \n"
             /* Round 13 */
             "VLD1.32 {q10}, [%[k]]! \n"
             "SHA256SU0.32 q3, q4 \n"
             "VADD.i32 q0, q4, q10 \n"
             "VMOV.32 q11, q12 \n"
             "SHA256SU1.32 q3, q1, q2 \n"
             "SHA256H.32 q12, q13, q0 \n"
             "SHA256H2.32 q13, q11, q0 \n"
             /* Round 14 */
             "VLD1.32 {q10}, [%[k]]! \n"
             "VADD.i32 q0, q1, q10 \n"
             "VMOV.32 q11, q12 \n"
             "SHA256H.32 q12, q13, q0 \n"
             "SHA256H2.32 q13, q11, q0 \n"
             /* Round 15 */
             "VLD1.32 {q10}, [%[k]]! \n"
             "VADD.i32 q0, q2, q10 \n"
             "VMOV.32 q11, q12 \n"
             "SHA256H.32 q12, q13, q0 \n"
             "SHA256H2.32 q13, q11, q0 \n"
             /* Round 16 */
             "VLD1.32 {q10}, [%[k]] \n"
             "SUB r8, r8, #1 \n"
             "VADD.i32 q0, q3, q10 \n"
             "VMOV.32 q11, q12 \n"
             "SHA256H.32 q12, q13, q0 \n"
             "SHA256H2.32 q13, q11, q0 \n"

             "#Add working vars back into digest state \n"
             "VADD.i32 q12, q12, q14 \n"
             "VADD.i32 q13, q13, q15 \n"

             "#check if more blocks should be done\n"
             "CMP r8, #0 \n"
             "BEQ sha256End \n"

             "#load in message and schedule updates \n"
             "VLD1.32 {q0}, [%[dataIn]]! \n"
             "VLD1.32 {q1}, [%[dataIn]]! \n"
             "VLD1.32 {q2}, [%[dataIn]]! \n"
             "VLD1.32 {q3}, [%[dataIn]]! \n"

             /* reset K pointer */
             "SUB %[k], %[k], #160 \n"
             "VREV32.8 q0, q0 \n"
             "VREV32.8 q1, q1 \n"
             "VREV32.8 q2, q2 \n"
             "VREV32.8 q3, q3 \n"
             "VMOV.32 q14, q12 \n"
             "VMOV.32 q15, q13 \n"
             "B sha256Start \n" /* do another block */

             "sha256End:\n"
             "VST1.32 {q12, q13}, [%[out]] \n"

             : [out] "=r" (digPt), "=r" (bufPt), "=r" (numBlocks),
               "=r" (data)
             : [k] "r" (K), [digest] "0" (digPt), [buffer] "1" (bufPt),
               [blocks] "2" (numBlocks), [dataIn] "3" (data)
             : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
               "q8", "q9", "q10", "q11", "q12", "q13", "q14",
               "q15", "r8"
             );

             AddLength(sha256, SHA256_BLOCK_SIZE * numBlocks);

             /* copy over any remaining data leftover */
             XMEMCPY(sha256->buffer, data, add);
             sha256->buffLen = add;
         }
+    }
+
+    /* account for possibility of not being used if len == 0 */
+    (void)add;
+    (void)numBlocks;

     return 0;
 }
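
Taken together, a one-shot hash on these paths now has a defined failure mode at every step. A minimal usage sketch (the helper name is illustrative, not from this change):

    #include <wolfssl/wolfcrypt/sha256.h>

    static int hash_message(const byte* msg, word32 msgSz,
                            byte digest[SHA256_DIGEST_SIZE])
    {
        Sha256 sha256;
        int ret = wc_InitSha256(&sha256);           /* NULL ctx -> BAD_FUNC_ARG */
        if (ret != 0)
            return ret;
        ret = wc_Sha256Update(&sha256, msg, msgSz); /* msgSz == 0 is a no-op */
        if (ret != 0)
            return ret;
        return wc_Sha256Final(&sha256, digest);     /* writes the 32-byte digest */
    }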