diff --git a/configure.ac b/configure.ac index 6a43bcc6b..6c7c01025 100644 --- a/configure.ac +++ b/configure.ac @@ -1083,8 +1083,14 @@ then *) case $host_cpu in *aarch64*) - # +crypto needed for hardware acceleration - AM_CPPFLAGS="$AM_CPPFLAGS -mcpu=generic+crypto" + case $host_os in + *darwin*) + ;; + *) + # +crypto needed for hardware acceleration + AM_CPPFLAGS="$AM_CPPFLAGS -mcpu=generic+crypto" + ;; + esac # Include options.h AM_CCASFLAGS="$AM_CCASFLAGS -DEXTERNAL_OPTS_OPENVPN" diff --git a/wolfcrypt/src/port/arm/armv8-chacha.c b/wolfcrypt/src/port/arm/armv8-chacha.c index 93387cd81..05cb30b00 100644 --- a/wolfcrypt/src/port/arm/armv8-chacha.c +++ b/wolfcrypt/src/port/arm/armv8-chacha.c @@ -92,6 +92,7 @@ int wc_Chacha_SetIV(ChaCha* ctx, const byte* inIv, word32 counter) XMEMCPY(temp, inIv, CHACHA_IV_BYTES); + ctx->left = 0; ctx->X[CHACHA_IV_BYTES+0] = counter; /* block counter */ ctx->X[CHACHA_IV_BYTES+1] = LITTLE32(temp[0]); /* fixed variable from nonce */ ctx->X[CHACHA_IV_BYTES+2] = LITTLE32(temp[1]); /* counter from nonce */ @@ -166,6 +167,7 @@ int wc_Chacha_SetKey(ChaCha* ctx, const byte* key, word32 keySz) ctx->X[ 1] = constants[1]; ctx->X[ 2] = constants[2]; ctx->X[ 3] = constants[3]; + ctx->left = 0; return 0; } @@ -1673,7 +1675,7 @@ static WC_INLINE int wc_Chacha_encrypt_128(const word32 input[CHACHA_CHUNK_WORDS } static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m, - byte* c, word32 bytes) + byte* c, word32 bytes, byte* over) { #ifdef CHACHA_TEST printf("Entering wc_Chacha_encrypt_64 with %d bytes\n", bytes); @@ -2154,6 +2156,7 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m, "B L_chacha20_arm64_64_done_%= \n\t" "\n" "L_chacha20_arm64_64_lt_64_%=: \n\t" + "ST1 {v0.4s-v3.4s}, [%[over]]\n\t" "CMP %[bytes], #32 \n\t" "BLT L_chacha20_arm64_64_lt_32_%= \n\t" "LD1 {v4.4S, v5.4S}, [%[m]], #32 \n\t" @@ -2199,7 +2202,8 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m, "L_chacha20_arm64_64_done_%=: \n\t" : [input] "+r" (input), [m] "+r" (m), [c] "+r" (c), [bytes] "+r" (bytes64) : [L_chacha20_neon_rol8] "r" (L_chacha20_neon_rol8), - [L_chacha20_neon_inc_first_word] "r" (L_chacha20_neon_inc_first_word) + [L_chacha20_neon_inc_first_word] "r" (L_chacha20_neon_inc_first_word), + [over] "r" (over) : "memory", "x4", "x5", "x6", "x7", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11" ); @@ -2719,6 +2723,7 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m, "B L_chacha20_arm32_64_done_%= \n\t" "\n" "L_chacha20_arm32_64_lt_64_%=: \n\t" + "VSTM %[over], {q0-q3} \n\t" /* XOR 32 bytes */ "CMP %[bytes], #32 \n\t" "BLT L_chacha20_arm32_64_lt_32_%= \n\t" @@ -2785,13 +2790,16 @@ static WC_INLINE void wc_Chacha_encrypt_64(const word32* input, const byte* m, "\n" "L_chacha20_arm32_64_done_%=: \n\t" : [input] "+r" (input), [m] "+r" (m), [c] "+r" (c), [bytes] "+r" (bytes) - : [L_chacha20_neon_inc_first_word] "r" (L_chacha20_neon_inc_first_word) + : [L_chacha20_neon_inc_first_word] "r" (L_chacha20_neon_inc_first_word), + [over] "r" (over) : "memory", "cc", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q14", "r12", "r14" ); #endif /* __aarch64__ */ } + + /** * Encrypt a stream of bytes */ @@ -2830,9 +2838,11 @@ static void wc_Chacha_encrypt_bytes(ChaCha* ctx, const byte* m, byte* c, ctx->X[CHACHA_IV_BYTES] = PLUS(ctx->X[CHACHA_IV_BYTES], processed / CHACHA_CHUNK_BYTES); } if (bytes > 0) { - wc_Chacha_encrypt_64(ctx->X, m, c, bytes); + wc_Chacha_encrypt_64(ctx->X, m, c, bytes, (byte*)ctx->over); if (bytes > 64) ctx->X[CHACHA_IV_BYTES] = PLUSONE(ctx->X[CHACHA_IV_BYTES]); + else + ctx->left = CHACHA_CHUNK_BYTES - bytes; ctx->X[CHACHA_IV_BYTES] = PLUSONE(ctx->X[CHACHA_IV_BYTES]); } } @@ -2846,6 +2856,26 @@ int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input, if (ctx == NULL || output == NULL || input == NULL) return BAD_FUNC_ARG; + /* handle left overs */ + if (msglen > 0 && ctx->left > 0) { + byte* out; + word32 i; + + out = (byte*)ctx->over + CHACHA_CHUNK_BYTES - ctx->left; + for (i = 0; i < msglen && i < ctx->left; i++) { + output[i] = (byte)(input[i] ^ out[i]); + } + ctx->left -= i; + + msglen -= i; + output += i; + input += i; + } + + if (msglen == 0) { + return 0; + } + wc_Chacha_encrypt_bytes(ctx, input, output, msglen); return 0; diff --git a/wolfssl/wolfcrypt/chacha.h b/wolfssl/wolfcrypt/chacha.h index 42f83cb23..584cd845d 100644 --- a/wolfssl/wolfcrypt/chacha.h +++ b/wolfssl/wolfcrypt/chacha.h @@ -79,7 +79,7 @@ typedef struct ChaCha { byte extra[12]; #endif word32 left; /* number of bytes leftover */ -#ifdef USE_INTEL_CHACHA_SPEEDUP +#if defined(USE_INTEL_CHACHA_SPEEDUP) || defined(WOLFSSL_ARMASM) word32 over[CHACHA_CHUNK_WORDS]; #endif } ChaCha;