diff --git a/wolfcrypt/src/chacha.c b/wolfcrypt/src/chacha.c index 9c39ddd2a..a7ca6ef95 100644 --- a/wolfcrypt/src/chacha.c +++ b/wolfcrypt/src/chacha.c @@ -265,13 +265,14 @@ static void wc_Chacha_encrypt_bytes(ChaCha* ctx, const byte* m, byte* c, for (i = 0; i < bytes && i < ctx->left; i++) { c[i] = (byte)(m[i] ^ output[i]); } - ctx->left = ctx->left - i; + ctx->left -= i; /* Used up all of the stream that was left, increment the counter */ if (ctx->left == 0) { - ctx->X[CHACHA_MATRIX_CNT_IV] = PLUSONE(ctx->X[CHACHA_MATRIX_CNT_IV]); + ctx->X[CHACHA_MATRIX_CNT_IV] = + PLUSONE(ctx->X[CHACHA_MATRIX_CNT_IV]); } - bytes = bytes - i; + bytes -= i; c += i; m += i; } @@ -311,6 +312,26 @@ int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input, return BAD_FUNC_ARG; #ifdef USE_INTEL_CHACHA_SPEEDUP + /* handle left overs */ + if (msglen > 0 && ctx->left > 0) { + byte* out; + word32 i; + + out = (byte*)ctx->over + CHACHA_CHUNK_BYTES - ctx->left; + for (i = 0; i < msglen && i < ctx->left; i++) { + output[i] = (byte)(input[i] ^ out[i]); + } + ctx->left -= i; + + msglen -= i; + output += i; + input += i; + } + + if (msglen == 0) { + return 0; + } + if (!cpuidFlagsSet) { cpuidFlags = cpuid_get_flags(); cpuidFlagsSet = 1; diff --git a/wolfcrypt/src/chacha_asm.S b/wolfcrypt/src/chacha_asm.S index 729446220..b58c6184e 100644 --- a/wolfcrypt/src/chacha_asm.S +++ b/wolfcrypt/src/chacha_asm.S @@ -391,18 +391,19 @@ L_chacha_x64_partial_crypt_start: addl 52(%rdi), %r13d addl 56(%rdi), %r14d addl 60(%rdi), %r15d - movl %eax, 48(%rsp) - movl %ebx, 52(%rsp) - movl %ecx, 56(%rsp) - movl %edx, 60(%rsp) - movl %r8d, 64(%rsp) - movl %r9d, 68(%rsp) - movl %r10d, 72(%rsp) - movl %r11d, 76(%rsp) - movl %r12d, 96(%rsp) - movl %r13d, 100(%rsp) - movl %r14d, 104(%rsp) - movl %r15d, 108(%rsp) + leaq 80(%rdi), %rbp + movl %eax, (%rbp) + movl %ebx, 4(%rbp) + movl %ecx, 8(%rbp) + movl %edx, 12(%rbp) + movl %r8d, 16(%rbp) + movl %r9d, 20(%rbp) + movl %r10d, 24(%rbp) + movl %r11d, 28(%rbp) + movl %r12d, 48(%rbp) + movl %r13d, 52(%rbp) + movl %r14d, 56(%rbp) + movl %r15d, 60(%rbp) movl 8(%rsp), %eax movl 12(%rsp), %ebx movl 16(%rsp), %ecx @@ -411,10 +412,10 @@ L_chacha_x64_partial_crypt_start: addl 36(%rdi), %ebx addl 40(%rdi), %ecx addl 44(%rdi), %edx - movl %eax, 80(%rsp) - movl %ebx, 84(%rsp) - movl %ecx, 88(%rsp) - movl %edx, 92(%rsp) + movl %eax, 32(%rbp) + movl %ebx, 36(%rbp) + movl %ecx, 40(%rbp) + movl %edx, 44(%rbp) movq 24(%rsp), %rdx movq 40(%rsp), %rcx addl $0x01, 48(%rdi) @@ -424,7 +425,7 @@ L_chacha_x64_partial_crypt_start: andl $7, %r8d jz L_chacha_x64_partial_start64 L_chacha_x64_partial_start8: - movzbl (%rsp,%rbx,1), %eax + movzbl (%rbp,%rbx,1), %eax xorb (%rsi,%rbx,1), %al movb %al, (%rdx,%rbx,1) incl %ebx @@ -432,13 +433,16 @@ L_chacha_x64_partial_start8: jne L_chacha_x64_partial_start8 je L_chacha_x64_partial_end64 L_chacha_x64_partial_start64: - movq (%rsp,%rbx,1), %rax + movq (%rbp,%rbx,1), %rax xorq (%rsi,%rbx,1), %rax movq %rax, (%rdx,%rbx,1) addl $8, %ebx L_chacha_x64_partial_end64: cmpl %ecx, %ebx jne L_chacha_x64_partial_start64 + movl $0x40, %ecx + subl %ebx, %ecx + movl %ecx, 76(%rdi) L_chacha_x64_done: addq $0x40, %rsp popq %r15 @@ -915,6 +919,7 @@ L_chacha20_avx1_block_crypt_start: L_chacha20_avx1_block_done: cmpl $0x00, %ecx je L_chacha20_avx1_partial_done + leaq 80(%rdi), %r10 vmovdqu (%rdi), %xmm0 vmovdqu 16(%rdi), %xmm1 vmovdqu 32(%rdi), %xmm2 @@ -994,6 +999,9 @@ L_chacha20_avx1_partial_start64: L_chacha20_avx1_partial_end64: cmpl %ecx, %r11d jne L_chacha20_avx1_partial_start64 + movl $0x40, %r8d + subl %r11d, %r8d + movl %r8d, 76(%rdi) L_chacha20_avx1_partial_done: addq $0x190, %rsp repz retq diff --git a/wolfcrypt/test/test.c b/wolfcrypt/test/test.c index b4d3e605a..b12fb0992 100644 --- a/wolfcrypt/test/test.c +++ b/wolfcrypt/test/test.c @@ -4811,6 +4811,48 @@ int chacha_test(void) return -4524-i; } + /* Streaming test */ + for (i = 1; i <= (int)CHACHA_CHUNK_BYTES; i++) { + int j, rem; + + ret = wc_Chacha_SetKey(&enc, keys[0], keySz); + if (ret != 0) + return -4550; + ret = wc_Chacha_SetKey(&dec, keys[0], keySz); + if (ret != 0) + return -4551; + + ret = wc_Chacha_SetIV(&enc, ivs[2], 0); + if (ret != 0) + return -4552; + ret = wc_Chacha_SetIV(&dec, ivs[2], 0); + if (ret != 0) + return -4553; + + for (j = 0; j < CHACHA_BIG_TEST_SIZE - i; j+= i) { + ret = wc_Chacha_Process(&enc, cipher_big + j, plain_big + j, i); + if (ret != 0) + return -4554; + ret = wc_Chacha_Process(&dec, plain_big + j, cipher_big + j, i); + if (ret != 0) + return -4555; + } + + rem = CHACHA_BIG_TEST_SIZE - j; + ret = wc_Chacha_Process(&enc, cipher_big + j, plain_big + j, rem); + if (ret != 0) + return -4556; + ret = wc_Chacha_Process(&dec, plain_big + j, cipher_big + j, rem); + if (ret != 0) + return -4557; + + if (XMEMCMP(plain_big, input_big, CHACHA_BIG_TEST_SIZE)) + return -4558; + + if (XMEMCMP(cipher_big, cipher_big_result, CHACHA_BIG_TEST_SIZE)) + return -4559; + } + #ifdef WOLFSSL_SMALL_STACK XFREE(cipher_big, NULL, DYNAMIC_TYPE_TMP_BUFFER); XFREE(plain_big, NULL, DYNAMIC_TYPE_TMP_BUFFER); diff --git a/wolfssl/wolfcrypt/chacha.h b/wolfssl/wolfcrypt/chacha.h index 82bdd7a65..42f83cb23 100644 --- a/wolfssl/wolfcrypt/chacha.h +++ b/wolfssl/wolfcrypt/chacha.h @@ -74,10 +74,13 @@ enum { typedef struct ChaCha { word32 X[CHACHA_CHUNK_WORDS]; /* state of cipher */ - word32 left; /* number of bytes leftover */ #ifdef HAVE_INTEL_AVX1 /* vpshufd reads 16 bytes but we only use bottom 4. */ byte extra[12]; +#endif + word32 left; /* number of bytes leftover */ +#ifdef USE_INTEL_CHACHA_SPEEDUP + word32 over[CHACHA_CHUNK_WORDS]; #endif } ChaCha;