forked from wolfSSL/wolfssl
Merge pull request #3253 from SparkiDev/chacha20_stream_fix
ChaCha20: Enable streaming with Intel x86_64 asm
This commit is contained in:
@@ -265,13 +265,14 @@ static void wc_Chacha_encrypt_bytes(ChaCha* ctx, const byte* m, byte* c,
|
|||||||
for (i = 0; i < bytes && i < ctx->left; i++) {
|
for (i = 0; i < bytes && i < ctx->left; i++) {
|
||||||
c[i] = (byte)(m[i] ^ output[i]);
|
c[i] = (byte)(m[i] ^ output[i]);
|
||||||
}
|
}
|
||||||
ctx->left = ctx->left - i;
|
ctx->left -= i;
|
||||||
|
|
||||||
/* Used up all of the stream that was left, increment the counter */
|
/* Used up all of the stream that was left, increment the counter */
|
||||||
if (ctx->left == 0) {
|
if (ctx->left == 0) {
|
||||||
ctx->X[CHACHA_MATRIX_CNT_IV] = PLUSONE(ctx->X[CHACHA_MATRIX_CNT_IV]);
|
ctx->X[CHACHA_MATRIX_CNT_IV] =
|
||||||
|
PLUSONE(ctx->X[CHACHA_MATRIX_CNT_IV]);
|
||||||
}
|
}
|
||||||
bytes = bytes - i;
|
bytes -= i;
|
||||||
c += i;
|
c += i;
|
||||||
m += i;
|
m += i;
|
||||||
}
|
}
|
||||||
@@ -311,6 +312,26 @@ int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input,
|
|||||||
return BAD_FUNC_ARG;
|
return BAD_FUNC_ARG;
|
||||||
|
|
||||||
#ifdef USE_INTEL_CHACHA_SPEEDUP
|
#ifdef USE_INTEL_CHACHA_SPEEDUP
|
||||||
|
/* handle left overs */
|
||||||
|
if (msglen > 0 && ctx->left > 0) {
|
||||||
|
byte* out;
|
||||||
|
word32 i;
|
||||||
|
|
||||||
|
out = (byte*)ctx->over + CHACHA_CHUNK_BYTES - ctx->left;
|
||||||
|
for (i = 0; i < msglen && i < ctx->left; i++) {
|
||||||
|
output[i] = (byte)(input[i] ^ out[i]);
|
||||||
|
}
|
||||||
|
ctx->left -= i;
|
||||||
|
|
||||||
|
msglen -= i;
|
||||||
|
output += i;
|
||||||
|
input += i;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (msglen == 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (!cpuidFlagsSet) {
|
if (!cpuidFlagsSet) {
|
||||||
cpuidFlags = cpuid_get_flags();
|
cpuidFlags = cpuid_get_flags();
|
||||||
cpuidFlagsSet = 1;
|
cpuidFlagsSet = 1;
|
||||||
|
@@ -391,18 +391,19 @@ L_chacha_x64_partial_crypt_start:
|
|||||||
addl 52(%rdi), %r13d
|
addl 52(%rdi), %r13d
|
||||||
addl 56(%rdi), %r14d
|
addl 56(%rdi), %r14d
|
||||||
addl 60(%rdi), %r15d
|
addl 60(%rdi), %r15d
|
||||||
movl %eax, 48(%rsp)
|
leaq 80(%rdi), %rbp
|
||||||
movl %ebx, 52(%rsp)
|
movl %eax, (%rbp)
|
||||||
movl %ecx, 56(%rsp)
|
movl %ebx, 4(%rbp)
|
||||||
movl %edx, 60(%rsp)
|
movl %ecx, 8(%rbp)
|
||||||
movl %r8d, 64(%rsp)
|
movl %edx, 12(%rbp)
|
||||||
movl %r9d, 68(%rsp)
|
movl %r8d, 16(%rbp)
|
||||||
movl %r10d, 72(%rsp)
|
movl %r9d, 20(%rbp)
|
||||||
movl %r11d, 76(%rsp)
|
movl %r10d, 24(%rbp)
|
||||||
movl %r12d, 96(%rsp)
|
movl %r11d, 28(%rbp)
|
||||||
movl %r13d, 100(%rsp)
|
movl %r12d, 48(%rbp)
|
||||||
movl %r14d, 104(%rsp)
|
movl %r13d, 52(%rbp)
|
||||||
movl %r15d, 108(%rsp)
|
movl %r14d, 56(%rbp)
|
||||||
|
movl %r15d, 60(%rbp)
|
||||||
movl 8(%rsp), %eax
|
movl 8(%rsp), %eax
|
||||||
movl 12(%rsp), %ebx
|
movl 12(%rsp), %ebx
|
||||||
movl 16(%rsp), %ecx
|
movl 16(%rsp), %ecx
|
||||||
@@ -411,10 +412,10 @@ L_chacha_x64_partial_crypt_start:
|
|||||||
addl 36(%rdi), %ebx
|
addl 36(%rdi), %ebx
|
||||||
addl 40(%rdi), %ecx
|
addl 40(%rdi), %ecx
|
||||||
addl 44(%rdi), %edx
|
addl 44(%rdi), %edx
|
||||||
movl %eax, 80(%rsp)
|
movl %eax, 32(%rbp)
|
||||||
movl %ebx, 84(%rsp)
|
movl %ebx, 36(%rbp)
|
||||||
movl %ecx, 88(%rsp)
|
movl %ecx, 40(%rbp)
|
||||||
movl %edx, 92(%rsp)
|
movl %edx, 44(%rbp)
|
||||||
movq 24(%rsp), %rdx
|
movq 24(%rsp), %rdx
|
||||||
movq 40(%rsp), %rcx
|
movq 40(%rsp), %rcx
|
||||||
addl $0x01, 48(%rdi)
|
addl $0x01, 48(%rdi)
|
||||||
@@ -424,7 +425,7 @@ L_chacha_x64_partial_crypt_start:
|
|||||||
andl $7, %r8d
|
andl $7, %r8d
|
||||||
jz L_chacha_x64_partial_start64
|
jz L_chacha_x64_partial_start64
|
||||||
L_chacha_x64_partial_start8:
|
L_chacha_x64_partial_start8:
|
||||||
movzbl (%rsp,%rbx,1), %eax
|
movzbl (%rbp,%rbx,1), %eax
|
||||||
xorb (%rsi,%rbx,1), %al
|
xorb (%rsi,%rbx,1), %al
|
||||||
movb %al, (%rdx,%rbx,1)
|
movb %al, (%rdx,%rbx,1)
|
||||||
incl %ebx
|
incl %ebx
|
||||||
@@ -432,13 +433,16 @@ L_chacha_x64_partial_start8:
|
|||||||
jne L_chacha_x64_partial_start8
|
jne L_chacha_x64_partial_start8
|
||||||
je L_chacha_x64_partial_end64
|
je L_chacha_x64_partial_end64
|
||||||
L_chacha_x64_partial_start64:
|
L_chacha_x64_partial_start64:
|
||||||
movq (%rsp,%rbx,1), %rax
|
movq (%rbp,%rbx,1), %rax
|
||||||
xorq (%rsi,%rbx,1), %rax
|
xorq (%rsi,%rbx,1), %rax
|
||||||
movq %rax, (%rdx,%rbx,1)
|
movq %rax, (%rdx,%rbx,1)
|
||||||
addl $8, %ebx
|
addl $8, %ebx
|
||||||
L_chacha_x64_partial_end64:
|
L_chacha_x64_partial_end64:
|
||||||
cmpl %ecx, %ebx
|
cmpl %ecx, %ebx
|
||||||
jne L_chacha_x64_partial_start64
|
jne L_chacha_x64_partial_start64
|
||||||
|
movl $0x40, %ecx
|
||||||
|
subl %ebx, %ecx
|
||||||
|
movl %ecx, 76(%rdi)
|
||||||
L_chacha_x64_done:
|
L_chacha_x64_done:
|
||||||
addq $0x40, %rsp
|
addq $0x40, %rsp
|
||||||
popq %r15
|
popq %r15
|
||||||
@@ -915,6 +919,7 @@ L_chacha20_avx1_block_crypt_start:
|
|||||||
L_chacha20_avx1_block_done:
|
L_chacha20_avx1_block_done:
|
||||||
cmpl $0x00, %ecx
|
cmpl $0x00, %ecx
|
||||||
je L_chacha20_avx1_partial_done
|
je L_chacha20_avx1_partial_done
|
||||||
|
leaq 80(%rdi), %r10
|
||||||
vmovdqu (%rdi), %xmm0
|
vmovdqu (%rdi), %xmm0
|
||||||
vmovdqu 16(%rdi), %xmm1
|
vmovdqu 16(%rdi), %xmm1
|
||||||
vmovdqu 32(%rdi), %xmm2
|
vmovdqu 32(%rdi), %xmm2
|
||||||
@@ -994,6 +999,9 @@ L_chacha20_avx1_partial_start64:
|
|||||||
L_chacha20_avx1_partial_end64:
|
L_chacha20_avx1_partial_end64:
|
||||||
cmpl %ecx, %r11d
|
cmpl %ecx, %r11d
|
||||||
jne L_chacha20_avx1_partial_start64
|
jne L_chacha20_avx1_partial_start64
|
||||||
|
movl $0x40, %r8d
|
||||||
|
subl %r11d, %r8d
|
||||||
|
movl %r8d, 76(%rdi)
|
||||||
L_chacha20_avx1_partial_done:
|
L_chacha20_avx1_partial_done:
|
||||||
addq $0x190, %rsp
|
addq $0x190, %rsp
|
||||||
repz retq
|
repz retq
|
||||||
|
@@ -4811,6 +4811,48 @@ int chacha_test(void)
|
|||||||
return -4524-i;
|
return -4524-i;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Streaming test */
|
||||||
|
for (i = 1; i <= (int)CHACHA_CHUNK_BYTES; i++) {
|
||||||
|
int j, rem;
|
||||||
|
|
||||||
|
ret = wc_Chacha_SetKey(&enc, keys[0], keySz);
|
||||||
|
if (ret != 0)
|
||||||
|
return -4550;
|
||||||
|
ret = wc_Chacha_SetKey(&dec, keys[0], keySz);
|
||||||
|
if (ret != 0)
|
||||||
|
return -4551;
|
||||||
|
|
||||||
|
ret = wc_Chacha_SetIV(&enc, ivs[2], 0);
|
||||||
|
if (ret != 0)
|
||||||
|
return -4552;
|
||||||
|
ret = wc_Chacha_SetIV(&dec, ivs[2], 0);
|
||||||
|
if (ret != 0)
|
||||||
|
return -4553;
|
||||||
|
|
||||||
|
for (j = 0; j < CHACHA_BIG_TEST_SIZE - i; j+= i) {
|
||||||
|
ret = wc_Chacha_Process(&enc, cipher_big + j, plain_big + j, i);
|
||||||
|
if (ret != 0)
|
||||||
|
return -4554;
|
||||||
|
ret = wc_Chacha_Process(&dec, plain_big + j, cipher_big + j, i);
|
||||||
|
if (ret != 0)
|
||||||
|
return -4555;
|
||||||
|
}
|
||||||
|
|
||||||
|
rem = CHACHA_BIG_TEST_SIZE - j;
|
||||||
|
ret = wc_Chacha_Process(&enc, cipher_big + j, plain_big + j, rem);
|
||||||
|
if (ret != 0)
|
||||||
|
return -4556;
|
||||||
|
ret = wc_Chacha_Process(&dec, plain_big + j, cipher_big + j, rem);
|
||||||
|
if (ret != 0)
|
||||||
|
return -4557;
|
||||||
|
|
||||||
|
if (XMEMCMP(plain_big, input_big, CHACHA_BIG_TEST_SIZE))
|
||||||
|
return -4558;
|
||||||
|
|
||||||
|
if (XMEMCMP(cipher_big, cipher_big_result, CHACHA_BIG_TEST_SIZE))
|
||||||
|
return -4559;
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef WOLFSSL_SMALL_STACK
|
#ifdef WOLFSSL_SMALL_STACK
|
||||||
XFREE(cipher_big, NULL, DYNAMIC_TYPE_TMP_BUFFER);
|
XFREE(cipher_big, NULL, DYNAMIC_TYPE_TMP_BUFFER);
|
||||||
XFREE(plain_big, NULL, DYNAMIC_TYPE_TMP_BUFFER);
|
XFREE(plain_big, NULL, DYNAMIC_TYPE_TMP_BUFFER);
|
||||||
|
@@ -74,10 +74,13 @@ enum {
|
|||||||
|
|
||||||
typedef struct ChaCha {
|
typedef struct ChaCha {
|
||||||
word32 X[CHACHA_CHUNK_WORDS]; /* state of cipher */
|
word32 X[CHACHA_CHUNK_WORDS]; /* state of cipher */
|
||||||
word32 left; /* number of bytes leftover */
|
|
||||||
#ifdef HAVE_INTEL_AVX1
|
#ifdef HAVE_INTEL_AVX1
|
||||||
/* vpshufd reads 16 bytes but we only use bottom 4. */
|
/* vpshufd reads 16 bytes but we only use bottom 4. */
|
||||||
byte extra[12];
|
byte extra[12];
|
||||||
|
#endif
|
||||||
|
word32 left; /* number of bytes leftover */
|
||||||
|
#ifdef USE_INTEL_CHACHA_SPEEDUP
|
||||||
|
word32 over[CHACHA_CHUNK_WORDS];
|
||||||
#endif
|
#endif
|
||||||
} ChaCha;
|
} ChaCha;
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user