forked from wolfSSL/wolfssl
@@ -525,7 +525,41 @@ void bench_aes(int show)
|
||||
#endif
|
||||
|
||||
if (show) {
|
||||
printf("AES %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
|
||||
printf("AES enc %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
|
||||
blockType, total, persec);
|
||||
SHOW_INTEL_CYCLES
|
||||
printf("\n");
|
||||
}
|
||||
#ifdef HAVE_CAVIUM
|
||||
wc_AesFreeCavium(&enc);
|
||||
if (wc_AesInitCavium(&enc, CAVIUM_DEV_ID) != 0) {
|
||||
printf("aes init cavium failed\n");
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
ret = wc_AesSetKey(&enc, key, 16, iv, AES_DECRYPTION);
|
||||
if (ret != 0) {
|
||||
printf("AesSetKey failed, ret = %d\n", ret);
|
||||
return;
|
||||
}
|
||||
start = current_time(1);
|
||||
BEGIN_INTEL_CYCLES
|
||||
|
||||
for(i = 0; i < numBlocks; i++)
|
||||
wc_AesCbcDecrypt(&enc, plain, cipher, sizeof(plain));
|
||||
|
||||
END_INTEL_CYCLES
|
||||
total = current_time(0) - start;
|
||||
|
||||
persec = 1 / total * numBlocks;
|
||||
#ifdef BENCH_EMBEDDED
|
||||
/* since using kB, convert to MB/s */
|
||||
persec = persec / 1024;
|
||||
#endif
|
||||
|
||||
if (show) {
|
||||
printf("AES dec %d %s took %5.3f seconds, %8.3f MB/s", numBlocks,
|
||||
blockType, total, persec);
|
||||
SHOW_INTEL_CYCLES
|
||||
printf("\n");
|
||||
|
@@ -1094,10 +1094,22 @@ void AES_CBC_encrypt(const unsigned char* in, unsigned char* out,
|
||||
XASM_LINK("AES_CBC_encrypt");
|
||||
|
||||
#ifdef HAVE_AES_DECRYPT
|
||||
void AES_CBC_decrypt(const unsigned char* in, unsigned char* out,
|
||||
unsigned char* ivec, unsigned long length,
|
||||
const unsigned char* KS, int nr)
|
||||
XASM_LINK("AES_CBC_decrypt");
|
||||
/* Declares exactly one block-unrolled CBC decrypt routine, chosen at compile
 * time: AES_CBC_decrypt_by4 when WOLFSSL_AESNI_BY4 is defined,
 * AES_CBC_decrypt_by6 when WOLFSSL_AESNI_BY6 is defined, and
 * AES_CBC_decrypt_by8 otherwise. All three take the same parameter list.
 * XASM_LINK is defined elsewhere; it is passed the routine's own name as a
 * string (presumably the assembler-level symbol name -- confirm against its
 * definition). */
#if defined(WOLFSSL_AESNI_BY4)
|
||||
void AES_CBC_decrypt_by4(const unsigned char* in, unsigned char* out,
|
||||
unsigned char* ivec, unsigned long length,
|
||||
const unsigned char* KS, int nr)
|
||||
XASM_LINK("AES_CBC_decrypt_by4");
|
||||
#elif defined(WOLFSSL_AESNI_BY6)
|
||||
void AES_CBC_decrypt_by6(const unsigned char* in, unsigned char* out,
|
||||
unsigned char* ivec, unsigned long length,
|
||||
const unsigned char* KS, int nr)
|
||||
XASM_LINK("AES_CBC_decrypt_by6");
|
||||
#else /* WOLFSSL_AESNI_BYx */
/* default when neither BY4 nor BY6 is requested */
|
||||
void AES_CBC_decrypt_by8(const unsigned char* in, unsigned char* out,
|
||||
unsigned char* ivec, unsigned long length,
|
||||
const unsigned char* KS, int nr)
|
||||
XASM_LINK("AES_CBC_decrypt_by8");
|
||||
#endif /* WOLFSSL_AESNI_BYx */
|
||||
#endif /* HAVE_AES_DECRYPT */
|
||||
#endif /* HAVE_AES_CBC */
|
||||
|
||||
@@ -2549,8 +2561,16 @@ int wc_AesSetIV(Aes* aes, const byte* iv)
|
||||
|
||||
/* if input and output same will overwrite input iv */
|
||||
XMEMCPY(aes->tmp, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
|
||||
AES_CBC_decrypt(in, out, (byte*)aes->reg, sz, (byte*)aes->key,
|
||||
#if defined(WOLFSSL_AESNI_BY4)
|
||||
AES_CBC_decrypt_by4(in, out, (byte*)aes->reg, sz, (byte*)aes->key,
|
||||
aes->rounds);
|
||||
#elif defined(WOLFSSL_AESNI_BY6)
|
||||
AES_CBC_decrypt_by6(in, out, (byte*)aes->reg, sz, (byte*)aes->key,
|
||||
aes->rounds);
|
||||
#else /* WOLFSSL_AESNI_BYx */
|
||||
AES_CBC_decrypt_by8(in, out, (byte*)aes->reg, sz, (byte*)aes->key,
|
||||
aes->rounds);
|
||||
#endif /* WOLFSSL_AESNI_BYx */
|
||||
/* store iv for next call */
|
||||
XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
|
||||
return 0;
|
||||
|
@@ -101,220 +101,753 @@ LAST:
|
||||
AES_CBC_encrypt ENDP
|
||||
|
||||
|
||||
; void AES_CBC_decrypt_by4(const unsigned char* in,
|
||||
; unsigned char* out,
|
||||
; unsigned char ivec[16],
|
||||
; unsigned long length,
|
||||
; const unsigned char* KS,
|
||||
; int nr)
|
||||
AES_CBC_decrypt_by4 PROC
|
||||
; parameter 1: rdi
|
||||
; parameter 2: rsi
|
||||
; parameter 3: rdx
|
||||
; parameter 4: rcx
|
||||
; parameter 5: r8
|
||||
; parameter 6: r9d
|
||||
|
||||
; /*
|
||||
; AES_CBC_decrypt[const ,unsigned char*in
|
||||
; unsigned ,char*out
|
||||
; unsigned ,char ivec+16
|
||||
; unsigned ,long length
|
||||
; const ,unsigned char*KS
|
||||
; int nr]
|
||||
; */
|
||||
; . globl AES_CBC_decrypt
|
||||
AES_CBC_decrypt PROC
|
||||
;# parameter 1: rdi
|
||||
;# parameter 2: rsi
|
||||
;# parameter 3: rdx
|
||||
;# parameter 4: rcx
|
||||
;# parameter 5: r8
|
||||
;# parameter 6: r9d
|
||||
|
||||
; save rdi and rsi to rax and r11, restore before ret
|
||||
mov rax,rdi
|
||||
mov r11,rsi
|
||||
|
||||
; convert to what we had for att&t convention
|
||||
mov rdi,rcx
|
||||
mov rsi,rdx
|
||||
mov rdx,r8
|
||||
mov rcx,r9
|
||||
mov r8,[rsp+40]
|
||||
mov r9d,[rsp+48]
|
||||
|
||||
; on microsoft xmm6-xmm15 are non volatile, let's save on stack and restore at end
|
||||
sub rsp,8+8*16 ; 8 = align stack , 8 xmm6-12,15 16 bytes each
|
||||
movdqa [rsp+0], xmm6
|
||||
movdqa [rsp+16], xmm7
|
||||
movdqa [rsp+32], xmm8
|
||||
movdqa [rsp+48], xmm9
|
||||
movdqa [rsp+64], xmm10
|
||||
movdqa [rsp+80], xmm11
|
||||
movdqa [rsp+96], xmm12
|
||||
movdqa [rsp+112], xmm15
|
||||
|
||||
mov r10,rcx
|
||||
shr rcx,4
|
||||
shl r10,60
|
||||
je DNO_PARTS_4
|
||||
add rcx,1
|
||||
; save rdi and rsi to rax and r11, restore before ret
|
||||
mov rax, rdi
|
||||
mov r11, rsi
|
||||
; convert to what we had for att&t convention
|
||||
mov rdi, rcx
|
||||
mov rsi, rdx
|
||||
mov rdx, r8
|
||||
mov rcx,r9
|
||||
mov r8, [rsp+40]
|
||||
mov r9d, [rsp+48]
|
||||
; on microsoft xmm6-xmm15 are non volatile,
|
||||
; let's save on stack and restore at end
|
||||
sub rsp, 8+8*16 ; 8 = align stack , 8 xmm6-12,15 16 bytes each
|
||||
movdqa [rsp+0], xmm6
|
||||
movdqa [rsp+16], xmm7
|
||||
movdqa [rsp+32], xmm8
|
||||
movdqa [rsp+48], xmm9
|
||||
movdqa [rsp+64], xmm10
|
||||
movdqa [rsp+80], xmm11
|
||||
movdqa [rsp+96], xmm12
|
||||
movdqa [rsp+112], xmm15
|
||||
; back to our original code, more or less
|
||||
mov r10, rcx
|
||||
shr rcx, 4
|
||||
shl r10, 60
|
||||
je DNO_PARTS_4
|
||||
add rcx, 1
|
||||
DNO_PARTS_4:
|
||||
mov r10,rcx
|
||||
shl r10,62
|
||||
shr r10,62
|
||||
shr rcx,2
|
||||
movdqu xmm5,[rdx]
|
||||
je DREMAINDER_4
|
||||
sub rsi,64
|
||||
mov r10, rcx
|
||||
shl r10, 62
|
||||
shr r10, 62
|
||||
shr rcx, 2
|
||||
movdqu xmm5, [rdx]
|
||||
je DREMAINDER_4
|
||||
sub rsi, 64
|
||||
DLOOP_4:
|
||||
movdqu xmm1,[rdi]
|
||||
movdqu xmm2,16[rdi]
|
||||
movdqu xmm3,32[rdi]
|
||||
movdqu xmm4,48[rdi]
|
||||
movdqa xmm6,xmm1
|
||||
movdqa xmm7,xmm2
|
||||
movdqa xmm8,xmm3
|
||||
movdqa xmm15,xmm4
|
||||
movdqa xmm9,[r8]
|
||||
movdqa xmm10,16[r8]
|
||||
movdqa xmm11,32[r8]
|
||||
movdqa xmm12,48[r8]
|
||||
pxor xmm1,xmm9
|
||||
pxor xmm2,xmm9
|
||||
pxor xmm3,xmm9
|
||||
|
||||
pxor xmm4,xmm9
|
||||
aesdec xmm1,xmm10
|
||||
aesdec xmm2,xmm10
|
||||
aesdec xmm3,xmm10
|
||||
aesdec xmm4,xmm10
|
||||
aesdec xmm1,xmm11
|
||||
aesdec xmm2,xmm11
|
||||
aesdec xmm3,xmm11
|
||||
aesdec xmm4,xmm11
|
||||
aesdec xmm1,xmm12
|
||||
aesdec xmm2,xmm12
|
||||
aesdec xmm3,xmm12
|
||||
aesdec xmm4,xmm12
|
||||
movdqa xmm9,64[r8]
|
||||
movdqa xmm10,80[r8]
|
||||
movdqa xmm11,96[r8]
|
||||
movdqa xmm12,112[r8]
|
||||
aesdec xmm1,xmm9
|
||||
aesdec xmm2,xmm9
|
||||
aesdec xmm3,xmm9
|
||||
aesdec xmm4,xmm9
|
||||
aesdec xmm1,xmm10
|
||||
aesdec xmm2,xmm10
|
||||
aesdec xmm3,xmm10
|
||||
aesdec xmm4,xmm10
|
||||
aesdec xmm1,xmm11
|
||||
aesdec xmm2,xmm11
|
||||
aesdec xmm3,xmm11
|
||||
aesdec xmm4,xmm11
|
||||
aesdec xmm1,xmm12
|
||||
aesdec xmm2,xmm12
|
||||
aesdec xmm3,xmm12
|
||||
aesdec xmm4,xmm12
|
||||
movdqa xmm9,128[r8]
|
||||
movdqa xmm10,144[r8]
|
||||
movdqa xmm11,160[r8]
|
||||
cmp r9d,12
|
||||
aesdec xmm1,xmm9
|
||||
aesdec xmm2,xmm9
|
||||
aesdec xmm3,xmm9
|
||||
aesdec xmm4,xmm9
|
||||
aesdec xmm1,xmm10
|
||||
aesdec xmm2,xmm10
|
||||
aesdec xmm3,xmm10
|
||||
aesdec xmm4,xmm10
|
||||
jb DLAST_4
|
||||
movdqa xmm9,160[r8]
|
||||
movdqa xmm10,176[r8]
|
||||
movdqa xmm11,192[r8]
|
||||
cmp r9d,14
|
||||
aesdec xmm1,xmm9
|
||||
aesdec xmm2,xmm9
|
||||
aesdec xmm3,xmm9
|
||||
aesdec xmm4,xmm9
|
||||
aesdec xmm1,xmm10
|
||||
aesdec xmm2,xmm10
|
||||
aesdec xmm3,xmm10
|
||||
aesdec xmm4,xmm10
|
||||
jb DLAST_4
|
||||
|
||||
movdqa xmm9,192[r8]
|
||||
movdqa xmm10,208[r8]
|
||||
movdqa xmm11,224[r8]
|
||||
aesdec xmm1,xmm9
|
||||
aesdec xmm2,xmm9
|
||||
aesdec xmm3,xmm9
|
||||
aesdec xmm4,xmm9
|
||||
aesdec xmm1,xmm10
|
||||
aesdec xmm2,xmm10
|
||||
aesdec xmm3,xmm10
|
||||
aesdec xmm4,xmm10
|
||||
movdqu xmm1, [rdi]
|
||||
movdqu xmm2, 16[rdi]
|
||||
movdqu xmm3, 32[rdi]
|
||||
movdqu xmm4, 48[rdi]
|
||||
movdqa xmm6, xmm1
|
||||
movdqa xmm7, xmm2
|
||||
movdqa xmm8, xmm3
|
||||
movdqa xmm15, xmm4
|
||||
movdqa xmm9, [r8]
|
||||
movdqa xmm10, 16[r8]
|
||||
movdqa xmm11, 32[r8]
|
||||
movdqa xmm12, 48[r8]
|
||||
pxor xmm1, xmm9
|
||||
pxor xmm2, xmm9
|
||||
pxor xmm3, xmm9
|
||||
pxor xmm4, xmm9
|
||||
aesdec xmm1, xmm10
|
||||
aesdec xmm2, xmm10
|
||||
aesdec xmm3, xmm10
|
||||
aesdec xmm4, xmm10
|
||||
aesdec xmm1, xmm11
|
||||
aesdec xmm2, xmm11
|
||||
aesdec xmm3, xmm11
|
||||
aesdec xmm4, xmm11
|
||||
aesdec xmm1, xmm12
|
||||
aesdec xmm2, xmm12
|
||||
aesdec xmm3, xmm12
|
||||
aesdec xmm4, xmm12
|
||||
movdqa xmm9, 64[r8]
|
||||
movdqa xmm10, 80[r8]
|
||||
movdqa xmm11, 96[r8]
|
||||
movdqa xmm12, 112[r8]
|
||||
aesdec xmm1, xmm9
|
||||
aesdec xmm2, xmm9
|
||||
aesdec xmm3, xmm9
|
||||
aesdec xmm4, xmm9
|
||||
aesdec xmm1, xmm10
|
||||
aesdec xmm2, xmm10
|
||||
aesdec xmm3, xmm10
|
||||
aesdec xmm4, xmm10
|
||||
aesdec xmm1, xmm11
|
||||
aesdec xmm2, xmm11
|
||||
aesdec xmm3, xmm11
|
||||
aesdec xmm4, xmm11
|
||||
aesdec xmm1, xmm12
|
||||
aesdec xmm2, xmm12
|
||||
aesdec xmm3, xmm12
|
||||
aesdec xmm4, xmm12
|
||||
movdqa xmm9, 128[r8]
|
||||
movdqa xmm10, 144[r8]
|
||||
movdqa xmm11, 160[r8]
|
||||
cmp r9d, 12
|
||||
aesdec xmm1, xmm9
|
||||
aesdec xmm2, xmm9
|
||||
aesdec xmm3, xmm9
|
||||
aesdec xmm4, xmm9
|
||||
aesdec xmm1, xmm10
|
||||
aesdec xmm2, xmm10
|
||||
aesdec xmm3, xmm10
|
||||
aesdec xmm4, xmm10
|
||||
jb DLAST_4
|
||||
movdqa xmm9, 160[r8]
|
||||
movdqa xmm10, 176[r8]
|
||||
movdqa xmm11, 192[r8]
|
||||
cmp r9d, 14
|
||||
aesdec xmm1, xmm9
|
||||
aesdec xmm2, xmm9
|
||||
aesdec xmm3, xmm9
|
||||
aesdec xmm4, xmm9
|
||||
aesdec xmm1, xmm10
|
||||
aesdec xmm2, xmm10
|
||||
aesdec xmm3, xmm10
|
||||
aesdec xmm4, xmm10
|
||||
jb DLAST_4
|
||||
movdqa xmm9, 192[r8]
|
||||
movdqa xmm10, 208[r8]
|
||||
movdqa xmm11, 224[r8]
|
||||
aesdec xmm1, xmm9
|
||||
aesdec xmm2, xmm9
|
||||
aesdec xmm3, xmm9
|
||||
aesdec xmm4, xmm9
|
||||
aesdec xmm1, xmm10
|
||||
aesdec xmm2, xmm10
|
||||
aesdec xmm3, xmm10
|
||||
aesdec xmm4, xmm10
|
||||
DLAST_4:
|
||||
add rdi,64
|
||||
add rsi,64
|
||||
dec rcx
|
||||
aesdeclast xmm1,xmm11
|
||||
aesdeclast xmm2,xmm11
|
||||
aesdeclast xmm3,xmm11
|
||||
aesdeclast xmm4,xmm11
|
||||
pxor xmm1,xmm5
|
||||
pxor xmm2,xmm6
|
||||
pxor xmm3,xmm7
|
||||
pxor xmm4,xmm8
|
||||
movdqu [rsi],xmm1
|
||||
movdqu 16[rsi],xmm2
|
||||
movdqu 32[rsi],xmm3
|
||||
movdqu 48[rsi],xmm4
|
||||
movdqa xmm5,xmm15
|
||||
jne DLOOP_4
|
||||
add rsi,64
|
||||
add rdi, 64
|
||||
add rsi, 64
|
||||
dec rcx
|
||||
aesdeclast xmm1, xmm11
|
||||
aesdeclast xmm2, xmm11
|
||||
aesdeclast xmm3, xmm11
|
||||
aesdeclast xmm4, xmm11
|
||||
pxor xmm1, xmm5
|
||||
pxor xmm2, xmm6
|
||||
pxor xmm3, xmm7
|
||||
pxor xmm4, xmm8
|
||||
movdqu [rsi], xmm1
|
||||
movdqu 16[rsi], xmm2
|
||||
movdqu 32[rsi], xmm3
|
||||
movdqu 48[rsi], xmm4
|
||||
movdqa xmm5, xmm15
|
||||
jne DLOOP_4
|
||||
add rsi, 64
|
||||
DREMAINDER_4:
|
||||
cmp r10,0
|
||||
je DEND_4
|
||||
cmp r10, 0
|
||||
je DEND_4
|
||||
DLOOP_4_2:
|
||||
movdqu xmm1,[rdi]
|
||||
movdqa xmm15,xmm1
|
||||
add rdi,16
|
||||
pxor xmm1,[r8]
|
||||
movdqu xmm2,160[r8]
|
||||
cmp r9d,12
|
||||
aesdec xmm1,16[r8]
|
||||
aesdec xmm1,32[r8]
|
||||
aesdec xmm1,48[r8]
|
||||
aesdec xmm1,64[r8]
|
||||
aesdec xmm1,80[r8]
|
||||
aesdec xmm1,96[r8]
|
||||
aesdec xmm1,112[r8]
|
||||
aesdec xmm1,128[r8]
|
||||
aesdec xmm1,144[r8]
|
||||
jb DLAST_4_2
|
||||
movdqu xmm2,192[r8]
|
||||
cmp r9d,14
|
||||
aesdec xmm1,160[r8]
|
||||
aesdec xmm1,176[r8]
|
||||
jb DLAST_4_2
|
||||
movdqu xmm2,224[r8]
|
||||
aesdec xmm1,192[r8]
|
||||
aesdec xmm1,208[r8]
|
||||
movdqu xmm1, [rdi]
|
||||
movdqa xmm15, xmm1
|
||||
add rdi, 16
|
||||
pxor xmm1, [r8]
|
||||
movdqu xmm2, 160[r8]
|
||||
cmp r9d, 12
|
||||
aesdec xmm1, 16[r8]
|
||||
aesdec xmm1, 32[r8]
|
||||
aesdec xmm1, 48[r8]
|
||||
aesdec xmm1, 64[r8]
|
||||
aesdec xmm1, 80[r8]
|
||||
aesdec xmm1, 96[r8]
|
||||
aesdec xmm1, 112[r8]
|
||||
aesdec xmm1, 128[r8]
|
||||
aesdec xmm1, 144[r8]
|
||||
jb DLAST_4_2
|
||||
movdqu xmm2, 192[r8]
|
||||
cmp r9d, 14
|
||||
aesdec xmm1, 160[r8]
|
||||
aesdec xmm1, 176[r8]
|
||||
jb DLAST_4_2
|
||||
movdqu xmm2, 224[r8]
|
||||
aesdec xmm1, 192[r8]
|
||||
aesdec xmm1, 208[r8]
|
||||
DLAST_4_2:
|
||||
aesdeclast xmm1,xmm2
|
||||
pxor xmm1,xmm5
|
||||
movdqa xmm5,xmm15
|
||||
movdqu [rsi],xmm1
|
||||
|
||||
add rsi,16
|
||||
dec r10
|
||||
jne DLOOP_4_2
|
||||
aesdeclast xmm1, xmm2
|
||||
pxor xmm1, xmm5
|
||||
movdqa xmm5, xmm15
|
||||
movdqu [rsi], xmm1
|
||||
add rsi, 16
|
||||
dec r10
|
||||
jne DLOOP_4_2
|
||||
DEND_4:
|
||||
; restore non volatile rdi,rsi
|
||||
mov rdi,rax
|
||||
mov rsi,r11
|
||||
; restore non volatile xmms from stack
|
||||
movdqa xmm6, [rsp+0]
|
||||
movdqa xmm7, [rsp+16]
|
||||
movdqa xmm8, [rsp+32]
|
||||
movdqa xmm9, [rsp+48]
|
||||
movdqa xmm10, [rsp+64]
|
||||
movdqa xmm11, [rsp+80]
|
||||
movdqa xmm12, [rsp+96]
|
||||
movdqa xmm15, [rsp+112]
|
||||
add rsp,8+8*16 ; 8 = align stack , 8 xmm6-12,15 16 bytes each
|
||||
ret
|
||||
AES_CBC_decrypt ENDP
|
||||
; restore non volatile rdi,rsi
|
||||
mov rdi, rax
|
||||
mov rsi, r11
|
||||
; restore non volatile xmms from stack
|
||||
movdqa xmm6, [rsp+0]
|
||||
movdqa xmm7, [rsp+16]
|
||||
movdqa xmm8, [rsp+32]
|
||||
movdqa xmm9, [rsp+48]
|
||||
movdqa xmm10, [rsp+64]
|
||||
movdqa xmm11, [rsp+80]
|
||||
movdqa xmm12, [rsp+96]
|
||||
movdqa xmm15, [rsp+112]
|
||||
add rsp, 8+8*16 ; 8 = align stack , 8 xmm6-12,15 16 bytes each
|
||||
ret
|
||||
AES_CBC_decrypt_by4 ENDP
|
||||
|
||||
|
||||
; void AES_CBC_decrypt_by6(const unsigned char *in,
;                          unsigned char *out,
;                          unsigned char ivec[16],
;                          unsigned long length,
;                          const unsigned char *KS,
;                          int nr)
;
; AES-NI CBC decryption, six blocks per main-loop iteration followed by a
; one-block-at-a-time loop for the remaining (block count mod 6) blocks.
;
; Arguments arrive in rcx, rdx, r8, r9 and at [rsp+40], [rsp+48]; the prologue
; moves them into rdi, rsi, rdx, rcx, r8, r9d so the body can use the register
; assignment listed below.
;
;   length  byte count; if it is not a multiple of 16 the block count is
;           rounded up by one.
;   KS      round keys, read with movdqa / memory operands at offsets
;           0, 16, ... (assumed 16-byte aligned -- confirm against caller).
;   nr      nr < 12 uses round keys 0..10, nr < 14 uses 0..12, otherwise 0..14.
;   ivec    read once as the initial chaining value; this routine does not
;           store an updated IV back.
;
; Register discipline: rdi/rsi are preserved via rax/r11, xmm6-xmm14 via the
; stack. Only volatile general-purpose registers (rax, rcx, rdx, r8-r11) are
; used as scratch; in particular the block-count division no longer parks
; values in r12/r13/r14, which are callee-saved and were previously clobbered.

; Apply one AES decryption round with round key `rk` to all six lanes.
AESDEC_BY6 MACRO rk
        aesdec      xmm1, rk
        aesdec      xmm2, rk
        aesdec      xmm3, rk
        aesdec      xmm4, rk
        aesdec      xmm5, rk
        aesdec      xmm6, rk
ENDM

AES_CBC_decrypt_by6 PROC
; after the prologue:
;   parameter 1: rdi - in
;   parameter 2: rsi - out
;   parameter 3: rdx - ivec
;   parameter 4: rcx - length
;   parameter 5: r8  - KS
;   parameter 6: r9d - nr

        ; save rdi and rsi to rax and r11, restore before ret
        mov         rax, rdi
        mov         r11, rsi
        ; convert to the register layout the body expects
        mov         rdi, rcx
        mov         rsi, rdx
        mov         rdx, r8
        mov         rcx, r9
        mov         r8, [rsp+40]
        mov         r9d, [rsp+48]
        ; xmm6-xmm15 are non volatile on this ABI: save the ones we touch
        ; (xmm14 is saved/restored although the body does not modify it; kept
        ; so the frame layout is unchanged)
        sub         rsp, 8+9*16     ; 8 = align stack, 9 xmm6-14 16 bytes each
        movdqa      [rsp+0], xmm6
        movdqa      [rsp+16], xmm7
        movdqa      [rsp+32], xmm8
        movdqa      [rsp+48], xmm9
        movdqa      [rsp+64], xmm10
        movdqa      [rsp+80], xmm11
        movdqa      [rsp+96], xmm12
        movdqa      [rsp+112], xmm13
        movdqa      [rsp+128], xmm14

        ; rcx = number of 16-byte blocks, rounded up if length % 16 != 0
        mov         r10, rcx
        shr         rcx, 4
        shl         r10, 60
        je          DNO_PARTS_6
        add         rcx, 1
DNO_PARTS_6:
        ; Load the IV first: div overwrites rdx, and ivec is not needed again.
        movdqu      xmm7, [rdx]
        ; Split the block count into rcx = count / 6 and r10 = count % 6.
        ; rax holds the caller's rdi, so park it in r10 across the divide.
        mov         r10, rax
        xor         edx, edx
        mov         rax, rcx
        mov         ecx, 6
        div         rcx
        mov         rcx, rax        ; quotient: number of 6-block groups
        mov         rax, r10        ; caller's rdi back in rax
        mov         r10, rdx        ; remainder: trailing single blocks
        cmp         rcx, 0
        je          DREMAINDER_6
        sub         rsi, 96         ; loop body pre-increments rsi by 96

DLOOP_6:
        ; load six ciphertext blocks
        movdqu      xmm1, [rdi]
        movdqu      xmm2, 16[rdi]
        movdqu      xmm3, 32[rdi]
        movdqu      xmm4, 48[rdi]
        movdqu      xmm5, 64[rdi]
        movdqu      xmm6, 80[rdi]
        ; round keys 0..3
        movdqa      xmm8, [r8]
        movdqa      xmm9, 16[r8]
        movdqa      xmm10, 32[r8]
        movdqa      xmm11, 48[r8]
        pxor        xmm1, xmm8
        pxor        xmm2, xmm8
        pxor        xmm3, xmm8
        pxor        xmm4, xmm8
        pxor        xmm5, xmm8
        pxor        xmm6, xmm8
        AESDEC_BY6  xmm9
        AESDEC_BY6  xmm10
        AESDEC_BY6  xmm11
        ; round keys 4..7
        movdqa      xmm8, 64[r8]
        movdqa      xmm9, 80[r8]
        movdqa      xmm10, 96[r8]
        movdqa      xmm11, 112[r8]
        AESDEC_BY6  xmm8
        AESDEC_BY6  xmm9
        AESDEC_BY6  xmm10
        AESDEC_BY6  xmm11
        ; round keys 8, 9 and candidate last key 10
        movdqa      xmm8, 128[r8]
        movdqa      xmm9, 144[r8]
        movdqa      xmm10, 160[r8]
        cmp         r9d, 12         ; flags survive the aesdec instructions
        AESDEC_BY6  xmm8
        AESDEC_BY6  xmm9
        jb          DLAST_6
        ; round keys 10, 11 and candidate last key 12
        movdqa      xmm8, 160[r8]
        movdqa      xmm9, 176[r8]
        movdqa      xmm10, 192[r8]
        cmp         r9d, 14
        AESDEC_BY6  xmm8
        AESDEC_BY6  xmm9
        jb          DLAST_6
        ; round keys 12, 13 and last key 14
        movdqa      xmm8, 192[r8]
        movdqa      xmm9, 208[r8]
        movdqa      xmm10, 224[r8]
        AESDEC_BY6  xmm8
        AESDEC_BY6  xmm9
DLAST_6:
        add         rsi, 96
        aesdeclast  xmm1, xmm10
        aesdeclast  xmm2, xmm10
        aesdeclast  xmm3, xmm10
        aesdeclast  xmm4, xmm10
        aesdeclast  xmm5, xmm10
        aesdeclast  xmm6, xmm10
        ; re-read the ciphertext blocks: each is the chaining value for the
        ; next lane, and the last one becomes the IV for the next iteration
        movdqu      xmm8, [rdi]
        movdqu      xmm9, 16[rdi]
        movdqu      xmm10, 32[rdi]
        movdqu      xmm11, 48[rdi]
        movdqu      xmm12, 64[rdi]
        movdqu      xmm13, 80[rdi]
        pxor        xmm1, xmm7
        pxor        xmm2, xmm8
        pxor        xmm3, xmm9
        pxor        xmm4, xmm10
        pxor        xmm5, xmm11
        pxor        xmm6, xmm12
        movdqu      xmm7, xmm13
        ; stores happen only after all ciphertext has been read
        movdqu      [rsi], xmm1
        movdqu      16[rsi], xmm2
        movdqu      32[rsi], xmm3
        movdqu      48[rsi], xmm4
        movdqu      64[rsi], xmm5
        movdqu      80[rsi], xmm6
        add         rdi, 96
        dec         rcx
        jne         DLOOP_6
        add         rsi, 96

DREMAINDER_6:
        cmp         r10, 0
        je          DEND_6
DLOOP_6_2:
        ; single-block path: xmm10 keeps the ciphertext as next chaining value
        movdqu      xmm1, [rdi]
        movdqa      xmm10, xmm1
        add         rdi, 16
        pxor        xmm1, [r8]
        movdqu      xmm2, 160[r8]
        cmp         r9d, 12
        aesdec      xmm1, 16[r8]
        aesdec      xmm1, 32[r8]
        aesdec      xmm1, 48[r8]
        aesdec      xmm1, 64[r8]
        aesdec      xmm1, 80[r8]
        aesdec      xmm1, 96[r8]
        aesdec      xmm1, 112[r8]
        aesdec      xmm1, 128[r8]
        aesdec      xmm1, 144[r8]
        jb          DLAST_6_2
        movdqu      xmm2, 192[r8]
        cmp         r9d, 14
        aesdec      xmm1, 160[r8]
        aesdec      xmm1, 176[r8]
        jb          DLAST_6_2
        movdqu      xmm2, 224[r8]
        aesdec      xmm1, 192[r8]
        aesdec      xmm1, 208[r8]
DLAST_6_2:
        aesdeclast  xmm1, xmm2
        pxor        xmm1, xmm7
        movdqa      xmm7, xmm10
        movdqu      [rsi], xmm1
        add         rsi, 16
        dec         r10
        jne         DLOOP_6_2

DEND_6:
        ; restore non volatile rdi,rsi
        mov         rdi, rax
        mov         rsi, r11
        ; restore non volatile xmms from stack
        movdqa      xmm6, [rsp+0]
        movdqa      xmm7, [rsp+16]
        movdqa      xmm8, [rsp+32]
        movdqa      xmm9, [rsp+48]
        movdqa      xmm10, [rsp+64]
        movdqa      xmm11, [rsp+80]
        movdqa      xmm12, [rsp+96]
        movdqa      xmm13, [rsp+112]
        movdqa      xmm14, [rsp+128]
        add         rsp, 8+9*16     ; 8 = align stack, 9 xmm6-14 16 bytes each
        ret
AES_CBC_decrypt_by6 ENDP
|
||||
|
||||
|
||||
; void AES_CBC_decrypt_by8(const unsigned char *in,
;                          unsigned char *out,
;                          unsigned char ivec[16],
;                          unsigned long length,
;                          const unsigned char *KS,
;                          int nr)
;
; AES-NI CBC decryption, eight blocks per main-loop iteration, then one block
; at a time for the remaining (block count mod 8) blocks.
;
; Arguments arrive in rcx, rdx, r8, r9 and at [rsp+40], [rsp+48]; the prologue
; shuffles them into rdi, rsi, rdx, rcx, r8, r9d.
;
;   length  byte count; a non-multiple of 16 rounds the block count up by one.
;   nr      nr < 12 uses round keys 0..10, nr < 14 uses 0..12, otherwise 0..14.
;   ivec    read once as the initial chaining value; not written back here.
;
; rdi/rsi are preserved through rax/r11 and xmm6-xmm13 through the stack.

; One AES decryption round with round key `rk` across all eight lanes.
AESDEC_BY8 MACRO rk
        aesdec      xmm1, rk
        aesdec      xmm2, rk
        aesdec      xmm3, rk
        aesdec      xmm4, rk
        aesdec      xmm5, rk
        aesdec      xmm6, rk
        aesdec      xmm7, rk
        aesdec      xmm8, rk
ENDM

AES_CBC_decrypt_by8 PROC
; after the prologue:
;   rdi - in,  rsi - out,  rdx - ivec,  rcx - length,  r8 - KS,  r9d - nr

        ; keep caller's rdi/rsi in rax/r11 until the epilogue
        mov         rax, rdi
        mov         r11, rsi
        ; remap incoming argument registers
        mov         rdi, rcx
        mov         rsi, rdx
        mov         rdx, r8
        mov         rcx, r9
        mov         r8, [rsp+40]
        mov         r9d, [rsp+48]
        ; spill the non volatile xmm registers this routine uses
        sub         rsp, 8+8*16     ; 8 = align stack, 8 xmm6-13 16 bytes each
        movdqa      [rsp], xmm6
        movdqa      [rsp+16], xmm7
        movdqa      [rsp+32], xmm8
        movdqa      [rsp+48], xmm9
        movdqa      [rsp+64], xmm10
        movdqa      [rsp+80], xmm11
        movdqa      [rsp+96], xmm12
        movdqa      [rsp+112], xmm13

        ; rcx = block count, plus one if length has a partial trailing block
        mov         r10, rcx
        shr         rcx, 4
        shl         r10, 60
        je          DNO_PARTS_8
        add         rcx, 1
DNO_PARTS_8:
        ; r10 = count mod 8 (single-block tail), rcx = count / 8 (groups)
        mov         r10, rcx
        shl         r10, 61
        shr         r10, 61
        shr         rcx, 3
        movdqu      xmm9, [rdx]     ; chaining value; leaves shr's ZF intact
        je          DREMAINDER_8
        sub         rsi, 128        ; loop body pre-increments rsi by 128

DLOOP_8:
        ; fetch eight ciphertext blocks
        movdqu      xmm1, [rdi]
        movdqu      xmm2, [rdi+16]
        movdqu      xmm3, [rdi+32]
        movdqu      xmm4, [rdi+48]
        movdqu      xmm5, [rdi+64]
        movdqu      xmm6, [rdi+80]
        movdqu      xmm7, [rdi+96]
        movdqu      xmm8, [rdi+112]
        ; round keys 0..3
        movdqa      xmm10, [r8]
        movdqa      xmm11, [r8+16]
        movdqa      xmm12, [r8+32]
        movdqa      xmm13, [r8+48]
        pxor        xmm1, xmm10
        pxor        xmm2, xmm10
        pxor        xmm3, xmm10
        pxor        xmm4, xmm10
        pxor        xmm5, xmm10
        pxor        xmm6, xmm10
        pxor        xmm7, xmm10
        pxor        xmm8, xmm10
        AESDEC_BY8  xmm11
        AESDEC_BY8  xmm12
        AESDEC_BY8  xmm13
        ; round keys 4..7
        movdqa      xmm10, [r8+64]
        movdqa      xmm11, [r8+80]
        movdqa      xmm12, [r8+96]
        movdqa      xmm13, [r8+112]
        AESDEC_BY8  xmm10
        AESDEC_BY8  xmm11
        AESDEC_BY8  xmm12
        AESDEC_BY8  xmm13
        ; round keys 8, 9; xmm12 = last key if nr < 12
        movdqa      xmm10, [r8+128]
        movdqa      xmm11, [r8+144]
        movdqa      xmm12, [r8+160]
        cmp         r9d, 12         ; aesdec leaves these flags for the jb
        AESDEC_BY8  xmm10
        AESDEC_BY8  xmm11
        jb          DLAST_8
        ; round keys 10, 11; xmm12 = last key if nr < 14
        movdqa      xmm10, [r8+160]
        movdqa      xmm11, [r8+176]
        movdqa      xmm12, [r8+192]
        cmp         r9d, 14
        AESDEC_BY8  xmm10
        AESDEC_BY8  xmm11
        jb          DLAST_8
        ; round keys 12, 13; xmm12 = last key 14
        movdqa      xmm10, [r8+192]
        movdqa      xmm11, [r8+208]
        movdqa      xmm12, [r8+224]
        AESDEC_BY8  xmm10
        AESDEC_BY8  xmm11
DLAST_8:
        add         rsi, 128
        aesdeclast  xmm1, xmm12
        aesdeclast  xmm2, xmm12
        aesdeclast  xmm3, xmm12
        aesdeclast  xmm4, xmm12
        aesdeclast  xmm5, xmm12
        aesdeclast  xmm6, xmm12
        aesdeclast  xmm7, xmm12
        aesdeclast  xmm8, xmm12
        ; CBC chaining: lane k is xored with ciphertext block k-1, lane 1
        ; with the running IV in xmm9. Ciphertext is re-read from [rdi] in two
        ; batches because only xmm10-xmm13 are free.
        movdqu      xmm10, [rdi]
        movdqu      xmm11, [rdi+16]
        movdqu      xmm12, [rdi+32]
        movdqu      xmm13, [rdi+48]
        pxor        xmm1, xmm9
        pxor        xmm2, xmm10
        pxor        xmm3, xmm11
        pxor        xmm4, xmm12
        pxor        xmm5, xmm13
        movdqu      xmm10, [rdi+64]
        movdqu      xmm11, [rdi+80]
        movdqu      xmm12, [rdi+96]
        movdqu      xmm9, [rdi+112] ; last ciphertext block = next IV
        pxor        xmm6, xmm10
        pxor        xmm7, xmm11
        pxor        xmm8, xmm12
        ; write plaintext only after every ciphertext read is done
        movdqu      [rsi], xmm1
        movdqu      [rsi+16], xmm2
        movdqu      [rsi+32], xmm3
        movdqu      [rsi+48], xmm4
        movdqu      [rsi+64], xmm5
        movdqu      [rsi+80], xmm6
        movdqu      [rsi+96], xmm7
        movdqu      [rsi+112], xmm8
        add         rdi, 128
        dec         rcx
        jne         DLOOP_8
        add         rsi, 128

DREMAINDER_8:
        cmp         r10, 0
        je          DEND_8
DLOOP_8_2:
        ; tail: one block per pass, xmm10 holds its ciphertext for chaining
        movdqu      xmm1, [rdi]
        movdqa      xmm10, xmm1
        add         rdi, 16
        pxor        xmm1, [r8]
        movdqu      xmm2, [r8+160]
        cmp         r9d, 12
        aesdec      xmm1, [r8+16]
        aesdec      xmm1, [r8+32]
        aesdec      xmm1, [r8+48]
        aesdec      xmm1, [r8+64]
        aesdec      xmm1, [r8+80]
        aesdec      xmm1, [r8+96]
        aesdec      xmm1, [r8+112]
        aesdec      xmm1, [r8+128]
        aesdec      xmm1, [r8+144]
        jb          DLAST_8_2
        movdqu      xmm2, [r8+192]
        cmp         r9d, 14
        aesdec      xmm1, [r8+160]
        aesdec      xmm1, [r8+176]
        jb          DLAST_8_2
        movdqu      xmm2, [r8+224]
        aesdec      xmm1, [r8+192]
        aesdec      xmm1, [r8+208]
DLAST_8_2:
        aesdeclast  xmm1, xmm2
        pxor        xmm1, xmm9
        movdqa      xmm9, xmm10
        movdqu      [rsi], xmm1
        add         rsi, 16
        dec         r10
        jne         DLOOP_8_2

DEND_8:
        ; hand rdi/rsi back to the caller
        mov         rdi, rax
        mov         rsi, r11
        ; reload the spilled xmm registers
        movdqa      xmm6, [rsp]
        movdqa      xmm7, [rsp+16]
        movdqa      xmm8, [rsp+32]
        movdqa      xmm9, [rsp+48]
        movdqa      xmm10, [rsp+64]
        movdqa      xmm11, [rsp+80]
        movdqa      xmm12, [rsp+96]
        movdqa      xmm13, [rsp+112]
        add         rsp, 8+8*16     ; 8 = align stack, 8 xmm6-13 16 bytes each
        ret
AES_CBC_decrypt_by8 ENDP
|
||||
|
||||
|
||||
; /*
|
||||
; AES_ECB_encrypt[const ,unsigned char*in
|
||||
|
@@ -86,18 +86,18 @@ jne LOOP
|
||||
ret
|
||||
|
||||
|
||||
|
||||
#if defined(WOLFSSL_AESNI_BY4)
|
||||
|
||||
/*
|
||||
AES_CBC_decrypt (const unsigned char *in,
|
||||
AES_CBC_decrypt_by4 (const unsigned char *in,
|
||||
unsigned char *out,
|
||||
unsigned char ivec[16],
|
||||
unsigned long length,
|
||||
const unsigned char *KS,
|
||||
int nr)
|
||||
*/
|
||||
.globl AES_CBC_decrypt
|
||||
AES_CBC_decrypt:
|
||||
.globl AES_CBC_decrypt_by4
|
||||
AES_CBC_decrypt_by4:
|
||||
# parameter 1: %rdi
|
||||
# parameter 2: %rsi
|
||||
# parameter 3: %rdx
|
||||
@@ -105,165 +105,638 @@ AES_CBC_decrypt:
|
||||
# parameter 5: %r8
|
||||
# parameter 6: %r9d
|
||||
|
||||
movq %rcx, %r10
|
||||
shrq $4, %rcx
|
||||
shlq $60, %r10
|
||||
je DNO_PARTS_4
|
||||
addq $1, %rcx
|
||||
movq %rcx, %r10
|
||||
shrq $4, %rcx
|
||||
shlq $60, %r10
|
||||
je DNO_PARTS_4
|
||||
addq $1, %rcx
|
||||
DNO_PARTS_4:
|
||||
movq %rcx, %r10
|
||||
shlq $62, %r10
|
||||
shrq $62, %r10
|
||||
shrq $2, %rcx
|
||||
movdqu (%rdx),%xmm5
|
||||
je DREMAINDER_4
|
||||
subq $64, %rsi
|
||||
movq %rcx, %r10
|
||||
shlq $62, %r10
|
||||
shrq $62, %r10
|
||||
shrq $2, %rcx
|
||||
movdqu (%rdx),%xmm5
|
||||
je DREMAINDER_4
|
||||
subq $64, %rsi
|
||||
DLOOP_4:
|
||||
movdqu (%rdi), %xmm1
|
||||
movdqu 16(%rdi), %xmm2
|
||||
movdqu 32(%rdi), %xmm3
|
||||
movdqu 48(%rdi), %xmm4
|
||||
movdqa %xmm1, %xmm6
|
||||
movdqa %xmm2, %xmm7
|
||||
movdqa %xmm3, %xmm8
|
||||
movdqa %xmm4, %xmm15
|
||||
movdqa (%r8), %xmm9
|
||||
movdqa 16(%r8), %xmm10
|
||||
movdqa 32(%r8), %xmm11
|
||||
movdqa 48(%r8), %xmm12
|
||||
pxor %xmm9, %xmm1
|
||||
pxor %xmm9, %xmm2
|
||||
pxor %xmm9, %xmm3
|
||||
|
||||
pxor %xmm9, %xmm4
|
||||
aesdec %xmm10, %xmm1
|
||||
aesdec %xmm10, %xmm2
|
||||
aesdec %xmm10, %xmm3
|
||||
aesdec %xmm10, %xmm4
|
||||
aesdec %xmm11, %xmm1
|
||||
aesdec %xmm11, %xmm2
|
||||
aesdec %xmm11, %xmm3
|
||||
aesdec %xmm11, %xmm4
|
||||
aesdec %xmm12, %xmm1
|
||||
aesdec %xmm12, %xmm2
|
||||
aesdec %xmm12, %xmm3
|
||||
aesdec %xmm12, %xmm4
|
||||
movdqa 64(%r8), %xmm9
|
||||
movdqa 80(%r8), %xmm10
|
||||
movdqa 96(%r8), %xmm11
|
||||
movdqa 112(%r8), %xmm12
|
||||
aesdec %xmm9, %xmm1
|
||||
aesdec %xmm9, %xmm2
|
||||
aesdec %xmm9, %xmm3
|
||||
aesdec %xmm9, %xmm4
|
||||
aesdec %xmm10, %xmm1
|
||||
aesdec %xmm10, %xmm2
|
||||
aesdec %xmm10, %xmm3
|
||||
aesdec %xmm10, %xmm4
|
||||
aesdec %xmm11, %xmm1
|
||||
aesdec %xmm11, %xmm2
|
||||
aesdec %xmm11, %xmm3
|
||||
aesdec %xmm11, %xmm4
|
||||
aesdec %xmm12, %xmm1
|
||||
aesdec %xmm12, %xmm2
|
||||
aesdec %xmm12, %xmm3
|
||||
aesdec %xmm12, %xmm4
|
||||
movdqa 128(%r8), %xmm9
|
||||
movdqa 144(%r8), %xmm10
|
||||
movdqa 160(%r8), %xmm11
|
||||
cmpl $12, %r9d
|
||||
aesdec %xmm9, %xmm1
|
||||
aesdec %xmm9, %xmm2
|
||||
aesdec %xmm9, %xmm3
|
||||
aesdec %xmm9, %xmm4
|
||||
aesdec %xmm10, %xmm1
|
||||
aesdec %xmm10, %xmm2
|
||||
aesdec %xmm10, %xmm3
|
||||
aesdec %xmm10, %xmm4
|
||||
jb DLAST_4
|
||||
movdqa 160(%r8), %xmm9
|
||||
movdqa 176(%r8), %xmm10
|
||||
movdqa 192(%r8), %xmm11
|
||||
cmpl $14, %r9d
|
||||
aesdec %xmm9, %xmm1
|
||||
aesdec %xmm9, %xmm2
|
||||
aesdec %xmm9, %xmm3
|
||||
aesdec %xmm9, %xmm4
|
||||
aesdec %xmm10, %xmm1
|
||||
aesdec %xmm10, %xmm2
|
||||
aesdec %xmm10, %xmm3
|
||||
aesdec %xmm10, %xmm4
|
||||
jb DLAST_4
|
||||
|
||||
movdqa 192(%r8), %xmm9
|
||||
movdqa 208(%r8), %xmm10
|
||||
movdqa 224(%r8), %xmm11
|
||||
aesdec %xmm9, %xmm1
|
||||
aesdec %xmm9, %xmm2
|
||||
aesdec %xmm9, %xmm3
|
||||
aesdec %xmm9, %xmm4
|
||||
aesdec %xmm10, %xmm1
|
||||
aesdec %xmm10, %xmm2
|
||||
aesdec %xmm10, %xmm3
|
||||
aesdec %xmm10, %xmm4
|
||||
movdqu (%rdi), %xmm1
|
||||
movdqu 16(%rdi), %xmm2
|
||||
movdqu 32(%rdi), %xmm3
|
||||
movdqu 48(%rdi), %xmm4
|
||||
movdqa %xmm1, %xmm6
|
||||
movdqa %xmm2, %xmm7
|
||||
movdqa %xmm3, %xmm8
|
||||
movdqa %xmm4, %xmm15
|
||||
movdqa (%r8), %xmm9
|
||||
movdqa 16(%r8), %xmm10
|
||||
movdqa 32(%r8), %xmm11
|
||||
movdqa 48(%r8), %xmm12
|
||||
pxor %xmm9, %xmm1
|
||||
pxor %xmm9, %xmm2
|
||||
pxor %xmm9, %xmm3
|
||||
pxor %xmm9, %xmm4
|
||||
aesdec %xmm10, %xmm1
|
||||
aesdec %xmm10, %xmm2
|
||||
aesdec %xmm10, %xmm3
|
||||
aesdec %xmm10, %xmm4
|
||||
aesdec %xmm11, %xmm1
|
||||
aesdec %xmm11, %xmm2
|
||||
aesdec %xmm11, %xmm3
|
||||
aesdec %xmm11, %xmm4
|
||||
aesdec %xmm12, %xmm1
|
||||
aesdec %xmm12, %xmm2
|
||||
aesdec %xmm12, %xmm3
|
||||
aesdec %xmm12, %xmm4
|
||||
movdqa 64(%r8), %xmm9
|
||||
movdqa 80(%r8), %xmm10
|
||||
movdqa 96(%r8), %xmm11
|
||||
movdqa 112(%r8), %xmm12
|
||||
aesdec %xmm9, %xmm1
|
||||
aesdec %xmm9, %xmm2
|
||||
aesdec %xmm9, %xmm3
|
||||
aesdec %xmm9, %xmm4
|
||||
aesdec %xmm10, %xmm1
|
||||
aesdec %xmm10, %xmm2
|
||||
aesdec %xmm10, %xmm3
|
||||
aesdec %xmm10, %xmm4
|
||||
aesdec %xmm11, %xmm1
|
||||
aesdec %xmm11, %xmm2
|
||||
aesdec %xmm11, %xmm3
|
||||
aesdec %xmm11, %xmm4
|
||||
aesdec %xmm12, %xmm1
|
||||
aesdec %xmm12, %xmm2
|
||||
aesdec %xmm12, %xmm3
|
||||
aesdec %xmm12, %xmm4
|
||||
movdqa 128(%r8), %xmm9
|
||||
movdqa 144(%r8), %xmm10
|
||||
movdqa 160(%r8), %xmm11
|
||||
cmpl $12, %r9d
|
||||
aesdec %xmm9, %xmm1
|
||||
aesdec %xmm9, %xmm2
|
||||
aesdec %xmm9, %xmm3
|
||||
aesdec %xmm9, %xmm4
|
||||
aesdec %xmm10, %xmm1
|
||||
aesdec %xmm10, %xmm2
|
||||
aesdec %xmm10, %xmm3
|
||||
aesdec %xmm10, %xmm4
|
||||
jb DLAST_4
|
||||
movdqa 160(%r8), %xmm9
|
||||
movdqa 176(%r8), %xmm10
|
||||
movdqa 192(%r8), %xmm11
|
||||
cmpl $14, %r9d
|
||||
aesdec %xmm9, %xmm1
|
||||
aesdec %xmm9, %xmm2
|
||||
aesdec %xmm9, %xmm3
|
||||
aesdec %xmm9, %xmm4
|
||||
aesdec %xmm10, %xmm1
|
||||
aesdec %xmm10, %xmm2
|
||||
aesdec %xmm10, %xmm3
|
||||
aesdec %xmm10, %xmm4
|
||||
jb DLAST_4
|
||||
movdqa 192(%r8), %xmm9
|
||||
movdqa 208(%r8), %xmm10
|
||||
movdqa 224(%r8), %xmm11
|
||||
aesdec %xmm9, %xmm1
|
||||
aesdec %xmm9, %xmm2
|
||||
aesdec %xmm9, %xmm3
|
||||
aesdec %xmm9, %xmm4
|
||||
aesdec %xmm10, %xmm1
|
||||
aesdec %xmm10, %xmm2
|
||||
aesdec %xmm10, %xmm3
|
||||
aesdec %xmm10, %xmm4
|
||||
DLAST_4:
|
||||
addq $64, %rdi
|
||||
addq $64, %rsi
|
||||
decq %rcx
|
||||
aesdeclast %xmm11, %xmm1
|
||||
aesdeclast %xmm11, %xmm2
|
||||
aesdeclast %xmm11, %xmm3
|
||||
aesdeclast %xmm11, %xmm4
|
||||
pxor %xmm5 ,%xmm1
|
||||
pxor %xmm6 ,%xmm2
|
||||
pxor %xmm7 ,%xmm3
|
||||
pxor %xmm8 ,%xmm4
|
||||
movdqu %xmm1, (%rsi)
|
||||
movdqu %xmm2, 16(%rsi)
|
||||
movdqu %xmm3, 32(%rsi)
|
||||
movdqu %xmm4, 48(%rsi)
|
||||
movdqa %xmm15,%xmm5
|
||||
jne DLOOP_4
|
||||
addq $64, %rsi
|
||||
addq $64, %rdi
|
||||
addq $64, %rsi
|
||||
decq %rcx
|
||||
aesdeclast %xmm11, %xmm1
|
||||
aesdeclast %xmm11, %xmm2
|
||||
aesdeclast %xmm11, %xmm3
|
||||
aesdeclast %xmm11, %xmm4
|
||||
pxor %xmm5, %xmm1
|
||||
pxor %xmm6, %xmm2
|
||||
pxor %xmm7, %xmm3
|
||||
pxor %xmm8, %xmm4
|
||||
movdqu %xmm1, (%rsi)
|
||||
movdqu %xmm2, 16(%rsi)
|
||||
movdqu %xmm3, 32(%rsi)
|
||||
movdqu %xmm4, 48(%rsi)
|
||||
movdqa %xmm15,%xmm5
|
||||
jne DLOOP_4
|
||||
addq $64, %rsi
|
||||
DREMAINDER_4:
|
||||
cmpq $0, %r10
|
||||
je DEND_4
|
||||
cmpq $0, %r10
|
||||
je DEND_4
|
||||
DLOOP_4_2:
|
||||
movdqu (%rdi), %xmm1
|
||||
movdqa %xmm1 ,%xmm15
|
||||
addq $16, %rdi
|
||||
pxor (%r8), %xmm1
|
||||
movdqu 160(%r8), %xmm2
|
||||
cmpl $12, %r9d
|
||||
aesdec 16(%r8), %xmm1
|
||||
aesdec 32(%r8), %xmm1
|
||||
aesdec 48(%r8), %xmm1
|
||||
aesdec 64(%r8), %xmm1
|
||||
aesdec 80(%r8), %xmm1
|
||||
aesdec 96(%r8), %xmm1
|
||||
aesdec 112(%r8), %xmm1
|
||||
aesdec 128(%r8), %xmm1
|
||||
aesdec 144(%r8), %xmm1
|
||||
jb DLAST_4_2
|
||||
movdqu 192(%r8), %xmm2
|
||||
cmpl $14, %r9d
|
||||
aesdec 160(%r8), %xmm1
|
||||
aesdec 176(%r8), %xmm1
|
||||
jb DLAST_4_2
|
||||
movdqu 224(%r8), %xmm2
|
||||
aesdec 192(%r8), %xmm1
|
||||
aesdec 208(%r8), %xmm1
|
||||
movdqu (%rdi), %xmm1
|
||||
movdqa %xmm1, %xmm15
|
||||
addq $16, %rdi
|
||||
pxor (%r8), %xmm1
|
||||
movdqu 160(%r8), %xmm2
|
||||
cmpl $12, %r9d
|
||||
aesdec 16(%r8), %xmm1
|
||||
aesdec 32(%r8), %xmm1
|
||||
aesdec 48(%r8), %xmm1
|
||||
aesdec 64(%r8), %xmm1
|
||||
aesdec 80(%r8), %xmm1
|
||||
aesdec 96(%r8), %xmm1
|
||||
aesdec 112(%r8), %xmm1
|
||||
aesdec 128(%r8), %xmm1
|
||||
aesdec 144(%r8), %xmm1
|
||||
jb DLAST_4_2
|
||||
movdqu 192(%r8), %xmm2
|
||||
cmpl $14, %r9d
|
||||
aesdec 160(%r8), %xmm1
|
||||
aesdec 176(%r8), %xmm1
|
||||
jb DLAST_4_2
|
||||
movdqu 224(%r8), %xmm2
|
||||
aesdec 192(%r8), %xmm1
|
||||
aesdec 208(%r8), %xmm1
|
||||
DLAST_4_2:
|
||||
aesdeclast %xmm2, %xmm1
|
||||
pxor %xmm5, %xmm1
|
||||
movdqa %xmm15, %xmm5
|
||||
movdqu %xmm1, (%rsi)
|
||||
|
||||
addq $16, %rsi
|
||||
decq %r10
|
||||
jne DLOOP_4_2
|
||||
aesdeclast %xmm2, %xmm1
|
||||
pxor %xmm5, %xmm1
|
||||
movdqa %xmm15, %xmm5
|
||||
movdqu %xmm1, (%rsi)
|
||||
addq $16, %rsi
|
||||
decq %r10
|
||||
jne DLOOP_4_2
|
||||
DEND_4:
|
||||
ret
|
||||
ret
|
||||
|
||||
#elif defined(WOLFSSL_AESNI_BY6)
|
||||
|
||||
/*
AES_CBC_decrypt_by6 (const unsigned char *in,
                     unsigned char *out,
                     unsigned char ivec[16],
                     unsigned long length,
                     const unsigned char *KS,
                     int nr)

AES-CBC decryption using AES-NI. The main loop decrypts six 16-byte blocks
per iteration; the remaining (block count mod 6) blocks are decrypted one at
a time.

in      ciphertext, read with unaligned loads (movdqu)
out     plaintext, written with unaligned stores (movdqu)
ivec    16-byte IV, read once at entry; this routine never writes to it
length  byte count; a non-zero remainder modulo 16 rounds the block count
        up by one, so a whole extra block is read and written in that case
KS      round keys, 16 bytes apart; read with movdqa and as aesdec/pxor
        memory operands, so KS must be 16-byte aligned
nr      round count: nr < 12 takes the final-round key from 160(KS),
        12 <= nr < 14 from 192(KS), nr >= 14 from 224(KS)
        (presumably 10/12/14 for AES-128/192/256 -- confirm against caller)

Registers modified: %rax, %rcx, %rsi, %rdi, %r10, %r11, %xmm1-%xmm13.
All of them are caller-saved in the System V AMD64 calling convention;
no callee-saved register is touched.
*/
.globl AES_CBC_decrypt_by6
AES_CBC_decrypt_by6:
# parameter 1: %rdi - in
# parameter 2: %rsi - out
# parameter 3: %rdx - ivec
# parameter 4: %rcx - length
# parameter 5: %r8 - KS
# parameter 6: %r9d - nr

        /* %rcx = length / 16, plus one when the low four bits of length
           are non-zero (shlq leaves ZF set only if they are all zero) */
        movq %rcx, %r10
        shrq $4, %rcx
        shlq $60, %r10
        je DNO_PARTS_6
        addq $1, %rcx
DNO_PARTS_6:
        /* Split the block count: %rcx = count / 6, %r10 = count % 6.
           div uses %rdx:%rax, so the ivec pointer is parked in %r11.
           Only caller-saved scratch registers are used here: %r12-%r14
           and %rbx are callee-saved and must not be modified without
           being saved and restored. %r10 is free to hold the divisor
           because its earlier temporary value is dead at this point. */
        movq %rdx, %r11
        movq $0, %rdx
        movq %rcx, %rax
        movq $6, %r10
        divq %r10
        movq %rax, %rcx
        movq %rdx, %r10
        movq %r11, %rdx
        cmpq $0, %rcx
        /* %xmm7 carries the CBC chaining value, starting with the IV;
           movdqu does not alter the flags set by cmpq above */
        movdqu (%rdx), %xmm7
        je DREMAINDER_6
        /* pre-bias out; DLAST_6 adds 96 back before every group store */
        subq $96, %rsi
DLOOP_6:
        /* load six ciphertext blocks */
        movdqu (%rdi), %xmm1
        movdqu 16(%rdi), %xmm2
        movdqu 32(%rdi), %xmm3
        movdqu 48(%rdi), %xmm4
        movdqu 64(%rdi), %xmm5
        movdqu 80(%rdi), %xmm6
        /* round keys 0-3 */
        movdqa (%r8), %xmm8
        movdqa 16(%r8), %xmm9
        movdqa 32(%r8), %xmm10
        movdqa 48(%r8), %xmm11
        /* xor round key 0 into every block */
        pxor %xmm8, %xmm1
        pxor %xmm8, %xmm2
        pxor %xmm8, %xmm3
        pxor %xmm8, %xmm4
        pxor %xmm8, %xmm5
        pxor %xmm8, %xmm6
        aesdec %xmm9, %xmm1
        aesdec %xmm9, %xmm2
        aesdec %xmm9, %xmm3
        aesdec %xmm9, %xmm4
        aesdec %xmm9, %xmm5
        aesdec %xmm9, %xmm6
        aesdec %xmm10, %xmm1
        aesdec %xmm10, %xmm2
        aesdec %xmm10, %xmm3
        aesdec %xmm10, %xmm4
        aesdec %xmm10, %xmm5
        aesdec %xmm10, %xmm6
        aesdec %xmm11, %xmm1
        aesdec %xmm11, %xmm2
        aesdec %xmm11, %xmm3
        aesdec %xmm11, %xmm4
        aesdec %xmm11, %xmm5
        aesdec %xmm11, %xmm6
        /* round keys 4-7 */
        movdqa 64(%r8), %xmm8
        movdqa 80(%r8), %xmm9
        movdqa 96(%r8), %xmm10
        movdqa 112(%r8), %xmm11
        aesdec %xmm8, %xmm1
        aesdec %xmm8, %xmm2
        aesdec %xmm8, %xmm3
        aesdec %xmm8, %xmm4
        aesdec %xmm8, %xmm5
        aesdec %xmm8, %xmm6
        aesdec %xmm9, %xmm1
        aesdec %xmm9, %xmm2
        aesdec %xmm9, %xmm3
        aesdec %xmm9, %xmm4
        aesdec %xmm9, %xmm5
        aesdec %xmm9, %xmm6
        aesdec %xmm10, %xmm1
        aesdec %xmm10, %xmm2
        aesdec %xmm10, %xmm3
        aesdec %xmm10, %xmm4
        aesdec %xmm10, %xmm5
        aesdec %xmm10, %xmm6
        aesdec %xmm11, %xmm1
        aesdec %xmm11, %xmm2
        aesdec %xmm11, %xmm3
        aesdec %xmm11, %xmm4
        aesdec %xmm11, %xmm5
        aesdec %xmm11, %xmm6
        /* round keys 8-9; 160(KS) is staged in %xmm10 as the final-round
           key for the nr < 12 case */
        movdqa 128(%r8), %xmm8
        movdqa 144(%r8), %xmm9
        movdqa 160(%r8), %xmm10
        /* the flags from this compare are consumed by the jb after the
           aesdec run, which leaves them untouched */
        cmpl $12, %r9d
        aesdec %xmm8, %xmm1
        aesdec %xmm8, %xmm2
        aesdec %xmm8, %xmm3
        aesdec %xmm8, %xmm4
        aesdec %xmm8, %xmm5
        aesdec %xmm8, %xmm6
        aesdec %xmm9, %xmm1
        aesdec %xmm9, %xmm2
        aesdec %xmm9, %xmm3
        aesdec %xmm9, %xmm4
        aesdec %xmm9, %xmm5
        aesdec %xmm9, %xmm6
        jb DLAST_6
        /* nr >= 12: two more rounds, final-round key now 192(KS) */
        movdqa 160(%r8), %xmm8
        movdqa 176(%r8), %xmm9
        movdqa 192(%r8), %xmm10
        cmpl $14, %r9d
        aesdec %xmm8, %xmm1
        aesdec %xmm8, %xmm2
        aesdec %xmm8, %xmm3
        aesdec %xmm8, %xmm4
        aesdec %xmm8, %xmm5
        aesdec %xmm8, %xmm6
        aesdec %xmm9, %xmm1
        aesdec %xmm9, %xmm2
        aesdec %xmm9, %xmm3
        aesdec %xmm9, %xmm4
        aesdec %xmm9, %xmm5
        aesdec %xmm9, %xmm6
        jb DLAST_6
        /* nr >= 14: two more rounds, final-round key now 224(KS) */
        movdqa 192(%r8), %xmm8
        movdqa 208(%r8), %xmm9
        movdqa 224(%r8), %xmm10
        aesdec %xmm8, %xmm1
        aesdec %xmm8, %xmm2
        aesdec %xmm8, %xmm3
        aesdec %xmm8, %xmm4
        aesdec %xmm8, %xmm5
        aesdec %xmm8, %xmm6
        aesdec %xmm9, %xmm1
        aesdec %xmm9, %xmm2
        aesdec %xmm9, %xmm3
        aesdec %xmm9, %xmm4
        aesdec %xmm9, %xmm5
        aesdec %xmm9, %xmm6
DLAST_6:
        addq $96, %rsi
        /* final round with the key staged in %xmm10 */
        aesdeclast %xmm10, %xmm1
        aesdeclast %xmm10, %xmm2
        aesdeclast %xmm10, %xmm3
        aesdeclast %xmm10, %xmm4
        aesdeclast %xmm10, %xmm5
        aesdeclast %xmm10, %xmm6
        /* re-read the six ciphertext blocks for CBC chaining; the reloads
           happen before any store of this group */
        movdqu (%rdi), %xmm8
        movdqu 16(%rdi), %xmm9
        movdqu 32(%rdi), %xmm10
        movdqu 48(%rdi), %xmm11
        movdqu 64(%rdi), %xmm12
        movdqu 80(%rdi), %xmm13
        /* block i is xored with ciphertext block i-1 (the chaining value
           in %xmm7 for the first block of the group) */
        pxor %xmm7, %xmm1
        pxor %xmm8, %xmm2
        pxor %xmm9, %xmm3
        pxor %xmm10, %xmm4
        pxor %xmm11, %xmm5
        pxor %xmm12, %xmm6
        /* last ciphertext block of the group chains into the next one */
        movdqu %xmm13, %xmm7
        movdqu %xmm1, (%rsi)
        movdqu %xmm2, 16(%rsi)
        movdqu %xmm3, 32(%rsi)
        movdqu %xmm4, 48(%rsi)
        movdqu %xmm5, 64(%rsi)
        movdqu %xmm6, 80(%rsi)
        addq $96, %rdi
        decq %rcx
        jne DLOOP_6
        /* undo the pre-bias so out points just past the last stored group */
        addq $96, %rsi
DREMAINDER_6:
        /* %r10 = number of blocks left for the one-at-a-time loop */
        cmpq $0, %r10
        je DEND_6
DLOOP_6_2:
        movdqu (%rdi), %xmm1
        /* keep the ciphertext block: it is the next chaining value */
        movdqa %xmm1, %xmm10
        addq $16, %rdi
        pxor (%r8), %xmm1
        /* %xmm2 = final-round key, replaced below for larger nr */
        movdqu 160(%r8), %xmm2
        cmpl $12, %r9d
        aesdec 16(%r8), %xmm1
        aesdec 32(%r8), %xmm1
        aesdec 48(%r8), %xmm1
        aesdec 64(%r8), %xmm1
        aesdec 80(%r8), %xmm1
        aesdec 96(%r8), %xmm1
        aesdec 112(%r8), %xmm1
        aesdec 128(%r8), %xmm1
        aesdec 144(%r8), %xmm1
        jb DLAST_6_2
        movdqu 192(%r8), %xmm2
        cmpl $14, %r9d
        aesdec 160(%r8), %xmm1
        aesdec 176(%r8), %xmm1
        jb DLAST_6_2
        movdqu 224(%r8), %xmm2
        aesdec 192(%r8), %xmm1
        aesdec 208(%r8), %xmm1
DLAST_6_2:
        aesdeclast %xmm2, %xmm1
        pxor %xmm7, %xmm1
        movdqa %xmm10, %xmm7
        movdqu %xmm1, (%rsi)
        addq $16, %rsi
        decq %r10
        jne DLOOP_6_2
DEND_6:
        ret
|
||||
|
||||
#else /* WOLFSSL_AESNI_BYx */
|
||||
|
||||
/*
|
||||
AES_CBC_decrypt_by8 (const unsigned char *in,
|
||||
unsigned char *out,
|
||||
unsigned char ivec[16],
|
||||
unsigned long length,
|
||||
const unsigned char *KS,
|
||||
int nr)
|
||||

AES-CBC decryption using AES-NI. The main loop decrypts eight 16-byte
blocks per iteration; the remaining (block count mod 8) blocks are
decrypted one at a time.

in      ciphertext, read with unaligned loads (movdqu)
out     plaintext, written with unaligned stores (movdqu)
ivec    16-byte IV, read once at entry; this routine never writes to it
length  byte count; a non-zero remainder modulo 16 rounds the block count
        up by one, so a whole extra block is read and written in that case
KS      round keys, 16 bytes apart; read with movdqa and as aesdec/pxor
        memory operands, so KS must be 16-byte aligned
nr      round count: nr < 12 takes the final-round key from 160(KS),
        12 <= nr < 14 from 192(KS), nr >= 14 from 224(KS)
        (presumably 10/12/14 for AES-128/192/256 -- confirm against caller)

Registers modified: %rcx, %rsi, %rdi, %r10, %xmm1-%xmm13.
*/
|
||||
.globl AES_CBC_decrypt_by8
|
||||
AES_CBC_decrypt_by8:
|
||||
# parameter 1: %rdi - in
|
||||
# parameter 2: %rsi - out
|
||||
# parameter 3: %rdx - ivec
|
||||
# parameter 4: %rcx - length
|
||||
# parameter 5: %r8 - KS
|
||||
# parameter 6: %r9d - nr
|
||||
|
||||
/* %rcx = length / 16, plus one when the low four bits of length are
   non-zero (shlq leaves ZF set only if they are all zero) */
movq %rcx, %r10
|
||||
shrq $4, %rcx
|
||||
shlq $60, %r10
|
||||
je DNO_PARTS_8
|
||||
addq $1, %rcx
|
||||
DNO_PARTS_8:
|
||||
/* split the block count: %r10 = count mod 8 (low three bits),
   %rcx = count / 8 */
movq %rcx, %r10
|
||||
shlq $61, %r10
|
||||
shrq $61, %r10
|
||||
shrq $3, %rcx
|
||||
/* %xmm9 carries the CBC chaining value, starting with the IV; movdqu does
   not alter the flags from shrq, which the je below tests */
movdqu (%rdx), %xmm9
|
||||
je DREMAINDER_8
|
||||
/* pre-bias out; DLAST_8 adds 128 back before every group store */
subq $128, %rsi
|
||||
DLOOP_8:
|
||||
/* load eight ciphertext blocks */
movdqu (%rdi), %xmm1
|
||||
movdqu 16(%rdi), %xmm2
|
||||
movdqu 32(%rdi), %xmm3
|
||||
movdqu 48(%rdi), %xmm4
|
||||
movdqu 64(%rdi), %xmm5
|
||||
movdqu 80(%rdi), %xmm6
|
||||
movdqu 96(%rdi), %xmm7
|
||||
movdqu 112(%rdi), %xmm8
|
||||
/* round keys 0-3 */
movdqa (%r8), %xmm10
|
||||
movdqa 16(%r8), %xmm11
|
||||
movdqa 32(%r8), %xmm12
|
||||
movdqa 48(%r8), %xmm13
|
||||
/* xor round key 0 into every block */
pxor %xmm10, %xmm1
|
||||
pxor %xmm10, %xmm2
|
||||
pxor %xmm10, %xmm3
|
||||
pxor %xmm10, %xmm4
|
||||
pxor %xmm10, %xmm5
|
||||
pxor %xmm10, %xmm6
|
||||
pxor %xmm10, %xmm7
|
||||
pxor %xmm10, %xmm8
|
||||
aesdec %xmm11, %xmm1
|
||||
aesdec %xmm11, %xmm2
|
||||
aesdec %xmm11, %xmm3
|
||||
aesdec %xmm11, %xmm4
|
||||
aesdec %xmm11, %xmm5
|
||||
aesdec %xmm11, %xmm6
|
||||
aesdec %xmm11, %xmm7
|
||||
aesdec %xmm11, %xmm8
|
||||
aesdec %xmm12, %xmm1
|
||||
aesdec %xmm12, %xmm2
|
||||
aesdec %xmm12, %xmm3
|
||||
aesdec %xmm12, %xmm4
|
||||
aesdec %xmm12, %xmm5
|
||||
aesdec %xmm12, %xmm6
|
||||
aesdec %xmm12, %xmm7
|
||||
aesdec %xmm12, %xmm8
|
||||
aesdec %xmm13, %xmm1
|
||||
aesdec %xmm13, %xmm2
|
||||
aesdec %xmm13, %xmm3
|
||||
aesdec %xmm13, %xmm4
|
||||
aesdec %xmm13, %xmm5
|
||||
aesdec %xmm13, %xmm6
|
||||
aesdec %xmm13, %xmm7
|
||||
aesdec %xmm13, %xmm8
|
||||
/* round keys 4-7 */
movdqa 64(%r8), %xmm10
|
||||
movdqa 80(%r8), %xmm11
|
||||
movdqa 96(%r8), %xmm12
|
||||
movdqa 112(%r8), %xmm13
|
||||
aesdec %xmm10, %xmm1
|
||||
aesdec %xmm10, %xmm2
|
||||
aesdec %xmm10, %xmm3
|
||||
aesdec %xmm10, %xmm4
|
||||
aesdec %xmm10, %xmm5
|
||||
aesdec %xmm10, %xmm6
|
||||
aesdec %xmm10, %xmm7
|
||||
aesdec %xmm10, %xmm8
|
||||
aesdec %xmm11, %xmm1
|
||||
aesdec %xmm11, %xmm2
|
||||
aesdec %xmm11, %xmm3
|
||||
aesdec %xmm11, %xmm4
|
||||
aesdec %xmm11, %xmm5
|
||||
aesdec %xmm11, %xmm6
|
||||
aesdec %xmm11, %xmm7
|
||||
aesdec %xmm11, %xmm8
|
||||
aesdec %xmm12, %xmm1
|
||||
aesdec %xmm12, %xmm2
|
||||
aesdec %xmm12, %xmm3
|
||||
aesdec %xmm12, %xmm4
|
||||
aesdec %xmm12, %xmm5
|
||||
aesdec %xmm12, %xmm6
|
||||
aesdec %xmm12, %xmm7
|
||||
aesdec %xmm12, %xmm8
|
||||
aesdec %xmm13, %xmm1
|
||||
aesdec %xmm13, %xmm2
|
||||
aesdec %xmm13, %xmm3
|
||||
aesdec %xmm13, %xmm4
|
||||
aesdec %xmm13, %xmm5
|
||||
aesdec %xmm13, %xmm6
|
||||
aesdec %xmm13, %xmm7
|
||||
aesdec %xmm13, %xmm8
|
||||
/* round keys 8-9; 160(KS) is staged in %xmm12 as the final-round key for
   the nr < 12 case */
movdqa 128(%r8), %xmm10
|
||||
movdqa 144(%r8), %xmm11
|
||||
movdqa 160(%r8), %xmm12
|
||||
/* the flags from this compare are consumed by the jb after the aesdec
   run, which leaves them untouched */
cmpl $12, %r9d
|
||||
aesdec %xmm10, %xmm1
|
||||
aesdec %xmm10, %xmm2
|
||||
aesdec %xmm10, %xmm3
|
||||
aesdec %xmm10, %xmm4
|
||||
aesdec %xmm10, %xmm5
|
||||
aesdec %xmm10, %xmm6
|
||||
aesdec %xmm10, %xmm7
|
||||
aesdec %xmm10, %xmm8
|
||||
aesdec %xmm11, %xmm1
|
||||
aesdec %xmm11, %xmm2
|
||||
aesdec %xmm11, %xmm3
|
||||
aesdec %xmm11, %xmm4
|
||||
aesdec %xmm11, %xmm5
|
||||
aesdec %xmm11, %xmm6
|
||||
aesdec %xmm11, %xmm7
|
||||
aesdec %xmm11, %xmm8
|
||||
jb DLAST_8
|
||||
/* nr >= 12: two more rounds, final-round key now 192(KS) */
movdqa 160(%r8), %xmm10
|
||||
movdqa 176(%r8), %xmm11
|
||||
movdqa 192(%r8), %xmm12
|
||||
cmpl $14, %r9d
|
||||
aesdec %xmm10, %xmm1
|
||||
aesdec %xmm10, %xmm2
|
||||
aesdec %xmm10, %xmm3
|
||||
aesdec %xmm10, %xmm4
|
||||
aesdec %xmm10, %xmm5
|
||||
aesdec %xmm10, %xmm6
|
||||
aesdec %xmm10, %xmm7
|
||||
aesdec %xmm10, %xmm8
|
||||
aesdec %xmm11, %xmm1
|
||||
aesdec %xmm11, %xmm2
|
||||
aesdec %xmm11, %xmm3
|
||||
aesdec %xmm11, %xmm4
|
||||
aesdec %xmm11, %xmm5
|
||||
aesdec %xmm11, %xmm6
|
||||
aesdec %xmm11, %xmm7
|
||||
aesdec %xmm11, %xmm8
|
||||
jb DLAST_8
|
||||
/* nr >= 14: two more rounds, final-round key now 224(KS) */
movdqa 192(%r8), %xmm10
|
||||
movdqa 208(%r8), %xmm11
|
||||
movdqa 224(%r8), %xmm12
|
||||
aesdec %xmm10, %xmm1
|
||||
aesdec %xmm10, %xmm2
|
||||
aesdec %xmm10, %xmm3
|
||||
aesdec %xmm10, %xmm4
|
||||
aesdec %xmm10, %xmm5
|
||||
aesdec %xmm10, %xmm6
|
||||
aesdec %xmm10, %xmm7
|
||||
aesdec %xmm10, %xmm8
|
||||
aesdec %xmm11, %xmm1
|
||||
aesdec %xmm11, %xmm2
|
||||
aesdec %xmm11, %xmm3
|
||||
aesdec %xmm11, %xmm4
|
||||
aesdec %xmm11, %xmm5
|
||||
aesdec %xmm11, %xmm6
|
||||
aesdec %xmm11, %xmm7
|
||||
aesdec %xmm11, %xmm8
|
||||
DLAST_8:
|
||||
addq $128, %rsi
|
||||
/* final round with the key staged in %xmm12 */
aesdeclast %xmm12, %xmm1
|
||||
aesdeclast %xmm12, %xmm2
|
||||
aesdeclast %xmm12, %xmm3
|
||||
aesdeclast %xmm12, %xmm4
|
||||
aesdeclast %xmm12, %xmm5
|
||||
aesdeclast %xmm12, %xmm6
|
||||
aesdeclast %xmm12, %xmm7
|
||||
aesdeclast %xmm12, %xmm8
|
||||
/* re-read ciphertext blocks 0-3 for CBC chaining; all reloads happen
   before any store of this group */
movdqu (%rdi), %xmm10
|
||||
movdqu 16(%rdi), %xmm11
|
||||
movdqu 32(%rdi), %xmm12
|
||||
movdqu 48(%rdi), %xmm13
|
||||
/* block i is xored with ciphertext block i-1 (the chaining value in
   %xmm9 for the first block of the group) */
pxor %xmm9, %xmm1
|
||||
pxor %xmm10, %xmm2
|
||||
pxor %xmm11, %xmm3
|
||||
pxor %xmm12, %xmm4
|
||||
pxor %xmm13, %xmm5
|
||||
/* ciphertext blocks 4-6, then block 7 straight into %xmm9 as the chaining
   value for the next group */
movdqu 64(%rdi), %xmm10
|
||||
movdqu 80(%rdi), %xmm11
|
||||
movdqu 96(%rdi), %xmm12
|
||||
movdqu 112(%rdi), %xmm9
|
||||
pxor %xmm10, %xmm6
|
||||
pxor %xmm11, %xmm7
|
||||
pxor %xmm12, %xmm8
|
||||
movdqu %xmm1, (%rsi)
|
||||
movdqu %xmm2, 16(%rsi)
|
||||
movdqu %xmm3, 32(%rsi)
|
||||
movdqu %xmm4, 48(%rsi)
|
||||
movdqu %xmm5, 64(%rsi)
|
||||
movdqu %xmm6, 80(%rsi)
|
||||
movdqu %xmm7, 96(%rsi)
|
||||
movdqu %xmm8, 112(%rsi)
|
||||
addq $128, %rdi
|
||||
decq %rcx
|
||||
jne DLOOP_8
|
||||
/* undo the pre-bias so out points just past the last stored group */
addq $128, %rsi
|
||||
DREMAINDER_8:
|
||||
/* %r10 = number of blocks left for the one-at-a-time loop */
cmpq $0, %r10
|
||||
je DEND_8
|
||||
DLOOP_8_2:
|
||||
movdqu (%rdi), %xmm1
|
||||
/* keep the ciphertext block: it is the next chaining value */
movdqa %xmm1, %xmm10
|
||||
addq $16, %rdi
|
||||
pxor (%r8), %xmm1
|
||||
/* %xmm2 = final-round key, replaced below for larger nr */
movdqu 160(%r8), %xmm2
|
||||
cmpl $12, %r9d
|
||||
aesdec 16(%r8), %xmm1
|
||||
aesdec 32(%r8), %xmm1
|
||||
aesdec 48(%r8), %xmm1
|
||||
aesdec 64(%r8), %xmm1
|
||||
aesdec 80(%r8), %xmm1
|
||||
aesdec 96(%r8), %xmm1
|
||||
aesdec 112(%r8), %xmm1
|
||||
aesdec 128(%r8), %xmm1
|
||||
aesdec 144(%r8), %xmm1
|
||||
jb DLAST_8_2
|
||||
movdqu 192(%r8), %xmm2
|
||||
cmpl $14, %r9d
|
||||
aesdec 160(%r8), %xmm1
|
||||
aesdec 176(%r8), %xmm1
|
||||
jb DLAST_8_2
|
||||
movdqu 224(%r8), %xmm2
|
||||
aesdec 192(%r8), %xmm1
|
||||
aesdec 208(%r8), %xmm1
|
||||
DLAST_8_2:
|
||||
aesdeclast %xmm2, %xmm1
|
||||
/* xor with the chaining value, then advance it to this block's
   ciphertext */
pxor %xmm9, %xmm1
|
||||
movdqa %xmm10, %xmm9
|
||||
movdqu %xmm1, (%rsi)
|
||||
addq $16, %rsi
|
||||
decq %r10
|
||||
jne DLOOP_8_2
|
||||
DEND_8:
|
||||
ret
|
||||
|
||||
#endif /* WOLFSSL_AESNI_BYx */
|
||||
|
||||
|
||||
/*
|
||||
|
@@ -2669,6 +2669,94 @@ int aes_test(void)
|
||||
if (memcmp(cipher, verify, AES_BLOCK_SIZE))
|
||||
return -61;
|
||||
|
||||
#if defined(WOLFSSL_AESNI) && defined(HAVE_AES_DECRYPT)
|
||||
{
|
||||
const byte bigMsg[] = {
|
||||
/* "All work and no play makes Jack a dull boy. " */
|
||||
0x41,0x6c,0x6c,0x20,0x77,0x6f,0x72,0x6b,
|
||||
0x20,0x61,0x6e,0x64,0x20,0x6e,0x6f,0x20,
|
||||
0x70,0x6c,0x61,0x79,0x20,0x6d,0x61,0x6b,
|
||||
0x65,0x73,0x20,0x4a,0x61,0x63,0x6b,0x20,
|
||||
0x61,0x20,0x64,0x75,0x6c,0x6c,0x20,0x62,
|
||||
0x6f,0x79,0x2e,0x20,0x41,0x6c,0x6c,0x20,
|
||||
0x77,0x6f,0x72,0x6b,0x20,0x61,0x6e,0x64,
|
||||
0x20,0x6e,0x6f,0x20,0x70,0x6c,0x61,0x79,
|
||||
0x20,0x6d,0x61,0x6b,0x65,0x73,0x20,0x4a,
|
||||
0x61,0x63,0x6b,0x20,0x61,0x20,0x64,0x75,
|
||||
0x6c,0x6c,0x20,0x62,0x6f,0x79,0x2e,0x20,
|
||||
0x41,0x6c,0x6c,0x20,0x77,0x6f,0x72,0x6b,
|
||||
0x20,0x61,0x6e,0x64,0x20,0x6e,0x6f,0x20,
|
||||
0x70,0x6c,0x61,0x79,0x20,0x6d,0x61,0x6b,
|
||||
0x65,0x73,0x20,0x4a,0x61,0x63,0x6b,0x20,
|
||||
0x61,0x20,0x64,0x75,0x6c,0x6c,0x20,0x62,
|
||||
0x6f,0x79,0x2e,0x20,0x41,0x6c,0x6c,0x20,
|
||||
0x77,0x6f,0x72,0x6b,0x20,0x61,0x6e,0x64,
|
||||
0x20,0x6e,0x6f,0x20,0x70,0x6c,0x61,0x79,
|
||||
0x20,0x6d,0x61,0x6b,0x65,0x73,0x20,0x4a,
|
||||
0x61,0x63,0x6b,0x20,0x61,0x20,0x64,0x75,
|
||||
0x6c,0x6c,0x20,0x62,0x6f,0x79,0x2e,0x20,
|
||||
0x41,0x6c,0x6c,0x20,0x77,0x6f,0x72,0x6b,
|
||||
0x20,0x61,0x6e,0x64,0x20,0x6e,0x6f,0x20,
|
||||
0x70,0x6c,0x61,0x79,0x20,0x6d,0x61,0x6b,
|
||||
0x65,0x73,0x20,0x4a,0x61,0x63,0x6b,0x20,
|
||||
0x61,0x20,0x64,0x75,0x6c,0x6c,0x20,0x62,
|
||||
0x6f,0x79,0x2e,0x20,0x41,0x6c,0x6c,0x20,
|
||||
0x77,0x6f,0x72,0x6b,0x20,0x61,0x6e,0x64,
|
||||
0x20,0x6e,0x6f,0x20,0x70,0x6c,0x61,0x79,
|
||||
0x20,0x6d,0x61,0x6b,0x65,0x73,0x20,0x4a,
|
||||
0x61,0x63,0x6b,0x20,0x61,0x20,0x64,0x75,
|
||||
0x6c,0x6c,0x20,0x62,0x6f,0x79,0x2e,0x20,
|
||||
0x41,0x6c,0x6c,0x20,0x77,0x6f,0x72,0x6b,
|
||||
0x20,0x61,0x6e,0x64,0x20,0x6e,0x6f,0x20,
|
||||
0x70,0x6c,0x61,0x79,0x20,0x6d,0x61,0x6b,
|
||||
0x65,0x73,0x20,0x4a,0x61,0x63,0x6b,0x20,
|
||||
0x61,0x20,0x64,0x75,0x6c,0x6c,0x20,0x62,
|
||||
0x6f,0x79,0x2e,0x20,0x41,0x6c,0x6c,0x20,
|
||||
0x77,0x6f,0x72,0x6b,0x20,0x61,0x6e,0x64,
|
||||
0x20,0x6e,0x6f,0x20,0x70,0x6c,0x61,0x79,
|
||||
0x20,0x6d,0x61,0x6b,0x65,0x73,0x20,0x4a,
|
||||
0x61,0x63,0x6b,0x20,0x61,0x20,0x64,0x75,
|
||||
0x6c,0x6c,0x20,0x62,0x6f,0x79,0x2e,0x20,
|
||||
0x41,0x6c,0x6c,0x20,0x77,0x6f,0x72,0x6b,
|
||||
0x20,0x61,0x6e,0x64,0x20,0x6e,0x6f,0x20,
|
||||
0x70,0x6c,0x61,0x79,0x20,0x6d,0x61,0x6b,
|
||||
0x65,0x73,0x20,0x4a,0x61,0x63,0x6b,0x20
|
||||
};
|
||||
const byte bigKey[] = "0123456789abcdeffedcba9876543210";
|
||||
byte bigCipher[sizeof(bigMsg)];
|
||||
byte bigPlain[sizeof(bigMsg)];
|
||||
word32 keySz, msgSz;
|
||||
|
||||
/* Iterate from one AES_BLOCK_SIZE of bigMsg through the whole
|
||||
* message by AES_BLOCK_SIZE for each size of AES key. */
|
||||
for (keySz = 16; keySz <= 32; keySz += 8) {
|
||||
for (msgSz = AES_BLOCK_SIZE;
|
||||
msgSz <= sizeof(bigMsg);
|
||||
msgSz += AES_BLOCK_SIZE) {
|
||||
|
||||
memset(bigCipher, 0, sizeof(bigCipher));
|
||||
memset(bigPlain, 0, sizeof(bigPlain));
|
||||
ret = wc_AesSetKey(&enc, bigKey, keySz, iv, AES_ENCRYPTION);
|
||||
if (ret != 0)
|
||||
return -1030;
|
||||
ret = wc_AesSetKey(&dec, bigKey, keySz, iv, AES_DECRYPTION);
|
||||
if (ret != 0)
|
||||
return -1031;
|
||||
|
||||
ret = wc_AesCbcEncrypt(&enc, bigCipher, bigMsg, msgSz);
|
||||
if (ret != 0)
|
||||
return -1032;
|
||||
ret = wc_AesCbcDecrypt(&dec, bigPlain, bigCipher, msgSz);
|
||||
if (ret != 0)
|
||||
return -1033;
|
||||
|
||||
if (memcmp(bigPlain, bigMsg, msgSz))
|
||||
return -1034;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* WOLFSSL_AESNI HAVE_AES_DECRYPT */
|
||||
|
||||
#ifdef HAVE_CAVIUM
|
||||
wc_AesFreeCavium(&enc);
|
||||
wc_AesFreeCavium(&dec);
|
||||
|
Reference in New Issue
Block a user