Fix for the AVX implementation of Poly1305

The precomputed table needs one more multiple of r (0..6 instead of 0..5).
This commit is contained in:
Sean Parkinson
2017-08-28 11:34:05 +10:00
parent 59358fd91d
commit e6cf3c88f2
2 changed files with 11 additions and 13 deletions

View File

@ -155,7 +155,7 @@ static void poly1305_block_avx(Poly1305* ctx, const unsigned char *m)
"addq %%r8, %%r12\n\t"
"adcq %%rax, %%r13\n\t"
"# r[1] * h[2] +> t3\n\t"
"adcq 112(%[ctx],%%r10,8), %%r14\n\t"
"adcq 120(%[ctx],%%r10,8), %%r14\n\t"
"# r * h in r14, r13, r12, r11 \n\t"
"# h = (r * h) mod 2^130 - 5\n\t"
"movq %%r13, %%r10\n\t"
@ -222,7 +222,7 @@ static void poly1305_blocks_avx(Poly1305* ctx, const unsigned char *m,
"addq %%r8, %%r12\n\t"
"adcq %%rax, %%r13\n\t"
"# r[1] * h[2] +> t3\n\t"
"adcq 120(%[ctx],%%r10,8), %%r14\n\t"
"adcq 128(%[ctx],%%r10,8), %%r14\n\t"
"# r * h in r14, r13, r12, r11 \n\t"
"# h = (r * h) mod 2^130 - 5\n\t"
"movq %%r13, %%r10\n\t"
@ -262,9 +262,9 @@ static void poly1305_setkey_avx(Poly1305* ctx, const byte* key)
ctx->r[0] = *(word64*)(key + 0) & 0x0ffffffc0fffffffL;
ctx->r[1] = *(word64*)(key + 8) & 0x0ffffffc0ffffffcL;
for (i=0; i<6; i++) {
ctx->t0[i] = ctx->r[0] * i;
ctx->t1[i] = ctx->r[1] * i;
for (i=0; i<7; i++) {
ctx->hh[i + 0] = ctx->r[0] * i;
ctx->hh[i + 7] = ctx->r[1] * i;
}
/* h (accumulator) = 0 */
@ -766,14 +766,14 @@ POLY1305_NOINLINE static void poly1305_blocks_avx2(Poly1305* ctx,
"L_begin:\n\t"
"# Load the H values.\n\t"
LOAD_H(%[h], %%ymm0, %%ymm1, %%ymm2, %%ymm3, %%ymm4, %%ymm15)
"movq 416(%[ctx]), %%r8\n\t"
"movq 336(%[ctx]), %%r8\n\t"
"# Check if there is a power of r to load - otherwise use r^4.\n\t"
"cmpq $0x0, %%r8\n\t"
"je L_load_r4\n\t"
"\n\t"
"movq 424(%[ctx]), %%r9\n\t"
"movq 432(%[ctx]), %%r10\n\t"
"movq 440(%[ctx]), %%r11\n\t"
"movq 344(%[ctx]), %%r9\n\t"
"movq 352(%[ctx]), %%r10\n\t"
"movq 360(%[ctx]), %%r11\n\t"
"# Load the 4 powers of r.\n\t"
LOAD_Rx4(%%r8, %%r9, %%r10, %%r11, \
%%ymm5, %%ymm6, %%ymm7, %%ymm8, %%ymm9,
@ -782,7 +782,7 @@ POLY1305_NOINLINE static void poly1305_blocks_avx2(Poly1305* ctx,
"\n"
"L_load_r4:\n\t"
"# Load r^4 into all four positions.\n\t"
LOAD_R4(384(%[ctx]), %%ymm5, %%ymm6, %%ymm7, %%ymm8, %%ymm9,
LOAD_R4(304(%[ctx]), %%ymm5, %%ymm6, %%ymm7, %%ymm8, %%ymm9,
%%ymm13, %%ymm14)
"\n"
"L_mul_5:\n\t"

View File

@ -68,9 +68,7 @@ typedef struct Poly1305 {
word64 r[3];
word64 h[3];
word64 pad[2];
word64 t0[6];
word64 t1[6];
word64 hh[12];
word64 hh[14];
word32 r0[8];
word32 r1[8];
word32 r2[8];