Merge pull request #4615 from SparkiDev/mp_mulx

TFM: fix Intel MULX multiply
This commit is contained in:
David Garske
2021-11-30 08:26:17 -08:00
committed by GitHub
2 changed files with 13 additions and 13 deletions

View File

@@ -1475,7 +1475,7 @@ __asm__ ( \
#if defined(HAVE_INTEL_MULX) #if defined(HAVE_INTEL_MULX)
#define MULADD_BODY(a,b,c) \ #define MULADD_BODY(a,b,carry,c) \
__asm__ volatile( \ __asm__ volatile( \
"movq %[a0],%%rdx\n\t" \ "movq %[a0],%%rdx\n\t" \
"xorq %%rcx, %%rcx\n\t" \ "xorq %%rcx, %%rcx\n\t" \
@@ -1483,10 +1483,9 @@ __asm__ ( \
"movq 8(%[cp]),%%r9\n\t" \ "movq 8(%[cp]),%%r9\n\t" \
"movq 16(%[cp]),%%r10\n\t" \ "movq 16(%[cp]),%%r10\n\t" \
"movq 24(%[cp]),%%r11\n\t" \ "movq 24(%[cp]),%%r11\n\t" \
"movq 32(%[cp]),%%r12\n\t" \
"movq 40(%[cp]),%%r13\n\t" \
\ \
"mulx (%[bp]),%%rax, %%rbx\n\t" \ "mulx (%[bp]),%%rax, %%rbx\n\t" \
"adcxq %[ca], %%r8\n\t" \
"adoxq %%rax, %%r8\n\t" \ "adoxq %%rax, %%r8\n\t" \
"mulx 8(%[bp]),%%rax, %%rcx\n\t" \ "mulx 8(%[bp]),%%rax, %%rcx\n\t" \
"adcxq %%rbx, %%r9\n\t" \ "adcxq %%rbx, %%r9\n\t" \
@@ -1496,32 +1495,32 @@ __asm__ ( \
"adoxq %%rax, %%r10\n\t" \ "adoxq %%rax, %%r10\n\t" \
"mulx 24(%[bp]),%%rax, %%rcx\n\t" \ "mulx 24(%[bp]),%%rax, %%rcx\n\t" \
"adcxq %%rbx, %%r11\n\t" \ "adcxq %%rbx, %%r11\n\t" \
"mov $0, %[ca]\n\t" \
"adoxq %%rax, %%r11\n\t" \ "adoxq %%rax, %%r11\n\t" \
"adcxq %%rcx, %%r12\n\t" \ "adcxq %%rcx, %[ca]\n\t" \
"mov $0, %%rdx\n\t" \ "mov $0, %%rdx\n\t" \
"adox %%rdx, %%r12\n\t" \ "adoxq %%rdx, %[ca]\n\t" \
"adcx %%rdx, %%r13\n\t" \
\ \
"movq %%r8, 0(%[cp])\n\t" \ "movq %%r8, 0(%[cp])\n\t" \
"movq %%r9, 8(%[cp])\n\t" \ "movq %%r9, 8(%[cp])\n\t" \
"movq %%r10, 16(%[cp])\n\t" \ "movq %%r10, 16(%[cp])\n\t" \
"movq %%r11, 24(%[cp])\n\t" \ "movq %%r11, 24(%[cp])\n\t" \
"movq %%r12, 32(%[cp])\n\t" \ : [ca] "+r" (carry) \
"movq %%r13, 40(%[cp])\n\t" \
: \
: [a0] "r" (a->dp[ix]), [bp] "r" (&(b->dp[iy])), \ : [a0] "r" (a->dp[ix]), [bp] "r" (&(b->dp[iy])), \
[cp] "r" (&(c->dp[iz])) \ [cp] "r" (&(c->dp[iz])) \
: "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", \ : "%r8", "%r9", "%r10", "%r11", \
"%rdx", "%rax", "%rcx", "%rbx" \ "%rdx", "%rax", "%rcx", "%rbx" \
) )
#define TFM_INTEL_MUL_COMBA(a, b, c) \ #define TFM_INTEL_MUL_COMBA(a, b, ca, c) \
for (iz=0; iz<pa; iz++) c->dp[iz] = 0; \ for (iz=0; iz<pa; iz++) c->dp[iz] = 0; \
for (ix=0; ix<a->used; ix++) { \ for (ix=0; ix<a->used; ix++) { \
ca = 0; \
for (iy=0; iy<b->used; iy+=4) { \ for (iy=0; iy<b->used; iy+=4) { \
iz = ix + iy; \ iz = ix + iy; \
MULADD_BODY(a, b, c); \ MULADD_BODY(a, b, ca, c); \
} \ } \
c->dp[ix + iy] = ca; \
} }
#endif #endif

View File

@@ -497,6 +497,7 @@ WC_INLINE static int fp_mul_comba_mulx(fp_int *A, fp_int *B, fp_int *C)
#else #else
fp_int *tmp; fp_int *tmp;
#endif #endif
fp_digit carry;
/* Variables used but not seen by cppcheck. */ /* Variables used but not seen by cppcheck. */
(void)ix; (void)iy; (void)iz; (void)ix; (void)iy; (void)iz;
@@ -520,7 +521,7 @@ WC_INLINE static int fp_mul_comba_mulx(fp_int *A, fp_int *B, fp_int *C)
dst = tmp; dst = tmp;
} }
TFM_INTEL_MUL_COMBA(A, B, dst) ; TFM_INTEL_MUL_COMBA(A, B, carry, dst) ;
dst->used = pa; dst->used = pa;
dst->sign = A->sign ^ B->sign; dst->sign = A->sign ^ B->sign;