forked from wolfSSL/wolfssl
Merge branch 'kojo-intel'
146  wolfcrypt/src/asm.c  (Normal file → Executable file)
@@ -72,7 +72,7 @@ __asm__( \
 #define MONT_FINI
 #define LOOP_END
 #define LOOP_START \
-   mu = c[x] * mp
+   mu = c[x] * mp;

 #define INNERMUL \
 __asm__( \
@@ -87,6 +87,73 @@ __asm__( \
    :"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++) \
    : "%rax", "%rdx", "cc")

+#ifdef HAVE_INTEL_MULX
+#define MULX_INIT(a0, c0, cy)\
+    __asm__ volatile( \
+        "xorq %%r10, %%r10\n\t" \
+        "movq %1,%%rdx\n\t" \
+        "addq %2, %0\n\t"        /* c0+=cy; Set CF, OF */ \
+        "adoxq %%r10, %%r10\n\t" /* Reset OF */ \
+        :"+m"(c0):"r"(a0),"r"(cy):"%r8","%r10","%r11","%r12","%rdx") ; \
+
+#define MULX_INNERMUL_R1(c0, c1, pre)\
+    { \
+    __asm__ volatile ( \
+        "mulx %%r11,%%r9, %%r8 \n\t" \
+        "movq %2, %%r12\n\t" \
+        "adoxq %%r9,%0 \n\t" \
+        "adcxq %%r8,%1 \n\t" \
+        :"+r"(c0),"+r"(c1):"m"(pre):"%r8","%r9","%r11","%r12","%rdx" \
+    ); }
+
+
+#define MULX_INNERMUL_R2(c0, c1, pre)\
+    { \
+    __asm__ volatile ( \
+        "mulx %%r12,%%r9, %%r8 \n\t" \
+        "movq %2, %%r11\n\t" \
+        "adoxq %%r9,%0 \n\t" \
+        "adcxq %%r8,%1 \n\t" \
+        :"+r"(c0),"+r"(c1):"m"(pre):"%r8","%r9","%r11","%r12","%rdx" \
+    ); }
+
+#define MULX_LOAD_R1(val)\
+    __asm__ volatile ( \
+        "movq %0, %%r11\n\t"\
+        ::"m"(val):"%r11"\
+) ;
+
+#define MULX_INNERMUL_LAST(c0, c1)\
+    { \
+    __asm__ volatile ( \
+        "mulx %%r12,%%r9, %%r8 \n\t" \
+        "movq $0, %%r10 \n\t" \
+        "adoxq %%r10, %%r9 \n\t" \
+        "adcq $0,%%r8 \n\t" \
+        "addq %%r9,%0 \n\t" \
+        "adcq $0,%%r8 \n\t" \
+        "movq %%r8,%1 \n\t" \
+        :"+m"(c0),"=m"(c1)::"%r8","%r9","%r10","%r12","%rdx"\
+    ); }
+
+#define MULX_INNERMUL8(x,y,z,cy)\
+    MULX_LOAD_R1(x[0]) ;\
+    MULX_INIT(y, _c0, cy) ; /* rdx=y; z0+=cy; */ \
+    MULX_INNERMUL_R1(_c0, _c1, x[1]) ;\
+    MULX_INNERMUL_R2(_c1, _c2, x[2]) ;\
+    MULX_INNERMUL_R1(_c2, _c3, x[3]) ;\
+    MULX_INNERMUL_R2(_c3, _c4, x[4]) ;\
+    MULX_INNERMUL_R1(_c4, _c5, x[5]) ;\
+    MULX_INNERMUL_R2(_c5, _c6, x[6]) ;\
+    MULX_INNERMUL_R1(_c6, _c7, x[7]) ;\
+    MULX_INNERMUL_LAST(_c7, cy) ;\
+
+#define INNERMUL8_MULX \
+{\
+    MULX_INNERMUL8(tmpm, mu, _c, cy);\
+}
+#endif
+
 #define INNERMUL8 \
 __asm__( \
 "movq 0(%5),%%rax    \n\t"  \
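Note: the MULX_* macros added above rely on the BMI2/ADX instructions mulx, adoxq and adcxq. mulx produces the full 128-bit product without touching the flags, while adoxq and adcxq maintain two independent carry chains (OF and CF), so the low and high halves of the eight partial products can be accumulated without serializing on a single carry flag. A rough portable-C model of the value MULX_INNERMUL8 computes (the arithmetic only, not the two-chain scheduling; the helper name and the use of unsigned __int128 are illustrative) is:

    typedef unsigned long long u64;
    typedef unsigned __int128  u128;

    /* c[0..7] gains *cy + mu * m[0..7]; the final carry replaces *cy. */
    static void innermul8_model(u64 c[8], u64 *cy, const u64 m[8], u64 mu)
    {
        u64 carry = *cy;                 /* MULX_INIT folds cy into c[0]     */
        int i;
        for (i = 0; i < 8; i++) {
            u128 t = (u128)mu * m[i] + c[i] + carry;
            c[i]  = (u64)t;              /* adoxq chain: low product halves  */
            carry = (u64)(t >> 64);      /* adcxq chain: high product halves */
        }
        *cy = carry;                     /* MULX_INNERMUL_LAST writes cy     */
    }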
@@ -178,8 +245,7 @@ __asm__( \
      \
     :"=r"(_c), "=r"(cy) \
     : "0"(_c), "1"(cy), "g"(mu), "r"(tmpm)\
-    : "%rax", "%rdx", "%r10", "%r11", "cc")
-
+    : "%rax", "%rdx", "%r10", "%r11", "cc")\

 #define PROPCARRY \
 __asm__( \
@@ -1138,6 +1204,80 @@ __asm__( \
      "adcl  $0,%2     \n\t" \
      :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j)  :"%eax","%edx","cc");

+#elif defined(HAVE_INTEL_MULX)
+
+/* anything you need at the start */
+#define COMBA_START
+
+/* clear the chaining variables */
+#define COMBA_CLEAR \
+   c0 = c1 = c2 = 0;
+
+/* forward the carry to the next digit */
+#define COMBA_FORWARD \
+   do { c0 = c1; c1 = c2; c2 = 0; } while (0);
+
+/* store the first sum */
+#define COMBA_STORE(x) \
+   x = c0;
+
+/* store the second sum [carry] */
+#define COMBA_STORE2(x) \
+   x = c1;
+
+/* anything you need at the end */
+#define COMBA_FINI
+
+#define MULADD_MULX(b0, c0, c1)\
+    __asm__ volatile ( \
+        "mulx %2,%%r9, %%r8 \n\t" \
+        "adoxq %%r9,%0 \n\t" \
+        "adcxq %%r8,%1 \n\t" \
+        :"+r"(c0),"+r"(c1):"r"(b0):"%r8","%r9","%rdx"\
+    )
+
+
+#define MULADD_MULX_ADD_CARRY(c0, c1)\
+    __asm__ volatile(\
+        "mov $0, %%r10\n\t"\
+        "movq %1, %%r8\n\t" \
+        "adox %%r10, %0\n\t"\
+        "adcx %%r10, %1\n\t"\
+        :"+r"(c0),"+r"(c1)::"%r8","%r9","%r10","%rdx") ;
+
+#define MULADD_SET_A(a0)\
+    __asm__ volatile("add $0, %%r8\n\t" \
+        "movq %0,%%rdx\n\t"::"r"(a0):"%r8","%r9","%r10","%rdx") ; \
+
+#define MULADD_BODY(a,b,c)\
+    cp = &(c->dp[iz]) ;\
+    c0 = cp[0] ; c1 = cp[1];\
+    MULADD_SET_A(a->dp[ix]) ;\
+    MULADD_MULX(b0, c0, c1) ;\
+    cp[0]=c0; c0=cp[2]; cp++ ;\
+    MULADD_MULX(b1, c1, c0) ;\
+    cp[0]=c1; c1=cp[2]; cp++ ; \
+    MULADD_MULX(b2, c0, c1) ;\
+    cp[0]=c0; c0=cp[2]; cp++ ; \
+    MULADD_MULX(b3, c1, c0) ;\
+    cp[0]=c1; c1=cp[2]; cp++ ; \
+    MULADD_MULX_ADD_CARRY(c0, c1) ;\
+    cp[0]=c0; cp[1]=c1;
+
+#define TFM_INTEL_MUL_COMBA(a, b, c)\
+    for(ix=0; ix<pa; ix++)c->dp[ix]=0 ;\
+    for(iy=0; (iy<b->used); iy+=4) {\
+        fp_digit *bp ;\
+        bp = &(b->dp[iy+0]) ; \
+        fp_digit b0 = bp[0] , b1= bp[1], b2= bp[2], b3= bp[3];\
+        ix=0, iz=iy;\
+        while(ix<a->used) {\
+            fp_digit c0, c1; \
+            fp_digit *cp ;\
+            MULADD_BODY(a,b,c); ix++ ; iz++ ; \
+        }\
+    };

 #elif defined(TFM_X86_64)
 /* x86-64 optimized */

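Note: TFM_INTEL_MUL_COMBA above walks b four digits at a time (b0..b3) and, for each digit of a, lets MULADD_BODY accumulate the four products into a six-digit window of the result. A rough portable model of one MULADD_BODY pass (the arithmetic only, not the MULX/ADX register scheduling; names and unsigned __int128 are illustrative) is:

    typedef unsigned long long u64;
    typedef unsigned __int128  u128;

    /* cp[0..5] gains a_digit * (b[0] + b[1]*2^64 + b[2]*2^128 + b[3]*2^192). */
    static void muladd_body_model(u64 *cp, u64 a_digit, const u64 b[4])
    {
        u128 acc = 0;
        int k;
        for (k = 0; k < 4; k++) {
            acc  += (u128)cp[k] + (u128)a_digit * b[k];
            cp[k] = (u64)acc;
            acc >>= 64;
        }
        /* flush the remaining carry into the top of the window, the job
           MULADD_MULX_ADD_CARRY does in the assembly version */
        acc   += cp[4];
        cp[4]  = (u64)acc;
        cp[5] += (u64)(acc >> 64);
    }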
wolfcrypt/src/sha256.c

@@ -203,14 +203,16 @@ static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) {
     return 0 ;
 }

-static void set_cpuid_flags(void) {
+static int set_cpuid_flags(void) {
     if(cpuid_check==0) {
-        if(cpuid_flag(1, 0, ECX, 28)){ cpuid_flags |= CPUID_AVX1 ; }
+        if(cpuid_flag(1, 0, ECX, 28)){ cpuid_flags |= CPUID_AVX1 ;}
         if(cpuid_flag(7, 0, EBX, 5)){ cpuid_flags |= CPUID_AVX2 ; }
-        if(cpuid_flag(1, 0, ECX, 30)){ cpuid_flags |= CPUID_RDRAND ;}
-        if(cpuid_flag(7, 0, EBX, 18)){ cpuid_flags |= CPUID_RDSEED ;}
+        if(cpuid_flag(1, 0, ECX, 30)){ cpuid_flags |= CPUID_RDRAND ; }
+        if(cpuid_flag(7, 0, EBX, 18)){ cpuid_flags |= CPUID_RDSEED ; }
         cpuid_check = 1 ;
-    }
+        return 0 ;
+    }
+    return 1 ;
 }

 /* #if defined(HAVE_INTEL_AVX1/2) at the tail of sha512 */
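Note: cpuid_flag(leaf, sub, reg, bit) reads one CPUID feature bit; the probes above test leaf 1 ECX bit 28 (AVX), leaf 7 EBX bit 5 (AVX2), leaf 1 ECX bit 30 (RDRAND) and leaf 7 EBX bit 18 (RDSEED). For orientation only, the AVX2 probe written with GCC's <cpuid.h> helper (available in newer GCC/Clang; this is not wolfSSL code) would look roughly like:

    #include <cpuid.h>

    static int have_avx2(void)
    {
        unsigned int eax, ebx, ecx, edx;
        if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
            return 0;               /* CPUID leaf 7 not supported */
        return (ebx >> 5) & 1;      /* leaf 7, sub-leaf 0, EBX bit 5 = AVX2 */
    }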
@@ -229,16 +231,19 @@ static int (*Transform_p)(Sha256* sha256) /* = _Transform */;
 #define XTRANSFORM(sha256, B)  (*Transform_p)(sha256)

 static void set_Transform(void) {
-     set_cpuid_flags() ;
+     if(set_cpuid_flags())return ;

 #if defined(HAVE_INTEL_AVX2)
-     if(IS_INTEL_AVX2){ Transform_p = Transform_AVX1_RORX; return ; }
-     Transform_p = Transform_AVX2 ; /* for avoiding warning,"not used" */
+     if(IS_INTEL_AVX2){
+         Transform_p = Transform_AVX1_RORX; return ;
+         Transform_p = Transform_AVX2 ;
+             /* for avoiding warning,"not used" */
+     }
 #endif
 #if defined(HAVE_INTEL_AVX1)
      Transform_p = ((IS_INTEL_AVX1) ? Transform_AVX1 : Transform) ; return ;
 #endif
-     Transform_p = Transform ;
+     Transform_p = Transform ; return ;
 }

 #else
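Note: set_Transform is the one-time dispatcher for SHA-256: after the CPUID probe it points Transform_p at the best available block function, and every later call goes through the XTRANSFORM macro. The assignment Transform_p = Transform_AVX2 placed after the return is unreachable and, as the in-source comment says, exists only to reference Transform_AVX2 and silence a "defined but not used" warning. Stripped of that workaround, the new selection logic reads roughly as follows (a restatement for readability, not additional code):

    static void set_Transform(void) {
        if (set_cpuid_flags())   /* flags already cached: keep the earlier choice */
            return;
    #if defined(HAVE_INTEL_AVX2)
        if (IS_INTEL_AVX2) { Transform_p = Transform_AVX1_RORX; return; }
    #endif
    #if defined(HAVE_INTEL_AVX1)
        Transform_p = IS_INTEL_AVX1 ? Transform_AVX1 : Transform;  return;
    #endif
        Transform_p = Transform;
    }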
@@ -251,10 +256,10 @@ static void set_Transform(void) {

 /* Dummy for saving MM_REGs on behalf of Transform */
 #if defined(HAVE_INTEL_AVX2)&& !defined(HAVE_INTEL_AVX1)
-#define SAVE_XMM_YMM __asm__ volatile("vpxor %%ymm7, %%ymm7, %%ymm7":::\
+#define SAVE_XMM_YMM __asm__ volatile("or %%r8, %%r8":::\
   "%ymm4","%ymm5","%ymm6","%ymm7","%ymm8","%ymm9","%ymm10","%ymm11","%ymm12","%ymm13","%ymm14","%ymm15")
 #elif defined(HAVE_INTEL_AVX1)
-#define SAVE_XMM_YMM __asm__ volatile("vpxor %%xmm7, %%xmm7, %%xmm7":::\
+#define SAVE_XMM_YMM __asm__ volatile("or %%r8, %%r8":::\
   "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10",\
   "xmm11","xmm12","xmm13","xmm14","xmm15")
 #else
@@ -336,25 +341,6 @@ static const ALIGN32 word32 K[64] = {

 #endif

-#if defined(HAVE_INTEL_RORX)
-#define ROTR(func, bits, x) \
-word32 func(word32 x) {  word32 ret ;\
-    __asm__ ("rorx $"#bits", %1, %0\n\t":"=r"(ret):"r"(x):) ;\
-    return ret ;\
-}
-
-static INLINE ROTR(rotrFixed_2, 2, x)
-static INLINE ROTR(rotrFixed_13, 13, x)
-static INLINE ROTR(rotrFixed_22, 22, x)
-static INLINE ROTR(rotrFixed_6, 6, x)
-static INLINE ROTR(rotrFixed_11, 11, x)
-static INLINE ROTR(rotrFixed_25, 25, x)
-static INLINE ROTR(rotrFixed_7, 7, x)
-static INLINE ROTR(rotrFixed_18, 18, x)
-static INLINE ROTR(rotrFixed_17, 17, x)
-static INLINE ROTR(rotrFixed_19, 19, x)
-#endif
-
 #if defined(FREESCALE_MMCAU)

 static int Transform(Sha256* sha256, byte* buf)
@@ -370,18 +356,11 @@ static int Transform(Sha256* sha256, byte* buf)
 #define Maj(x,y,z)  ((((x) | (y)) & (z)) | ((x) & (y)))
 #define R(x, n)     (((x)&0xFFFFFFFFU)>>(n))

-#if !defined(HAVE_INTEL_RORX)
 #define S(x, n)     rotrFixed(x, n)
 #define Sigma0(x)   (S(x, 2)  ^ S(x, 13) ^ S(x, 22))
 #define Sigma1(x)   (S(x, 6)  ^ S(x, 11) ^ S(x, 25))
 #define Gamma0(x)   (S(x, 7)  ^ S(x, 18) ^ R(x, 3))
 #define Gamma1(x)   (S(x, 17) ^ S(x, 19) ^ R(x, 10))
-#else
-#define Sigma0(x)   (rotrFixed_2(x)  ^ rotrFixed_13(x) ^ rotrFixed_22(x))
-#define Sigma1(x)   (rotrFixed_6(x)  ^ rotrFixed_11(x) ^ rotrFixed_25(x))
-#define Gamma0(x)   (rotrFixed_7(x)  ^ rotrFixed_18(x) ^ R(x, 3))
-#define Gamma1(x)   (rotrFixed_17(x) ^ rotrFixed_19(x) ^ R(x, 10))
-#endif

 #define RND(a,b,c,d,e,f,g,h,i) \
      t0 = (h) + Sigma1((e)) + Ch((e), (f), (g)) + K[(i)] + W[(i)]; \
@@ -634,7 +613,7 @@ int wc_Sha256Hash(const byte* data, word32 len, byte* hash)
 #define S_6 %ebx
 #define S_7 %r9d

-#define SSE_REGs "%esi", "%r8", "%edx", "%ebx","%r9","%r10","%r11","%r12","%r13","%r14","%r15"
+#define SSE_REGs "%edi", "%ecx", "%esi", "%edx", "%ebx","%r8","%r9","%r10","%r11","%r12","%r13","%r14","%r15"

 #if defined(HAVE_INTEL_RORX)
 #define RND_STEP_RORX_1(a,b,c,d,e,f,g,h,i)\
@@ -732,7 +711,7 @@ __asm__ volatile("addl %"#h", %%r8d\n\t":::"%r8",SSE_REGs);  \
 /* r8b = h + w_k + Sigma1(e) + Ch(e,f,g) + Maj(a,b,c) */\
 __asm__ volatile("addl %%edx, %%r8d\n\t":::"%edx","%r8",SSE_REGs);\
 /* r8b = h + w_k + Sigma1(e) Sigma0(a) + Ch(e,f,g) + Maj(a,b,c) */\
-__asm__ volatile("movl %r8d, "#h"\n\t"); \
+__asm__ volatile("movl %%r8d, %"#h"\n\t":::"%r8", SSE_REGs); \
 /* h = h + w_k + Sigma1(e) + Sigma0(a) + Ch(e,f,g) + Maj(a,b,c) */ \

 #define RND_X(a,b,c,d,e,f,g,h,i) \
wolfcrypt/src/sha512.c

@@ -235,12 +235,16 @@ static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) {
     return 0 ;
 }

-static int set_cpuid_flags(void) {
-    if(cpuid_check==0) {
+#define CHECK_SHA512 0x1
+#define CHECK_SHA384 0x2
+
+static int set_cpuid_flags(int sha) {
+    if((cpuid_check & sha) ==0) {
         if(cpuid_flag(1, 0, ECX, 28)){ cpuid_flags |= CPUID_AVX1 ;}
         if(cpuid_flag(7, 0, EBX, 5)){ cpuid_flags |= CPUID_AVX2 ; }
         if(cpuid_flag(1, 0, ECX, 30)){ cpuid_flags |= CPUID_RDRAND ; }
         if(cpuid_flag(7, 0, EBX, 18)){ cpuid_flags |= CPUID_RDSEED ; }
+        cpuid_check |= sha ;
         return 0 ;
     }
     return 1 ;
@@ -269,21 +273,19 @@ static int (*Transform_p)(Sha512* sha512) = _Transform ;
 #define Transform(sha512) (*Transform_p)(sha512)

 static void set_Transform(void) {
-    if(set_cpuid_flags()) return ;
+    if(set_cpuid_flags(CHECK_SHA512)) return ;

-#if defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2)
-    Transform_p = ((IS_INTEL_AVX1) ? Transform_AVX1 : _Transform) ;
-#elif defined(HAVE_INTEL_AVX2)
-  #if defined(HAVE_INTEL_AVX1) && defined(HAVE_INTEL_RORX)
-    if(IS_INTEL_AVX2) { Transform_p = Transform_AVX1_RORX ; return ; }
-  #endif
-    if(IS_INTEL_AVX2) { Transform_p = Transform_AVX2 ; return ; }
-#if defined(HAVE_INTEL_AVX1)
-    Transform_p = ((IS_INTEL_AVX1) ? Transform_AVX1 : _Transform) ;
-#endif
-#else
-    Transform_p = ((IS_INTEL_AVX1) ? Transform_AVX1 : _Transform) ;
-#endif
+#if defined(HAVE_INTEL_AVX2)
+    if(IS_INTEL_AVX2){
+        Transform_p = Transform_AVX1_RORX; return ;
+        Transform_p = Transform_AVX2 ;
+            /* for avoiding warning,"not used" */
+    }
+#endif
+#if defined(HAVE_INTEL_AVX1)
+    Transform_p = ((IS_INTEL_AVX1) ? Transform_AVX1 : _Transform) ; return ;
+#endif
+    Transform_p = _Transform ; return ;
 }

 #else
@@ -1344,7 +1346,7 @@ static int (*Transform384_p)(Sha384* sha384) = _Transform384 ;

 #define Transform384(sha384) (*Transform384_p)(sha384)
 static void set_Transform384(void) {
-    if(set_cpuid_flags())return ;
+    if(set_cpuid_flags(CHECK_SHA384))return ;

 #if defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2)
     Transform384_p = ((IS_INTEL_AVX1) ? Transform384_AVX1 : _Transform384) ;
51  wolfcrypt/src/tfm.c  (Normal file → Executable file)
@@ -401,6 +401,36 @@ void fp_mul_2d(fp_int *a, int b, fp_int *c)
 }

 /* generic PxQ multiplier */
+#if defined(HAVE_INTEL_MULX)
+void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
+
+{
+   int       ix, iy, iz, pa;
+   fp_int    tmp, *dst;
+
+   /* get size of output and trim */
+   pa = A->used + B->used;
+   if (pa >= FP_SIZE) {
+      pa = FP_SIZE-1;
+   }
+
+   if (A == C || B == C) {
+       fp_init(&tmp);
+       dst = &tmp;
+   } else {
+       fp_zero(C);
+       dst = C;
+   }
+
+   TFM_INTEL_MUL_COMBA(A, B, dst) ;
+
+   dst->used = pa;
+   dst->sign = A->sign ^ B->sign;
+   fp_clamp(dst);
+   fp_copy(dst, C);
+}
+
+#else
 void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
 {
    int       ix, iy, iz, tx, ty, pa;
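Note: the MULX variant of fp_mul_comba keeps the usual tfm conventions: the product occupies at most A->used + B->used digits (clamped to FP_SIZE-1), fp_clamp trims leading zero digits, and when the destination aliases an operand the product is built in a stack temporary and copied back. A small usage sketch (illustrative only) showing why the aliasing guard matters:

    fp_int a;
    fp_init(&a);
    fp_set(&a, 12345);
    fp_mul_comba(&a, &a, &a);   /* in-place squaring: A == B == C, so the
                                   routine multiplies into a temporary and
                                   copies the result back into a */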
@@ -455,6 +485,7 @@ void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
   fp_clamp(dst);
   fp_copy(dst, C);
 }
+#endif

 /* a/b => cb + d == a */
 int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
@@ -1525,6 +1556,19 @@ void fp_montgomery_calc_normalization(fp_int *a, fp_int *b)
   #include "fp_mont_small.i"
 #endif

+#ifdef HAVE_INTEL_MULX
+static inline void innermul8_mulx(fp_digit *c_mulx, fp_digit *cy_mulx, fp_digit *tmpm, fp_digit mu)
+{
+    fp_digit _c0, _c1, _c2, _c3, _c4, _c5, _c6, _c7, cy ;
+
+    cy = *cy_mulx ;
+    _c0=c_mulx[0]; _c1=c_mulx[1]; _c2=c_mulx[2]; _c3=c_mulx[3]; _c4=c_mulx[4]; _c5=c_mulx[5]; _c6=c_mulx[6]; _c7=c_mulx[7];
+    INNERMUL8_MULX ;
+    c_mulx[0]=_c0; c_mulx[1]=_c1; c_mulx[2]=_c2; c_mulx[3]=_c3; c_mulx[4]=_c4; c_mulx[5]=_c5; c_mulx[6]=_c6; c_mulx[7]=_c7;
+    *cy_mulx = cy ;
+}
+#endif
+
 /* computes x/R == x (mod N) via Montgomery Reduction */
 void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
 {
||||||
@ -1565,12 +1609,15 @@ void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
|
|||||||
y = 0;
|
y = 0;
|
||||||
#if (defined(TFM_SSE2) || defined(TFM_X86_64))
|
#if (defined(TFM_SSE2) || defined(TFM_X86_64))
|
||||||
for (; y < (pa & ~7); y += 8) {
|
for (; y < (pa & ~7); y += 8) {
|
||||||
INNERMUL8;
|
#ifdef HAVE_INTEL_MULX
|
||||||
|
innermul8_mulx(_c, &cy, tmpm, mu) ;
|
||||||
|
#else
|
||||||
|
INNERMUL8 ;
|
||||||
|
#endif
|
||||||
_c += 8;
|
_c += 8;
|
||||||
tmpm += 8;
|
tmpm += 8;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for (; y < pa; y++) {
|
for (; y < pa; y++) {
|
||||||
INNERMUL;
|
INNERMUL;
|
||||||
++_c;
|
++_c;
|
||||||
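Note: with this change the 8-digit unrolled inner loop of fp_montgomery_reduce calls innermul8_mulx() when MULX is available and falls back to the classic INNERMUL8 assembly otherwise; the per-digit tail loop still uses INNERMUL. For orientation, one pass of Montgomery reduction over a single digit computes roughly the following in portable terms (a sketch assuming 64-bit digits and unsigned __int128; not the wolfSSL implementation):

    typedef unsigned long long u64;
    typedef unsigned __int128  u128;

    /* Make c[0] divisible by 2^64 by adding mu*m, then propagate the carry. */
    static void mont_pass_model(u64 *c, const u64 *m, int used, u64 mp)
    {
        u64 mu = c[0] * mp;      /* mp = -1/m[0] mod 2^64, from fp_montgomery_setup */
        u64 carry = 0;
        int y;
        for (y = 0; y < used; y++) {
            u128 t = (u128)mu * m[y] + c[y] + carry;
            c[y]  = (u64)t;
            carry = (u64)(t >> 64);
        }
        c[used] += carry;        /* carry propagation beyond this digit omitted */
    }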
|
Reference in New Issue
Block a user