fixup intel avx request

This commit is contained in:
toddouska
2015-03-26 14:26:25 -07:00
parent 4bd5dfe04a
commit 22afce8609
3 changed files with 40 additions and 32 deletions

View File

@@ -771,6 +771,7 @@ static int wc_InitRng_IntelRD()
#if defined(HAVE_HASHDRBG) || defined(NO_RC4)
/* return 0 on success */
static inline int IntelRDseed32(unsigned int *seed)
{
int rdseed; unsigned char ok ;
@@ -783,18 +784,22 @@ static inline int IntelRDseed32(unsigned int *seed)
return 1;
}
/*
 * Retry wrapper around IntelRDseed32().
 *
 * Calls IntelRDseed32() up to INTELRD_RETRY times and stops at the first
 * attempt that reports success.
 *
 * rnd  destination passed straight through to IntelRDseed32()
 *
 * return 0 on success, 1 if every attempt failed
 */
static inline int IntelRDseed32_r(unsigned int *rnd)
{
    int i;

    /* IntelRDseed32() returns 0 on success, so a zero result ends the retry
     * loop; a non-zero result must never be reported as success. */
    for (i = 0; i < INTELRD_RETRY; i++) {
        if (IntelRDseed32(rnd) == 0)
            return 0;
    }
    return 1;
}
/* return 0 on success */
static int wc_GenerateSeed_IntelRD(OS_Seed* os, byte* output, word32 sz)
{
(void) os ;
int ret ; byte buff[4] ;
int ret ;
unsigned int rndTmp ;
for( ; sz/4 > 0; sz-=4, output+=4) {
if(IS_INTEL_RDSEED)ret = IntelRDseed32_r((word32 *)output) ;
@@ -804,36 +809,44 @@ static int wc_GenerateSeed_IntelRD(OS_Seed* os, byte* output, word32 sz)
}
if(sz == 0)return 0 ;
if(IS_INTEL_RDSEED)ret = IntelRDseed32_r((word32 *)buff) ;
if(IS_INTEL_RDSEED)ret = IntelRDseed32_r(&rndTmp) ;
else return 1 ;
if(ret)
return 1 ;
XMEMCPY(output, buff, sz) ;
XMEMCPY(output, &rndTmp, sz) ;
return 0;
}
#else
/*
 * Issue a single RDRAND instruction.
 *
 * rnd  receives the 32-bit value when the instruction reports success;
 *      left unmodified otherwise
 *
 * return 0 on success, 1 on failure
 */
static inline int IntelRDrand32(unsigned int *rnd)
{
    int rdrand; unsigned char ok;

    /* setc copies the carry flag left by rdrand into ok; a set flag is
     * treated as "value valid". */
    __asm__ volatile("rdrand %0; setc %1":"=r"(rdrand), "=qm"(ok));
    if (ok) {
        *rnd = rdrand;
        return 0;
    }
    return 1;
}
/*
 * Retry wrapper around IntelRDrand32().
 *
 * Calls IntelRDrand32() up to INTELRD_RETRY times and stops at the first
 * attempt that reports success.
 *
 * rnd  destination passed straight through to IntelRDrand32()
 *
 * return 0 on success, 1 if every attempt failed
 */
static inline int IntelRDrand32_r(unsigned int *rnd)
{
    int i;

    /* IntelRDrand32() returns 0 on success, so only a zero result may end
     * the loop with a success code. */
    for (i = 0; i < INTELRD_RETRY; i++) {
        if (IntelRDrand32(rnd) == 0)
            return 0;
    }
    return 1;
}
/* return 0 on success */
static int wc_GenerateRand_IntelRD(OS_Seed* os, byte* output, word32 sz)
{
(void) os ;
int ret ; byte buff[4] ;
int ret ;
unsigned int rndTmp;
for( ; sz/4 > 0; sz-=4, output+=4) {
if(IS_INTEL_RDRAND)ret = IntelRDrand32_r((word32 *)output);
@@ -843,11 +856,11 @@ static int wc_GenerateRand_IntelRD(OS_Seed* os, byte* output, word32 sz)
}
if(sz == 0)return 0 ;
if(IS_INTEL_RDRAND)ret = IntelRDrand32_r((word32 *)buff);
if(IS_INTEL_RDRAND)ret = IntelRDrand32_r(&rndTmp);
else return 1 ;
if(ret)
return 1 ;
XMEMCPY(output, buff, sz) ;
XMEMCPY(output, &rndTmp, sz) ;
return 0;
}
#endif /* defined(HAVE_HASHDRBG) || defined(NO_RC4) */
@@ -1145,18 +1158,6 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
return 0;
}
#elif defined(HAVE_INTEL_RDGEN) && (defined(HAVE_HASHDRBG) || defined(NO_RC4))
/*
 * Seed generator for builds that rely on the Intel RDSEED path.
 *
 * os      unused
 * output  buffer handed to wc_GenerateSeed_IntelRD()
 * sz      number of bytes requested
 *
 * return the result of wc_GenerateSeed_IntelRD() when IS_INTEL_RDSEED is
 *        true, otherwise 1
 */
int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
{
    (void)os;

    /* set cpuid_flags if not yet */
    wc_InitRng_IntelRD();

    if (IS_INTEL_RDSEED) {
        return wc_GenerateSeed_IntelRD(NULL, output, sz);
    }

    /* RDSEED not flagged as available: report failure */
    return 1;
}
#elif defined(CUSTOM_RAND_GENERATE)
/* Implement your own random generation function
@@ -1192,6 +1193,13 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz)
{
int ret = 0;
#if defined(HAVE_INTEL_RDGEN) && (defined(HAVE_HASHDRBG) || defined(NO_RC4))
wc_InitRng_IntelRD() ; /* set cpuid_flags if not yet */
if(IS_INTEL_RDSEED)
return wc_GenerateSeed_IntelRD(NULL, output, sz) ;
#endif
os->fd = open("/dev/urandom",O_RDONLY);
if (os->fd == -1) {
/* may still have /dev/random */

View File

@@ -956,7 +956,8 @@ __asm__ volatile("movl %r8d, "#h"\n\t"); \
#define W_K_from_buff\
{ word64 _buff[2] ;/* X0..3(xmm4..7) = sha256->buffer[0.15]; */\
{ __attribute__ ((aligned (32))) word64 _buff[2] ; \
/* X0..3(xmm4..7) = sha256->buffer[0.15]; */\
_buff[0] = *(word64*)&sha256->buffer[0] ;\
_buff[1] = *(word64*)&sha256->buffer[2] ;\
__asm__ volatile("vmovaps %0, %%xmm4\n\t"\
@@ -1233,7 +1234,7 @@ static int Transform_AVX1_RORX(Sha256* sha256)
__asm__ volatile("vmovdqu %0, %%ymm11 \n\t"::"m"(reg[7][0]):YMM_REGs);\
}
#if DEBUG
#ifdef DEBUG_XMM
/* DUMP_REG / DUMP_REG2: forward REG to _DUMP_REG together with its
 * stringized spelling (#REG). Both macros expand to the same call. */
#define DUMP_REG(REG) _DUMP_REG(REG, #REG)
#define DUMP_REG2(REG) _DUMP_REG(REG, #REG)

View File

@@ -97,7 +97,6 @@ int wc_Sha384Hash(const byte* data, word32 len, byte* out)
#endif /* min */
//#undef USE_INTEL_SPEEDUP
#if defined(USE_INTEL_SPEEDUP)
#define HAVE_INTEL_AVX1
#define HAVE_INTEL_AVX2
@@ -292,10 +291,11 @@ static void set_Transform(void) {
#endif
/* Dummy for saving MM_REGs on behalf of Transform */
//#if defined(HAVE_INTEL_AVX2)
//#define SAVE_XMM_YMM __asm__ volatile("orq %%r8, %%r8":::\
// "%ymm0","%ymm1","%ymm2","%ymm3","%ymm4","%ymm5","%ymm6","%ymm7","%ymm8","%ymm9","%ymm10","%ymm11",\
// "%ymm12","%ymm13","%ymm14","%ymm15")
/* #if defined(HAVE_INTEL_AVX2)
#define SAVE_XMM_YMM __asm__ volatile("orq %%r8, %%r8":::\
"%ymm0","%ymm1","%ymm2","%ymm3","%ymm4","%ymm5","%ymm6","%ymm7","%ymm8","%ymm9","%ymm10","%ymm11",\
"%ymm12","%ymm13","%ymm14","%ymm15")
*/
#if defined(HAVE_INTEL_AVX1)
/* SAVE_XMM_YMM (AVX1 form): an asm statement whose only instruction is
 * `orq %r8, %r8` and whose clobber list names xmm0..xmm15, so the compiler
 * is told those registers are modified at this point. The expansion has no
 * trailing semicolon; the user of the macro supplies it. */
#define SAVE_XMM_YMM __asm__ volatile("orq %%r8, %%r8":::\
"xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15")
@@ -771,7 +771,6 @@ static word64 mBYTE_FLIP_MASK[] = { 0x0001020304050607, 0x08090a0b0c0d0e0f } ;
/* s0_(dest, src): applies the three sub-steps s0_1, s0_2, s0_3, in that
 * order, to the same (dest, src) pair.
 * NOTE: expands to several `;`-separated statements, so it is not safe as
 * the unbraced body of an if/for/while. */
#define s0_(dest, src) s0_1(dest, src) ; s0_2(dest, src) ; s0_3(dest, src)
/* s1_(dest, src): same composition as s0_, built from s1_1, s1_2, s1_3. */
#define s1_(dest, src) s1_1(dest, src) ; s1_2(dest, src) ; s1_3(dest, src)
//static word64 W_X[16+4];
#define Block_xx_1(i) \
MOVE_to_REG(W_I_15, W_X[(i-15)&15]) ;\
MOVE_to_REG(W_I_7, W_X[(i- 7)&15]) ;\