From bd5fc1712f84916ee372ccb254cb2bea56b06f6c Mon Sep 17 00:00:00 2001 From: Takashi Kojo Date: Sun, 29 Mar 2015 19:39:03 +0900 Subject: [PATCH] fixed sha256/AVX1 crash with --enable-debug: missing register def in inline asm destroy register rorx removed when non-AVX2 build . cleaned set_Transform --- wolfcrypt/src/sha256.c | 79 +++++++++++++++++++----------------------- wolfcrypt/src/sha512.c | 34 +++++++++--------- 2 files changed, 54 insertions(+), 59 deletions(-) diff --git a/wolfcrypt/src/sha256.c b/wolfcrypt/src/sha256.c index d3eac5253..ce02bb551 100755 --- a/wolfcrypt/src/sha256.c +++ b/wolfcrypt/src/sha256.c @@ -53,7 +53,21 @@ int wc_Sha256Hash(const byte* data, word32 len, byte* out) { return Sha256Hash(data, len, out); } + #else /* else build without fips */ + +#if !defined (ALIGN32) + #if defined (__GNUC__) + #define ALIGN32 __attribute__ ( (aligned (32))) + #elif defined(_MSC_VER) + /* disable align warning, we want alignment ! */ + #pragma warning(disable: 4324) + #define ALIGN32 __declspec (align (32)) + #else + #define ALIGN32 + #endif +#endif + #ifdef WOLFSSL_PIC32MZ_HASH #define wc_InitSha256 wc_InitSha256_sw #define wc_Sha256Update wc_Sha256Update_sw @@ -189,14 +203,16 @@ static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) { return 0 ; } -static void set_cpuid_flags(void) { +static int set_cpuid_flags(void) { if(cpuid_check==0) { - if(cpuid_flag(1, 0, ECX, 28)){ cpuid_flags |= CPUID_AVX1 ; } - if(cpuid_flag(7, 0, EBX, 5)){ cpuid_flags |= CPUID_AVX2 ; } - if(cpuid_flag(1, 0, ECX, 30)){ cpuid_flags |= CPUID_RDRAND ;} - if(cpuid_flag(7, 0, EBX, 18)){ cpuid_flags |= CPUID_RDSEED ;} - cpuid_check = 1 ; - } + if(cpuid_flag(1, 0, ECX, 28)){ cpuid_flags |= CPUID_AVX1 ;} + if(cpuid_flag(7, 0, EBX, 5)){ cpuid_flags |= CPUID_AVX2 ; } + if(cpuid_flag(1, 0, ECX, 30)){ cpuid_flags |= CPUID_RDRAND ; } + if(cpuid_flag(7, 0, EBX, 18)){ cpuid_flags |= CPUID_RDSEED ; } + cpuid_check = 1 ; + return 0 ; + } + return 1 ; } /* #if defined(HAVE_INTEL_AVX1/2) at the tail of sha512 */ @@ -215,16 +231,19 @@ static int (*Transform_p)(Sha256* sha256) /* = _Transform */; #define XTRANSFORM(sha256, B) (*Transform_p)(sha256) static void set_Transform(void) { - set_cpuid_flags() ; + if(set_cpuid_flags())return ; #if defined(HAVE_INTEL_AVX2) - if(IS_INTEL_AVX2){ Transform_p = Transform_AVX1_RORX; return ; } - Transform_p = Transform_AVX2 ; /* for avoiding warning,"not used" */ + if(IS_INTEL_AVX2){ + Transform_p = Transform_AVX1_RORX; return ; + Transform_p = Transform_AVX2 ; + /* for avoiding warning,"not used" */ + } #endif #if defined(HAVE_INTEL_AVX1) Transform_p = ((IS_INTEL_AVX1) ? Transform_AVX1 : Transform) ; return ; #endif - Transform_p = Transform ; + Transform_p = Transform ; return ; } #else @@ -237,10 +256,10 @@ static void set_Transform(void) { /* Dummy for saving MM_REGs on behalf of Transform */ #if defined(HAVE_INTEL_AVX2)&& !defined(HAVE_INTEL_AVX1) -#define SAVE_XMM_YMM __asm__ volatile("vpxor %%ymm7, %%ymm7, %%ymm7":::\ +#define SAVE_XMM_YMM __asm__ volatile("or %%r8, %%r8":::\ "%ymm4","%ymm5","%ymm6","%ymm7","%ymm8","%ymm9","%ymm10","%ymm11","%ymm12","%ymm13","%ymm14","%ymm15") #elif defined(HAVE_INTEL_AVX1) -#define SAVE_XMM_YMM __asm__ volatile("vpxor %%xmm7, %%xmm7, %%xmm7":::\ +#define SAVE_XMM_YMM __asm__ volatile("or %%r8, %%r8":::\ "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10",\ "xmm11","xmm12","xmm13","xmm14","xmm15") #else @@ -304,7 +323,7 @@ int wc_InitSha256(Sha256* sha256) #if !defined(FREESCALE_MMCAU) -static const __attribute__((aligned(32))) word32 K[64] = { +static const ALIGN32 word32 K[64] = { 0x428A2F98L, 0x71374491L, 0xB5C0FBCFL, 0xE9B5DBA5L, 0x3956C25BL, 0x59F111F1L, 0x923F82A4L, 0xAB1C5ED5L, 0xD807AA98L, 0x12835B01L, 0x243185BEL, 0x550C7DC3L, 0x72BE5D74L, 0x80DEB1FEL, 0x9BDC06A7L, @@ -322,25 +341,6 @@ static const __attribute__((aligned(32))) word32 K[64] = { #endif -#if defined(HAVE_INTEL_RORX) -#define ROTR(func, bits, x) \ -word32 func(word32 x) { word32 ret ;\ - __asm__ ("rorx $"#bits", %1, %0\n\t":"=r"(ret):"r"(x):) ;\ - return ret ;\ -} - -static INLINE ROTR(rotrFixed_2, 2, x) -static INLINE ROTR(rotrFixed_13, 13, x) -static INLINE ROTR(rotrFixed_22, 22, x) -static INLINE ROTR(rotrFixed_6, 6, x) -static INLINE ROTR(rotrFixed_11, 11, x) -static INLINE ROTR(rotrFixed_25, 25, x) -static INLINE ROTR(rotrFixed_7, 7, x) -static INLINE ROTR(rotrFixed_18, 18, x) -static INLINE ROTR(rotrFixed_17, 17, x) -static INLINE ROTR(rotrFixed_19, 19, x) -#endif - #if defined(FREESCALE_MMCAU) static int Transform(Sha256* sha256, byte* buf) @@ -356,18 +356,11 @@ static int Transform(Sha256* sha256, byte* buf) #define Maj(x,y,z) ((((x) | (y)) & (z)) | ((x) & (y))) #define R(x, n) (((x)&0xFFFFFFFFU)>>(n)) -#if !defined(HAVE_INTEL_RORX) #define S(x, n) rotrFixed(x, n) #define Sigma0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22)) #define Sigma1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25)) #define Gamma0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3)) #define Gamma1(x) (S(x, 17) ^ S(x, 19) ^ R(x, 10)) -#else -#define Sigma0(x) (rotrFixed_2(x) ^ rotrFixed_13(x) ^ rotrFixed_22(x)) -#define Sigma1(x) (rotrFixed_6(x) ^ rotrFixed_11(x) ^ rotrFixed_25(x)) -#define Gamma0(x) (rotrFixed_7(x) ^ rotrFixed_18(x) ^ R(x, 3)) -#define Gamma1(x) (rotrFixed_17(x) ^ rotrFixed_19(x) ^ R(x, 10)) -#endif #define RND(a,b,c,d,e,f,g,h,i) \ t0 = (h) + Sigma1((e)) + Ch((e), (f), (g)) + K[(i)] + W[(i)]; \ @@ -620,7 +613,7 @@ int wc_Sha256Hash(const byte* data, word32 len, byte* hash) #define S_6 %ebx #define S_7 %r9d -#define SSE_REGs "%esi", "%r8", "%edx", "%ebx","%r9","%r10","%r11","%r12","%r13","%r14","%r15" +#define SSE_REGs "%edi", "%ecx", "%esi", "%edx", "%ebx","%r8","%r9","%r10","%r11","%r12","%r13","%r14","%r15" #if defined(HAVE_INTEL_RORX) #define RND_STEP_RORX_1(a,b,c,d,e,f,g,h,i)\ @@ -718,7 +711,7 @@ __asm__ volatile("addl %"#h", %%r8d\n\t":::"%r8",SSE_REGs); \ /* r8b = h + w_k + Sigma1(e) + Ch(e,f,g) + Maj(a,b,c) */\ __asm__ volatile("addl %%edx, %%r8d\n\t":::"%edx","%r8",SSE_REGs);\ /* r8b = h + w_k + Sigma1(e) Sigma0(a) + Ch(e,f,g) + Maj(a,b,c) */\ -__asm__ volatile("movl %r8d, "#h"\n\t"); \ +__asm__ volatile("movl %%r8d, %"#h"\n\t":::"%r8", SSE_REGs); \ /* h = h + w_k + Sigma1(e) + Sigma0(a) + Ch(e,f,g) + Maj(a,b,c) */ \ #define RND_X(a,b,c,d,e,f,g,h,i) \ @@ -954,7 +947,7 @@ __asm__ volatile("movl %r8d, "#h"\n\t"); \ #define W_K_from_buff\ - { __attribute__ ((aligned (32))) word64 _buff[2] ; \ + { ALIGN32 word64 _buff[2] ; \ /* X0..3(xmm4..7) = sha256->buffer[0.15]; */\ _buff[0] = *(word64*)&sha256->buffer[0] ;\ _buff[1] = *(word64*)&sha256->buffer[2] ;\ diff --git a/wolfcrypt/src/sha512.c b/wolfcrypt/src/sha512.c index 92ade5941..f77c8a2cf 100755 --- a/wolfcrypt/src/sha512.c +++ b/wolfcrypt/src/sha512.c @@ -235,12 +235,16 @@ static word32 cpuid_flag(word32 leaf, word32 sub, word32 num, word32 bit) { return 0 ; } -static int set_cpuid_flags(void) { - if(cpuid_check==0) { +#define CHECK_SHA512 0x1 +#define CHECK_SHA384 0x2 + +static int set_cpuid_flags(int sha) { + if((cpuid_check & sha) ==0) { if(cpuid_flag(1, 0, ECX, 28)){ cpuid_flags |= CPUID_AVX1 ;} if(cpuid_flag(7, 0, EBX, 5)){ cpuid_flags |= CPUID_AVX2 ; } if(cpuid_flag(1, 0, ECX, 30)){ cpuid_flags |= CPUID_RDRAND ; } if(cpuid_flag(7, 0, EBX, 18)){ cpuid_flags |= CPUID_RDSEED ; } + cpuid_check |= sha ; return 0 ; } return 1 ; @@ -269,21 +273,19 @@ static int (*Transform_p)(Sha512* sha512) = _Transform ; #define Transform(sha512) (*Transform_p)(sha512) static void set_Transform(void) { - if(set_cpuid_flags()) return ; + if(set_cpuid_flags(CHECK_SHA512)) return ; -#if defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2) - Transform_p = ((IS_INTEL_AVX1) ? Transform_AVX1 : _Transform) ; -#elif defined(HAVE_INTEL_AVX2) - #if defined(HAVE_INTEL_AVX1) && defined(HAVE_INTEL_RORX) - if(IS_INTEL_AVX2) { Transform_p = Transform_AVX1_RORX ; return ; } - #endif - if(IS_INTEL_AVX2) { Transform_p = Transform_AVX2 ; return ; } - #if defined(HAVE_INTEL_AVX1) - Transform_p = ((IS_INTEL_AVX1) ? Transform_AVX1 : _Transform) ; - #endif -#else - Transform_p = ((IS_INTEL_AVX1) ? Transform_AVX1 : _Transform) ; +#if defined(HAVE_INTEL_AVX2) + if(IS_INTEL_AVX2){ + Transform_p = Transform_AVX1_RORX; return ; + Transform_p = Transform_AVX2 ; + /* for avoiding warning,"not used" */ + } #endif +#if defined(HAVE_INTEL_AVX1) + Transform_p = ((IS_INTEL_AVX1) ? Transform_AVX1 : _Transform) ; return ; +#endif + Transform_p = _Transform ; return ; } #else @@ -1344,7 +1346,7 @@ static int (*Transform384_p)(Sha384* sha384) = _Transform384 ; #define Transform384(sha384) (*Transform384_p)(sha384) static void set_Transform384(void) { - if(set_cpuid_flags())return ; + if(set_cpuid_flags(CHECK_SHA384))return ; #if defined(HAVE_INTEL_AVX1) && !defined(HAVE_INTEL_AVX2) Transform384_p = ((IS_INTEL_AVX1) ? Transform384_AVX1 : _Transform384) ;