From f153e651020fb54a63e0cdda134a9dca088f1430 Mon Sep 17 00:00:00 2001 From: Sean Parkinson Date: Mon, 5 Sep 2022 11:38:40 +1000 Subject: [PATCH] SHA-3: check BMI1 availability for ASM Added benchmarking of SHAKE128 and SHAKE256. --- wolfcrypt/benchmark/benchmark.c | 218 +++++++++++++++++++++++++++++++- wolfcrypt/benchmark/benchmark.h | 2 + wolfcrypt/src/cpuid.c | 2 + wolfcrypt/src/evp.c | 16 +-- wolfcrypt/src/hash.c | 36 +++--- wolfcrypt/src/sha3.c | 6 +- wolfssl/wolfcrypt/cpuid.h | 2 + 7 files changed, 251 insertions(+), 31 deletions(-) diff --git a/wolfcrypt/benchmark/benchmark.c b/wolfcrypt/benchmark/benchmark.c index c31d2c222..6cb587bcc 100644 --- a/wolfcrypt/benchmark/benchmark.c +++ b/wolfcrypt/benchmark/benchmark.c @@ -405,9 +405,12 @@ #define BENCH_SHA3_512 0x00000800 #define BENCH_SHA3 (BENCH_SHA3_224 | BENCH_SHA3_256 | \ BENCH_SHA3_384 | BENCH_SHA3_512) -#define BENCH_RIPEMD 0x00001000 -#define BENCH_BLAKE2B 0x00002000 -#define BENCH_BLAKE2S 0x00004000 +#define BENCH_SHAKE128 0x00001000 +#define BENCH_SHAKE256 0x00002000 +#define BENCH_SHAKE (BENCH_SHAKE128 | BENCH_SHAKE256) +#define BENCH_RIPEMD 0x00004000 +#define BENCH_BLAKE2B 0x00008000 +#define BENCH_BLAKE2S 0x00010000 /* MAC algorithms. 
*/ #define BENCH_CMAC 0x00000001 @@ -620,6 +623,15 @@ static const bench_alg bench_digest_opt[] = { #ifndef WOLFSSL_NOSHA3_512 { "-sha3-512", BENCH_SHA3_512 }, #endif + #if !defined(WOLFSSL_NO_SHAKE128) || !defined(WOLFSSL_NO_SHAKE256) + { "-shake", BENCH_SHAKE }, + #endif + #ifndef WOLFSSL_NO_SHAKE128 + { "-shake128", BENCH_SHAKE128 }, + #endif + #ifndef WOLFSSL_NO_SHAKE256 + { "-shake256", BENCH_SHAKE256 }, + #endif #endif #ifdef WOLFSSL_RIPEMD { "-ripemd", BENCH_RIPEMD }, @@ -2065,6 +2077,28 @@ static void* benchmarks_do(void* args) #endif } #endif /* WOLFSSL_NOSHA3_512 */ + #ifndef WOLFSSL_NO_SHAKE128 + if (bench_all || (bench_digest_algs & BENCH_SHAKE128)) { + #ifndef NO_SW_BENCH + bench_shake128(0); + #endif + #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA3) && \ + !defined(NO_HW_BENCH) + bench_shake128(1); + #endif + } + #endif /* WOLFSSL_NO_SHAKE128 */ + #ifndef WOLFSSL_NO_SHAKE256 + if (bench_all || (bench_digest_algs & BENCH_SHAKE256)) { + #ifndef NO_SW_BENCH + bench_shake256(0); + #endif + #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_SHA3) && \ + !defined(NO_HW_BENCH) + bench_shake256(1); + #endif + } + #endif /* WOLFSSL_NO_SHAKE256 */ #endif #ifdef WOLFSSL_RIPEMD if (bench_all || (bench_digest_algs & BENCH_RIPEMD)) @@ -4645,6 +4679,184 @@ exit: WC_FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT); } #endif /* WOLFSSL_NOSHA3_512 */ + +#ifndef WOLFSSL_NO_SHAKE128 +void bench_shake128(int useDeviceID) +{ + wc_Shake hash[BENCH_MAX_PENDING]; + double start; + int ret = 0, i, count = 0, times, pending = 0; + WC_DECLARE_ARRAY(digest, byte, BENCH_MAX_PENDING, WC_SHA3_128_BLOCK_SIZE, HEAP_HINT); + WC_INIT_ARRAY(digest, byte, BENCH_MAX_PENDING, WC_SHA3_128_BLOCK_SIZE, HEAP_HINT); + + /* clear for done cleanup */ + XMEMSET(hash, 0, sizeof(hash)); + + if (digest_stream) { + /* init keys */ + for (i = 0; i < BENCH_MAX_PENDING; i++) { + ret = wc_InitShake128(&hash[i], HEAP_HINT, + useDeviceID ? 
devId : INVALID_DEVID); + if (ret != 0) { + printf("InitShake128 failed, ret = %d\n", ret); + goto exit; + } + } + + bench_stats_start(&count, &start); + do { + for (times = 0; times < numBlocks || pending > 0; ) { + bench_async_poll(&pending); + + /* while free pending slots in queue, submit ops */ + for (i = 0; i < BENCH_MAX_PENDING; i++) { + if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), + 0, &times, numBlocks, &pending)) { + ret = wc_Shake128_Update(&hash[i], bench_plain, + BENCH_SIZE); + if (!bench_async_handle(&ret, + BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, &pending)) { + goto exit_shake128; + } + } + } /* for i */ + } /* for times */ + count += times; + + times = 0; + do { + bench_async_poll(&pending); + for (i = 0; i < BENCH_MAX_PENDING; i++) { + if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), + 0, &times, numBlocks, &pending)) { + ret = wc_Shake128_Final(&hash[i], digest[i], + WC_SHA3_128_BLOCK_SIZE); + if (!bench_async_handle(&ret, + BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, &pending)) { + goto exit_shake128; + } + } + } /* for i */ + } while (pending > 0); + } while (bench_stats_sym_check(start)); + } + else { + bench_stats_start(&count, &start); + do { + for (times = 0; times < numBlocks; times++) { + ret = wc_InitShake128(hash, HEAP_HINT, INVALID_DEVID); + if (ret == 0) + ret = wc_Shake128_Update(hash, bench_plain, BENCH_SIZE); + if (ret == 0) + ret = wc_Shake128_Final(hash, digest[0], + WC_SHA3_128_BLOCK_SIZE); + if (ret != 0) + goto exit_shake128; + } /* for times */ + count += times; + } while (bench_stats_sym_check(start)); + } +exit_shake128: + bench_stats_sym_finish("SHAKE128", useDeviceID, count, bench_size, start, ret); + +exit: + + for (i = 0; i < BENCH_MAX_PENDING; i++) { + wc_Shake128_Free(&hash[i]); + } + + WC_FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT); +} +#endif /* WOLFSSL_NO_SHAKE128 */ + +#ifndef WOLFSSL_NO_SHAKE256 +void bench_shake256(int useDeviceID) +{ + wc_Shake hash[BENCH_MAX_PENDING]; + double start; + int ret = 0, i, 
count = 0, times, pending = 0; + WC_DECLARE_ARRAY(digest, byte, BENCH_MAX_PENDING, WC_SHA3_256_BLOCK_SIZE, HEAP_HINT); + WC_INIT_ARRAY(digest, byte, BENCH_MAX_PENDING, WC_SHA3_256_BLOCK_SIZE, HEAP_HINT); + + /* clear for done cleanup */ + XMEMSET(hash, 0, sizeof(hash)); + + if (digest_stream) { + /* init keys */ + for (i = 0; i < BENCH_MAX_PENDING; i++) { + ret = wc_InitShake256(&hash[i], HEAP_HINT, + useDeviceID ? devId : INVALID_DEVID); + if (ret != 0) { + printf("InitShake256 failed, ret = %d\n", ret); + goto exit; + } + } + + bench_stats_start(&count, &start); + do { + for (times = 0; times < numBlocks || pending > 0; ) { + bench_async_poll(&pending); + + /* while free pending slots in queue, submit ops */ + for (i = 0; i < BENCH_MAX_PENDING; i++) { + if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), + 0, &times, numBlocks, &pending)) { + ret = wc_Shake256_Update(&hash[i], bench_plain, + BENCH_SIZE); + if (!bench_async_handle(&ret, + BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, &pending)) { + goto exit_shake256; + } + } + } /* for i */ + } /* for times */ + count += times; + + times = 0; + do { + bench_async_poll(&pending); + for (i = 0; i < BENCH_MAX_PENDING; i++) { + if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&hash[i]), + 0, &times, numBlocks, &pending)) { + ret = wc_Shake256_Final(&hash[i], digest[i], + WC_SHA3_256_BLOCK_SIZE); + if (!bench_async_handle(&ret, + BENCH_ASYNC_GET_DEV(&hash[i]), 0, &times, &pending)) { + goto exit_shake256; + } + } + } /* for i */ + } while (pending > 0); + } while (bench_stats_sym_check(start)); + } + else { + bench_stats_start(&count, &start); + do { + for (times = 0; times < numBlocks; times++) { + ret = wc_InitShake256(hash, HEAP_HINT, INVALID_DEVID); + if (ret == 0) + ret = wc_Shake256_Update(hash, bench_plain, BENCH_SIZE); + if (ret == 0) + ret = wc_Shake256_Final(hash, digest[0], + WC_SHA3_256_BLOCK_SIZE); + if (ret != 0) + goto exit_shake256; + } /* for times */ + count += times; + } while (bench_stats_sym_check(start)); + } 
+exit_shake256: + bench_stats_sym_finish("SHAKE256", useDeviceID, count, bench_size, start, ret); + +exit: + + for (i = 0; i < BENCH_MAX_PENDING; i++) { + wc_Shake256_Free(&hash[i]); + } + + WC_FREE_ARRAY(digest, BENCH_MAX_PENDING, HEAP_HINT); +} +#endif /* WOLFSSL_NO_SHAKE256 */ #endif diff --git a/wolfcrypt/benchmark/benchmark.h b/wolfcrypt/benchmark/benchmark.h index 2cc16d14c..e24f073db 100644 --- a/wolfcrypt/benchmark/benchmark.h +++ b/wolfcrypt/benchmark/benchmark.h @@ -68,6 +68,8 @@ void bench_sha3_224(int useDeviceID); void bench_sha3_256(int useDeviceID); void bench_sha3_384(int useDeviceID); void bench_sha3_512(int useDeviceID); +void bench_shake128(int useDeviceID); +void bench_shake256(int useDeviceID); int bench_ripemd(void); void bench_cmac(void); void bench_scrypt(void); diff --git a/wolfcrypt/src/cpuid.c b/wolfcrypt/src/cpuid.c index 5f5b05a99..676f76b45 100644 --- a/wolfcrypt/src/cpuid.c +++ b/wolfcrypt/src/cpuid.c @@ -97,6 +97,8 @@ if (cpuid_flag(1, 0, ECX, 25)) { cpuid_flags |= CPUID_AESNI ; } if (cpuid_flag(7, 0, EBX, 19)) { cpuid_flags |= CPUID_ADX ; } if (cpuid_flag(1, 0, ECX, 22)) { cpuid_flags |= CPUID_MOVBE ; } + if (cpuid_flag(7, 0, EBX, 3)) { cpuid_flags |= CPUID_BMI1 ; } + cpuid_check = 1; } } diff --git a/wolfcrypt/src/evp.c b/wolfcrypt/src/evp.c index 3fa982ef4..4559bc795 100644 --- a/wolfcrypt/src/evp.c +++ b/wolfcrypt/src/evp.c @@ -4864,10 +4864,10 @@ int wolfSSL_EVP_MD_type(const WOLFSSL_EVP_MD* type) case WC_HASH_TYPE_MD5_SHA: case WC_HASH_TYPE_BLAKE2B: case WC_HASH_TYPE_BLAKE2S: - #ifndef WOLFSSL_NO_SHAKE128 + #if defined(WOLFSSL_SHA3) && defined(WOLFSSL_SHAKE128) case WC_HASH_TYPE_SHAKE128: #endif - #ifndef WOLFSSL_NO_SHAKE256 + #if defined(WOLFSSL_SHA3) && defined(WOLFSSL_SHAKE256) case WC_HASH_TYPE_SHAKE256: #endif default: @@ -5391,10 +5391,10 @@ int wolfSSL_EVP_MD_type(const WOLFSSL_EVP_MD* type) case WC_HASH_TYPE_MD5_SHA: case WC_HASH_TYPE_BLAKE2B: case WC_HASH_TYPE_BLAKE2S: - #ifndef WOLFSSL_NO_SHAKE128 + #if 
defined(WOLFSSL_SHA3) && defined(WOLFSSL_SHAKE128) case WC_HASH_TYPE_SHAKE128: #endif - #ifndef WOLFSSL_NO_SHAKE256 + #if defined(WOLFSSL_SHA3) && defined(WOLFSSL_SHAKE256) case WC_HASH_TYPE_SHAKE256: #endif default: @@ -7274,10 +7274,10 @@ int wolfSSL_EVP_MD_type(const WOLFSSL_EVP_MD* type) case WC_HASH_TYPE_MD5_SHA: case WC_HASH_TYPE_BLAKE2B: case WC_HASH_TYPE_BLAKE2S: - #ifndef WOLFSSL_NO_SHAKE128 + #if defined(WOLFSSL_SHA3) && defined(WOLFSSL_SHAKE128) case WC_HASH_TYPE_SHAKE128: #endif - #ifndef WOLFSSL_NO_SHAKE256 + #if defined(WOLFSSL_SHA3) && defined(WOLFSSL_SHAKE256) case WC_HASH_TYPE_SHAKE256: #endif default: @@ -7387,10 +7387,10 @@ int wolfSSL_EVP_MD_type(const WOLFSSL_EVP_MD* type) case WC_HASH_TYPE_MD5_SHA: case WC_HASH_TYPE_BLAKE2B: case WC_HASH_TYPE_BLAKE2S: - #ifndef WOLFSSL_NO_SHAKE128 + #if defined(WOLFSSL_SHA3) && defined(WOLFSSL_SHAKE128) case WC_HASH_TYPE_SHAKE128: #endif - #ifndef WOLFSSL_NO_SHAKE256 + #if defined(WOLFSSL_SHA3) && defined(WOLFSSL_SHAKE256) case WC_HASH_TYPE_SHAKE256: #endif default: diff --git a/wolfcrypt/src/hash.c b/wolfcrypt/src/hash.c index 6a4687eda..9048d00b4 100644 --- a/wolfcrypt/src/hash.c +++ b/wolfcrypt/src/hash.c @@ -396,10 +396,10 @@ int wc_HashGetDigestSize(enum wc_HashType hash_type) break; /* Not Supported */ - #ifndef WOLFSSL_NO_SHAKE128 + #if defined(WOLFSSL_SHA3) && defined(WOLFSSL_SHAKE128) case WC_HASH_TYPE_SHAKE128: #endif - #ifndef WOLFSSL_NO_SHAKE256 + #if defined(WOLFSSL_SHA3) && defined(WOLFSSL_SHAKE256) case WC_HASH_TYPE_SHAKE256: #endif case WC_HASH_TYPE_NONE: @@ -509,10 +509,10 @@ int wc_HashGetBlockSize(enum wc_HashType hash_type) break; /* Not Supported */ - #ifndef WOLFSSL_NO_SHAKE128 + #if defined(WOLFSSL_SHA3) && defined(WOLFSSL_SHAKE128) case WC_HASH_TYPE_SHAKE128: #endif - #ifndef WOLFSSL_NO_SHAKE256 + #if defined(WOLFSSL_SHA3) && defined(WOLFSSL_SHAKE256) case WC_HASH_TYPE_SHAKE256: #endif case WC_HASH_TYPE_NONE: @@ -627,10 +627,10 @@ int wc_Hash(enum wc_HashType hash_type, const byte* 
data, case WC_HASH_TYPE_MD4: case WC_HASH_TYPE_BLAKE2B: case WC_HASH_TYPE_BLAKE2S: - #ifndef WOLFSSL_NO_SHAKE128 + #if defined(WOLFSSL_SHA3) && defined(WOLFSSL_SHAKE128) case WC_HASH_TYPE_SHAKE128: #endif - #ifndef WOLFSSL_NO_SHAKE256 + #if defined(WOLFSSL_SHA3) && defined(WOLFSSL_SHAKE256) case WC_HASH_TYPE_SHAKE256: #endif case WC_HASH_TYPE_NONE: @@ -725,10 +725,10 @@ int wc_HashInit_ex(wc_HashAlg* hash, enum wc_HashType type, void* heap, case WC_HASH_TYPE_MD4: case WC_HASH_TYPE_BLAKE2B: case WC_HASH_TYPE_BLAKE2S: - #ifndef WOLFSSL_NO_SHAKE128 + #if defined(WOLFSSL_SHA3) && defined(WOLFSSL_SHAKE128) case WC_HASH_TYPE_SHAKE128: #endif - #ifndef WOLFSSL_NO_SHAKE256 + #if defined(WOLFSSL_SHA3) && defined(WOLFSSL_SHAKE256) case WC_HASH_TYPE_SHAKE256: #endif case WC_HASH_TYPE_NONE: @@ -831,10 +831,10 @@ int wc_HashUpdate(wc_HashAlg* hash, enum wc_HashType type, const byte* data, case WC_HASH_TYPE_MD4: case WC_HASH_TYPE_BLAKE2B: case WC_HASH_TYPE_BLAKE2S: - #ifndef WOLFSSL_NO_SHAKE128 + #if defined(WOLFSSL_SHA3) && defined(WOLFSSL_SHAKE128) case WC_HASH_TYPE_SHAKE128: #endif - #ifndef WOLFSSL_NO_SHAKE256 + #if defined(WOLFSSL_SHA3) && defined(WOLFSSL_SHAKE256) case WC_HASH_TYPE_SHAKE256: #endif case WC_HASH_TYPE_NONE: @@ -928,10 +928,10 @@ int wc_HashFinal(wc_HashAlg* hash, enum wc_HashType type, byte* out) case WC_HASH_TYPE_MD4: case WC_HASH_TYPE_BLAKE2B: case WC_HASH_TYPE_BLAKE2S: - #ifndef WOLFSSL_NO_SHAKE128 + #if defined(WOLFSSL_SHA3) && defined(WOLFSSL_SHAKE128) case WC_HASH_TYPE_SHAKE128: #endif - #ifndef WOLFSSL_NO_SHAKE256 + #if defined(WOLFSSL_SHA3) && defined(WOLFSSL_SHAKE256) case WC_HASH_TYPE_SHAKE256: #endif case WC_HASH_TYPE_NONE: @@ -1037,10 +1037,10 @@ int wc_HashFree(wc_HashAlg* hash, enum wc_HashType type) case WC_HASH_TYPE_MD4: case WC_HASH_TYPE_BLAKE2B: case WC_HASH_TYPE_BLAKE2S: - #ifndef WOLFSSL_NO_SHAKE128 + #if defined(WOLFSSL_SHA3) && defined(WOLFSSL_SHAKE128) case WC_HASH_TYPE_SHAKE128: #endif - #ifndef WOLFSSL_NO_SHAKE256 + #if 
defined(WOLFSSL_SHA3) && defined(WOLFSSL_SHAKE256) case WC_HASH_TYPE_SHAKE256: #endif case WC_HASH_TYPE_NONE: @@ -1113,10 +1113,10 @@ int wc_HashSetFlags(wc_HashAlg* hash, enum wc_HashType type, word32 flags) case WC_HASH_TYPE_BLAKE2B: case WC_HASH_TYPE_BLAKE2S: case WC_HASH_TYPE_NONE: - #ifndef WOLFSSL_NO_SHAKE128 + #if defined(WOLFSSL_SHA3) && defined(WOLFSSL_SHAKE128) case WC_HASH_TYPE_SHAKE128: #endif - #ifndef WOLFSSL_NO_SHAKE256 + #if defined(WOLFSSL_SHA3) && defined(WOLFSSL_SHAKE256) case WC_HASH_TYPE_SHAKE256: #endif default: @@ -1185,10 +1185,10 @@ int wc_HashGetFlags(wc_HashAlg* hash, enum wc_HashType type, word32* flags) case WC_HASH_TYPE_MD4: case WC_HASH_TYPE_BLAKE2B: case WC_HASH_TYPE_BLAKE2S: - #ifndef WOLFSSL_NO_SHAKE128 + #if defined(WOLFSSL_SHA3) && defined(WOLFSSL_SHAKE128) case WC_HASH_TYPE_SHAKE128: #endif - #ifndef WOLFSSL_NO_SHAKE256 + #if defined(WOLFSSL_SHA3) && defined(WOLFSSL_SHAKE256) case WC_HASH_TYPE_SHAKE256: #endif case WC_HASH_TYPE_NONE: diff --git a/wolfcrypt/src/sha3.c b/wolfcrypt/src/sha3.c index c3b595135..8804bb38b 100644 --- a/wolfcrypt/src/sha3.c +++ b/wolfcrypt/src/sha3.c @@ -614,7 +614,7 @@ static int InitSha3(wc_Sha3* sha3) if (!cpuid_flags_set) { cpuid_flags = cpuid_get_flags(); cpuid_flags_set = 1; - if (IS_INTEL_BMI2(cpuid_flags)) { + if (IS_INTEL_BMI1(cpuid_flags) && IS_INTEL_BMI2(cpuid_flags)) { sha3_block = sha3_block_bmi2; sha3_block_n = sha3_block_n_bmi2; } @@ -719,7 +719,9 @@ static int Sha3Final(wc_Sha3* sha3, byte padChar, byte* hash, byte p, word32 l) #endif sha3->t[sha3->i ] = padChar; sha3->t[rate - 1] |= 0x80; - XMEMSET(sha3->t + sha3->i + 1, 0, rate - 1 - sha3->i - 1); + if (rate - 1 > (word32)sha3->i + 1) { + XMEMSET(sha3->t + sha3->i + 1, 0, rate - 1 - (sha3->i + 1)); + } for (i = 0; i < p; i++) { sha3->s[i] ^= Load64BitBigEndian(sha3->t + 8 * i); } diff --git a/wolfssl/wolfcrypt/cpuid.h b/wolfssl/wolfcrypt/cpuid.h index 52692db07..c7638f0af 100644 --- a/wolfssl/wolfcrypt/cpuid.h +++ 
b/wolfssl/wolfcrypt/cpuid.h @@ -49,6 +49,7 @@ #define CPUID_AESNI 0x0020 #define CPUID_ADX 0x0040 /* ADCX, ADOX */ #define CPUID_MOVBE 0x0080 /* Move and byte swap */ + #define CPUID_BMI1 0x0100 /* ANDN */ #define IS_INTEL_AVX1(f) ((f) & CPUID_AVX1) #define IS_INTEL_AVX2(f) ((f) & CPUID_AVX2) @@ -58,6 +59,7 @@ #define IS_INTEL_AESNI(f) ((f) & CPUID_AESNI) #define IS_INTEL_ADX(f) ((f) & CPUID_ADX) #define IS_INTEL_MOVBE(f) ((f) & CPUID_MOVBE) + #define IS_INTEL_BMI1(f) ((f) & CPUID_BMI1) #endif