Merge pull request #8826 from douzzer/20250530-ML-KEM-WC_SHA3_NO_ASM

20250530-ML-KEM-WC_SHA3_NO_ASM
This commit is contained in:
philljj
2025-05-30 16:25:48 -05:00
committed by GitHub
7 changed files with 56 additions and 26 deletions

View File

@@ -35,7 +35,10 @@ jobs:
name: Checkout wolfSSL name: Checkout wolfSSL
- name: install_multilib - name: install_multilib
run: sudo apt-get install -y gcc-multilib run: |
export DEBIAN_FRONTEND=noninteractive
sudo apt-get update
sudo apt-get install -y gcc-multilib
- name: Build wolfCrypt with extra type conversion warnings - name: Build wolfCrypt with extra type conversion warnings
run: | run: |

View File

@@ -1502,6 +1502,9 @@ do
ml-kem) ml-kem)
ENABLED_ML_KEM=yes ENABLED_ML_KEM=yes
;; ;;
noasm)
AM_CFLAGS="$AM_CFLAGS -DWC_MLKEM_NO_ASM"
;;
*) *)
AC_MSG_ERROR([Invalid choice for MLKEM []: $ENABLED_MLKEM.]) AC_MSG_ERROR([Invalid choice for MLKEM []: $ENABLED_MLKEM.])
break;; break;;

View File

@@ -143,6 +143,7 @@ $(obj)/wolfcrypt/src/chacha_asm.o: OBJECT_FILES_NON_STANDARD := y
$(obj)/wolfcrypt/src/poly1305_asm.o: asflags-y = $(WOLFSSL_ASFLAGS) $(ASFLAGS_FPU_DISABLE_SIMD_ENABLE) $(obj)/wolfcrypt/src/poly1305_asm.o: asflags-y = $(WOLFSSL_ASFLAGS) $(ASFLAGS_FPU_DISABLE_SIMD_ENABLE)
$(obj)/wolfcrypt/src/poly1305_asm.o: OBJECT_FILES_NON_STANDARD := y $(obj)/wolfcrypt/src/poly1305_asm.o: OBJECT_FILES_NON_STANDARD := y
$(obj)/wolfcrypt/src/wc_mlkem_asm.o: asflags-y = $(WOLFSSL_ASFLAGS) $(ASFLAGS_FPU_DISABLE_SIMD_ENABLE) $(obj)/wolfcrypt/src/wc_mlkem_asm.o: asflags-y = $(WOLFSSL_ASFLAGS) $(ASFLAGS_FPU_DISABLE_SIMD_ENABLE)
$(obj)/wolfcrypt/src/wc_mlkem_asm.o: OBJECT_FILES_NON_STANDARD := y
ifndef READELF ifndef READELF
READELF := readelf READELF := readelf

View File

@@ -248,9 +248,6 @@ while (0)
* *
* s The state. * s The state.
*/ */
#ifndef USE_INTEL_SPEEDUP
static
#endif
void BlockSha3(word64* s) void BlockSha3(word64* s)
{ {
byte i, x, y; byte i, x, y;
@@ -541,9 +538,6 @@ while (0)
* *
* s The state. * s The state.
*/ */
#ifndef USE_INTEL_SPEEDUP
static
#endif
void BlockSha3(word64* s) void BlockSha3(word64* s)
{ {
word64 n[25]; word64 n[25];

View File

@@ -65,6 +65,12 @@
#include <wolfssl/wolfcrypt/libwolfssl_sources.h> #include <wolfssl/wolfcrypt/libwolfssl_sources.h>
#ifdef WC_MLKEM_NO_ASM
#undef USE_INTEL_SPEEDUP
#undef WOLFSSL_ARMASM
#undef WOLFSSL_RISCV_ASM
#endif
#include <wolfssl/wolfcrypt/mlkem.h> #include <wolfssl/wolfcrypt/mlkem.h>
#include <wolfssl/wolfcrypt/wc_mlkem.h> #include <wolfssl/wolfcrypt/wc_mlkem.h>
#include <wolfssl/wolfcrypt/hash.h> #include <wolfssl/wolfcrypt/hash.h>

View File

@@ -69,6 +69,12 @@
#include <wolfssl/wolfcrypt/libwolfssl_sources.h> #include <wolfssl/wolfcrypt/libwolfssl_sources.h>
#ifdef WC_MLKEM_NO_ASM
#undef USE_INTEL_SPEEDUP
#undef WOLFSSL_ARMASM
#undef WOLFSSL_RISCV_ASM
#endif
#include <wolfssl/wolfcrypt/wc_mlkem.h> #include <wolfssl/wolfcrypt/wc_mlkem.h>
#include <wolfssl/wolfcrypt/cpuid.h> #include <wolfssl/wolfcrypt/cpuid.h>
@@ -2481,6 +2487,7 @@ static int mlkem_gen_matrix_k3_avx2(sword16* a, byte* seed, int transposed)
XMEMSET(state + 5, 0, sizeof(*state) * (25 - 5)); XMEMSET(state + 5, 0, sizeof(*state) * (25 - 5));
state[20] = W64LIT(0x8000000000000000); state[20] = W64LIT(0x8000000000000000);
for (i = 0; i < GEN_MATRIX_SIZE; i += SHA3_128_BYTES) { for (i = 0; i < GEN_MATRIX_SIZE; i += SHA3_128_BYTES) {
#ifndef WC_SHA3_NO_ASM
if (IS_INTEL_BMI2(cpuid_flags)) { if (IS_INTEL_BMI2(cpuid_flags)) {
sha3_block_bmi2(state); sha3_block_bmi2(state);
} }
@@ -2489,13 +2496,16 @@ static int mlkem_gen_matrix_k3_avx2(sword16* a, byte* seed, int transposed)
sha3_block_avx2(state); sha3_block_avx2(state);
RESTORE_VECTOR_REGISTERS(); RESTORE_VECTOR_REGISTERS();
} }
else { else
#endif /* !WC_SHA3_NO_ASM */
{
BlockSha3(state); BlockSha3(state);
} }
XMEMCPY(rand + i, state, SHA3_128_BYTES); XMEMCPY(rand + i, state, SHA3_128_BYTES);
} }
ctr0 = mlkem_rej_uniform_n_avx2(a, MLKEM_N, rand, GEN_MATRIX_SIZE); ctr0 = mlkem_rej_uniform_n_avx2(a, MLKEM_N, rand, GEN_MATRIX_SIZE);
while (ctr0 < MLKEM_N) { while (ctr0 < MLKEM_N) {
#ifndef WC_SHA3_NO_ASM
if (IS_INTEL_BMI2(cpuid_flags)) { if (IS_INTEL_BMI2(cpuid_flags)) {
sha3_block_bmi2(state); sha3_block_bmi2(state);
} }
@@ -2504,7 +2514,9 @@ static int mlkem_gen_matrix_k3_avx2(sword16* a, byte* seed, int transposed)
sha3_block_avx2(state); sha3_block_avx2(state);
RESTORE_VECTOR_REGISTERS(); RESTORE_VECTOR_REGISTERS();
} }
else { else
#endif /* !WC_SHA3_NO_ASM */
{
BlockSha3(state); BlockSha3(state);
} }
XMEMCPY(rand, state, SHA3_128_BYTES); XMEMCPY(rand, state, SHA3_128_BYTES);
@@ -3054,6 +3066,7 @@ static int mlkem_prf(wc_Shake* shake256, byte* out, unsigned int outLen,
unsigned int len = min(outLen, WC_SHA3_256_BLOCK_SIZE); unsigned int len = min(outLen, WC_SHA3_256_BLOCK_SIZE);
/* Perform a block operation on the state for next block of output. */ /* Perform a block operation on the state for next block of output. */
#ifndef WC_SHA3_NO_ASM
if (IS_INTEL_BMI2(cpuid_flags)) { if (IS_INTEL_BMI2(cpuid_flags)) {
sha3_block_bmi2(state); sha3_block_bmi2(state);
} }
@@ -3062,7 +3075,9 @@ static int mlkem_prf(wc_Shake* shake256, byte* out, unsigned int outLen,
sha3_block_avx2(state); sha3_block_avx2(state);
RESTORE_VECTOR_REGISTERS(); RESTORE_VECTOR_REGISTERS();
} }
else { else
#endif /* !WC_SHA3_NO_ASM */
{
BlockSha3(state); BlockSha3(state);
} }
@@ -3109,6 +3124,7 @@ int mlkem_kdf(byte* seed, int seedLen, byte* out, int outLen)
XMEMSET(state + len64 + 1, 0, (25 - len64 - 1) * sizeof(word64)); XMEMSET(state + len64 + 1, 0, (25 - len64 - 1) * sizeof(word64));
state[WC_SHA3_256_COUNT - 1] = W64LIT(0x8000000000000000); state[WC_SHA3_256_COUNT - 1] = W64LIT(0x8000000000000000);
#ifndef WC_SHA3_NO_ASM
if (IS_INTEL_BMI2(cpuid_flags)) { if (IS_INTEL_BMI2(cpuid_flags)) {
sha3_block_bmi2(state); sha3_block_bmi2(state);
} }
@@ -3116,7 +3132,9 @@ int mlkem_kdf(byte* seed, int seedLen, byte* out, int outLen)
sha3_block_avx2(state); sha3_block_avx2(state);
RESTORE_VECTOR_REGISTERS(); RESTORE_VECTOR_REGISTERS();
} }
else { else
#endif /* !WC_SHA3_NO_ASM */
{
BlockSha3(state); BlockSha3(state);
} }
XMEMCPY(out, state, outLen); XMEMCPY(out, state, outLen);
@@ -4121,6 +4139,7 @@ static int mlkem_get_noise_eta2_avx2(MLKEM_PRF_T* prf, sword16* p,
state[WC_SHA3_256_COUNT - 1] = W64LIT(0x8000000000000000); state[WC_SHA3_256_COUNT - 1] = W64LIT(0x8000000000000000);
/* Perform a block operation on the state for next block of output. */ /* Perform a block operation on the state for next block of output. */
#ifndef WC_SHA3_NO_ASM
if (IS_INTEL_BMI2(cpuid_flags)) { if (IS_INTEL_BMI2(cpuid_flags)) {
sha3_block_bmi2(state); sha3_block_bmi2(state);
} }
@@ -4128,7 +4147,9 @@ static int mlkem_get_noise_eta2_avx2(MLKEM_PRF_T* prf, sword16* p,
sha3_block_avx2(state); sha3_block_avx2(state);
RESTORE_VECTOR_REGISTERS(); RESTORE_VECTOR_REGISTERS();
} }
else { else
#endif /* !WC_SHA3_NO_ASM */
{
BlockSha3(state); BlockSha3(state);
} }
mlkem_cbd_eta2_avx2(p, (byte*)state); mlkem_cbd_eta2_avx2(p, (byte*)state);

View File

@@ -220,23 +220,25 @@ WOLFSSL_API int wc_Shake256_Copy(wc_Shake* src, wc_Sha3* dst);
WOLFSSL_API int wc_Sha3_GetFlags(wc_Sha3* sha3, word32* flags); WOLFSSL_API int wc_Sha3_GetFlags(wc_Sha3* sha3, word32* flags);
#endif #endif
WOLFSSL_LOCAL void BlockSha3(word64 *s);
#ifdef WC_SHA3_NO_ASM #ifdef WC_SHA3_NO_ASM
/* asm speedups disabled */ /* asm speedups disabled */
#if defined(USE_INTEL_SPEEDUP) && !defined(WC_MLKEM_NO_ASM)
/* native ML-KEM uses this directly. */
WOLFSSL_LOCAL void sha3_blocksx4_avx2(word64* s);
#endif
#elif defined(USE_INTEL_SPEEDUP) #elif defined(USE_INTEL_SPEEDUP)
WOLFSSL_LOCAL void sha3_block_n_bmi2(word64* s, const byte* data, word32 n, WOLFSSL_LOCAL void sha3_block_n_bmi2(word64* s, const byte* data, word32 n,
word64 c); word64 c);
WOLFSSL_LOCAL void sha3_block_bmi2(word64* s); WOLFSSL_LOCAL void sha3_block_bmi2(word64* s);
WOLFSSL_LOCAL void sha3_block_avx2(word64* s); WOLFSSL_LOCAL void sha3_block_avx2(word64* s);
WOLFSSL_LOCAL void sha3_blocksx4_avx2(word64* s); WOLFSSL_LOCAL void sha3_blocksx4_avx2(word64* s);
WOLFSSL_LOCAL void BlockSha3(word64 *s);
#elif defined(__aarch64__) && defined(WOLFSSL_ARMASM) #elif defined(__aarch64__) && defined(WOLFSSL_ARMASM)
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3 #ifdef WOLFSSL_ARMASM_CRYPTO_SHA3
WOLFSSL_LOCAL void BlockSha3_crypto(word64 *s); WOLFSSL_LOCAL void BlockSha3_crypto(word64 *s);
#endif #endif
WOLFSSL_LOCAL void BlockSha3_base(word64 *s); WOLFSSL_LOCAL void BlockSha3_base(word64 *s);
WOLFSSL_LOCAL void BlockSha3(word64 *s);
#elif defined(WOLFSSL_ARMASM) || defined(WOLFSSL_RISCV_ASM)
WOLFSSL_LOCAL void BlockSha3(word64 *s);
#endif #endif
#ifdef __cplusplus #ifdef __cplusplus