From 60c249960232856bdc7f19cabc9eb39a197448ba Mon Sep 17 00:00:00 2001
From: Daniel Pouzzner
Date: Fri, 27 Sep 2024 17:15:17 -0500
Subject: [PATCH] wolfssl/wolfcrypt/types.h: when defining fallback do-nothing SAVE_VECTOR_REGISTERS2(), also define SAVE_VECTOR_REGISTERS2_DOES_NOTHING, and likewise for fallback CAN_SAVE_VECTOR_REGISTERS, define CAN_SAVE_VECTOR_REGISTERS_ALWAYS_TRUE;

wolfcrypt/src/aes.c:
* when SAVE_VECTOR_REGISTERS2_DOES_NOTHING, define do-nothing VECTOR_REGISTERS_PUSH and VECTOR_REGISTERS_POP, to mollify Coverity CONSTANT_EXPRESSION_RESULT;
* in AesGcmDecryptUpdate_aesni(), omit " && (c != NULL)" clause from computation of endA argument to AesGcmAadUpdate_aesni(), to mollify Coverity FORWARD_NULL (impermissible nullness is already checked and BAD_FUNC_ARGed by the sole caller, wc_AesGcmDecryptUpdate());

wolfcrypt/src/misc.c: add readUnalignedWord64(), writeUnalignedWord64(), readUnalignedWords64(), and writeUnalignedWords64(), for safe word64 access to possibly-unaligned data;

wolfcrypt/src/wc_kyber_poly.c: use readUnalignedWords64() and readUnalignedWord64() to mitigate sanitizer-reported "load of misaligned address".
--- wolfcrypt/src/aes.c | 13 +++++- wolfcrypt/src/misc.c | 46 ++++++++++++++++++ wolfcrypt/src/wc_kyber_poly.c | 88 ++++++++++++++++------------------- wolfssl/wolfcrypt/misc.h | 8 ++++ wolfssl/wolfcrypt/types.h | 2 + 5 files changed, 107 insertions(+), 50 deletions(-) diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index 4c9a8d181..e76f66f13 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -4759,7 +4759,7 @@ int wc_AesSetIV(Aes* aes, const byte* iv) #ifdef WC_C_DYNAMIC_FALLBACK -#define VECTOR_REGISTERS_PUSH { \ +#define VECTOR_REGISTERS_PUSH { \ int orig_use_aesni = aes->use_aesni; \ if (aes->use_aesni && (SAVE_VECTOR_REGISTERS2() != 0)) { \ aes->use_aesni = 0; \ @@ -4774,6 +4774,15 @@ int wc_AesSetIV(Aes* aes, const byte* iv) } \ WC_DO_NOTHING +#elif defined(SAVE_VECTOR_REGISTERS2_DOES_NOTHING) + +#define VECTOR_REGISTERS_PUSH { \ + WC_DO_NOTHING + +#define VECTOR_REGISTERS_POP \ + } \ + WC_DO_NOTHING + #else #define VECTOR_REGISTERS_PUSH { \ @@ -9796,7 +9805,7 @@ static WARN_UNUSED_RESULT int AesGcmDecryptUpdate_aesni( ASSERT_SAVED_VECTOR_REGISTERS(); /* Hash in A, the Authentication Data */ - ret = AesGcmAadUpdate_aesni(aes, a, aSz, (cSz > 0) && (c != NULL)); + ret = AesGcmAadUpdate_aesni(aes, a, aSz, cSz > 0); if (ret != 0) return ret; diff --git a/wolfcrypt/src/misc.c b/wolfcrypt/src/misc.c index 7a9bcb02c..e4b53d91f 100644 --- a/wolfcrypt/src/misc.c +++ b/wolfcrypt/src/misc.c @@ -211,6 +211,52 @@ WC_MISC_STATIC WC_INLINE void ByteReverseWords(word32* out, const word32* in, #if defined(WORD64_AVAILABLE) && !defined(WOLFSSL_NO_WORD64_OPS) +WC_MISC_STATIC WC_INLINE word64 readUnalignedWord64(const byte *in) +{ + if (((wc_ptr_t)in & (wc_ptr_t)(sizeof(word64) - 1U)) == (wc_ptr_t)0) + return *(word64 *)in; + else { + word64 out; + XMEMCPY(&out, in, sizeof(word64)); + return out; + } +} + +WC_MISC_STATIC WC_INLINE word64 writeUnalignedWord64(void *out, word64 in) +{ + if (((wc_ptr_t)out & (wc_ptr_t)(sizeof(word64) - 1U)) == (wc_ptr_t)0) + 
*(word64 *)out = in; + else { + XMEMCPY(out, &in, sizeof(word64)); + } + return in; +} + +WC_MISC_STATIC WC_INLINE void readUnalignedWords64(word64 *out, const byte *in, + size_t count) +{ + if (((wc_ptr_t)in & (wc_ptr_t)(sizeof(word64) - 1U)) == (wc_ptr_t)0) { + const word64 *in_word64 = (const word64 *)in; + while (count-- > 0) + *out++ = *in_word64++; + } + else { + XMEMCPY(out, in, count * sizeof(word64)); + } +} + +WC_MISC_STATIC WC_INLINE void writeUnalignedWords64(byte *out, const word64 *in, + size_t count) +{ + if (((wc_ptr_t)out & (wc_ptr_t)(sizeof(word64) - 1U)) == (wc_ptr_t)0) { + word64 *out_word64 = (word64 *)out; + while (count-- > 0) + *out_word64++ = *in++; + } + else { + XMEMCPY(out, in, count * sizeof(word64)); + } +} WC_MISC_STATIC WC_INLINE word64 rotlFixed64(word64 x, word64 y) { diff --git a/wolfcrypt/src/wc_kyber_poly.c b/wolfcrypt/src/wc_kyber_poly.c index 492d159a8..4514ad317 100644 --- a/wolfcrypt/src/wc_kyber_poly.c +++ b/wolfcrypt/src/wc_kyber_poly.c @@ -67,6 +67,13 @@ #ifdef WOLFSSL_WC_KYBER +#ifdef NO_INLINE + #include +#else + #define WOLFSSL_MISC_INCLUDED + #include +#endif + /* Declared in wc_kyber.c to stop compiler optimizer from simplifying. */ extern volatile sword16 kyber_opt_blocker; @@ -1560,14 +1567,11 @@ static int kyber_gen_matrix_k3_avx2(sword16* a, byte* seed, int transposed) a += 4 * KYBER_N; } - state[0] = ((word64*)seed)[0]; - state[1] = ((word64*)seed)[1]; - state[2] = ((word64*)seed)[2]; - state[3] = ((word64*)seed)[3]; + readUnalignedWords64(state, seed, 4); /* Transposed value same as not. 
*/ state[4] = 0x1f0000 + (2 << 8) + 2; XMEMSET(state + 5, 0, sizeof(*state) * (25 - 5)); - state[20] = 0x8000000000000000UL; + state[20] = W64LIT(0x8000000000000000); for (i = 0; i < GEN_MATRIX_SIZE; i += SHA3_128_BYTES) { if (IS_INTEL_BMI2(cpuid_flags)) { sha3_block_bmi2(state); @@ -1748,14 +1752,11 @@ static int kyber_gen_matrix_k2_aarch64(sword16* a, byte* seed, int transposed) a += 3 * KYBER_N; - state[0] = ((word64*)seed)[0]; - state[1] = ((word64*)seed)[1]; - state[2] = ((word64*)seed)[2]; - state[3] = ((word64*)seed)[3]; + readUnalignedWords64(state, seed, 4); /* Transposed value same as not. */ state[4] = 0x1f0000 + (1 << 8) + 1; XMEMSET(state + 5, 0, sizeof(*state) * (25 - 5)); - state[20] = 0x8000000000000000UL; + state[20] = W64LIT(0x8000000000000000); BlockSha3(state); p = (byte*)state; ctr0 = kyber_rej_uniform_neon(a, KYBER_N, p, XOF_BLOCK_SIZE); @@ -1899,14 +1900,11 @@ static int kyber_gen_matrix_k4_aarch64(sword16* a, byte* seed, int transposed) a += 3 * KYBER_N; } - state[0] = ((word64*)seed)[0]; - state[1] = ((word64*)seed)[1]; - state[2] = ((word64*)seed)[2]; - state[3] = ((word64*)seed)[3]; + readUnalignedWords64(state, seed, 4); /* Transposed value same as not. 
*/ state[4] = 0x1f0000 + (3 << 8) + 3; XMEMSET(state + 5, 0, sizeof(*state) * (25 - 5)); - state[20] = 0x8000000000000000UL; + state[20] = W64LIT(0x8000000000000000); BlockSha3(state); p = (byte*)state; ctr0 = kyber_rej_uniform_neon(a, KYBER_N, p, XOF_BLOCK_SIZE); @@ -2047,18 +2045,15 @@ static int kyber_prf(wc_Shake* shake256, byte* out, unsigned int outLen, const byte* key) { #ifdef USE_INTEL_SPEEDUP - int i; word64 state[25]; (void)shake256; - for (i = 0; i < KYBER_SYM_SZ / 8; i++) { - state[i] = ((word64*)key)[i]; - } + readUnalignedWords64(state, key, KYBER_SYM_SZ / sizeof(word64)); state[KYBER_SYM_SZ / 8] = 0x1f00 | key[KYBER_SYM_SZ]; XMEMSET(state + KYBER_SYM_SZ / 8 + 1, 0, (25 - KYBER_SYM_SZ / 8 - 1) * sizeof(word64)); - state[WC_SHA3_256_COUNT - 1] = 0x8000000000000000UL; + state[WC_SHA3_256_COUNT - 1] = W64LIT(0x8000000000000000); if (IS_INTEL_BMI2(cpuid_flags)) { sha3_block_bmi2(state); @@ -2098,15 +2093,12 @@ static int kyber_prf(wc_Shake* shake256, byte* out, unsigned int outLen, int kyber_kdf(byte* seed, int seedLen, byte* out, int outLen) { word64 state[25]; - int i; - int len64 = seedLen / 8; + word32 len64 = seedLen / 8; - for (i = 0; i < len64; i++) { - state[i] = ((word64*)seed)[i]; - } + readUnalignedWords64(state, seed, len64); state[len64] = 0x1f; XMEMSET(state + len64 + 1, 0, (25 - len64 - 1) * sizeof(word64)); - state[WC_SHA3_256_COUNT - 1] = 0x8000000000000000UL; + state[WC_SHA3_256_COUNT - 1] = W64LIT(0x8000000000000000); if (IS_INTEL_BMI2(cpuid_flags)) { sha3_block_bmi2(state); @@ -2136,15 +2128,12 @@ int kyber_kdf(byte* seed, int seedLen, byte* out, int outLen) int kyber_kdf(byte* seed, int seedLen, byte* out, int outLen) { word64 state[25]; - int i; - int len64 = seedLen / 8; + word32 len64 = seedLen / 8; - for (i = 0; i < len64; i++) { - state[i] = ((word64*)seed)[i]; - } + readUnalignedWords64(state, seed, len64); state[len64] = 0x1f; XMEMSET(state + len64 + 1, 0, (25 - len64 - 1) * sizeof(word64)); - state[WC_SHA3_256_COUNT - 1] = 
0x8000000000000000UL; + state[WC_SHA3_256_COUNT - 1] = W64LIT(0x8000000000000000); BlockSha3(state); XMEMCPY(out, state, outLen); @@ -2199,10 +2188,11 @@ static unsigned int kyber_rej_uniform_c(sword16* p, unsigned int len, i = 0; for (j = 0; j < minJ; j += 6) { /* Use 48 bits (6 bytes) as four 12-bit integers. */ - sword16 v0 = (*(word64*)r) & 0xfff; - sword16 v1 = ((*(word64*)r) >> 12) & 0xfff; - sword16 v2 = ((*(word64*)r) >> 24) & 0xfff; - sword16 v3 = ((*(word64*)r) >> 36) & 0xfff; + word64 r_word = readUnalignedWord64(r); + sword16 v0 = r_word & 0xfff; + sword16 v1 = (r_word >> 12) & 0xfff; + sword16 v2 = (r_word >> 24) & 0xfff; + sword16 v3 = (r_word >> 36) & 0xfff; p[i] = v0 & (0 - (v0 < KYBER_Q)); i += v0 < KYBER_Q; @@ -2219,10 +2209,11 @@ static unsigned int kyber_rej_uniform_c(sword16* p, unsigned int len, if (j < rLen) { for (; (i + 4 < len) && (j < rLen); j += 6) { /* Use 48 bits (6 bytes) as four 12-bit integers. */ - sword16 v0 = (*(word64*)r) & 0xfff; - sword16 v1 = ((*(word64*)r) >> 12) & 0xfff; - sword16 v2 = ((*(word64*)r) >> 24) & 0xfff; - sword16 v3 = ((*(word64*)r) >> 36) & 0xfff; + word64 r_word = readUnalignedWord64(r); + sword16 v0 = r_word & 0xfff; + sword16 v1 = (r_word >> 12) & 0xfff; + sword16 v2 = (r_word >> 24) & 0xfff; + sword16 v3 = (r_word >> 36) & 0xfff; p[i] = v0; i += v0 < KYBER_Q; @@ -2238,10 +2229,11 @@ static unsigned int kyber_rej_uniform_c(sword16* p, unsigned int len, } for (; (i < len) && (j < rLen); j += 6) { /* Use 48 bits (6 bytes) as four 12-bit integers. */ - sword16 v0 = (*(word64*)r) & 0xfff; - sword16 v1 = ((*(word64*)r) >> 12) & 0xfff; - sword16 v2 = ((*(word64*)r) >> 24) & 0xfff; - sword16 v3 = ((*(word64*)r) >> 36) & 0xfff; + word64 r_word = readUnalignedWord64(r); + sword16 v0 = r_word & 0xfff; + sword16 v1 = (r_word >> 12) & 0xfff; + sword16 v2 = (r_word >> 24) & 0xfff; + sword16 v3 = (r_word >> 36) & 0xfff; /* Reject first 12-bit integer if greater than or equal to q. 
*/ if (v0 < KYBER_Q) { @@ -2511,9 +2503,9 @@ static void kyber_cbd_eta2(sword16* p, const byte* r) #endif /* Take the next 8 bytes, little endian, as a 64 bit value. */ #ifdef BIG_ENDIAN_ORDER - word64 t = ByteReverseWord64(*(word64*)r); + word64 t = ByteReverseWord64(readUnalignedWord64(r)); #else - word64 t = *(word64*)r; + word64 t = readUnalignedWord64(r); #endif word64 d; /* Add second bits to first. */ @@ -3023,7 +3015,7 @@ static void kyber_get_noise_eta3_aarch64(byte* rand, byte* seed, byte o) state[3] = ((word64*)seed)[3]; state[4] = 0x1f00 + o; XMEMSET(state + 5, 0, sizeof(*state) * (25 - 5)); - state[16] = 0x8000000000000000UL; + state[16] = W64LIT(0x8000000000000000); BlockSha3(state); XMEMCPY(rand , state, SHA3_256_BYTES); BlockSha3(state); @@ -3083,7 +3075,7 @@ static void kyber_get_noise_eta2_aarch64(byte* rand, byte* seed, byte o) /* Transposed value same as not. */ state[4] = 0x1f00 + o; XMEMSET(state + 5, 0, sizeof(*state) * (25 - 5)); - state[16] = 0x8000000000000000UL; + state[16] = W64LIT(0x8000000000000000); BlockSha3(state); } diff --git a/wolfssl/wolfcrypt/misc.h b/wolfssl/wolfcrypt/misc.h index cc068db44..9acc31b12 100644 --- a/wolfssl/wolfcrypt/misc.h +++ b/wolfssl/wolfcrypt/misc.h @@ -76,6 +76,14 @@ int ConstantCompare(const byte* a, const byte* b, int length); #ifdef WORD64_AVAILABLE WOLFSSL_LOCAL +word64 readUnalignedWord64(const byte *in); +WOLFSSL_LOCAL +word64 writeUnalignedWord64(void *out, word64 in); +WOLFSSL_LOCAL +void readUnalignedWords64(word64 *out, const byte *in, size_t count); +WOLFSSL_LOCAL +void writeUnalignedWords64(byte *out, const word64 *in, size_t count); +WOLFSSL_LOCAL word64 rotlFixed64(word64 x, word64 y); WOLFSSL_LOCAL word64 rotrFixed64(word64 x, word64 y); diff --git a/wolfssl/wolfcrypt/types.h b/wolfssl/wolfcrypt/types.h index 6ff073622..217772297 100644 --- a/wolfssl/wolfcrypt/types.h +++ b/wolfssl/wolfcrypt/types.h @@ -1729,9 +1729,11 @@ typedef struct w64wrapper { #endif #ifndef SAVE_VECTOR_REGISTERS2 
#define SAVE_VECTOR_REGISTERS2() 0 + #define SAVE_VECTOR_REGISTERS2_DOES_NOTHING #endif #ifndef CAN_SAVE_VECTOR_REGISTERS #define CAN_SAVE_VECTOR_REGISTERS() 1 + #define CAN_SAVE_VECTOR_REGISTERS_ALWAYS_TRUE #endif #ifndef WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL #define WC_DEBUG_SET_VECTOR_REGISTERS_RETVAL(x) WC_DO_NOTHING