From 66376bed287c36952b3485f5276d3b3d4751b38e Mon Sep 17 00:00:00 2001 From: Daniel Pouzzner Date: Fri, 7 Mar 2025 17:55:21 -0600 Subject: [PATCH] wolfcrypt/src/misc.c: in xorbufout() and xorbuf(), call XorWords() directly via a simplified path if all args are already aligned to WOLFSSL_WORD_SIZE (fixes performance regression from dc2e2631bc). configure.ac: add a "Conflicting asm settings" error check at end, since our configuration currently blows up if --enable-intelasm and --disable-asm are combined. --- configure.ac | 7 ++ wolfcrypt/src/misc.c | 168 +++++++++++++++++++++++++++---------------- 2 files changed, 114 insertions(+), 61 deletions(-) diff --git a/configure.ac b/configure.ac index 2e5cae220..fdd740220 100644 --- a/configure.ac +++ b/configure.ac @@ -10064,6 +10064,13 @@ if test "x$ENABLED_LINUXKM" = "xyes"; then fi fi +AS_IF([test "$ENABLED_ASM" = "no" && (test "$ENABLED_INTELASM" != "no" || \ + test "$ENABLED_AESNI" != "no" || \ + test "$ENABLED_ARMASM" != "no" || \ + test "$ENABLED_RISCV_ASM" != "no" || \ + test "$ENABLED_SP_ASM" != "no")], + [AC_MSG_ERROR([Conflicting asm settings.])]) + # The following AM_CONDITIONAL statements set flags for use in the Makefiles. # Some of these affect build targets and objects, some trigger different # test scripts for make check. diff --git a/wolfcrypt/src/misc.c b/wolfcrypt/src/misc.c index 8e234b508..1d5b06740 100644 --- a/wolfcrypt/src/misc.c +++ b/wolfcrypt/src/misc.c @@ -407,14 +407,18 @@ WC_MISC_STATIC WC_INLINE void ByteReverseWords64(word64* out, const word64* in, #endif /* WORD64_AVAILABLE && !WOLFSSL_NO_WORD64_OPS */ #ifndef WOLFSSL_NO_XOR_OPS + +/* Leave no doubt that WOLFSSL_WORD_SIZE is a power of 2. */ +wc_static_assert((WOLFSSL_WORD_SIZE & (WOLFSSL_WORD_SIZE - 1)) == 0); + /* This routine performs a bitwise XOR operation of <*r> and <*a> for number of wolfssl_words, placing the result in <*r>. */ WC_MISC_STATIC WC_INLINE void XorWordsOut(wolfssl_word** r, const wolfssl_word** a, const wolfssl_word** b, word32 n) { - word32 i; + const wolfssl_word *e = *a + n; - for (i = 0; i < n; i++) + while (*a < e) *((*r)++) = *((*a)++) ^ *((*b)++); } @@ -424,48 +428,68 @@ counts, placing the result in <*buf>. */ WC_MISC_STATIC WC_INLINE void xorbufout(void* out, const void* buf, const void* mask, word32 count) { - word32 i; - byte* o; - const byte* b; - const byte* m; + byte* o = (byte*)out; + const byte* b = (const byte*)buf; + const byte* m = (const byte*)mask; - o = (byte*)out; - b = (const byte*)buf; - m = (const byte*)mask; + /* type-punning helpers */ + union { + byte* bp; + wolfssl_word* wp; + } tpo; + union { + const byte* bp; + const wolfssl_word* wp; + } tpb, tpm; + if (((((wc_ptr_t)o) & (WOLFSSL_WORD_SIZE - 1)) == 0) && + ((((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) == 0) && + ((((wc_ptr_t)m) & (WOLFSSL_WORD_SIZE - 1)) == 0)) + { + /* All buffers are already aligned. Possible to XOR by words without + * fixup. + */ - if (((wc_ptr_t)o) % WOLFSSL_WORD_SIZE == - ((wc_ptr_t)b) % WOLFSSL_WORD_SIZE && - ((wc_ptr_t)b) % WOLFSSL_WORD_SIZE == - ((wc_ptr_t)m) % WOLFSSL_WORD_SIZE) { - /* type-punning helpers */ - union { - byte* bp; - wolfssl_word* wp; - } tpo; - union { - const byte* bp; - const wolfssl_word* wp; - } tpb, tpm; - /* Alignment checks out. Possible to XOR words. */ - /* Move alignment so that it lines up with a - * WOLFSSL_WORD_SIZE boundary */ - while (((wc_ptr_t)b) % WOLFSSL_WORD_SIZE != 0 && count > 0) { - *(o++) = (byte)(*(b++) ^ *(m++)); - count--; - } tpo.bp = o; tpb.bp = b; tpm.bp = m; - XorWordsOut( &tpo.wp, &tpb.wp, &tpm.wp, count / WOLFSSL_WORD_SIZE); + XorWordsOut(&tpo.wp, &tpb.wp, &tpm.wp, count >> WOLFSSL_WORD_SIZE_LOG2); o = tpo.bp; b = tpb.bp; m = tpm.bp; - count %= WOLFSSL_WORD_SIZE; + count &= (WOLFSSL_WORD_SIZE - 1); + } + else if ((((wc_ptr_t)o) & (WOLFSSL_WORD_SIZE - 1)) == + (((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) && + (((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) == + (((wc_ptr_t)m) & (WOLFSSL_WORD_SIZE - 1))) + { + /* Alignment can be fixed up to allow XOR by words. */ + + /* Perform bytewise xor until pointers are aligned to + * WOLFSSL_WORD_SIZE. + */ + while ((((wc_ptr_t)b & (WOLFSSL_WORD_SIZE - 1)) != 0) && (count > 0)) + { + *o++ = (byte)(*b++ ^ *m++); + count--; + } + + tpo.bp = o; + tpb.bp = b; + tpm.bp = m; + XorWordsOut(&tpo.wp, &tpb.wp, &tpm.wp, count >> WOLFSSL_WORD_SIZE_LOG2); + o = tpo.bp; + b = tpb.bp; + m = tpm.bp; + count &= (WOLFSSL_WORD_SIZE - 1); + } + + while (count > 0) { + *o++ = (byte)(*b++ ^ *m++); + count--; } - for (i = 0; i < count; i++) - o[i] = (byte)(b[i] ^ m[i]); } /* This routine performs a bitwise XOR operation of <*r> and <*a> for number @@ -473,9 +497,9 @@ of wolfssl_words, placing the result in <*r>. */ WC_MISC_STATIC WC_INLINE void XorWords(wolfssl_word** r, const wolfssl_word** a, word32 n) { - word32 i; + const wolfssl_word *e = *a + n; - for (i = 0; i < n; i++) + while (*a < e) *((*r)++) ^= *((*a)++); } @@ -484,36 +508,55 @@ counts, placing the result in <*buf>. */ WC_MISC_STATIC WC_INLINE void xorbuf(void* buf, const void* mask, word32 count) { - /* Leave no doubt that WOLFSSL_WORD_SIZE is a power of 2. */ - wc_static_assert((WOLFSSL_WORD_SIZE & (WOLFSSL_WORD_SIZE - 1)) == 0); + byte* b = (byte*)buf; + const byte* m = (const byte*)mask; - word32 i; - byte* b; - const byte* m; + /* type-punning helpers */ + union { + byte* bp; + wolfssl_word* wp; + } tpb; + union { + const byte* bp; + const wolfssl_word* wp; + } tpm; - b = (byte*)buf; - m = (const byte*)mask; - - if ((((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) == - (((wc_ptr_t)m) & (WOLFSSL_WORD_SIZE - 1))) + if ((((wc_ptr_t)buf & (WOLFSSL_WORD_SIZE - 1)) == 0) && + (((wc_ptr_t)mask & (WOLFSSL_WORD_SIZE - 1)) == 0)) { - /* type-punning helpers */ - union { - byte* bp; - wolfssl_word* wp; - } tpb; - union { - const byte* bp; - const wolfssl_word* wp; - } tpm; - /* Alignment checks out. Possible to XOR words. */ - /* Move alignment so that it lines up with a - * WOLFSSL_WORD_SIZE boundary */ - while ((((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) != 0 && count > 0) + /* Both buffers are already aligned. Possible to XOR by words without + * fixup. + */ + + tpb.bp = b; + tpm.bp = m; + /* Work around false positives from linuxkm CONFIG_FORTIFY_SOURCE. */ + #if defined(WOLFSSL_LINUXKM) && defined(CONFIG_FORTIFY_SOURCE) + PRAGMA_GCC_DIAG_PUSH; + PRAGMA_GCC("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") + #endif + XorWords(&tpb.wp, &tpm.wp, count >> WOLFSSL_WORD_SIZE_LOG2); + #if defined(WOLFSSL_LINUXKM) && defined(CONFIG_FORTIFY_SOURCE) + PRAGMA_GCC_DIAG_POP; + #endif + b = tpb.bp; + m = tpm.bp; + count &= (WOLFSSL_WORD_SIZE - 1); + } + else if (((wc_ptr_t)buf & (WOLFSSL_WORD_SIZE - 1)) == + ((wc_ptr_t)mask & (WOLFSSL_WORD_SIZE - 1))) + { + /* Alignment can be fixed up to allow XOR by words. */ + + /* Perform bytewise xor until pointers are aligned to + * WOLFSSL_WORD_SIZE. + */ + while ((((wc_ptr_t)b & (WOLFSSL_WORD_SIZE - 1)) != 0) && (count > 0)) { *(b++) ^= *(m++); count--; } + tpb.bp = b; tpm.bp = m; /* Work around false positives from linuxkm CONFIG_FORTIFY_SOURCE. */ @@ -530,10 +573,13 @@ WC_MISC_STATIC WC_INLINE void xorbuf(void* buf, const void* mask, word32 count) count &= (WOLFSSL_WORD_SIZE - 1); } - for (i = 0; i < count; i++) - b[i] ^= m[i]; + while (count > 0) { + *b++ ^= *m++; + count--; + } } -#endif + +#endif /* !WOLFSSL_NO_XOR_OPS */ #ifndef WOLFSSL_NO_FORCE_ZERO /* This routine fills the first len bytes of the memory area pointed by mem