wolfcrypt/src/misc.c: in xorbufout() and xorbuf(), call XorWords() directly via a simplified path if all args are already aligned to WOLFSSL_WORD_SIZE (fixes performance regression from dc2e2631bc).

configure.ac: add a "Conflicting asm settings" error check at end, since our configuration currently blows up if --enable-intelasm and --disable-asm are combined.
This commit is contained in:
Daniel Pouzzner
2025-03-07 17:55:21 -06:00
parent c3f24568ff
commit 66376bed28
2 changed files with 114 additions and 61 deletions

View File

@ -10064,6 +10064,13 @@ if test "x$ENABLED_LINUXKM" = "xyes"; then
fi fi
fi fi
AS_IF([test "$ENABLED_ASM" = "no" && (test "$ENABLED_INTELASM" != "no" || \
test "$ENABLED_AESNI" != "no" || \
test "$ENABLED_ARMASM" != "no" || \
test "$ENABLED_RISCV_ASM" != "no" || \
test "$ENABLED_SP_ASM" != "no")],
[AC_MSG_ERROR([Conflicting asm settings.])])
# The following AM_CONDITIONAL statements set flags for use in the Makefiles. # The following AM_CONDITIONAL statements set flags for use in the Makefiles.
# Some of these affect build targets and objects, some trigger different # Some of these affect build targets and objects, some trigger different
# test scripts for make check. # test scripts for make check.

View File

@ -407,14 +407,18 @@ WC_MISC_STATIC WC_INLINE void ByteReverseWords64(word64* out, const word64* in,
#endif /* WORD64_AVAILABLE && !WOLFSSL_NO_WORD64_OPS */ #endif /* WORD64_AVAILABLE && !WOLFSSL_NO_WORD64_OPS */
#ifndef WOLFSSL_NO_XOR_OPS #ifndef WOLFSSL_NO_XOR_OPS
/* Leave no doubt that WOLFSSL_WORD_SIZE is a power of 2. */
wc_static_assert((WOLFSSL_WORD_SIZE & (WOLFSSL_WORD_SIZE - 1)) == 0);
/* This routine performs a bitwise XOR operation of <*r> and <*a> for <n> number /* This routine performs a bitwise XOR operation of <*r> and <*a> for <n> number
of wolfssl_words, placing the result in <*r>. */ of wolfssl_words, placing the result in <*r>. */
WC_MISC_STATIC WC_INLINE void XorWordsOut(wolfssl_word** r, WC_MISC_STATIC WC_INLINE void XorWordsOut(wolfssl_word** r,
const wolfssl_word** a, const wolfssl_word** b, word32 n) const wolfssl_word** a, const wolfssl_word** b, word32 n)
{ {
word32 i; const wolfssl_word *e = *a + n;
for (i = 0; i < n; i++) while (*a < e)
*((*r)++) = *((*a)++) ^ *((*b)++); *((*r)++) = *((*a)++) ^ *((*b)++);
} }
@ -424,48 +428,68 @@ counts, placing the result in <*buf>. */
WC_MISC_STATIC WC_INLINE void xorbufout(void* out, const void* buf, WC_MISC_STATIC WC_INLINE void xorbufout(void* out, const void* buf,
const void* mask, word32 count) const void* mask, word32 count)
{ {
word32 i; byte* o = (byte*)out;
byte* o; const byte* b = (const byte*)buf;
const byte* b; const byte* m = (const byte*)mask;
const byte* m;
o = (byte*)out; /* type-punning helpers */
b = (const byte*)buf; union {
m = (const byte*)mask; byte* bp;
wolfssl_word* wp;
} tpo;
union {
const byte* bp;
const wolfssl_word* wp;
} tpb, tpm;
if (((((wc_ptr_t)o) & (WOLFSSL_WORD_SIZE - 1)) == 0) &&
((((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) == 0) &&
((((wc_ptr_t)m) & (WOLFSSL_WORD_SIZE - 1)) == 0))
{
/* All buffers are already aligned. Possible to XOR by words without
* fixup.
*/
if (((wc_ptr_t)o) % WOLFSSL_WORD_SIZE ==
((wc_ptr_t)b) % WOLFSSL_WORD_SIZE &&
((wc_ptr_t)b) % WOLFSSL_WORD_SIZE ==
((wc_ptr_t)m) % WOLFSSL_WORD_SIZE) {
/* type-punning helpers */
union {
byte* bp;
wolfssl_word* wp;
} tpo;
union {
const byte* bp;
const wolfssl_word* wp;
} tpb, tpm;
/* Alignment checks out. Possible to XOR words. */
/* Move alignment so that it lines up with a
* WOLFSSL_WORD_SIZE boundary */
while (((wc_ptr_t)b) % WOLFSSL_WORD_SIZE != 0 && count > 0) {
*(o++) = (byte)(*(b++) ^ *(m++));
count--;
}
tpo.bp = o; tpo.bp = o;
tpb.bp = b; tpb.bp = b;
tpm.bp = m; tpm.bp = m;
XorWordsOut( &tpo.wp, &tpb.wp, &tpm.wp, count / WOLFSSL_WORD_SIZE); XorWordsOut(&tpo.wp, &tpb.wp, &tpm.wp, count >> WOLFSSL_WORD_SIZE_LOG2);
o = tpo.bp; o = tpo.bp;
b = tpb.bp; b = tpb.bp;
m = tpm.bp; m = tpm.bp;
count %= WOLFSSL_WORD_SIZE; count &= (WOLFSSL_WORD_SIZE - 1);
}
else if ((((wc_ptr_t)o) & (WOLFSSL_WORD_SIZE - 1)) ==
(((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) &&
(((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) ==
(((wc_ptr_t)m) & (WOLFSSL_WORD_SIZE - 1)))
{
/* Alignment can be fixed up to allow XOR by words. */
/* Perform bytewise xor until pointers are aligned to
* WOLFSSL_WORD_SIZE.
*/
while ((((wc_ptr_t)b & (WOLFSSL_WORD_SIZE - 1)) != 0) && (count > 0))
{
*o++ = (byte)(*b++ ^ *m++);
count--;
}
tpo.bp = o;
tpb.bp = b;
tpm.bp = m;
XorWordsOut(&tpo.wp, &tpb.wp, &tpm.wp, count >> WOLFSSL_WORD_SIZE_LOG2);
o = tpo.bp;
b = tpb.bp;
m = tpm.bp;
count &= (WOLFSSL_WORD_SIZE - 1);
}
while (count > 0) {
*o++ = (byte)(*b++ ^ *m++);
count--;
} }
for (i = 0; i < count; i++)
o[i] = (byte)(b[i] ^ m[i]);
} }
/* This routine performs a bitwise XOR operation of <*r> and <*a> for <n> number /* This routine performs a bitwise XOR operation of <*r> and <*a> for <n> number
@ -473,9 +497,9 @@ of wolfssl_words, placing the result in <*r>. */
WC_MISC_STATIC WC_INLINE void XorWords(wolfssl_word** r, const wolfssl_word** a, WC_MISC_STATIC WC_INLINE void XorWords(wolfssl_word** r, const wolfssl_word** a,
word32 n) word32 n)
{ {
word32 i; const wolfssl_word *e = *a + n;
for (i = 0; i < n; i++) while (*a < e)
*((*r)++) ^= *((*a)++); *((*r)++) ^= *((*a)++);
} }
@ -484,36 +508,55 @@ counts, placing the result in <*buf>. */
WC_MISC_STATIC WC_INLINE void xorbuf(void* buf, const void* mask, word32 count) WC_MISC_STATIC WC_INLINE void xorbuf(void* buf, const void* mask, word32 count)
{ {
/* Leave no doubt that WOLFSSL_WORD_SIZE is a power of 2. */ byte* b = (byte*)buf;
wc_static_assert((WOLFSSL_WORD_SIZE & (WOLFSSL_WORD_SIZE - 1)) == 0); const byte* m = (const byte*)mask;
word32 i; /* type-punning helpers */
byte* b; union {
const byte* m; byte* bp;
wolfssl_word* wp;
} tpb;
union {
const byte* bp;
const wolfssl_word* wp;
} tpm;
b = (byte*)buf; if ((((wc_ptr_t)buf & (WOLFSSL_WORD_SIZE - 1)) == 0) &&
m = (const byte*)mask; (((wc_ptr_t)mask & (WOLFSSL_WORD_SIZE - 1)) == 0))
if ((((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) ==
(((wc_ptr_t)m) & (WOLFSSL_WORD_SIZE - 1)))
{ {
/* type-punning helpers */ /* Both buffers are already aligned. Possible to XOR by words without
union { * fixup.
byte* bp; */
wolfssl_word* wp;
} tpb; tpb.bp = b;
union { tpm.bp = m;
const byte* bp; /* Work around false positives from linuxkm CONFIG_FORTIFY_SOURCE. */
const wolfssl_word* wp; #if defined(WOLFSSL_LINUXKM) && defined(CONFIG_FORTIFY_SOURCE)
} tpm; PRAGMA_GCC_DIAG_PUSH;
/* Alignment checks out. Possible to XOR words. */ PRAGMA_GCC("GCC diagnostic ignored \"-Wmaybe-uninitialized\"")
/* Move alignment so that it lines up with a #endif
* WOLFSSL_WORD_SIZE boundary */ XorWords(&tpb.wp, &tpm.wp, count >> WOLFSSL_WORD_SIZE_LOG2);
while ((((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) != 0 && count > 0) #if defined(WOLFSSL_LINUXKM) && defined(CONFIG_FORTIFY_SOURCE)
PRAGMA_GCC_DIAG_POP;
#endif
b = tpb.bp;
m = tpm.bp;
count &= (WOLFSSL_WORD_SIZE - 1);
}
else if (((wc_ptr_t)buf & (WOLFSSL_WORD_SIZE - 1)) ==
((wc_ptr_t)mask & (WOLFSSL_WORD_SIZE - 1)))
{
/* Alignment can be fixed up to allow XOR by words. */
/* Perform bytewise xor until pointers are aligned to
* WOLFSSL_WORD_SIZE.
*/
while ((((wc_ptr_t)b & (WOLFSSL_WORD_SIZE - 1)) != 0) && (count > 0))
{ {
*(b++) ^= *(m++); *(b++) ^= *(m++);
count--; count--;
} }
tpb.bp = b; tpb.bp = b;
tpm.bp = m; tpm.bp = m;
/* Work around false positives from linuxkm CONFIG_FORTIFY_SOURCE. */ /* Work around false positives from linuxkm CONFIG_FORTIFY_SOURCE. */
@ -530,10 +573,13 @@ WC_MISC_STATIC WC_INLINE void xorbuf(void* buf, const void* mask, word32 count)
count &= (WOLFSSL_WORD_SIZE - 1); count &= (WOLFSSL_WORD_SIZE - 1);
} }
for (i = 0; i < count; i++) while (count > 0) {
b[i] ^= m[i]; *b++ ^= *m++;
count--;
}
} }
#endif
#endif /* !WOLFSSL_NO_XOR_OPS */
#ifndef WOLFSSL_NO_FORCE_ZERO #ifndef WOLFSSL_NO_FORCE_ZERO
/* This routine fills the first len bytes of the memory area pointed by mem /* This routine fills the first len bytes of the memory area pointed by mem