wolfcrypt/src/misc.c: in xorbufout() and xorbuf(), call XorWords() directly via a simplified path if all args are already aligned to WOLFSSL_WORD_SIZE (fixes performance regression from dc2e2631bc).

configure.ac: add a "Conflicting asm settings" error check at end, since our configuration currently blows up if --enable-intelasm and --disable-asm are combined.
This commit is contained in:
Daniel Pouzzner
2025-03-07 17:55:21 -06:00
parent c3f24568ff
commit 66376bed28
2 changed files with 114 additions and 61 deletions

View File

@ -10064,6 +10064,13 @@ if test "x$ENABLED_LINUXKM" = "xyes"; then
fi
fi
AS_IF([test "$ENABLED_ASM" = "no" && (test "$ENABLED_INTELASM" != "no" || \
test "$ENABLED_AESNI" != "no" || \
test "$ENABLED_ARMASM" != "no" || \
test "$ENABLED_RISCV_ASM" != "no" || \
test "$ENABLED_SP_ASM" != "no")],
[AC_MSG_ERROR([Conflicting asm settings.])])
# The following AM_CONDITIONAL statements set flags for use in the Makefiles.
# Some of these affect build targets and objects, some trigger different
# test scripts for make check.

View File

@ -407,14 +407,18 @@ WC_MISC_STATIC WC_INLINE void ByteReverseWords64(word64* out, const word64* in,
#endif /* WORD64_AVAILABLE && !WOLFSSL_NO_WORD64_OPS */
#ifndef WOLFSSL_NO_XOR_OPS
/* Leave no doubt that WOLFSSL_WORD_SIZE is a power of 2. */
wc_static_assert((WOLFSSL_WORD_SIZE & (WOLFSSL_WORD_SIZE - 1)) == 0);
/* This routine performs a bitwise XOR operation of <*r> and <*a> for <n> number
of wolfssl_words, placing the result in <*r>. */
WC_MISC_STATIC WC_INLINE void XorWordsOut(wolfssl_word** r,
const wolfssl_word** a, const wolfssl_word** b, word32 n)
{
word32 i;
const wolfssl_word *e = *a + n;
for (i = 0; i < n; i++)
while (*a < e)
*((*r)++) = *((*a)++) ^ *((*b)++);
}
@ -424,48 +428,68 @@ counts, placing the result in <*buf>. */
WC_MISC_STATIC WC_INLINE void xorbufout(void* out, const void* buf,
const void* mask, word32 count)
{
word32 i;
byte* o;
const byte* b;
const byte* m;
byte* o = (byte*)out;
const byte* b = (const byte*)buf;
const byte* m = (const byte*)mask;
o = (byte*)out;
b = (const byte*)buf;
m = (const byte*)mask;
/* type-punning helpers */
union {
byte* bp;
wolfssl_word* wp;
} tpo;
union {
const byte* bp;
const wolfssl_word* wp;
} tpb, tpm;
if (((((wc_ptr_t)o) & (WOLFSSL_WORD_SIZE - 1)) == 0) &&
((((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) == 0) &&
((((wc_ptr_t)m) & (WOLFSSL_WORD_SIZE - 1)) == 0))
{
/* All buffers are already aligned. Possible to XOR by words without
* fixup.
*/
if (((wc_ptr_t)o) % WOLFSSL_WORD_SIZE ==
((wc_ptr_t)b) % WOLFSSL_WORD_SIZE &&
((wc_ptr_t)b) % WOLFSSL_WORD_SIZE ==
((wc_ptr_t)m) % WOLFSSL_WORD_SIZE) {
/* type-punning helpers */
union {
byte* bp;
wolfssl_word* wp;
} tpo;
union {
const byte* bp;
const wolfssl_word* wp;
} tpb, tpm;
/* Alignment checks out. Possible to XOR words. */
/* Move alignment so that it lines up with a
* WOLFSSL_WORD_SIZE boundary */
while (((wc_ptr_t)b) % WOLFSSL_WORD_SIZE != 0 && count > 0) {
*(o++) = (byte)(*(b++) ^ *(m++));
count--;
}
tpo.bp = o;
tpb.bp = b;
tpm.bp = m;
XorWordsOut( &tpo.wp, &tpb.wp, &tpm.wp, count / WOLFSSL_WORD_SIZE);
XorWordsOut(&tpo.wp, &tpb.wp, &tpm.wp, count >> WOLFSSL_WORD_SIZE_LOG2);
o = tpo.bp;
b = tpb.bp;
m = tpm.bp;
count %= WOLFSSL_WORD_SIZE;
count &= (WOLFSSL_WORD_SIZE - 1);
}
else if ((((wc_ptr_t)o) & (WOLFSSL_WORD_SIZE - 1)) ==
(((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) &&
(((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) ==
(((wc_ptr_t)m) & (WOLFSSL_WORD_SIZE - 1)))
{
/* Alignment can be fixed up to allow XOR by words. */
/* Perform bytewise xor until pointers are aligned to
* WOLFSSL_WORD_SIZE.
*/
while ((((wc_ptr_t)b & (WOLFSSL_WORD_SIZE - 1)) != 0) && (count > 0))
{
*o++ = (byte)(*b++ ^ *m++);
count--;
}
tpo.bp = o;
tpb.bp = b;
tpm.bp = m;
XorWordsOut(&tpo.wp, &tpb.wp, &tpm.wp, count >> WOLFSSL_WORD_SIZE_LOG2);
o = tpo.bp;
b = tpb.bp;
m = tpm.bp;
count &= (WOLFSSL_WORD_SIZE - 1);
}
while (count > 0) {
*o++ = (byte)(*b++ ^ *m++);
count--;
}
for (i = 0; i < count; i++)
o[i] = (byte)(b[i] ^ m[i]);
}
/* This routine performs a bitwise XOR operation of <*r> and <*a> for <n> number
@ -473,9 +497,9 @@ of wolfssl_words, placing the result in <*r>. */
WC_MISC_STATIC WC_INLINE void XorWords(wolfssl_word** r, const wolfssl_word** a,
word32 n)
{
word32 i;
const wolfssl_word *e = *a + n;
for (i = 0; i < n; i++)
while (*a < e)
*((*r)++) ^= *((*a)++);
}
@ -484,36 +508,55 @@ counts, placing the result in <*buf>. */
WC_MISC_STATIC WC_INLINE void xorbuf(void* buf, const void* mask, word32 count)
{
/* Leave no doubt that WOLFSSL_WORD_SIZE is a power of 2. */
wc_static_assert((WOLFSSL_WORD_SIZE & (WOLFSSL_WORD_SIZE - 1)) == 0);
byte* b = (byte*)buf;
const byte* m = (const byte*)mask;
word32 i;
byte* b;
const byte* m;
/* type-punning helpers */
union {
byte* bp;
wolfssl_word* wp;
} tpb;
union {
const byte* bp;
const wolfssl_word* wp;
} tpm;
b = (byte*)buf;
m = (const byte*)mask;
if ((((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) ==
(((wc_ptr_t)m) & (WOLFSSL_WORD_SIZE - 1)))
if ((((wc_ptr_t)buf & (WOLFSSL_WORD_SIZE - 1)) == 0) &&
(((wc_ptr_t)mask & (WOLFSSL_WORD_SIZE - 1)) == 0))
{
/* type-punning helpers */
union {
byte* bp;
wolfssl_word* wp;
} tpb;
union {
const byte* bp;
const wolfssl_word* wp;
} tpm;
/* Alignment checks out. Possible to XOR words. */
/* Move alignment so that it lines up with a
* WOLFSSL_WORD_SIZE boundary */
while ((((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) != 0 && count > 0)
/* Both buffers are already aligned. Possible to XOR by words without
* fixup.
*/
tpb.bp = b;
tpm.bp = m;
/* Work around false positives from linuxkm CONFIG_FORTIFY_SOURCE. */
#if defined(WOLFSSL_LINUXKM) && defined(CONFIG_FORTIFY_SOURCE)
PRAGMA_GCC_DIAG_PUSH;
PRAGMA_GCC("GCC diagnostic ignored \"-Wmaybe-uninitialized\"")
#endif
XorWords(&tpb.wp, &tpm.wp, count >> WOLFSSL_WORD_SIZE_LOG2);
#if defined(WOLFSSL_LINUXKM) && defined(CONFIG_FORTIFY_SOURCE)
PRAGMA_GCC_DIAG_POP;
#endif
b = tpb.bp;
m = tpm.bp;
count &= (WOLFSSL_WORD_SIZE - 1);
}
else if (((wc_ptr_t)buf & (WOLFSSL_WORD_SIZE - 1)) ==
((wc_ptr_t)mask & (WOLFSSL_WORD_SIZE - 1)))
{
/* Alignment can be fixed up to allow XOR by words. */
/* Perform bytewise xor until pointers are aligned to
* WOLFSSL_WORD_SIZE.
*/
while ((((wc_ptr_t)b & (WOLFSSL_WORD_SIZE - 1)) != 0) && (count > 0))
{
*(b++) ^= *(m++);
count--;
}
tpb.bp = b;
tpm.bp = m;
/* Work around false positives from linuxkm CONFIG_FORTIFY_SOURCE. */
@ -530,10 +573,13 @@ WC_MISC_STATIC WC_INLINE void xorbuf(void* buf, const void* mask, word32 count)
count &= (WOLFSSL_WORD_SIZE - 1);
}
for (i = 0; i < count; i++)
b[i] ^= m[i];
while (count > 0) {
*b++ ^= *m++;
count--;
}
}
#endif
#endif /* !WOLFSSL_NO_XOR_OPS */
#ifndef WOLFSSL_NO_FORCE_ZERO
/* This routine fills the first len bytes of the memory area pointed by mem