forked from wolfSSL/wolfssl
wolfcrypt/src/misc.c: in xorbufout() and xorbuf(), call XorWordsOut() and XorWords(), respectively, directly via a simplified path if all args are already aligned to WOLFSSL_WORD_SIZE (fixes performance regression from dc2e2631bc).
configure.ac: add a "Conflicting asm settings" error check at end, since our configuration currently blows up if --enable-intelasm and --disable-asm are combined.
This commit is contained in:
@ -10064,6 +10064,13 @@ if test "x$ENABLED_LINUXKM" = "xyes"; then
|
|||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
AS_IF([test "$ENABLED_ASM" = "no" && (test "$ENABLED_INTELASM" != "no" || \
|
||||||
|
test "$ENABLED_AESNI" != "no" || \
|
||||||
|
test "$ENABLED_ARMASM" != "no" || \
|
||||||
|
test "$ENABLED_RISCV_ASM" != "no" || \
|
||||||
|
test "$ENABLED_SP_ASM" != "no")],
|
||||||
|
[AC_MSG_ERROR([Conflicting asm settings.])])
|
||||||
|
|
||||||
# The following AM_CONDITIONAL statements set flags for use in the Makefiles.
|
# The following AM_CONDITIONAL statements set flags for use in the Makefiles.
|
||||||
# Some of these affect build targets and objects, some trigger different
|
# Some of these affect build targets and objects, some trigger different
|
||||||
# test scripts for make check.
|
# test scripts for make check.
|
||||||
|
@ -407,14 +407,18 @@ WC_MISC_STATIC WC_INLINE void ByteReverseWords64(word64* out, const word64* in,
|
|||||||
#endif /* WORD64_AVAILABLE && !WOLFSSL_NO_WORD64_OPS */
|
#endif /* WORD64_AVAILABLE && !WOLFSSL_NO_WORD64_OPS */
|
||||||
|
|
||||||
#ifndef WOLFSSL_NO_XOR_OPS
|
#ifndef WOLFSSL_NO_XOR_OPS
|
||||||
|
|
||||||
|
/* Leave no doubt that WOLFSSL_WORD_SIZE is a power of 2. */
|
||||||
|
wc_static_assert((WOLFSSL_WORD_SIZE & (WOLFSSL_WORD_SIZE - 1)) == 0);
|
||||||
|
|
||||||
/* This routine performs a bitwise XOR operation of <*r> and <*a> for <n> number
|
/* This routine performs a bitwise XOR operation of <*r> and <*a> for <n> number
|
||||||
of wolfssl_words, placing the result in <*r>. */
|
of wolfssl_words, placing the result in <*r>. */
|
||||||
WC_MISC_STATIC WC_INLINE void XorWordsOut(wolfssl_word** r,
|
WC_MISC_STATIC WC_INLINE void XorWordsOut(wolfssl_word** r,
|
||||||
const wolfssl_word** a, const wolfssl_word** b, word32 n)
|
const wolfssl_word** a, const wolfssl_word** b, word32 n)
|
||||||
{
|
{
|
||||||
word32 i;
|
const wolfssl_word *e = *a + n;
|
||||||
|
|
||||||
for (i = 0; i < n; i++)
|
while (*a < e)
|
||||||
*((*r)++) = *((*a)++) ^ *((*b)++);
|
*((*r)++) = *((*a)++) ^ *((*b)++);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -424,48 +428,68 @@ counts, placing the result in <*buf>. */
|
|||||||
WC_MISC_STATIC WC_INLINE void xorbufout(void* out, const void* buf,
|
WC_MISC_STATIC WC_INLINE void xorbufout(void* out, const void* buf,
|
||||||
const void* mask, word32 count)
|
const void* mask, word32 count)
|
||||||
{
|
{
|
||||||
word32 i;
|
byte* o = (byte*)out;
|
||||||
byte* o;
|
const byte* b = (const byte*)buf;
|
||||||
const byte* b;
|
const byte* m = (const byte*)mask;
|
||||||
const byte* m;
|
|
||||||
|
|
||||||
o = (byte*)out;
|
/* type-punning helpers */
|
||||||
b = (const byte*)buf;
|
union {
|
||||||
m = (const byte*)mask;
|
byte* bp;
|
||||||
|
wolfssl_word* wp;
|
||||||
|
} tpo;
|
||||||
|
union {
|
||||||
|
const byte* bp;
|
||||||
|
const wolfssl_word* wp;
|
||||||
|
} tpb, tpm;
|
||||||
|
|
||||||
|
if (((((wc_ptr_t)o) & (WOLFSSL_WORD_SIZE - 1)) == 0) &&
|
||||||
|
((((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) == 0) &&
|
||||||
|
((((wc_ptr_t)m) & (WOLFSSL_WORD_SIZE - 1)) == 0))
|
||||||
|
{
|
||||||
|
/* All buffers are already aligned. Possible to XOR by words without
|
||||||
|
* fixup.
|
||||||
|
*/
|
||||||
|
|
||||||
if (((wc_ptr_t)o) % WOLFSSL_WORD_SIZE ==
|
|
||||||
((wc_ptr_t)b) % WOLFSSL_WORD_SIZE &&
|
|
||||||
((wc_ptr_t)b) % WOLFSSL_WORD_SIZE ==
|
|
||||||
((wc_ptr_t)m) % WOLFSSL_WORD_SIZE) {
|
|
||||||
/* type-punning helpers */
|
|
||||||
union {
|
|
||||||
byte* bp;
|
|
||||||
wolfssl_word* wp;
|
|
||||||
} tpo;
|
|
||||||
union {
|
|
||||||
const byte* bp;
|
|
||||||
const wolfssl_word* wp;
|
|
||||||
} tpb, tpm;
|
|
||||||
/* Alignment checks out. Possible to XOR words. */
|
|
||||||
/* Move alignment so that it lines up with a
|
|
||||||
* WOLFSSL_WORD_SIZE boundary */
|
|
||||||
while (((wc_ptr_t)b) % WOLFSSL_WORD_SIZE != 0 && count > 0) {
|
|
||||||
*(o++) = (byte)(*(b++) ^ *(m++));
|
|
||||||
count--;
|
|
||||||
}
|
|
||||||
tpo.bp = o;
|
tpo.bp = o;
|
||||||
tpb.bp = b;
|
tpb.bp = b;
|
||||||
tpm.bp = m;
|
tpm.bp = m;
|
||||||
XorWordsOut( &tpo.wp, &tpb.wp, &tpm.wp, count / WOLFSSL_WORD_SIZE);
|
XorWordsOut(&tpo.wp, &tpb.wp, &tpm.wp, count >> WOLFSSL_WORD_SIZE_LOG2);
|
||||||
o = tpo.bp;
|
o = tpo.bp;
|
||||||
b = tpb.bp;
|
b = tpb.bp;
|
||||||
m = tpm.bp;
|
m = tpm.bp;
|
||||||
count %= WOLFSSL_WORD_SIZE;
|
count &= (WOLFSSL_WORD_SIZE - 1);
|
||||||
|
}
|
||||||
|
else if ((((wc_ptr_t)o) & (WOLFSSL_WORD_SIZE - 1)) ==
|
||||||
|
(((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) &&
|
||||||
|
(((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) ==
|
||||||
|
(((wc_ptr_t)m) & (WOLFSSL_WORD_SIZE - 1)))
|
||||||
|
{
|
||||||
|
/* Alignment can be fixed up to allow XOR by words. */
|
||||||
|
|
||||||
|
/* Perform bytewise xor until pointers are aligned to
|
||||||
|
* WOLFSSL_WORD_SIZE.
|
||||||
|
*/
|
||||||
|
while ((((wc_ptr_t)b & (WOLFSSL_WORD_SIZE - 1)) != 0) && (count > 0))
|
||||||
|
{
|
||||||
|
*o++ = (byte)(*b++ ^ *m++);
|
||||||
|
count--;
|
||||||
|
}
|
||||||
|
|
||||||
|
tpo.bp = o;
|
||||||
|
tpb.bp = b;
|
||||||
|
tpm.bp = m;
|
||||||
|
XorWordsOut(&tpo.wp, &tpb.wp, &tpm.wp, count >> WOLFSSL_WORD_SIZE_LOG2);
|
||||||
|
o = tpo.bp;
|
||||||
|
b = tpb.bp;
|
||||||
|
m = tpm.bp;
|
||||||
|
count &= (WOLFSSL_WORD_SIZE - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
while (count > 0) {
|
||||||
|
*o++ = (byte)(*b++ ^ *m++);
|
||||||
|
count--;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < count; i++)
|
|
||||||
o[i] = (byte)(b[i] ^ m[i]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* This routine performs a bitwise XOR operation of <*r> and <*a> for <n> number
|
/* This routine performs a bitwise XOR operation of <*r> and <*a> for <n> number
|
||||||
@ -473,9 +497,9 @@ of wolfssl_words, placing the result in <*r>. */
|
|||||||
WC_MISC_STATIC WC_INLINE void XorWords(wolfssl_word** r, const wolfssl_word** a,
|
WC_MISC_STATIC WC_INLINE void XorWords(wolfssl_word** r, const wolfssl_word** a,
|
||||||
word32 n)
|
word32 n)
|
||||||
{
|
{
|
||||||
word32 i;
|
const wolfssl_word *e = *a + n;
|
||||||
|
|
||||||
for (i = 0; i < n; i++)
|
while (*a < e)
|
||||||
*((*r)++) ^= *((*a)++);
|
*((*r)++) ^= *((*a)++);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -484,36 +508,55 @@ counts, placing the result in <*buf>. */
|
|||||||
|
|
||||||
WC_MISC_STATIC WC_INLINE void xorbuf(void* buf, const void* mask, word32 count)
|
WC_MISC_STATIC WC_INLINE void xorbuf(void* buf, const void* mask, word32 count)
|
||||||
{
|
{
|
||||||
/* Leave no doubt that WOLFSSL_WORD_SIZE is a power of 2. */
|
byte* b = (byte*)buf;
|
||||||
wc_static_assert((WOLFSSL_WORD_SIZE & (WOLFSSL_WORD_SIZE - 1)) == 0);
|
const byte* m = (const byte*)mask;
|
||||||
|
|
||||||
word32 i;
|
/* type-punning helpers */
|
||||||
byte* b;
|
union {
|
||||||
const byte* m;
|
byte* bp;
|
||||||
|
wolfssl_word* wp;
|
||||||
|
} tpb;
|
||||||
|
union {
|
||||||
|
const byte* bp;
|
||||||
|
const wolfssl_word* wp;
|
||||||
|
} tpm;
|
||||||
|
|
||||||
b = (byte*)buf;
|
if ((((wc_ptr_t)buf & (WOLFSSL_WORD_SIZE - 1)) == 0) &&
|
||||||
m = (const byte*)mask;
|
(((wc_ptr_t)mask & (WOLFSSL_WORD_SIZE - 1)) == 0))
|
||||||
|
|
||||||
if ((((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) ==
|
|
||||||
(((wc_ptr_t)m) & (WOLFSSL_WORD_SIZE - 1)))
|
|
||||||
{
|
{
|
||||||
/* type-punning helpers */
|
/* Both buffers are already aligned. Possible to XOR by words without
|
||||||
union {
|
* fixup.
|
||||||
byte* bp;
|
*/
|
||||||
wolfssl_word* wp;
|
|
||||||
} tpb;
|
tpb.bp = b;
|
||||||
union {
|
tpm.bp = m;
|
||||||
const byte* bp;
|
/* Work around false positives from linuxkm CONFIG_FORTIFY_SOURCE. */
|
||||||
const wolfssl_word* wp;
|
#if defined(WOLFSSL_LINUXKM) && defined(CONFIG_FORTIFY_SOURCE)
|
||||||
} tpm;
|
PRAGMA_GCC_DIAG_PUSH;
|
||||||
/* Alignment checks out. Possible to XOR words. */
|
PRAGMA_GCC("GCC diagnostic ignored \"-Wmaybe-uninitialized\"")
|
||||||
/* Move alignment so that it lines up with a
|
#endif
|
||||||
* WOLFSSL_WORD_SIZE boundary */
|
XorWords(&tpb.wp, &tpm.wp, count >> WOLFSSL_WORD_SIZE_LOG2);
|
||||||
while ((((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) != 0 && count > 0)
|
#if defined(WOLFSSL_LINUXKM) && defined(CONFIG_FORTIFY_SOURCE)
|
||||||
|
PRAGMA_GCC_DIAG_POP;
|
||||||
|
#endif
|
||||||
|
b = tpb.bp;
|
||||||
|
m = tpm.bp;
|
||||||
|
count &= (WOLFSSL_WORD_SIZE - 1);
|
||||||
|
}
|
||||||
|
else if (((wc_ptr_t)buf & (WOLFSSL_WORD_SIZE - 1)) ==
|
||||||
|
((wc_ptr_t)mask & (WOLFSSL_WORD_SIZE - 1)))
|
||||||
|
{
|
||||||
|
/* Alignment can be fixed up to allow XOR by words. */
|
||||||
|
|
||||||
|
/* Perform bytewise xor until pointers are aligned to
|
||||||
|
* WOLFSSL_WORD_SIZE.
|
||||||
|
*/
|
||||||
|
while ((((wc_ptr_t)b & (WOLFSSL_WORD_SIZE - 1)) != 0) && (count > 0))
|
||||||
{
|
{
|
||||||
*(b++) ^= *(m++);
|
*(b++) ^= *(m++);
|
||||||
count--;
|
count--;
|
||||||
}
|
}
|
||||||
|
|
||||||
tpb.bp = b;
|
tpb.bp = b;
|
||||||
tpm.bp = m;
|
tpm.bp = m;
|
||||||
/* Work around false positives from linuxkm CONFIG_FORTIFY_SOURCE. */
|
/* Work around false positives from linuxkm CONFIG_FORTIFY_SOURCE. */
|
||||||
@ -530,10 +573,13 @@ WC_MISC_STATIC WC_INLINE void xorbuf(void* buf, const void* mask, word32 count)
|
|||||||
count &= (WOLFSSL_WORD_SIZE - 1);
|
count &= (WOLFSSL_WORD_SIZE - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < count; i++)
|
while (count > 0) {
|
||||||
b[i] ^= m[i];
|
*b++ ^= *m++;
|
||||||
|
count--;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
#endif /* !WOLFSSL_NO_XOR_OPS */
|
||||||
|
|
||||||
#ifndef WOLFSSL_NO_FORCE_ZERO
|
#ifndef WOLFSSL_NO_FORCE_ZERO
|
||||||
/* This routine fills the first len bytes of the memory area pointed by mem
|
/* This routine fills the first len bytes of the memory area pointed by mem
|
||||||
|
Reference in New Issue
Block a user