Files
wolfssl/wolfcrypt/src/misc.c
David Garske c1640e8a3d Intel QuickAssist (QAT) support and async enhancements/fixes:
* Adds ./configure "--with-intelqa=../QAT1.6”, port files, memory management and README.md (see wolfcrypt/src/port/intel/).
* Added Intel QAT support for RSA public/private (CRT/non-CRT), AES CBC/GCM, ECDH/ECDSA, DH, DES3, SHA, SHA224, SHA256, SHA384, SHA512, MD5 and HMAC.
* wolfSSL async enabled all client and server: PKI, Encrypt/Decrypt, Hashing/HMAC and Certificate Sign/Verify.
* wolfSSL async support in functions: Encrypt, Decrypt, VerifyMAC, BuildMessage, ConfirmSignature, DoCertificate, ParseCertRelative, and MakeSignature.
* wolfCrypt test and benchmark async support added for all HW acceleration.
* wolfCrypt benchmark multi-threading support.
* Added QuickAssist memory overrides for XMALLOC, XFREE and XREALLOC. XREALLOC determines if existing pointer needs reallocated for NUMA.
* Refactor to make sure “heap” is available for async dev init.
* Added async support for all examples for connect, accept, read and write.
* Added new WC_BIGINT (in wolfmath.c) for async hardware support.
* Added async simulator tests for DES3 CBC, AES CBC/GCM.
* Added QAT standalone build for unit testing.
* Added int return code to SHA and MD5 functions.
* Refactor of the async stack variable handling, so async operations have generic args buffer area and cleanup function pointer.
* Combined duplicate code for async push/pop handling.
* Refactor internal.c to add AllocKey / FreeKey.
* Refactor of hash init/free in TLS to use InitHashes and FreeHashes.
* Refactor of the async event->context to use WOLF_EVENT_TYPE_ASYNC_WOLFSSL for WOLFSSL* and WOLF_EVENT_TYPE_ASYNC_WOLFCRYPT for WC_ASYNC_DEV*.
* Suppress error message for WC_PENDING_E.
* Implemented "wolfSSL_EVP_MD_CTX_init" to do memset.
* Cleanup of the openssl compat CTX sizes when async is enabled.
* Cleanup of AES, DES3, DH, SHA, MD5, DES3, DH, HMAC, MD5 for consistency and readability.
* Cleanup of the OPAQUE_LEN.
* Cleanup to use ENCRYPT_LEN instead of sizeof(ssl->arrays.preMasterSecret).
* Changed ssl->arrays.preMasterSecret to use XMALLOC (accelerates HW operations)
* Reduce verbosity with debug enabled for "GetMyVersion", "wolfSSL Using RSA OAEP padding" and "wolfSSL Using RSA PKCSV15 padding".
* Updated RSA un-padding error message so its different than one above it for better debugging.
* Added QAT async enables for each algorithm.
* Refactor of the async init to use _ex.
* Added WC_ASYNC_THRESH_NONE to allow bypass of the async thresholds for testing.
* Reformatted the benchmark results:
PKI: "RSA 2048 private HW 18522 ops took 1.003 sec, avg 0.054 ms, 18467.763 ops/sec"
Crypto/Hashing: SHA-256 SW 350 megs took 1.009 seconds, 346.946 MB/s Cycles per byte = 9.87
* Added min execution time for all benchmarks.
* Moved wc_*GetHash and wc_*RestorePos to appropriate files so use of isCopy flag is local.
* Fix for ECC sign status sometimes being invalid due to uninitialized ECC digest in benchmark.
* Added new DECLARE_VAR/FREE_VAR and DECLARE_ARRAY/FREE_ARRAY macros for helping setup test/benchmark variables to accelerate async.
* Added NO_SW_BENCH option to only run HW bench.
* Added support for PRNG to use hardware SHA256 if _wc devId provided.
* Fix to prevent curve tests from running against wrong curve sizes. Changed wc_ecc_set_curve to match on exact size.
* Added the wc_*GetHash calls to the wolfCrypt tests.
* Added async hardware start/stop to wolfSSL init/cleanup.
* Refactor to add wc_*Copy for hashing context (for async), which replaces wc_*RestorePos.
* Fixes for building with TI hashing (including: SHA224, missing new API’s and building with dummy build for non hw testing). Note: We need to add build test for this `./configure CFLAGS="-DWOLFSSL_TI_HASH -DTI_DUMMY_BUILD”`.
* Added arg checks on wc_*GetHash and wc_*Copy.
* Cleanup of the BuildMD5, BuildSHA, BuildMD5_CertVerify and BuildSHA_CertVerify functions.
* Added new ./configure --enable-asyncthreads, to allow enable/disable of the async threading support. If --enable-asynccrypt set this will be enabled by default if pthread is supported. Allows multi-threaded benchmarks with async simulator.
* Added checks for all hashing to verify valid ->buffLen.
* Fix for SHA512 scan-build warning about un-initialized “W_X”.
* Fix for valgrind un-initialized use of buffer in AllocDer (der->buffer) and BuildTlsFinished handshake_hash.
* Refactor of the benchmarking to use common function for start, check and finish of the stats.
* Fixed issue with ECC cache loading in multi-threading.
* Fix bug with AESNI not aligned code that assumes XMALLOC is 16-byte aligned.
* Added new WC_ASYNC_NO_… options to allow disabling of individual async algorithms. New defines are: WC_ASYNC_NO_CRYPT, WC_ASYNC_NO_PKI and WC_ASYNC_NO_HASH. Additionally each algorithm has a WC_ASYNC_NO_[ALGO] define.
* Added “wolfSSL_GetAllocators” API and fixed the wolfCrypt memcb_test so it restores callback pointers after test is complete (fixes issue with using custom allocators and test breaking it).
2017-04-10 14:45:05 -07:00

248 lines
6.1 KiB
C

/* misc.c
*
* Copyright (C) 2006-2016 wolfSSL Inc.
*
* This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* wolfSSL is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <wolfssl/wolfcrypt/settings.h>
#ifndef WOLF_CRYPT_MISC_C
#define WOLF_CRYPT_MISC_C
#include <wolfssl/wolfcrypt/misc.h>
/* inlining these functions is a huge speed increase and a small size decrease,
because the functions are smaller than function call setup/cleanup, e.g.,
md5 benchmark is twice as fast with inline. If you don't want it, then
define NO_INLINE and compile this file into wolfssl, otherwise it's used as
a source header
*/
#ifdef NO_INLINE
#define STATIC
#else
#define STATIC static
#endif
/* Check for if compiling misc.c when not needed. */
#if !defined(WOLFSSL_MISC_INCLUDED) && !defined(NO_INLINE)
#warning misc.c does not need to be compiled when using inline (NO_INLINE not defined)
#else
#if defined(__ICCARM__)
#include <intrinsics.h>
#endif
#ifdef INTEL_INTRINSICS
#include <stdlib.h> /* get intrinsic definitions */
/* for non visual studio probably need no long version, 32 bit only
* i.e., _rotl and _rotr */
#pragma intrinsic(_lrotl, _lrotr)
STATIC INLINE word32 rotlFixed(word32 x, word32 y)
{
return y ? _lrotl(x, y) : x;
}
STATIC INLINE word32 rotrFixed(word32 x, word32 y)
{
return y ? _lrotr(x, y) : x;
}
#else /* generic */
STATIC INLINE word32 rotlFixed(word32 x, word32 y)
{
return (x << y) | (x >> (sizeof(y) * 8 - y));
}
STATIC INLINE word32 rotrFixed(word32 x, word32 y)
{
return (x >> y) | (x << (sizeof(y) * 8 - y));
}
#endif
STATIC INLINE word32 ByteReverseWord32(word32 value)
{
#ifdef PPC_INTRINSICS
/* PPC: load reverse indexed instruction */
return (word32)__lwbrx(&value,0);
#elif defined(__ICCARM__)
return (word32)__REV(value);
#elif defined(KEIL_INTRINSICS)
return (word32)__rev(value);
#elif defined(FAST_ROTATE)
/* 5 instructions with rotate instruction, 9 without */
return (rotrFixed(value, 8U) & 0xff00ff00) |
(rotlFixed(value, 8U) & 0x00ff00ff);
#else
/* 6 instructions with rotate instruction, 8 without */
value = ((value & 0xFF00FF00) >> 8) | ((value & 0x00FF00FF) << 8);
return rotlFixed(value, 16U);
#endif
}
STATIC INLINE void ByteReverseWords(word32* out, const word32* in,
word32 byteCount)
{
word32 count = byteCount/(word32)sizeof(word32), i;
for (i = 0; i < count; i++)
out[i] = ByteReverseWord32(in[i]);
}
#ifdef WORD64_AVAILABLE
STATIC INLINE word64 rotlFixed64(word64 x, word64 y)
{
return (x << y) | (x >> (sizeof(y) * 8 - y));
}
STATIC INLINE word64 rotrFixed64(word64 x, word64 y)
{
return (x >> y) | (x << (sizeof(y) * 8 - y));
}
STATIC INLINE word64 ByteReverseWord64(word64 value)
{
#if defined(WOLFCRYPT_SLOW_WORD64)
return (word64)(ByteReverseWord32((word32)value)) << 32 |
ByteReverseWord32((word32)(value>>32));
#else
value = ((value & W64LIT(0xFF00FF00FF00FF00)) >> 8) |
((value & W64LIT(0x00FF00FF00FF00FF)) << 8);
value = ((value & W64LIT(0xFFFF0000FFFF0000)) >> 16) |
((value & W64LIT(0x0000FFFF0000FFFF)) << 16);
return rotlFixed64(value, 32U);
#endif
}
STATIC INLINE void ByteReverseWords64(word64* out, const word64* in,
word32 byteCount)
{
word32 count = byteCount/(word32)sizeof(word64), i;
for (i = 0; i < count; i++)
out[i] = ByteReverseWord64(in[i]);
}
#endif /* WORD64_AVAILABLE */
STATIC INLINE void XorWords(wolfssl_word* r, const wolfssl_word* a, word32 n)
{
word32 i;
for (i = 0; i < n; i++) r[i] ^= a[i];
}
STATIC INLINE void xorbuf(void* buf, const void* mask, word32 count)
{
if (((wolfssl_word)buf | (wolfssl_word)mask | count) % WOLFSSL_WORD_SIZE == 0)
XorWords( (wolfssl_word*)buf,
(const wolfssl_word*)mask, count / WOLFSSL_WORD_SIZE);
else {
word32 i;
byte* b = (byte*)buf;
const byte* m = (const byte*)mask;
for (i = 0; i < count; i++) b[i] ^= m[i];
}
}
/* Make sure compiler doesn't skip */
STATIC INLINE void ForceZero(const void* mem, word32 len)
{
volatile byte* z = (volatile byte*)mem;
#ifdef WOLFSSL_X86_64_BUILD
volatile word64* w;
for (w = (volatile word64*)z; len >= sizeof(*w); len -= sizeof(*w))
*w++ = 0;
z = (volatile byte*)w;
#endif
while (len--) *z++ = 0;
}
/* check all length bytes for equality, return 0 on success */
STATIC INLINE int ConstantCompare(const byte* a, const byte* b, int length)
{
int i;
int compareSum = 0;
for (i = 0; i < length; i++) {
compareSum |= a[i] ^ b[i];
}
return compareSum;
}
#ifndef WOLFSSL_HAVE_MIN
#define WOLFSSL_HAVE_MIN
#if defined(HAVE_FIPS) && !defined(min) /* so ifdef check passes */
#define min min
#endif
STATIC INLINE word32 min(word32 a, word32 b)
{
return a > b ? b : a;
}
#endif /* !WOLFSSL_HAVE_MIN */
#ifndef WOLFSSL_HAVE_MAX
#define WOLFSSL_HAVE_MAX
#if defined(HAVE_FIPS) && !defined(max) /* so ifdef check passes */
#define max max
#endif
STATIC INLINE word32 max(word32 a, word32 b)
{
return a > b ? a : b;
}
#endif /* !WOLFSSL_HAVE_MAX */
#undef STATIC
#endif /* !WOLFSSL_MISC_INCLUDED && !NO_INLINE */
#endif /* WOLF_CRYPT_MISC_C */