mirror of
https://github.com/wolfSSL/wolfssl.git
synced 2026-01-29 12:22:11 +01:00
linuxkm: enable the rest of the _asm implementations for x86, wrapped in {SAVE,RESTORE}_VECTOR_REGISTERS().
This commit is contained in:
@@ -339,12 +339,16 @@ int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input,
|
||||
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_AVX2(cpuidFlags)) {
|
||||
SAVE_VECTOR_REGISTERS();
|
||||
chacha_encrypt_avx2(ctx, input, output, msglen);
|
||||
RESTORE_VECTOR_REGISTERS();
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
if (IS_INTEL_AVX1(cpuidFlags)) {
|
||||
SAVE_VECTOR_REGISTERS();
|
||||
chacha_encrypt_avx1(ctx, input, output, msglen);
|
||||
RESTORE_VECTOR_REGISTERS();
|
||||
return 0;
|
||||
}
|
||||
else {
|
||||
|
||||
@@ -85,7 +85,16 @@ int wc_curve25519_make_pub(int public_size, byte* pub, int private_size,
|
||||
}
|
||||
#else
|
||||
fe_init();
|
||||
|
||||
#if defined(USE_INTEL_SPEEDUP) || defined(WOLFSSL_ARMASM)
|
||||
SAVE_VECTOR_REGISTERS();
|
||||
#endif
|
||||
|
||||
ret = curve25519(pub, priv, kCurve25519BasePoint);
|
||||
|
||||
#if defined(USE_INTEL_SPEEDUP) || defined(WOLFSSL_ARMASM)
|
||||
RESTORE_VECTOR_REGISTERS();
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
@@ -148,7 +157,15 @@ int wc_curve25519_shared_secret_ex(curve25519_key* private_key,
|
||||
#ifdef FREESCALE_LTC_ECC
|
||||
ret = nxp_ltc_curve25519(&o, private_key->k.point, &public_key->p, kLTC_Curve25519 /* input point P on Curve25519 */);
|
||||
#else
|
||||
#if defined(USE_INTEL_SPEEDUP) || defined(WOLFSSL_ARMASM)
|
||||
SAVE_VECTOR_REGISTERS();
|
||||
#endif
|
||||
|
||||
ret = curve25519(o, private_key->k.point, public_key->p.point);
|
||||
|
||||
#if defined(USE_INTEL_SPEEDUP) || defined(WOLFSSL_ARMASM)
|
||||
RESTORE_VECTOR_REGISTERS();
|
||||
#endif
|
||||
#endif
|
||||
if (ret != 0) {
|
||||
#ifdef FREESCALE_LTC_ECC
|
||||
|
||||
@@ -267,7 +267,9 @@ static void poly1305_blocks(Poly1305* ctx, const unsigned char *m,
|
||||
{
|
||||
#ifdef USE_INTEL_SPEEDUP
|
||||
/* AVX2 is handled in wc_Poly1305Update. */
|
||||
SAVE_VECTOR_REGISTERS();
|
||||
poly1305_blocks_avx(ctx, m, bytes);
|
||||
RESTORE_VECTOR_REGISTERS();
|
||||
#elif defined(POLY130564)
|
||||
const word64 hibit = (ctx->finished) ? 0 : ((word64)1 << 40); /* 1 << 128 */
|
||||
word64 r0,r1,r2;
|
||||
@@ -394,7 +396,9 @@ static void poly1305_block(Poly1305* ctx, const unsigned char *m)
|
||||
{
|
||||
#ifdef USE_INTEL_SPEEDUP
|
||||
/* No call to poly1305_block when AVX2, AVX2 does 4 blocks at a time. */
|
||||
SAVE_VECTOR_REGISTERS();
|
||||
poly1305_block_avx(ctx, m);
|
||||
RESTORE_VECTOR_REGISTERS();
|
||||
#else
|
||||
poly1305_blocks(ctx, m, POLY1305_BLOCK_SIZE);
|
||||
#endif
|
||||
@@ -430,12 +434,14 @@ int wc_Poly1305SetKey(Poly1305* ctx, const byte* key, word32 keySz)
|
||||
intel_flags = cpuid_get_flags();
|
||||
cpu_flags_set = 1;
|
||||
}
|
||||
SAVE_VECTOR_REGISTERS();
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_AVX2(intel_flags))
|
||||
poly1305_setkey_avx2(ctx, key);
|
||||
else
|
||||
#endif
|
||||
poly1305_setkey_avx(ctx, key);
|
||||
RESTORE_VECTOR_REGISTERS();
|
||||
#elif defined(POLY130564)
|
||||
|
||||
/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
|
||||
@@ -510,12 +516,14 @@ int wc_Poly1305Final(Poly1305* ctx, byte* mac)
|
||||
return BAD_FUNC_ARG;
|
||||
|
||||
#ifdef USE_INTEL_SPEEDUP
|
||||
SAVE_VECTOR_REGISTERS();
|
||||
#ifdef HAVE_INTEL_AVX2
|
||||
if (IS_INTEL_AVX2(intel_flags))
|
||||
poly1305_final_avx2(ctx, mac);
|
||||
else
|
||||
#endif
|
||||
poly1305_final_avx(ctx, mac);
|
||||
RESTORE_VECTOR_REGISTERS();
|
||||
#elif defined(POLY130564)
|
||||
|
||||
/* process the remaining block */
|
||||
@@ -712,11 +720,13 @@ int wc_Poly1305Update(Poly1305* ctx, const byte* m, word32 bytes)
|
||||
if (ctx->leftover < sizeof(ctx->buffer))
|
||||
return 0;
|
||||
|
||||
SAVE_VECTOR_REGISTERS();
|
||||
if (!ctx->started)
|
||||
poly1305_calc_powers_avx2(ctx);
|
||||
poly1305_blocks_avx2(ctx, ctx->buffer, sizeof(ctx->buffer));
|
||||
ctx->leftover = 0;
|
||||
}
|
||||
} else
|
||||
SAVE_VECTOR_REGISTERS();
|
||||
|
||||
/* process full blocks */
|
||||
if (bytes >= sizeof(ctx->buffer)) {
|
||||
@@ -735,6 +745,7 @@ int wc_Poly1305Update(Poly1305* ctx, const byte* m, word32 bytes)
|
||||
ctx->buffer[ctx->leftover + i] = m[i];
|
||||
ctx->leftover += bytes;
|
||||
}
|
||||
RESTORE_VECTOR_REGISTERS();
|
||||
}
|
||||
else
|
||||
#endif
|
||||
|
||||
@@ -309,9 +309,26 @@ static int InitSha256(wc_Sha256* sha256)
|
||||
/* = NULL */
|
||||
static int transform_check = 0;
|
||||
static word32 intel_flags;
|
||||
static int Transform_Sha256_is_vectorized = 0;
|
||||
|
||||
#define XTRANSFORM(S, D) (*Transform_Sha256_p)((S),(D))
|
||||
#define XTRANSFORM_LEN(S, D, L) (*Transform_Sha256_Len_p)((S),(D),(L))
|
||||
/* Invoke the selected SHA-256 block transform through Transform_Sha256_p,
 * passing (S) and (D). When Transform_Sha256_is_vectorized is nonzero the
 * call is bracketed by SAVE_VECTOR_REGISTERS()/RESTORE_VECTOR_REGISTERS().
 * The macro evaluates to the callee's int return value; it relies on the
 * GNU statement-expression extension "({ ... })". */
#define XTRANSFORM(S, D) ({ \
|
||||
int _ret; \
|
||||
if (Transform_Sha256_is_vectorized) \
|
||||
SAVE_VECTOR_REGISTERS(); \
|
||||
_ret = (*Transform_Sha256_p)((S),(D)); \
|
||||
if (Transform_Sha256_is_vectorized) \
|
||||
RESTORE_VECTOR_REGISTERS(); \
|
||||
_ret; \
|
||||
})
|
||||
/* Invoke the selected multi-block SHA-256 transform through
 * Transform_Sha256_Len_p, passing (S), (D) and (L). When
 * Transform_Sha256_is_vectorized is nonzero the call is bracketed by
 * SAVE_VECTOR_REGISTERS()/RESTORE_VECTOR_REGISTERS(). Evaluates to the
 * callee's int return value (GNU statement expression).
 * NOTE(review): the non-vectorized setup path assigns NULL to
 * Transform_Sha256_Len_p; presumably callers check the pointer before
 * using this macro -- confirm. */
#define XTRANSFORM_LEN(S, D, L) ({ \
|
||||
int _ret; \
|
||||
if (Transform_Sha256_is_vectorized) \
|
||||
SAVE_VECTOR_REGISTERS(); \
|
||||
_ret = (*Transform_Sha256_Len_p)((S),(D),(L)); \
|
||||
if (Transform_Sha256_is_vectorized) \
|
||||
RESTORE_VECTOR_REGISTERS(); \
|
||||
_ret; \
|
||||
})
|
||||
|
||||
static void Sha256_SetTransform(void)
|
||||
{
|
||||
@@ -327,6 +344,7 @@ static int InitSha256(wc_Sha256* sha256)
|
||||
if (IS_INTEL_BMI2(intel_flags)) {
|
||||
Transform_Sha256_p = Transform_Sha256_AVX2_RORX;
|
||||
Transform_Sha256_Len_p = Transform_Sha256_AVX2_RORX_Len;
|
||||
Transform_Sha256_is_vectorized = 1;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
@@ -334,11 +352,13 @@ static int InitSha256(wc_Sha256* sha256)
|
||||
{
|
||||
Transform_Sha256_p = Transform_Sha256_AVX2;
|
||||
Transform_Sha256_Len_p = Transform_Sha256_AVX2_Len;
|
||||
Transform_Sha256_is_vectorized = 1;
|
||||
}
|
||||
#ifdef HAVE_INTEL_RORX
|
||||
else {
|
||||
Transform_Sha256_p = Transform_Sha256_AVX1_RORX;
|
||||
Transform_Sha256_Len_p = Transform_Sha256_AVX1_RORX_Len;
|
||||
Transform_Sha256_is_vectorized = 1;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@@ -348,12 +368,14 @@ static int InitSha256(wc_Sha256* sha256)
|
||||
if (IS_INTEL_AVX1(intel_flags)) {
|
||||
Transform_Sha256_p = Transform_Sha256_AVX1;
|
||||
Transform_Sha256_Len_p = Transform_Sha256_AVX1_Len;
|
||||
Transform_Sha256_is_vectorized = 1;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
Transform_Sha256_p = Transform_Sha256;
|
||||
Transform_Sha256_Len_p = NULL;
|
||||
Transform_Sha256_is_vectorized = 0;
|
||||
}
|
||||
|
||||
transform_check = 1;
|
||||
|
||||
@@ -337,9 +337,31 @@ static int InitSha512(wc_Sha512* sha512)
|
||||
static int (*Transform_Sha512_Len_p)(wc_Sha512* sha512, word32 len) = NULL;
|
||||
static int transform_check = 0;
|
||||
static int intel_flags;
|
||||
static int Transform_Sha512_is_vectorized = 0;
|
||||
#if 0
|
||||
#define Transform_Sha512(sha512) (*Transform_Sha512_p)(sha512)
|
||||
#define Transform_Sha512_Len(sha512, len) \
|
||||
(*Transform_Sha512_Len_p)(sha512, len)
|
||||
#endif
|
||||
|
||||
/* Invoke the selected SHA-512 block transform through Transform_Sha512_p.
 * When Transform_Sha512_is_vectorized is nonzero the call is bracketed by
 * SAVE_VECTOR_REGISTERS()/RESTORE_VECTOR_REGISTERS(). Evaluates to the
 * callee's int return value; relies on the GNU statement-expression
 * extension "({ ... })".
 * NOTE(review): the sha512 argument is expanded without parentheses. */
#define Transform_Sha512(sha512) ({ \
|
||||
int _ret; \
|
||||
if (Transform_Sha512_is_vectorized) \
|
||||
SAVE_VECTOR_REGISTERS(); \
|
||||
_ret = (*Transform_Sha512_p)(sha512); \
|
||||
if (Transform_Sha512_is_vectorized) \
|
||||
RESTORE_VECTOR_REGISTERS(); \
|
||||
_ret; \
|
||||
})
|
||||
/* Invoke the selected multi-block SHA-512 transform through
 * Transform_Sha512_Len_p with the given context and length. When
 * Transform_Sha512_is_vectorized is nonzero the call is bracketed by
 * SAVE_VECTOR_REGISTERS()/RESTORE_VECTOR_REGISTERS(). Evaluates to the
 * callee's int return value (GNU statement expression).
 * NOTE(review): Transform_Sha512_Len_p is initialized to NULL and the
 * visible scalar fallback does not assign it; presumably callers check
 * the pointer first -- confirm. */
#define Transform_Sha512_Len(sha512, len) ({ \
|
||||
int _ret; \
|
||||
if (Transform_Sha512_is_vectorized) \
|
||||
SAVE_VECTOR_REGISTERS(); \
|
||||
_ret = (*Transform_Sha512_Len_p)(sha512, len); \
|
||||
if (Transform_Sha512_is_vectorized) \
|
||||
RESTORE_VECTOR_REGISTERS(); \
|
||||
_ret; \
|
||||
})
|
||||
|
||||
static void Sha512_SetTransform(void)
|
||||
{
|
||||
@@ -354,17 +376,20 @@ static int InitSha512(wc_Sha512* sha512)
|
||||
if (IS_INTEL_BMI2(intel_flags)) {
|
||||
Transform_Sha512_p = Transform_Sha512_AVX2_RORX;
|
||||
Transform_Sha512_Len_p = Transform_Sha512_AVX2_RORX_Len;
|
||||
Transform_Sha512_is_vectorized = 1;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
if (1) {
|
||||
Transform_Sha512_p = Transform_Sha512_AVX2;
|
||||
Transform_Sha512_Len_p = Transform_Sha512_AVX2_Len;
|
||||
Transform_Sha512_is_vectorized = 1;
|
||||
}
|
||||
#ifdef HAVE_INTEL_RORX
|
||||
else {
|
||||
Transform_Sha512_p = Transform_Sha512_AVX1_RORX;
|
||||
Transform_Sha512_Len_p = Transform_Sha512_AVX1_RORX_Len;
|
||||
Transform_Sha512_is_vectorized = 1;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@@ -374,10 +399,14 @@ static int InitSha512(wc_Sha512* sha512)
|
||||
if (IS_INTEL_AVX1(intel_flags)) {
|
||||
Transform_Sha512_p = Transform_Sha512_AVX1;
|
||||
Transform_Sha512_Len_p = Transform_Sha512_AVX1_Len;
|
||||
Transform_Sha512_is_vectorized = 1;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
Transform_Sha512_p = _Transform_Sha512;
|
||||
/* _Transform_Sha512 is the plain C fallback: it is not one of the AVX
 * implementations, so the Transform_Sha512() wrapper must not save/restore
 * vector registers around it (matches the SHA-256 fallback branch). */
Transform_Sha512_is_vectorized = 0;
|
||||
}
|
||||
|
||||
transform_check = 1;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user