mirror of https://github.com/wolfSSL/wolfssl.git (synced 2025-08-01 03:34:39 +02:00)
linuxkm: enable the rest of the _asm implementations for x86, wrapped in {SAVE,RESTORE}_VECTOR_REGISTERS().
@@ -64,18 +64,38 @@ $(obj)/wolfcrypt/test/test.o: ccflags-y += -DNO_MAIN_DRIVER

$(obj)/wolfcrypt/src/aes.o: ccflags-y = $(WOLFSSL_CFLAGS) $(WOLFSSL_CFLAGS_YES_VECTOR_INSNS)

asflags-y := $(WOLFSSL_ASFLAGS)
asflags-y := $(WOLFSSL_ASFLAGS) $(ASFLAGS_FPUSIMD_DISABLE)

# these two _asms are kernel-compatible (they don't reference the pic-related _GLOBAL_OFFSET_TABLE_)
# but they still irritate objtool: "unannotated intra-function call" and "BP used as a scratch register"
# after the C wrapper for a vectorized algorithm has been equipped with {SAVE,RESTORE}_VECTOR_REGISTERS(),
# it can be safely included here:
$(obj)/wolfcrypt/src/aes_asm.o: asflags-y = $(WOLFSSL_ASFLAGS) $(ASFLAGS_FPU_DISABLE_SIMD_ENABLE)
$(obj)/wolfcrypt/src/aes_gcm_asm.o: asflags-y = $(WOLFSSL_ASFLAGS) $(ASFLAGS_FPU_DISABLE_SIMD_ENABLE)
$(obj)/wolfcrypt/src/chacha_asm.o: asflags-y = $(WOLFSSL_ASFLAGS) $(ASFLAGS_FPU_DISABLE_SIMD_ENABLE)
$(obj)/wolfcrypt/src/poly1305_asm.o: asflags-y = $(WOLFSSL_ASFLAGS) $(ASFLAGS_FPU_DISABLE_SIMD_ENABLE)
$(obj)/wolfcrypt/src/sha256_asm.o: asflags-y = $(WOLFSSL_ASFLAGS) $(ASFLAGS_FPU_DISABLE_SIMD_ENABLE)
$(obj)/wolfcrypt/src/sha512_asm.o: asflags-y = $(WOLFSSL_ASFLAGS) $(ASFLAGS_FPU_DISABLE_SIMD_ENABLE)
$(obj)/wolfcrypt/src/fe_x25519_asm.o: asflags-y = $(WOLFSSL_ASFLAGS) $(ASFLAGS_FPU_DISABLE_SIMD_ENABLE)

# these _asms are kernel-compatible, but they still irritate objtool:
$(obj)/wolfcrypt/src/aes_asm.o: OBJECT_FILES_NON_STANDARD := y
$(obj)/wolfcrypt/src/aes_gcm_asm.o: OBJECT_FILES_NON_STANDARD := y
$(obj)/wolfcrypt/src/chacha_asm.o: OBJECT_FILES_NON_STANDARD := y
$(obj)/wolfcrypt/src/poly1305_asm.o: OBJECT_FILES_NON_STANDARD := y
$(obj)/wolfcrypt/src/sha256_asm.o: OBJECT_FILES_NON_STANDARD := y
$(obj)/wolfcrypt/src/sha512_asm.o: OBJECT_FILES_NON_STANDARD := y
$(obj)/wolfcrypt/src/fe_x25519_asm.o: OBJECT_FILES_NON_STANDARD := y

# auto-generate the exported symbol list, leveraging the WOLFSSL_API visibility tags.
# exclude symbols that don't match wc_* or wolf*.
$(src)/linuxkm/module_exports.c: $(src)/linuxkm/module_exports.c.template $(WOLFSSL_OBJ_TARGETS)
	@cp $< $@
	@readelf --symbols --wide $(WOLFSSL_OBJ_TARGETS) | awk '/^ *[0-9]+: /{if ($$8 !~ /^(wc_|wolf)/){next;} if (($$4 == "FUNC") && ($$5 == "GLOBAL") && ($$6 == "DEFAULT")) { print "EXPORT_SYMBOL(" $$8 ");"; }}' >> $@
	@readelf --symbols --wide $(WOLFSSL_OBJ_TARGETS) | \
		awk '/^ *[0-9]+: / { \
			if ($$8 !~ /^(wc_|wolf)/){next;} \
			if (($$4 == "FUNC") && ($$5 == "GLOBAL") && ($$6 == "DEFAULT")) { \
				print "EXPORT_SYMBOL(" $$8 ");"; \
			} \
		}' >> $@
	@echo -e '#ifndef NO_CRYPT_TEST\nEXPORT_SYMBOL(wolfcrypt_test);\n#endif' >> $@

clean-files := module_exports.c
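For reference, the generated linuxkm/module_exports.c is just the copied template with one EXPORT_SYMBOL() line appended per wc_*/wolf* function that readelf reports as FUNC/GLOBAL/DEFAULT, plus the wolfcrypt_test export from the final echo. A sketch of what the appended portion might look like (the real list is produced at build time and depends on the configured feature set; the symbol names below are only illustrative examples of existing wolfCrypt API functions):

    /* Illustrative sketch only -- the actual file is generated at build
     * time and its symbol list depends on the enabled features. */
    EXPORT_SYMBOL(wc_InitSha256);
    EXPORT_SYMBOL(wc_Sha256Update);
    EXPORT_SYMBOL(wc_Sha256Final);
    EXPORT_SYMBOL(wc_AesSetKey);
    EXPORT_SYMBOL(wc_AesCbcEncrypt);
    #ifndef NO_CRYPT_TEST
    EXPORT_SYMBOL(wolfcrypt_test);
    #endif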
@@ -339,12 +339,16 @@ int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input,

    #ifdef HAVE_INTEL_AVX2
        if (IS_INTEL_AVX2(cpuidFlags)) {
            SAVE_VECTOR_REGISTERS();
            chacha_encrypt_avx2(ctx, input, output, msglen);
            RESTORE_VECTOR_REGISTERS();
            return 0;
        }
    #endif
        if (IS_INTEL_AVX1(cpuidFlags)) {
            SAVE_VECTOR_REGISTERS();
            chacha_encrypt_avx1(ctx, input, output, msglen);
            RESTORE_VECTOR_REGISTERS();
            return 0;
        }
        else {
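The hunk above shows the pattern applied throughout the C wrappers in this commit: every branch that calls into a vectorized _asm routine is bracketed by SAVE_VECTOR_REGISTERS()/RESTORE_VECTOR_REGISTERS(), while the scalar fallback path needs no bracketing because it never touches the FPU/SIMD unit. A minimal sketch of that shape, with hypothetical names standing in for the wolfSSL routines:

    /* Hypothetical names -- not the actual wolfSSL routines. */
    static void process(struct state* ctx, byte* out, const byte* in, word32 len)
    {
        if (cpu_has_avx2()) {                 /* runtime CPUID dispatch */
            SAVE_VECTOR_REGISTERS();          /* make FPU/SIMD use safe in the kernel */
            encrypt_avx2(ctx, in, out, len);  /* vectorized _asm implementation */
            RESTORE_VECTOR_REGISTERS();
            return;
        }
        if (cpu_has_avx1()) {
            SAVE_VECTOR_REGISTERS();
            encrypt_avx1(ctx, in, out, len);
            RESTORE_VECTOR_REGISTERS();
            return;
        }
        encrypt_c(ctx, in, out, len);         /* scalar fallback: no bracketing needed */
    }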
@@ -85,7 +85,16 @@ int wc_curve25519_make_pub(int public_size, byte* pub, int private_size,
    }
#else
    fe_init();

#if defined(USE_INTEL_SPEEDUP) || defined(WOLFSSL_ARMASM)
    SAVE_VECTOR_REGISTERS();
#endif

    ret = curve25519(pub, priv, kCurve25519BasePoint);

#if defined(USE_INTEL_SPEEDUP) || defined(WOLFSSL_ARMASM)
    RESTORE_VECTOR_REGISTERS();
#endif
#endif

    return ret;
@@ -148,7 +157,15 @@ int wc_curve25519_shared_secret_ex(curve25519_key* private_key,
#ifdef FREESCALE_LTC_ECC
    ret = nxp_ltc_curve25519(&o, private_key->k.point, &public_key->p, kLTC_Curve25519 /* input point P on Curve25519 */);
#else
#if defined(USE_INTEL_SPEEDUP) || defined(WOLFSSL_ARMASM)
    SAVE_VECTOR_REGISTERS();
#endif

    ret = curve25519(o, private_key->k.point, public_key->p.point);

#if defined(USE_INTEL_SPEEDUP) || defined(WOLFSSL_ARMASM)
    RESTORE_VECTOR_REGISTERS();
#endif
#endif
    if (ret != 0) {
#ifdef FREESCALE_LTC_ECC
@@ -267,7 +267,9 @@ static void poly1305_blocks(Poly1305* ctx, const unsigned char *m,
{
#ifdef USE_INTEL_SPEEDUP
    /* AVX2 is handled in wc_Poly1305Update. */
    SAVE_VECTOR_REGISTERS();
    poly1305_blocks_avx(ctx, m, bytes);
    RESTORE_VECTOR_REGISTERS();
#elif defined(POLY130564)
    const word64 hibit = (ctx->finished) ? 0 : ((word64)1 << 40); /* 1 << 128 */
    word64 r0,r1,r2;
@@ -394,7 +396,9 @@ static void poly1305_block(Poly1305* ctx, const unsigned char *m)
{
#ifdef USE_INTEL_SPEEDUP
    /* No call to poly1305_block when AVX2, AVX2 does 4 blocks at a time. */
    SAVE_VECTOR_REGISTERS();
    poly1305_block_avx(ctx, m);
    RESTORE_VECTOR_REGISTERS();
#else
    poly1305_blocks(ctx, m, POLY1305_BLOCK_SIZE);
#endif
@@ -430,12 +434,14 @@ int wc_Poly1305SetKey(Poly1305* ctx, const byte* key, word32 keySz)
        intel_flags = cpuid_get_flags();
        cpu_flags_set = 1;
    }
    SAVE_VECTOR_REGISTERS();
#ifdef HAVE_INTEL_AVX2
    if (IS_INTEL_AVX2(intel_flags))
        poly1305_setkey_avx2(ctx, key);
    else
#endif
        poly1305_setkey_avx(ctx, key);
    RESTORE_VECTOR_REGISTERS();
#elif defined(POLY130564)

    /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
@@ -510,12 +516,14 @@ int wc_Poly1305Final(Poly1305* ctx, byte* mac)
        return BAD_FUNC_ARG;

#ifdef USE_INTEL_SPEEDUP
    SAVE_VECTOR_REGISTERS();
#ifdef HAVE_INTEL_AVX2
    if (IS_INTEL_AVX2(intel_flags))
        poly1305_final_avx2(ctx, mac);
    else
#endif
        poly1305_final_avx(ctx, mac);
    RESTORE_VECTOR_REGISTERS();
#elif defined(POLY130564)

    /* process the remaining block */
@@ -712,11 +720,13 @@ int wc_Poly1305Update(Poly1305* ctx, const byte* m, word32 bytes)
            if (ctx->leftover < sizeof(ctx->buffer))
                return 0;

            SAVE_VECTOR_REGISTERS();
            if (!ctx->started)
                poly1305_calc_powers_avx2(ctx);
            poly1305_blocks_avx2(ctx, ctx->buffer, sizeof(ctx->buffer));
            ctx->leftover = 0;
            }
        } else
            SAVE_VECTOR_REGISTERS();

        /* process full blocks */
        if (bytes >= sizeof(ctx->buffer)) {
@@ -735,6 +745,7 @@ int wc_Poly1305Update(Poly1305* ctx, const byte* m, word32 bytes)
                ctx->buffer[ctx->leftover + i] = m[i];
            ctx->leftover += bytes;
        }
        RESTORE_VECTOR_REGISTERS();
    }
    else
#endif
@@ -309,9 +309,26 @@ static int InitSha256(wc_Sha256* sha256)
    /* = NULL */
    static int transform_check = 0;
    static word32 intel_flags;
    static int Transform_Sha256_is_vectorized = 0;

    #define XTRANSFORM(S, D) (*Transform_Sha256_p)((S),(D))
    #define XTRANSFORM_LEN(S, D, L) (*Transform_Sha256_Len_p)((S),(D),(L))
    #define XTRANSFORM(S, D) ({ \
        int _ret; \
        if (Transform_Sha256_is_vectorized) \
            SAVE_VECTOR_REGISTERS(); \
        _ret = (*Transform_Sha256_p)((S),(D)); \
        if (Transform_Sha256_is_vectorized) \
            RESTORE_VECTOR_REGISTERS(); \
        _ret; \
    })
    #define XTRANSFORM_LEN(S, D, L) ({ \
        int _ret; \
        if (Transform_Sha256_is_vectorized) \
            SAVE_VECTOR_REGISTERS(); \
        _ret = (*Transform_Sha256_Len_p)((S),(D),(L)); \
        if (Transform_Sha256_is_vectorized) \
            RESTORE_VECTOR_REGISTERS(); \
        _ret; \
    })

    static void Sha256_SetTransform(void)
    {
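The rewritten XTRANSFORM()/XTRANSFORM_LEN() macros rely on the GCC/Clang statement-expression extension: a ({ ... }) block is an expression whose value is that of its last statement, so the macro can conditionally save and restore the vector registers around the indirect call and still hand the transform's return code back to the caller. A small self-contained illustration of the same shape (stand-in names, not wolfSSL code; builds as ordinary userspace C with gcc or clang):

    #include <stdio.h>

    /* Stand-ins for the real SAVE/RESTORE_VECTOR_REGISTERS() macros. */
    #define SAVE_REGS()    puts("save vector registers")
    #define RESTORE_REGS() puts("restore vector registers")

    static int transform_is_vectorized = 1;
    static int do_transform(int x) { return x * 2; }

    /* Statement expression: evaluates to _ret, the last expression in the block. */
    #define WRAPPED_TRANSFORM(x) ({              \
        int _ret;                                \
        if (transform_is_vectorized)             \
            SAVE_REGS();                         \
        _ret = do_transform(x);                  \
        if (transform_is_vectorized)             \
            RESTORE_REGS();                      \
        _ret;                                    \
    })

    int main(void) {
        printf("result = %d\n", WRAPPED_TRANSFORM(21)); /* prints 42 after save/restore */
        return 0;
    }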
@@ -327,6 +344,7 @@ static int InitSha256(wc_Sha256* sha256)
        if (IS_INTEL_BMI2(intel_flags)) {
            Transform_Sha256_p = Transform_Sha256_AVX2_RORX;
            Transform_Sha256_Len_p = Transform_Sha256_AVX2_RORX_Len;
            Transform_Sha256_is_vectorized = 1;
        }
        else
    #endif
@@ -334,11 +352,13 @@ static int InitSha256(wc_Sha256* sha256)
        {
            Transform_Sha256_p = Transform_Sha256_AVX2;
            Transform_Sha256_Len_p = Transform_Sha256_AVX2_Len;
            Transform_Sha256_is_vectorized = 1;
        }
    #ifdef HAVE_INTEL_RORX
        else {
            Transform_Sha256_p = Transform_Sha256_AVX1_RORX;
            Transform_Sha256_Len_p = Transform_Sha256_AVX1_RORX_Len;
            Transform_Sha256_is_vectorized = 1;
        }
    #endif
    }
@@ -348,12 +368,14 @@ static int InitSha256(wc_Sha256* sha256)
        if (IS_INTEL_AVX1(intel_flags)) {
            Transform_Sha256_p = Transform_Sha256_AVX1;
            Transform_Sha256_Len_p = Transform_Sha256_AVX1_Len;
            Transform_Sha256_is_vectorized = 1;
        }
        else
    #endif
        {
            Transform_Sha256_p = Transform_Sha256;
            Transform_Sha256_Len_p = NULL;
            Transform_Sha256_is_vectorized = 0;
        }

        transform_check = 1;
@@ -337,9 +337,31 @@ static int InitSha512(wc_Sha512* sha512)
    static int (*Transform_Sha512_Len_p)(wc_Sha512* sha512, word32 len) = NULL;
    static int transform_check = 0;
    static int intel_flags;
    static int Transform_Sha512_is_vectorized = 0;
#if 0
    #define Transform_Sha512(sha512) (*Transform_Sha512_p)(sha512)
    #define Transform_Sha512_Len(sha512, len) \
        (*Transform_Sha512_Len_p)(sha512, len)
#endif

    #define Transform_Sha512(sha512) ({ \
        int _ret; \
        if (Transform_Sha512_is_vectorized) \
            SAVE_VECTOR_REGISTERS(); \
        _ret = (*Transform_Sha512_p)(sha512); \
        if (Transform_Sha512_is_vectorized) \
            RESTORE_VECTOR_REGISTERS(); \
        _ret; \
    })
    #define Transform_Sha512_Len(sha512, len) ({ \
        int _ret; \
        if (Transform_Sha512_is_vectorized) \
            SAVE_VECTOR_REGISTERS(); \
        _ret = (*Transform_Sha512_Len_p)(sha512, len); \
        if (Transform_Sha512_is_vectorized) \
            RESTORE_VECTOR_REGISTERS(); \
        _ret; \
    })

    static void Sha512_SetTransform(void)
    {
@@ -354,17 +376,20 @@ static int InitSha512(wc_Sha512* sha512)
        if (IS_INTEL_BMI2(intel_flags)) {
            Transform_Sha512_p = Transform_Sha512_AVX2_RORX;
            Transform_Sha512_Len_p = Transform_Sha512_AVX2_RORX_Len;
            Transform_Sha512_is_vectorized = 1;
        }
        else
    #endif
        if (1) {
            Transform_Sha512_p = Transform_Sha512_AVX2;
            Transform_Sha512_Len_p = Transform_Sha512_AVX2_Len;
            Transform_Sha512_is_vectorized = 1;
        }
    #ifdef HAVE_INTEL_RORX
        else {
            Transform_Sha512_p = Transform_Sha512_AVX1_RORX;
            Transform_Sha512_Len_p = Transform_Sha512_AVX1_RORX_Len;
            Transform_Sha512_is_vectorized = 1;
        }
    #endif
    }
@@ -374,10 +399,14 @@ static int InitSha512(wc_Sha512* sha512)
        if (IS_INTEL_AVX1(intel_flags)) {
            Transform_Sha512_p = Transform_Sha512_AVX1;
            Transform_Sha512_Len_p = Transform_Sha512_AVX1_Len;
            Transform_Sha512_is_vectorized = 1;
        }
        else
    #endif
        {
            Transform_Sha512_p = _Transform_Sha512;
            Transform_Sha512_is_vectorized = 1;
        }

        transform_check = 1;
    }
@@ -88,12 +88,21 @@
    #endif
    #include <linux/net.h>
    #include <linux/slab.h>
    #if defined(WOLFSSL_AESNI) || defined(USE_INTEL_SPEEDUP) || defined(WOLFSSL_ARMASM)
        #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
            #include <asm/i387.h>
        #else
            #include <asm/simd.h>
        #endif
    #if defined(WOLFSSL_AESNI) || defined(USE_INTEL_SPEEDUP)
        #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
            #include <asm/i387.h>
        #else
            #include <asm/simd.h>
        #endif
        #define SAVE_VECTOR_REGISTERS() kernel_fpu_begin()
        #define RESTORE_VECTOR_REGISTERS() kernel_fpu_end()
    #elif defined(WOLFSSL_ARMASM)
        #include <asm/fpsimd.h>
        #define SAVE_VECTOR_REGISTERS() ({ preempt_disable(); fpsimd_preserve_current_state(); })
        #define RESTORE_VECTOR_REGISTERS() ({ fpsimd_restore_current_state(); preempt_enable(); })
    #else
        #define SAVE_VECTOR_REGISTERS() ({})
        #define RESTORE_VECTOR_REGISTERS() ({})
    #endif
    _Pragma("GCC diagnostic pop");
@@ -122,13 +131,10 @@
    /* the rigmarole around kstrtol() here is to accommodate its warn-unused-result attribute. */
    #define XATOI(s) ({ long _xatoi_res = 0; int _xatoi_ret = kstrtol(s, 10, &_xatoi_res); if (_xatoi_ret != 0) { _xatoi_res = 0; } (int)_xatoi_res; })

    #define SAVE_VECTOR_REGISTERS() kernel_fpu_begin()
    #define RESTORE_VECTOR_REGISTERS() kernel_fpu_end()

#else /* ! WOLFSSL_LINUXKM */

    #define SAVE_VECTOR_REGISTERS()
    #define RESTORE_VECTOR_REGISTERS()
    #define SAVE_VECTOR_REGISTERS() ({})
    #define RESTORE_VECTOR_REGISTERS() ({})

#endif /* WOLFSSL_LINUXKM */
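One caveat worth keeping in mind when using these macros: on x86 they expand to kernel_fpu_begin()/kernel_fpu_end(), which disable preemption, so the code between a SAVE and its matching RESTORE must not sleep or call anything that can schedule, and the two calls have to stay balanced on every path, which is exactly what the wrappers above are arranged to guarantee. A hedged usage sketch (hypothetical wrapper and _asm routine, not wolfSSL API):

    /* Hypothetical example of how a kernel-resident caller is expected to
     * bracket SIMD use; some_avx_routine() is a stand-in for a vectorized
     * _asm implementation. */
    static int my_vectorized_op(byte* out, const byte* in, word32 len)
    {
        int ret;
        SAVE_VECTOR_REGISTERS();              /* kernel_fpu_begin() on x86 */
        ret = some_avx_routine(out, in, len); /* must not sleep in here */
        RESTORE_VECTOR_REGISTERS();           /* kernel_fpu_end() on x86 */
        return ret;
    }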