mirror of
https://github.com/wolfSSL/wolfssl.git
synced 2025-07-29 18:27:29 +02:00
Kyber: Improve performance
Unroll loops and use larger types. Allow benchmark to run each kyber parameter separately. Allow benchmark to have -ml-dsa specified which runs all parameters. Fix thumb2 ASM C code to not have duplicate includes and ifdef checks. Fix thumb2 ASM C code to include error-crypt.h to ensure no empty translation unit. Check for WOLFSSL_SHA3 before including Thumb2 SHA-3 assembly code.
This commit is contained in:
@ -654,7 +654,6 @@
|
||||
#define BENCH_RSA 0x00000002
|
||||
#define BENCH_RSA_SZ 0x00000004
|
||||
#define BENCH_DH 0x00000010
|
||||
#define BENCH_KYBER 0x00000020
|
||||
#define BENCH_ECC_MAKEKEY 0x00001000
|
||||
#define BENCH_ECC 0x00002000
|
||||
#define BENCH_ECC_ENCRYPT 0x00004000
|
||||
@ -681,11 +680,22 @@
|
||||
#define BENCH_SAKKE 0x80000000
|
||||
|
||||
/* Post-Quantum Asymmetric algorithms. */
|
||||
#define BENCH_KYBER512 0x00000020
|
||||
#define BENCH_KYBER768 0x00000040
|
||||
#define BENCH_KYBER1024 0x00000080
|
||||
#define BENCH_KYBER (BENCH_KYBER512 | BENCH_KYBER768 | \
|
||||
BENCH_KYBER1024)
|
||||
#define BENCH_FALCON_LEVEL1_SIGN 0x00000001
|
||||
#define BENCH_FALCON_LEVEL5_SIGN 0x00000002
|
||||
#define BENCH_DILITHIUM_LEVEL2_SIGN 0x04000000
|
||||
#define BENCH_DILITHIUM_LEVEL3_SIGN 0x08000000
|
||||
#define BENCH_DILITHIUM_LEVEL5_SIGN 0x10000000
|
||||
#define BENCH_ML_DSA_44_SIGN 0x04000000
|
||||
#define BENCH_ML_DSA_65_SIGN 0x08000000
|
||||
#define BENCH_ML_DSA_87_SIGN 0x10000000
|
||||
#define BENCH_ML_DSA_SIGN (BENCH_ML_DSA_44_SIGN | \
|
||||
BENCH_ML_DSA_65_SIGN | \
|
||||
BENCH_ML_DSA_87_SIGN)
|
||||
|
||||
/* Post-Quantum Asymmetric algorithms. (Part 2) */
|
||||
#define BENCH_SPHINCS_FAST_LEVEL1_SIGN 0x00000001
|
||||
@ -959,9 +969,6 @@ static const bench_alg bench_asym_opt[] = {
|
||||
#ifndef NO_DH
|
||||
{ "-dh", BENCH_DH },
|
||||
#endif
|
||||
#ifdef WOLFSSL_HAVE_KYBER
|
||||
{ "-kyber", BENCH_KYBER },
|
||||
#endif
|
||||
#ifdef HAVE_ECC
|
||||
{ "-ecc-kg", BENCH_ECC_MAKEKEY },
|
||||
{ "-ecc", BENCH_ECC },
|
||||
@ -1060,7 +1067,8 @@ static const bench_pq_hash_sig_alg bench_pq_hash_sig_opt[] = {
|
||||
};
|
||||
#endif /* BENCH_PQ_STATEFUL_HBS */
|
||||
|
||||
#if defined(HAVE_FALCON) || defined(HAVE_DILITHIUM) || defined(HAVE_SPHINCS)
|
||||
#if defined(WOLFSSL_HAVE_KYBER) || defined(HAVE_FALCON) || \
|
||||
defined(HAVE_DILITHIUM) || defined(HAVE_SPHINCS)
|
||||
/* The post-quantum-specific mapping of command line option to bit values and
|
||||
* OQS name. */
|
||||
typedef struct bench_pq_alg {
|
||||
@ -1073,18 +1081,25 @@ typedef struct bench_pq_alg {
|
||||
/* All recognized post-quantum asymmetric algorithm choosing command line
|
||||
* options. */
|
||||
static const bench_pq_alg bench_pq_asym_opt[] = {
|
||||
{ "-pq", 0xffffffff },
|
||||
{ "-pq", 0xffffffff },
|
||||
#ifdef WOLFSSL_HAVE_KYBER
|
||||
{ "-kyber", BENCH_KYBER },
|
||||
{ "-kyber512", BENCH_KYBER512 },
|
||||
{ "-kyber768", BENCH_KYBER768 },
|
||||
{ "-kyber1024", BENCH_KYBER1024 },
|
||||
#endif
|
||||
#if defined(HAVE_FALCON)
|
||||
{ "-falcon_level1", BENCH_FALCON_LEVEL1_SIGN },
|
||||
{ "-falcon_level5", BENCH_FALCON_LEVEL5_SIGN },
|
||||
{ "-falcon_level1", BENCH_FALCON_LEVEL1_SIGN },
|
||||
{ "-falcon_level5", BENCH_FALCON_LEVEL5_SIGN },
|
||||
#endif
|
||||
#if defined(HAVE_DILITHIUM)
|
||||
{ "-dilithium_level2", BENCH_DILITHIUM_LEVEL2_SIGN },
|
||||
{ "-dilithium_level3", BENCH_DILITHIUM_LEVEL3_SIGN },
|
||||
{ "-dilithium_level5", BENCH_DILITHIUM_LEVEL5_SIGN },
|
||||
{ "-ml-dsa-44", BENCH_DILITHIUM_LEVEL2_SIGN },
|
||||
{ "-ml-dsa-65", BENCH_DILITHIUM_LEVEL3_SIGN },
|
||||
{ "-ml-dsa-87", BENCH_DILITHIUM_LEVEL5_SIGN },
|
||||
{ "-dilithium_level2", BENCH_DILITHIUM_LEVEL2_SIGN },
|
||||
{ "-dilithium_level3", BENCH_DILITHIUM_LEVEL3_SIGN },
|
||||
{ "-dilithium_level5", BENCH_DILITHIUM_LEVEL5_SIGN },
|
||||
{ "-ml-dsa", BENCH_ML_DSA_SIGN },
|
||||
{ "-ml-dsa-44", BENCH_ML_DSA_44_SIGN },
|
||||
{ "-ml-dsa-65", BENCH_ML_DSA_65_SIGN },
|
||||
{ "-ml-dsa-87", BENCH_ML_DSA_87_SIGN },
|
||||
#endif
|
||||
{ NULL, 0 }
|
||||
};
|
||||
@ -3576,15 +3591,21 @@ static void* benchmarks_do(void* args)
|
||||
#endif
|
||||
|
||||
#ifdef WOLFSSL_HAVE_KYBER
|
||||
if (bench_all || (bench_asym_algs & BENCH_KYBER)) {
|
||||
if (bench_all || (bench_pq_asym_algs & BENCH_KYBER)) {
|
||||
#ifdef WOLFSSL_KYBER512
|
||||
bench_kyber(KYBER512);
|
||||
if (bench_pq_asym_algs & BENCH_KYBER512) {
|
||||
bench_kyber(KYBER512);
|
||||
}
|
||||
#endif
|
||||
#ifdef WOLFSSL_KYBER768
|
||||
bench_kyber(KYBER768);
|
||||
if (bench_pq_asym_algs & BENCH_KYBER768) {
|
||||
bench_kyber(KYBER768);
|
||||
}
|
||||
#endif
|
||||
#ifdef WOLFSSL_KYBER1024
|
||||
bench_kyber(KYBER1024);
|
||||
if (bench_pq_asym_algs & BENCH_KYBER1024) {
|
||||
bench_kyber(KYBER1024);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
@ -14523,7 +14544,8 @@ static void Usage(void)
|
||||
print_alg(bench_asym_opt[i].str, &line);
|
||||
for (i=0; bench_other_opt[i].str != NULL; i++)
|
||||
print_alg(bench_other_opt[i].str, &line);
|
||||
#if defined(HAVE_FALCON) || defined(HAVE_DILITHIUM) || defined(HAVE_SPHINCS)
|
||||
#if defined(WOLFSSL_HAVE_KYBER) || defined(HAVE_FALCON) || \
|
||||
defined(HAVE_DILITHIUM) || defined(HAVE_SPHINCS)
|
||||
for (i=0; bench_pq_asym_opt[i].str != NULL; i++)
|
||||
print_alg(bench_pq_asym_opt[i].str, &line);
|
||||
#if defined(HAVE_SPHINCS)
|
||||
@ -14799,8 +14821,8 @@ int wolfcrypt_benchmark_main(int argc, char** argv)
|
||||
optMatched = 1;
|
||||
}
|
||||
}
|
||||
#if defined(HAVE_FALCON) || defined(HAVE_DILITHIUM) || \
|
||||
defined(HAVE_SPHINCS)
|
||||
#if defined(WOLFSSL_HAVE_KYBER) || defined(HAVE_FALCON) || \
|
||||
defined(HAVE_DILITHIUM) || defined(HAVE_SPHINCS)
|
||||
/* Known asymmetric post-quantum algorithms */
|
||||
for (i=0; !optMatched && bench_pq_asym_opt[i].str != NULL; i++) {
|
||||
if (string_matches(argv[1], bench_pq_asym_opt[i].str)) {
|
||||
|
@ -32,6 +32,8 @@
|
||||
#ifdef WOLFSSL_ARMASM
|
||||
#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__)
|
||||
#ifndef WOLFSSL_ARMASM_INLINE
|
||||
#ifdef WOLFSSL_SHA3
|
||||
#ifndef WOLFSSL_ARMASM_NO_NEON
|
||||
.text
|
||||
.type L_sha3_arm2_neon_rt, %object
|
||||
.size L_sha3_arm2_neon_rt, 192
|
||||
@ -85,60 +87,6 @@ L_sha3_arm2_neon_rt:
|
||||
.word 0x0
|
||||
.word 0x80008008
|
||||
.word 0x80000000
|
||||
.text
|
||||
.type L_sha3_arm2_rt, %object
|
||||
.size L_sha3_arm2_rt, 192
|
||||
.align 4
|
||||
L_sha3_arm2_rt:
|
||||
.word 0x1
|
||||
.word 0x0
|
||||
.word 0x8082
|
||||
.word 0x0
|
||||
.word 0x808a
|
||||
.word 0x80000000
|
||||
.word 0x80008000
|
||||
.word 0x80000000
|
||||
.word 0x808b
|
||||
.word 0x0
|
||||
.word 0x80000001
|
||||
.word 0x0
|
||||
.word 0x80008081
|
||||
.word 0x80000000
|
||||
.word 0x8009
|
||||
.word 0x80000000
|
||||
.word 0x8a
|
||||
.word 0x0
|
||||
.word 0x88
|
||||
.word 0x0
|
||||
.word 0x80008009
|
||||
.word 0x0
|
||||
.word 0x8000000a
|
||||
.word 0x0
|
||||
.word 0x8000808b
|
||||
.word 0x0
|
||||
.word 0x8b
|
||||
.word 0x80000000
|
||||
.word 0x8089
|
||||
.word 0x80000000
|
||||
.word 0x8003
|
||||
.word 0x80000000
|
||||
.word 0x8002
|
||||
.word 0x80000000
|
||||
.word 0x80
|
||||
.word 0x80000000
|
||||
.word 0x800a
|
||||
.word 0x0
|
||||
.word 0x8000000a
|
||||
.word 0x80000000
|
||||
.word 0x80008081
|
||||
.word 0x80000000
|
||||
.word 0x8080
|
||||
.word 0x80000000
|
||||
.word 0x80000001
|
||||
.word 0x0
|
||||
.word 0x80008008
|
||||
.word 0x80000000
|
||||
#ifndef WOLFSSL_ARMASM_NO_NEON
|
||||
.text
|
||||
.align 4
|
||||
.globl BlockSha3
|
||||
@ -407,6 +355,59 @@ L_sha3_arm32_neon_begin:
|
||||
.size BlockSha3,.-BlockSha3
|
||||
#endif /* WOLFSSL_ARMASM_NO_NEON */
|
||||
#ifdef WOLFSSL_ARMASM_NO_NEON
|
||||
.text
|
||||
.type L_sha3_arm2_rt, %object
|
||||
.size L_sha3_arm2_rt, 192
|
||||
.align 4
|
||||
L_sha3_arm2_rt:
|
||||
.word 0x1
|
||||
.word 0x0
|
||||
.word 0x8082
|
||||
.word 0x0
|
||||
.word 0x808a
|
||||
.word 0x80000000
|
||||
.word 0x80008000
|
||||
.word 0x80000000
|
||||
.word 0x808b
|
||||
.word 0x0
|
||||
.word 0x80000001
|
||||
.word 0x0
|
||||
.word 0x80008081
|
||||
.word 0x80000000
|
||||
.word 0x8009
|
||||
.word 0x80000000
|
||||
.word 0x8a
|
||||
.word 0x0
|
||||
.word 0x88
|
||||
.word 0x0
|
||||
.word 0x80008009
|
||||
.word 0x0
|
||||
.word 0x8000000a
|
||||
.word 0x0
|
||||
.word 0x8000808b
|
||||
.word 0x0
|
||||
.word 0x8b
|
||||
.word 0x80000000
|
||||
.word 0x8089
|
||||
.word 0x80000000
|
||||
.word 0x8003
|
||||
.word 0x80000000
|
||||
.word 0x8002
|
||||
.word 0x80000000
|
||||
.word 0x80
|
||||
.word 0x80000000
|
||||
.word 0x800a
|
||||
.word 0x0
|
||||
.word 0x8000000a
|
||||
.word 0x80000000
|
||||
.word 0x80008081
|
||||
.word 0x80000000
|
||||
.word 0x8080
|
||||
.word 0x80000000
|
||||
.word 0x80000001
|
||||
.word 0x0
|
||||
.word 0x80008008
|
||||
.word 0x80000000
|
||||
.text
|
||||
.align 4
|
||||
.globl BlockSha3
|
||||
@ -2391,6 +2392,7 @@ L_sha3_arm32_begin:
|
||||
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
.size BlockSha3,.-BlockSha3
|
||||
#endif /* WOLFSSL_ARMASM_NO_NEON */
|
||||
#endif /* WOLFSSL_SHA3 */
|
||||
#endif /* !__aarch64__ && __arm__ && !__thumb__ */
|
||||
#endif /* WOLFSSL_ARMASM */
|
||||
|
||||
|
@ -51,6 +51,8 @@
|
||||
#define __asm__ __asm
|
||||
#define __volatile__ volatile
|
||||
#endif /* __KEIL__ */
|
||||
#ifdef WOLFSSL_SHA3
|
||||
#ifndef WOLFSSL_ARMASM_NO_NEON
|
||||
static const uint64_t L_sha3_arm2_neon_rt[] = {
|
||||
0x0000000000000001UL, 0x0000000000008082UL,
|
||||
0x800000000000808aUL, 0x8000000080008000UL,
|
||||
@ -66,29 +68,12 @@ static const uint64_t L_sha3_arm2_neon_rt[] = {
|
||||
0x0000000080000001UL, 0x8000000080008008UL,
|
||||
};
|
||||
|
||||
static const uint64_t L_sha3_arm2_rt[] = {
|
||||
0x0000000000000001UL, 0x0000000000008082UL,
|
||||
0x800000000000808aUL, 0x8000000080008000UL,
|
||||
0x000000000000808bUL, 0x0000000080000001UL,
|
||||
0x8000000080008081UL, 0x8000000000008009UL,
|
||||
0x000000000000008aUL, 0x0000000000000088UL,
|
||||
0x0000000080008009UL, 0x000000008000000aUL,
|
||||
0x000000008000808bUL, 0x800000000000008bUL,
|
||||
0x8000000000008089UL, 0x8000000000008003UL,
|
||||
0x8000000000008002UL, 0x8000000000000080UL,
|
||||
0x000000000000800aUL, 0x800000008000000aUL,
|
||||
0x8000000080008081UL, 0x8000000000008080UL,
|
||||
0x0000000080000001UL, 0x8000000080008008UL,
|
||||
};
|
||||
|
||||
#include <wolfssl/wolfcrypt/sha3.h>
|
||||
|
||||
#ifndef WOLFSSL_ARMASM_NO_NEON
|
||||
void BlockSha3(word64* state_p)
|
||||
{
|
||||
register word64* state asm ("r0") = (word64*)state_p;
|
||||
register uint64_t* L_sha3_arm2_neon_rt_c asm ("r1") = (uint64_t*)&L_sha3_arm2_neon_rt;
|
||||
register uint64_t* L_sha3_arm2_rt_c asm ("r2") = (uint64_t*)&L_sha3_arm2_rt;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"sub sp, sp, #16\n\t"
|
||||
@ -348,16 +333,31 @@ void BlockSha3(word64* state_p)
|
||||
"vst1.8 {d20-d23}, [%[state]]!\n\t"
|
||||
"vst1.8 {d24}, [%[state]]\n\t"
|
||||
"add sp, sp, #16\n\t"
|
||||
: [state] "+r" (state), [L_sha3_arm2_neon_rt] "+r" (L_sha3_arm2_neon_rt_c), [L_sha3_arm2_rt] "+r" (L_sha3_arm2_rt_c)
|
||||
: [state] "+r" (state), [L_sha3_arm2_neon_rt] "+r" (L_sha3_arm2_neon_rt_c)
|
||||
:
|
||||
: "memory", "r3", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", "cc"
|
||||
: "memory", "r2", "r3", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", "cc"
|
||||
);
|
||||
}
|
||||
|
||||
#endif /* WOLFSSL_ARMASM_NO_NEON */
|
||||
#ifdef WOLFSSL_ARMASM_NO_NEON
|
||||
static const uint64_t L_sha3_arm2_rt[] = {
|
||||
0x0000000000000001UL, 0x0000000000008082UL,
|
||||
0x800000000000808aUL, 0x8000000080008000UL,
|
||||
0x000000000000808bUL, 0x0000000080000001UL,
|
||||
0x8000000080008081UL, 0x8000000000008009UL,
|
||||
0x000000000000008aUL, 0x0000000000000088UL,
|
||||
0x0000000080008009UL, 0x000000008000000aUL,
|
||||
0x000000008000808bUL, 0x800000000000008bUL,
|
||||
0x8000000000008089UL, 0x8000000000008003UL,
|
||||
0x8000000000008002UL, 0x8000000000000080UL,
|
||||
0x000000000000800aUL, 0x800000008000000aUL,
|
||||
0x8000000080008081UL, 0x8000000000008080UL,
|
||||
0x0000000080000001UL, 0x8000000080008008UL,
|
||||
};
|
||||
|
||||
#include <wolfssl/wolfcrypt/sha3.h>
|
||||
|
||||
#ifdef WOLFSSL_ARMASM_NO_NEON
|
||||
void BlockSha3(word64* state_p)
|
||||
{
|
||||
register word64* state asm ("r0") = (word64*)state_p;
|
||||
@ -2348,6 +2348,7 @@ void BlockSha3(word64* state_p)
|
||||
}
|
||||
|
||||
#endif /* WOLFSSL_ARMASM_NO_NEON */
|
||||
#endif /* WOLFSSL_SHA3 */
|
||||
#endif /* !__aarch64__ && __arm__ && !__thumb__ */
|
||||
#endif /* WOLFSSL_ARMASM */
|
||||
#endif /* !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) */
|
||||
|
@ -28,19 +28,12 @@
|
||||
#include <config.h>
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
#include <wolfssl/wolfcrypt/error-crypt.h>
|
||||
|
||||
#ifdef WOLFSSL_ARMASM
|
||||
#if !defined(__aarch64__) && defined(__thumb__)
|
||||
#include <stdint.h>
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
#ifdef WOLFSSL_ARMASM_INLINE
|
||||
|
||||
#ifdef WOLFSSL_ARMASM
|
||||
#if !defined(__aarch64__) && defined(__thumb__)
|
||||
|
||||
#ifdef __IAR_SYSTEMS_ICC__
|
||||
#define __asm__ asm
|
||||
#define __volatile__ volatile
|
||||
@ -3056,7 +3049,4 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long
|
||||
#endif /* !NO_AES */
|
||||
#endif /* !__aarch64__ && __thumb__ */
|
||||
#endif /* WOLFSSL_ARMASM */
|
||||
#endif /* !defined(__aarch64__) && defined(__thumb__) */
|
||||
#endif /* WOLFSSL_ARMASM */
|
||||
|
||||
#endif /* WOLFSSL_ARMASM_INLINE */
|
||||
|
@ -28,19 +28,12 @@
|
||||
#include <config.h>
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
#include <wolfssl/wolfcrypt/error-crypt.h>
|
||||
|
||||
#ifdef WOLFSSL_ARMASM
|
||||
#if !defined(__aarch64__) && defined(__thumb__)
|
||||
#include <stdint.h>
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
#ifdef WOLFSSL_ARMASM_INLINE
|
||||
|
||||
#ifdef WOLFSSL_ARMASM
|
||||
#if !defined(__aarch64__) && defined(__thumb__)
|
||||
|
||||
#ifdef __IAR_SYSTEMS_ICC__
|
||||
#define __asm__ asm
|
||||
#define __volatile__ volatile
|
||||
@ -6904,7 +6897,4 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c)
|
||||
#endif /* HAVE_CURVE25519 || HAVE_ED25519 */
|
||||
#endif /* !__aarch64__ && __thumb__ */
|
||||
#endif /* WOLFSSL_ARMASM */
|
||||
#endif /* !defined(__aarch64__) && defined(__thumb__) */
|
||||
#endif /* WOLFSSL_ARMASM */
|
||||
|
||||
#endif /* WOLFSSL_ARMASM_INLINE */
|
||||
|
@ -28,19 +28,12 @@
|
||||
#include <config.h>
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
#include <wolfssl/wolfcrypt/error-crypt.h>
|
||||
|
||||
#ifdef WOLFSSL_ARMASM
|
||||
#if !defined(__aarch64__) && defined(__thumb__)
|
||||
#include <stdint.h>
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
#ifdef WOLFSSL_ARMASM_INLINE
|
||||
|
||||
#ifdef WOLFSSL_ARMASM
|
||||
#if !defined(__aarch64__) && defined(__thumb__)
|
||||
|
||||
#ifdef __IAR_SYSTEMS_ICC__
|
||||
#define __asm__ asm
|
||||
#define __volatile__ volatile
|
||||
@ -1472,7 +1465,4 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len)
|
||||
#endif /* !NO_SHA256 */
|
||||
#endif /* !__aarch64__ && __thumb__ */
|
||||
#endif /* WOLFSSL_ARMASM */
|
||||
#endif /* !defined(__aarch64__) && defined(__thumb__) */
|
||||
#endif /* WOLFSSL_ARMASM */
|
||||
|
||||
#endif /* WOLFSSL_ARMASM_INLINE */
|
||||
|
@ -34,6 +34,7 @@
|
||||
#ifndef WOLFSSL_ARMASM_INLINE
|
||||
.thumb
|
||||
.syntax unified
|
||||
#ifdef WOLFSSL_SHA3
|
||||
.text
|
||||
.type L_sha3_thumb2_rt, %object
|
||||
.size L_sha3_thumb2_rt, 192
|
||||
@ -1165,6 +1166,7 @@ L_sha3_thumb2_begin:
|
||||
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
/* Cycle Count = 1505 */
|
||||
.size BlockSha3,.-BlockSha3
|
||||
#endif /* WOLFSSL_SHA3 */
|
||||
#endif /* !__aarch64__ && __thumb__ */
|
||||
#endif /* WOLFSSL_ARMASM */
|
||||
|
||||
|
@ -28,19 +28,12 @@
|
||||
#include <config.h>
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
#include <wolfssl/wolfcrypt/error-crypt.h>
|
||||
|
||||
#ifdef WOLFSSL_ARMASM
|
||||
#if !defined(__aarch64__) && defined(__thumb__)
|
||||
#include <stdint.h>
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
#ifdef WOLFSSL_ARMASM_INLINE
|
||||
|
||||
#ifdef WOLFSSL_ARMASM
|
||||
#if !defined(__aarch64__) && defined(__thumb__)
|
||||
|
||||
#ifdef __IAR_SYSTEMS_ICC__
|
||||
#define __asm__ asm
|
||||
#define __volatile__ volatile
|
||||
@ -50,6 +43,7 @@
|
||||
#define __asm__ __asm
|
||||
#define __volatile__ volatile
|
||||
#endif /* __KEIL__ */
|
||||
#ifdef WOLFSSL_SHA3
|
||||
static const uint64_t L_sha3_thumb2_rt[] = {
|
||||
0x0000000000000001UL, 0x0000000000008082UL,
|
||||
0x800000000000808aUL, 0x8000000080008000UL,
|
||||
@ -1162,9 +1156,7 @@ void BlockSha3(word64* state)
|
||||
);
|
||||
}
|
||||
|
||||
#endif /* WOLFSSL_SHA3 */
|
||||
#endif /* !__aarch64__ && __thumb__ */
|
||||
#endif /* WOLFSSL_ARMASM */
|
||||
#endif /* !defined(__aarch64__) && defined(__thumb__) */
|
||||
#endif /* WOLFSSL_ARMASM */
|
||||
|
||||
#endif /* WOLFSSL_ARMASM_INLINE */
|
||||
|
@ -28,19 +28,12 @@
|
||||
#include <config.h>
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
#include <wolfssl/wolfcrypt/error-crypt.h>
|
||||
|
||||
#ifdef WOLFSSL_ARMASM
|
||||
#if !defined(__aarch64__) && defined(__thumb__)
|
||||
#include <stdint.h>
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
#include <wolfssl/wolfcrypt/settings.h>
|
||||
#ifdef WOLFSSL_ARMASM_INLINE
|
||||
|
||||
#ifdef WOLFSSL_ARMASM
|
||||
#if !defined(__aarch64__) && defined(__thumb__)
|
||||
|
||||
#ifdef __IAR_SYSTEMS_ICC__
|
||||
#define __asm__ asm
|
||||
#define __volatile__ volatile
|
||||
@ -3587,7 +3580,4 @@ void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
|
||||
#endif /* WOLFSSL_SHA512 */
|
||||
#endif /* !__aarch64__ && __thumb__ */
|
||||
#endif /* WOLFSSL_ARMASM */
|
||||
#endif /* !defined(__aarch64__) && defined(__thumb__) */
|
||||
#endif /* WOLFSSL_ARMASM */
|
||||
|
||||
#endif /* WOLFSSL_ARMASM_INLINE */
|
||||
|
@ -2056,6 +2056,8 @@ static void kyber_cbd_eta3(sword16* p, const byte* r)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
#if defined(WOLFSSL_SMALL_STACK) || defined(WOLFSSL_KYBER_NO_LARGE_CODE) || \
|
||||
defined(BIG_ENDIAN_ORDER)
|
||||
#ifndef WORD64_AVAILABLE
|
||||
/* Calculate four integer coefficients at a time. */
|
||||
for (i = 0; i < KYBER_N; i += 4) {
|
||||
@ -2129,7 +2131,59 @@ static void kyber_cbd_eta3(sword16* p, const byte* r)
|
||||
/* Move over used bytes. */
|
||||
r += 6;
|
||||
}
|
||||
#endif
|
||||
#endif /* WORD64_AVAILABLE */
|
||||
#else
|
||||
/* Calculate sixteen integer coefficients at a time. */
|
||||
for (i = 0; i < KYBER_N; i += 16) {
|
||||
const word32* r32 = (const word32*)r;
|
||||
/* Take the next 12 bytes, little endian, as 24 bit values. */
|
||||
word32 t0 = r32[0] & 0xffffff;
|
||||
word32 t1 = ((r32[0] >> 24) | (r32[1] << 8)) & 0xffffff;
|
||||
word32 t2 = ((r32[1] >> 16) | (r32[2] << 16)) & 0xffffff;
|
||||
word32 t3 = r32[2] >> 8 ;
|
||||
word32 d0;
|
||||
word32 d1;
|
||||
word32 d2;
|
||||
word32 d3;
|
||||
|
||||
/* Add second and third bits to first. */
|
||||
d0 = (t0 >> 0) & 0x00249249;
|
||||
d0 += (t0 >> 1) & 0x00249249;
|
||||
d0 += (t0 >> 2) & 0x00249249;
|
||||
d1 = (t1 >> 0) & 0x00249249;
|
||||
d1 += (t1 >> 1) & 0x00249249;
|
||||
d1 += (t1 >> 2) & 0x00249249;
|
||||
d2 = (t2 >> 0) & 0x00249249;
|
||||
d2 += (t2 >> 1) & 0x00249249;
|
||||
d2 += (t2 >> 2) & 0x00249249;
|
||||
d3 = (t3 >> 0) & 0x00249249;
|
||||
d3 += (t3 >> 1) & 0x00249249;
|
||||
d3 += (t3 >> 2) & 0x00249249;
|
||||
/* Values 0, 1, 2 or 3 in consecutive 3 bits.
|
||||
* 0 - 1/8, 1 - 3/8, 2 - 3/8, 3 - 1/8. */
|
||||
|
||||
p[i + 0] = ETA3_SUB(d0, 0);
|
||||
p[i + 1] = ETA3_SUB(d0, 1);
|
||||
p[i + 2] = ETA3_SUB(d0, 2);
|
||||
p[i + 3] = ETA3_SUB(d0, 3);
|
||||
p[i + 4] = ETA3_SUB(d1, 0);
|
||||
p[i + 5] = ETA3_SUB(d1, 1);
|
||||
p[i + 6] = ETA3_SUB(d1, 2);
|
||||
p[i + 7] = ETA3_SUB(d1, 3);
|
||||
p[i + 8] = ETA3_SUB(d2, 0);
|
||||
p[i + 9] = ETA3_SUB(d2, 1);
|
||||
p[i + 10] = ETA3_SUB(d2, 2);
|
||||
p[i + 11] = ETA3_SUB(d2, 3);
|
||||
p[i + 12] = ETA3_SUB(d3, 0);
|
||||
p[i + 13] = ETA3_SUB(d3, 1);
|
||||
p[i + 14] = ETA3_SUB(d3, 2);
|
||||
p[i + 15] = ETA3_SUB(d3, 3);
|
||||
/* -3-1/64, -2-6/64, -1-15/64, 0-20/64, 1-15/64, 2-6/64, 3-1/64 */
|
||||
|
||||
/* Move over used bytes. */
|
||||
r += 12;
|
||||
}
|
||||
#endif /* WOLFSSL_SMALL_STACK || WOLFSSL_KYBER_NO_LARGE_CODE || BIG_ENDIAN_ORDER */
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -2677,6 +2731,8 @@ static void kyber_vec_compress_10_c(byte* r, sword16* v, unsigned int kp)
|
||||
|
||||
/* Each polynomial. */
|
||||
for (i = 0; i < kp; i++) {
|
||||
#if defined(WOLFSSL_SMALL_STACK) || defined(WOLFSSL_KYBER_NO_LARGE_CODE) || \
|
||||
defined(BIG_ENDIAN_ORDER)
|
||||
/* Each 4 polynomial coefficients. */
|
||||
for (j = 0; j < KYBER_N; j += 4) {
|
||||
#ifdef WOLFSSL_KYBER_SMALL
|
||||
@ -2710,6 +2766,44 @@ static void kyber_vec_compress_10_c(byte* r, sword16* v, unsigned int kp)
|
||||
/* Move over set bytes. */
|
||||
r += 5;
|
||||
}
|
||||
#else
|
||||
/* Each 16 polynomial coefficients. */
|
||||
for (j = 0; j < KYBER_N; j += 16) {
|
||||
/* Compress sixteen polynomial values to 10 bits each. */
|
||||
sword16 t0 = TO_COMP_WORD_10(v, i, j, 0);
|
||||
sword16 t1 = TO_COMP_WORD_10(v, i, j, 1);
|
||||
sword16 t2 = TO_COMP_WORD_10(v, i, j, 2);
|
||||
sword16 t3 = TO_COMP_WORD_10(v, i, j, 3);
|
||||
sword16 t4 = TO_COMP_WORD_10(v, i, j, 4);
|
||||
sword16 t5 = TO_COMP_WORD_10(v, i, j, 5);
|
||||
sword16 t6 = TO_COMP_WORD_10(v, i, j, 6);
|
||||
sword16 t7 = TO_COMP_WORD_10(v, i, j, 7);
|
||||
sword16 t8 = TO_COMP_WORD_10(v, i, j, 8);
|
||||
sword16 t9 = TO_COMP_WORD_10(v, i, j, 9);
|
||||
sword16 t10 = TO_COMP_WORD_10(v, i, j, 10);
|
||||
sword16 t11 = TO_COMP_WORD_10(v, i, j, 11);
|
||||
sword16 t12 = TO_COMP_WORD_10(v, i, j, 12);
|
||||
sword16 t13 = TO_COMP_WORD_10(v, i, j, 13);
|
||||
sword16 t14 = TO_COMP_WORD_10(v, i, j, 14);
|
||||
sword16 t15 = TO_COMP_WORD_10(v, i, j, 15);
|
||||
|
||||
word32* r32 = (word32*)r;
|
||||
/* Pack sixteen 10-bit values into byte array. */
|
||||
r32[0] = t0 | ((word32)t1 << 10) | ((word32)t2 << 20) |
|
||||
((word32)t3 << 30);
|
||||
r32[1] = (t3 >> 2) | ((word32)t4 << 8) | ((word32)t5 << 18) |
|
||||
((word32)t6 << 28);
|
||||
r32[2] = (t6 >> 4) | ((word32)t7 << 6) | ((word32)t8 << 16) |
|
||||
((word32)t9 << 26);
|
||||
r32[3] = (t9 >> 6) | ((word32)t10 << 4) | ((word32)t11 << 14) |
|
||||
((word32)t12 << 24);
|
||||
r32[4] = (t12 >> 8) | ((word32)t13 << 2) | ((word32)t14 << 12) |
|
||||
((word32)t15 << 22);
|
||||
|
||||
/* Move over set bytes. */
|
||||
r += 20;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user