Merge pull request #7833 from SparkiDev/riscv-sha512-asm

RISC-V 64: Add assembly code for SHA-512
This commit is contained in:
David Garske
2024-08-06 10:39:10 -07:00
committed by GitHub
5 changed files with 1743 additions and 15 deletions

View File

@ -234,6 +234,9 @@ src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/riscv/riscv-64-sha256
endif BUILD_RISCV_ASM
if BUILD_SHA512
if BUILD_RISCV_ASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/riscv/riscv-64-sha512.c
else
if BUILD_ARMASM_NEON
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-sha512.c
if BUILD_ARMASM_INLINE
@ -262,6 +265,7 @@ endif BUILD_INTELASM
endif !BUILD_X86_ASM
endif !BUILD_ARMASM
endif !BUILD_ARMASM_NEON
endif !BUILD_RISCV_ASM
endif BUILD_SHA512
if BUILD_SHA3
@ -393,6 +397,9 @@ src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/riscv/riscv-64-sha256
endif BUILD_RISCV_ASM
if BUILD_SHA512
if BUILD_RISCV_ASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/riscv/riscv-64-sha512.c
else
if BUILD_ARMASM_NEON
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-sha512.c
if BUILD_ARMASM_INLINE
@ -419,6 +426,7 @@ src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sha512_asm.S
endif BUILD_INTELASM
endif !BUILD_ARMASM
endif !BUILD_ARMASM_NEON
endif !BUILD_RISCV_ASM
endif BUILD_SHA512
if BUILD_SHA3
@ -738,6 +746,9 @@ endif !BUILD_FIPS_CURRENT
if !BUILD_FIPS_CURRENT
if BUILD_SHA512
if BUILD_RISCV_ASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/riscv/riscv-64-sha512.c
else
if BUILD_ARMASM_NEON
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-sha512.c
if BUILD_ARMASM_INLINE
@ -766,6 +777,7 @@ endif BUILD_INTELASM
endif !BUILD_X86_ASM
endif !BUILD_ARMASM
endif !BUILD_ARMASM_NEON
endif !BUILD_RISCV_ASM
endif BUILD_SHA512
endif !BUILD_FIPS_CURRENT

View File

@ -600,13 +600,6 @@ static WC_INLINE void Sha256Transform(wc_Sha256* sha256, const byte* data,
(0b010 << 12) | (0b1110111 << 0) | \
(vd << 7) | (vs1 << 15) | (vs2 << 20))
#ifndef WOLFSSL_RISCV_VECTOR_BASE_BIT_MANIPULATION
/* Indices to use with gather vector instruction to reverse bytes. */
static const word32 rev_idx[4] = {
0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
};
#endif /* !WOLFSSL_RISCV_VECTOR_BASE_BIT_MANIPULATION */
#define RND4(w0, w1, w2, w3, k) \
/* Four rounds of compression. */ \
VADD_VV(REG_V7, w0, k) \
@ -690,9 +683,6 @@ static void Sha256Transform(wc_Sha256* sha256, const byte* data,
: [blocks] "+r" (blocks), [data] "+r" (data), [k] "+r" (k)
: [digest] "r" (sha256->digest)
#ifndef WOLFSSL_RISCV_VECTOR_BASE_BIT_MANIPULATION
, [rev_idx] "r" (rev_idx)
#endif
: "cc", "memory", "t0", "t1"
);
}
@ -884,10 +874,6 @@ static WC_INLINE void Sha256Final(wc_Sha256* sha256, byte* hash)
#endif
:
: [digest] "r" (sha256->digest), [hash] "r" (hash)
#if defined(WOLFSSL_RISCV_VECTOR_CRYPTO_ASM) && \
!defined(WOLFSSL_RISCV_VECTOR_BASE_BIT_MANIPULATION)
, [rev_idx] "r" (rev_idx)
#endif
: "cc", "memory", "t0", "t1", "t2", "t3", "t4", "t5", "t6",
"a4", "a5", "a6", "a7"
);

File diff suppressed because it is too large Load Diff

View File

@ -28,7 +28,7 @@
#if (defined(WOLFSSL_SHA512) || defined(WOLFSSL_SHA384)) && \
(!defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_ARMASM_NO_NEON)) && \
!defined(WOLFSSL_PSOC6_CRYPTO)
!defined(WOLFSSL_PSOC6_CRYPTO) && !defined(WOLFSSL_RISCV_ASM)
/* determine if we are using Espressif SHA hardware acceleration */
#undef WOLFSSL_USE_ESP32_CRYPT_HASH_HW

View File

@ -165,6 +165,12 @@
(0 << 28) | ((cnt - 1) << 29) | (vd << 7) | (rs1 << 15))
/* Vector register load wrappers.
 *
 * Each macro forwards vd and rs1 unchanged to VLRE_V together with a fixed
 * register count (1, 2, 4 or 8) and an element width (WIDTH_64 or WIDTH_32).
 *
 * NOTE(review): these presumably encode the RISC-V vector whole-register
 * loads (vl<n>re<width>.v), with vd the first destination vector register
 * and rs1 the register holding the source address. If so, vd is expected to
 * be a multiple of the register count - confirm against VLRE_V and the
 * vector specification.
 */
/* Load 1 vector register with 64-bit components. */
#define VL1RE64_V(vd, rs1) VLRE_V(vd, rs1, 1, WIDTH_64)
/* Load 2 vector registers with 64-bit components. */
#define VL2RE64_V(vd, rs1) VLRE_V(vd, rs1, 2, WIDTH_64)
/* Load 4 vector registers with 64-bit components. */
#define VL4RE64_V(vd, rs1) VLRE_V(vd, rs1, 4, WIDTH_64)
/* Load 8 vector registers with 64-bit components. */
#define VL8RE64_V(vd, rs1) VLRE_V(vd, rs1, 8, WIDTH_64)
/* Load 1 vector register with 32-bit components. */
#define VL1RE32_V(vd, rs1) VLRE_V(vd, rs1, 1, WIDTH_32)
/* Load 2 Vector register with 32-bit components. */