Merge pull request #7818 from SparkiDev/riscv-chacha-asm

RISC-V ChaCha20: assembly implementations

David Garske, 2024-08-02 07:52:53 -07:00, committed by GitHub.
9 changed files with 2465 additions and 53 deletions.

View File

@@ -3077,10 +3077,14 @@ do
             AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_RISCV_CARRYLESS"
             ;;
         zkn|zkned)
-            # AES encrypt/decrpyt
+            # AES encrypt/decrpyt, SHA-2
             ENABLED_RISCV_ASM=yes
             AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_RISCV_SCALAR_CRYPTO_ASM"
             ;;
+        zv)
+            ENABLED_RISCV_ASM=yes
+            AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_RISCV_VECTOR"
+            ;;
         zvkg)
             # VGMUL, VHHSH
             ENABLED_RISCV_ASM=yes
@@ -3097,12 +3101,12 @@ do
             AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_RISCV_VECTOR_BASE_BIT_MANIPULATION"
             ;;
         zvkned)
-            # Vector AES
+            # Vector AES, SHA-2
             ENABLED_RISCV_ASM=yes
             AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_RISCV_VECTOR_CRYPTO_ASM"
             ;;
         *)
-            AC_MSG_ERROR([Invalid RISC-V option [yes,zbkb,zbb,zbc,zbkc,zkn,zkned,zvkg,zvbc,zvbb,zvkb,zvkned]: $ENABLED_RISCV_ASM.])
+            AC_MSG_ERROR([Invalid RISC-V option [yes,zbkb,zbb,zbc,zbkc,zkn,zkned,zv,zvkg,zvbc,zvbb,zvkb,zvkned]: $ENABLED_RISCV_ASM.])
             break
             ;;
     esac
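Note: the new `zv` option only defines WOLFSSL_RISCV_VECTOR; nothing else is gated in this hunk. As a sanity check of how such a configure-time define is consumed, here is a minimal, self-contained sketch. Only WOLFSSL_RISCV_VECTOR and WOLFSSL_RISCV_SCALAR_CRYPTO_ASM are real macros from this diff; the function and strings are illustrative:

    #include <stdio.h>

    /* Hypothetical dispatcher: reports which ChaCha20 backend a build with
     * the flags above would select. Illustrative only, not wolfSSL code. */
    static const char* chacha_backend(void)
    {
    #if defined(WOLFSSL_RISCV_VECTOR)
        return "RISC-V vector (RVV) assembly";
    #elif defined(WOLFSSL_RISCV_SCALAR_CRYPTO_ASM)
        return "RISC-V scalar crypto assembly";
    #else
        return "portable C";
    #endif
    }

    int main(void)
    {
        printf("ChaCha20 backend: %s\n", chacha_backend());
        return 0;
    }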

View File

@@ -971,17 +971,21 @@ if BUILD_CHACHA
 if BUILD_ARMASM_NEON
 src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-chacha.c
 else
+if BUILD_RISCV_ASM
+src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/riscv/riscv-64-chacha.c
+else
 src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/chacha.c
+endif !BUILD_RISCV_ASM
 if !BUILD_X86_ASM
 if BUILD_INTELASM
 src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/chacha_asm.S
-endif
-endif
-endif
+endif BUILD_INTELASM
+endif !BUILD_X86_ASM
+endif !BUILD_ARMASM_NEON
 if BUILD_POLY1305
 src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/chacha20_poly1305.c
-endif
-endif
+endif BUILD_POLY1305
+endif BUILD_CHACHA
 if !BUILD_INLINE
 src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/misc.c

View File

@@ -38,6 +38,9 @@ Public domain.
 #if defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_ARMASM_NO_NEON)
     /* implementation is located in wolfcrypt/src/port/arm/armv8-chacha.c */
+#elif defined(WOLFSSL_RISCV_ASM)
+    /* implementation located in wolfcrypt/src/port/riscv/riscv-64-chacha.c */
 #else
 #if defined(HAVE_CHACHA)

View File

@ -75,18 +75,6 @@ static WC_INLINE void memcpy16(byte* out, const byte* in)
#endif #endif
/* vd = vs2 << uimm */
#define VSLL_VI(vd, vs2, uimm) \
ASM_WORD((0b100101 << 26) | (0b1 << 25) | \
(0b011 << 12) | (0b1010111 << 0) | \
(vd << 7) | (uimm << 15) | (vs2 << 20))
/* vd = vs2 >> uimm */
#define VSRL_VI(vd, vs2, uimm) \
ASM_WORD((0b101000 << 26) | (0b1 << 25) | \
(0b011 << 12) | (0b1010111 << 0) | \
(vd << 7) | (uimm << 15) | (vs2 << 20))
/* Vector register set if equal: vd[i] = vs1[i] == vs2[i] ? 1 : 0 */ /* Vector register set if equal: vd[i] = vs1[i] == vs2[i] ? 1 : 0 */
#define VMSEQ_VV(vd, vs1, vs2) \ #define VMSEQ_VV(vd, vs1, vs2) \
ASM_WORD((0b011000 << 26) | (0b1 << 25) | \ ASM_WORD((0b011000 << 26) | (0b1 << 25) | \

File diff suppressed because it is too large.
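The suppressed diff is almost certainly the new wolfcrypt/src/port/riscv/riscv-64-chacha.c registered in the include.am hunk above; it accounts for most of this PR's ~2465 added lines. For reference, the operation it implements is the ChaCha20 quarter round of RFC 8439, shown here in portable C. The vector code applies the same add/xor/rotate pattern lane-wise, composing each rotate from VSLL_VI/VSRL_VI on base RVV, or using a single vror.vi where Zvbb/Zvkb is available:

    #include <stdint.h>

    /* 32-bit rotate left, as specified for ChaCha20 (RFC 8439). */
    #define ROTL32(x, n) (((x) << (n)) | ((x) >> (32 - (n))))

    /* One ChaCha20 quarter round over four words of the 16-word state. */
    static void chacha_quarter_round(uint32_t s[16], int a, int b, int c, int d)
    {
        s[a] += s[b]; s[d] ^= s[a]; s[d] = ROTL32(s[d], 16);
        s[c] += s[d]; s[b] ^= s[c]; s[b] = ROTL32(s[b], 12);
        s[a] += s[b]; s[d] ^= s[a]; s[d] = ROTL32(s[d], 8);
        s[c] += s[d]; s[b] ^= s[c]; s[b] = ROTL32(s[b], 7);
    }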

View File

@@ -846,41 +846,41 @@ static WC_INLINE void Sha256Final(wc_Sha256* sha256, byte* hash)
 #elif defined(WOLFSSL_RISCV_BASE_BIT_MANIPULATION)
         "ld t1, 0(%[digest])\n\t"
         "ld t3, 8(%[digest])\n\t"
-        "ld s1, 16(%[digest])\n\t"
-        "ld s3, 24(%[digest])\n\t"
+        "ld a5, 16(%[digest])\n\t"
+        "ld a7, 24(%[digest])\n\t"
         REV8(REG_T1, REG_T1)
         REV8(REG_T3, REG_T3)
-        REV8(REG_S1, REG_S1)
-        REV8(REG_S3, REG_S3)
+        REV8(REG_A5, REG_A5)
+        REV8(REG_A7, REG_A7)
         "srli t0, t1, 32\n\t"
         "srli t2, t3, 32\n\t"
-        "srli s0, s1, 32\n\t"
-        "srli s2, s3, 32\n\t"
+        "srli a4, a5, 32\n\t"
+        "srli a6, a7, 32\n\t"
         "sw t0, 0(%[hash])\n\t"
         "sw t1, 4(%[hash])\n\t"
         "sw t2, 8(%[hash])\n\t"
         "sw t3, 12(%[hash])\n\t"
-        "sw s0, 16(%[hash])\n\t"
-        "sw s1, 20(%[hash])\n\t"
-        "sw s2, 24(%[hash])\n\t"
-        "sw s3, 28(%[hash])\n\t"
+        "sw a4, 16(%[hash])\n\t"
+        "sw a5, 20(%[hash])\n\t"
+        "sw a6, 24(%[hash])\n\t"
+        "sw a7, 28(%[hash])\n\t"
 #else
         LOAD_WORD_REV(t0, 0, %[digest], t2, t3, t4)
         LOAD_WORD_REV(t1, 4, %[digest], t2, t3, t4)
-        LOAD_WORD_REV(s0, 8, %[digest], t2, t3, t4)
-        LOAD_WORD_REV(s1, 12, %[digest], t2, t3, t4)
+        LOAD_WORD_REV(a4, 8, %[digest], t2, t3, t4)
+        LOAD_WORD_REV(a5, 12, %[digest], t2, t3, t4)
         "sw t0, 0(%[hash])\n\t"
         "sw t1, 4(%[hash])\n\t"
-        "sw s0, 8(%[hash])\n\t"
-        "sw s1, 12(%[hash])\n\t"
+        "sw a4, 8(%[hash])\n\t"
+        "sw a5, 12(%[hash])\n\t"
         LOAD_WORD_REV(t0, 16, %[digest], t2, t3, t4)
         LOAD_WORD_REV(t1, 20, %[digest], t2, t3, t4)
-        LOAD_WORD_REV(s0, 24, %[digest], t2, t3, t4)
-        LOAD_WORD_REV(s1, 28, %[digest], t2, t3, t4)
+        LOAD_WORD_REV(a4, 24, %[digest], t2, t3, t4)
+        LOAD_WORD_REV(a5, 28, %[digest], t2, t3, t4)
         "sw t0, 16(%[hash])\n\t"
         "sw t1, 20(%[hash])\n\t"
-        "sw s0, 24(%[hash])\n\t"
-        "sw s1, 28(%[hash])\n\t"
+        "sw a4, 24(%[hash])\n\t"
+        "sw a5, 28(%[hash])\n\t"
 #endif
         :
         : [digest] "r" (sha256->digest), [hash] "r" (hash)
@@ -889,7 +889,7 @@ static WC_INLINE void Sha256Final(wc_Sha256* sha256, byte* hash)
         , [rev_idx] "r" (rev_idx)
 #endif
         : "cc", "memory", "t0", "t1", "t2", "t3", "t4", "t5", "t6",
-          "s0", "s1", "s2", "s3"
+          "a4", "a5", "a6", "a7"
     );
 }
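The register swap in this hunk (s0..s3 to a4..a7) is not cosmetic: in the RISC-V calling convention the s-registers are callee-saved, so listing them as clobbers forces the compiler to emit save/restore code around the asm block, while the a-registers are caller-saved temporaries that cost nothing to clobber. A minimal sketch of the pattern (riscv64-only; the asm body is illustrative, not from this PR):

    #include <stdint.h>

    /* Swap the 32-bit halves of a 64-bit value. Clobbering caller-saved
     * a4/a5 is free; clobbering s-registers would add spill/reload code. */
    static inline uint64_t swap_halves(uint64_t x)
    {
        uint64_t r;
        __asm__ volatile (
            "srli a4, %[x], 32\n\t"   /* a4 = high half */
            "slli a5, %[x], 32\n\t"   /* a5 = low half shifted up */
            "or   %[r], a4, a5\n\t"
            : [r] "=r" (r)
            : [x] "r" (x)
            : "a4", "a5"
        );
        return r;
    }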

View File

@ -7789,10 +7789,10 @@ WOLFSSL_TEST_SUBROUTINE wc_test_ret_t chacha_test(void)
return WC_TEST_RET_ENC_EC(ret); return WC_TEST_RET_ENC_EC(ret);
if (XMEMCMP(plain_big, input_big, CHACHA_BIG_TEST_SIZE)) if (XMEMCMP(plain_big, input_big, CHACHA_BIG_TEST_SIZE))
return WC_TEST_RET_ENC_NC; return WC_TEST_RET_ENC_I(i);
if (XMEMCMP(cipher_big, cipher_big_result, CHACHA_BIG_TEST_SIZE)) if (XMEMCMP(cipher_big, cipher_big_result, CHACHA_BIG_TEST_SIZE))
return WC_TEST_RET_ENC_NC; return WC_TEST_RET_ENC_I(i);
} }
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC) #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_NO_MALLOC)

View File

@ -82,7 +82,8 @@ typedef struct ChaCha {
byte extra[12]; byte extra[12];
#endif #endif
word32 left; /* number of bytes leftover */ word32 left; /* number of bytes leftover */
#if defined(USE_INTEL_CHACHA_SPEEDUP) || defined(WOLFSSL_ARMASM) #if defined(USE_INTEL_CHACHA_SPEEDUP) || defined(WOLFSSL_ARMASM) || \
defined(WOLFSSL_RISCV_ASM)
word32 over[CHACHA_CHUNK_WORDS]; word32 over[CHACHA_CHUNK_WORDS];
#endif #endif
} ChaCha; } ChaCha;
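The `over` buffer that this hunk now enables for WOLFSSL_RISCV_ASM holds a full generated keystream block whose tail was not yet consumed, with `left` counting the unused bytes. A hedged sketch of the consuming logic, assuming that role for the two fields (the struct and function here are hypothetical stand-ins, not wolfSSL code):

    #include <stddef.h>
    #include <stdint.h>

    #define CHACHA_CHUNK_BYTES 64

    /* Hypothetical partial-block state mirroring the fields above. */
    typedef struct {
        uint32_t over[CHACHA_CHUNK_BYTES / 4]; /* leftover keystream block */
        uint32_t left;                         /* unused keystream bytes   */
    } chacha_tail;

    /* XOR leftover keystream against the start of the next fragment;
     * returns how many bytes were consumed before full-block processing
     * resumes. */
    static size_t drain_leftover(chacha_tail* ctx, uint8_t* out,
                                 const uint8_t* in, size_t len)
    {
        const uint8_t* ks = (const uint8_t*)ctx->over;
        size_t use = (len < ctx->left) ? len : ctx->left;
        size_t off = CHACHA_CHUNK_BYTES - ctx->left;
        size_t i;

        for (i = 0; i < use; i++)
            out[i] = in[i] ^ ks[off + i];
        ctx->left -= (uint32_t)use;
        return use;
    }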

View File

@@ -137,6 +137,12 @@
              (0b0010011 << 0) | \
              (rs << 15) | (rd << 7))
 
+#define RORIW(rd, rs, imm) \
+    ASM_WORD((0b0110000 << 25) | (0b101 << 12) | \
+             (0b0011011 << 0) | \
+             (imm << 20) | (rs << 15) | (rd << 7))
+
 /* rd = rs1[0..31] | rs2[0..31]. */
 #define PACK(rd, rs1, rs2) \
     ASM_WORD((0b0000100 << 25) | (0b100 << 12) | 0b0110011 | \
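RORIW is the Zbb rotate-right-immediate on words: it rotates the low 32 bits of rs right by imm and sign-extends the result to 64 bits, which is exactly the rotate shape ChaCha needs. A portable C model of the instruction's semantics (my reading of the Zbb spec, not code from this PR):

    #include <stdint.h>

    /* C model of "roriw rd, rs, imm" (Zbb): rotate the low 32 bits of rs
     * right by imm, then sign-extend the 32-bit result to 64 bits. */
    static int64_t roriw(uint64_t rs, unsigned imm)
    {
        uint32_t w = (uint32_t)rs;
        uint32_t r = (imm & 31)
                   ? ((w >> (imm & 31)) | (w << (32 - (imm & 31))))
                   : w; /* avoid undefined shift by 32 */
        return (int64_t)(int32_t)r;
    }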
@@ -184,16 +190,36 @@
 /* Move from vector register to vector registor. */
 #define VMV_V_V(vd, vs1) \
     ASM_WORD((0b1010111 << 0) | (0b000 << 12) | (0b1 << 25) | \
-             (0b010111 << 26) | (vd << 7) | (vs1 << 15))
+             (0b010111 << 26) | ((vd) << 7) | ((vs1) << 15))
 
 /* Splat register to each component of the vector registor. */
 #define VMV_V_X(vd, rs1) \
     ASM_WORD((0b1010111 << 0) | (0b100 << 12) | (0b1 << 25) | \
-             (0b010111 << 26) | (vd << 7) | (rs1 << 15))
+             (0b010111 << 26) | ((vd) << 7) | ((rs1) << 15))
+
+/* Splat immediate to each component of the vector registor. */
+#define VMV_V_I(vd, imm) \
+    ASM_WORD((0b1010111 << 0) | (0b011 << 12) | (0b1 << 25) | \
+             (0b010111 << 26) | ((vd) << 7) | ((imm) << 15))
 
 /* Move n vector registers to vector registers. */
 #define VMVR_V(vd, vs2, n) \
     ASM_WORD((0b1010111 << 0) | (0b011 << 12) | (0b1 << 25) | \
-             (0b100111 << 26) | (vd << 7) | ((n-1) << 15) | \
-             (vs2 << 20))
+             (0b100111 << 26) | ((vd) << 7) | ((n-1) << 15) | \
+             ((vs2) << 20))
+
+/*
+ * Logic
+ */
+
+/* vd = vs2 << uimm */
+#define VSLL_VI(vd, vs2, uimm) \
+    ASM_WORD((0b100101 << 26) | (0b1 << 25) | \
+             (0b011 << 12) | (0b1010111 << 0) | \
+             (vd << 7) | (uimm << 15) | (vs2 << 20))
+
+/* vd = vs2 >> uimm */
+#define VSRL_VI(vd, vs2, uimm) \
+    ASM_WORD((0b101000 << 26) | (0b1 << 25) | \
+             (0b011 << 12) | (0b1010111 << 0) | \
+             (vd << 7) | (uimm << 15) | (vs2 << 20))
 
 /*
@@ -235,13 +261,13 @@
 #define VMV_X_S(rd, vs2) \
     ASM_WORD((0b010000 << 26) | (0b1 << 25) | \
              (0b010 << 12) | (0b1010111 << 0) | \
-             (rd << 7) | (vs2 << 20))
+             ((rd) << 7) | ((vs2) << 20))
 
 /* vd[0] = x[rs1] */
 #define VMV_S_X(vd, rs1) \
     ASM_WORD((0b010000 << 26) | (0b1 << 25) | \
              (0b110 << 12) | (0b1010111 << 0) | \
-             (vd << 7) | (rs1 << 15))
+             ((vd) << 7) | ((rs1) << 15))
 
 /* vd[shift..max] = vs2[0..max-shift]
  * Sliding up doesn't change bottom part of destination.
  */
@@ -249,7 +275,7 @@
 #define VSLIDEUP_VI(vd, vs2, shift) \
     ASM_WORD((0b001110 << 26) | (0b1 << 25) | \
              (0b011 << 12) | (0b1010111 << 0) | \
-             (vd << 7) | (shift << 15) | (vs2 << 20))
+             ((vd) << 7) | ((shift) << 15) | ((vs2) << 20))
 
 /* vd[0..max-shift] = vs2[shift..max]
  * Sliding down change top part of destination.
  */
@@ -257,13 +283,18 @@
 #define VSLIDEDOWN_VI(vd, vs2, shift) \
     ASM_WORD((0b001111 << 26) | (0b1 << 25) | \
              (0b011 << 12) | (0b1010111 << 0) | \
-             (vd << 7) | (shift << 15) | (vs2 << 20))
+             ((vd) << 7) | ((shift) << 15) | ((vs2) << 20))
 
 /* vd[i] = vs1[vs2[i]] */
 #define VRGATHER_VV(vd, vs1, vs2) \
     ASM_WORD((0b001100 << 26) | (0b1 << 25) | \
              (0b000 << 12) | (0b1010111 << 0) | \
-             (vd << 7) | (vs1 << 15) | (vs2 << 20))
+             ((vd) << 7) | ((vs1) << 15) | ((vs2) << 20))
+
+#define VID_V(vd) \
+    ASM_WORD((0b010100 << 26) | (0b1 << 25) | (0b00000 << 20) | \
+             (0b10001 << 15) | (0b010 << 12) | \
+             (0b1010111 << 0) | ((vd) << 7))
 
 /*
@@ -281,15 +312,22 @@
     defined(WOLFSSL_RISCV_VECTOR_CRYPTO_ASM)
 
 /*
- * Bit Manipulation
+ * Vector Bit Manipulation
  */
 
 /* Reverse order of bytes in words of vector regsiter. */
 #define VREV8(vd, vs2) \
     ASM_WORD((0b010010 << 26) | (0b1 << 25) | (0b01001<< 15) | \
              (0b010 << 12) | (0b1010111 << 0) | \
              (vs2 << 20) | (vd << 7))
+
+/* Rotate right each element of vector register by immediate bits. */
+#define VROR_VI(vd, imm, vs2) \
+    ASM_WORD((0b01010 << 27) | (0b1 << 25) | (0b011 << 12) | \
+             (0b1010111 << 0) | ((imm >> 5) << 26) | \
+             (vs2 << 20) | ((imm & 0x1f) << 15) | (vd << 7))
 
 #endif /* WOLFSSL_RISCV_VECTOR_BASE_BIT_MANIPULATION ||
         * WOLFSSL_RISCV_VECTOR_CRYPTO_ASM */
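Hand-assembled encodings like VROR_VI are easy to get wrong, so they are worth cross-checking against an assembler. The sketch below recomputes the same bit layout as a plain integer and prints it for comparison with a Zvbb-aware toolchain's disassembly; it assumes ASM_WORD simply emits the 32-bit word and that the field placement is as written above:

    #include <stdio.h>
    #include <stdint.h>

    /* Recompute the VROR_VI encoding from the macro above as a plain
     * integer: vror.vi vd, vs2, imm (Zvbb/Zvkb), unmasked (vm=1). */
    #define VROR_VI_ENC(vd, imm, vs2) \
        ((uint32_t)((0b01010 << 27) | (0b1 << 25) | (0b011 << 12) | \
                    (0b1010111 << 0) | (((imm) >> 5) << 26) | \
                    ((vs2) << 20) | (((imm) & 0x1f) << 15) | ((vd) << 7)))

    int main(void)
    {
        /* Example: vror.vi v4, v8, 7 -- compare against the output of an
         * assembler invoked with -march=rv64gcv_zvbb to validate. */
        printf("0x%08x\n", VROR_VI_ENC(4, 7, 8));
        return 0;
    }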