SP ASM: improve performance

Thumb2/ARM32: use umaal instruction to speed up multiplication, squaring
and reduction. umaal is not always available, so use umlal instead in that case.
Implementations for architectures below ARMv4 are still kept - no umull there.
Clean up point arithmetic to not take shortcuts.
Make masking for constant time faster.
Add alternate asm for different compilers.
Get ARMv4 and ARMv6 compiling.
Remove whitespace at end of lines.
This commit is contained in:
Sean Parkinson
2023-09-11 22:21:16 +10:00
parent f9c2a86456
commit 114c8cc681
30 changed files with 88467 additions and 49343 deletions

View File

@ -2607,6 +2607,22 @@ then
ENABLED_ARMASM_NEON=no
AC_MSG_NOTICE([32bit ARMv7-m found])
;;
armv6*)
AM_CPPFLAGS="$AM_CPPFLAGS -march=armv6 -fomit-frame-pointer -DWOLFSSL_ARMASM_NO_HW_CRYPTO -DWOLFSSL_ARM_ARCH=6"
AM_CCASFLAGS="$AM_CCASFLAGS -DEXTERNAL_OPTS_OPENVPN"
ENABLED_ARMASM_CRYPTO=no
ENABLED_AESGCM_STREAM=no # not yet implemented
ENABLED_ARMASM_NEON=no
AC_MSG_NOTICE([32bit ARMv6 found])
;;
armv4*)
AM_CPPFLAGS="$AM_CPPFLAGS -march=armv4 -fomit-frame-pointer -DWOLFSSL_ARMASM_NO_HW_CRYPTO -DWOLFSSL_ARM_ARCH=4"
AM_CCASFLAGS="$AM_CCASFLAGS -DEXTERNAL_OPTS_OPENVPN"
ENABLED_ARMASM_CRYPTO=no
ENABLED_AESGCM_STREAM=no # not yet implemented
ENABLED_ARMASM_NEON=no
AC_MSG_NOTICE([32bit ARMv4 found])
;;
*)
AM_CPPFLAGS="$AM_CPPFLAGS -mfpu=crypto-neon-fp-armv8 -marm"
# Include options.h
@ -7500,21 +7516,47 @@ if test "$ENABLED_SP_ASM" = "yes" && test "$ENABLED_SP" = "yes"; then
AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_SP_ARM64_ASM"
ENABLED_SP_ARM64_ASM=yes
;;
*armv7a*)
if test "$ENABLED_ARMASM" = "no"; then
AM_CPPFLAGS="$AM_CPPFLAGS -march=armv7-a -mfpu=neon -DWOLFSSL_ARM_ARCH=7 -marm"
fi
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM32_ASM"
AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_SP_ARM32_ASM"
ENABLED_SP_ARM32_ASM=yes
;;
*cortex* | *armv7m*)
if test "$ENABLED_ARMASM" = "no"; then
AM_CPPFLAGS="$AM_CPPFLAGS -march=armv7-r -D__thumb__ -DWOLFSSL_ARM_ARCH=7"
fi
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM_CORTEX_M_ASM"
AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_SP_ARM_CORTEX_M_ASM"
ENABLED_SP_ARM_CORTEX_ASM=yes
;;
*armv6*)
if test "$ENABLED_ARMASM" = "no"; then
AM_CPPFLAGS="$AM_CPPFLAGS -march=armv6 -DWOLFSSL_ARM_ARCH=6"
fi
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM32_ASM"
AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_SP_ARM32_ASM"
ENABLED_SP_ARM32_ASM=yes
;;
*armv4*)
if test "$ENABLED_ARMASM" = "no"; then
AM_CPPFLAGS="$AM_CPPFLAGS -march=armv4 -DWOLFSSL_ARM_ARCH=4"
fi
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM32_ASM"
AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_SP_ARM32_ASM"
ENABLED_SP_ARM32_ASM=yes
;;
*arm*)
if test "$host_alias" = "thumb" || test "$ARM_TARGET" = "thumb"; then
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM_THUMB_ASM"
AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_SP_ARM_THUMB_ASM"
ENABLED_SP_ARM_THUMB_ASM=yes
else
if test "$host_alias" = "cortex" || test "$ARM_TARGET" = "cortex"; then
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM_CORTEX_M_ASM"
AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_SP_ARM_CORTEX_M_ASM"
ENABLED_SP_ARM_CORTEX_ASM=yes
else
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM32_ASM"
AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_SP_ARM32_ASM"
ENABLED_SP_ARM32_ASM=yes
fi
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM32_ASM"
AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_SP_ARM32_ASM"
ENABLED_SP_ARM32_ASM=yes
fi
;;
*x86_64* | *amd64*)

View File

@ -171,8 +171,10 @@ endif !BUILD_ARMASM_CRYPTO
else
if BUILD_ARMASM
if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm_c.c
else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-aes-asm.S
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm.S
endif !BUILD_ARMASM_INLINE
endif BUILD_ARMASM
@ -203,8 +205,10 @@ else
if BUILD_ARMASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-sha256.c
if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c
else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-sha256-asm.S
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha256-asm.S
endif !BUILD_ARMASM_INLINE
else
@ -229,8 +233,10 @@ else
if BUILD_ARMASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-sha512.c
if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c
else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-sha512-asm.S
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha512-asm.S
endif !BUILD_ARMASM_INLINE
else
@ -326,8 +332,10 @@ else
if BUILD_ARMASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-sha256.c
if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c
else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-sha256-asm.S
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha256-asm.S
endif !BUILD_ARMASM_INLINE
else
@ -427,8 +435,10 @@ endif !BUILD_ARMASM_CRYPTO
else
if BUILD_ARMASM
if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm_c.c
else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-aes-asm.S
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm.S
endif !BUILD_ARMASM_INLINE
endif BUILD_ARMASM
@ -472,8 +482,10 @@ else
if BUILD_ARMASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-sha512.c
if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c
else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-sha512-asm.S
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha512-asm.S
endif !BUILD_ARMASM_INLINE
else
@ -713,9 +725,11 @@ src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-curve25519.
endif !BUILD_ARMASM_INLINE
else
if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-curve25519_c.c
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-curve25519_c.c
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-curve25519_c.c
else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-curve25519.S
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-curve25519.S
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-curve25519.S
endif !BUILD_ARMASM_INLINE

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -112,49 +112,49 @@ Transform_Sha256_Len:
sub sp, sp, #0xc0
adr r3, L_SHA256_transform_len_k
# Copy digest to add in at end
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r4, [r0]
ldr r5, [r0, #4]
#else
ldrd r4, r5, [r0]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r6, [r0, #8]
ldr r7, [r0, #12]
#else
ldrd r6, r7, [r0, #8]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r8, [r0, #16]
ldr r9, [r0, #20]
#else
ldrd r8, r9, [r0, #16]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r10, [r0, #24]
ldr r11, [r0, #28]
#else
ldrd r10, r11, [r0, #24]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r4, [sp, #64]
str r5, [sp, #68]
#else
strd r4, r5, [sp, #64]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r6, [sp, #72]
str r7, [sp, #76]
#else
strd r6, r7, [sp, #72]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r8, [sp, #80]
str r9, [sp, #84]
#else
strd r8, r9, [sp, #80]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r10, [sp, #88]
str r11, [sp, #92]
#else
@ -163,6 +163,136 @@ Transform_Sha256_Len:
# Start of loop processing a block
L_SHA256_transform_len_begin:
# Load, Reverse and Store W - 64 bytes
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
ldr r4, [r1]
ldr r5, [r1, #4]
ldr r6, [r1, #8]
ldr r7, [r1, #12]
eor r8, r4, r4, ror #16
eor r9, r5, r5, ror #16
eor r10, r6, r6, ror #16
eor r11, r7, r7, ror #16
bic r8, r8, #0xff0000
bic r9, r9, #0xff0000
bic r10, r10, #0xff0000
bic r11, r11, #0xff0000
ror r4, r4, #8
ror r5, r5, #8
ror r6, r6, #8
ror r7, r7, #8
eor r4, r4, r8, lsr #8
eor r5, r5, r9, lsr #8
eor r6, r6, r10, lsr #8
eor r7, r7, r11, lsr #8
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r4, [sp]
str r5, [sp, #4]
#else
strd r4, r5, [sp]
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r6, [sp, #8]
str r7, [sp, #12]
#else
strd r6, r7, [sp, #8]
#endif
ldr r4, [r1, #16]
ldr r5, [r1, #20]
ldr r6, [r1, #24]
ldr r7, [r1, #28]
eor r8, r4, r4, ror #16
eor r9, r5, r5, ror #16
eor r10, r6, r6, ror #16
eor r11, r7, r7, ror #16
bic r8, r8, #0xff0000
bic r9, r9, #0xff0000
bic r10, r10, #0xff0000
bic r11, r11, #0xff0000
ror r4, r4, #8
ror r5, r5, #8
ror r6, r6, #8
ror r7, r7, #8
eor r4, r4, r8, lsr #8
eor r5, r5, r9, lsr #8
eor r6, r6, r10, lsr #8
eor r7, r7, r11, lsr #8
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r4, [sp, #16]
str r5, [sp, #20]
#else
strd r4, r5, [sp, #16]
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r6, [sp, #24]
str r7, [sp, #28]
#else
strd r6, r7, [sp, #24]
#endif
ldr r4, [r1, #32]
ldr r5, [r1, #36]
ldr r6, [r1, #40]
ldr r7, [r1, #44]
eor r8, r4, r4, ror #16
eor r9, r5, r5, ror #16
eor r10, r6, r6, ror #16
eor r11, r7, r7, ror #16
bic r8, r8, #0xff0000
bic r9, r9, #0xff0000
bic r10, r10, #0xff0000
bic r11, r11, #0xff0000
ror r4, r4, #8
ror r5, r5, #8
ror r6, r6, #8
ror r7, r7, #8
eor r4, r4, r8, lsr #8
eor r5, r5, r9, lsr #8
eor r6, r6, r10, lsr #8
eor r7, r7, r11, lsr #8
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r4, [sp, #32]
str r5, [sp, #36]
#else
strd r4, r5, [sp, #32]
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r6, [sp, #40]
str r7, [sp, #44]
#else
strd r6, r7, [sp, #40]
#endif
ldr r4, [r1, #48]
ldr r5, [r1, #52]
ldr r6, [r1, #56]
ldr r7, [r1, #60]
eor r8, r4, r4, ror #16
eor r9, r5, r5, ror #16
eor r10, r6, r6, ror #16
eor r11, r7, r7, ror #16
bic r8, r8, #0xff0000
bic r9, r9, #0xff0000
bic r10, r10, #0xff0000
bic r11, r11, #0xff0000
ror r4, r4, #8
ror r5, r5, #8
ror r6, r6, #8
ror r7, r7, #8
eor r4, r4, r8, lsr #8
eor r5, r5, r9, lsr #8
eor r6, r6, r10, lsr #8
eor r7, r7, r11, lsr #8
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r4, [sp, #48]
str r5, [sp, #52]
#else
strd r4, r5, [sp, #48]
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r6, [sp, #56]
str r7, [sp, #60]
#else
strd r6, r7, [sp, #56]
#endif
#else
ldr r4, [r1]
ldr r5, [r1, #4]
ldr r6, [r1, #8]
@ -179,25 +309,25 @@ L_SHA256_transform_len_begin:
rev r9, r9
rev r10, r10
rev r11, r11
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r4, [sp]
str r5, [sp, #4]
#else
strd r4, r5, [sp]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r6, [sp, #8]
str r7, [sp, #12]
#else
strd r6, r7, [sp, #8]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r8, [sp, #16]
str r9, [sp, #20]
#else
strd r8, r9, [sp, #16]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r10, [sp, #24]
str r11, [sp, #28]
#else
@ -219,30 +349,31 @@ L_SHA256_transform_len_begin:
rev r9, r9
rev r10, r10
rev r11, r11
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r4, [sp, #32]
str r5, [sp, #36]
#else
strd r4, r5, [sp, #32]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r6, [sp, #40]
str r7, [sp, #44]
#else
strd r6, r7, [sp, #40]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r8, [sp, #48]
str r9, [sp, #52]
#else
strd r8, r9, [sp, #48]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r10, [sp, #56]
str r11, [sp, #60]
#else
strd r10, r11, [sp, #56]
#endif
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */
ldr r11, [r0, #4]
ldr r4, [r0, #8]
eor r11, r11, r4
@ -1517,25 +1648,25 @@ L_SHA256_transform_len_start:
str r8, [r0, #16]
str r9, [r0]
# Add in digest from start
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r4, [r0]
ldr r5, [r0, #4]
#else
ldrd r4, r5, [r0]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r6, [r0, #8]
ldr r7, [r0, #12]
#else
ldrd r6, r7, [r0, #8]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r8, [sp, #64]
ldr r9, [sp, #68]
#else
ldrd r8, r9, [sp, #64]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r10, [sp, #72]
ldr r11, [sp, #76]
#else
@ -1545,49 +1676,49 @@ L_SHA256_transform_len_start:
add r5, r5, r9
add r6, r6, r10
add r7, r7, r11
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r4, [r0]
str r5, [r0, #4]
#else
strd r4, r5, [r0]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r6, [r0, #8]
str r7, [r0, #12]
#else
strd r6, r7, [r0, #8]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r4, [sp, #64]
str r5, [sp, #68]
#else
strd r4, r5, [sp, #64]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r6, [sp, #72]
str r7, [sp, #76]
#else
strd r6, r7, [sp, #72]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r4, [r0, #16]
ldr r5, [r0, #20]
#else
ldrd r4, r5, [r0, #16]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r6, [r0, #24]
ldr r7, [r0, #28]
#else
ldrd r6, r7, [r0, #24]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r8, [sp, #80]
ldr r9, [sp, #84]
#else
ldrd r8, r9, [sp, #80]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r10, [sp, #88]
ldr r11, [sp, #92]
#else
@ -1597,25 +1728,25 @@ L_SHA256_transform_len_start:
add r5, r5, r9
add r6, r6, r10
add r7, r7, r11
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r4, [r0, #16]
str r5, [r0, #20]
#else
strd r4, r5, [r0, #16]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r6, [r0, #24]
str r7, [r0, #28]
#else
strd r6, r7, [r0, #24]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r4, [sp, #80]
str r5, [sp, #84]
#else
strd r4, r5, [sp, #80]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r6, [sp, #88]
str r7, [sp, #92]
#else
@ -1708,7 +1839,7 @@ Transform_Sha256_Len:
push {r4, r5, r6, r7, r8, r9, r10, lr}
vpush {d8-d11}
sub sp, sp, #24
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r0, [sp]
str r1, [sp, #4]
#else
@ -1717,25 +1848,25 @@ Transform_Sha256_Len:
str r2, [sp, #8]
adr r12, L_SHA256_transform_neon_len_k
# Load digest into registers
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r2, [r0]
ldr r3, [r0, #4]
#else
ldrd r2, r3, [r0]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r4, [r0, #8]
ldr r5, [r0, #12]
#else
ldrd r4, r5, [r0, #8]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r6, [r0, #16]
ldr r7, [r0, #20]
#else
ldrd r6, r7, [r0, #16]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r8, [r0, #24]
ldr r9, [r0, #28]
#else
@ -2666,7 +2797,7 @@ L_SHA256_transform_neon_len_start:
add r2, r2, r1
ldr r10, [sp]
# Add in digest from start
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r0, [r10]
ldr r1, [r10, #4]
#else
@ -2674,13 +2805,13 @@ L_SHA256_transform_neon_len_start:
#endif
add r2, r2, r0
add r3, r3, r1
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r2, [r10]
str r3, [r10, #4]
#else
strd r2, r3, [r10]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r0, [r10, #8]
ldr r1, [r10, #12]
#else
@ -2688,13 +2819,13 @@ L_SHA256_transform_neon_len_start:
#endif
add r4, r4, r0
add r5, r5, r1
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r4, [r10, #8]
str r5, [r10, #12]
#else
strd r4, r5, [r10, #8]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r0, [r10, #16]
ldr r1, [r10, #20]
#else
@ -2702,13 +2833,13 @@ L_SHA256_transform_neon_len_start:
#endif
add r6, r6, r0
add r7, r7, r1
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r6, [r10, #16]
str r7, [r10, #20]
#else
strd r6, r7, [r10, #16]
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r0, [r10, #24]
ldr r1, [r10, #28]
#else
@ -2716,7 +2847,7 @@ L_SHA256_transform_neon_len_start:
#endif
add r8, r8, r0
add r9, r9, r1
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r8, [r10, #24]
str r9, [r10, #28]
#else

View File

@ -39,6 +39,18 @@
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>
#ifdef WOLFSSL_ARMASM_INLINE
#ifdef WOLFSSL_ARMASM
#if !defined(__aarch64__) && defined(__arm__)
#ifdef __IAR_SYSTEMS_ICC__
#define __asm__ asm
#define __volatile__ volatile
#endif /* __IAR_SYSTEMS_ICC__ */
#ifdef __KEIL__
#define __asm__ __asm
#define __volatile__ volatile
#endif /* __KEIL__ */
#ifndef NO_SHA256
#include <wolfssl/wolfcrypt/sha256.h>
@ -73,49 +85,49 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
__asm__ __volatile__ (
"sub sp, sp, #0xc0\n\t"
/* Copy digest to add in at end */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r4, [%[sha256]]\n\t"
"ldr r5, [%[sha256], #4]\n\t"
#else
"ldrd r4, r5, [%[sha256]]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r6, [%[sha256], #8]\n\t"
"ldr r7, [%[sha256], #12]\n\t"
#else
"ldrd r6, r7, [%[sha256], #8]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r8, [%[sha256], #16]\n\t"
"ldr r9, [%[sha256], #20]\n\t"
#else
"ldrd r8, r9, [%[sha256], #16]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r10, [%[sha256], #24]\n\t"
"ldr r11, [%[sha256], #28]\n\t"
#else
"ldrd r10, r11, [%[sha256], #24]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r4, [sp, #64]\n\t"
"str r5, [sp, #68]\n\t"
#else
"strd r4, r5, [sp, #64]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r6, [sp, #72]\n\t"
"str r7, [sp, #76]\n\t"
#else
"strd r6, r7, [sp, #72]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r8, [sp, #80]\n\t"
"str r9, [sp, #84]\n\t"
#else
"strd r8, r9, [sp, #80]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r10, [sp, #88]\n\t"
"str r11, [sp, #92]\n\t"
#else
@ -125,6 +137,136 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"\n"
"L_SHA256_transform_len_begin_%=: \n\t"
/* Load, Reverse and Store W - 64 bytes */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
"ldr r4, [%[data]]\n\t"
"ldr r5, [%[data], #4]\n\t"
"ldr r6, [%[data], #8]\n\t"
"ldr r7, [%[data], #12]\n\t"
"eor r8, r4, r4, ror #16\n\t"
"eor r9, r5, r5, ror #16\n\t"
"eor r10, r6, r6, ror #16\n\t"
"eor r11, r7, r7, ror #16\n\t"
"bic r8, r8, #0xff0000\n\t"
"bic r9, r9, #0xff0000\n\t"
"bic r10, r10, #0xff0000\n\t"
"bic r11, r11, #0xff0000\n\t"
"ror r4, r4, #8\n\t"
"ror r5, r5, #8\n\t"
"ror r6, r6, #8\n\t"
"ror r7, r7, #8\n\t"
"eor r4, r4, r8, lsr #8\n\t"
"eor r5, r5, r9, lsr #8\n\t"
"eor r6, r6, r10, lsr #8\n\t"
"eor r7, r7, r11, lsr #8\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r4, [sp]\n\t"
"str r5, [sp, #4]\n\t"
#else
"strd r4, r5, [sp]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r6, [sp, #8]\n\t"
"str r7, [sp, #12]\n\t"
#else
"strd r6, r7, [sp, #8]\n\t"
#endif
"ldr r4, [%[data], #16]\n\t"
"ldr r5, [%[data], #20]\n\t"
"ldr r6, [%[data], #24]\n\t"
"ldr r7, [%[data], #28]\n\t"
"eor r8, r4, r4, ror #16\n\t"
"eor r9, r5, r5, ror #16\n\t"
"eor r10, r6, r6, ror #16\n\t"
"eor r11, r7, r7, ror #16\n\t"
"bic r8, r8, #0xff0000\n\t"
"bic r9, r9, #0xff0000\n\t"
"bic r10, r10, #0xff0000\n\t"
"bic r11, r11, #0xff0000\n\t"
"ror r4, r4, #8\n\t"
"ror r5, r5, #8\n\t"
"ror r6, r6, #8\n\t"
"ror r7, r7, #8\n\t"
"eor r4, r4, r8, lsr #8\n\t"
"eor r5, r5, r9, lsr #8\n\t"
"eor r6, r6, r10, lsr #8\n\t"
"eor r7, r7, r11, lsr #8\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r4, [sp, #16]\n\t"
"str r5, [sp, #20]\n\t"
#else
"strd r4, r5, [sp, #16]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r6, [sp, #24]\n\t"
"str r7, [sp, #28]\n\t"
#else
"strd r6, r7, [sp, #24]\n\t"
#endif
"ldr r4, [%[data], #32]\n\t"
"ldr r5, [%[data], #36]\n\t"
"ldr r6, [%[data], #40]\n\t"
"ldr r7, [%[data], #44]\n\t"
"eor r8, r4, r4, ror #16\n\t"
"eor r9, r5, r5, ror #16\n\t"
"eor r10, r6, r6, ror #16\n\t"
"eor r11, r7, r7, ror #16\n\t"
"bic r8, r8, #0xff0000\n\t"
"bic r9, r9, #0xff0000\n\t"
"bic r10, r10, #0xff0000\n\t"
"bic r11, r11, #0xff0000\n\t"
"ror r4, r4, #8\n\t"
"ror r5, r5, #8\n\t"
"ror r6, r6, #8\n\t"
"ror r7, r7, #8\n\t"
"eor r4, r4, r8, lsr #8\n\t"
"eor r5, r5, r9, lsr #8\n\t"
"eor r6, r6, r10, lsr #8\n\t"
"eor r7, r7, r11, lsr #8\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r4, [sp, #32]\n\t"
"str r5, [sp, #36]\n\t"
#else
"strd r4, r5, [sp, #32]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r6, [sp, #40]\n\t"
"str r7, [sp, #44]\n\t"
#else
"strd r6, r7, [sp, #40]\n\t"
#endif
"ldr r4, [%[data], #48]\n\t"
"ldr r5, [%[data], #52]\n\t"
"ldr r6, [%[data], #56]\n\t"
"ldr r7, [%[data], #60]\n\t"
"eor r8, r4, r4, ror #16\n\t"
"eor r9, r5, r5, ror #16\n\t"
"eor r10, r6, r6, ror #16\n\t"
"eor r11, r7, r7, ror #16\n\t"
"bic r8, r8, #0xff0000\n\t"
"bic r9, r9, #0xff0000\n\t"
"bic r10, r10, #0xff0000\n\t"
"bic r11, r11, #0xff0000\n\t"
"ror r4, r4, #8\n\t"
"ror r5, r5, #8\n\t"
"ror r6, r6, #8\n\t"
"ror r7, r7, #8\n\t"
"eor r4, r4, r8, lsr #8\n\t"
"eor r5, r5, r9, lsr #8\n\t"
"eor r6, r6, r10, lsr #8\n\t"
"eor r7, r7, r11, lsr #8\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r4, [sp, #48]\n\t"
"str r5, [sp, #52]\n\t"
#else
"strd r4, r5, [sp, #48]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r6, [sp, #56]\n\t"
"str r7, [sp, #60]\n\t"
#else
"strd r6, r7, [sp, #56]\n\t"
#endif
#else
"ldr r4, [%[data]]\n\t"
"ldr r5, [%[data], #4]\n\t"
"ldr r6, [%[data], #8]\n\t"
@ -141,25 +283,25 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"rev r9, r9\n\t"
"rev r10, r10\n\t"
"rev r11, r11\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r4, [sp]\n\t"
"str r5, [sp, #4]\n\t"
#else
"strd r4, r5, [sp]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r6, [sp, #8]\n\t"
"str r7, [sp, #12]\n\t"
#else
"strd r6, r7, [sp, #8]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r8, [sp, #16]\n\t"
"str r9, [sp, #20]\n\t"
#else
"strd r8, r9, [sp, #16]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r10, [sp, #24]\n\t"
"str r11, [sp, #28]\n\t"
#else
@ -181,30 +323,31 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"rev r9, r9\n\t"
"rev r10, r10\n\t"
"rev r11, r11\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r4, [sp, #32]\n\t"
"str r5, [sp, #36]\n\t"
#else
"strd r4, r5, [sp, #32]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r6, [sp, #40]\n\t"
"str r7, [sp, #44]\n\t"
#else
"strd r6, r7, [sp, #40]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r8, [sp, #48]\n\t"
"str r9, [sp, #52]\n\t"
#else
"strd r8, r9, [sp, #48]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r10, [sp, #56]\n\t"
"str r11, [sp, #60]\n\t"
#else
"strd r10, r11, [sp, #56]\n\t"
#endif
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */
"ldr r11, [%[sha256], #4]\n\t"
"ldr r4, [%[sha256], #8]\n\t"
"eor r11, r11, r4\n\t"
@ -1480,25 +1623,25 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"str r8, [%[sha256], #16]\n\t"
"str r9, [%[sha256]]\n\t"
/* Add in digest from start */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r4, [%[sha256]]\n\t"
"ldr r5, [%[sha256], #4]\n\t"
#else
"ldrd r4, r5, [%[sha256]]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r6, [%[sha256], #8]\n\t"
"ldr r7, [%[sha256], #12]\n\t"
#else
"ldrd r6, r7, [%[sha256], #8]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r8, [sp, #64]\n\t"
"ldr r9, [sp, #68]\n\t"
#else
"ldrd r8, r9, [sp, #64]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r10, [sp, #72]\n\t"
"ldr r11, [sp, #76]\n\t"
#else
@ -1508,49 +1651,49 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"add r5, r5, r9\n\t"
"add r6, r6, r10\n\t"
"add r7, r7, r11\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r4, [%[sha256]]\n\t"
"str r5, [%[sha256], #4]\n\t"
#else
"strd r4, r5, [%[sha256]]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r6, [%[sha256], #8]\n\t"
"str r7, [%[sha256], #12]\n\t"
#else
"strd r6, r7, [%[sha256], #8]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r4, [sp, #64]\n\t"
"str r5, [sp, #68]\n\t"
#else
"strd r4, r5, [sp, #64]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r6, [sp, #72]\n\t"
"str r7, [sp, #76]\n\t"
#else
"strd r6, r7, [sp, #72]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r4, [%[sha256], #16]\n\t"
"ldr r5, [%[sha256], #20]\n\t"
#else
"ldrd r4, r5, [%[sha256], #16]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r6, [%[sha256], #24]\n\t"
"ldr r7, [%[sha256], #28]\n\t"
#else
"ldrd r6, r7, [%[sha256], #24]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r8, [sp, #80]\n\t"
"ldr r9, [sp, #84]\n\t"
#else
"ldrd r8, r9, [sp, #80]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r10, [sp, #88]\n\t"
"ldr r11, [sp, #92]\n\t"
#else
@ -1560,25 +1703,25 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"add r5, r5, r9\n\t"
"add r6, r6, r10\n\t"
"add r7, r7, r11\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r4, [%[sha256], #16]\n\t"
"str r5, [%[sha256], #20]\n\t"
#else
"strd r4, r5, [%[sha256], #16]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r6, [%[sha256], #24]\n\t"
"str r7, [%[sha256], #28]\n\t"
#else
"strd r6, r7, [%[sha256], #24]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r4, [sp, #80]\n\t"
"str r5, [sp, #84]\n\t"
#else
"strd r4, r5, [sp, #80]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r6, [sp, #88]\n\t"
"str r7, [sp, #92]\n\t"
#else
@ -1628,7 +1771,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
__asm__ __volatile__ (
"sub sp, sp, #24\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str %[sha256], [sp]\n\t"
"str %[data], [sp, #4]\n\t"
#else
@ -1637,25 +1780,25 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"str %[len], [sp, #8]\n\t"
"mov r12, %[L_SHA256_transform_neon_len_k]\n\t"
/* Load digest into registers */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr %[len], [%[sha256]]\n\t"
"ldr r3, [%[sha256], #4]\n\t"
#else
"ldrd %[len], r3, [%[sha256]]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r4, [%[sha256], #8]\n\t"
"ldr r5, [%[sha256], #12]\n\t"
#else
"ldrd r4, r5, [%[sha256], #8]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r6, [%[sha256], #16]\n\t"
"ldr r7, [%[sha256], #20]\n\t"
#else
"ldrd r6, r7, [%[sha256], #16]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r8, [%[sha256], #24]\n\t"
"ldr r9, [%[sha256], #28]\n\t"
#else
@ -2588,7 +2731,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"add %[len], %[len], %[data]\n\t"
"ldr r10, [sp]\n\t"
/* Add in digest from start */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr %[sha256], [r10]\n\t"
"ldr %[data], [r10, #4]\n\t"
#else
@ -2596,13 +2739,13 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
#endif
"add %[len], %[len], %[sha256]\n\t"
"add r3, r3, %[data]\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str %[len], [r10]\n\t"
"str r3, [r10, #4]\n\t"
#else
"strd %[len], r3, [r10]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr %[sha256], [r10, #8]\n\t"
"ldr %[data], [r10, #12]\n\t"
#else
@ -2610,13 +2753,13 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
#endif
"add r4, r4, %[sha256]\n\t"
"add r5, r5, %[data]\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r4, [r10, #8]\n\t"
"str r5, [r10, #12]\n\t"
#else
"strd r4, r5, [r10, #8]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr %[sha256], [r10, #16]\n\t"
"ldr %[data], [r10, #20]\n\t"
#else
@ -2624,13 +2767,13 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
#endif
"add r6, r6, %[sha256]\n\t"
"add r7, r7, %[data]\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r6, [r10, #16]\n\t"
"str r7, [r10, #20]\n\t"
#else
"strd r6, r7, [r10, #16]\n\t"
#endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr %[sha256], [r10, #24]\n\t"
"ldr %[data], [r10, #28]\n\t"
#else
@ -2638,7 +2781,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
#endif
"add r8, r8, %[sha256]\n\t"
"add r9, r9, %[data]\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r8, [r10, #24]\n\t"
"str r9, [r10, #28]\n\t"
#else
@ -2661,4 +2804,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
#endif /* !NO_SHA256 */
#endif /* !__aarch64__ && !__thumb__ */
#endif /* WOLFSSL_ARMASM */
#endif /* !defined(__aarch64__) && defined(__arm__) */
#endif /* WOLFSSL_ARMASM */
#endif /* WOLFSSL_ARMASM_INLINE */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -593,9 +593,9 @@ _fe_mul:
adds x7, x7, x3
umulh x4, x15, x19
adcs x8, x8, x4
adc x9, x9, xzr
# A[1] * B[3]
umulh x11, x15, x22
adc x9, x9, xzr
mul x10, x15, x22
# A[0] * B[1]
mul x3, x14, x20
@ -1842,9 +1842,9 @@ L_curve25519_bits:
adds x20, x20, x3
umulh x4, x15, x6
adcs x21, x21, x4
adc x22, x22, xzr
# A[1] * B[3]
umulh x26, x15, x9
adc x22, x22, xzr
mul x25, x15, x9
# A[0] * B[1]
mul x3, x14, x7
@ -1958,9 +1958,9 @@ L_curve25519_bits:
adds x20, x20, x3
umulh x4, x11, x25
adcs x21, x21, x4
adc x22, x22, xzr
# A[1] * B[3]
umulh x15, x11, x28
adc x22, x22, xzr
mul x14, x11, x28
# A[0] * B[1]
mul x3, x10, x26
@ -2229,9 +2229,9 @@ L_curve25519_bits:
adds x7, x7, x3
umulh x4, x15, x10
adcs x8, x8, x4
adc x9, x9, xzr
# A[1] * B[3]
umulh x26, x15, x13
adc x9, x9, xzr
mul x25, x15, x13
# A[0] * B[1]
mul x3, x14, x11
@ -2400,9 +2400,9 @@ L_curve25519_bits:
adds x7, x7, x3
umulh x4, x15, x10
adcs x8, x8, x4
adc x9, x9, xzr
# A[1] * B[3]
umulh x26, x15, x13
adc x9, x9, xzr
mul x25, x15, x13
# A[0] * B[1]
mul x3, x14, x11
@ -2708,9 +2708,9 @@ L_curve25519_bits:
adds x11, x11, x3
umulh x4, x20, x14
adcs x12, x12, x4
adc x13, x13, xzr
# A[1] * B[3]
umulh x26, x20, x17
adc x13, x13, xzr
mul x25, x20, x17
# A[0] * B[1]
mul x3, x19, x15
@ -3679,9 +3679,9 @@ L_curve25519_inv_8:
adds x15, x15, x3
umulh x4, x7, x10
adcs x16, x16, x4
adc x17, x17, xzr
# A[1] * B[3]
umulh x20, x7, x13
adc x17, x17, xzr
mul x19, x7, x13
# A[0] * B[1]
mul x3, x6, x11
@ -4664,9 +4664,9 @@ _ge_p1p1_to_p2:
adds x15, x15, x3
umulh x4, x11, x6
adcs x16, x16, x4
adc x17, x17, xzr
# A[1] * B[3]
umulh x20, x11, x9
adc x17, x17, xzr
mul x19, x11, x9
# A[0] * B[1]
mul x3, x10, x7
@ -4782,9 +4782,9 @@ _ge_p1p1_to_p2:
adds x15, x15, x3
umulh x4, x11, x6
adcs x16, x16, x4
adc x17, x17, xzr
# A[1] * B[3]
umulh x20, x11, x9
adc x17, x17, xzr
mul x19, x11, x9
# A[0] * B[1]
mul x3, x10, x7
@ -4900,9 +4900,9 @@ _ge_p1p1_to_p2:
adds x15, x15, x3
umulh x4, x11, x6
adcs x16, x16, x4
adc x17, x17, xzr
# A[1] * B[3]
umulh x20, x11, x9
adc x17, x17, xzr
mul x19, x11, x9
# A[0] * B[1]
mul x3, x10, x7
@ -5051,9 +5051,9 @@ _ge_p1p1_to_p3:
adds x15, x15, x3
umulh x4, x11, x6
adcs x16, x16, x4
adc x17, x17, xzr
# A[1] * B[3]
umulh x20, x11, x9
adc x17, x17, xzr
mul x19, x11, x9
# A[0] * B[1]
mul x3, x10, x7
@ -5169,9 +5169,9 @@ _ge_p1p1_to_p3:
adds x15, x15, x3
umulh x4, x24, x6
adcs x16, x16, x4
adc x17, x17, xzr
# A[1] * B[3]
umulh x20, x24, x9
adc x17, x17, xzr
mul x19, x24, x9
# A[0] * B[1]
mul x3, x23, x7
@ -5287,9 +5287,9 @@ _ge_p1p1_to_p3:
adds x15, x15, x3
umulh x4, x24, x6
adcs x16, x16, x4
adc x17, x17, xzr
# A[1] * B[3]
umulh x20, x24, x9
adc x17, x17, xzr
mul x19, x24, x9
# A[0] * B[1]
mul x3, x23, x7
@ -5403,9 +5403,9 @@ _ge_p1p1_to_p3:
adds x15, x15, x3
umulh x4, x11, x6
adcs x16, x16, x4
adc x17, x17, xzr
# A[1] * B[3]
umulh x20, x11, x9
adc x17, x17, xzr
mul x19, x11, x9
# A[0] * B[1]
mul x3, x10, x7
@ -6075,9 +6075,9 @@ _ge_madd:
adds x22, x22, x25
umulh x26, x17, x8
adcs x23, x23, x26
adc x24, x24, xzr
# A[1] * B[3]
umulh x5, x17, x11
adc x24, x24, xzr
mul x4, x17, x11
# A[0] * B[1]
mul x25, x16, x9
@ -6191,9 +6191,9 @@ _ge_madd:
adds x5, x5, x25
umulh x26, x13, x16
adcs x6, x6, x26
adc x7, x7, xzr
# A[1] * B[3]
umulh x9, x13, x20
adc x7, x7, xzr
mul x8, x13, x20
# A[0] * B[1]
mul x25, x12, x17
@ -6348,9 +6348,9 @@ _ge_madd:
adds x17, x17, x25
umulh x26, x22, x4
adcs x19, x19, x26
adc x20, x20, xzr
# A[1] * B[3]
umulh x9, x22, x7
adc x20, x20, xzr
mul x8, x22, x7
# A[0] * B[1]
mul x25, x21, x5
@ -6593,9 +6593,9 @@ _ge_msub:
adds x22, x22, x25
umulh x26, x17, x8
adcs x23, x23, x26
adc x24, x24, xzr
# A[1] * B[3]
umulh x5, x17, x11
adc x24, x24, xzr
mul x4, x17, x11
# A[0] * B[1]
mul x25, x16, x9
@ -6709,9 +6709,9 @@ _ge_msub:
adds x5, x5, x25
umulh x26, x13, x16
adcs x6, x6, x26
adc x7, x7, xzr
# A[1] * B[3]
umulh x9, x13, x20
adc x7, x7, xzr
mul x8, x13, x20
# A[0] * B[1]
mul x25, x12, x17
@ -6866,9 +6866,9 @@ _ge_msub:
adds x17, x17, x25
umulh x26, x22, x4
adcs x19, x19, x26
adc x20, x20, xzr
# A[1] * B[3]
umulh x9, x22, x7
adc x20, x20, xzr
mul x8, x22, x7
# A[0] * B[1]
mul x25, x21, x5
@ -7109,9 +7109,9 @@ _ge_add:
adds x22, x22, x25
umulh x26, x17, x8
adcs x23, x23, x26
adc x24, x24, xzr
# A[1] * B[3]
umulh x5, x17, x11
adc x24, x24, xzr
mul x4, x17, x11
# A[0] * B[1]
mul x25, x16, x9
@ -7228,9 +7228,9 @@ _ge_add:
adds x5, x5, x25
umulh x26, x13, x16
adcs x6, x6, x26
adc x7, x7, xzr
# A[1] * B[3]
umulh x9, x13, x20
adc x7, x7, xzr
mul x8, x13, x20
# A[0] * B[1]
mul x25, x12, x17
@ -7388,9 +7388,9 @@ _ge_add:
adds x17, x17, x25
umulh x26, x22, x4
adcs x19, x19, x26
adc x20, x20, xzr
# A[1] * B[3]
umulh x9, x22, x7
adc x20, x20, xzr
mul x8, x22, x7
# A[0] * B[1]
mul x25, x21, x5
@ -7509,9 +7509,9 @@ _ge_add:
adds x9, x9, x25
umulh x26, x5, x12
adcs x10, x10, x26
adc x11, x11, xzr
# A[1] * B[3]
umulh x17, x5, x15
adc x11, x11, xzr
mul x16, x5, x15
# A[0] * B[1]
mul x25, x4, x13
@ -7753,9 +7753,9 @@ _ge_sub:
adds x22, x22, x25
umulh x26, x17, x8
adcs x23, x23, x26
adc x24, x24, xzr
# A[1] * B[3]
umulh x5, x17, x11
adc x24, x24, xzr
mul x4, x17, x11
# A[0] * B[1]
mul x25, x16, x9
@ -7880,9 +7880,9 @@ _ge_sub:
adds x5, x5, x25
umulh x26, x13, x16
adcs x6, x6, x26
adc x7, x7, xzr
# A[1] * B[3]
umulh x9, x13, x20
adc x7, x7, xzr
mul x8, x13, x20
# A[0] * B[1]
mul x25, x12, x17
@ -8040,9 +8040,9 @@ _ge_sub:
adds x17, x17, x25
umulh x26, x22, x4
adcs x19, x19, x26
adc x20, x20, xzr
# A[1] * B[3]
umulh x9, x22, x7
adc x20, x20, xzr
mul x8, x22, x7
# A[0] * B[1]
mul x25, x21, x5
@ -8169,9 +8169,9 @@ _ge_sub:
adds x9, x9, x25
umulh x26, x5, x12
adcs x10, x10, x26
adc x11, x11, xzr
# A[1] * B[3]
umulh x17, x5, x15
adc x11, x11, xzr
mul x16, x5, x15
# A[0] * B[1]
mul x25, x4, x13
@ -8570,9 +8570,9 @@ _sc_muladd:
adds x5, x5, x21
umulh x22, x13, x16
adcs x6, x6, x22
adc x7, x7, xzr
# A[1] * B[3]
umulh x9, x13, x20
adc x7, x7, xzr
mul x8, x13, x20
# A[0] * B[1]
mul x21, x12, x17

View File

@ -490,9 +490,9 @@ void fe_mul(fe r, const fe a, const fe b)
"adds x7, x7, x3\n\t"
"umulh x4, x15, x19\n\t"
"adcs x8, x8, x4\n\t"
"adc x9, x9, xzr\n\t"
/* A[1] * B[3] */
"umulh x11, x15, x22\n\t"
"adc x9, x9, xzr\n\t"
"mul x10, x15, x22\n\t"
/* A[0] * B[1] */
"mul x3, x14, x20\n\t"
@ -1711,9 +1711,9 @@ int curve25519(byte* r, const byte* n, const byte* a)
"adds x20, x20, x3\n\t"
"umulh x4, x15, x6\n\t"
"adcs x21, x21, x4\n\t"
"adc x22, x22, xzr\n\t"
/* A[1] * B[3] */
"umulh x26, x15, x9\n\t"
"adc x22, x22, xzr\n\t"
"mul x25, x15, x9\n\t"
/* A[0] * B[1] */
"mul x3, x14, x7\n\t"
@ -1827,9 +1827,9 @@ int curve25519(byte* r, const byte* n, const byte* a)
"adds x20, x20, x3\n\t"
"umulh x4, x11, x25\n\t"
"adcs x21, x21, x4\n\t"
"adc x22, x22, xzr\n\t"
/* A[1] * B[3] */
"umulh x15, x11, x28\n\t"
"adc x22, x22, xzr\n\t"
"mul x14, x11, x28\n\t"
/* A[0] * B[1] */
"mul x3, x10, x26\n\t"
@ -2098,9 +2098,9 @@ int curve25519(byte* r, const byte* n, const byte* a)
"adds x7, x7, x3\n\t"
"umulh x4, x15, x10\n\t"
"adcs x8, x8, x4\n\t"
"adc x9, x9, xzr\n\t"
/* A[1] * B[3] */
"umulh x26, x15, x13\n\t"
"adc x9, x9, xzr\n\t"
"mul x25, x15, x13\n\t"
/* A[0] * B[1] */
"mul x3, x14, x11\n\t"
@ -2269,9 +2269,9 @@ int curve25519(byte* r, const byte* n, const byte* a)
"adds x7, x7, x3\n\t"
"umulh x4, x15, x10\n\t"
"adcs x8, x8, x4\n\t"
"adc x9, x9, xzr\n\t"
/* A[1] * B[3] */
"umulh x26, x15, x13\n\t"
"adc x9, x9, xzr\n\t"
"mul x25, x15, x13\n\t"
/* A[0] * B[1] */
"mul x3, x14, x11\n\t"
@ -2577,9 +2577,9 @@ int curve25519(byte* r, const byte* n, const byte* a)
"adds x11, x11, x3\n\t"
"umulh x4, x20, x14\n\t"
"adcs x12, x12, x4\n\t"
"adc x13, x13, xzr\n\t"
/* A[1] * B[3] */
"umulh x26, x20, x17\n\t"
"adc x13, x13, xzr\n\t"
"mul x25, x20, x17\n\t"
/* A[0] * B[1] */
"mul x3, x19, x15\n\t"
@ -3556,9 +3556,9 @@ int curve25519(byte* r, const byte* n, const byte* a)
"adds x15, x15, x3\n\t"
"umulh x4, x7, x10\n\t"
"adcs x16, x16, x4\n\t"
"adc x17, x17, xzr\n\t"
/* A[1] * B[3] */
"umulh x20, x7, x13\n\t"
"adc x17, x17, xzr\n\t"
"mul x19, x7, x13\n\t"
/* A[0] * B[1] */
"mul x3, x6, x11\n\t"
@ -4520,9 +4520,9 @@ void ge_p1p1_to_p2(ge_p2* r, const ge_p1p1* p)
"adds x15, x15, x3\n\t"
"umulh x4, x11, x6\n\t"
"adcs x16, x16, x4\n\t"
"adc x17, x17, xzr\n\t"
/* A[1] * B[3] */
"umulh x20, x11, x9\n\t"
"adc x17, x17, xzr\n\t"
"mul x19, x11, x9\n\t"
/* A[0] * B[1] */
"mul x3, x10, x7\n\t"
@ -4638,9 +4638,9 @@ void ge_p1p1_to_p2(ge_p2* r, const ge_p1p1* p)
"adds x15, x15, x3\n\t"
"umulh x4, x11, x6\n\t"
"adcs x16, x16, x4\n\t"
"adc x17, x17, xzr\n\t"
/* A[1] * B[3] */
"umulh x20, x11, x9\n\t"
"adc x17, x17, xzr\n\t"
"mul x19, x11, x9\n\t"
/* A[0] * B[1] */
"mul x3, x10, x7\n\t"
@ -4756,9 +4756,9 @@ void ge_p1p1_to_p2(ge_p2* r, const ge_p1p1* p)
"adds x15, x15, x3\n\t"
"umulh x4, x11, x6\n\t"
"adcs x16, x16, x4\n\t"
"adc x17, x17, xzr\n\t"
/* A[1] * B[3] */
"umulh x20, x11, x9\n\t"
"adc x17, x17, xzr\n\t"
"mul x19, x11, x9\n\t"
/* A[0] * B[1] */
"mul x3, x10, x7\n\t"
@ -4890,9 +4890,9 @@ void ge_p1p1_to_p3(ge_p3* r, const ge_p1p1* p)
"adds x15, x15, x3\n\t"
"umulh x4, x11, x6\n\t"
"adcs x16, x16, x4\n\t"
"adc x17, x17, xzr\n\t"
/* A[1] * B[3] */
"umulh x20, x11, x9\n\t"
"adc x17, x17, xzr\n\t"
"mul x19, x11, x9\n\t"
/* A[0] * B[1] */
"mul x3, x10, x7\n\t"
@ -5008,9 +5008,9 @@ void ge_p1p1_to_p3(ge_p3* r, const ge_p1p1* p)
"adds x15, x15, x3\n\t"
"umulh x4, x24, x6\n\t"
"adcs x16, x16, x4\n\t"
"adc x17, x17, xzr\n\t"
/* A[1] * B[3] */
"umulh x20, x24, x9\n\t"
"adc x17, x17, xzr\n\t"
"mul x19, x24, x9\n\t"
/* A[0] * B[1] */
"mul x3, x23, x7\n\t"
@ -5126,9 +5126,9 @@ void ge_p1p1_to_p3(ge_p3* r, const ge_p1p1* p)
"adds x15, x15, x3\n\t"
"umulh x4, x24, x6\n\t"
"adcs x16, x16, x4\n\t"
"adc x17, x17, xzr\n\t"
/* A[1] * B[3] */
"umulh x20, x24, x9\n\t"
"adc x17, x17, xzr\n\t"
"mul x19, x24, x9\n\t"
/* A[0] * B[1] */
"mul x3, x23, x7\n\t"
@ -5242,9 +5242,9 @@ void ge_p1p1_to_p3(ge_p3* r, const ge_p1p1* p)
"adds x15, x15, x3\n\t"
"umulh x4, x11, x6\n\t"
"adcs x16, x16, x4\n\t"
"adc x17, x17, xzr\n\t"
/* A[1] * B[3] */
"umulh x20, x11, x9\n\t"
"adc x17, x17, xzr\n\t"
"mul x19, x11, x9\n\t"
/* A[0] * B[1] */
"mul x3, x10, x7\n\t"
@ -5873,9 +5873,9 @@ void ge_madd(ge_p1p1* r, const ge_p3* p, const ge_precomp* q)
"adds x22, x22, x25\n\t"
"umulh x26, x17, x8\n\t"
"adcs x23, x23, x26\n\t"
"adc x24, x24, xzr\n\t"
/* A[1] * B[3] */
"umulh x5, x17, x11\n\t"
"adc x24, x24, xzr\n\t"
"mul x4, x17, x11\n\t"
/* A[0] * B[1] */
"mul x25, x16, x9\n\t"
@ -5989,9 +5989,9 @@ void ge_madd(ge_p1p1* r, const ge_p3* p, const ge_precomp* q)
"adds x5, x5, x25\n\t"
"umulh x26, x13, x16\n\t"
"adcs x6, x6, x26\n\t"
"adc x7, x7, xzr\n\t"
/* A[1] * B[3] */
"umulh x9, x13, x20\n\t"
"adc x7, x7, xzr\n\t"
"mul x8, x13, x20\n\t"
/* A[0] * B[1] */
"mul x25, x12, x17\n\t"
@ -6146,9 +6146,9 @@ void ge_madd(ge_p1p1* r, const ge_p3* p, const ge_precomp* q)
"adds x17, x17, x25\n\t"
"umulh x26, x22, x4\n\t"
"adcs x19, x19, x26\n\t"
"adc x20, x20, xzr\n\t"
/* A[1] * B[3] */
"umulh x9, x22, x7\n\t"
"adc x20, x20, xzr\n\t"
"mul x8, x22, x7\n\t"
/* A[0] * B[1] */
"mul x25, x21, x5\n\t"
@ -6370,9 +6370,9 @@ void ge_msub(ge_p1p1* r, const ge_p3* p, const ge_precomp* q)
"adds x22, x22, x25\n\t"
"umulh x26, x17, x8\n\t"
"adcs x23, x23, x26\n\t"
"adc x24, x24, xzr\n\t"
/* A[1] * B[3] */
"umulh x5, x17, x11\n\t"
"adc x24, x24, xzr\n\t"
"mul x4, x17, x11\n\t"
/* A[0] * B[1] */
"mul x25, x16, x9\n\t"
@ -6486,9 +6486,9 @@ void ge_msub(ge_p1p1* r, const ge_p3* p, const ge_precomp* q)
"adds x5, x5, x25\n\t"
"umulh x26, x13, x16\n\t"
"adcs x6, x6, x26\n\t"
"adc x7, x7, xzr\n\t"
/* A[1] * B[3] */
"umulh x9, x13, x20\n\t"
"adc x7, x7, xzr\n\t"
"mul x8, x13, x20\n\t"
/* A[0] * B[1] */
"mul x25, x12, x17\n\t"
@ -6643,9 +6643,9 @@ void ge_msub(ge_p1p1* r, const ge_p3* p, const ge_precomp* q)
"adds x17, x17, x25\n\t"
"umulh x26, x22, x4\n\t"
"adcs x19, x19, x26\n\t"
"adc x20, x20, xzr\n\t"
/* A[1] * B[3] */
"umulh x9, x22, x7\n\t"
"adc x20, x20, xzr\n\t"
"mul x8, x22, x7\n\t"
/* A[0] * B[1] */
"mul x25, x21, x5\n\t"
@ -6865,9 +6865,9 @@ void ge_add(ge_p1p1* r, const ge_p3* p, const ge_cached* q)
"adds x22, x22, x25\n\t"
"umulh x26, x17, x8\n\t"
"adcs x23, x23, x26\n\t"
"adc x24, x24, xzr\n\t"
/* A[1] * B[3] */
"umulh x5, x17, x11\n\t"
"adc x24, x24, xzr\n\t"
"mul x4, x17, x11\n\t"
/* A[0] * B[1] */
"mul x25, x16, x9\n\t"
@ -6984,9 +6984,9 @@ void ge_add(ge_p1p1* r, const ge_p3* p, const ge_cached* q)
"adds x5, x5, x25\n\t"
"umulh x26, x13, x16\n\t"
"adcs x6, x6, x26\n\t"
"adc x7, x7, xzr\n\t"
/* A[1] * B[3] */
"umulh x9, x13, x20\n\t"
"adc x7, x7, xzr\n\t"
"mul x8, x13, x20\n\t"
/* A[0] * B[1] */
"mul x25, x12, x17\n\t"
@ -7144,9 +7144,9 @@ void ge_add(ge_p1p1* r, const ge_p3* p, const ge_cached* q)
"adds x17, x17, x25\n\t"
"umulh x26, x22, x4\n\t"
"adcs x19, x19, x26\n\t"
"adc x20, x20, xzr\n\t"
/* A[1] * B[3] */
"umulh x9, x22, x7\n\t"
"adc x20, x20, xzr\n\t"
"mul x8, x22, x7\n\t"
/* A[0] * B[1] */
"mul x25, x21, x5\n\t"
@ -7265,9 +7265,9 @@ void ge_add(ge_p1p1* r, const ge_p3* p, const ge_cached* q)
"adds x9, x9, x25\n\t"
"umulh x26, x5, x12\n\t"
"adcs x10, x10, x26\n\t"
"adc x11, x11, xzr\n\t"
/* A[1] * B[3] */
"umulh x17, x5, x15\n\t"
"adc x11, x11, xzr\n\t"
"mul x16, x5, x15\n\t"
/* A[0] * B[1] */
"mul x25, x4, x13\n\t"
@ -7488,9 +7488,9 @@ void ge_sub(ge_p1p1* r, const ge_p3* p, const ge_cached* q)
"adds x22, x22, x25\n\t"
"umulh x26, x17, x8\n\t"
"adcs x23, x23, x26\n\t"
"adc x24, x24, xzr\n\t"
/* A[1] * B[3] */
"umulh x5, x17, x11\n\t"
"adc x24, x24, xzr\n\t"
"mul x4, x17, x11\n\t"
/* A[0] * B[1] */
"mul x25, x16, x9\n\t"
@ -7615,9 +7615,9 @@ void ge_sub(ge_p1p1* r, const ge_p3* p, const ge_cached* q)
"adds x5, x5, x25\n\t"
"umulh x26, x13, x16\n\t"
"adcs x6, x6, x26\n\t"
"adc x7, x7, xzr\n\t"
/* A[1] * B[3] */
"umulh x9, x13, x20\n\t"
"adc x7, x7, xzr\n\t"
"mul x8, x13, x20\n\t"
/* A[0] * B[1] */
"mul x25, x12, x17\n\t"
@ -7775,9 +7775,9 @@ void ge_sub(ge_p1p1* r, const ge_p3* p, const ge_cached* q)
"adds x17, x17, x25\n\t"
"umulh x26, x22, x4\n\t"
"adcs x19, x19, x26\n\t"
"adc x20, x20, xzr\n\t"
/* A[1] * B[3] */
"umulh x9, x22, x7\n\t"
"adc x20, x20, xzr\n\t"
"mul x8, x22, x7\n\t"
/* A[0] * B[1] */
"mul x25, x21, x5\n\t"
@ -7904,9 +7904,9 @@ void ge_sub(ge_p1p1* r, const ge_p3* p, const ge_cached* q)
"adds x9, x9, x25\n\t"
"umulh x26, x5, x12\n\t"
"adcs x10, x10, x26\n\t"
"adc x11, x11, xzr\n\t"
/* A[1] * B[3] */
"umulh x17, x5, x15\n\t"
"adc x11, x11, xzr\n\t"
"mul x16, x5, x15\n\t"
/* A[0] * B[1] */
"mul x25, x4, x13\n\t"
@ -8265,9 +8265,9 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c)
"adds x5, x5, x21\n\t"
"umulh x22, x13, x16\n\t"
"adcs x6, x6, x22\n\t"
"adc x7, x7, xzr\n\t"
/* A[1] * B[3] */
"umulh x9, x13, x20\n\t"
"adc x7, x7, xzr\n\t"
"mul x8, x13, x20\n\t"
/* A[0] * B[1] */
"mul x21, x12, x17\n\t"

View File

@ -595,7 +595,11 @@ L_AES_invert_key_loop:
STM r0!, {r6, r7, r8, r9}
SUBS r11, r11, #0x2
SUB r10, r10, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_invert_key_loop
#else
BNE.N L_AES_invert_key_loop
#endif
SUB r0, r0, r1, LSL #3
ADD r0, r0, #0x10
SUB r11, r1, #0x1
@ -666,7 +670,11 @@ L_AES_invert_key_mix_loop:
EOR r8, r8, r9, ROR #24
STR r8, [r0], #4
SUBS r11, r11, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_invert_key_mix_loop
#else
BNE.N L_AES_invert_key_mix_loop
#endif
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
# Cycle Count = 165
.size AES_invert_key,.-AES_invert_key
@ -695,9 +703,17 @@ AES_set_encrypt_key:
LDR r8, L_AES_Thumb2_te
ADR lr, L_AES_Thumb2_rcon
CMP r1, #0x80
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_set_encrypt_key_start_128
#else
BEQ.N L_AES_set_encrypt_key_start_128
#endif
CMP r1, #0xc0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_set_encrypt_key_start_192
#else
BEQ.N L_AES_set_encrypt_key_start_192
#endif
LDRD r4, r5, [r0]
LDRD r6, r7, [r0, #8]
REV r4, r4
@ -757,7 +773,11 @@ L_AES_set_encrypt_key_loop_256:
STM r2, {r4, r5, r6, r7}
SUB r2, r2, #0x10
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_set_encrypt_key_loop_256
#else
BNE.N L_AES_set_encrypt_key_loop_256
#endif
UBFX r4, r7, #0, #8
UBFX r5, r7, #8, #8
UBFX r6, r7, #16, #8
@ -817,7 +837,11 @@ L_AES_set_encrypt_key_loop_192:
EOR r7, r7, r6
STM r2, {r0, r1, r4, r5, r6, r7}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_set_encrypt_key_loop_192
#else
BNE.N L_AES_set_encrypt_key_loop_192
#endif
UBFX r0, r7, #0, #8
UBFX r1, r7, #8, #8
UBFX r4, r7, #16, #8
@ -868,7 +892,11 @@ L_AES_set_encrypt_key_loop_128:
EOR r7, r7, r6
STM r2, {r4, r5, r6, r7}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_set_encrypt_key_loop_128
#else
BNE.N L_AES_set_encrypt_key_loop_128
#endif
L_AES_set_encrypt_key_end:
POP {r4, r5, r6, r7, r8, pc}
# Cycle Count = 327
@ -981,7 +1009,11 @@ L_AES_encrypt_block_nr:
EOR r6, r6, r10
EOR r7, r7, r11
SUBS r1, r1, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_encrypt_block_nr
#else
BNE.N L_AES_encrypt_block_nr
#endif
UBFX r8, r5, #16, #8
LSR r11, r4, #24
UBFX lr, r6, #8, #8
@ -1105,9 +1137,17 @@ AES_ECB_encrypt:
LDR r12, [sp, #36]
PUSH {r3}
CMP r12, #0xa
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_ECB_encrypt_start_block_128
#else
BEQ.N L_AES_ECB_encrypt_start_block_128
#endif
CMP r12, #0xc
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_ECB_encrypt_start_block_192
#else
BEQ.N L_AES_ECB_encrypt_start_block_192
#endif
L_AES_ECB_encrypt_loop_block_256:
LDR r4, [lr]
LDR r5, [lr, #4]
@ -1139,7 +1179,11 @@ L_AES_ECB_encrypt_loop_block_256:
SUBS r2, r2, #0x10
ADD lr, lr, #0x10
ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_ECB_encrypt_loop_block_256
#else
BNE.N L_AES_ECB_encrypt_loop_block_256
#endif
B L_AES_ECB_encrypt_end
L_AES_ECB_encrypt_start_block_192:
L_AES_ECB_encrypt_loop_block_192:
@ -1173,7 +1217,11 @@ L_AES_ECB_encrypt_loop_block_192:
SUBS r2, r2, #0x10
ADD lr, lr, #0x10
ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_ECB_encrypt_loop_block_192
#else
BNE.N L_AES_ECB_encrypt_loop_block_192
#endif
B L_AES_ECB_encrypt_end
L_AES_ECB_encrypt_start_block_128:
L_AES_ECB_encrypt_loop_block_128:
@ -1207,7 +1255,11 @@ L_AES_ECB_encrypt_loop_block_128:
SUBS r2, r2, #0x10
ADD lr, lr, #0x10
ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_ECB_encrypt_loop_block_128
#else
BNE.N L_AES_ECB_encrypt_loop_block_128
#endif
L_AES_ECB_encrypt_end:
POP {r3}
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@ -1228,9 +1280,17 @@ AES_CBC_encrypt:
LDM r9, {r4, r5, r6, r7}
PUSH {r3, r9}
CMP r8, #0xa
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_CBC_encrypt_start_block_128
#else
BEQ.N L_AES_CBC_encrypt_start_block_128
#endif
CMP r8, #0xc
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_CBC_encrypt_start_block_192
#else
BEQ.N L_AES_CBC_encrypt_start_block_192
#endif
L_AES_CBC_encrypt_loop_block_256:
LDR r8, [lr]
LDR r9, [lr, #4]
@ -1266,7 +1326,11 @@ L_AES_CBC_encrypt_loop_block_256:
SUBS r2, r2, #0x10
ADD lr, lr, #0x10
ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_CBC_encrypt_loop_block_256
#else
BNE.N L_AES_CBC_encrypt_loop_block_256
#endif
B L_AES_CBC_encrypt_end
L_AES_CBC_encrypt_start_block_192:
L_AES_CBC_encrypt_loop_block_192:
@ -1304,7 +1368,11 @@ L_AES_CBC_encrypt_loop_block_192:
SUBS r2, r2, #0x10
ADD lr, lr, #0x10
ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_CBC_encrypt_loop_block_192
#else
BNE.N L_AES_CBC_encrypt_loop_block_192
#endif
B L_AES_CBC_encrypt_end
L_AES_CBC_encrypt_start_block_128:
L_AES_CBC_encrypt_loop_block_128:
@ -1342,7 +1410,11 @@ L_AES_CBC_encrypt_loop_block_128:
SUBS r2, r2, #0x10
ADD lr, lr, #0x10
ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_CBC_encrypt_loop_block_128
#else
BNE.N L_AES_CBC_encrypt_loop_block_128
#endif
L_AES_CBC_encrypt_end:
POP {r3, r9}
STM r9, {r4, r5, r6, r7}
@ -1369,9 +1441,17 @@ AES_CTR_encrypt:
STM r8, {r4, r5, r6, r7}
PUSH {r3, r8}
CMP r12, #0xa
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_CTR_encrypt_start_block_128
#else
BEQ.N L_AES_CTR_encrypt_start_block_128
#endif
CMP r12, #0xc
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_CTR_encrypt_start_block_192
#else
BEQ.N L_AES_CTR_encrypt_start_block_192
#endif
L_AES_CTR_encrypt_loop_block_256:
PUSH {r1, r2, lr}
LDR lr, [sp, #16]
@ -1411,7 +1491,11 @@ L_AES_CTR_encrypt_loop_block_256:
SUBS r2, r2, #0x10
ADD lr, lr, #0x10
ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_CTR_encrypt_loop_block_256
#else
BNE.N L_AES_CTR_encrypt_loop_block_256
#endif
B L_AES_CTR_encrypt_end
L_AES_CTR_encrypt_start_block_192:
L_AES_CTR_encrypt_loop_block_192:
@ -1453,7 +1537,11 @@ L_AES_CTR_encrypt_loop_block_192:
SUBS r2, r2, #0x10
ADD lr, lr, #0x10
ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_CTR_encrypt_loop_block_192
#else
BNE.N L_AES_CTR_encrypt_loop_block_192
#endif
B L_AES_CTR_encrypt_end
L_AES_CTR_encrypt_start_block_128:
L_AES_CTR_encrypt_loop_block_128:
@ -1495,7 +1583,11 @@ L_AES_CTR_encrypt_loop_block_128:
SUBS r2, r2, #0x10
ADD lr, lr, #0x10
ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_CTR_encrypt_loop_block_128
#else
BNE.N L_AES_CTR_encrypt_loop_block_128
#endif
L_AES_CTR_encrypt_end:
POP {r3, r8}
REV r4, r4
@ -1617,7 +1709,11 @@ L_AES_decrypt_block_nr:
EOR r6, r6, r10
EOR r7, r7, r11
SUBS r1, r1, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_decrypt_block_nr
#else
BNE.N L_AES_decrypt_block_nr
#endif
UBFX r8, r7, #16, #8
LSR r11, r4, #24
UBFX r12, r6, #8, #8
@ -2001,9 +2097,17 @@ AES_ECB_decrypt:
MOV r12, r2
ADR r2, L_AES_Thumb2_td4
CMP r8, #0xa
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_ECB_decrypt_start_block_128
#else
BEQ.N L_AES_ECB_decrypt_start_block_128
#endif
CMP r8, #0xc
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_ECB_decrypt_start_block_192
#else
BEQ.N L_AES_ECB_decrypt_start_block_192
#endif
L_AES_ECB_decrypt_loop_block_256:
LDR r4, [lr]
LDR r5, [lr, #4]
@ -2034,7 +2138,11 @@ L_AES_ECB_decrypt_loop_block_256:
SUBS r12, r12, #0x10
ADD lr, lr, #0x10
ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_ECB_decrypt_loop_block_256
#else
BNE.N L_AES_ECB_decrypt_loop_block_256
#endif
B L_AES_ECB_decrypt_end
L_AES_ECB_decrypt_start_block_192:
L_AES_ECB_decrypt_loop_block_192:
@ -2067,7 +2175,11 @@ L_AES_ECB_decrypt_loop_block_192:
SUBS r12, r12, #0x10
ADD lr, lr, #0x10
ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_ECB_decrypt_loop_block_192
#else
BNE.N L_AES_ECB_decrypt_loop_block_192
#endif
B L_AES_ECB_decrypt_end
L_AES_ECB_decrypt_start_block_128:
L_AES_ECB_decrypt_loop_block_128:
@ -2100,7 +2212,11 @@ L_AES_ECB_decrypt_loop_block_128:
SUBS r12, r12, #0x10
ADD lr, lr, #0x10
ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_ECB_decrypt_loop_block_128
#else
BNE.N L_AES_ECB_decrypt_loop_block_128
#endif
L_AES_ECB_decrypt_end:
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
# Cycle Count = 210
@ -2121,9 +2237,17 @@ AES_CBC_decrypt:
ADR r2, L_AES_Thumb2_td4
PUSH {r3, r4}
CMP r8, #0xa
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_CBC_decrypt_loop_block_128
#else
BEQ.N L_AES_CBC_decrypt_loop_block_128
#endif
CMP r8, #0xc
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_CBC_decrypt_loop_block_192
#else
BEQ.N L_AES_CBC_decrypt_loop_block_192
#endif
L_AES_CBC_decrypt_loop_block_256:
PUSH {r1, r12, lr}
LDR r4, [lr]
@ -2164,7 +2288,11 @@ L_AES_CBC_decrypt_loop_block_256:
SUBS r12, r12, #0x10
ADD lr, lr, #0x10
ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_CBC_decrypt_end_odd
#else
BEQ.N L_AES_CBC_decrypt_end_odd
#endif
PUSH {r1, r12, lr}
LDR r4, [lr]
LDR r5, [lr, #4]
@ -2205,7 +2333,11 @@ L_AES_CBC_decrypt_loop_block_256:
SUBS r12, r12, #0x10
ADD lr, lr, #0x10
ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_CBC_decrypt_loop_block_256
#else
BNE.N L_AES_CBC_decrypt_loop_block_256
#endif
B L_AES_CBC_decrypt_end
L_AES_CBC_decrypt_loop_block_192:
PUSH {r1, r12, lr}
@ -2247,7 +2379,11 @@ L_AES_CBC_decrypt_loop_block_192:
SUBS r12, r12, #0x10
ADD lr, lr, #0x10
ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_CBC_decrypt_end_odd
#else
BEQ.N L_AES_CBC_decrypt_end_odd
#endif
PUSH {r1, r12, lr}
LDR r4, [lr]
LDR r5, [lr, #4]
@ -2288,7 +2424,11 @@ L_AES_CBC_decrypt_loop_block_192:
SUBS r12, r12, #0x10
ADD lr, lr, #0x10
ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_CBC_decrypt_loop_block_192
#else
BNE.N L_AES_CBC_decrypt_loop_block_192
#endif
B L_AES_CBC_decrypt_end
L_AES_CBC_decrypt_loop_block_128:
PUSH {r1, r12, lr}
@ -2330,7 +2470,11 @@ L_AES_CBC_decrypt_loop_block_128:
SUBS r12, r12, #0x10
ADD lr, lr, #0x10
ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_CBC_decrypt_end_odd
#else
BEQ.N L_AES_CBC_decrypt_end_odd
#endif
PUSH {r1, r12, lr}
LDR r4, [lr]
LDR r5, [lr, #4]
@ -2371,7 +2515,11 @@ L_AES_CBC_decrypt_loop_block_128:
SUBS r12, r12, #0x10
ADD lr, lr, #0x10
ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_CBC_decrypt_loop_block_128
#else
BNE.N L_AES_CBC_decrypt_loop_block_128
#endif
B L_AES_CBC_decrypt_end
L_AES_CBC_decrypt_end_odd:
LDR r4, [sp, #4]
@ -2961,7 +3109,11 @@ L_GCM_gmult_len_start_block:
POP {r3}
SUBS r3, r3, #0x10
ADD r2, r2, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_GCM_gmult_len_start_block
#else
BNE.N L_GCM_gmult_len_start_block
#endif
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
# Cycle Count = 742
.size GCM_gmult_len,.-GCM_gmult_len
@ -2989,9 +3141,17 @@ AES_GCM_encrypt:
STM r8, {r4, r5, r6, r7}
PUSH {r3, r8}
CMP r12, #0xa
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_GCM_encrypt_start_block_128
#else
BEQ.N L_AES_GCM_encrypt_start_block_128
#endif
CMP r12, #0xc
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_GCM_encrypt_start_block_192
#else
BEQ.N L_AES_GCM_encrypt_start_block_192
#endif
L_AES_GCM_encrypt_loop_block_256:
PUSH {r1, r2, lr}
LDR lr, [sp, #16]
@ -3028,7 +3188,11 @@ L_AES_GCM_encrypt_loop_block_256:
SUBS r2, r2, #0x10
ADD lr, lr, #0x10
ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_GCM_encrypt_loop_block_256
#else
BNE.N L_AES_GCM_encrypt_loop_block_256
#endif
B L_AES_GCM_encrypt_end
L_AES_GCM_encrypt_start_block_192:
L_AES_GCM_encrypt_loop_block_192:
@ -3067,7 +3231,11 @@ L_AES_GCM_encrypt_loop_block_192:
SUBS r2, r2, #0x10
ADD lr, lr, #0x10
ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_GCM_encrypt_loop_block_192
#else
BNE.N L_AES_GCM_encrypt_loop_block_192
#endif
B L_AES_GCM_encrypt_end
L_AES_GCM_encrypt_start_block_128:
L_AES_GCM_encrypt_loop_block_128:
@ -3106,7 +3274,11 @@ L_AES_GCM_encrypt_loop_block_128:
SUBS r2, r2, #0x10
ADD lr, lr, #0x10
ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_GCM_encrypt_loop_block_128
#else
BNE.N L_AES_GCM_encrypt_loop_block_128
#endif
L_AES_GCM_encrypt_end:
POP {r3, r8}
REV r4, r4

View File

@ -37,6 +37,18 @@
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#ifdef WOLFSSL_ARMASM_INLINE
#ifdef WOLFSSL_ARMASM
#if !defined(__aarch64__) && defined(__arm__)
#ifdef __IAR_SYSTEMS_ICC__
#define __asm__ asm
#define __volatile__ volatile
#endif /* __IAR_SYSTEMS_ICC__ */
#ifdef __KEIL__
#define __asm__ __asm
#define __volatile__ volatile
#endif /* __KEIL__ */
#ifndef NO_AES
#include <wolfssl/wolfcrypt/aes.h>
@ -206,7 +218,11 @@ void AES_invert_key(unsigned char* ks_p, word32 rounds_p)
"STM %[ks]!, {r6, r7, r8, r9}\n\t"
"SUBS r11, r11, #0x2\n\t"
"SUB r10, r10, #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_invert_key_loop_%=\n\t"
#else
"BNE.N L_AES_invert_key_loop_%=\n\t"
#endif
"SUB %[ks], %[ks], %[rounds], LSL #3\n\t"
"ADD %[ks], %[ks], #0x10\n\t"
"SUB r11, %[rounds], #0x1\n\t"
@ -278,7 +294,11 @@ void AES_invert_key(unsigned char* ks_p, word32 rounds_p)
"EOR r8, r8, r9, ROR #24\n\t"
"STR r8, [%[ks]], #4\n\t"
"SUBS r11, r11, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_invert_key_mix_loop_%=\n\t"
#else
"BNE.N L_AES_invert_key_mix_loop_%=\n\t"
#endif
: [ks] "+r" (ks), [rounds] "+r" (rounds), [L_AES_Thumb2_te] "+r" (L_AES_Thumb2_te_c), [L_AES_Thumb2_td] "+r" (L_AES_Thumb2_td_c)
:
: "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
@ -306,9 +326,17 @@ void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, unsigned char
"MOV r8, %[L_AES_Thumb2_te]\n\t"
"MOV lr, %[L_AES_Thumb2_rcon]\n\t"
"CMP %[len], #0x80\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_set_encrypt_key_start_128_%=\n\t"
#else
"BEQ.N L_AES_set_encrypt_key_start_128_%=\n\t"
#endif
"CMP %[len], #0xc0\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_set_encrypt_key_start_192_%=\n\t"
#else
"BEQ.N L_AES_set_encrypt_key_start_192_%=\n\t"
#endif
"LDRD r4, r5, [%[key]]\n\t"
"LDRD r6, r7, [%[key], #8]\n\t"
"REV r4, r4\n\t"
@ -369,7 +397,11 @@ void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, unsigned char
"STM %[ks], {r4, r5, r6, r7}\n\t"
"SUB %[ks], %[ks], #0x10\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_set_encrypt_key_loop_256_%=\n\t"
#else
"BNE.N L_AES_set_encrypt_key_loop_256_%=\n\t"
#endif
"UBFX r4, r7, #0, #8\n\t"
"UBFX r5, r7, #8, #8\n\t"
"UBFX r6, r7, #16, #8\n\t"
@ -431,7 +463,11 @@ void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, unsigned char
"EOR r7, r7, r6\n\t"
"STM %[ks], {r0, r1, r4, r5, r6, r7}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_set_encrypt_key_loop_192_%=\n\t"
#else
"BNE.N L_AES_set_encrypt_key_loop_192_%=\n\t"
#endif
"UBFX r0, r7, #0, #8\n\t"
"UBFX r1, r7, #8, #8\n\t"
"UBFX r4, r7, #16, #8\n\t"
@ -484,7 +520,11 @@ void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, unsigned char
"EOR r7, r7, r6\n\t"
"STM %[ks], {r4, r5, r6, r7}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_set_encrypt_key_loop_128_%=\n\t"
#else
"BNE.N L_AES_set_encrypt_key_loop_128_%=\n\t"
#endif
"\n"
"L_AES_set_encrypt_key_end_%=:\n\t"
: [key] "+r" (key), [len] "+r" (len), [ks] "+r" (ks), [L_AES_Thumb2_te] "+r" (L_AES_Thumb2_te_c), [L_AES_Thumb2_rcon] "+r" (L_AES_Thumb2_rcon_c)
@ -605,7 +645,11 @@ void AES_encrypt_block(const uint32_t* te_p, int nr_p, int len_p, const uint32_t
"EOR r6, r6, r10\n\t"
"EOR r7, r7, r11\n\t"
"SUBS %[nr], %[nr], #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_encrypt_block_nr_%=\n\t"
#else
"BNE.N L_AES_encrypt_block_nr_%=\n\t"
#endif
"UBFX r8, r5, #16, #8\n\t"
"LSR r11, r4, #24\n\t"
"UBFX lr, r6, #8, #8\n\t"
@ -733,9 +777,17 @@ void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"MOV r12, r4\n\t"
"PUSH {%[ks]}\n\t"
"CMP r12, #0xa\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_ECB_encrypt_start_block_128_%=\n\t"
#else
"BEQ.N L_AES_ECB_encrypt_start_block_128_%=\n\t"
#endif
"CMP r12, #0xc\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_ECB_encrypt_start_block_192_%=\n\t"
#else
"BEQ.N L_AES_ECB_encrypt_start_block_192_%=\n\t"
#endif
"\n"
"L_AES_ECB_encrypt_loop_block_256_%=:\n\t"
"LDR r4, [lr]\n\t"
@ -768,7 +820,11 @@ void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_ECB_encrypt_loop_block_256_%=\n\t"
#else
"BNE.N L_AES_ECB_encrypt_loop_block_256_%=\n\t"
#endif
"B L_AES_ECB_encrypt_end_%=\n\t"
"\n"
"L_AES_ECB_encrypt_start_block_192_%=:\n\t"
@ -804,7 +860,11 @@ void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_ECB_encrypt_loop_block_192_%=\n\t"
#else
"BNE.N L_AES_ECB_encrypt_loop_block_192_%=\n\t"
#endif
"B L_AES_ECB_encrypt_end_%=\n\t"
"\n"
"L_AES_ECB_encrypt_start_block_128_%=:\n\t"
@ -840,7 +900,11 @@ void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_ECB_encrypt_loop_block_128_%=\n\t"
#else
"BNE.N L_AES_ECB_encrypt_loop_block_128_%=\n\t"
#endif
"\n"
"L_AES_ECB_encrypt_end_%=:\n\t"
"POP {%[ks]}\n\t"
@ -848,7 +912,6 @@ void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
:
: "memory", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11"
);
(void)nr;
}
#endif /* HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
@ -873,9 +936,17 @@ void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"LDM r9, {r4, r5, r6, r7}\n\t"
"PUSH {%[ks], r9}\n\t"
"CMP r8, #0xa\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_CBC_encrypt_start_block_128_%=\n\t"
#else
"BEQ.N L_AES_CBC_encrypt_start_block_128_%=\n\t"
#endif
"CMP r8, #0xc\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_CBC_encrypt_start_block_192_%=\n\t"
#else
"BEQ.N L_AES_CBC_encrypt_start_block_192_%=\n\t"
#endif
"\n"
"L_AES_CBC_encrypt_loop_block_256_%=:\n\t"
"LDR r8, [lr]\n\t"
@ -912,7 +983,11 @@ void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_CBC_encrypt_loop_block_256_%=\n\t"
#else
"BNE.N L_AES_CBC_encrypt_loop_block_256_%=\n\t"
#endif
"B L_AES_CBC_encrypt_end_%=\n\t"
"\n"
"L_AES_CBC_encrypt_start_block_192_%=:\n\t"
@ -952,7 +1027,11 @@ void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_CBC_encrypt_loop_block_192_%=\n\t"
#else
"BNE.N L_AES_CBC_encrypt_loop_block_192_%=\n\t"
#endif
"B L_AES_CBC_encrypt_end_%=\n\t"
"\n"
"L_AES_CBC_encrypt_start_block_128_%=:\n\t"
@ -992,7 +1071,11 @@ void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_CBC_encrypt_loop_block_128_%=\n\t"
#else
"BNE.N L_AES_CBC_encrypt_loop_block_128_%=\n\t"
#endif
"\n"
"L_AES_CBC_encrypt_end_%=:\n\t"
"POP {%[ks], r9}\n\t"
@ -1001,8 +1084,6 @@ void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
:
: "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11"
);
(void)nr;
(void)iv;
}
#endif /* HAVE_AES_CBC */
@ -1032,9 +1113,17 @@ void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"STM r8, {r4, r5, r6, r7}\n\t"
"PUSH {%[ks], r8}\n\t"
"CMP r12, #0xa\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_CTR_encrypt_start_block_128_%=\n\t"
#else
"BEQ.N L_AES_CTR_encrypt_start_block_128_%=\n\t"
#endif
"CMP r12, #0xc\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_CTR_encrypt_start_block_192_%=\n\t"
#else
"BEQ.N L_AES_CTR_encrypt_start_block_192_%=\n\t"
#endif
"\n"
"L_AES_CTR_encrypt_loop_block_256_%=:\n\t"
"PUSH {r1, %[len], lr}\n\t"
@ -1075,7 +1164,11 @@ void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_CTR_encrypt_loop_block_256_%=\n\t"
#else
"BNE.N L_AES_CTR_encrypt_loop_block_256_%=\n\t"
#endif
"B L_AES_CTR_encrypt_end_%=\n\t"
"\n"
"L_AES_CTR_encrypt_start_block_192_%=:\n\t"
@ -1119,7 +1212,11 @@ void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_CTR_encrypt_loop_block_192_%=\n\t"
#else
"BNE.N L_AES_CTR_encrypt_loop_block_192_%=\n\t"
#endif
"B L_AES_CTR_encrypt_end_%=\n\t"
"\n"
"L_AES_CTR_encrypt_start_block_128_%=:\n\t"
@ -1163,7 +1260,11 @@ void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_CTR_encrypt_loop_block_128_%=\n\t"
#else
"BNE.N L_AES_CTR_encrypt_loop_block_128_%=\n\t"
#endif
"\n"
"L_AES_CTR_encrypt_end_%=:\n\t"
"POP {%[ks], r8}\n\t"
@ -1176,8 +1277,6 @@ void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
:
: "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11"
);
(void)nr;
(void)ctr;
}
#endif /* WOLFSSL_AES_COUNTER */
@ -1294,7 +1393,11 @@ void AES_decrypt_block(const uint32_t* td_p, int nr_p, const uint8_t* td4_p)
"EOR r6, r6, r10\n\t"
"EOR r7, r7, r11\n\t"
"SUBS %[nr], %[nr], #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_decrypt_block_nr_%=\n\t"
#else
"BNE.N L_AES_decrypt_block_nr_%=\n\t"
#endif
"UBFX r8, r7, #16, #8\n\t"
"LSR r11, r4, #24\n\t"
"UBFX r12, r6, #8, #8\n\t"
@ -1457,9 +1560,17 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"MOV r12, %[len]\n\t"
"MOV r2, %[L_AES_Thumb2_td4]\n\t"
"CMP r8, #0xa\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_ECB_decrypt_start_block_128_%=\n\t"
#else
"BEQ.N L_AES_ECB_decrypt_start_block_128_%=\n\t"
#endif
"CMP r8, #0xc\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_ECB_decrypt_start_block_192_%=\n\t"
#else
"BEQ.N L_AES_ECB_decrypt_start_block_192_%=\n\t"
#endif
"\n"
"L_AES_ECB_decrypt_loop_block_256_%=:\n\t"
"LDR r4, [lr]\n\t"
@ -1491,7 +1602,11 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS r12, r12, #0x10\n\t"
"ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_ECB_decrypt_loop_block_256_%=\n\t"
#else
"BNE.N L_AES_ECB_decrypt_loop_block_256_%=\n\t"
#endif
"B L_AES_ECB_decrypt_end_%=\n\t"
"\n"
"L_AES_ECB_decrypt_start_block_192_%=:\n\t"
@ -1526,7 +1641,11 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS r12, r12, #0x10\n\t"
"ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_ECB_decrypt_loop_block_192_%=\n\t"
#else
"BNE.N L_AES_ECB_decrypt_loop_block_192_%=\n\t"
#endif
"B L_AES_ECB_decrypt_end_%=\n\t"
"\n"
"L_AES_ECB_decrypt_start_block_128_%=:\n\t"
@ -1561,14 +1680,17 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS r12, r12, #0x10\n\t"
"ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_ECB_decrypt_loop_block_128_%=\n\t"
#else
"BNE.N L_AES_ECB_decrypt_loop_block_128_%=\n\t"
#endif
"\n"
"L_AES_ECB_decrypt_end_%=:\n\t"
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [L_AES_Thumb2_td_ecb] "+r" (L_AES_Thumb2_td_ecb_c), [L_AES_Thumb2_td4] "+r" (L_AES_Thumb2_td4_c)
:
: "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11"
);
(void)nr;
}
#endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
@ -1595,9 +1717,17 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"MOV r2, %[L_AES_Thumb2_td4]\n\t"
"PUSH {%[ks], r4}\n\t"
"CMP r8, #0xa\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_CBC_decrypt_loop_block_128_%=\n\t"
#else
"BEQ.N L_AES_CBC_decrypt_loop_block_128_%=\n\t"
#endif
"CMP r8, #0xc\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_CBC_decrypt_loop_block_192_%=\n\t"
#else
"BEQ.N L_AES_CBC_decrypt_loop_block_192_%=\n\t"
#endif
"\n"
"L_AES_CBC_decrypt_loop_block_256_%=:\n\t"
"PUSH {r1, r12, lr}\n\t"
@ -1639,7 +1769,11 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS r12, r12, #0x10\n\t"
"ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_CBC_decrypt_end_odd_%=\n\t"
#else
"BEQ.N L_AES_CBC_decrypt_end_odd_%=\n\t"
#endif
"PUSH {r1, r12, lr}\n\t"
"LDR r4, [lr]\n\t"
"LDR r5, [lr, #4]\n\t"
@ -1680,7 +1814,11 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS r12, r12, #0x10\n\t"
"ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_CBC_decrypt_loop_block_256_%=\n\t"
#else
"BNE.N L_AES_CBC_decrypt_loop_block_256_%=\n\t"
#endif
"B L_AES_CBC_decrypt_end_%=\n\t"
"\n"
"L_AES_CBC_decrypt_loop_block_192_%=:\n\t"
@ -1723,7 +1861,11 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS r12, r12, #0x10\n\t"
"ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_CBC_decrypt_end_odd_%=\n\t"
#else
"BEQ.N L_AES_CBC_decrypt_end_odd_%=\n\t"
#endif
"PUSH {r1, r12, lr}\n\t"
"LDR r4, [lr]\n\t"
"LDR r5, [lr, #4]\n\t"
@ -1764,7 +1906,11 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS r12, r12, #0x10\n\t"
"ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_CBC_decrypt_loop_block_192_%=\n\t"
#else
"BNE.N L_AES_CBC_decrypt_loop_block_192_%=\n\t"
#endif
"B L_AES_CBC_decrypt_end_%=\n\t"
"\n"
"L_AES_CBC_decrypt_loop_block_128_%=:\n\t"
@ -1807,7 +1953,11 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS r12, r12, #0x10\n\t"
"ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_CBC_decrypt_end_odd_%=\n\t"
#else
"BEQ.N L_AES_CBC_decrypt_end_odd_%=\n\t"
#endif
"PUSH {r1, r12, lr}\n\t"
"LDR r4, [lr]\n\t"
"LDR r5, [lr, #4]\n\t"
@ -1848,7 +1998,11 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS r12, r12, #0x10\n\t"
"ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_CBC_decrypt_loop_block_128_%=\n\t"
#else
"BNE.N L_AES_CBC_decrypt_loop_block_128_%=\n\t"
#endif
"B L_AES_CBC_decrypt_end_%=\n\t"
"\n"
"L_AES_CBC_decrypt_end_odd_%=:\n\t"
@ -1864,8 +2018,6 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
:
: "memory", "r12", "lr", "r8", "r9", "r10", "r11"
);
(void)nr;
(void)iv;
}
#endif /* HAVE_AES_CBC */
@ -2437,7 +2589,11 @@ void GCM_gmult_len(unsigned char* x_p, const unsigned char** m_p, const unsigned
"POP {r3}\n\t"
"SUBS %[len], %[len], #0x10\n\t"
"ADD %[data], %[data], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_GCM_gmult_len_start_block_%=\n\t"
#else
"BNE.N L_GCM_gmult_len_start_block_%=\n\t"
#endif
: [x] "+r" (x), [m] "+r" (m), [data] "+r" (data), [len] "+r" (len), [L_GCM_gmult_len_r] "+r" (L_GCM_gmult_len_r_c)
:
: "memory", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
@ -2470,9 +2626,17 @@ void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"STM r8, {r4, r5, r6, r7}\n\t"
"PUSH {%[ks], r8}\n\t"
"CMP r12, #0xa\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_GCM_encrypt_start_block_128_%=\n\t"
#else
"BEQ.N L_AES_GCM_encrypt_start_block_128_%=\n\t"
#endif
"CMP r12, #0xc\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_GCM_encrypt_start_block_192_%=\n\t"
#else
"BEQ.N L_AES_GCM_encrypt_start_block_192_%=\n\t"
#endif
"\n"
"L_AES_GCM_encrypt_loop_block_256_%=:\n\t"
"PUSH {r1, %[len], lr}\n\t"
@ -2510,7 +2674,11 @@ void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_GCM_encrypt_loop_block_256_%=\n\t"
#else
"BNE.N L_AES_GCM_encrypt_loop_block_256_%=\n\t"
#endif
"B L_AES_GCM_encrypt_end_%=\n\t"
"\n"
"L_AES_GCM_encrypt_start_block_192_%=:\n\t"
@ -2551,7 +2719,11 @@ void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_GCM_encrypt_loop_block_192_%=\n\t"
#else
"BNE.N L_AES_GCM_encrypt_loop_block_192_%=\n\t"
#endif
"B L_AES_GCM_encrypt_end_%=\n\t"
"\n"
"L_AES_GCM_encrypt_start_block_128_%=:\n\t"
@ -2592,7 +2764,11 @@ void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_GCM_encrypt_loop_block_128_%=\n\t"
#else
"BNE.N L_AES_GCM_encrypt_loop_block_128_%=\n\t"
#endif
"\n"
"L_AES_GCM_encrypt_end_%=:\n\t"
"POP {%[ks], r8}\n\t"
@ -2605,12 +2781,13 @@ void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
:
: "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11"
);
(void)nr;
(void)ctr;
}
#endif /* HAVE_AESGCM */
#endif /* !NO_AES */
#endif /* !__aarch64__ && __thumb__ */
#endif /* WOLFSSL_ARMASM */
#endif /* !defined(__aarch64__) && defined(__arm__) */
#endif /* WOLFSSL_ARMASM */
#endif /* WOLFSSL_ARMASM_INLINE */

View File

@ -2741,12 +2741,20 @@ L_curve25519_bits:
LDR r1, [sp, #180]
SUBS r1, r1, #0x1
STR r1, [sp, #180]
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BGE L_curve25519_bits
#else
BGE.N L_curve25519_bits
#endif
MOV r1, #0x1f
STR r1, [sp, #180]
SUBS r2, r2, #0x4
STR r2, [sp, #176]
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BGE L_curve25519_words
#else
BGE.N L_curve25519_words
#endif
# Invert
ADD r1, sp, #0x0
ADD r0, sp, #0x20
@ -2783,7 +2791,11 @@ L_curve25519_inv_1:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_1
#else
BNE.N L_curve25519_inv_1
#endif
ADD r2, sp, #0x40
ADD r1, sp, #0x60
ADD r0, sp, #0x40
@ -2799,7 +2811,11 @@ L_curve25519_inv_2:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_2
#else
BNE.N L_curve25519_inv_2
#endif
ADD r2, sp, #0x40
ADD r1, sp, #0x60
ADD r0, sp, #0x60
@ -2815,7 +2831,11 @@ L_curve25519_inv_3:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_3
#else
BNE.N L_curve25519_inv_3
#endif
ADD r2, sp, #0x60
ADD r1, sp, #0x80
ADD r0, sp, #0x60
@ -2828,7 +2848,11 @@ L_curve25519_inv_4:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_4
#else
BNE.N L_curve25519_inv_4
#endif
ADD r2, sp, #0x40
ADD r1, sp, #0x60
ADD r0, sp, #0x40
@ -2844,7 +2868,11 @@ L_curve25519_inv_5:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_5
#else
BNE.N L_curve25519_inv_5
#endif
ADD r2, sp, #0x40
ADD r1, sp, #0x60
ADD r0, sp, #0x60
@ -2860,7 +2888,11 @@ L_curve25519_inv_6:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_6
#else
BNE.N L_curve25519_inv_6
#endif
ADD r2, sp, #0x60
ADD r1, sp, #0x80
ADD r0, sp, #0x60
@ -2873,7 +2905,11 @@ L_curve25519_inv_7:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_7
#else
BNE.N L_curve25519_inv_7
#endif
ADD r2, sp, #0x40
ADD r1, sp, #0x60
ADD r0, sp, #0x40
@ -2886,7 +2922,11 @@ L_curve25519_inv_8:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_8
#else
BNE.N L_curve25519_inv_8
#endif
ADD r2, sp, #0x20
ADD r1, sp, #0x40
ADD r0, sp, #0x0
@ -3022,7 +3062,11 @@ L_curve25519_bits:
BL fe_mul_op
LDR r2, [sp, #168]
SUBS r2, r2, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BGE L_curve25519_bits
#else
BGE.N L_curve25519_bits
#endif
# Cycle Count: 171
LDR r1, [sp, #184]
# Copy
@ -3064,7 +3108,11 @@ L_curve25519_inv_1:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_1
#else
BNE.N L_curve25519_inv_1
#endif
ADD r2, sp, #0x40
ADD r1, sp, #0x60
ADD r0, sp, #0x40
@ -3080,7 +3128,11 @@ L_curve25519_inv_2:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_2
#else
BNE.N L_curve25519_inv_2
#endif
ADD r2, sp, #0x40
ADD r1, sp, #0x60
ADD r0, sp, #0x60
@ -3096,7 +3148,11 @@ L_curve25519_inv_3:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_3
#else
BNE.N L_curve25519_inv_3
#endif
ADD r2, sp, #0x60
ADD r1, sp, #0x80
ADD r0, sp, #0x60
@ -3109,7 +3165,11 @@ L_curve25519_inv_4:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_4
#else
BNE.N L_curve25519_inv_4
#endif
ADD r2, sp, #0x40
ADD r1, sp, #0x60
ADD r0, sp, #0x40
@ -3125,7 +3185,11 @@ L_curve25519_inv_5:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_5
#else
BNE.N L_curve25519_inv_5
#endif
ADD r2, sp, #0x40
ADD r1, sp, #0x60
ADD r0, sp, #0x60
@ -3141,7 +3205,11 @@ L_curve25519_inv_6:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_6
#else
BNE.N L_curve25519_inv_6
#endif
ADD r2, sp, #0x60
ADD r1, sp, #0x80
ADD r0, sp, #0x60
@ -3154,7 +3222,11 @@ L_curve25519_inv_7:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_7
#else
BNE.N L_curve25519_inv_7
#endif
ADD r2, sp, #0x40
ADD r1, sp, #0x60
ADD r0, sp, #0x40
@ -3167,7 +3239,11 @@ L_curve25519_inv_8:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_8
#else
BNE.N L_curve25519_inv_8
#endif
ADD r2, sp, #0x20
ADD r1, sp, #0x40
ADD r0, sp, #0x0
@ -3244,7 +3320,11 @@ L_fe_invert1:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_invert1
#else
BNE.N L_fe_invert1
#endif
ADD r2, sp, #0x20
ADD r1, sp, #0x40
ADD r0, sp, #0x20
@ -3260,7 +3340,11 @@ L_fe_invert2:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_invert2
#else
BNE.N L_fe_invert2
#endif
ADD r2, sp, #0x20
ADD r1, sp, #0x40
ADD r0, sp, #0x40
@ -3276,7 +3360,11 @@ L_fe_invert3:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_invert3
#else
BNE.N L_fe_invert3
#endif
ADD r2, sp, #0x40
ADD r1, sp, #0x60
ADD r0, sp, #0x40
@ -3289,7 +3377,11 @@ L_fe_invert4:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_invert4
#else
BNE.N L_fe_invert4
#endif
ADD r2, sp, #0x20
ADD r1, sp, #0x40
ADD r0, sp, #0x20
@ -3305,7 +3397,11 @@ L_fe_invert5:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_invert5
#else
BNE.N L_fe_invert5
#endif
ADD r2, sp, #0x20
ADD r1, sp, #0x40
ADD r0, sp, #0x40
@ -3321,7 +3417,11 @@ L_fe_invert6:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_invert6
#else
BNE.N L_fe_invert6
#endif
ADD r2, sp, #0x40
ADD r1, sp, #0x60
ADD r0, sp, #0x40
@ -3334,7 +3434,11 @@ L_fe_invert7:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_invert7
#else
BNE.N L_fe_invert7
#endif
ADD r2, sp, #0x20
ADD r1, sp, #0x40
ADD r0, sp, #0x20
@ -3347,7 +3451,11 @@ L_fe_invert8:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_invert8
#else
BNE.N L_fe_invert8
#endif
MOV r2, sp
ADD r1, sp, #0x20
LDR r0, [sp, #128]
@ -3863,7 +3971,11 @@ L_fe_pow22523_1:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_pow22523_1
#else
BNE.N L_fe_pow22523_1
#endif
MOV r2, sp
ADD r1, sp, #0x20
MOV r0, sp
@ -3879,7 +3991,11 @@ L_fe_pow22523_2:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_pow22523_2
#else
BNE.N L_fe_pow22523_2
#endif
MOV r2, sp
ADD r1, sp, #0x20
ADD r0, sp, #0x20
@ -3895,7 +4011,11 @@ L_fe_pow22523_3:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_pow22523_3
#else
BNE.N L_fe_pow22523_3
#endif
ADD r2, sp, #0x20
ADD r1, sp, #0x40
ADD r0, sp, #0x20
@ -3908,7 +4028,11 @@ L_fe_pow22523_4:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_pow22523_4
#else
BNE.N L_fe_pow22523_4
#endif
MOV r2, sp
ADD r1, sp, #0x20
MOV r0, sp
@ -3924,7 +4048,11 @@ L_fe_pow22523_5:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_pow22523_5
#else
BNE.N L_fe_pow22523_5
#endif
MOV r2, sp
ADD r1, sp, #0x20
ADD r0, sp, #0x20
@ -3940,7 +4068,11 @@ L_fe_pow22523_6:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_pow22523_6
#else
BNE.N L_fe_pow22523_6
#endif
ADD r2, sp, #0x20
ADD r1, sp, #0x40
ADD r0, sp, #0x20
@ -3953,7 +4085,11 @@ L_fe_pow22523_7:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_pow22523_7
#else
BNE.N L_fe_pow22523_7
#endif
MOV r2, sp
ADD r1, sp, #0x20
MOV r0, sp
@ -3966,7 +4102,11 @@ L_fe_pow22523_8:
BL fe_sq_op
POP {r12}
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_pow22523_8
#else
BNE.N L_fe_pow22523_8
#endif
LDR r2, [sp, #100]
MOV r1, sp
LDR r0, [sp, #96]

View File

@ -37,6 +37,18 @@
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#ifdef WOLFSSL_ARMASM_INLINE
#ifdef WOLFSSL_ARMASM
#if !defined(__aarch64__) && defined(__arm__)
#ifdef __IAR_SYSTEMS_ICC__
#define __asm__ asm
#define __volatile__ volatile
#endif /* __IAR_SYSTEMS_ICC__ */
#ifdef __KEIL__
#define __asm__ __asm
#define __volatile__ volatile
#endif /* __KEIL__ */
/* Based on work by: Emil Lenngren
* https://github.com/pornin/X25519-Cortex-M4
*/
@ -2815,12 +2827,20 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"LDR %[n], [sp, #180]\n\t"
"SUBS %[n], %[n], #0x1\n\t"
"STR %[n], [sp, #180]\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BGE L_curve25519_bits_%=\n\t"
#else
"BGE.N L_curve25519_bits_%=\n\t"
#endif
"MOV %[n], #0x1f\n\t"
"STR %[n], [sp, #180]\n\t"
"SUBS %[a], %[a], #0x4\n\t"
"STR %[a], [sp, #176]\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BGE L_curve25519_words_%=\n\t"
#else
"BGE.N L_curve25519_words_%=\n\t"
#endif
/* Invert */
"ADD r1, sp, #0x0\n\t"
"ADD r0, sp, #0x20\n\t"
@ -2858,7 +2878,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_1_%=\n\t"
#else
"BNE.N L_curve25519_inv_1_%=\n\t"
#endif
"ADD r2, sp, #0x40\n\t"
"ADD r1, sp, #0x60\n\t"
"ADD r0, sp, #0x40\n\t"
@ -2875,7 +2899,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_2_%=\n\t"
#else
"BNE.N L_curve25519_inv_2_%=\n\t"
#endif
"ADD r2, sp, #0x40\n\t"
"ADD r1, sp, #0x60\n\t"
"ADD r0, sp, #0x60\n\t"
@ -2892,7 +2920,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_3_%=\n\t"
#else
"BNE.N L_curve25519_inv_3_%=\n\t"
#endif
"ADD r2, sp, #0x60\n\t"
"ADD r1, sp, #0x80\n\t"
"ADD r0, sp, #0x60\n\t"
@ -2906,7 +2938,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_4_%=\n\t"
#else
"BNE.N L_curve25519_inv_4_%=\n\t"
#endif
"ADD r2, sp, #0x40\n\t"
"ADD r1, sp, #0x60\n\t"
"ADD r0, sp, #0x40\n\t"
@ -2923,7 +2959,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_5_%=\n\t"
#else
"BNE.N L_curve25519_inv_5_%=\n\t"
#endif
"ADD r2, sp, #0x40\n\t"
"ADD r1, sp, #0x60\n\t"
"ADD r0, sp, #0x60\n\t"
@ -2940,7 +2980,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_6_%=\n\t"
#else
"BNE.N L_curve25519_inv_6_%=\n\t"
#endif
"ADD r2, sp, #0x60\n\t"
"ADD r1, sp, #0x80\n\t"
"ADD r0, sp, #0x60\n\t"
@ -2954,7 +2998,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_7_%=\n\t"
#else
"BNE.N L_curve25519_inv_7_%=\n\t"
#endif
"ADD r2, sp, #0x40\n\t"
"ADD r1, sp, #0x60\n\t"
"ADD r0, sp, #0x40\n\t"
@ -2968,7 +3016,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_8_%=\n\t"
#else
"BNE.N L_curve25519_inv_8_%=\n\t"
#endif
"ADD r2, sp, #0x20\n\t"
"ADD r1, sp, #0x40\n\t"
"ADD r0, sp, #0x0\n\t"
@ -3110,7 +3162,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_mul_op\n\t"
"LDR %[a], [sp, #168]\n\t"
"SUBS %[a], %[a], #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BGE L_curve25519_bits_%=\n\t"
#else
"BGE.N L_curve25519_bits_%=\n\t"
#endif
/* Cycle Count: 171 */
"LDR %[n], [sp, #184]\n\t"
/* Copy */
@ -3153,7 +3209,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_1_%=\n\t"
#else
"BNE.N L_curve25519_inv_1_%=\n\t"
#endif
"ADD r2, sp, #0x40\n\t"
"ADD r1, sp, #0x60\n\t"
"ADD r0, sp, #0x40\n\t"
@ -3170,7 +3230,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_2_%=\n\t"
#else
"BNE.N L_curve25519_inv_2_%=\n\t"
#endif
"ADD r2, sp, #0x40\n\t"
"ADD r1, sp, #0x60\n\t"
"ADD r0, sp, #0x60\n\t"
@ -3187,7 +3251,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_3_%=\n\t"
#else
"BNE.N L_curve25519_inv_3_%=\n\t"
#endif
"ADD r2, sp, #0x60\n\t"
"ADD r1, sp, #0x80\n\t"
"ADD r0, sp, #0x60\n\t"
@ -3201,7 +3269,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_4_%=\n\t"
#else
"BNE.N L_curve25519_inv_4_%=\n\t"
#endif
"ADD r2, sp, #0x40\n\t"
"ADD r1, sp, #0x60\n\t"
"ADD r0, sp, #0x40\n\t"
@ -3218,7 +3290,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_5_%=\n\t"
#else
"BNE.N L_curve25519_inv_5_%=\n\t"
#endif
"ADD r2, sp, #0x40\n\t"
"ADD r1, sp, #0x60\n\t"
"ADD r0, sp, #0x60\n\t"
@ -3235,7 +3311,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_6_%=\n\t"
#else
"BNE.N L_curve25519_inv_6_%=\n\t"
#endif
"ADD r2, sp, #0x60\n\t"
"ADD r1, sp, #0x80\n\t"
"ADD r0, sp, #0x60\n\t"
@ -3249,7 +3329,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_7_%=\n\t"
#else
"BNE.N L_curve25519_inv_7_%=\n\t"
#endif
"ADD r2, sp, #0x40\n\t"
"ADD r1, sp, #0x60\n\t"
"ADD r0, sp, #0x40\n\t"
@ -3263,7 +3347,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_8_%=\n\t"
#else
"BNE.N L_curve25519_inv_8_%=\n\t"
#endif
"ADD r2, sp, #0x20\n\t"
"ADD r1, sp, #0x40\n\t"
"ADD r0, sp, #0x0\n\t"
@ -3345,7 +3433,11 @@ void fe_invert(fe r_p, const fe a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_invert1_%=\n\t"
#else
"BNE.N L_fe_invert1_%=\n\t"
#endif
"ADD r2, sp, #0x20\n\t"
"ADD r1, sp, #0x40\n\t"
"ADD r0, sp, #0x20\n\t"
@ -3362,7 +3454,11 @@ void fe_invert(fe r_p, const fe a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_invert2_%=\n\t"
#else
"BNE.N L_fe_invert2_%=\n\t"
#endif
"ADD r2, sp, #0x20\n\t"
"ADD r1, sp, #0x40\n\t"
"ADD r0, sp, #0x40\n\t"
@ -3379,7 +3475,11 @@ void fe_invert(fe r_p, const fe a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_invert3_%=\n\t"
#else
"BNE.N L_fe_invert3_%=\n\t"
#endif
"ADD r2, sp, #0x40\n\t"
"ADD r1, sp, #0x60\n\t"
"ADD r0, sp, #0x40\n\t"
@ -3393,7 +3493,11 @@ void fe_invert(fe r_p, const fe a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_invert4_%=\n\t"
#else
"BNE.N L_fe_invert4_%=\n\t"
#endif
"ADD r2, sp, #0x20\n\t"
"ADD r1, sp, #0x40\n\t"
"ADD r0, sp, #0x20\n\t"
@ -3410,7 +3514,11 @@ void fe_invert(fe r_p, const fe a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_invert5_%=\n\t"
#else
"BNE.N L_fe_invert5_%=\n\t"
#endif
"ADD r2, sp, #0x20\n\t"
"ADD r1, sp, #0x40\n\t"
"ADD r0, sp, #0x40\n\t"
@ -3427,7 +3535,11 @@ void fe_invert(fe r_p, const fe a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_invert6_%=\n\t"
#else
"BNE.N L_fe_invert6_%=\n\t"
#endif
"ADD r2, sp, #0x40\n\t"
"ADD r1, sp, #0x60\n\t"
"ADD r0, sp, #0x40\n\t"
@ -3441,7 +3553,11 @@ void fe_invert(fe r_p, const fe a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_invert7_%=\n\t"
#else
"BNE.N L_fe_invert7_%=\n\t"
#endif
"ADD r2, sp, #0x20\n\t"
"ADD r1, sp, #0x40\n\t"
"ADD r0, sp, #0x20\n\t"
@ -3455,7 +3571,11 @@ void fe_invert(fe r_p, const fe a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_invert8_%=\n\t"
#else
"BNE.N L_fe_invert8_%=\n\t"
#endif
"MOV r2, sp\n\t"
"ADD r1, sp, #0x20\n\t"
"LDR r0, [sp, #128]\n\t"
@ -3981,7 +4101,11 @@ void fe_pow22523(fe r_p, const fe a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_pow22523_1_%=\n\t"
#else
"BNE.N L_fe_pow22523_1_%=\n\t"
#endif
"MOV r2, sp\n\t"
"ADD r1, sp, #0x20\n\t"
"MOV r0, sp\n\t"
@ -3998,7 +4122,11 @@ void fe_pow22523(fe r_p, const fe a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_pow22523_2_%=\n\t"
#else
"BNE.N L_fe_pow22523_2_%=\n\t"
#endif
"MOV r2, sp\n\t"
"ADD r1, sp, #0x20\n\t"
"ADD r0, sp, #0x20\n\t"
@ -4015,7 +4143,11 @@ void fe_pow22523(fe r_p, const fe a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_pow22523_3_%=\n\t"
#else
"BNE.N L_fe_pow22523_3_%=\n\t"
#endif
"ADD r2, sp, #0x20\n\t"
"ADD r1, sp, #0x40\n\t"
"ADD r0, sp, #0x20\n\t"
@ -4029,7 +4161,11 @@ void fe_pow22523(fe r_p, const fe a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_pow22523_4_%=\n\t"
#else
"BNE.N L_fe_pow22523_4_%=\n\t"
#endif
"MOV r2, sp\n\t"
"ADD r1, sp, #0x20\n\t"
"MOV r0, sp\n\t"
@ -4046,7 +4182,11 @@ void fe_pow22523(fe r_p, const fe a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_pow22523_5_%=\n\t"
#else
"BNE.N L_fe_pow22523_5_%=\n\t"
#endif
"MOV r2, sp\n\t"
"ADD r1, sp, #0x20\n\t"
"ADD r0, sp, #0x20\n\t"
@ -4063,7 +4203,11 @@ void fe_pow22523(fe r_p, const fe a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_pow22523_6_%=\n\t"
#else
"BNE.N L_fe_pow22523_6_%=\n\t"
#endif
"ADD r2, sp, #0x20\n\t"
"ADD r1, sp, #0x40\n\t"
"ADD r0, sp, #0x20\n\t"
@ -4077,7 +4221,11 @@ void fe_pow22523(fe r_p, const fe a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_pow22523_7_%=\n\t"
#else
"BNE.N L_fe_pow22523_7_%=\n\t"
#endif
"MOV r2, sp\n\t"
"ADD r1, sp, #0x20\n\t"
"MOV r0, sp\n\t"
@ -4091,7 +4239,11 @@ void fe_pow22523(fe r_p, const fe a_p)
"BL fe_sq_op\n\t"
"POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_pow22523_8_%=\n\t"
#else
"BNE.N L_fe_pow22523_8_%=\n\t"
#endif
"LDR r2, [sp, #100]\n\t"
"MOV r1, sp\n\t"
"LDR r0, [sp, #96]\n\t"
@ -5289,7 +5441,7 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
"SUB sp, sp, #0x50\n\t"
"ADD lr, sp, #0x44\n\t"
"STM lr, {%[s], %[a], %[c]}\n\t"
"MOV %[r], #0x0\n\t"
"MOV %[s], #0x0\n\t"
"LDR r12, [%[a]]\n\t"
/* A[0] * B[0] */
"LDR lr, [%[b]]\n\t"
@ -5306,25 +5458,25 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
"STR %[c], [sp]\n\t"
/* A[0] * B[1] */
"LDR lr, [%[b], #4]\n\t"
"MOV r11, %[r]\n\t"
"MOV r11, %[s]\n\t"
"UMLAL r4, r11, r12, lr\n\t"
"ADDS r5, r5, r11\n\t"
/* A[0] * B[3] */
"LDR lr, [%[b], #12]\n\t"
"ADCS r6, r6, #0x0\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r6, r11, r12, lr\n\t"
"ADDS r7, r7, r11\n\t"
/* A[0] * B[5] */
"LDR lr, [%[b], #20]\n\t"
"ADCS r8, r8, #0x0\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r8, r11, r12, lr\n\t"
"ADDS r9, r9, r11\n\t"
/* A[0] * B[7] */
"LDR lr, [%[b], #28]\n\t"
"ADCS r10, r10, #0x0\n\t"
"ADC %[c], %[r], #0x0\n\t"
"ADC %[c], %[s], #0x0\n\t"
"UMLAL r10, %[c], r12, lr\n\t"
/* A[1] * B[0] */
"LDR r12, [%[a], #4]\n\t"
@ -5335,37 +5487,37 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
"ADDS r5, r5, r11\n\t"
/* A[1] * B[1] */
"LDR lr, [%[b], #4]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r5, r11, r12, lr\n\t"
"ADDS r6, r6, r11\n\t"
/* A[1] * B[2] */
"LDR lr, [%[b], #8]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r6, r11, r12, lr\n\t"
"ADDS r7, r7, r11\n\t"
/* A[1] * B[3] */
"LDR lr, [%[b], #12]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r7, r11, r12, lr\n\t"
"ADDS r8, r8, r11\n\t"
/* A[1] * B[4] */
"LDR lr, [%[b], #16]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r8, r11, r12, lr\n\t"
"ADDS r9, r9, r11\n\t"
/* A[1] * B[5] */
"LDR lr, [%[b], #20]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r9, r11, r12, lr\n\t"
"ADDS r10, r10, r11\n\t"
/* A[1] * B[6] */
"LDR lr, [%[b], #24]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r10, r11, r12, lr\n\t"
"ADDS %[c], %[c], r11\n\t"
/* A[1] * B[7] */
"LDR lr, [%[b], #28]\n\t"
"ADC r4, %[r], #0x0\n\t"
"ADC r4, %[s], #0x0\n\t"
"UMLAL %[c], r4, r12, lr\n\t"
/* A[2] * B[0] */
"LDR r12, [%[a], #8]\n\t"
@ -5376,37 +5528,37 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
"ADDS r6, r6, r11\n\t"
/* A[2] * B[1] */
"LDR lr, [%[b], #4]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r6, r11, r12, lr\n\t"
"ADDS r7, r7, r11\n\t"
/* A[2] * B[2] */
"LDR lr, [%[b], #8]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r7, r11, r12, lr\n\t"
"ADDS r8, r8, r11\n\t"
/* A[2] * B[3] */
"LDR lr, [%[b], #12]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r8, r11, r12, lr\n\t"
"ADDS r9, r9, r11\n\t"
/* A[2] * B[4] */
"LDR lr, [%[b], #16]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r9, r11, r12, lr\n\t"
"ADDS r10, r10, r11\n\t"
/* A[2] * B[5] */
"LDR lr, [%[b], #20]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r10, r11, r12, lr\n\t"
"ADDS %[c], %[c], r11\n\t"
/* A[2] * B[6] */
"LDR lr, [%[b], #24]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL %[c], r11, r12, lr\n\t"
"ADDS r4, r4, r11\n\t"
/* A[2] * B[7] */
"LDR lr, [%[b], #28]\n\t"
"ADC r5, %[r], #0x0\n\t"
"ADC r5, %[s], #0x0\n\t"
"UMLAL r4, r5, r12, lr\n\t"
/* A[3] * B[0] */
"LDR r12, [%[a], #12]\n\t"
@ -5417,37 +5569,37 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
"ADDS r7, r7, r11\n\t"
/* A[3] * B[1] */
"LDR lr, [%[b], #4]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r7, r11, r12, lr\n\t"
"ADDS r8, r8, r11\n\t"
/* A[3] * B[2] */
"LDR lr, [%[b], #8]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r8, r11, r12, lr\n\t"
"ADDS r9, r9, r11\n\t"
/* A[3] * B[3] */
"LDR lr, [%[b], #12]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r9, r11, r12, lr\n\t"
"ADDS r10, r10, r11\n\t"
/* A[3] * B[4] */
"LDR lr, [%[b], #16]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r10, r11, r12, lr\n\t"
"ADDS %[c], %[c], r11\n\t"
/* A[3] * B[5] */
"LDR lr, [%[b], #20]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL %[c], r11, r12, lr\n\t"
"ADDS r4, r4, r11\n\t"
/* A[3] * B[6] */
"LDR lr, [%[b], #24]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r4, r11, r12, lr\n\t"
"ADDS r5, r5, r11\n\t"
/* A[3] * B[7] */
"LDR lr, [%[b], #28]\n\t"
"ADC r6, %[r], #0x0\n\t"
"ADC r6, %[s], #0x0\n\t"
"UMLAL r5, r6, r12, lr\n\t"
/* A[4] * B[0] */
"LDR r12, [%[a], #16]\n\t"
@ -5458,37 +5610,37 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
"ADDS r8, r8, r11\n\t"
/* A[4] * B[1] */
"LDR lr, [%[b], #4]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r8, r11, r12, lr\n\t"
"ADDS r9, r9, r11\n\t"
/* A[4] * B[2] */
"LDR lr, [%[b], #8]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r9, r11, r12, lr\n\t"
"ADDS r10, r10, r11\n\t"
/* A[4] * B[3] */
"LDR lr, [%[b], #12]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r10, r11, r12, lr\n\t"
"ADDS %[c], %[c], r11\n\t"
/* A[4] * B[4] */
"LDR lr, [%[b], #16]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL %[c], r11, r12, lr\n\t"
"ADDS r4, r4, r11\n\t"
/* A[4] * B[5] */
"LDR lr, [%[b], #20]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r4, r11, r12, lr\n\t"
"ADDS r5, r5, r11\n\t"
/* A[4] * B[6] */
"LDR lr, [%[b], #24]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r5, r11, r12, lr\n\t"
"ADDS r6, r6, r11\n\t"
/* A[4] * B[7] */
"LDR lr, [%[b], #28]\n\t"
"ADC r7, %[r], #0x0\n\t"
"ADC r7, %[s], #0x0\n\t"
"UMLAL r6, r7, r12, lr\n\t"
/* A[5] * B[0] */
"LDR r12, [%[a], #20]\n\t"
@ -5499,37 +5651,37 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
"ADDS r9, r9, r11\n\t"
/* A[5] * B[1] */
"LDR lr, [%[b], #4]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r9, r11, r12, lr\n\t"
"ADDS r10, r10, r11\n\t"
/* A[5] * B[2] */
"LDR lr, [%[b], #8]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r10, r11, r12, lr\n\t"
"ADDS %[c], %[c], r11\n\t"
/* A[5] * B[3] */
"LDR lr, [%[b], #12]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL %[c], r11, r12, lr\n\t"
"ADDS r4, r4, r11\n\t"
/* A[5] * B[4] */
"LDR lr, [%[b], #16]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r4, r11, r12, lr\n\t"
"ADDS r5, r5, r11\n\t"
/* A[5] * B[5] */
"LDR lr, [%[b], #20]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r5, r11, r12, lr\n\t"
"ADDS r6, r6, r11\n\t"
/* A[5] * B[6] */
"LDR lr, [%[b], #24]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r6, r11, r12, lr\n\t"
"ADDS r7, r7, r11\n\t"
/* A[5] * B[7] */
"LDR lr, [%[b], #28]\n\t"
"ADC r8, %[r], #0x0\n\t"
"ADC r8, %[s], #0x0\n\t"
"UMLAL r7, r8, r12, lr\n\t"
/* A[6] * B[0] */
"LDR r12, [%[a], #24]\n\t"
@ -5540,37 +5692,37 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
"ADDS r10, r10, r11\n\t"
/* A[6] * B[1] */
"LDR lr, [%[b], #4]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r10, r11, r12, lr\n\t"
"ADDS %[c], %[c], r11\n\t"
/* A[6] * B[2] */
"LDR lr, [%[b], #8]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL %[c], r11, r12, lr\n\t"
"ADDS r4, r4, r11\n\t"
/* A[6] * B[3] */
"LDR lr, [%[b], #12]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r4, r11, r12, lr\n\t"
"ADDS r5, r5, r11\n\t"
/* A[6] * B[4] */
"LDR lr, [%[b], #16]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r5, r11, r12, lr\n\t"
"ADDS r6, r6, r11\n\t"
/* A[6] * B[5] */
"LDR lr, [%[b], #20]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r6, r11, r12, lr\n\t"
"ADDS r7, r7, r11\n\t"
/* A[6] * B[6] */
"LDR lr, [%[b], #24]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r7, r11, r12, lr\n\t"
"ADDS r8, r8, r11\n\t"
/* A[6] * B[7] */
"LDR lr, [%[b], #28]\n\t"
"ADC r9, %[r], #0x0\n\t"
"ADC r9, %[s], #0x0\n\t"
"UMLAL r8, r9, r12, lr\n\t"
/* A[7] * B[0] */
"LDR r12, [%[a], #28]\n\t"
@ -5581,37 +5733,37 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
"ADDS %[c], %[c], r11\n\t"
/* A[7] * B[1] */
"LDR lr, [%[b], #4]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL %[c], r11, r12, lr\n\t"
"ADDS r4, r4, r11\n\t"
/* A[7] * B[2] */
"LDR lr, [%[b], #8]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r4, r11, r12, lr\n\t"
"ADDS r5, r5, r11\n\t"
/* A[7] * B[3] */
"LDR lr, [%[b], #12]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r5, r11, r12, lr\n\t"
"ADDS r6, r6, r11\n\t"
/* A[7] * B[4] */
"LDR lr, [%[b], #16]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r6, r11, r12, lr\n\t"
"ADDS r7, r7, r11\n\t"
/* A[7] * B[5] */
"LDR lr, [%[b], #20]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r7, r11, r12, lr\n\t"
"ADDS r8, r8, r11\n\t"
/* A[7] * B[6] */
"LDR lr, [%[b], #24]\n\t"
"ADC r11, %[r], #0x0\n\t"
"ADC r11, %[s], #0x0\n\t"
"UMLAL r8, r11, r12, lr\n\t"
"ADDS r9, r9, r11\n\t"
/* A[7] * B[7] */
"LDR lr, [%[b], #28]\n\t"
"ADC r10, %[r], #0x0\n\t"
"ADC r10, %[s], #0x0\n\t"
"UMLAL r9, r10, r12, lr\n\t"
"ADD lr, sp, #0x20\n\t"
"STM lr, {%[c], r4, r5, r6, r7, r8, r9, r10}\n\t"
@ -6505,4 +6657,7 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
#endif /* HAVE_CURVE25519 || HAVE_ED25519 */
#endif /* !__aarch64__ && __thumb__ */
#endif /* WOLFSSL_ARMASM */
#endif /* !defined(__aarch64__) && defined(__arm__) */
#endif /* WOLFSSL_ARMASM */
#endif /* WOLFSSL_ARMASM_INLINE */

View File

@ -925,7 +925,11 @@ L_SHA256_transform_len_start:
STR r9, [sp, #60]
ADD r3, r3, #0x40
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_SHA256_transform_len_start
#else
BNE.N L_SHA256_transform_len_start
#endif
# Round 0
LDR r5, [r0, #16]
LDR r6, [r0, #20]
@ -1466,7 +1470,11 @@ L_SHA256_transform_len_start:
SUBS r2, r2, #0x40
SUB r3, r3, #0xc0
ADD r1, r1, #0x40
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_SHA256_transform_len_begin
#else
BNE.N L_SHA256_transform_len_begin
#endif
ADD sp, sp, #0xc0
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
# Cycle Count = 1874

View File

@ -37,6 +37,18 @@
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#ifdef WOLFSSL_ARMASM_INLINE
#ifdef WOLFSSL_ARMASM
#if !defined(__aarch64__) && defined(__arm__)
#ifdef __IAR_SYSTEMS_ICC__
#define __asm__ asm
#define __volatile__ volatile
#endif /* __IAR_SYSTEMS_ICC__ */
#ifdef __KEIL__
#define __asm__ __asm
#define __volatile__ volatile
#endif /* __KEIL__ */
#ifndef NO_SHA256
#include <wolfssl/wolfcrypt/sha256.h>
@ -885,7 +897,11 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"STR r9, [sp, #60]\n\t"
"ADD r3, r3, #0x40\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_SHA256_transform_len_start_%=\n\t"
#else
"BNE.N L_SHA256_transform_len_start_%=\n\t"
#endif
/* Round 0 */
"LDR r5, [%[sha256], #16]\n\t"
"LDR r6, [%[sha256], #20]\n\t"
@ -1426,7 +1442,11 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"SUBS %[len], %[len], #0x40\n\t"
"SUB r3, r3, #0xc0\n\t"
"ADD %[data], %[data], #0x40\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_SHA256_transform_len_begin_%=\n\t"
#else
"BNE.N L_SHA256_transform_len_begin_%=\n\t"
#endif
"ADD sp, sp, #0xc0\n\t"
: [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len), [L_SHA256_transform_len_k] "+r" (L_SHA256_transform_len_k_c)
:
@ -1438,4 +1458,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
#endif /* !NO_SHA256 */
#endif /* !__aarch64__ && __thumb__ */
#endif /* WOLFSSL_ARMASM */
#endif /* !defined(__aarch64__) && defined(__arm__) */
#endif /* WOLFSSL_ARMASM */
#endif /* WOLFSSL_ARMASM_INLINE */

View File

@ -2319,7 +2319,11 @@ L_SHA512_transform_len_start:
STRD r4, r5, [sp, #120]
ADD r3, r3, #0x80
SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_SHA512_transform_len_start
#else
BNE.N L_SHA512_transform_len_start
#endif
# Round 0
LDRD r4, r5, [r0, #32]
LSRS r6, r4, #14
@ -3652,7 +3656,11 @@ L_SHA512_transform_len_start:
SUBS r2, r2, #0x80
SUB r3, r3, #0x200
ADD r1, r1, #0x80
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_SHA512_transform_len_begin
#else
BNE.N L_SHA512_transform_len_begin
#endif
EOR r0, r0, r0
ADD sp, sp, #0xc0
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}

View File

@ -37,6 +37,18 @@
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#ifdef WOLFSSL_ARMASM_INLINE
#ifdef WOLFSSL_ARMASM
#if !defined(__aarch64__) && defined(__arm__)
#ifdef __IAR_SYSTEMS_ICC__
#define __asm__ asm
#define __volatile__ volatile
#endif /* __IAR_SYSTEMS_ICC__ */
#ifdef __KEIL__
#define __asm__ __asm
#define __volatile__ volatile
#endif /* __KEIL__ */
#ifdef WOLFSSL_SHA512
#include <wolfssl/wolfcrypt/sha512.h>
@ -2207,7 +2219,11 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
"STRD r4, r5, [sp, #120]\n\t"
"ADD r3, r3, #0x80\n\t"
"SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_SHA512_transform_len_start_%=\n\t"
#else
"BNE.N L_SHA512_transform_len_start_%=\n\t"
#endif
/* Round 0 */
"LDRD r4, r5, [%[sha512], #32]\n\t"
"LSRS r6, r4, #14\n\t"
@ -3540,7 +3556,11 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
"SUBS %[len], %[len], #0x80\n\t"
"SUB r3, r3, #0x200\n\t"
"ADD %[data], %[data], #0x80\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_SHA512_transform_len_begin_%=\n\t"
#else
"BNE.N L_SHA512_transform_len_begin_%=\n\t"
#endif
"EOR r0, r0, r0\n\t"
"ADD sp, sp, #0xc0\n\t"
: [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len), [L_SHA512_transform_len_k] "+r" (L_SHA512_transform_len_k_c)
@ -3553,4 +3573,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
#endif /* WOLFSSL_SHA512 */
#endif /* !__aarch64__ && __thumb__ */
#endif /* WOLFSSL_ARMASM */
#endif /* !defined(__aarch64__) && defined(__arm__) */
#endif /* WOLFSSL_ARMASM */
#endif /* WOLFSSL_ARMASM_INLINE */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1245,7 +1245,7 @@ static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
: [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
: "cc" \
)
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH >= 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 7)
/* Count leading zeros - instruction only available on ARMv7 and newer. */
#define SP_ASM_LZCNT(va, vn) \
__asm__ __volatile__ ( \
@ -1272,7 +1272,7 @@ static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
sp_int_digit d)
{
sp_int_digit r = 0;
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
static const char debruijn32[32] = {
0, 31, 9, 30, 3, 8, 13, 29, 2, 5, 7, 21, 12, 24, 28, 19,
1, 10, 4, 14, 6, 22, 25, 20, 11, 15, 23, 26, 16, 27, 17, 18
@ -1282,7 +1282,7 @@ static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
__asm__ __volatile__ (
/* Shift d so that top bit is set. */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r4, %[m]\n\t"
"mov r5, %[d]\n\t"
"orr r5, r5, r5, lsr #1\n\t"
@ -1291,8 +1291,8 @@ static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
"orr r5, r5, r5, lsr #8\n\t"
"orr r5, r5, r5, lsr #16\n\t"
"add r5, r5, #1\n\t"
"mul r5, r5, r4\n\t"
"lsr r5, r5, #27\n\t"
"mul r6, r5, r4\n\t"
"lsr r5, r6, #27\n\t"
"ldrb r5, [%[t], r5]\n\t"
#else
"clz r5, %[d]\n\t"
@ -1352,7 +1352,7 @@ static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
"sbc r8, r8, r8\n\t"
"sub %[r], %[r], r8\n\t"
: [r] "+r" (r), [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
: [t] "r" (debruijn32), [m] "m" (debruijn32_mul)
#else
:

File diff suppressed because it is too large Load Diff

View File

@ -56741,52 +56741,6 @@ _sp_256_mont_sub_4:
#ifndef __APPLE__
.size sp_256_mont_sub_4,.-sp_256_mont_sub_4
#endif /* __APPLE__ */
/* Subtract two Montgomery form numbers (r = a - b % m).
*
* b is less than the modulus.
*
* r Result of subtration.
* a Number to subtract from in Montgomery form.
* b Number to subtract with in Montgomery form.
* m Modulus (prime).
*/
#ifndef __APPLE__
.text
.globl sp_256_mont_sub_lower_4
.type sp_256_mont_sub_lower_4,@function
.align 16
sp_256_mont_sub_lower_4:
#else
.section __TEXT,__text
.globl _sp_256_mont_sub_lower_4
.p2align 4
_sp_256_mont_sub_lower_4:
#endif /* __APPLE__ */
movq (%rsi), %rax
movq 8(%rsi), %rcx
movq 16(%rsi), %r8
movq 24(%rsi), %r9
subq (%rdx), %rax
movq $0xffffffff, %r10
sbbq 8(%rdx), %rcx
movq $0xffffffff00000001, %r11
sbbq 16(%rdx), %r8
sbbq 24(%rdx), %r9
sbbq %rsi, %rsi
andq %rsi, %r10
andq %rsi, %r11
addq %rsi, %rax
adcq %r10, %rcx
movq %rax, (%rdi)
adcq $0x00, %r8
movq %rcx, 8(%rdi)
adcq %r11, %r9
movq %r8, 16(%rdi)
movq %r9, 24(%rdi)
repz retq
#ifndef __APPLE__
.size sp_256_mont_sub_lower_4,.-sp_256_mont_sub_lower_4
#endif /* __APPLE__ */
/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
*
* r Result of division by 2.
@ -56834,71 +56788,6 @@ _sp_256_div2_4:
#ifndef __APPLE__
.size sp_256_div2_4,.-sp_256_div2_4
#endif /* __APPLE__ */
/* Triple a Montgomery form number (r = a + a + a % m).
*
* a is less than m.
*
* r Result of Tripling.
* a Number to triple in Montgomery form.
* m Modulus (prime).
*/
#ifndef __APPLE__
.text
.globl sp_256_mont_tpl_lower_4
.type sp_256_mont_tpl_lower_4,@function
.align 16
sp_256_mont_tpl_lower_4:
#else
.section __TEXT,__text
.globl _sp_256_mont_tpl_lower_4
.p2align 4
_sp_256_mont_tpl_lower_4:
#endif /* __APPLE__ */
movq (%rsi), %rdx
movq 8(%rsi), %rax
movq 16(%rsi), %rcx
movq 24(%rsi), %r8
addq %rdx, %rdx
movq $0xffffffff, %r9
adcq %rax, %rax
movq $0xffffffff00000001, %r10
adcq %rcx, %rcx
adcq %r8, %r8
sbbq %r11, %r11
andq %r11, %r9
andq %r11, %r10
subq %r11, %rdx
sbbq %r9, %rax
sbbq $0x00, %rcx
sbbq %r10, %r8
addq (%rsi), %rdx
movq $0xffffffff, %r9
adcq 8(%rsi), %rax
movq $0xffffffff00000001, %r10
adcq 16(%rsi), %rcx
adcq 24(%rsi), %r8
sbbq %r11, %r11
andq %r11, %r9
andq %r11, %r10
subq %r11, %rdx
sbbq %r9, %rax
sbbq $0x00, %rcx
sbbq %r10, %r8
adcq $0x00, %r11
andq %r11, %r9
andq %r11, %r10
subq %r11, %rdx
sbbq %r9, %rax
movq %rdx, (%rdi)
sbbq $0x00, %rcx
movq %rax, 8(%rdi)
sbbq %r10, %r8
movq %rcx, 16(%rdi)
movq %r8, 24(%rdi)
repz retq
#ifndef __APPLE__
.size sp_256_mont_tpl_lower_4,.-sp_256_mont_tpl_lower_4
#endif /* __APPLE__ */
/* Two Montgomery numbers, subtract double second from first (r = a - 2.b % m).
*
* r Result of subtration.
@ -61241,68 +61130,6 @@ _sp_384_mont_sub_6:
#ifndef __APPLE__
.size sp_384_mont_sub_6,.-sp_384_mont_sub_6
#endif /* __APPLE__ */
/* Subtract two Montgomery form numbers (r = a - b % m).
*
* b is less than the modulus.
*
* r Result of subtration.
* a Number to subtract from in Montgomery form.
* b Number to subtract with in Montgomery form.
* m Modulus (prime).
*/
#ifndef __APPLE__
.text
.globl sp_384_mont_sub_lower_6
.type sp_384_mont_sub_lower_6,@function
.align 16
sp_384_mont_sub_lower_6:
#else
.section __TEXT,__text
.globl _sp_384_mont_sub_lower_6
.p2align 4
_sp_384_mont_sub_lower_6:
#endif /* __APPLE__ */
pushq %r12
pushq %r13
pushq %r14
movq (%rsi), %rax
movq 8(%rsi), %rcx
movq 16(%rsi), %r8
movq 24(%rsi), %r9
movq 32(%rsi), %r10
movq 40(%rsi), %r11
subq (%rdx), %rax
movq $0xffffffff, %r12
sbbq 8(%rdx), %rcx
movq $0xffffffff00000000, %r13
sbbq 16(%rdx), %r8
movq $0xfffffffffffffffe, %r14
sbbq 24(%rdx), %r9
sbbq 32(%rdx), %r10
sbbq 40(%rdx), %r11
sbbq %rsi, %rsi
andq %rsi, %r12
andq %rsi, %r13
andq %rsi, %r14
addq %r12, %rax
adcq %r13, %rcx
movq %rax, (%rdi)
adcq %r14, %r8
movq %rcx, 8(%rdi)
adcq %rsi, %r9
movq %r8, 16(%rdi)
adcq %rsi, %r10
movq %r9, 24(%rdi)
adcq %rsi, %r11
movq %r10, 32(%rdi)
movq %r11, 40(%rdi)
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size sp_384_mont_sub_lower_6,.-sp_384_mont_sub_lower_6
#endif /* __APPLE__ */
/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
*
* r Result of division by 2.
@ -61380,158 +61207,6 @@ _sp_384_div2_6:
#ifndef __APPLE__
.size sp_384_div2_6,.-sp_384_div2_6
#endif /* __APPLE__ */
/* Double a Montgomery form number (r = a + a % m).
 *
 * a is less than m.
 *
 * SysV AMD64 ABI: rdi = r, rsi = a.
 * Constant time: the conditional reduction uses a carry-derived mask,
 * never a branch.
 * The constants below are the low three 64-bit words of the P-384 prime
 * (the top three words are all-ones, so the mask itself is used for them).
 * NOTE(review): the "lower" variant subtracts the modulus only when the
 * doubling carries out of 384 bits, so the result is congruent to 2*a
 * mod m but may not be fully reduced below m - confirm against callers.
 *
 * r Result of doubling.
 * a Number to double in Montgomery form.
 * m Modulus (prime).
 */
#ifndef __APPLE__
.text
.globl sp_384_mont_dbl_lower_6
.type sp_384_mont_dbl_lower_6,@function
.align 16
sp_384_mont_dbl_lower_6:
#else
.section __TEXT,__text
.globl _sp_384_mont_dbl_lower_6
.p2align 4
_sp_384_mont_dbl_lower_6:
#endif /* __APPLE__ */
pushq %r12
pushq %r13
pushq %r14
/* Load the six 64-bit words of a. */
movq (%rsi), %rdx
movq 8(%rsi), %rax
movq 16(%rsi), %rcx
movq 24(%rsi), %r8
movq 32(%rsi), %r9
movq 40(%rsi), %r10
/* Double via an add-with-carry chain; the modulus-word loads are
 * interleaved because movq does not disturb the carry flag. */
addq %rdx, %rdx
movq $0xffffffff, %r11
adcq %rax, %rax
movq $0xffffffff00000000, %r12
adcq %rcx, %rcx
movq $0xfffffffffffffffe, %r13
adcq %r8, %r8
adcq %r9, %r9
adcq %r10, %r10
sbbq %r14, %r14 /* r14 = all-ones if the doubling carried out, else 0 */
andq %r14, %r11 /* Mask the low three modulus words. */
andq %r14, %r12
andq %r14, %r13
/* Conditionally subtract the modulus; stores of completed words are
 * interleaved (movq preserves the borrow in CF). */
subq %r11, %rdx
sbbq %r12, %rax
movq %rdx, (%rdi)
sbbq %r13, %rcx
movq %rax, 8(%rdi)
sbbq %r14, %r8 /* Top three modulus words are all-ones == r14. */
movq %rcx, 16(%rdi)
sbbq %r14, %r9
movq %r8, 24(%rdi)
sbbq %r14, %r10
movq %r9, 32(%rdi)
movq %r10, 40(%rdi)
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size sp_384_mont_dbl_lower_6,.-sp_384_mont_dbl_lower_6
#endif /* __APPLE__ */
/* Triple a Montgomery form number (r = a + a + a % m).
 *
 * a is less than m.
 *
 * SysV AMD64 ABI: rdi = r, rsi = a.
 * Computed as (2*a cond- m) + a with up to two conditional modulus
 * subtractions, all via carry/borrow-derived masks - no branches.
 * The constants are the low three 64-bit words of the P-384 prime
 * (top three words are all-ones, represented by the mask register).
 * NOTE(review): as with the other "lower" helpers the result appears to
 * be congruent to 3*a mod m and fit in 6 words rather than being fully
 * reduced below m - confirm against callers.
 *
 * r Result of tripling.
 * a Number to triple in Montgomery form.
 * m Modulus (prime).
 */
#ifndef __APPLE__
.text
.globl sp_384_mont_tpl_lower_6
.type sp_384_mont_tpl_lower_6,@function
.align 16
sp_384_mont_tpl_lower_6:
#else
.section __TEXT,__text
.globl _sp_384_mont_tpl_lower_6
.p2align 4
_sp_384_mont_tpl_lower_6:
#endif /* __APPLE__ */
pushq %r12
pushq %r13
pushq %r14
/* Load the six words of a. */
movq (%rsi), %rdx
movq 8(%rsi), %rax
movq 16(%rsi), %rcx
movq 24(%rsi), %r8
movq 32(%rsi), %r9
movq 40(%rsi), %r10
/* Stage 1: double a; conditionally subtract the modulus on carry out. */
addq %rdx, %rdx
movq $0xffffffff, %r11
adcq %rax, %rax
movq $0xffffffff00000000, %r12
adcq %rcx, %rcx
movq $0xfffffffffffffffe, %r13
adcq %r8, %r8
adcq %r9, %r9
adcq %r10, %r10
sbbq %r14, %r14 /* Mask: all-ones if the doubling carried out. */
andq %r14, %r11
andq %r14, %r12
andq %r14, %r13
subq %r11, %rdx
sbbq %r12, %rax
movq %rdx, (%rdi) /* Early store; overwritten by the final store below. */
sbbq %r13, %rcx
sbbq %r14, %r8
sbbq %r14, %r9
sbbq %r14, %r10
/* Stage 2: add a again (-> 3a); conditionally subtract on carry out. */
addq (%rsi), %rdx
movq $0xffffffff, %r11
adcq 8(%rsi), %rax
movq $0xffffffff00000000, %r12
adcq 16(%rsi), %rcx
movq $0xfffffffffffffffe, %r13
adcq 24(%rsi), %r8
adcq 32(%rsi), %r9
adcq 40(%rsi), %r10
sbbq %r14, %r14
andq %r14, %r11
andq %r14, %r12
andq %r14, %r13
subq %r11, %rdx
sbbq %r12, %rax
sbbq %r13, %rcx
sbbq %r14, %r8
sbbq %r14, %r9
sbbq %r14, %r10
/* Stage 3: r14 += borrow; clears the mask when the stage-2 subtract
 * borrowed, otherwise keeps it set for one more modulus subtraction.
 * NOTE(review): this second conditional subtract appears to cover the
 * case where 3*a overflows 384 bits twice - verify. */
adcq $0x00, %r14
andq %r14, %r11
andq %r14, %r12
andq %r14, %r13
subq %r11, %rdx
sbbq %r12, %rax
movq %rdx, (%rdi)
sbbq %r13, %rcx
movq %rax, 8(%rdi)
sbbq %r14, %r8
movq %rcx, 16(%rdi)
sbbq %r14, %r9
movq %r8, 24(%rdi)
sbbq %r14, %r10
movq %r9, 32(%rdi)
movq %r10, 40(%rdi)
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size sp_384_mont_tpl_lower_6,.-sp_384_mont_tpl_lower_6
#endif /* __APPLE__ */
#ifndef WC_NO_CACHE_RESISTANT
/* Touch each possible point that could be being copied.
*

View File

@ -55582,45 +55582,6 @@ sp_256_mont_sub_4 PROC
ret
sp_256_mont_sub_4 ENDP
_text ENDS
; /* Subtract two Montgomery form numbers (r = a - b % m).
;  *
;  * b is less than the modulus.
;  *
;  * Microsoft x64 ABI: rcx = r, rdx = a, r8 = b.
;  * Constant time: the conditional add-back of the modulus uses a
;  * borrow-derived mask, never a branch.
;  * 4294967295 = 0xffffffff and 18446744069414584321 = 0xffffffff00000001
;  * are words 1 and 3 of the P-256 prime; word 0 is all-ones (the mask
;  * itself) and word 2 is zero.
;  *
;  * r Result of subtraction.
;  * a Number to subtract from in Montgomery form.
;  * b Number to subtract with in Montgomery form.
;  * m Modulus (prime).
;  */
_text SEGMENT READONLY PARA
sp_256_mont_sub_lower_4 PROC
push r12
push r13
mov rax, QWORD PTR [rdx]
mov r9, QWORD PTR [rdx+8]
mov r10, QWORD PTR [rdx+16]
mov r11, QWORD PTR [rdx+24]
; Subtract b with a borrow chain; modulus-word loads are interleaved
; (mov does not disturb the carry flag).
sub rax, QWORD PTR [r8]
mov r12, 4294967295
sbb r9, QWORD PTR [r8+8]
mov r13, 18446744069414584321
sbb r10, QWORD PTR [r8+16]
sbb r11, QWORD PTR [r8+24]
sbb rdx, rdx ; rdx = all-ones if a < b (borrow out), else 0.
and r12, rdx
and r13, rdx
; Conditionally add the modulus back; stores are interleaved.
add rax, rdx ; Word 0 of the prime is all-ones == mask.
adc r9, r12
mov QWORD PTR [rcx], rax
adc r10, 0 ; Word 2 of the prime is zero.
mov QWORD PTR [rcx+8], r9
adc r11, r13
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
pop r13
pop r12
ret
sp_256_mont_sub_lower_4 ENDP
_text ENDS
; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
; *
; * r Result of division by 2.
@ -55661,64 +55622,6 @@ sp_256_div2_4 PROC
ret
sp_256_div2_4 ENDP
_text ENDS
; /* Triple a Montgomery form number (r = a + a + a % m).
;  *
;  * a is less than m.
;  *
;  * Microsoft x64 ABI: rcx = r, rdx = a.
;  * Computed as (2*a cond- m) + a with up to two conditional modulus
;  * subtractions, all via carry/borrow-derived masks - no branches.
;  * 4294967295 = 0xffffffff and 18446744069414584321 = 0xffffffff00000001
;  * are words 1 and 3 of the P-256 prime; word 0 is all-ones (the mask
;  * itself) and word 2 is zero.
;  *
;  * r Result of Tripling.
;  * a Number to triple in Montgomery form.
;  * m Modulus (prime).
;  */
_text SEGMENT READONLY PARA
sp_256_mont_tpl_lower_4 PROC
push r12
push r13
mov rax, QWORD PTR [rdx]
mov r8, QWORD PTR [rdx+8]
mov r9, QWORD PTR [rdx+16]
mov r10, QWORD PTR [rdx+24]
; Stage 1: double a; conditionally subtract the modulus on carry out.
add rax, rax
mov r11, 4294967295
adc r8, r8
mov r12, 18446744069414584321
adc r9, r9
adc r10, r10
sbb r13, r13 ; Mask: all-ones if the doubling carried out.
and r11, r13
and r12, r13
sub rax, r13 ; Word 0 of the prime is all-ones == mask.
sbb r8, r11
sbb r9, 0 ; Word 2 of the prime is zero.
sbb r10, r12
; Stage 2: add a again (-> 3a); conditionally subtract on carry out.
add rax, QWORD PTR [rdx]
mov r11, 4294967295
adc r8, QWORD PTR [rdx+8]
mov r12, 18446744069414584321
adc r9, QWORD PTR [rdx+16]
adc r10, QWORD PTR [rdx+24]
sbb r13, r13
and r11, r13
and r12, r13
sub rax, r13
sbb r8, r11
sbb r9, 0
sbb r10, r12
; Stage 3: r13 += borrow; clears the mask when the stage-2 subtract
; borrowed, otherwise keeps it set for one more modulus subtraction.
; NOTE(review): appears to cover 3*a overflowing 256 bits twice - verify.
adc r13, 0
and r11, r13
and r12, r13
sub rax, r13
sbb r8, r11
mov QWORD PTR [rcx], rax
sbb r9, 0
mov QWORD PTR [rcx+8], r8
sbb r10, r12
mov QWORD PTR [rcx+16], r9
mov QWORD PTR [rcx+24], r10
pop r13
pop r12
ret
sp_256_mont_tpl_lower_4 ENDP
_text ENDS
; /* Two Montgomery numbers, subtract double second from first (r = a - 2.b % m).
; *
; * r Result of subtration.
@ -59792,61 +59695,6 @@ sp_384_mont_sub_6 PROC
ret
sp_384_mont_sub_6 ENDP
_text ENDS
; /* Subtract two Montgomery form numbers (r = a - b % m).
;  *
;  * b is less than the modulus.
;  *
;  * Microsoft x64 ABI: rcx = r, rdx = a, r8 = b.
;  * Constant time: the conditional add-back of the modulus uses a
;  * borrow-derived mask, never a branch.
;  * The three loaded constants are the low three 64-bit words of the
;  * P-384 prime; the top three words are all-ones (the mask itself).
;  * rdi is callee-saved on Windows, hence the extra save/restore.
;  *
;  * r Result of subtraction.
;  * a Number to subtract from in Montgomery form.
;  * b Number to subtract with in Montgomery form.
;  * m Modulus (prime).
;  */
_text SEGMENT READONLY PARA
sp_384_mont_sub_lower_6 PROC
push r12
push r13
push r14
push r15
push rdi
mov rax, QWORD PTR [rdx]
mov r9, QWORD PTR [rdx+8]
mov r10, QWORD PTR [rdx+16]
mov r11, QWORD PTR [rdx+24]
mov r12, QWORD PTR [rdx+32]
mov r13, QWORD PTR [rdx+40]
; Subtract b with a borrow chain; modulus-word loads are interleaved
; (mov does not disturb the carry flag).
sub rax, QWORD PTR [r8]
mov r14, 4294967295
sbb r9, QWORD PTR [r8+8]
mov r15, 18446744069414584320
sbb r10, QWORD PTR [r8+16]
mov rdi, 18446744073709551614
sbb r11, QWORD PTR [r8+24]
sbb r12, QWORD PTR [r8+32]
sbb r13, QWORD PTR [r8+40]
sbb rdx, rdx ; rdx = all-ones if a < b (borrow out), else 0.
and r14, rdx ; Mask the low three modulus words.
and r15, rdx
and rdi, rdx
; Conditionally add the modulus back; stores are interleaved.
add rax, r14
adc r9, r15
mov QWORD PTR [rcx], rax
adc r10, rdi
mov QWORD PTR [rcx+8], r9
adc r11, rdx ; Top three modulus words are all-ones == mask.
mov QWORD PTR [rcx+16], r10
adc r12, rdx
mov QWORD PTR [rcx+24], r11
adc r13, rdx
mov QWORD PTR [rcx+32], r12
mov QWORD PTR [rcx+40], r13
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_384_mont_sub_lower_6 ENDP
_text ENDS
; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
; *
; * r Result of division by 2.
@ -59917,144 +59765,6 @@ sp_384_div2_6 PROC
ret
sp_384_div2_6 ENDP
_text ENDS
; /* Double a Montgomery form number (r = a + a % m).
;  *
;  * a is less than m.
;  *
;  * Microsoft x64 ABI: rcx = r, rdx = a.
;  * Constant time: the conditional reduction uses a carry-derived mask,
;  * never a branch.
;  * The three loaded constants are the low three 64-bit words of the
;  * P-384 prime; the top three words are all-ones (the mask itself).
;  * NOTE(review): the "lower" variant subtracts the modulus only when
;  * the doubling carries out of 384 bits, so the result is congruent to
;  * 2*a mod m but may not be fully reduced below m - confirm.
;  *
;  * r Result of doubling.
;  * a Number to double in Montgomery form.
;  * m Modulus (prime).
;  */
_text SEGMENT READONLY PARA
sp_384_mont_dbl_lower_6 PROC
push r12
push r13
push r14
push r15
push rdi
mov rax, QWORD PTR [rdx]
mov r8, QWORD PTR [rdx+8]
mov r9, QWORD PTR [rdx+16]
mov r10, QWORD PTR [rdx+24]
mov r11, QWORD PTR [rdx+32]
mov r12, QWORD PTR [rdx+40]
; Double via an add-with-carry chain; the modulus-word loads are
; interleaved because mov does not disturb the carry flag.
add rax, rax
mov r13, 4294967295
adc r8, r8
mov r14, 18446744069414584320
adc r9, r9
mov r15, 18446744073709551614
adc r10, r10
adc r11, r11
adc r12, r12
sbb rdi, rdi ; rdi = all-ones if the doubling carried out, else 0.
and r13, rdi ; Mask the low three modulus words.
and r14, rdi
and r15, rdi
; Conditionally subtract the modulus; stores are interleaved.
sub rax, r13
sbb r8, r14
mov QWORD PTR [rcx], rax
sbb r9, r15
mov QWORD PTR [rcx+8], r8
sbb r10, rdi ; Top three modulus words are all-ones == mask.
mov QWORD PTR [rcx+16], r9
sbb r11, rdi
mov QWORD PTR [rcx+24], r10
sbb r12, rdi
mov QWORD PTR [rcx+32], r11
mov QWORD PTR [rcx+40], r12
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_384_mont_dbl_lower_6 ENDP
_text ENDS
; /* Triple a Montgomery form number (r = a + a + a % m).
;  *
;  * a is less than m.
;  *
;  * Microsoft x64 ABI: rcx = r, rdx = a.
;  * Computed as (2*a cond- m) + a with up to two conditional modulus
;  * subtractions, all via carry/borrow-derived masks - no branches.
;  * The three loaded constants are the low three 64-bit words of the
;  * P-384 prime; the top three words are all-ones (the mask itself).
;  * NOTE(review): as with the other "lower" helpers the result appears
;  * to be congruent to 3*a mod m and fit in 6 words rather than being
;  * fully reduced below m - confirm against callers.
;  *
;  * r Result of tripling.
;  * a Number to triple in Montgomery form.
;  * m Modulus (prime).
;  */
_text SEGMENT READONLY PARA
sp_384_mont_tpl_lower_6 PROC
push r12
push r13
push r14
push r15
push rdi
mov rax, QWORD PTR [rdx]
mov r8, QWORD PTR [rdx+8]
mov r9, QWORD PTR [rdx+16]
mov r10, QWORD PTR [rdx+24]
mov r11, QWORD PTR [rdx+32]
mov r12, QWORD PTR [rdx+40]
; Stage 1: double a; conditionally subtract the modulus on carry out.
add rax, rax
mov r13, 4294967295
adc r8, r8
mov r14, 18446744069414584320
adc r9, r9
mov r15, 18446744073709551614
adc r10, r10
adc r11, r11
adc r12, r12
sbb rdi, rdi ; Mask: all-ones if the doubling carried out.
and r13, rdi
and r14, rdi
and r15, rdi
sub rax, r13
sbb r8, r14
mov QWORD PTR [rcx], rax ; Early store; overwritten by the final store.
sbb r9, r15
sbb r10, rdi
sbb r11, rdi
sbb r12, rdi
; Stage 2: add a again (-> 3a); conditionally subtract on carry out.
add rax, QWORD PTR [rdx]
mov r13, 4294967295
adc r8, QWORD PTR [rdx+8]
mov r14, 18446744069414584320
adc r9, QWORD PTR [rdx+16]
mov r15, 18446744073709551614
adc r10, QWORD PTR [rdx+24]
adc r11, QWORD PTR [rdx+32]
adc r12, QWORD PTR [rdx+40]
sbb rdi, rdi
and r13, rdi
and r14, rdi
and r15, rdi
sub rax, r13
sbb r8, r14
sbb r9, r15
sbb r10, rdi
sbb r11, rdi
sbb r12, rdi
; Stage 3: rdi += borrow; clears the mask when the stage-2 subtract
; borrowed, otherwise keeps it set for one more modulus subtraction.
; NOTE(review): appears to cover 3*a overflowing 384 bits twice - verify.
adc rdi, 0
and r13, rdi
and r14, rdi
and r15, rdi
sub rax, r13
sbb r8, r14
mov QWORD PTR [rcx], rax
sbb r9, r15
mov QWORD PTR [rcx+8], r8
sbb r10, rdi
mov QWORD PTR [rcx+16], r9
sbb r11, rdi
mov QWORD PTR [rcx+24], r10
sbb r12, rdi
mov QWORD PTR [rcx+32], r11
mov QWORD PTR [rcx+40], r12
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_384_mont_tpl_lower_6 ENDP
_text ENDS
IFNDEF WC_NO_CACHE_RESISTANT
; /* Touch each possible point that could be being copied.
; *