SP ASM: improve performance

Thumb2/ARM32: use umaal instruction to speed up multiplication, squaring
and reduction. umaal not always available so use umlal instead.
Implementations for architectures less than 4 still kept - no umull.
Cleanup point arithmetic to not take shortcuts.
Make masking for constant time faster.
Add alternate asm for different compilers.
Get ARMv4 and ARMv6 compiling.
Remove whitespace at end of lines.
This commit is contained in:
Sean Parkinson
2023-09-11 22:21:16 +10:00
parent f9c2a86456
commit 114c8cc681
30 changed files with 88467 additions and 49343 deletions

View File

@ -2607,6 +2607,22 @@ then
ENABLED_ARMASM_NEON=no ENABLED_ARMASM_NEON=no
AC_MSG_NOTICE([32bit ARMv7-m found]) AC_MSG_NOTICE([32bit ARMv7-m found])
;; ;;
armv6*)
AM_CPPFLAGS="$AM_CPPFLAGS -march=armv6 -fomit-frame-pointer -DWOLFSSL_ARMASM_NO_HW_CRYPTO -DWOLFSSL_ARM_ARCH=6"
AM_CCASFLAGS="$AM_CCASFLAGS -DEXTERNAL_OPTS_OPENVPN"
ENABLED_ARMASM_CRYPTO=no
ENABLED_AESGCM_STREAM=no # not yet implemented
ENABLED_ARMASM_NEON=no
AC_MSG_NOTICE([32bit ARMv6 found])
;;
armv4*)
AM_CPPFLAGS="$AM_CPPFLAGS -march=armv4 -fomit-frame-pointer -DWOLFSSL_ARMASM_NO_HW_CRYPTO -DWOLFSSL_ARM_ARCH=4"
AM_CCASFLAGS="$AM_CCASFLAGS -DEXTERNAL_OPTS_OPENVPN"
ENABLED_ARMASM_CRYPTO=no
ENABLED_AESGCM_STREAM=no # not yet implemented
ENABLED_ARMASM_NEON=no
AC_MSG_NOTICE([32bit ARMv4 found])
;;
*) *)
AM_CPPFLAGS="$AM_CPPFLAGS -mfpu=crypto-neon-fp-armv8 -marm" AM_CPPFLAGS="$AM_CPPFLAGS -mfpu=crypto-neon-fp-armv8 -marm"
# Include options.h # Include options.h
@ -7500,21 +7516,47 @@ if test "$ENABLED_SP_ASM" = "yes" && test "$ENABLED_SP" = "yes"; then
AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_SP_ARM64_ASM" AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_SP_ARM64_ASM"
ENABLED_SP_ARM64_ASM=yes ENABLED_SP_ARM64_ASM=yes
;; ;;
*armv7a*)
if test "$ENABLED_ARMASM" = "no"; then
AM_CPPFLAGS="$AM_CPPFLAGS -march=armv7-a -mfpu=neon -DWOLFSSL_ARM_ARCH=7 -marm"
fi
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM32_ASM"
AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_SP_ARM32_ASM"
ENABLED_SP_ARM32_ASM=yes
;;
*cortex* | *armv7m*)
if test "$ENABLED_ARMASM" = "no"; then
AM_CPPFLAGS="$AM_CPPFLAGS -march=armv7-r -D__thumb__ -DWOLFSSL_ARM_ARCH=7"
fi
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM_CORTEX_M_ASM"
AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_SP_ARM_CORTEX_M_ASM"
ENABLED_SP_ARM_CORTEX_ASM=yes
;;
*armv6*)
if test "$ENABLED_ARMASM" = "no"; then
AM_CPPFLAGS="$AM_CPPFLAGS -march=armv6 -DWOLFSSL_ARM_ARCH=6"
fi
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM32_ASM"
AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_SP_ARM32_ASM"
ENABLED_SP_ARM32_ASM=yes
;;
*armv4*)
if test "$ENABLED_ARMASM" = "no"; then
AM_CPPFLAGS="$AM_CPPFLAGS -march=armv4 -DWOLFSSL_ARM_ARCH=4"
fi
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM32_ASM"
AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_SP_ARM32_ASM"
ENABLED_SP_ARM32_ASM=yes
;;
*arm*) *arm*)
if test "$host_alias" = "thumb" || test "$ARM_TARGET" = "thumb"; then if test "$host_alias" = "thumb" || test "$ARM_TARGET" = "thumb"; then
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM_THUMB_ASM" AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM_THUMB_ASM"
AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_SP_ARM_THUMB_ASM" AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_SP_ARM_THUMB_ASM"
ENABLED_SP_ARM_THUMB_ASM=yes ENABLED_SP_ARM_THUMB_ASM=yes
else else
if test "$host_alias" = "cortex" || test "$ARM_TARGET" = "cortex"; then AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM32_ASM"
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM_CORTEX_M_ASM" AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_SP_ARM32_ASM"
AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_SP_ARM_CORTEX_M_ASM" ENABLED_SP_ARM32_ASM=yes
ENABLED_SP_ARM_CORTEX_ASM=yes
else
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM32_ASM"
AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_SP_ARM32_ASM"
ENABLED_SP_ARM32_ASM=yes
fi
fi fi
;; ;;
*x86_64* | *amd64*) *x86_64* | *amd64*)

View File

@ -171,8 +171,10 @@ endif !BUILD_ARMASM_CRYPTO
else else
if BUILD_ARMASM if BUILD_ARMASM
if BUILD_ARMASM_INLINE if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm_c.c src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm_c.c
else else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-aes-asm.S
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm.S src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm.S
endif !BUILD_ARMASM_INLINE endif !BUILD_ARMASM_INLINE
endif BUILD_ARMASM endif BUILD_ARMASM
@ -203,8 +205,10 @@ else
if BUILD_ARMASM if BUILD_ARMASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-sha256.c src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-sha256.c
if BUILD_ARMASM_INLINE if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c
else else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-sha256-asm.S
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha256-asm.S src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha256-asm.S
endif !BUILD_ARMASM_INLINE endif !BUILD_ARMASM_INLINE
else else
@ -229,8 +233,10 @@ else
if BUILD_ARMASM if BUILD_ARMASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-sha512.c src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-sha512.c
if BUILD_ARMASM_INLINE if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c
else else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-sha512-asm.S
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha512-asm.S src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha512-asm.S
endif !BUILD_ARMASM_INLINE endif !BUILD_ARMASM_INLINE
else else
@ -326,8 +332,10 @@ else
if BUILD_ARMASM if BUILD_ARMASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-sha256.c src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-sha256.c
if BUILD_ARMASM_INLINE if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c
else else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-sha256-asm.S
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha256-asm.S src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha256-asm.S
endif !BUILD_ARMASM_INLINE endif !BUILD_ARMASM_INLINE
else else
@ -427,8 +435,10 @@ endif !BUILD_ARMASM_CRYPTO
else else
if BUILD_ARMASM if BUILD_ARMASM
if BUILD_ARMASM_INLINE if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm_c.c src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm_c.c
else else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-aes-asm.S
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm.S src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm.S
endif !BUILD_ARMASM_INLINE endif !BUILD_ARMASM_INLINE
endif BUILD_ARMASM endif BUILD_ARMASM
@ -472,8 +482,10 @@ else
if BUILD_ARMASM if BUILD_ARMASM
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-sha512.c src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-sha512.c
if BUILD_ARMASM_INLINE if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c
else else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-sha512-asm.S
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha512-asm.S src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha512-asm.S
endif !BUILD_ARMASM_INLINE endif !BUILD_ARMASM_INLINE
else else
@ -713,9 +725,11 @@ src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-curve25519.
endif !BUILD_ARMASM_INLINE endif !BUILD_ARMASM_INLINE
else else
if BUILD_ARMASM_INLINE if BUILD_ARMASM_INLINE
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-curve25519_c.c
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-curve25519_c.c src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-curve25519_c.c
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-curve25519_c.c src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-curve25519_c.c
else else
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-curve25519.S
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-curve25519.S src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-curve25519.S
src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-curve25519.S src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-curve25519.S
endif !BUILD_ARMASM_INLINE endif !BUILD_ARMASM_INLINE

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -112,49 +112,49 @@ Transform_Sha256_Len:
sub sp, sp, #0xc0 sub sp, sp, #0xc0
adr r3, L_SHA256_transform_len_k adr r3, L_SHA256_transform_len_k
# Copy digest to add in at end # Copy digest to add in at end
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r4, [r0] ldr r4, [r0]
ldr r5, [r0, #4] ldr r5, [r0, #4]
#else #else
ldrd r4, r5, [r0] ldrd r4, r5, [r0]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r6, [r0, #8] ldr r6, [r0, #8]
ldr r7, [r0, #12] ldr r7, [r0, #12]
#else #else
ldrd r6, r7, [r0, #8] ldrd r6, r7, [r0, #8]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r8, [r0, #16] ldr r8, [r0, #16]
ldr r9, [r0, #20] ldr r9, [r0, #20]
#else #else
ldrd r8, r9, [r0, #16] ldrd r8, r9, [r0, #16]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r10, [r0, #24] ldr r10, [r0, #24]
ldr r11, [r0, #28] ldr r11, [r0, #28]
#else #else
ldrd r10, r11, [r0, #24] ldrd r10, r11, [r0, #24]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r4, [sp, #64] str r4, [sp, #64]
str r5, [sp, #68] str r5, [sp, #68]
#else #else
strd r4, r5, [sp, #64] strd r4, r5, [sp, #64]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r6, [sp, #72] str r6, [sp, #72]
str r7, [sp, #76] str r7, [sp, #76]
#else #else
strd r6, r7, [sp, #72] strd r6, r7, [sp, #72]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r8, [sp, #80] str r8, [sp, #80]
str r9, [sp, #84] str r9, [sp, #84]
#else #else
strd r8, r9, [sp, #80] strd r8, r9, [sp, #80]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r10, [sp, #88] str r10, [sp, #88]
str r11, [sp, #92] str r11, [sp, #92]
#else #else
@ -163,6 +163,136 @@ Transform_Sha256_Len:
# Start of loop processing a block # Start of loop processing a block
L_SHA256_transform_len_begin: L_SHA256_transform_len_begin:
# Load, Reverse and Store W - 64 bytes # Load, Reverse and Store W - 64 bytes
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
ldr r4, [r1]
ldr r5, [r1, #4]
ldr r6, [r1, #8]
ldr r7, [r1, #12]
eor r8, r4, r4, ror #16
eor r9, r5, r5, ror #16
eor r10, r6, r6, ror #16
eor r11, r7, r7, ror #16
bic r8, r8, #0xff0000
bic r9, r9, #0xff0000
bic r10, r10, #0xff0000
bic r11, r11, #0xff0000
ror r4, r4, #8
ror r5, r5, #8
ror r6, r6, #8
ror r7, r7, #8
eor r4, r4, r8, lsr #8
eor r5, r5, r9, lsr #8
eor r6, r6, r10, lsr #8
eor r7, r7, r11, lsr #8
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r4, [sp]
str r5, [sp, #4]
#else
strd r4, r5, [sp]
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r6, [sp, #8]
str r7, [sp, #12]
#else
strd r6, r7, [sp, #8]
#endif
ldr r4, [r1, #16]
ldr r5, [r1, #20]
ldr r6, [r1, #24]
ldr r7, [r1, #28]
eor r8, r4, r4, ror #16
eor r9, r5, r5, ror #16
eor r10, r6, r6, ror #16
eor r11, r7, r7, ror #16
bic r8, r8, #0xff0000
bic r9, r9, #0xff0000
bic r10, r10, #0xff0000
bic r11, r11, #0xff0000
ror r4, r4, #8
ror r5, r5, #8
ror r6, r6, #8
ror r7, r7, #8
eor r4, r4, r8, lsr #8
eor r5, r5, r9, lsr #8
eor r6, r6, r10, lsr #8
eor r7, r7, r11, lsr #8
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r4, [sp, #16]
str r5, [sp, #20]
#else
strd r4, r5, [sp, #16]
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r6, [sp, #24]
str r7, [sp, #28]
#else
strd r6, r7, [sp, #24]
#endif
ldr r4, [r1, #32]
ldr r5, [r1, #36]
ldr r6, [r1, #40]
ldr r7, [r1, #44]
eor r8, r4, r4, ror #16
eor r9, r5, r5, ror #16
eor r10, r6, r6, ror #16
eor r11, r7, r7, ror #16
bic r8, r8, #0xff0000
bic r9, r9, #0xff0000
bic r10, r10, #0xff0000
bic r11, r11, #0xff0000
ror r4, r4, #8
ror r5, r5, #8
ror r6, r6, #8
ror r7, r7, #8
eor r4, r4, r8, lsr #8
eor r5, r5, r9, lsr #8
eor r6, r6, r10, lsr #8
eor r7, r7, r11, lsr #8
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r4, [sp, #32]
str r5, [sp, #36]
#else
strd r4, r5, [sp, #32]
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r6, [sp, #40]
str r7, [sp, #44]
#else
strd r6, r7, [sp, #40]
#endif
ldr r4, [r1, #48]
ldr r5, [r1, #52]
ldr r6, [r1, #56]
ldr r7, [r1, #60]
eor r8, r4, r4, ror #16
eor r9, r5, r5, ror #16
eor r10, r6, r6, ror #16
eor r11, r7, r7, ror #16
bic r8, r8, #0xff0000
bic r9, r9, #0xff0000
bic r10, r10, #0xff0000
bic r11, r11, #0xff0000
ror r4, r4, #8
ror r5, r5, #8
ror r6, r6, #8
ror r7, r7, #8
eor r4, r4, r8, lsr #8
eor r5, r5, r9, lsr #8
eor r6, r6, r10, lsr #8
eor r7, r7, r11, lsr #8
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r4, [sp, #48]
str r5, [sp, #52]
#else
strd r4, r5, [sp, #48]
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r6, [sp, #56]
str r7, [sp, #60]
#else
strd r6, r7, [sp, #56]
#endif
#else
ldr r4, [r1] ldr r4, [r1]
ldr r5, [r1, #4] ldr r5, [r1, #4]
ldr r6, [r1, #8] ldr r6, [r1, #8]
@ -179,25 +309,25 @@ L_SHA256_transform_len_begin:
rev r9, r9 rev r9, r9
rev r10, r10 rev r10, r10
rev r11, r11 rev r11, r11
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r4, [sp] str r4, [sp]
str r5, [sp, #4] str r5, [sp, #4]
#else #else
strd r4, r5, [sp] strd r4, r5, [sp]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r6, [sp, #8] str r6, [sp, #8]
str r7, [sp, #12] str r7, [sp, #12]
#else #else
strd r6, r7, [sp, #8] strd r6, r7, [sp, #8]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r8, [sp, #16] str r8, [sp, #16]
str r9, [sp, #20] str r9, [sp, #20]
#else #else
strd r8, r9, [sp, #16] strd r8, r9, [sp, #16]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r10, [sp, #24] str r10, [sp, #24]
str r11, [sp, #28] str r11, [sp, #28]
#else #else
@ -219,30 +349,31 @@ L_SHA256_transform_len_begin:
rev r9, r9 rev r9, r9
rev r10, r10 rev r10, r10
rev r11, r11 rev r11, r11
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r4, [sp, #32] str r4, [sp, #32]
str r5, [sp, #36] str r5, [sp, #36]
#else #else
strd r4, r5, [sp, #32] strd r4, r5, [sp, #32]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r6, [sp, #40] str r6, [sp, #40]
str r7, [sp, #44] str r7, [sp, #44]
#else #else
strd r6, r7, [sp, #40] strd r6, r7, [sp, #40]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r8, [sp, #48] str r8, [sp, #48]
str r9, [sp, #52] str r9, [sp, #52]
#else #else
strd r8, r9, [sp, #48] strd r8, r9, [sp, #48]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r10, [sp, #56] str r10, [sp, #56]
str r11, [sp, #60] str r11, [sp, #60]
#else #else
strd r10, r11, [sp, #56] strd r10, r11, [sp, #56]
#endif #endif
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */
ldr r11, [r0, #4] ldr r11, [r0, #4]
ldr r4, [r0, #8] ldr r4, [r0, #8]
eor r11, r11, r4 eor r11, r11, r4
@ -1517,25 +1648,25 @@ L_SHA256_transform_len_start:
str r8, [r0, #16] str r8, [r0, #16]
str r9, [r0] str r9, [r0]
# Add in digest from start # Add in digest from start
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r4, [r0] ldr r4, [r0]
ldr r5, [r0, #4] ldr r5, [r0, #4]
#else #else
ldrd r4, r5, [r0] ldrd r4, r5, [r0]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r6, [r0, #8] ldr r6, [r0, #8]
ldr r7, [r0, #12] ldr r7, [r0, #12]
#else #else
ldrd r6, r7, [r0, #8] ldrd r6, r7, [r0, #8]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r8, [sp, #64] ldr r8, [sp, #64]
ldr r9, [sp, #68] ldr r9, [sp, #68]
#else #else
ldrd r8, r9, [sp, #64] ldrd r8, r9, [sp, #64]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r10, [sp, #72] ldr r10, [sp, #72]
ldr r11, [sp, #76] ldr r11, [sp, #76]
#else #else
@ -1545,49 +1676,49 @@ L_SHA256_transform_len_start:
add r5, r5, r9 add r5, r5, r9
add r6, r6, r10 add r6, r6, r10
add r7, r7, r11 add r7, r7, r11
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r4, [r0] str r4, [r0]
str r5, [r0, #4] str r5, [r0, #4]
#else #else
strd r4, r5, [r0] strd r4, r5, [r0]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r6, [r0, #8] str r6, [r0, #8]
str r7, [r0, #12] str r7, [r0, #12]
#else #else
strd r6, r7, [r0, #8] strd r6, r7, [r0, #8]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r4, [sp, #64] str r4, [sp, #64]
str r5, [sp, #68] str r5, [sp, #68]
#else #else
strd r4, r5, [sp, #64] strd r4, r5, [sp, #64]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r6, [sp, #72] str r6, [sp, #72]
str r7, [sp, #76] str r7, [sp, #76]
#else #else
strd r6, r7, [sp, #72] strd r6, r7, [sp, #72]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r4, [r0, #16] ldr r4, [r0, #16]
ldr r5, [r0, #20] ldr r5, [r0, #20]
#else #else
ldrd r4, r5, [r0, #16] ldrd r4, r5, [r0, #16]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r6, [r0, #24] ldr r6, [r0, #24]
ldr r7, [r0, #28] ldr r7, [r0, #28]
#else #else
ldrd r6, r7, [r0, #24] ldrd r6, r7, [r0, #24]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r8, [sp, #80] ldr r8, [sp, #80]
ldr r9, [sp, #84] ldr r9, [sp, #84]
#else #else
ldrd r8, r9, [sp, #80] ldrd r8, r9, [sp, #80]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r10, [sp, #88] ldr r10, [sp, #88]
ldr r11, [sp, #92] ldr r11, [sp, #92]
#else #else
@ -1597,25 +1728,25 @@ L_SHA256_transform_len_start:
add r5, r5, r9 add r5, r5, r9
add r6, r6, r10 add r6, r6, r10
add r7, r7, r11 add r7, r7, r11
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r4, [r0, #16] str r4, [r0, #16]
str r5, [r0, #20] str r5, [r0, #20]
#else #else
strd r4, r5, [r0, #16] strd r4, r5, [r0, #16]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r6, [r0, #24] str r6, [r0, #24]
str r7, [r0, #28] str r7, [r0, #28]
#else #else
strd r6, r7, [r0, #24] strd r6, r7, [r0, #24]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r4, [sp, #80] str r4, [sp, #80]
str r5, [sp, #84] str r5, [sp, #84]
#else #else
strd r4, r5, [sp, #80] strd r4, r5, [sp, #80]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r6, [sp, #88] str r6, [sp, #88]
str r7, [sp, #92] str r7, [sp, #92]
#else #else
@ -1708,7 +1839,7 @@ Transform_Sha256_Len:
push {r4, r5, r6, r7, r8, r9, r10, lr} push {r4, r5, r6, r7, r8, r9, r10, lr}
vpush {d8-d11} vpush {d8-d11}
sub sp, sp, #24 sub sp, sp, #24
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r0, [sp] str r0, [sp]
str r1, [sp, #4] str r1, [sp, #4]
#else #else
@ -1717,25 +1848,25 @@ Transform_Sha256_Len:
str r2, [sp, #8] str r2, [sp, #8]
adr r12, L_SHA256_transform_neon_len_k adr r12, L_SHA256_transform_neon_len_k
# Load digest into registers # Load digest into registers
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r2, [r0] ldr r2, [r0]
ldr r3, [r0, #4] ldr r3, [r0, #4]
#else #else
ldrd r2, r3, [r0] ldrd r2, r3, [r0]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r4, [r0, #8] ldr r4, [r0, #8]
ldr r5, [r0, #12] ldr r5, [r0, #12]
#else #else
ldrd r4, r5, [r0, #8] ldrd r4, r5, [r0, #8]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r6, [r0, #16] ldr r6, [r0, #16]
ldr r7, [r0, #20] ldr r7, [r0, #20]
#else #else
ldrd r6, r7, [r0, #16] ldrd r6, r7, [r0, #16]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r8, [r0, #24] ldr r8, [r0, #24]
ldr r9, [r0, #28] ldr r9, [r0, #28]
#else #else
@ -2666,7 +2797,7 @@ L_SHA256_transform_neon_len_start:
add r2, r2, r1 add r2, r2, r1
ldr r10, [sp] ldr r10, [sp]
# Add in digest from start # Add in digest from start
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r0, [r10] ldr r0, [r10]
ldr r1, [r10, #4] ldr r1, [r10, #4]
#else #else
@ -2674,13 +2805,13 @@ L_SHA256_transform_neon_len_start:
#endif #endif
add r2, r2, r0 add r2, r2, r0
add r3, r3, r1 add r3, r3, r1
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r2, [r10] str r2, [r10]
str r3, [r10, #4] str r3, [r10, #4]
#else #else
strd r2, r3, [r10] strd r2, r3, [r10]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r0, [r10, #8] ldr r0, [r10, #8]
ldr r1, [r10, #12] ldr r1, [r10, #12]
#else #else
@ -2688,13 +2819,13 @@ L_SHA256_transform_neon_len_start:
#endif #endif
add r4, r4, r0 add r4, r4, r0
add r5, r5, r1 add r5, r5, r1
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r4, [r10, #8] str r4, [r10, #8]
str r5, [r10, #12] str r5, [r10, #12]
#else #else
strd r4, r5, [r10, #8] strd r4, r5, [r10, #8]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r0, [r10, #16] ldr r0, [r10, #16]
ldr r1, [r10, #20] ldr r1, [r10, #20]
#else #else
@ -2702,13 +2833,13 @@ L_SHA256_transform_neon_len_start:
#endif #endif
add r6, r6, r0 add r6, r6, r0
add r7, r7, r1 add r7, r7, r1
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r6, [r10, #16] str r6, [r10, #16]
str r7, [r10, #20] str r7, [r10, #20]
#else #else
strd r6, r7, [r10, #16] strd r6, r7, [r10, #16]
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
ldr r0, [r10, #24] ldr r0, [r10, #24]
ldr r1, [r10, #28] ldr r1, [r10, #28]
#else #else
@ -2716,7 +2847,7 @@ L_SHA256_transform_neon_len_start:
#endif #endif
add r8, r8, r0 add r8, r8, r0
add r9, r9, r1 add r9, r9, r1
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
str r8, [r10, #24] str r8, [r10, #24]
str r9, [r10, #28] str r9, [r10, #28]
#else #else

View File

@ -39,6 +39,18 @@
#include <wolfssl/wolfcrypt/settings.h> #include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h> #include <wolfssl/wolfcrypt/error-crypt.h>
#ifdef WOLFSSL_ARMASM_INLINE #ifdef WOLFSSL_ARMASM_INLINE
#ifdef WOLFSSL_ARMASM
#if !defined(__aarch64__) && defined(__arm__)
#ifdef __IAR_SYSTEMS_ICC__
#define __asm__ asm
#define __volatile__ volatile
#endif /* __IAR_SYSTEMS_ICC__ */
#ifdef __KEIL__
#define __asm__ __asm
#define __volatile__ volatile
#endif /* __KEIL__ */
#ifndef NO_SHA256 #ifndef NO_SHA256
#include <wolfssl/wolfcrypt/sha256.h> #include <wolfssl/wolfcrypt/sha256.h>
@ -73,49 +85,49 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
__asm__ __volatile__ ( __asm__ __volatile__ (
"sub sp, sp, #0xc0\n\t" "sub sp, sp, #0xc0\n\t"
/* Copy digest to add in at end */ /* Copy digest to add in at end */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r4, [%[sha256]]\n\t" "ldr r4, [%[sha256]]\n\t"
"ldr r5, [%[sha256], #4]\n\t" "ldr r5, [%[sha256], #4]\n\t"
#else #else
"ldrd r4, r5, [%[sha256]]\n\t" "ldrd r4, r5, [%[sha256]]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r6, [%[sha256], #8]\n\t" "ldr r6, [%[sha256], #8]\n\t"
"ldr r7, [%[sha256], #12]\n\t" "ldr r7, [%[sha256], #12]\n\t"
#else #else
"ldrd r6, r7, [%[sha256], #8]\n\t" "ldrd r6, r7, [%[sha256], #8]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r8, [%[sha256], #16]\n\t" "ldr r8, [%[sha256], #16]\n\t"
"ldr r9, [%[sha256], #20]\n\t" "ldr r9, [%[sha256], #20]\n\t"
#else #else
"ldrd r8, r9, [%[sha256], #16]\n\t" "ldrd r8, r9, [%[sha256], #16]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r10, [%[sha256], #24]\n\t" "ldr r10, [%[sha256], #24]\n\t"
"ldr r11, [%[sha256], #28]\n\t" "ldr r11, [%[sha256], #28]\n\t"
#else #else
"ldrd r10, r11, [%[sha256], #24]\n\t" "ldrd r10, r11, [%[sha256], #24]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r4, [sp, #64]\n\t" "str r4, [sp, #64]\n\t"
"str r5, [sp, #68]\n\t" "str r5, [sp, #68]\n\t"
#else #else
"strd r4, r5, [sp, #64]\n\t" "strd r4, r5, [sp, #64]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r6, [sp, #72]\n\t" "str r6, [sp, #72]\n\t"
"str r7, [sp, #76]\n\t" "str r7, [sp, #76]\n\t"
#else #else
"strd r6, r7, [sp, #72]\n\t" "strd r6, r7, [sp, #72]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r8, [sp, #80]\n\t" "str r8, [sp, #80]\n\t"
"str r9, [sp, #84]\n\t" "str r9, [sp, #84]\n\t"
#else #else
"strd r8, r9, [sp, #80]\n\t" "strd r8, r9, [sp, #80]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r10, [sp, #88]\n\t" "str r10, [sp, #88]\n\t"
"str r11, [sp, #92]\n\t" "str r11, [sp, #92]\n\t"
#else #else
@ -125,6 +137,136 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"\n" "\n"
"L_SHA256_transform_len_begin_%=: \n\t" "L_SHA256_transform_len_begin_%=: \n\t"
/* Load, Reverse and Store W - 64 bytes */ /* Load, Reverse and Store W - 64 bytes */
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
"ldr r4, [%[data]]\n\t"
"ldr r5, [%[data], #4]\n\t"
"ldr r6, [%[data], #8]\n\t"
"ldr r7, [%[data], #12]\n\t"
"eor r8, r4, r4, ror #16\n\t"
"eor r9, r5, r5, ror #16\n\t"
"eor r10, r6, r6, ror #16\n\t"
"eor r11, r7, r7, ror #16\n\t"
"bic r8, r8, #0xff0000\n\t"
"bic r9, r9, #0xff0000\n\t"
"bic r10, r10, #0xff0000\n\t"
"bic r11, r11, #0xff0000\n\t"
"ror r4, r4, #8\n\t"
"ror r5, r5, #8\n\t"
"ror r6, r6, #8\n\t"
"ror r7, r7, #8\n\t"
"eor r4, r4, r8, lsr #8\n\t"
"eor r5, r5, r9, lsr #8\n\t"
"eor r6, r6, r10, lsr #8\n\t"
"eor r7, r7, r11, lsr #8\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r4, [sp]\n\t"
"str r5, [sp, #4]\n\t"
#else
"strd r4, r5, [sp]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r6, [sp, #8]\n\t"
"str r7, [sp, #12]\n\t"
#else
"strd r6, r7, [sp, #8]\n\t"
#endif
"ldr r4, [%[data], #16]\n\t"
"ldr r5, [%[data], #20]\n\t"
"ldr r6, [%[data], #24]\n\t"
"ldr r7, [%[data], #28]\n\t"
"eor r8, r4, r4, ror #16\n\t"
"eor r9, r5, r5, ror #16\n\t"
"eor r10, r6, r6, ror #16\n\t"
"eor r11, r7, r7, ror #16\n\t"
"bic r8, r8, #0xff0000\n\t"
"bic r9, r9, #0xff0000\n\t"
"bic r10, r10, #0xff0000\n\t"
"bic r11, r11, #0xff0000\n\t"
"ror r4, r4, #8\n\t"
"ror r5, r5, #8\n\t"
"ror r6, r6, #8\n\t"
"ror r7, r7, #8\n\t"
"eor r4, r4, r8, lsr #8\n\t"
"eor r5, r5, r9, lsr #8\n\t"
"eor r6, r6, r10, lsr #8\n\t"
"eor r7, r7, r11, lsr #8\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r4, [sp, #16]\n\t"
"str r5, [sp, #20]\n\t"
#else
"strd r4, r5, [sp, #16]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r6, [sp, #24]\n\t"
"str r7, [sp, #28]\n\t"
#else
"strd r6, r7, [sp, #24]\n\t"
#endif
"ldr r4, [%[data], #32]\n\t"
"ldr r5, [%[data], #36]\n\t"
"ldr r6, [%[data], #40]\n\t"
"ldr r7, [%[data], #44]\n\t"
"eor r8, r4, r4, ror #16\n\t"
"eor r9, r5, r5, ror #16\n\t"
"eor r10, r6, r6, ror #16\n\t"
"eor r11, r7, r7, ror #16\n\t"
"bic r8, r8, #0xff0000\n\t"
"bic r9, r9, #0xff0000\n\t"
"bic r10, r10, #0xff0000\n\t"
"bic r11, r11, #0xff0000\n\t"
"ror r4, r4, #8\n\t"
"ror r5, r5, #8\n\t"
"ror r6, r6, #8\n\t"
"ror r7, r7, #8\n\t"
"eor r4, r4, r8, lsr #8\n\t"
"eor r5, r5, r9, lsr #8\n\t"
"eor r6, r6, r10, lsr #8\n\t"
"eor r7, r7, r11, lsr #8\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r4, [sp, #32]\n\t"
"str r5, [sp, #36]\n\t"
#else
"strd r4, r5, [sp, #32]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r6, [sp, #40]\n\t"
"str r7, [sp, #44]\n\t"
#else
"strd r6, r7, [sp, #40]\n\t"
#endif
"ldr r4, [%[data], #48]\n\t"
"ldr r5, [%[data], #52]\n\t"
"ldr r6, [%[data], #56]\n\t"
"ldr r7, [%[data], #60]\n\t"
"eor r8, r4, r4, ror #16\n\t"
"eor r9, r5, r5, ror #16\n\t"
"eor r10, r6, r6, ror #16\n\t"
"eor r11, r7, r7, ror #16\n\t"
"bic r8, r8, #0xff0000\n\t"
"bic r9, r9, #0xff0000\n\t"
"bic r10, r10, #0xff0000\n\t"
"bic r11, r11, #0xff0000\n\t"
"ror r4, r4, #8\n\t"
"ror r5, r5, #8\n\t"
"ror r6, r6, #8\n\t"
"ror r7, r7, #8\n\t"
"eor r4, r4, r8, lsr #8\n\t"
"eor r5, r5, r9, lsr #8\n\t"
"eor r6, r6, r10, lsr #8\n\t"
"eor r7, r7, r11, lsr #8\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r4, [sp, #48]\n\t"
"str r5, [sp, #52]\n\t"
#else
"strd r4, r5, [sp, #48]\n\t"
#endif
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r6, [sp, #56]\n\t"
"str r7, [sp, #60]\n\t"
#else
"strd r6, r7, [sp, #56]\n\t"
#endif
#else
"ldr r4, [%[data]]\n\t" "ldr r4, [%[data]]\n\t"
"ldr r5, [%[data], #4]\n\t" "ldr r5, [%[data], #4]\n\t"
"ldr r6, [%[data], #8]\n\t" "ldr r6, [%[data], #8]\n\t"
@ -141,25 +283,25 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"rev r9, r9\n\t" "rev r9, r9\n\t"
"rev r10, r10\n\t" "rev r10, r10\n\t"
"rev r11, r11\n\t" "rev r11, r11\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r4, [sp]\n\t" "str r4, [sp]\n\t"
"str r5, [sp, #4]\n\t" "str r5, [sp, #4]\n\t"
#else #else
"strd r4, r5, [sp]\n\t" "strd r4, r5, [sp]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r6, [sp, #8]\n\t" "str r6, [sp, #8]\n\t"
"str r7, [sp, #12]\n\t" "str r7, [sp, #12]\n\t"
#else #else
"strd r6, r7, [sp, #8]\n\t" "strd r6, r7, [sp, #8]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r8, [sp, #16]\n\t" "str r8, [sp, #16]\n\t"
"str r9, [sp, #20]\n\t" "str r9, [sp, #20]\n\t"
#else #else
"strd r8, r9, [sp, #16]\n\t" "strd r8, r9, [sp, #16]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r10, [sp, #24]\n\t" "str r10, [sp, #24]\n\t"
"str r11, [sp, #28]\n\t" "str r11, [sp, #28]\n\t"
#else #else
@ -181,30 +323,31 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"rev r9, r9\n\t" "rev r9, r9\n\t"
"rev r10, r10\n\t" "rev r10, r10\n\t"
"rev r11, r11\n\t" "rev r11, r11\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r4, [sp, #32]\n\t" "str r4, [sp, #32]\n\t"
"str r5, [sp, #36]\n\t" "str r5, [sp, #36]\n\t"
#else #else
"strd r4, r5, [sp, #32]\n\t" "strd r4, r5, [sp, #32]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r6, [sp, #40]\n\t" "str r6, [sp, #40]\n\t"
"str r7, [sp, #44]\n\t" "str r7, [sp, #44]\n\t"
#else #else
"strd r6, r7, [sp, #40]\n\t" "strd r6, r7, [sp, #40]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r8, [sp, #48]\n\t" "str r8, [sp, #48]\n\t"
"str r9, [sp, #52]\n\t" "str r9, [sp, #52]\n\t"
#else #else
"strd r8, r9, [sp, #48]\n\t" "strd r8, r9, [sp, #48]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r10, [sp, #56]\n\t" "str r10, [sp, #56]\n\t"
"str r11, [sp, #60]\n\t" "str r11, [sp, #60]\n\t"
#else #else
"strd r10, r11, [sp, #56]\n\t" "strd r10, r11, [sp, #56]\n\t"
#endif #endif
#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH < 6 */
"ldr r11, [%[sha256], #4]\n\t" "ldr r11, [%[sha256], #4]\n\t"
"ldr r4, [%[sha256], #8]\n\t" "ldr r4, [%[sha256], #8]\n\t"
"eor r11, r11, r4\n\t" "eor r11, r11, r4\n\t"
@ -1480,25 +1623,25 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"str r8, [%[sha256], #16]\n\t" "str r8, [%[sha256], #16]\n\t"
"str r9, [%[sha256]]\n\t" "str r9, [%[sha256]]\n\t"
/* Add in digest from start */ /* Add in digest from start */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r4, [%[sha256]]\n\t" "ldr r4, [%[sha256]]\n\t"
"ldr r5, [%[sha256], #4]\n\t" "ldr r5, [%[sha256], #4]\n\t"
#else #else
"ldrd r4, r5, [%[sha256]]\n\t" "ldrd r4, r5, [%[sha256]]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r6, [%[sha256], #8]\n\t" "ldr r6, [%[sha256], #8]\n\t"
"ldr r7, [%[sha256], #12]\n\t" "ldr r7, [%[sha256], #12]\n\t"
#else #else
"ldrd r6, r7, [%[sha256], #8]\n\t" "ldrd r6, r7, [%[sha256], #8]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r8, [sp, #64]\n\t" "ldr r8, [sp, #64]\n\t"
"ldr r9, [sp, #68]\n\t" "ldr r9, [sp, #68]\n\t"
#else #else
"ldrd r8, r9, [sp, #64]\n\t" "ldrd r8, r9, [sp, #64]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r10, [sp, #72]\n\t" "ldr r10, [sp, #72]\n\t"
"ldr r11, [sp, #76]\n\t" "ldr r11, [sp, #76]\n\t"
#else #else
@ -1508,49 +1651,49 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"add r5, r5, r9\n\t" "add r5, r5, r9\n\t"
"add r6, r6, r10\n\t" "add r6, r6, r10\n\t"
"add r7, r7, r11\n\t" "add r7, r7, r11\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r4, [%[sha256]]\n\t" "str r4, [%[sha256]]\n\t"
"str r5, [%[sha256], #4]\n\t" "str r5, [%[sha256], #4]\n\t"
#else #else
"strd r4, r5, [%[sha256]]\n\t" "strd r4, r5, [%[sha256]]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r6, [%[sha256], #8]\n\t" "str r6, [%[sha256], #8]\n\t"
"str r7, [%[sha256], #12]\n\t" "str r7, [%[sha256], #12]\n\t"
#else #else
"strd r6, r7, [%[sha256], #8]\n\t" "strd r6, r7, [%[sha256], #8]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r4, [sp, #64]\n\t" "str r4, [sp, #64]\n\t"
"str r5, [sp, #68]\n\t" "str r5, [sp, #68]\n\t"
#else #else
"strd r4, r5, [sp, #64]\n\t" "strd r4, r5, [sp, #64]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r6, [sp, #72]\n\t" "str r6, [sp, #72]\n\t"
"str r7, [sp, #76]\n\t" "str r7, [sp, #76]\n\t"
#else #else
"strd r6, r7, [sp, #72]\n\t" "strd r6, r7, [sp, #72]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r4, [%[sha256], #16]\n\t" "ldr r4, [%[sha256], #16]\n\t"
"ldr r5, [%[sha256], #20]\n\t" "ldr r5, [%[sha256], #20]\n\t"
#else #else
"ldrd r4, r5, [%[sha256], #16]\n\t" "ldrd r4, r5, [%[sha256], #16]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r6, [%[sha256], #24]\n\t" "ldr r6, [%[sha256], #24]\n\t"
"ldr r7, [%[sha256], #28]\n\t" "ldr r7, [%[sha256], #28]\n\t"
#else #else
"ldrd r6, r7, [%[sha256], #24]\n\t" "ldrd r6, r7, [%[sha256], #24]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r8, [sp, #80]\n\t" "ldr r8, [sp, #80]\n\t"
"ldr r9, [sp, #84]\n\t" "ldr r9, [sp, #84]\n\t"
#else #else
"ldrd r8, r9, [sp, #80]\n\t" "ldrd r8, r9, [sp, #80]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r10, [sp, #88]\n\t" "ldr r10, [sp, #88]\n\t"
"ldr r11, [sp, #92]\n\t" "ldr r11, [sp, #92]\n\t"
#else #else
@ -1560,25 +1703,25 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"add r5, r5, r9\n\t" "add r5, r5, r9\n\t"
"add r6, r6, r10\n\t" "add r6, r6, r10\n\t"
"add r7, r7, r11\n\t" "add r7, r7, r11\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r4, [%[sha256], #16]\n\t" "str r4, [%[sha256], #16]\n\t"
"str r5, [%[sha256], #20]\n\t" "str r5, [%[sha256], #20]\n\t"
#else #else
"strd r4, r5, [%[sha256], #16]\n\t" "strd r4, r5, [%[sha256], #16]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r6, [%[sha256], #24]\n\t" "str r6, [%[sha256], #24]\n\t"
"str r7, [%[sha256], #28]\n\t" "str r7, [%[sha256], #28]\n\t"
#else #else
"strd r6, r7, [%[sha256], #24]\n\t" "strd r6, r7, [%[sha256], #24]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r4, [sp, #80]\n\t" "str r4, [sp, #80]\n\t"
"str r5, [sp, #84]\n\t" "str r5, [sp, #84]\n\t"
#else #else
"strd r4, r5, [sp, #80]\n\t" "strd r4, r5, [sp, #80]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r6, [sp, #88]\n\t" "str r6, [sp, #88]\n\t"
"str r7, [sp, #92]\n\t" "str r7, [sp, #92]\n\t"
#else #else
@ -1628,7 +1771,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
__asm__ __volatile__ ( __asm__ __volatile__ (
"sub sp, sp, #24\n\t" "sub sp, sp, #24\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str %[sha256], [sp]\n\t" "str %[sha256], [sp]\n\t"
"str %[data], [sp, #4]\n\t" "str %[data], [sp, #4]\n\t"
#else #else
@ -1637,25 +1780,25 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"str %[len], [sp, #8]\n\t" "str %[len], [sp, #8]\n\t"
"mov r12, %[L_SHA256_transform_neon_len_k]\n\t" "mov r12, %[L_SHA256_transform_neon_len_k]\n\t"
/* Load digest into registers */ /* Load digest into registers */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr %[len], [%[sha256]]\n\t" "ldr %[len], [%[sha256]]\n\t"
"ldr r3, [%[sha256], #4]\n\t" "ldr r3, [%[sha256], #4]\n\t"
#else #else
"ldrd %[len], r3, [%[sha256]]\n\t" "ldrd %[len], r3, [%[sha256]]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r4, [%[sha256], #8]\n\t" "ldr r4, [%[sha256], #8]\n\t"
"ldr r5, [%[sha256], #12]\n\t" "ldr r5, [%[sha256], #12]\n\t"
#else #else
"ldrd r4, r5, [%[sha256], #8]\n\t" "ldrd r4, r5, [%[sha256], #8]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r6, [%[sha256], #16]\n\t" "ldr r6, [%[sha256], #16]\n\t"
"ldr r7, [%[sha256], #20]\n\t" "ldr r7, [%[sha256], #20]\n\t"
#else #else
"ldrd r6, r7, [%[sha256], #16]\n\t" "ldrd r6, r7, [%[sha256], #16]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r8, [%[sha256], #24]\n\t" "ldr r8, [%[sha256], #24]\n\t"
"ldr r9, [%[sha256], #28]\n\t" "ldr r9, [%[sha256], #28]\n\t"
#else #else
@ -2588,7 +2731,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"add %[len], %[len], %[data]\n\t" "add %[len], %[len], %[data]\n\t"
"ldr r10, [sp]\n\t" "ldr r10, [sp]\n\t"
/* Add in digest from start */ /* Add in digest from start */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr %[sha256], [r10]\n\t" "ldr %[sha256], [r10]\n\t"
"ldr %[data], [r10, #4]\n\t" "ldr %[data], [r10, #4]\n\t"
#else #else
@ -2596,13 +2739,13 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
#endif #endif
"add %[len], %[len], %[sha256]\n\t" "add %[len], %[len], %[sha256]\n\t"
"add r3, r3, %[data]\n\t" "add r3, r3, %[data]\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str %[len], [r10]\n\t" "str %[len], [r10]\n\t"
"str r3, [r10, #4]\n\t" "str r3, [r10, #4]\n\t"
#else #else
"strd %[len], r3, [r10]\n\t" "strd %[len], r3, [r10]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr %[sha256], [r10, #8]\n\t" "ldr %[sha256], [r10, #8]\n\t"
"ldr %[data], [r10, #12]\n\t" "ldr %[data], [r10, #12]\n\t"
#else #else
@ -2610,13 +2753,13 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
#endif #endif
"add r4, r4, %[sha256]\n\t" "add r4, r4, %[sha256]\n\t"
"add r5, r5, %[data]\n\t" "add r5, r5, %[data]\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r4, [r10, #8]\n\t" "str r4, [r10, #8]\n\t"
"str r5, [r10, #12]\n\t" "str r5, [r10, #12]\n\t"
#else #else
"strd r4, r5, [r10, #8]\n\t" "strd r4, r5, [r10, #8]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr %[sha256], [r10, #16]\n\t" "ldr %[sha256], [r10, #16]\n\t"
"ldr %[data], [r10, #20]\n\t" "ldr %[data], [r10, #20]\n\t"
#else #else
@ -2624,13 +2767,13 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
#endif #endif
"add r6, r6, %[sha256]\n\t" "add r6, r6, %[sha256]\n\t"
"add r7, r7, %[data]\n\t" "add r7, r7, %[data]\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r6, [r10, #16]\n\t" "str r6, [r10, #16]\n\t"
"str r7, [r10, #20]\n\t" "str r7, [r10, #20]\n\t"
#else #else
"strd r6, r7, [r10, #16]\n\t" "strd r6, r7, [r10, #16]\n\t"
#endif #endif
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr %[sha256], [r10, #24]\n\t" "ldr %[sha256], [r10, #24]\n\t"
"ldr %[data], [r10, #28]\n\t" "ldr %[data], [r10, #28]\n\t"
#else #else
@ -2638,7 +2781,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
#endif #endif
"add r8, r8, %[sha256]\n\t" "add r8, r8, %[sha256]\n\t"
"add r9, r9, %[data]\n\t" "add r9, r9, %[data]\n\t"
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"str r8, [r10, #24]\n\t" "str r8, [r10, #24]\n\t"
"str r9, [r10, #28]\n\t" "str r9, [r10, #28]\n\t"
#else #else
@ -2661,4 +2804,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
#endif /* !NO_SHA256 */ #endif /* !NO_SHA256 */
#endif /* !__aarch64__ && !__thumb__ */ #endif /* !__aarch64__ && !__thumb__ */
#endif /* WOLFSSL_ARMASM */ #endif /* WOLFSSL_ARMASM */
#endif /* !defined(__aarch64__) && defined(__arm__) */
#endif /* WOLFSSL_ARMASM */
#endif /* WOLFSSL_ARMASM_INLINE */ #endif /* WOLFSSL_ARMASM_INLINE */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -593,9 +593,9 @@ _fe_mul:
adds x7, x7, x3 adds x7, x7, x3
umulh x4, x15, x19 umulh x4, x15, x19
adcs x8, x8, x4 adcs x8, x8, x4
adc x9, x9, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x11, x15, x22 umulh x11, x15, x22
adc x9, x9, xzr
mul x10, x15, x22 mul x10, x15, x22
# A[0] * B[1] # A[0] * B[1]
mul x3, x14, x20 mul x3, x14, x20
@ -1842,9 +1842,9 @@ L_curve25519_bits:
adds x20, x20, x3 adds x20, x20, x3
umulh x4, x15, x6 umulh x4, x15, x6
adcs x21, x21, x4 adcs x21, x21, x4
adc x22, x22, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x26, x15, x9 umulh x26, x15, x9
adc x22, x22, xzr
mul x25, x15, x9 mul x25, x15, x9
# A[0] * B[1] # A[0] * B[1]
mul x3, x14, x7 mul x3, x14, x7
@ -1958,9 +1958,9 @@ L_curve25519_bits:
adds x20, x20, x3 adds x20, x20, x3
umulh x4, x11, x25 umulh x4, x11, x25
adcs x21, x21, x4 adcs x21, x21, x4
adc x22, x22, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x15, x11, x28 umulh x15, x11, x28
adc x22, x22, xzr
mul x14, x11, x28 mul x14, x11, x28
# A[0] * B[1] # A[0] * B[1]
mul x3, x10, x26 mul x3, x10, x26
@ -2229,9 +2229,9 @@ L_curve25519_bits:
adds x7, x7, x3 adds x7, x7, x3
umulh x4, x15, x10 umulh x4, x15, x10
adcs x8, x8, x4 adcs x8, x8, x4
adc x9, x9, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x26, x15, x13 umulh x26, x15, x13
adc x9, x9, xzr
mul x25, x15, x13 mul x25, x15, x13
# A[0] * B[1] # A[0] * B[1]
mul x3, x14, x11 mul x3, x14, x11
@ -2400,9 +2400,9 @@ L_curve25519_bits:
adds x7, x7, x3 adds x7, x7, x3
umulh x4, x15, x10 umulh x4, x15, x10
adcs x8, x8, x4 adcs x8, x8, x4
adc x9, x9, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x26, x15, x13 umulh x26, x15, x13
adc x9, x9, xzr
mul x25, x15, x13 mul x25, x15, x13
# A[0] * B[1] # A[0] * B[1]
mul x3, x14, x11 mul x3, x14, x11
@ -2708,9 +2708,9 @@ L_curve25519_bits:
adds x11, x11, x3 adds x11, x11, x3
umulh x4, x20, x14 umulh x4, x20, x14
adcs x12, x12, x4 adcs x12, x12, x4
adc x13, x13, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x26, x20, x17 umulh x26, x20, x17
adc x13, x13, xzr
mul x25, x20, x17 mul x25, x20, x17
# A[0] * B[1] # A[0] * B[1]
mul x3, x19, x15 mul x3, x19, x15
@ -3679,9 +3679,9 @@ L_curve25519_inv_8:
adds x15, x15, x3 adds x15, x15, x3
umulh x4, x7, x10 umulh x4, x7, x10
adcs x16, x16, x4 adcs x16, x16, x4
adc x17, x17, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x20, x7, x13 umulh x20, x7, x13
adc x17, x17, xzr
mul x19, x7, x13 mul x19, x7, x13
# A[0] * B[1] # A[0] * B[1]
mul x3, x6, x11 mul x3, x6, x11
@ -4664,9 +4664,9 @@ _ge_p1p1_to_p2:
adds x15, x15, x3 adds x15, x15, x3
umulh x4, x11, x6 umulh x4, x11, x6
adcs x16, x16, x4 adcs x16, x16, x4
adc x17, x17, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x20, x11, x9 umulh x20, x11, x9
adc x17, x17, xzr
mul x19, x11, x9 mul x19, x11, x9
# A[0] * B[1] # A[0] * B[1]
mul x3, x10, x7 mul x3, x10, x7
@ -4782,9 +4782,9 @@ _ge_p1p1_to_p2:
adds x15, x15, x3 adds x15, x15, x3
umulh x4, x11, x6 umulh x4, x11, x6
adcs x16, x16, x4 adcs x16, x16, x4
adc x17, x17, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x20, x11, x9 umulh x20, x11, x9
adc x17, x17, xzr
mul x19, x11, x9 mul x19, x11, x9
# A[0] * B[1] # A[0] * B[1]
mul x3, x10, x7 mul x3, x10, x7
@ -4900,9 +4900,9 @@ _ge_p1p1_to_p2:
adds x15, x15, x3 adds x15, x15, x3
umulh x4, x11, x6 umulh x4, x11, x6
adcs x16, x16, x4 adcs x16, x16, x4
adc x17, x17, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x20, x11, x9 umulh x20, x11, x9
adc x17, x17, xzr
mul x19, x11, x9 mul x19, x11, x9
# A[0] * B[1] # A[0] * B[1]
mul x3, x10, x7 mul x3, x10, x7
@ -5051,9 +5051,9 @@ _ge_p1p1_to_p3:
adds x15, x15, x3 adds x15, x15, x3
umulh x4, x11, x6 umulh x4, x11, x6
adcs x16, x16, x4 adcs x16, x16, x4
adc x17, x17, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x20, x11, x9 umulh x20, x11, x9
adc x17, x17, xzr
mul x19, x11, x9 mul x19, x11, x9
# A[0] * B[1] # A[0] * B[1]
mul x3, x10, x7 mul x3, x10, x7
@ -5169,9 +5169,9 @@ _ge_p1p1_to_p3:
adds x15, x15, x3 adds x15, x15, x3
umulh x4, x24, x6 umulh x4, x24, x6
adcs x16, x16, x4 adcs x16, x16, x4
adc x17, x17, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x20, x24, x9 umulh x20, x24, x9
adc x17, x17, xzr
mul x19, x24, x9 mul x19, x24, x9
# A[0] * B[1] # A[0] * B[1]
mul x3, x23, x7 mul x3, x23, x7
@ -5287,9 +5287,9 @@ _ge_p1p1_to_p3:
adds x15, x15, x3 adds x15, x15, x3
umulh x4, x24, x6 umulh x4, x24, x6
adcs x16, x16, x4 adcs x16, x16, x4
adc x17, x17, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x20, x24, x9 umulh x20, x24, x9
adc x17, x17, xzr
mul x19, x24, x9 mul x19, x24, x9
# A[0] * B[1] # A[0] * B[1]
mul x3, x23, x7 mul x3, x23, x7
@ -5403,9 +5403,9 @@ _ge_p1p1_to_p3:
adds x15, x15, x3 adds x15, x15, x3
umulh x4, x11, x6 umulh x4, x11, x6
adcs x16, x16, x4 adcs x16, x16, x4
adc x17, x17, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x20, x11, x9 umulh x20, x11, x9
adc x17, x17, xzr
mul x19, x11, x9 mul x19, x11, x9
# A[0] * B[1] # A[0] * B[1]
mul x3, x10, x7 mul x3, x10, x7
@ -6075,9 +6075,9 @@ _ge_madd:
adds x22, x22, x25 adds x22, x22, x25
umulh x26, x17, x8 umulh x26, x17, x8
adcs x23, x23, x26 adcs x23, x23, x26
adc x24, x24, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x5, x17, x11 umulh x5, x17, x11
adc x24, x24, xzr
mul x4, x17, x11 mul x4, x17, x11
# A[0] * B[1] # A[0] * B[1]
mul x25, x16, x9 mul x25, x16, x9
@ -6191,9 +6191,9 @@ _ge_madd:
adds x5, x5, x25 adds x5, x5, x25
umulh x26, x13, x16 umulh x26, x13, x16
adcs x6, x6, x26 adcs x6, x6, x26
adc x7, x7, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x9, x13, x20 umulh x9, x13, x20
adc x7, x7, xzr
mul x8, x13, x20 mul x8, x13, x20
# A[0] * B[1] # A[0] * B[1]
mul x25, x12, x17 mul x25, x12, x17
@ -6348,9 +6348,9 @@ _ge_madd:
adds x17, x17, x25 adds x17, x17, x25
umulh x26, x22, x4 umulh x26, x22, x4
adcs x19, x19, x26 adcs x19, x19, x26
adc x20, x20, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x9, x22, x7 umulh x9, x22, x7
adc x20, x20, xzr
mul x8, x22, x7 mul x8, x22, x7
# A[0] * B[1] # A[0] * B[1]
mul x25, x21, x5 mul x25, x21, x5
@ -6593,9 +6593,9 @@ _ge_msub:
adds x22, x22, x25 adds x22, x22, x25
umulh x26, x17, x8 umulh x26, x17, x8
adcs x23, x23, x26 adcs x23, x23, x26
adc x24, x24, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x5, x17, x11 umulh x5, x17, x11
adc x24, x24, xzr
mul x4, x17, x11 mul x4, x17, x11
# A[0] * B[1] # A[0] * B[1]
mul x25, x16, x9 mul x25, x16, x9
@ -6709,9 +6709,9 @@ _ge_msub:
adds x5, x5, x25 adds x5, x5, x25
umulh x26, x13, x16 umulh x26, x13, x16
adcs x6, x6, x26 adcs x6, x6, x26
adc x7, x7, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x9, x13, x20 umulh x9, x13, x20
adc x7, x7, xzr
mul x8, x13, x20 mul x8, x13, x20
# A[0] * B[1] # A[0] * B[1]
mul x25, x12, x17 mul x25, x12, x17
@ -6866,9 +6866,9 @@ _ge_msub:
adds x17, x17, x25 adds x17, x17, x25
umulh x26, x22, x4 umulh x26, x22, x4
adcs x19, x19, x26 adcs x19, x19, x26
adc x20, x20, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x9, x22, x7 umulh x9, x22, x7
adc x20, x20, xzr
mul x8, x22, x7 mul x8, x22, x7
# A[0] * B[1] # A[0] * B[1]
mul x25, x21, x5 mul x25, x21, x5
@ -7109,9 +7109,9 @@ _ge_add:
adds x22, x22, x25 adds x22, x22, x25
umulh x26, x17, x8 umulh x26, x17, x8
adcs x23, x23, x26 adcs x23, x23, x26
adc x24, x24, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x5, x17, x11 umulh x5, x17, x11
adc x24, x24, xzr
mul x4, x17, x11 mul x4, x17, x11
# A[0] * B[1] # A[0] * B[1]
mul x25, x16, x9 mul x25, x16, x9
@ -7228,9 +7228,9 @@ _ge_add:
adds x5, x5, x25 adds x5, x5, x25
umulh x26, x13, x16 umulh x26, x13, x16
adcs x6, x6, x26 adcs x6, x6, x26
adc x7, x7, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x9, x13, x20 umulh x9, x13, x20
adc x7, x7, xzr
mul x8, x13, x20 mul x8, x13, x20
# A[0] * B[1] # A[0] * B[1]
mul x25, x12, x17 mul x25, x12, x17
@ -7388,9 +7388,9 @@ _ge_add:
adds x17, x17, x25 adds x17, x17, x25
umulh x26, x22, x4 umulh x26, x22, x4
adcs x19, x19, x26 adcs x19, x19, x26
adc x20, x20, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x9, x22, x7 umulh x9, x22, x7
adc x20, x20, xzr
mul x8, x22, x7 mul x8, x22, x7
# A[0] * B[1] # A[0] * B[1]
mul x25, x21, x5 mul x25, x21, x5
@ -7509,9 +7509,9 @@ _ge_add:
adds x9, x9, x25 adds x9, x9, x25
umulh x26, x5, x12 umulh x26, x5, x12
adcs x10, x10, x26 adcs x10, x10, x26
adc x11, x11, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x17, x5, x15 umulh x17, x5, x15
adc x11, x11, xzr
mul x16, x5, x15 mul x16, x5, x15
# A[0] * B[1] # A[0] * B[1]
mul x25, x4, x13 mul x25, x4, x13
@ -7753,9 +7753,9 @@ _ge_sub:
adds x22, x22, x25 adds x22, x22, x25
umulh x26, x17, x8 umulh x26, x17, x8
adcs x23, x23, x26 adcs x23, x23, x26
adc x24, x24, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x5, x17, x11 umulh x5, x17, x11
adc x24, x24, xzr
mul x4, x17, x11 mul x4, x17, x11
# A[0] * B[1] # A[0] * B[1]
mul x25, x16, x9 mul x25, x16, x9
@ -7880,9 +7880,9 @@ _ge_sub:
adds x5, x5, x25 adds x5, x5, x25
umulh x26, x13, x16 umulh x26, x13, x16
adcs x6, x6, x26 adcs x6, x6, x26
adc x7, x7, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x9, x13, x20 umulh x9, x13, x20
adc x7, x7, xzr
mul x8, x13, x20 mul x8, x13, x20
# A[0] * B[1] # A[0] * B[1]
mul x25, x12, x17 mul x25, x12, x17
@ -8040,9 +8040,9 @@ _ge_sub:
adds x17, x17, x25 adds x17, x17, x25
umulh x26, x22, x4 umulh x26, x22, x4
adcs x19, x19, x26 adcs x19, x19, x26
adc x20, x20, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x9, x22, x7 umulh x9, x22, x7
adc x20, x20, xzr
mul x8, x22, x7 mul x8, x22, x7
# A[0] * B[1] # A[0] * B[1]
mul x25, x21, x5 mul x25, x21, x5
@ -8169,9 +8169,9 @@ _ge_sub:
adds x9, x9, x25 adds x9, x9, x25
umulh x26, x5, x12 umulh x26, x5, x12
adcs x10, x10, x26 adcs x10, x10, x26
adc x11, x11, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x17, x5, x15 umulh x17, x5, x15
adc x11, x11, xzr
mul x16, x5, x15 mul x16, x5, x15
# A[0] * B[1] # A[0] * B[1]
mul x25, x4, x13 mul x25, x4, x13
@ -8570,9 +8570,9 @@ _sc_muladd:
adds x5, x5, x21 adds x5, x5, x21
umulh x22, x13, x16 umulh x22, x13, x16
adcs x6, x6, x22 adcs x6, x6, x22
adc x7, x7, xzr
# A[1] * B[3] # A[1] * B[3]
umulh x9, x13, x20 umulh x9, x13, x20
adc x7, x7, xzr
mul x8, x13, x20 mul x8, x13, x20
# A[0] * B[1] # A[0] * B[1]
mul x21, x12, x17 mul x21, x12, x17

View File

@ -490,9 +490,9 @@ void fe_mul(fe r, const fe a, const fe b)
"adds x7, x7, x3\n\t" "adds x7, x7, x3\n\t"
"umulh x4, x15, x19\n\t" "umulh x4, x15, x19\n\t"
"adcs x8, x8, x4\n\t" "adcs x8, x8, x4\n\t"
"adc x9, x9, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x11, x15, x22\n\t" "umulh x11, x15, x22\n\t"
"adc x9, x9, xzr\n\t"
"mul x10, x15, x22\n\t" "mul x10, x15, x22\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x3, x14, x20\n\t" "mul x3, x14, x20\n\t"
@ -1711,9 +1711,9 @@ int curve25519(byte* r, const byte* n, const byte* a)
"adds x20, x20, x3\n\t" "adds x20, x20, x3\n\t"
"umulh x4, x15, x6\n\t" "umulh x4, x15, x6\n\t"
"adcs x21, x21, x4\n\t" "adcs x21, x21, x4\n\t"
"adc x22, x22, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x26, x15, x9\n\t" "umulh x26, x15, x9\n\t"
"adc x22, x22, xzr\n\t"
"mul x25, x15, x9\n\t" "mul x25, x15, x9\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x3, x14, x7\n\t" "mul x3, x14, x7\n\t"
@ -1827,9 +1827,9 @@ int curve25519(byte* r, const byte* n, const byte* a)
"adds x20, x20, x3\n\t" "adds x20, x20, x3\n\t"
"umulh x4, x11, x25\n\t" "umulh x4, x11, x25\n\t"
"adcs x21, x21, x4\n\t" "adcs x21, x21, x4\n\t"
"adc x22, x22, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x15, x11, x28\n\t" "umulh x15, x11, x28\n\t"
"adc x22, x22, xzr\n\t"
"mul x14, x11, x28\n\t" "mul x14, x11, x28\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x3, x10, x26\n\t" "mul x3, x10, x26\n\t"
@ -2098,9 +2098,9 @@ int curve25519(byte* r, const byte* n, const byte* a)
"adds x7, x7, x3\n\t" "adds x7, x7, x3\n\t"
"umulh x4, x15, x10\n\t" "umulh x4, x15, x10\n\t"
"adcs x8, x8, x4\n\t" "adcs x8, x8, x4\n\t"
"adc x9, x9, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x26, x15, x13\n\t" "umulh x26, x15, x13\n\t"
"adc x9, x9, xzr\n\t"
"mul x25, x15, x13\n\t" "mul x25, x15, x13\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x3, x14, x11\n\t" "mul x3, x14, x11\n\t"
@ -2269,9 +2269,9 @@ int curve25519(byte* r, const byte* n, const byte* a)
"adds x7, x7, x3\n\t" "adds x7, x7, x3\n\t"
"umulh x4, x15, x10\n\t" "umulh x4, x15, x10\n\t"
"adcs x8, x8, x4\n\t" "adcs x8, x8, x4\n\t"
"adc x9, x9, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x26, x15, x13\n\t" "umulh x26, x15, x13\n\t"
"adc x9, x9, xzr\n\t"
"mul x25, x15, x13\n\t" "mul x25, x15, x13\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x3, x14, x11\n\t" "mul x3, x14, x11\n\t"
@ -2577,9 +2577,9 @@ int curve25519(byte* r, const byte* n, const byte* a)
"adds x11, x11, x3\n\t" "adds x11, x11, x3\n\t"
"umulh x4, x20, x14\n\t" "umulh x4, x20, x14\n\t"
"adcs x12, x12, x4\n\t" "adcs x12, x12, x4\n\t"
"adc x13, x13, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x26, x20, x17\n\t" "umulh x26, x20, x17\n\t"
"adc x13, x13, xzr\n\t"
"mul x25, x20, x17\n\t" "mul x25, x20, x17\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x3, x19, x15\n\t" "mul x3, x19, x15\n\t"
@ -3556,9 +3556,9 @@ int curve25519(byte* r, const byte* n, const byte* a)
"adds x15, x15, x3\n\t" "adds x15, x15, x3\n\t"
"umulh x4, x7, x10\n\t" "umulh x4, x7, x10\n\t"
"adcs x16, x16, x4\n\t" "adcs x16, x16, x4\n\t"
"adc x17, x17, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x20, x7, x13\n\t" "umulh x20, x7, x13\n\t"
"adc x17, x17, xzr\n\t"
"mul x19, x7, x13\n\t" "mul x19, x7, x13\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x3, x6, x11\n\t" "mul x3, x6, x11\n\t"
@ -4520,9 +4520,9 @@ void ge_p1p1_to_p2(ge_p2* r, const ge_p1p1* p)
"adds x15, x15, x3\n\t" "adds x15, x15, x3\n\t"
"umulh x4, x11, x6\n\t" "umulh x4, x11, x6\n\t"
"adcs x16, x16, x4\n\t" "adcs x16, x16, x4\n\t"
"adc x17, x17, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x20, x11, x9\n\t" "umulh x20, x11, x9\n\t"
"adc x17, x17, xzr\n\t"
"mul x19, x11, x9\n\t" "mul x19, x11, x9\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x3, x10, x7\n\t" "mul x3, x10, x7\n\t"
@ -4638,9 +4638,9 @@ void ge_p1p1_to_p2(ge_p2* r, const ge_p1p1* p)
"adds x15, x15, x3\n\t" "adds x15, x15, x3\n\t"
"umulh x4, x11, x6\n\t" "umulh x4, x11, x6\n\t"
"adcs x16, x16, x4\n\t" "adcs x16, x16, x4\n\t"
"adc x17, x17, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x20, x11, x9\n\t" "umulh x20, x11, x9\n\t"
"adc x17, x17, xzr\n\t"
"mul x19, x11, x9\n\t" "mul x19, x11, x9\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x3, x10, x7\n\t" "mul x3, x10, x7\n\t"
@ -4756,9 +4756,9 @@ void ge_p1p1_to_p2(ge_p2* r, const ge_p1p1* p)
"adds x15, x15, x3\n\t" "adds x15, x15, x3\n\t"
"umulh x4, x11, x6\n\t" "umulh x4, x11, x6\n\t"
"adcs x16, x16, x4\n\t" "adcs x16, x16, x4\n\t"
"adc x17, x17, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x20, x11, x9\n\t" "umulh x20, x11, x9\n\t"
"adc x17, x17, xzr\n\t"
"mul x19, x11, x9\n\t" "mul x19, x11, x9\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x3, x10, x7\n\t" "mul x3, x10, x7\n\t"
@ -4890,9 +4890,9 @@ void ge_p1p1_to_p3(ge_p3* r, const ge_p1p1* p)
"adds x15, x15, x3\n\t" "adds x15, x15, x3\n\t"
"umulh x4, x11, x6\n\t" "umulh x4, x11, x6\n\t"
"adcs x16, x16, x4\n\t" "adcs x16, x16, x4\n\t"
"adc x17, x17, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x20, x11, x9\n\t" "umulh x20, x11, x9\n\t"
"adc x17, x17, xzr\n\t"
"mul x19, x11, x9\n\t" "mul x19, x11, x9\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x3, x10, x7\n\t" "mul x3, x10, x7\n\t"
@ -5008,9 +5008,9 @@ void ge_p1p1_to_p3(ge_p3* r, const ge_p1p1* p)
"adds x15, x15, x3\n\t" "adds x15, x15, x3\n\t"
"umulh x4, x24, x6\n\t" "umulh x4, x24, x6\n\t"
"adcs x16, x16, x4\n\t" "adcs x16, x16, x4\n\t"
"adc x17, x17, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x20, x24, x9\n\t" "umulh x20, x24, x9\n\t"
"adc x17, x17, xzr\n\t"
"mul x19, x24, x9\n\t" "mul x19, x24, x9\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x3, x23, x7\n\t" "mul x3, x23, x7\n\t"
@ -5126,9 +5126,9 @@ void ge_p1p1_to_p3(ge_p3* r, const ge_p1p1* p)
"adds x15, x15, x3\n\t" "adds x15, x15, x3\n\t"
"umulh x4, x24, x6\n\t" "umulh x4, x24, x6\n\t"
"adcs x16, x16, x4\n\t" "adcs x16, x16, x4\n\t"
"adc x17, x17, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x20, x24, x9\n\t" "umulh x20, x24, x9\n\t"
"adc x17, x17, xzr\n\t"
"mul x19, x24, x9\n\t" "mul x19, x24, x9\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x3, x23, x7\n\t" "mul x3, x23, x7\n\t"
@ -5242,9 +5242,9 @@ void ge_p1p1_to_p3(ge_p3* r, const ge_p1p1* p)
"adds x15, x15, x3\n\t" "adds x15, x15, x3\n\t"
"umulh x4, x11, x6\n\t" "umulh x4, x11, x6\n\t"
"adcs x16, x16, x4\n\t" "adcs x16, x16, x4\n\t"
"adc x17, x17, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x20, x11, x9\n\t" "umulh x20, x11, x9\n\t"
"adc x17, x17, xzr\n\t"
"mul x19, x11, x9\n\t" "mul x19, x11, x9\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x3, x10, x7\n\t" "mul x3, x10, x7\n\t"
@ -5873,9 +5873,9 @@ void ge_madd(ge_p1p1* r, const ge_p3* p, const ge_precomp* q)
"adds x22, x22, x25\n\t" "adds x22, x22, x25\n\t"
"umulh x26, x17, x8\n\t" "umulh x26, x17, x8\n\t"
"adcs x23, x23, x26\n\t" "adcs x23, x23, x26\n\t"
"adc x24, x24, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x5, x17, x11\n\t" "umulh x5, x17, x11\n\t"
"adc x24, x24, xzr\n\t"
"mul x4, x17, x11\n\t" "mul x4, x17, x11\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x25, x16, x9\n\t" "mul x25, x16, x9\n\t"
@ -5989,9 +5989,9 @@ void ge_madd(ge_p1p1* r, const ge_p3* p, const ge_precomp* q)
"adds x5, x5, x25\n\t" "adds x5, x5, x25\n\t"
"umulh x26, x13, x16\n\t" "umulh x26, x13, x16\n\t"
"adcs x6, x6, x26\n\t" "adcs x6, x6, x26\n\t"
"adc x7, x7, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x9, x13, x20\n\t" "umulh x9, x13, x20\n\t"
"adc x7, x7, xzr\n\t"
"mul x8, x13, x20\n\t" "mul x8, x13, x20\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x25, x12, x17\n\t" "mul x25, x12, x17\n\t"
@ -6146,9 +6146,9 @@ void ge_madd(ge_p1p1* r, const ge_p3* p, const ge_precomp* q)
"adds x17, x17, x25\n\t" "adds x17, x17, x25\n\t"
"umulh x26, x22, x4\n\t" "umulh x26, x22, x4\n\t"
"adcs x19, x19, x26\n\t" "adcs x19, x19, x26\n\t"
"adc x20, x20, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x9, x22, x7\n\t" "umulh x9, x22, x7\n\t"
"adc x20, x20, xzr\n\t"
"mul x8, x22, x7\n\t" "mul x8, x22, x7\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x25, x21, x5\n\t" "mul x25, x21, x5\n\t"
@ -6370,9 +6370,9 @@ void ge_msub(ge_p1p1* r, const ge_p3* p, const ge_precomp* q)
"adds x22, x22, x25\n\t" "adds x22, x22, x25\n\t"
"umulh x26, x17, x8\n\t" "umulh x26, x17, x8\n\t"
"adcs x23, x23, x26\n\t" "adcs x23, x23, x26\n\t"
"adc x24, x24, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x5, x17, x11\n\t" "umulh x5, x17, x11\n\t"
"adc x24, x24, xzr\n\t"
"mul x4, x17, x11\n\t" "mul x4, x17, x11\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x25, x16, x9\n\t" "mul x25, x16, x9\n\t"
@ -6486,9 +6486,9 @@ void ge_msub(ge_p1p1* r, const ge_p3* p, const ge_precomp* q)
"adds x5, x5, x25\n\t" "adds x5, x5, x25\n\t"
"umulh x26, x13, x16\n\t" "umulh x26, x13, x16\n\t"
"adcs x6, x6, x26\n\t" "adcs x6, x6, x26\n\t"
"adc x7, x7, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x9, x13, x20\n\t" "umulh x9, x13, x20\n\t"
"adc x7, x7, xzr\n\t"
"mul x8, x13, x20\n\t" "mul x8, x13, x20\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x25, x12, x17\n\t" "mul x25, x12, x17\n\t"
@ -6643,9 +6643,9 @@ void ge_msub(ge_p1p1* r, const ge_p3* p, const ge_precomp* q)
"adds x17, x17, x25\n\t" "adds x17, x17, x25\n\t"
"umulh x26, x22, x4\n\t" "umulh x26, x22, x4\n\t"
"adcs x19, x19, x26\n\t" "adcs x19, x19, x26\n\t"
"adc x20, x20, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x9, x22, x7\n\t" "umulh x9, x22, x7\n\t"
"adc x20, x20, xzr\n\t"
"mul x8, x22, x7\n\t" "mul x8, x22, x7\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x25, x21, x5\n\t" "mul x25, x21, x5\n\t"
@ -6865,9 +6865,9 @@ void ge_add(ge_p1p1* r, const ge_p3* p, const ge_cached* q)
"adds x22, x22, x25\n\t" "adds x22, x22, x25\n\t"
"umulh x26, x17, x8\n\t" "umulh x26, x17, x8\n\t"
"adcs x23, x23, x26\n\t" "adcs x23, x23, x26\n\t"
"adc x24, x24, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x5, x17, x11\n\t" "umulh x5, x17, x11\n\t"
"adc x24, x24, xzr\n\t"
"mul x4, x17, x11\n\t" "mul x4, x17, x11\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x25, x16, x9\n\t" "mul x25, x16, x9\n\t"
@ -6984,9 +6984,9 @@ void ge_add(ge_p1p1* r, const ge_p3* p, const ge_cached* q)
"adds x5, x5, x25\n\t" "adds x5, x5, x25\n\t"
"umulh x26, x13, x16\n\t" "umulh x26, x13, x16\n\t"
"adcs x6, x6, x26\n\t" "adcs x6, x6, x26\n\t"
"adc x7, x7, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x9, x13, x20\n\t" "umulh x9, x13, x20\n\t"
"adc x7, x7, xzr\n\t"
"mul x8, x13, x20\n\t" "mul x8, x13, x20\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x25, x12, x17\n\t" "mul x25, x12, x17\n\t"
@ -7144,9 +7144,9 @@ void ge_add(ge_p1p1* r, const ge_p3* p, const ge_cached* q)
"adds x17, x17, x25\n\t" "adds x17, x17, x25\n\t"
"umulh x26, x22, x4\n\t" "umulh x26, x22, x4\n\t"
"adcs x19, x19, x26\n\t" "adcs x19, x19, x26\n\t"
"adc x20, x20, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x9, x22, x7\n\t" "umulh x9, x22, x7\n\t"
"adc x20, x20, xzr\n\t"
"mul x8, x22, x7\n\t" "mul x8, x22, x7\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x25, x21, x5\n\t" "mul x25, x21, x5\n\t"
@ -7265,9 +7265,9 @@ void ge_add(ge_p1p1* r, const ge_p3* p, const ge_cached* q)
"adds x9, x9, x25\n\t" "adds x9, x9, x25\n\t"
"umulh x26, x5, x12\n\t" "umulh x26, x5, x12\n\t"
"adcs x10, x10, x26\n\t" "adcs x10, x10, x26\n\t"
"adc x11, x11, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x17, x5, x15\n\t" "umulh x17, x5, x15\n\t"
"adc x11, x11, xzr\n\t"
"mul x16, x5, x15\n\t" "mul x16, x5, x15\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x25, x4, x13\n\t" "mul x25, x4, x13\n\t"
@ -7488,9 +7488,9 @@ void ge_sub(ge_p1p1* r, const ge_p3* p, const ge_cached* q)
"adds x22, x22, x25\n\t" "adds x22, x22, x25\n\t"
"umulh x26, x17, x8\n\t" "umulh x26, x17, x8\n\t"
"adcs x23, x23, x26\n\t" "adcs x23, x23, x26\n\t"
"adc x24, x24, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x5, x17, x11\n\t" "umulh x5, x17, x11\n\t"
"adc x24, x24, xzr\n\t"
"mul x4, x17, x11\n\t" "mul x4, x17, x11\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x25, x16, x9\n\t" "mul x25, x16, x9\n\t"
@ -7615,9 +7615,9 @@ void ge_sub(ge_p1p1* r, const ge_p3* p, const ge_cached* q)
"adds x5, x5, x25\n\t" "adds x5, x5, x25\n\t"
"umulh x26, x13, x16\n\t" "umulh x26, x13, x16\n\t"
"adcs x6, x6, x26\n\t" "adcs x6, x6, x26\n\t"
"adc x7, x7, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x9, x13, x20\n\t" "umulh x9, x13, x20\n\t"
"adc x7, x7, xzr\n\t"
"mul x8, x13, x20\n\t" "mul x8, x13, x20\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x25, x12, x17\n\t" "mul x25, x12, x17\n\t"
@ -7775,9 +7775,9 @@ void ge_sub(ge_p1p1* r, const ge_p3* p, const ge_cached* q)
"adds x17, x17, x25\n\t" "adds x17, x17, x25\n\t"
"umulh x26, x22, x4\n\t" "umulh x26, x22, x4\n\t"
"adcs x19, x19, x26\n\t" "adcs x19, x19, x26\n\t"
"adc x20, x20, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x9, x22, x7\n\t" "umulh x9, x22, x7\n\t"
"adc x20, x20, xzr\n\t"
"mul x8, x22, x7\n\t" "mul x8, x22, x7\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x25, x21, x5\n\t" "mul x25, x21, x5\n\t"
@ -7904,9 +7904,9 @@ void ge_sub(ge_p1p1* r, const ge_p3* p, const ge_cached* q)
"adds x9, x9, x25\n\t" "adds x9, x9, x25\n\t"
"umulh x26, x5, x12\n\t" "umulh x26, x5, x12\n\t"
"adcs x10, x10, x26\n\t" "adcs x10, x10, x26\n\t"
"adc x11, x11, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x17, x5, x15\n\t" "umulh x17, x5, x15\n\t"
"adc x11, x11, xzr\n\t"
"mul x16, x5, x15\n\t" "mul x16, x5, x15\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x25, x4, x13\n\t" "mul x25, x4, x13\n\t"
@ -8265,9 +8265,9 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c)
"adds x5, x5, x21\n\t" "adds x5, x5, x21\n\t"
"umulh x22, x13, x16\n\t" "umulh x22, x13, x16\n\t"
"adcs x6, x6, x22\n\t" "adcs x6, x6, x22\n\t"
"adc x7, x7, xzr\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"umulh x9, x13, x20\n\t" "umulh x9, x13, x20\n\t"
"adc x7, x7, xzr\n\t"
"mul x8, x13, x20\n\t" "mul x8, x13, x20\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"mul x21, x12, x17\n\t" "mul x21, x12, x17\n\t"

View File

@ -595,7 +595,11 @@ L_AES_invert_key_loop:
STM r0!, {r6, r7, r8, r9} STM r0!, {r6, r7, r8, r9}
SUBS r11, r11, #0x2 SUBS r11, r11, #0x2
SUB r10, r10, #0x10 SUB r10, r10, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_invert_key_loop BNE L_AES_invert_key_loop
#else
BNE.N L_AES_invert_key_loop
#endif
SUB r0, r0, r1, LSL #3 SUB r0, r0, r1, LSL #3
ADD r0, r0, #0x10 ADD r0, r0, #0x10
SUB r11, r1, #0x1 SUB r11, r1, #0x1
@ -666,7 +670,11 @@ L_AES_invert_key_mix_loop:
EOR r8, r8, r9, ROR #24 EOR r8, r8, r9, ROR #24
STR r8, [r0], #4 STR r8, [r0], #4
SUBS r11, r11, #0x1 SUBS r11, r11, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_invert_key_mix_loop BNE L_AES_invert_key_mix_loop
#else
BNE.N L_AES_invert_key_mix_loop
#endif
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
# Cycle Count = 165 # Cycle Count = 165
.size AES_invert_key,.-AES_invert_key .size AES_invert_key,.-AES_invert_key
@ -695,9 +703,17 @@ AES_set_encrypt_key:
LDR r8, L_AES_Thumb2_te LDR r8, L_AES_Thumb2_te
ADR lr, L_AES_Thumb2_rcon ADR lr, L_AES_Thumb2_rcon
CMP r1, #0x80 CMP r1, #0x80
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_set_encrypt_key_start_128 BEQ L_AES_set_encrypt_key_start_128
#else
BEQ.N L_AES_set_encrypt_key_start_128
#endif
CMP r1, #0xc0 CMP r1, #0xc0
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_set_encrypt_key_start_192 BEQ L_AES_set_encrypt_key_start_192
#else
BEQ.N L_AES_set_encrypt_key_start_192
#endif
LDRD r4, r5, [r0] LDRD r4, r5, [r0]
LDRD r6, r7, [r0, #8] LDRD r6, r7, [r0, #8]
REV r4, r4 REV r4, r4
@ -757,7 +773,11 @@ L_AES_set_encrypt_key_loop_256:
STM r2, {r4, r5, r6, r7} STM r2, {r4, r5, r6, r7}
SUB r2, r2, #0x10 SUB r2, r2, #0x10
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_set_encrypt_key_loop_256 BNE L_AES_set_encrypt_key_loop_256
#else
BNE.N L_AES_set_encrypt_key_loop_256
#endif
UBFX r4, r7, #0, #8 UBFX r4, r7, #0, #8
UBFX r5, r7, #8, #8 UBFX r5, r7, #8, #8
UBFX r6, r7, #16, #8 UBFX r6, r7, #16, #8
@ -817,7 +837,11 @@ L_AES_set_encrypt_key_loop_192:
EOR r7, r7, r6 EOR r7, r7, r6
STM r2, {r0, r1, r4, r5, r6, r7} STM r2, {r0, r1, r4, r5, r6, r7}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_set_encrypt_key_loop_192 BNE L_AES_set_encrypt_key_loop_192
#else
BNE.N L_AES_set_encrypt_key_loop_192
#endif
UBFX r0, r7, #0, #8 UBFX r0, r7, #0, #8
UBFX r1, r7, #8, #8 UBFX r1, r7, #8, #8
UBFX r4, r7, #16, #8 UBFX r4, r7, #16, #8
@ -868,7 +892,11 @@ L_AES_set_encrypt_key_loop_128:
EOR r7, r7, r6 EOR r7, r7, r6
STM r2, {r4, r5, r6, r7} STM r2, {r4, r5, r6, r7}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_set_encrypt_key_loop_128 BNE L_AES_set_encrypt_key_loop_128
#else
BNE.N L_AES_set_encrypt_key_loop_128
#endif
L_AES_set_encrypt_key_end: L_AES_set_encrypt_key_end:
POP {r4, r5, r6, r7, r8, pc} POP {r4, r5, r6, r7, r8, pc}
# Cycle Count = 327 # Cycle Count = 327
@ -981,7 +1009,11 @@ L_AES_encrypt_block_nr:
EOR r6, r6, r10 EOR r6, r6, r10
EOR r7, r7, r11 EOR r7, r7, r11
SUBS r1, r1, #0x1 SUBS r1, r1, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_encrypt_block_nr BNE L_AES_encrypt_block_nr
#else
BNE.N L_AES_encrypt_block_nr
#endif
UBFX r8, r5, #16, #8 UBFX r8, r5, #16, #8
LSR r11, r4, #24 LSR r11, r4, #24
UBFX lr, r6, #8, #8 UBFX lr, r6, #8, #8
@ -1105,9 +1137,17 @@ AES_ECB_encrypt:
LDR r12, [sp, #36] LDR r12, [sp, #36]
PUSH {r3} PUSH {r3}
CMP r12, #0xa CMP r12, #0xa
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_ECB_encrypt_start_block_128 BEQ L_AES_ECB_encrypt_start_block_128
#else
BEQ.N L_AES_ECB_encrypt_start_block_128
#endif
CMP r12, #0xc CMP r12, #0xc
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_ECB_encrypt_start_block_192 BEQ L_AES_ECB_encrypt_start_block_192
#else
BEQ.N L_AES_ECB_encrypt_start_block_192
#endif
L_AES_ECB_encrypt_loop_block_256: L_AES_ECB_encrypt_loop_block_256:
LDR r4, [lr] LDR r4, [lr]
LDR r5, [lr, #4] LDR r5, [lr, #4]
@ -1139,7 +1179,11 @@ L_AES_ECB_encrypt_loop_block_256:
SUBS r2, r2, #0x10 SUBS r2, r2, #0x10
ADD lr, lr, #0x10 ADD lr, lr, #0x10
ADD r1, r1, #0x10 ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_ECB_encrypt_loop_block_256 BNE L_AES_ECB_encrypt_loop_block_256
#else
BNE.N L_AES_ECB_encrypt_loop_block_256
#endif
B L_AES_ECB_encrypt_end B L_AES_ECB_encrypt_end
L_AES_ECB_encrypt_start_block_192: L_AES_ECB_encrypt_start_block_192:
L_AES_ECB_encrypt_loop_block_192: L_AES_ECB_encrypt_loop_block_192:
@ -1173,7 +1217,11 @@ L_AES_ECB_encrypt_loop_block_192:
SUBS r2, r2, #0x10 SUBS r2, r2, #0x10
ADD lr, lr, #0x10 ADD lr, lr, #0x10
ADD r1, r1, #0x10 ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_ECB_encrypt_loop_block_192 BNE L_AES_ECB_encrypt_loop_block_192
#else
BNE.N L_AES_ECB_encrypt_loop_block_192
#endif
B L_AES_ECB_encrypt_end B L_AES_ECB_encrypt_end
L_AES_ECB_encrypt_start_block_128: L_AES_ECB_encrypt_start_block_128:
L_AES_ECB_encrypt_loop_block_128: L_AES_ECB_encrypt_loop_block_128:
@ -1207,7 +1255,11 @@ L_AES_ECB_encrypt_loop_block_128:
SUBS r2, r2, #0x10 SUBS r2, r2, #0x10
ADD lr, lr, #0x10 ADD lr, lr, #0x10
ADD r1, r1, #0x10 ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_ECB_encrypt_loop_block_128 BNE L_AES_ECB_encrypt_loop_block_128
#else
BNE.N L_AES_ECB_encrypt_loop_block_128
#endif
L_AES_ECB_encrypt_end: L_AES_ECB_encrypt_end:
POP {r3} POP {r3}
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@ -1228,9 +1280,17 @@ AES_CBC_encrypt:
LDM r9, {r4, r5, r6, r7} LDM r9, {r4, r5, r6, r7}
PUSH {r3, r9} PUSH {r3, r9}
CMP r8, #0xa CMP r8, #0xa
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_CBC_encrypt_start_block_128 BEQ L_AES_CBC_encrypt_start_block_128
#else
BEQ.N L_AES_CBC_encrypt_start_block_128
#endif
CMP r8, #0xc CMP r8, #0xc
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_CBC_encrypt_start_block_192 BEQ L_AES_CBC_encrypt_start_block_192
#else
BEQ.N L_AES_CBC_encrypt_start_block_192
#endif
L_AES_CBC_encrypt_loop_block_256: L_AES_CBC_encrypt_loop_block_256:
LDR r8, [lr] LDR r8, [lr]
LDR r9, [lr, #4] LDR r9, [lr, #4]
@ -1266,7 +1326,11 @@ L_AES_CBC_encrypt_loop_block_256:
SUBS r2, r2, #0x10 SUBS r2, r2, #0x10
ADD lr, lr, #0x10 ADD lr, lr, #0x10
ADD r1, r1, #0x10 ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_CBC_encrypt_loop_block_256 BNE L_AES_CBC_encrypt_loop_block_256
#else
BNE.N L_AES_CBC_encrypt_loop_block_256
#endif
B L_AES_CBC_encrypt_end B L_AES_CBC_encrypt_end
L_AES_CBC_encrypt_start_block_192: L_AES_CBC_encrypt_start_block_192:
L_AES_CBC_encrypt_loop_block_192: L_AES_CBC_encrypt_loop_block_192:
@ -1304,7 +1368,11 @@ L_AES_CBC_encrypt_loop_block_192:
SUBS r2, r2, #0x10 SUBS r2, r2, #0x10
ADD lr, lr, #0x10 ADD lr, lr, #0x10
ADD r1, r1, #0x10 ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_CBC_encrypt_loop_block_192 BNE L_AES_CBC_encrypt_loop_block_192
#else
BNE.N L_AES_CBC_encrypt_loop_block_192
#endif
B L_AES_CBC_encrypt_end B L_AES_CBC_encrypt_end
L_AES_CBC_encrypt_start_block_128: L_AES_CBC_encrypt_start_block_128:
L_AES_CBC_encrypt_loop_block_128: L_AES_CBC_encrypt_loop_block_128:
@ -1342,7 +1410,11 @@ L_AES_CBC_encrypt_loop_block_128:
SUBS r2, r2, #0x10 SUBS r2, r2, #0x10
ADD lr, lr, #0x10 ADD lr, lr, #0x10
ADD r1, r1, #0x10 ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_CBC_encrypt_loop_block_128 BNE L_AES_CBC_encrypt_loop_block_128
#else
BNE.N L_AES_CBC_encrypt_loop_block_128
#endif
L_AES_CBC_encrypt_end: L_AES_CBC_encrypt_end:
POP {r3, r9} POP {r3, r9}
STM r9, {r4, r5, r6, r7} STM r9, {r4, r5, r6, r7}
@ -1369,9 +1441,17 @@ AES_CTR_encrypt:
STM r8, {r4, r5, r6, r7} STM r8, {r4, r5, r6, r7}
PUSH {r3, r8} PUSH {r3, r8}
CMP r12, #0xa CMP r12, #0xa
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_CTR_encrypt_start_block_128 BEQ L_AES_CTR_encrypt_start_block_128
#else
BEQ.N L_AES_CTR_encrypt_start_block_128
#endif
CMP r12, #0xc CMP r12, #0xc
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_CTR_encrypt_start_block_192 BEQ L_AES_CTR_encrypt_start_block_192
#else
BEQ.N L_AES_CTR_encrypt_start_block_192
#endif
L_AES_CTR_encrypt_loop_block_256: L_AES_CTR_encrypt_loop_block_256:
PUSH {r1, r2, lr} PUSH {r1, r2, lr}
LDR lr, [sp, #16] LDR lr, [sp, #16]
@ -1411,7 +1491,11 @@ L_AES_CTR_encrypt_loop_block_256:
SUBS r2, r2, #0x10 SUBS r2, r2, #0x10
ADD lr, lr, #0x10 ADD lr, lr, #0x10
ADD r1, r1, #0x10 ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_CTR_encrypt_loop_block_256 BNE L_AES_CTR_encrypt_loop_block_256
#else
BNE.N L_AES_CTR_encrypt_loop_block_256
#endif
B L_AES_CTR_encrypt_end B L_AES_CTR_encrypt_end
L_AES_CTR_encrypt_start_block_192: L_AES_CTR_encrypt_start_block_192:
L_AES_CTR_encrypt_loop_block_192: L_AES_CTR_encrypt_loop_block_192:
@ -1453,7 +1537,11 @@ L_AES_CTR_encrypt_loop_block_192:
SUBS r2, r2, #0x10 SUBS r2, r2, #0x10
ADD lr, lr, #0x10 ADD lr, lr, #0x10
ADD r1, r1, #0x10 ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_CTR_encrypt_loop_block_192 BNE L_AES_CTR_encrypt_loop_block_192
#else
BNE.N L_AES_CTR_encrypt_loop_block_192
#endif
B L_AES_CTR_encrypt_end B L_AES_CTR_encrypt_end
L_AES_CTR_encrypt_start_block_128: L_AES_CTR_encrypt_start_block_128:
L_AES_CTR_encrypt_loop_block_128: L_AES_CTR_encrypt_loop_block_128:
@ -1495,7 +1583,11 @@ L_AES_CTR_encrypt_loop_block_128:
SUBS r2, r2, #0x10 SUBS r2, r2, #0x10
ADD lr, lr, #0x10 ADD lr, lr, #0x10
ADD r1, r1, #0x10 ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_CTR_encrypt_loop_block_128 BNE L_AES_CTR_encrypt_loop_block_128
#else
BNE.N L_AES_CTR_encrypt_loop_block_128
#endif
L_AES_CTR_encrypt_end: L_AES_CTR_encrypt_end:
POP {r3, r8} POP {r3, r8}
REV r4, r4 REV r4, r4
@ -1617,7 +1709,11 @@ L_AES_decrypt_block_nr:
EOR r6, r6, r10 EOR r6, r6, r10
EOR r7, r7, r11 EOR r7, r7, r11
SUBS r1, r1, #0x1 SUBS r1, r1, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_decrypt_block_nr BNE L_AES_decrypt_block_nr
#else
BNE.N L_AES_decrypt_block_nr
#endif
UBFX r8, r7, #16, #8 UBFX r8, r7, #16, #8
LSR r11, r4, #24 LSR r11, r4, #24
UBFX r12, r6, #8, #8 UBFX r12, r6, #8, #8
@ -2001,9 +2097,17 @@ AES_ECB_decrypt:
MOV r12, r2 MOV r12, r2
ADR r2, L_AES_Thumb2_td4 ADR r2, L_AES_Thumb2_td4
CMP r8, #0xa CMP r8, #0xa
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_ECB_decrypt_start_block_128 BEQ L_AES_ECB_decrypt_start_block_128
#else
BEQ.N L_AES_ECB_decrypt_start_block_128
#endif
CMP r8, #0xc CMP r8, #0xc
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_ECB_decrypt_start_block_192 BEQ L_AES_ECB_decrypt_start_block_192
#else
BEQ.N L_AES_ECB_decrypt_start_block_192
#endif
L_AES_ECB_decrypt_loop_block_256: L_AES_ECB_decrypt_loop_block_256:
LDR r4, [lr] LDR r4, [lr]
LDR r5, [lr, #4] LDR r5, [lr, #4]
@ -2034,7 +2138,11 @@ L_AES_ECB_decrypt_loop_block_256:
SUBS r12, r12, #0x10 SUBS r12, r12, #0x10
ADD lr, lr, #0x10 ADD lr, lr, #0x10
ADD r1, r1, #0x10 ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_ECB_decrypt_loop_block_256 BNE L_AES_ECB_decrypt_loop_block_256
#else
BNE.N L_AES_ECB_decrypt_loop_block_256
#endif
B L_AES_ECB_decrypt_end B L_AES_ECB_decrypt_end
L_AES_ECB_decrypt_start_block_192: L_AES_ECB_decrypt_start_block_192:
L_AES_ECB_decrypt_loop_block_192: L_AES_ECB_decrypt_loop_block_192:
@ -2067,7 +2175,11 @@ L_AES_ECB_decrypt_loop_block_192:
SUBS r12, r12, #0x10 SUBS r12, r12, #0x10
ADD lr, lr, #0x10 ADD lr, lr, #0x10
ADD r1, r1, #0x10 ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_ECB_decrypt_loop_block_192 BNE L_AES_ECB_decrypt_loop_block_192
#else
BNE.N L_AES_ECB_decrypt_loop_block_192
#endif
B L_AES_ECB_decrypt_end B L_AES_ECB_decrypt_end
L_AES_ECB_decrypt_start_block_128: L_AES_ECB_decrypt_start_block_128:
L_AES_ECB_decrypt_loop_block_128: L_AES_ECB_decrypt_loop_block_128:
@ -2100,7 +2212,11 @@ L_AES_ECB_decrypt_loop_block_128:
SUBS r12, r12, #0x10 SUBS r12, r12, #0x10
ADD lr, lr, #0x10 ADD lr, lr, #0x10
ADD r1, r1, #0x10 ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_ECB_decrypt_loop_block_128 BNE L_AES_ECB_decrypt_loop_block_128
#else
BNE.N L_AES_ECB_decrypt_loop_block_128
#endif
L_AES_ECB_decrypt_end: L_AES_ECB_decrypt_end:
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
# Cycle Count = 210 # Cycle Count = 210
@ -2121,9 +2237,17 @@ AES_CBC_decrypt:
ADR r2, L_AES_Thumb2_td4 ADR r2, L_AES_Thumb2_td4
PUSH {r3, r4} PUSH {r3, r4}
CMP r8, #0xa CMP r8, #0xa
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_CBC_decrypt_loop_block_128 BEQ L_AES_CBC_decrypt_loop_block_128
#else
BEQ.N L_AES_CBC_decrypt_loop_block_128
#endif
CMP r8, #0xc CMP r8, #0xc
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_CBC_decrypt_loop_block_192 BEQ L_AES_CBC_decrypt_loop_block_192
#else
BEQ.N L_AES_CBC_decrypt_loop_block_192
#endif
L_AES_CBC_decrypt_loop_block_256: L_AES_CBC_decrypt_loop_block_256:
PUSH {r1, r12, lr} PUSH {r1, r12, lr}
LDR r4, [lr] LDR r4, [lr]
@ -2164,7 +2288,11 @@ L_AES_CBC_decrypt_loop_block_256:
SUBS r12, r12, #0x10 SUBS r12, r12, #0x10
ADD lr, lr, #0x10 ADD lr, lr, #0x10
ADD r1, r1, #0x10 ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_CBC_decrypt_end_odd BEQ L_AES_CBC_decrypt_end_odd
#else
BEQ.N L_AES_CBC_decrypt_end_odd
#endif
PUSH {r1, r12, lr} PUSH {r1, r12, lr}
LDR r4, [lr] LDR r4, [lr]
LDR r5, [lr, #4] LDR r5, [lr, #4]
@ -2205,7 +2333,11 @@ L_AES_CBC_decrypt_loop_block_256:
SUBS r12, r12, #0x10 SUBS r12, r12, #0x10
ADD lr, lr, #0x10 ADD lr, lr, #0x10
ADD r1, r1, #0x10 ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_CBC_decrypt_loop_block_256 BNE L_AES_CBC_decrypt_loop_block_256
#else
BNE.N L_AES_CBC_decrypt_loop_block_256
#endif
B L_AES_CBC_decrypt_end B L_AES_CBC_decrypt_end
L_AES_CBC_decrypt_loop_block_192: L_AES_CBC_decrypt_loop_block_192:
PUSH {r1, r12, lr} PUSH {r1, r12, lr}
@ -2247,7 +2379,11 @@ L_AES_CBC_decrypt_loop_block_192:
SUBS r12, r12, #0x10 SUBS r12, r12, #0x10
ADD lr, lr, #0x10 ADD lr, lr, #0x10
ADD r1, r1, #0x10 ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_CBC_decrypt_end_odd BEQ L_AES_CBC_decrypt_end_odd
#else
BEQ.N L_AES_CBC_decrypt_end_odd
#endif
PUSH {r1, r12, lr} PUSH {r1, r12, lr}
LDR r4, [lr] LDR r4, [lr]
LDR r5, [lr, #4] LDR r5, [lr, #4]
@ -2288,7 +2424,11 @@ L_AES_CBC_decrypt_loop_block_192:
SUBS r12, r12, #0x10 SUBS r12, r12, #0x10
ADD lr, lr, #0x10 ADD lr, lr, #0x10
ADD r1, r1, #0x10 ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_CBC_decrypt_loop_block_192 BNE L_AES_CBC_decrypt_loop_block_192
#else
BNE.N L_AES_CBC_decrypt_loop_block_192
#endif
B L_AES_CBC_decrypt_end B L_AES_CBC_decrypt_end
L_AES_CBC_decrypt_loop_block_128: L_AES_CBC_decrypt_loop_block_128:
PUSH {r1, r12, lr} PUSH {r1, r12, lr}
@ -2330,7 +2470,11 @@ L_AES_CBC_decrypt_loop_block_128:
SUBS r12, r12, #0x10 SUBS r12, r12, #0x10
ADD lr, lr, #0x10 ADD lr, lr, #0x10
ADD r1, r1, #0x10 ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_CBC_decrypt_end_odd BEQ L_AES_CBC_decrypt_end_odd
#else
BEQ.N L_AES_CBC_decrypt_end_odd
#endif
PUSH {r1, r12, lr} PUSH {r1, r12, lr}
LDR r4, [lr] LDR r4, [lr]
LDR r5, [lr, #4] LDR r5, [lr, #4]
@ -2371,7 +2515,11 @@ L_AES_CBC_decrypt_loop_block_128:
SUBS r12, r12, #0x10 SUBS r12, r12, #0x10
ADD lr, lr, #0x10 ADD lr, lr, #0x10
ADD r1, r1, #0x10 ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_CBC_decrypt_loop_block_128 BNE L_AES_CBC_decrypt_loop_block_128
#else
BNE.N L_AES_CBC_decrypt_loop_block_128
#endif
B L_AES_CBC_decrypt_end B L_AES_CBC_decrypt_end
L_AES_CBC_decrypt_end_odd: L_AES_CBC_decrypt_end_odd:
LDR r4, [sp, #4] LDR r4, [sp, #4]
@ -2961,7 +3109,11 @@ L_GCM_gmult_len_start_block:
POP {r3} POP {r3}
SUBS r3, r3, #0x10 SUBS r3, r3, #0x10
ADD r2, r2, #0x10 ADD r2, r2, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_GCM_gmult_len_start_block BNE L_GCM_gmult_len_start_block
#else
BNE.N L_GCM_gmult_len_start_block
#endif
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
# Cycle Count = 742 # Cycle Count = 742
.size GCM_gmult_len,.-GCM_gmult_len .size GCM_gmult_len,.-GCM_gmult_len
@ -2989,9 +3141,17 @@ AES_GCM_encrypt:
STM r8, {r4, r5, r6, r7} STM r8, {r4, r5, r6, r7}
PUSH {r3, r8} PUSH {r3, r8}
CMP r12, #0xa CMP r12, #0xa
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_GCM_encrypt_start_block_128 BEQ L_AES_GCM_encrypt_start_block_128
#else
BEQ.N L_AES_GCM_encrypt_start_block_128
#endif
CMP r12, #0xc CMP r12, #0xc
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BEQ L_AES_GCM_encrypt_start_block_192 BEQ L_AES_GCM_encrypt_start_block_192
#else
BEQ.N L_AES_GCM_encrypt_start_block_192
#endif
L_AES_GCM_encrypt_loop_block_256: L_AES_GCM_encrypt_loop_block_256:
PUSH {r1, r2, lr} PUSH {r1, r2, lr}
LDR lr, [sp, #16] LDR lr, [sp, #16]
@ -3028,7 +3188,11 @@ L_AES_GCM_encrypt_loop_block_256:
SUBS r2, r2, #0x10 SUBS r2, r2, #0x10
ADD lr, lr, #0x10 ADD lr, lr, #0x10
ADD r1, r1, #0x10 ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_GCM_encrypt_loop_block_256 BNE L_AES_GCM_encrypt_loop_block_256
#else
BNE.N L_AES_GCM_encrypt_loop_block_256
#endif
B L_AES_GCM_encrypt_end B L_AES_GCM_encrypt_end
L_AES_GCM_encrypt_start_block_192: L_AES_GCM_encrypt_start_block_192:
L_AES_GCM_encrypt_loop_block_192: L_AES_GCM_encrypt_loop_block_192:
@ -3067,7 +3231,11 @@ L_AES_GCM_encrypt_loop_block_192:
SUBS r2, r2, #0x10 SUBS r2, r2, #0x10
ADD lr, lr, #0x10 ADD lr, lr, #0x10
ADD r1, r1, #0x10 ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_GCM_encrypt_loop_block_192 BNE L_AES_GCM_encrypt_loop_block_192
#else
BNE.N L_AES_GCM_encrypt_loop_block_192
#endif
B L_AES_GCM_encrypt_end B L_AES_GCM_encrypt_end
L_AES_GCM_encrypt_start_block_128: L_AES_GCM_encrypt_start_block_128:
L_AES_GCM_encrypt_loop_block_128: L_AES_GCM_encrypt_loop_block_128:
@ -3106,7 +3274,11 @@ L_AES_GCM_encrypt_loop_block_128:
SUBS r2, r2, #0x10 SUBS r2, r2, #0x10
ADD lr, lr, #0x10 ADD lr, lr, #0x10
ADD r1, r1, #0x10 ADD r1, r1, #0x10
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_AES_GCM_encrypt_loop_block_128 BNE L_AES_GCM_encrypt_loop_block_128
#else
BNE.N L_AES_GCM_encrypt_loop_block_128
#endif
L_AES_GCM_encrypt_end: L_AES_GCM_encrypt_end:
POP {r3, r8} POP {r3, r8}
REV r4, r4 REV r4, r4

View File

@ -37,6 +37,18 @@
#endif /* HAVE_CONFIG_H */ #endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h> #include <wolfssl/wolfcrypt/settings.h>
#ifdef WOLFSSL_ARMASM_INLINE #ifdef WOLFSSL_ARMASM_INLINE
#ifdef WOLFSSL_ARMASM
#if !defined(__aarch64__) && defined(__arm__)
#ifdef __IAR_SYSTEMS_ICC__
#define __asm__ asm
#define __volatile__ volatile
#endif /* __IAR_SYSTEMS_ICC__ */
#ifdef __KEIL__
#define __asm__ __asm
#define __volatile__ volatile
#endif /* __KEIL__ */
#ifndef NO_AES #ifndef NO_AES
#include <wolfssl/wolfcrypt/aes.h> #include <wolfssl/wolfcrypt/aes.h>
@ -206,7 +218,11 @@ void AES_invert_key(unsigned char* ks_p, word32 rounds_p)
"STM %[ks]!, {r6, r7, r8, r9}\n\t" "STM %[ks]!, {r6, r7, r8, r9}\n\t"
"SUBS r11, r11, #0x2\n\t" "SUBS r11, r11, #0x2\n\t"
"SUB r10, r10, #0x10\n\t" "SUB r10, r10, #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_invert_key_loop_%=\n\t" "BNE L_AES_invert_key_loop_%=\n\t"
#else
"BNE.N L_AES_invert_key_loop_%=\n\t"
#endif
"SUB %[ks], %[ks], %[rounds], LSL #3\n\t" "SUB %[ks], %[ks], %[rounds], LSL #3\n\t"
"ADD %[ks], %[ks], #0x10\n\t" "ADD %[ks], %[ks], #0x10\n\t"
"SUB r11, %[rounds], #0x1\n\t" "SUB r11, %[rounds], #0x1\n\t"
@ -278,7 +294,11 @@ void AES_invert_key(unsigned char* ks_p, word32 rounds_p)
"EOR r8, r8, r9, ROR #24\n\t" "EOR r8, r8, r9, ROR #24\n\t"
"STR r8, [%[ks]], #4\n\t" "STR r8, [%[ks]], #4\n\t"
"SUBS r11, r11, #0x1\n\t" "SUBS r11, r11, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_invert_key_mix_loop_%=\n\t" "BNE L_AES_invert_key_mix_loop_%=\n\t"
#else
"BNE.N L_AES_invert_key_mix_loop_%=\n\t"
#endif
: [ks] "+r" (ks), [rounds] "+r" (rounds), [L_AES_Thumb2_te] "+r" (L_AES_Thumb2_te_c), [L_AES_Thumb2_td] "+r" (L_AES_Thumb2_td_c) : [ks] "+r" (ks), [rounds] "+r" (rounds), [L_AES_Thumb2_te] "+r" (L_AES_Thumb2_te_c), [L_AES_Thumb2_td] "+r" (L_AES_Thumb2_td_c)
: :
: "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
@ -306,9 +326,17 @@ void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, unsigned char
"MOV r8, %[L_AES_Thumb2_te]\n\t" "MOV r8, %[L_AES_Thumb2_te]\n\t"
"MOV lr, %[L_AES_Thumb2_rcon]\n\t" "MOV lr, %[L_AES_Thumb2_rcon]\n\t"
"CMP %[len], #0x80\n\t" "CMP %[len], #0x80\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_set_encrypt_key_start_128_%=\n\t" "BEQ L_AES_set_encrypt_key_start_128_%=\n\t"
#else
"BEQ.N L_AES_set_encrypt_key_start_128_%=\n\t"
#endif
"CMP %[len], #0xc0\n\t" "CMP %[len], #0xc0\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_set_encrypt_key_start_192_%=\n\t" "BEQ L_AES_set_encrypt_key_start_192_%=\n\t"
#else
"BEQ.N L_AES_set_encrypt_key_start_192_%=\n\t"
#endif
"LDRD r4, r5, [%[key]]\n\t" "LDRD r4, r5, [%[key]]\n\t"
"LDRD r6, r7, [%[key], #8]\n\t" "LDRD r6, r7, [%[key], #8]\n\t"
"REV r4, r4\n\t" "REV r4, r4\n\t"
@ -369,7 +397,11 @@ void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, unsigned char
"STM %[ks], {r4, r5, r6, r7}\n\t" "STM %[ks], {r4, r5, r6, r7}\n\t"
"SUB %[ks], %[ks], #0x10\n\t" "SUB %[ks], %[ks], #0x10\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_set_encrypt_key_loop_256_%=\n\t" "BNE L_AES_set_encrypt_key_loop_256_%=\n\t"
#else
"BNE.N L_AES_set_encrypt_key_loop_256_%=\n\t"
#endif
"UBFX r4, r7, #0, #8\n\t" "UBFX r4, r7, #0, #8\n\t"
"UBFX r5, r7, #8, #8\n\t" "UBFX r5, r7, #8, #8\n\t"
"UBFX r6, r7, #16, #8\n\t" "UBFX r6, r7, #16, #8\n\t"
@ -431,7 +463,11 @@ void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, unsigned char
"EOR r7, r7, r6\n\t" "EOR r7, r7, r6\n\t"
"STM %[ks], {r0, r1, r4, r5, r6, r7}\n\t" "STM %[ks], {r0, r1, r4, r5, r6, r7}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_set_encrypt_key_loop_192_%=\n\t" "BNE L_AES_set_encrypt_key_loop_192_%=\n\t"
#else
"BNE.N L_AES_set_encrypt_key_loop_192_%=\n\t"
#endif
"UBFX r0, r7, #0, #8\n\t" "UBFX r0, r7, #0, #8\n\t"
"UBFX r1, r7, #8, #8\n\t" "UBFX r1, r7, #8, #8\n\t"
"UBFX r4, r7, #16, #8\n\t" "UBFX r4, r7, #16, #8\n\t"
@ -484,7 +520,11 @@ void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, unsigned char
"EOR r7, r7, r6\n\t" "EOR r7, r7, r6\n\t"
"STM %[ks], {r4, r5, r6, r7}\n\t" "STM %[ks], {r4, r5, r6, r7}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_set_encrypt_key_loop_128_%=\n\t" "BNE L_AES_set_encrypt_key_loop_128_%=\n\t"
#else
"BNE.N L_AES_set_encrypt_key_loop_128_%=\n\t"
#endif
"\n" "\n"
"L_AES_set_encrypt_key_end_%=:\n\t" "L_AES_set_encrypt_key_end_%=:\n\t"
: [key] "+r" (key), [len] "+r" (len), [ks] "+r" (ks), [L_AES_Thumb2_te] "+r" (L_AES_Thumb2_te_c), [L_AES_Thumb2_rcon] "+r" (L_AES_Thumb2_rcon_c) : [key] "+r" (key), [len] "+r" (len), [ks] "+r" (ks), [L_AES_Thumb2_te] "+r" (L_AES_Thumb2_te_c), [L_AES_Thumb2_rcon] "+r" (L_AES_Thumb2_rcon_c)
@ -605,7 +645,11 @@ void AES_encrypt_block(const uint32_t* te_p, int nr_p, int len_p, const uint32_t
"EOR r6, r6, r10\n\t" "EOR r6, r6, r10\n\t"
"EOR r7, r7, r11\n\t" "EOR r7, r7, r11\n\t"
"SUBS %[nr], %[nr], #0x1\n\t" "SUBS %[nr], %[nr], #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_encrypt_block_nr_%=\n\t" "BNE L_AES_encrypt_block_nr_%=\n\t"
#else
"BNE.N L_AES_encrypt_block_nr_%=\n\t"
#endif
"UBFX r8, r5, #16, #8\n\t" "UBFX r8, r5, #16, #8\n\t"
"LSR r11, r4, #24\n\t" "LSR r11, r4, #24\n\t"
"UBFX lr, r6, #8, #8\n\t" "UBFX lr, r6, #8, #8\n\t"
@ -733,9 +777,17 @@ void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"MOV r12, r4\n\t" "MOV r12, r4\n\t"
"PUSH {%[ks]}\n\t" "PUSH {%[ks]}\n\t"
"CMP r12, #0xa\n\t" "CMP r12, #0xa\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_ECB_encrypt_start_block_128_%=\n\t" "BEQ L_AES_ECB_encrypt_start_block_128_%=\n\t"
#else
"BEQ.N L_AES_ECB_encrypt_start_block_128_%=\n\t"
#endif
"CMP r12, #0xc\n\t" "CMP r12, #0xc\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_ECB_encrypt_start_block_192_%=\n\t" "BEQ L_AES_ECB_encrypt_start_block_192_%=\n\t"
#else
"BEQ.N L_AES_ECB_encrypt_start_block_192_%=\n\t"
#endif
"\n" "\n"
"L_AES_ECB_encrypt_loop_block_256_%=:\n\t" "L_AES_ECB_encrypt_loop_block_256_%=:\n\t"
"LDR r4, [lr]\n\t" "LDR r4, [lr]\n\t"
@ -768,7 +820,11 @@ void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS %[len], %[len], #0x10\n\t" "SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t" "ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t" "ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_ECB_encrypt_loop_block_256_%=\n\t" "BNE L_AES_ECB_encrypt_loop_block_256_%=\n\t"
#else
"BNE.N L_AES_ECB_encrypt_loop_block_256_%=\n\t"
#endif
"B L_AES_ECB_encrypt_end_%=\n\t" "B L_AES_ECB_encrypt_end_%=\n\t"
"\n" "\n"
"L_AES_ECB_encrypt_start_block_192_%=:\n\t" "L_AES_ECB_encrypt_start_block_192_%=:\n\t"
@ -804,7 +860,11 @@ void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS %[len], %[len], #0x10\n\t" "SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t" "ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t" "ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_ECB_encrypt_loop_block_192_%=\n\t" "BNE L_AES_ECB_encrypt_loop_block_192_%=\n\t"
#else
"BNE.N L_AES_ECB_encrypt_loop_block_192_%=\n\t"
#endif
"B L_AES_ECB_encrypt_end_%=\n\t" "B L_AES_ECB_encrypt_end_%=\n\t"
"\n" "\n"
"L_AES_ECB_encrypt_start_block_128_%=:\n\t" "L_AES_ECB_encrypt_start_block_128_%=:\n\t"
@ -840,7 +900,11 @@ void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS %[len], %[len], #0x10\n\t" "SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t" "ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t" "ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_ECB_encrypt_loop_block_128_%=\n\t" "BNE L_AES_ECB_encrypt_loop_block_128_%=\n\t"
#else
"BNE.N L_AES_ECB_encrypt_loop_block_128_%=\n\t"
#endif
"\n" "\n"
"L_AES_ECB_encrypt_end_%=:\n\t" "L_AES_ECB_encrypt_end_%=:\n\t"
"POP {%[ks]}\n\t" "POP {%[ks]}\n\t"
@ -848,7 +912,6 @@ void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
: :
: "memory", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11" : "memory", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11"
); );
(void)nr;
} }
#endif /* HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #endif /* HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
@ -873,9 +936,17 @@ void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"LDM r9, {r4, r5, r6, r7}\n\t" "LDM r9, {r4, r5, r6, r7}\n\t"
"PUSH {%[ks], r9}\n\t" "PUSH {%[ks], r9}\n\t"
"CMP r8, #0xa\n\t" "CMP r8, #0xa\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_CBC_encrypt_start_block_128_%=\n\t" "BEQ L_AES_CBC_encrypt_start_block_128_%=\n\t"
#else
"BEQ.N L_AES_CBC_encrypt_start_block_128_%=\n\t"
#endif
"CMP r8, #0xc\n\t" "CMP r8, #0xc\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_CBC_encrypt_start_block_192_%=\n\t" "BEQ L_AES_CBC_encrypt_start_block_192_%=\n\t"
#else
"BEQ.N L_AES_CBC_encrypt_start_block_192_%=\n\t"
#endif
"\n" "\n"
"L_AES_CBC_encrypt_loop_block_256_%=:\n\t" "L_AES_CBC_encrypt_loop_block_256_%=:\n\t"
"LDR r8, [lr]\n\t" "LDR r8, [lr]\n\t"
@ -912,7 +983,11 @@ void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS %[len], %[len], #0x10\n\t" "SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t" "ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t" "ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_CBC_encrypt_loop_block_256_%=\n\t" "BNE L_AES_CBC_encrypt_loop_block_256_%=\n\t"
#else
"BNE.N L_AES_CBC_encrypt_loop_block_256_%=\n\t"
#endif
"B L_AES_CBC_encrypt_end_%=\n\t" "B L_AES_CBC_encrypt_end_%=\n\t"
"\n" "\n"
"L_AES_CBC_encrypt_start_block_192_%=:\n\t" "L_AES_CBC_encrypt_start_block_192_%=:\n\t"
@ -952,7 +1027,11 @@ void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS %[len], %[len], #0x10\n\t" "SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t" "ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t" "ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_CBC_encrypt_loop_block_192_%=\n\t" "BNE L_AES_CBC_encrypt_loop_block_192_%=\n\t"
#else
"BNE.N L_AES_CBC_encrypt_loop_block_192_%=\n\t"
#endif
"B L_AES_CBC_encrypt_end_%=\n\t" "B L_AES_CBC_encrypt_end_%=\n\t"
"\n" "\n"
"L_AES_CBC_encrypt_start_block_128_%=:\n\t" "L_AES_CBC_encrypt_start_block_128_%=:\n\t"
@ -992,7 +1071,11 @@ void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS %[len], %[len], #0x10\n\t" "SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t" "ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t" "ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_CBC_encrypt_loop_block_128_%=\n\t" "BNE L_AES_CBC_encrypt_loop_block_128_%=\n\t"
#else
"BNE.N L_AES_CBC_encrypt_loop_block_128_%=\n\t"
#endif
"\n" "\n"
"L_AES_CBC_encrypt_end_%=:\n\t" "L_AES_CBC_encrypt_end_%=:\n\t"
"POP {%[ks], r9}\n\t" "POP {%[ks], r9}\n\t"
@ -1001,8 +1084,6 @@ void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
: :
: "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11" : "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11"
); );
(void)nr;
(void)iv;
} }
#endif /* HAVE_AES_CBC */ #endif /* HAVE_AES_CBC */
@ -1032,9 +1113,17 @@ void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"STM r8, {r4, r5, r6, r7}\n\t" "STM r8, {r4, r5, r6, r7}\n\t"
"PUSH {%[ks], r8}\n\t" "PUSH {%[ks], r8}\n\t"
"CMP r12, #0xa\n\t" "CMP r12, #0xa\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_CTR_encrypt_start_block_128_%=\n\t" "BEQ L_AES_CTR_encrypt_start_block_128_%=\n\t"
#else
"BEQ.N L_AES_CTR_encrypt_start_block_128_%=\n\t"
#endif
"CMP r12, #0xc\n\t" "CMP r12, #0xc\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_CTR_encrypt_start_block_192_%=\n\t" "BEQ L_AES_CTR_encrypt_start_block_192_%=\n\t"
#else
"BEQ.N L_AES_CTR_encrypt_start_block_192_%=\n\t"
#endif
"\n" "\n"
"L_AES_CTR_encrypt_loop_block_256_%=:\n\t" "L_AES_CTR_encrypt_loop_block_256_%=:\n\t"
"PUSH {r1, %[len], lr}\n\t" "PUSH {r1, %[len], lr}\n\t"
@ -1075,7 +1164,11 @@ void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS %[len], %[len], #0x10\n\t" "SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t" "ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t" "ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_CTR_encrypt_loop_block_256_%=\n\t" "BNE L_AES_CTR_encrypt_loop_block_256_%=\n\t"
#else
"BNE.N L_AES_CTR_encrypt_loop_block_256_%=\n\t"
#endif
"B L_AES_CTR_encrypt_end_%=\n\t" "B L_AES_CTR_encrypt_end_%=\n\t"
"\n" "\n"
"L_AES_CTR_encrypt_start_block_192_%=:\n\t" "L_AES_CTR_encrypt_start_block_192_%=:\n\t"
@ -1119,7 +1212,11 @@ void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS %[len], %[len], #0x10\n\t" "SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t" "ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t" "ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_CTR_encrypt_loop_block_192_%=\n\t" "BNE L_AES_CTR_encrypt_loop_block_192_%=\n\t"
#else
"BNE.N L_AES_CTR_encrypt_loop_block_192_%=\n\t"
#endif
"B L_AES_CTR_encrypt_end_%=\n\t" "B L_AES_CTR_encrypt_end_%=\n\t"
"\n" "\n"
"L_AES_CTR_encrypt_start_block_128_%=:\n\t" "L_AES_CTR_encrypt_start_block_128_%=:\n\t"
@ -1163,7 +1260,11 @@ void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS %[len], %[len], #0x10\n\t" "SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t" "ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t" "ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_CTR_encrypt_loop_block_128_%=\n\t" "BNE L_AES_CTR_encrypt_loop_block_128_%=\n\t"
#else
"BNE.N L_AES_CTR_encrypt_loop_block_128_%=\n\t"
#endif
"\n" "\n"
"L_AES_CTR_encrypt_end_%=:\n\t" "L_AES_CTR_encrypt_end_%=:\n\t"
"POP {%[ks], r8}\n\t" "POP {%[ks], r8}\n\t"
@ -1176,8 +1277,6 @@ void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
: :
: "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11" : "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11"
); );
(void)nr;
(void)ctr;
} }
#endif /* WOLFSSL_AES_COUNTER */ #endif /* WOLFSSL_AES_COUNTER */
@ -1294,7 +1393,11 @@ void AES_decrypt_block(const uint32_t* td_p, int nr_p, const uint8_t* td4_p)
"EOR r6, r6, r10\n\t" "EOR r6, r6, r10\n\t"
"EOR r7, r7, r11\n\t" "EOR r7, r7, r11\n\t"
"SUBS %[nr], %[nr], #0x1\n\t" "SUBS %[nr], %[nr], #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_decrypt_block_nr_%=\n\t" "BNE L_AES_decrypt_block_nr_%=\n\t"
#else
"BNE.N L_AES_decrypt_block_nr_%=\n\t"
#endif
"UBFX r8, r7, #16, #8\n\t" "UBFX r8, r7, #16, #8\n\t"
"LSR r11, r4, #24\n\t" "LSR r11, r4, #24\n\t"
"UBFX r12, r6, #8, #8\n\t" "UBFX r12, r6, #8, #8\n\t"
@ -1457,9 +1560,17 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"MOV r12, %[len]\n\t" "MOV r12, %[len]\n\t"
"MOV r2, %[L_AES_Thumb2_td4]\n\t" "MOV r2, %[L_AES_Thumb2_td4]\n\t"
"CMP r8, #0xa\n\t" "CMP r8, #0xa\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_ECB_decrypt_start_block_128_%=\n\t" "BEQ L_AES_ECB_decrypt_start_block_128_%=\n\t"
#else
"BEQ.N L_AES_ECB_decrypt_start_block_128_%=\n\t"
#endif
"CMP r8, #0xc\n\t" "CMP r8, #0xc\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_ECB_decrypt_start_block_192_%=\n\t" "BEQ L_AES_ECB_decrypt_start_block_192_%=\n\t"
#else
"BEQ.N L_AES_ECB_decrypt_start_block_192_%=\n\t"
#endif
"\n" "\n"
"L_AES_ECB_decrypt_loop_block_256_%=:\n\t" "L_AES_ECB_decrypt_loop_block_256_%=:\n\t"
"LDR r4, [lr]\n\t" "LDR r4, [lr]\n\t"
@ -1491,7 +1602,11 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS r12, r12, #0x10\n\t" "SUBS r12, r12, #0x10\n\t"
"ADD lr, lr, #0x10\n\t" "ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t" "ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_ECB_decrypt_loop_block_256_%=\n\t" "BNE L_AES_ECB_decrypt_loop_block_256_%=\n\t"
#else
"BNE.N L_AES_ECB_decrypt_loop_block_256_%=\n\t"
#endif
"B L_AES_ECB_decrypt_end_%=\n\t" "B L_AES_ECB_decrypt_end_%=\n\t"
"\n" "\n"
"L_AES_ECB_decrypt_start_block_192_%=:\n\t" "L_AES_ECB_decrypt_start_block_192_%=:\n\t"
@ -1526,7 +1641,11 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS r12, r12, #0x10\n\t" "SUBS r12, r12, #0x10\n\t"
"ADD lr, lr, #0x10\n\t" "ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t" "ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_ECB_decrypt_loop_block_192_%=\n\t" "BNE L_AES_ECB_decrypt_loop_block_192_%=\n\t"
#else
"BNE.N L_AES_ECB_decrypt_loop_block_192_%=\n\t"
#endif
"B L_AES_ECB_decrypt_end_%=\n\t" "B L_AES_ECB_decrypt_end_%=\n\t"
"\n" "\n"
"L_AES_ECB_decrypt_start_block_128_%=:\n\t" "L_AES_ECB_decrypt_start_block_128_%=:\n\t"
@ -1561,14 +1680,17 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS r12, r12, #0x10\n\t" "SUBS r12, r12, #0x10\n\t"
"ADD lr, lr, #0x10\n\t" "ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t" "ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_ECB_decrypt_loop_block_128_%=\n\t" "BNE L_AES_ECB_decrypt_loop_block_128_%=\n\t"
#else
"BNE.N L_AES_ECB_decrypt_loop_block_128_%=\n\t"
#endif
"\n" "\n"
"L_AES_ECB_decrypt_end_%=:\n\t" "L_AES_ECB_decrypt_end_%=:\n\t"
: [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [L_AES_Thumb2_td_ecb] "+r" (L_AES_Thumb2_td_ecb_c), [L_AES_Thumb2_td4] "+r" (L_AES_Thumb2_td4_c) : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [L_AES_Thumb2_td_ecb] "+r" (L_AES_Thumb2_td_ecb_c), [L_AES_Thumb2_td4] "+r" (L_AES_Thumb2_td4_c)
: :
: "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11" : "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11"
); );
(void)nr;
} }
#endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */
@ -1595,9 +1717,17 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"MOV r2, %[L_AES_Thumb2_td4]\n\t" "MOV r2, %[L_AES_Thumb2_td4]\n\t"
"PUSH {%[ks], r4}\n\t" "PUSH {%[ks], r4}\n\t"
"CMP r8, #0xa\n\t" "CMP r8, #0xa\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_CBC_decrypt_loop_block_128_%=\n\t" "BEQ L_AES_CBC_decrypt_loop_block_128_%=\n\t"
#else
"BEQ.N L_AES_CBC_decrypt_loop_block_128_%=\n\t"
#endif
"CMP r8, #0xc\n\t" "CMP r8, #0xc\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_CBC_decrypt_loop_block_192_%=\n\t" "BEQ L_AES_CBC_decrypt_loop_block_192_%=\n\t"
#else
"BEQ.N L_AES_CBC_decrypt_loop_block_192_%=\n\t"
#endif
"\n" "\n"
"L_AES_CBC_decrypt_loop_block_256_%=:\n\t" "L_AES_CBC_decrypt_loop_block_256_%=:\n\t"
"PUSH {r1, r12, lr}\n\t" "PUSH {r1, r12, lr}\n\t"
@ -1639,7 +1769,11 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS r12, r12, #0x10\n\t" "SUBS r12, r12, #0x10\n\t"
"ADD lr, lr, #0x10\n\t" "ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t" "ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_CBC_decrypt_end_odd_%=\n\t" "BEQ L_AES_CBC_decrypt_end_odd_%=\n\t"
#else
"BEQ.N L_AES_CBC_decrypt_end_odd_%=\n\t"
#endif
"PUSH {r1, r12, lr}\n\t" "PUSH {r1, r12, lr}\n\t"
"LDR r4, [lr]\n\t" "LDR r4, [lr]\n\t"
"LDR r5, [lr, #4]\n\t" "LDR r5, [lr, #4]\n\t"
@ -1680,7 +1814,11 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS r12, r12, #0x10\n\t" "SUBS r12, r12, #0x10\n\t"
"ADD lr, lr, #0x10\n\t" "ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t" "ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_CBC_decrypt_loop_block_256_%=\n\t" "BNE L_AES_CBC_decrypt_loop_block_256_%=\n\t"
#else
"BNE.N L_AES_CBC_decrypt_loop_block_256_%=\n\t"
#endif
"B L_AES_CBC_decrypt_end_%=\n\t" "B L_AES_CBC_decrypt_end_%=\n\t"
"\n" "\n"
"L_AES_CBC_decrypt_loop_block_192_%=:\n\t" "L_AES_CBC_decrypt_loop_block_192_%=:\n\t"
@ -1723,7 +1861,11 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS r12, r12, #0x10\n\t" "SUBS r12, r12, #0x10\n\t"
"ADD lr, lr, #0x10\n\t" "ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t" "ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_CBC_decrypt_end_odd_%=\n\t" "BEQ L_AES_CBC_decrypt_end_odd_%=\n\t"
#else
"BEQ.N L_AES_CBC_decrypt_end_odd_%=\n\t"
#endif
"PUSH {r1, r12, lr}\n\t" "PUSH {r1, r12, lr}\n\t"
"LDR r4, [lr]\n\t" "LDR r4, [lr]\n\t"
"LDR r5, [lr, #4]\n\t" "LDR r5, [lr, #4]\n\t"
@ -1764,7 +1906,11 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS r12, r12, #0x10\n\t" "SUBS r12, r12, #0x10\n\t"
"ADD lr, lr, #0x10\n\t" "ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t" "ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_CBC_decrypt_loop_block_192_%=\n\t" "BNE L_AES_CBC_decrypt_loop_block_192_%=\n\t"
#else
"BNE.N L_AES_CBC_decrypt_loop_block_192_%=\n\t"
#endif
"B L_AES_CBC_decrypt_end_%=\n\t" "B L_AES_CBC_decrypt_end_%=\n\t"
"\n" "\n"
"L_AES_CBC_decrypt_loop_block_128_%=:\n\t" "L_AES_CBC_decrypt_loop_block_128_%=:\n\t"
@ -1807,7 +1953,11 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS r12, r12, #0x10\n\t" "SUBS r12, r12, #0x10\n\t"
"ADD lr, lr, #0x10\n\t" "ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t" "ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_CBC_decrypt_end_odd_%=\n\t" "BEQ L_AES_CBC_decrypt_end_odd_%=\n\t"
#else
"BEQ.N L_AES_CBC_decrypt_end_odd_%=\n\t"
#endif
"PUSH {r1, r12, lr}\n\t" "PUSH {r1, r12, lr}\n\t"
"LDR r4, [lr]\n\t" "LDR r4, [lr]\n\t"
"LDR r5, [lr, #4]\n\t" "LDR r5, [lr, #4]\n\t"
@ -1848,7 +1998,11 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS r12, r12, #0x10\n\t" "SUBS r12, r12, #0x10\n\t"
"ADD lr, lr, #0x10\n\t" "ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t" "ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_CBC_decrypt_loop_block_128_%=\n\t" "BNE L_AES_CBC_decrypt_loop_block_128_%=\n\t"
#else
"BNE.N L_AES_CBC_decrypt_loop_block_128_%=\n\t"
#endif
"B L_AES_CBC_decrypt_end_%=\n\t" "B L_AES_CBC_decrypt_end_%=\n\t"
"\n" "\n"
"L_AES_CBC_decrypt_end_odd_%=:\n\t" "L_AES_CBC_decrypt_end_odd_%=:\n\t"
@ -1864,8 +2018,6 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
: :
: "memory", "r12", "lr", "r8", "r9", "r10", "r11" : "memory", "r12", "lr", "r8", "r9", "r10", "r11"
); );
(void)nr;
(void)iv;
} }
#endif /* HAVE_AES_CBC */ #endif /* HAVE_AES_CBC */
@ -2437,7 +2589,11 @@ void GCM_gmult_len(unsigned char* x_p, const unsigned char** m_p, const unsigned
"POP {r3}\n\t" "POP {r3}\n\t"
"SUBS %[len], %[len], #0x10\n\t" "SUBS %[len], %[len], #0x10\n\t"
"ADD %[data], %[data], #0x10\n\t" "ADD %[data], %[data], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_GCM_gmult_len_start_block_%=\n\t" "BNE L_GCM_gmult_len_start_block_%=\n\t"
#else
"BNE.N L_GCM_gmult_len_start_block_%=\n\t"
#endif
: [x] "+r" (x), [m] "+r" (m), [data] "+r" (data), [len] "+r" (len), [L_GCM_gmult_len_r] "+r" (L_GCM_gmult_len_r_c) : [x] "+r" (x), [m] "+r" (m), [data] "+r" (data), [len] "+r" (len), [L_GCM_gmult_len_r] "+r" (L_GCM_gmult_len_r_c)
: :
: "memory", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10", "r11" : "memory", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10", "r11"
@ -2470,9 +2626,17 @@ void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"STM r8, {r4, r5, r6, r7}\n\t" "STM r8, {r4, r5, r6, r7}\n\t"
"PUSH {%[ks], r8}\n\t" "PUSH {%[ks], r8}\n\t"
"CMP r12, #0xa\n\t" "CMP r12, #0xa\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_GCM_encrypt_start_block_128_%=\n\t" "BEQ L_AES_GCM_encrypt_start_block_128_%=\n\t"
#else
"BEQ.N L_AES_GCM_encrypt_start_block_128_%=\n\t"
#endif
"CMP r12, #0xc\n\t" "CMP r12, #0xc\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BEQ L_AES_GCM_encrypt_start_block_192_%=\n\t" "BEQ L_AES_GCM_encrypt_start_block_192_%=\n\t"
#else
"BEQ.N L_AES_GCM_encrypt_start_block_192_%=\n\t"
#endif
"\n" "\n"
"L_AES_GCM_encrypt_loop_block_256_%=:\n\t" "L_AES_GCM_encrypt_loop_block_256_%=:\n\t"
"PUSH {r1, %[len], lr}\n\t" "PUSH {r1, %[len], lr}\n\t"
@ -2510,7 +2674,11 @@ void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS %[len], %[len], #0x10\n\t" "SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t" "ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t" "ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_GCM_encrypt_loop_block_256_%=\n\t" "BNE L_AES_GCM_encrypt_loop_block_256_%=\n\t"
#else
"BNE.N L_AES_GCM_encrypt_loop_block_256_%=\n\t"
#endif
"B L_AES_GCM_encrypt_end_%=\n\t" "B L_AES_GCM_encrypt_end_%=\n\t"
"\n" "\n"
"L_AES_GCM_encrypt_start_block_192_%=:\n\t" "L_AES_GCM_encrypt_start_block_192_%=:\n\t"
@ -2551,7 +2719,11 @@ void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS %[len], %[len], #0x10\n\t" "SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t" "ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t" "ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_GCM_encrypt_loop_block_192_%=\n\t" "BNE L_AES_GCM_encrypt_loop_block_192_%=\n\t"
#else
"BNE.N L_AES_GCM_encrypt_loop_block_192_%=\n\t"
#endif
"B L_AES_GCM_encrypt_end_%=\n\t" "B L_AES_GCM_encrypt_end_%=\n\t"
"\n" "\n"
"L_AES_GCM_encrypt_start_block_128_%=:\n\t" "L_AES_GCM_encrypt_start_block_128_%=:\n\t"
@ -2592,7 +2764,11 @@ void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
"SUBS %[len], %[len], #0x10\n\t" "SUBS %[len], %[len], #0x10\n\t"
"ADD lr, lr, #0x10\n\t" "ADD lr, lr, #0x10\n\t"
"ADD %[out], %[out], #0x10\n\t" "ADD %[out], %[out], #0x10\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_AES_GCM_encrypt_loop_block_128_%=\n\t" "BNE L_AES_GCM_encrypt_loop_block_128_%=\n\t"
#else
"BNE.N L_AES_GCM_encrypt_loop_block_128_%=\n\t"
#endif
"\n" "\n"
"L_AES_GCM_encrypt_end_%=:\n\t" "L_AES_GCM_encrypt_end_%=:\n\t"
"POP {%[ks], r8}\n\t" "POP {%[ks], r8}\n\t"
@ -2605,12 +2781,13 @@ void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l
: :
: "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11" : "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11"
); );
(void)nr;
(void)ctr;
} }
#endif /* HAVE_AESGCM */ #endif /* HAVE_AESGCM */
#endif /* !NO_AES */ #endif /* !NO_AES */
#endif /* !__aarch64__ && __thumb__ */ #endif /* !__aarch64__ && __thumb__ */
#endif /* WOLFSSL_ARMASM */ #endif /* WOLFSSL_ARMASM */
#endif /* !defined(__aarch64__) && defined(__arm__) */
#endif /* WOLFSSL_ARMASM */
#endif /* WOLFSSL_ARMASM_INLINE */ #endif /* WOLFSSL_ARMASM_INLINE */

View File

@ -2741,12 +2741,20 @@ L_curve25519_bits:
LDR r1, [sp, #180] LDR r1, [sp, #180]
SUBS r1, r1, #0x1 SUBS r1, r1, #0x1
STR r1, [sp, #180] STR r1, [sp, #180]
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BGE L_curve25519_bits BGE L_curve25519_bits
#else
BGE.N L_curve25519_bits
#endif
MOV r1, #0x1f MOV r1, #0x1f
STR r1, [sp, #180] STR r1, [sp, #180]
SUBS r2, r2, #0x4 SUBS r2, r2, #0x4
STR r2, [sp, #176] STR r2, [sp, #176]
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BGE L_curve25519_words BGE L_curve25519_words
#else
BGE.N L_curve25519_words
#endif
# Invert # Invert
ADD r1, sp, #0x0 ADD r1, sp, #0x0
ADD r0, sp, #0x20 ADD r0, sp, #0x20
@ -2783,7 +2791,11 @@ L_curve25519_inv_1:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_1 BNE L_curve25519_inv_1
#else
BNE.N L_curve25519_inv_1
#endif
ADD r2, sp, #0x40 ADD r2, sp, #0x40
ADD r1, sp, #0x60 ADD r1, sp, #0x60
ADD r0, sp, #0x40 ADD r0, sp, #0x40
@ -2799,7 +2811,11 @@ L_curve25519_inv_2:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_2 BNE L_curve25519_inv_2
#else
BNE.N L_curve25519_inv_2
#endif
ADD r2, sp, #0x40 ADD r2, sp, #0x40
ADD r1, sp, #0x60 ADD r1, sp, #0x60
ADD r0, sp, #0x60 ADD r0, sp, #0x60
@ -2815,7 +2831,11 @@ L_curve25519_inv_3:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_3 BNE L_curve25519_inv_3
#else
BNE.N L_curve25519_inv_3
#endif
ADD r2, sp, #0x60 ADD r2, sp, #0x60
ADD r1, sp, #0x80 ADD r1, sp, #0x80
ADD r0, sp, #0x60 ADD r0, sp, #0x60
@ -2828,7 +2848,11 @@ L_curve25519_inv_4:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_4 BNE L_curve25519_inv_4
#else
BNE.N L_curve25519_inv_4
#endif
ADD r2, sp, #0x40 ADD r2, sp, #0x40
ADD r1, sp, #0x60 ADD r1, sp, #0x60
ADD r0, sp, #0x40 ADD r0, sp, #0x40
@ -2844,7 +2868,11 @@ L_curve25519_inv_5:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_5 BNE L_curve25519_inv_5
#else
BNE.N L_curve25519_inv_5
#endif
ADD r2, sp, #0x40 ADD r2, sp, #0x40
ADD r1, sp, #0x60 ADD r1, sp, #0x60
ADD r0, sp, #0x60 ADD r0, sp, #0x60
@ -2860,7 +2888,11 @@ L_curve25519_inv_6:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_6 BNE L_curve25519_inv_6
#else
BNE.N L_curve25519_inv_6
#endif
ADD r2, sp, #0x60 ADD r2, sp, #0x60
ADD r1, sp, #0x80 ADD r1, sp, #0x80
ADD r0, sp, #0x60 ADD r0, sp, #0x60
@ -2873,7 +2905,11 @@ L_curve25519_inv_7:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_7 BNE L_curve25519_inv_7
#else
BNE.N L_curve25519_inv_7
#endif
ADD r2, sp, #0x40 ADD r2, sp, #0x40
ADD r1, sp, #0x60 ADD r1, sp, #0x60
ADD r0, sp, #0x40 ADD r0, sp, #0x40
@ -2886,7 +2922,11 @@ L_curve25519_inv_8:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_8 BNE L_curve25519_inv_8
#else
BNE.N L_curve25519_inv_8
#endif
ADD r2, sp, #0x20 ADD r2, sp, #0x20
ADD r1, sp, #0x40 ADD r1, sp, #0x40
ADD r0, sp, #0x0 ADD r0, sp, #0x0
@ -3022,7 +3062,11 @@ L_curve25519_bits:
BL fe_mul_op BL fe_mul_op
LDR r2, [sp, #168] LDR r2, [sp, #168]
SUBS r2, r2, #0x1 SUBS r2, r2, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BGE L_curve25519_bits BGE L_curve25519_bits
#else
BGE.N L_curve25519_bits
#endif
# Cycle Count: 171 # Cycle Count: 171
LDR r1, [sp, #184] LDR r1, [sp, #184]
# Copy # Copy
@ -3064,7 +3108,11 @@ L_curve25519_inv_1:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_1 BNE L_curve25519_inv_1
#else
BNE.N L_curve25519_inv_1
#endif
ADD r2, sp, #0x40 ADD r2, sp, #0x40
ADD r1, sp, #0x60 ADD r1, sp, #0x60
ADD r0, sp, #0x40 ADD r0, sp, #0x40
@ -3080,7 +3128,11 @@ L_curve25519_inv_2:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_2 BNE L_curve25519_inv_2
#else
BNE.N L_curve25519_inv_2
#endif
ADD r2, sp, #0x40 ADD r2, sp, #0x40
ADD r1, sp, #0x60 ADD r1, sp, #0x60
ADD r0, sp, #0x60 ADD r0, sp, #0x60
@ -3096,7 +3148,11 @@ L_curve25519_inv_3:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_3 BNE L_curve25519_inv_3
#else
BNE.N L_curve25519_inv_3
#endif
ADD r2, sp, #0x60 ADD r2, sp, #0x60
ADD r1, sp, #0x80 ADD r1, sp, #0x80
ADD r0, sp, #0x60 ADD r0, sp, #0x60
@ -3109,7 +3165,11 @@ L_curve25519_inv_4:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_4 BNE L_curve25519_inv_4
#else
BNE.N L_curve25519_inv_4
#endif
ADD r2, sp, #0x40 ADD r2, sp, #0x40
ADD r1, sp, #0x60 ADD r1, sp, #0x60
ADD r0, sp, #0x40 ADD r0, sp, #0x40
@ -3125,7 +3185,11 @@ L_curve25519_inv_5:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_5 BNE L_curve25519_inv_5
#else
BNE.N L_curve25519_inv_5
#endif
ADD r2, sp, #0x40 ADD r2, sp, #0x40
ADD r1, sp, #0x60 ADD r1, sp, #0x60
ADD r0, sp, #0x60 ADD r0, sp, #0x60
@ -3141,7 +3205,11 @@ L_curve25519_inv_6:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_6 BNE L_curve25519_inv_6
#else
BNE.N L_curve25519_inv_6
#endif
ADD r2, sp, #0x60 ADD r2, sp, #0x60
ADD r1, sp, #0x80 ADD r1, sp, #0x80
ADD r0, sp, #0x60 ADD r0, sp, #0x60
@ -3154,7 +3222,11 @@ L_curve25519_inv_7:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_7 BNE L_curve25519_inv_7
#else
BNE.N L_curve25519_inv_7
#endif
ADD r2, sp, #0x40 ADD r2, sp, #0x40
ADD r1, sp, #0x60 ADD r1, sp, #0x60
ADD r0, sp, #0x40 ADD r0, sp, #0x40
@ -3167,7 +3239,11 @@ L_curve25519_inv_8:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_curve25519_inv_8 BNE L_curve25519_inv_8
#else
BNE.N L_curve25519_inv_8
#endif
ADD r2, sp, #0x20 ADD r2, sp, #0x20
ADD r1, sp, #0x40 ADD r1, sp, #0x40
ADD r0, sp, #0x0 ADD r0, sp, #0x0
@ -3244,7 +3320,11 @@ L_fe_invert1:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_invert1 BNE L_fe_invert1
#else
BNE.N L_fe_invert1
#endif
ADD r2, sp, #0x20 ADD r2, sp, #0x20
ADD r1, sp, #0x40 ADD r1, sp, #0x40
ADD r0, sp, #0x20 ADD r0, sp, #0x20
@ -3260,7 +3340,11 @@ L_fe_invert2:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_invert2 BNE L_fe_invert2
#else
BNE.N L_fe_invert2
#endif
ADD r2, sp, #0x20 ADD r2, sp, #0x20
ADD r1, sp, #0x40 ADD r1, sp, #0x40
ADD r0, sp, #0x40 ADD r0, sp, #0x40
@ -3276,7 +3360,11 @@ L_fe_invert3:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_invert3 BNE L_fe_invert3
#else
BNE.N L_fe_invert3
#endif
ADD r2, sp, #0x40 ADD r2, sp, #0x40
ADD r1, sp, #0x60 ADD r1, sp, #0x60
ADD r0, sp, #0x40 ADD r0, sp, #0x40
@ -3289,7 +3377,11 @@ L_fe_invert4:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_invert4 BNE L_fe_invert4
#else
BNE.N L_fe_invert4
#endif
ADD r2, sp, #0x20 ADD r2, sp, #0x20
ADD r1, sp, #0x40 ADD r1, sp, #0x40
ADD r0, sp, #0x20 ADD r0, sp, #0x20
@ -3305,7 +3397,11 @@ L_fe_invert5:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_invert5 BNE L_fe_invert5
#else
BNE.N L_fe_invert5
#endif
ADD r2, sp, #0x20 ADD r2, sp, #0x20
ADD r1, sp, #0x40 ADD r1, sp, #0x40
ADD r0, sp, #0x40 ADD r0, sp, #0x40
@ -3321,7 +3417,11 @@ L_fe_invert6:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_invert6 BNE L_fe_invert6
#else
BNE.N L_fe_invert6
#endif
ADD r2, sp, #0x40 ADD r2, sp, #0x40
ADD r1, sp, #0x60 ADD r1, sp, #0x60
ADD r0, sp, #0x40 ADD r0, sp, #0x40
@ -3334,7 +3434,11 @@ L_fe_invert7:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_invert7 BNE L_fe_invert7
#else
BNE.N L_fe_invert7
#endif
ADD r2, sp, #0x20 ADD r2, sp, #0x20
ADD r1, sp, #0x40 ADD r1, sp, #0x40
ADD r0, sp, #0x20 ADD r0, sp, #0x20
@ -3347,7 +3451,11 @@ L_fe_invert8:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_invert8 BNE L_fe_invert8
#else
BNE.N L_fe_invert8
#endif
MOV r2, sp MOV r2, sp
ADD r1, sp, #0x20 ADD r1, sp, #0x20
LDR r0, [sp, #128] LDR r0, [sp, #128]
@ -3863,7 +3971,11 @@ L_fe_pow22523_1:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_pow22523_1 BNE L_fe_pow22523_1
#else
BNE.N L_fe_pow22523_1
#endif
MOV r2, sp MOV r2, sp
ADD r1, sp, #0x20 ADD r1, sp, #0x20
MOV r0, sp MOV r0, sp
@ -3879,7 +3991,11 @@ L_fe_pow22523_2:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_pow22523_2 BNE L_fe_pow22523_2
#else
BNE.N L_fe_pow22523_2
#endif
MOV r2, sp MOV r2, sp
ADD r1, sp, #0x20 ADD r1, sp, #0x20
ADD r0, sp, #0x20 ADD r0, sp, #0x20
@ -3895,7 +4011,11 @@ L_fe_pow22523_3:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_pow22523_3 BNE L_fe_pow22523_3
#else
BNE.N L_fe_pow22523_3
#endif
ADD r2, sp, #0x20 ADD r2, sp, #0x20
ADD r1, sp, #0x40 ADD r1, sp, #0x40
ADD r0, sp, #0x20 ADD r0, sp, #0x20
@ -3908,7 +4028,11 @@ L_fe_pow22523_4:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_pow22523_4 BNE L_fe_pow22523_4
#else
BNE.N L_fe_pow22523_4
#endif
MOV r2, sp MOV r2, sp
ADD r1, sp, #0x20 ADD r1, sp, #0x20
MOV r0, sp MOV r0, sp
@ -3924,7 +4048,11 @@ L_fe_pow22523_5:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_pow22523_5 BNE L_fe_pow22523_5
#else
BNE.N L_fe_pow22523_5
#endif
MOV r2, sp MOV r2, sp
ADD r1, sp, #0x20 ADD r1, sp, #0x20
ADD r0, sp, #0x20 ADD r0, sp, #0x20
@ -3940,7 +4068,11 @@ L_fe_pow22523_6:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_pow22523_6 BNE L_fe_pow22523_6
#else
BNE.N L_fe_pow22523_6
#endif
ADD r2, sp, #0x20 ADD r2, sp, #0x20
ADD r1, sp, #0x40 ADD r1, sp, #0x40
ADD r0, sp, #0x20 ADD r0, sp, #0x20
@ -3953,7 +4085,11 @@ L_fe_pow22523_7:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_pow22523_7 BNE L_fe_pow22523_7
#else
BNE.N L_fe_pow22523_7
#endif
MOV r2, sp MOV r2, sp
ADD r1, sp, #0x20 ADD r1, sp, #0x20
MOV r0, sp MOV r0, sp
@ -3966,7 +4102,11 @@ L_fe_pow22523_8:
BL fe_sq_op BL fe_sq_op
POP {r12} POP {r12}
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_fe_pow22523_8 BNE L_fe_pow22523_8
#else
BNE.N L_fe_pow22523_8
#endif
LDR r2, [sp, #100] LDR r2, [sp, #100]
MOV r1, sp MOV r1, sp
LDR r0, [sp, #96] LDR r0, [sp, #96]

View File

@ -37,6 +37,18 @@
#endif /* HAVE_CONFIG_H */ #endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h> #include <wolfssl/wolfcrypt/settings.h>
#ifdef WOLFSSL_ARMASM_INLINE #ifdef WOLFSSL_ARMASM_INLINE
#ifdef WOLFSSL_ARMASM
#if !defined(__aarch64__) && defined(__arm__)
#ifdef __IAR_SYSTEMS_ICC__
#define __asm__ asm
#define __volatile__ volatile
#endif /* __IAR_SYSTEMS_ICC__ */
#ifdef __KEIL__
#define __asm__ __asm
#define __volatile__ volatile
#endif /* __KEIL__ */
/* Based on work by: Emil Lenngren /* Based on work by: Emil Lenngren
* https://github.com/pornin/X25519-Cortex-M4 * https://github.com/pornin/X25519-Cortex-M4
*/ */
@ -2815,12 +2827,20 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"LDR %[n], [sp, #180]\n\t" "LDR %[n], [sp, #180]\n\t"
"SUBS %[n], %[n], #0x1\n\t" "SUBS %[n], %[n], #0x1\n\t"
"STR %[n], [sp, #180]\n\t" "STR %[n], [sp, #180]\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BGE L_curve25519_bits_%=\n\t" "BGE L_curve25519_bits_%=\n\t"
#else
"BGE.N L_curve25519_bits_%=\n\t"
#endif
"MOV %[n], #0x1f\n\t" "MOV %[n], #0x1f\n\t"
"STR %[n], [sp, #180]\n\t" "STR %[n], [sp, #180]\n\t"
"SUBS %[a], %[a], #0x4\n\t" "SUBS %[a], %[a], #0x4\n\t"
"STR %[a], [sp, #176]\n\t" "STR %[a], [sp, #176]\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BGE L_curve25519_words_%=\n\t" "BGE L_curve25519_words_%=\n\t"
#else
"BGE.N L_curve25519_words_%=\n\t"
#endif
/* Invert */ /* Invert */
"ADD r1, sp, #0x0\n\t" "ADD r1, sp, #0x0\n\t"
"ADD r0, sp, #0x20\n\t" "ADD r0, sp, #0x20\n\t"
@ -2858,7 +2878,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_1_%=\n\t" "BNE L_curve25519_inv_1_%=\n\t"
#else
"BNE.N L_curve25519_inv_1_%=\n\t"
#endif
"ADD r2, sp, #0x40\n\t" "ADD r2, sp, #0x40\n\t"
"ADD r1, sp, #0x60\n\t" "ADD r1, sp, #0x60\n\t"
"ADD r0, sp, #0x40\n\t" "ADD r0, sp, #0x40\n\t"
@ -2875,7 +2899,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_2_%=\n\t" "BNE L_curve25519_inv_2_%=\n\t"
#else
"BNE.N L_curve25519_inv_2_%=\n\t"
#endif
"ADD r2, sp, #0x40\n\t" "ADD r2, sp, #0x40\n\t"
"ADD r1, sp, #0x60\n\t" "ADD r1, sp, #0x60\n\t"
"ADD r0, sp, #0x60\n\t" "ADD r0, sp, #0x60\n\t"
@ -2892,7 +2920,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_3_%=\n\t" "BNE L_curve25519_inv_3_%=\n\t"
#else
"BNE.N L_curve25519_inv_3_%=\n\t"
#endif
"ADD r2, sp, #0x60\n\t" "ADD r2, sp, #0x60\n\t"
"ADD r1, sp, #0x80\n\t" "ADD r1, sp, #0x80\n\t"
"ADD r0, sp, #0x60\n\t" "ADD r0, sp, #0x60\n\t"
@ -2906,7 +2938,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_4_%=\n\t" "BNE L_curve25519_inv_4_%=\n\t"
#else
"BNE.N L_curve25519_inv_4_%=\n\t"
#endif
"ADD r2, sp, #0x40\n\t" "ADD r2, sp, #0x40\n\t"
"ADD r1, sp, #0x60\n\t" "ADD r1, sp, #0x60\n\t"
"ADD r0, sp, #0x40\n\t" "ADD r0, sp, #0x40\n\t"
@ -2923,7 +2959,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_5_%=\n\t" "BNE L_curve25519_inv_5_%=\n\t"
#else
"BNE.N L_curve25519_inv_5_%=\n\t"
#endif
"ADD r2, sp, #0x40\n\t" "ADD r2, sp, #0x40\n\t"
"ADD r1, sp, #0x60\n\t" "ADD r1, sp, #0x60\n\t"
"ADD r0, sp, #0x60\n\t" "ADD r0, sp, #0x60\n\t"
@ -2940,7 +2980,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_6_%=\n\t" "BNE L_curve25519_inv_6_%=\n\t"
#else
"BNE.N L_curve25519_inv_6_%=\n\t"
#endif
"ADD r2, sp, #0x60\n\t" "ADD r2, sp, #0x60\n\t"
"ADD r1, sp, #0x80\n\t" "ADD r1, sp, #0x80\n\t"
"ADD r0, sp, #0x60\n\t" "ADD r0, sp, #0x60\n\t"
@ -2954,7 +2998,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_7_%=\n\t" "BNE L_curve25519_inv_7_%=\n\t"
#else
"BNE.N L_curve25519_inv_7_%=\n\t"
#endif
"ADD r2, sp, #0x40\n\t" "ADD r2, sp, #0x40\n\t"
"ADD r1, sp, #0x60\n\t" "ADD r1, sp, #0x60\n\t"
"ADD r0, sp, #0x40\n\t" "ADD r0, sp, #0x40\n\t"
@ -2968,7 +3016,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_8_%=\n\t" "BNE L_curve25519_inv_8_%=\n\t"
#else
"BNE.N L_curve25519_inv_8_%=\n\t"
#endif
"ADD r2, sp, #0x20\n\t" "ADD r2, sp, #0x20\n\t"
"ADD r1, sp, #0x40\n\t" "ADD r1, sp, #0x40\n\t"
"ADD r0, sp, #0x0\n\t" "ADD r0, sp, #0x0\n\t"
@ -3110,7 +3162,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_mul_op\n\t" "BL fe_mul_op\n\t"
"LDR %[a], [sp, #168]\n\t" "LDR %[a], [sp, #168]\n\t"
"SUBS %[a], %[a], #0x1\n\t" "SUBS %[a], %[a], #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BGE L_curve25519_bits_%=\n\t" "BGE L_curve25519_bits_%=\n\t"
#else
"BGE.N L_curve25519_bits_%=\n\t"
#endif
/* Cycle Count: 171 */ /* Cycle Count: 171 */
"LDR %[n], [sp, #184]\n\t" "LDR %[n], [sp, #184]\n\t"
/* Copy */ /* Copy */
@ -3153,7 +3209,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_1_%=\n\t" "BNE L_curve25519_inv_1_%=\n\t"
#else
"BNE.N L_curve25519_inv_1_%=\n\t"
#endif
"ADD r2, sp, #0x40\n\t" "ADD r2, sp, #0x40\n\t"
"ADD r1, sp, #0x60\n\t" "ADD r1, sp, #0x60\n\t"
"ADD r0, sp, #0x40\n\t" "ADD r0, sp, #0x40\n\t"
@ -3170,7 +3230,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_2_%=\n\t" "BNE L_curve25519_inv_2_%=\n\t"
#else
"BNE.N L_curve25519_inv_2_%=\n\t"
#endif
"ADD r2, sp, #0x40\n\t" "ADD r2, sp, #0x40\n\t"
"ADD r1, sp, #0x60\n\t" "ADD r1, sp, #0x60\n\t"
"ADD r0, sp, #0x60\n\t" "ADD r0, sp, #0x60\n\t"
@ -3187,7 +3251,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_3_%=\n\t" "BNE L_curve25519_inv_3_%=\n\t"
#else
"BNE.N L_curve25519_inv_3_%=\n\t"
#endif
"ADD r2, sp, #0x60\n\t" "ADD r2, sp, #0x60\n\t"
"ADD r1, sp, #0x80\n\t" "ADD r1, sp, #0x80\n\t"
"ADD r0, sp, #0x60\n\t" "ADD r0, sp, #0x60\n\t"
@ -3201,7 +3269,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_4_%=\n\t" "BNE L_curve25519_inv_4_%=\n\t"
#else
"BNE.N L_curve25519_inv_4_%=\n\t"
#endif
"ADD r2, sp, #0x40\n\t" "ADD r2, sp, #0x40\n\t"
"ADD r1, sp, #0x60\n\t" "ADD r1, sp, #0x60\n\t"
"ADD r0, sp, #0x40\n\t" "ADD r0, sp, #0x40\n\t"
@ -3218,7 +3290,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_5_%=\n\t" "BNE L_curve25519_inv_5_%=\n\t"
#else
"BNE.N L_curve25519_inv_5_%=\n\t"
#endif
"ADD r2, sp, #0x40\n\t" "ADD r2, sp, #0x40\n\t"
"ADD r1, sp, #0x60\n\t" "ADD r1, sp, #0x60\n\t"
"ADD r0, sp, #0x60\n\t" "ADD r0, sp, #0x60\n\t"
@ -3235,7 +3311,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_6_%=\n\t" "BNE L_curve25519_inv_6_%=\n\t"
#else
"BNE.N L_curve25519_inv_6_%=\n\t"
#endif
"ADD r2, sp, #0x60\n\t" "ADD r2, sp, #0x60\n\t"
"ADD r1, sp, #0x80\n\t" "ADD r1, sp, #0x80\n\t"
"ADD r0, sp, #0x60\n\t" "ADD r0, sp, #0x60\n\t"
@ -3249,7 +3329,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_7_%=\n\t" "BNE L_curve25519_inv_7_%=\n\t"
#else
"BNE.N L_curve25519_inv_7_%=\n\t"
#endif
"ADD r2, sp, #0x40\n\t" "ADD r2, sp, #0x40\n\t"
"ADD r1, sp, #0x60\n\t" "ADD r1, sp, #0x60\n\t"
"ADD r0, sp, #0x40\n\t" "ADD r0, sp, #0x40\n\t"
@ -3263,7 +3347,11 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_curve25519_inv_8_%=\n\t" "BNE L_curve25519_inv_8_%=\n\t"
#else
"BNE.N L_curve25519_inv_8_%=\n\t"
#endif
"ADD r2, sp, #0x20\n\t" "ADD r2, sp, #0x20\n\t"
"ADD r1, sp, #0x40\n\t" "ADD r1, sp, #0x40\n\t"
"ADD r0, sp, #0x0\n\t" "ADD r0, sp, #0x0\n\t"
@ -3345,7 +3433,11 @@ void fe_invert(fe r_p, const fe a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_invert1_%=\n\t" "BNE L_fe_invert1_%=\n\t"
#else
"BNE.N L_fe_invert1_%=\n\t"
#endif
"ADD r2, sp, #0x20\n\t" "ADD r2, sp, #0x20\n\t"
"ADD r1, sp, #0x40\n\t" "ADD r1, sp, #0x40\n\t"
"ADD r0, sp, #0x20\n\t" "ADD r0, sp, #0x20\n\t"
@ -3362,7 +3454,11 @@ void fe_invert(fe r_p, const fe a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_invert2_%=\n\t" "BNE L_fe_invert2_%=\n\t"
#else
"BNE.N L_fe_invert2_%=\n\t"
#endif
"ADD r2, sp, #0x20\n\t" "ADD r2, sp, #0x20\n\t"
"ADD r1, sp, #0x40\n\t" "ADD r1, sp, #0x40\n\t"
"ADD r0, sp, #0x40\n\t" "ADD r0, sp, #0x40\n\t"
@ -3379,7 +3475,11 @@ void fe_invert(fe r_p, const fe a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_invert3_%=\n\t" "BNE L_fe_invert3_%=\n\t"
#else
"BNE.N L_fe_invert3_%=\n\t"
#endif
"ADD r2, sp, #0x40\n\t" "ADD r2, sp, #0x40\n\t"
"ADD r1, sp, #0x60\n\t" "ADD r1, sp, #0x60\n\t"
"ADD r0, sp, #0x40\n\t" "ADD r0, sp, #0x40\n\t"
@ -3393,7 +3493,11 @@ void fe_invert(fe r_p, const fe a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_invert4_%=\n\t" "BNE L_fe_invert4_%=\n\t"
#else
"BNE.N L_fe_invert4_%=\n\t"
#endif
"ADD r2, sp, #0x20\n\t" "ADD r2, sp, #0x20\n\t"
"ADD r1, sp, #0x40\n\t" "ADD r1, sp, #0x40\n\t"
"ADD r0, sp, #0x20\n\t" "ADD r0, sp, #0x20\n\t"
@ -3410,7 +3514,11 @@ void fe_invert(fe r_p, const fe a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_invert5_%=\n\t" "BNE L_fe_invert5_%=\n\t"
#else
"BNE.N L_fe_invert5_%=\n\t"
#endif
"ADD r2, sp, #0x20\n\t" "ADD r2, sp, #0x20\n\t"
"ADD r1, sp, #0x40\n\t" "ADD r1, sp, #0x40\n\t"
"ADD r0, sp, #0x40\n\t" "ADD r0, sp, #0x40\n\t"
@ -3427,7 +3535,11 @@ void fe_invert(fe r_p, const fe a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_invert6_%=\n\t" "BNE L_fe_invert6_%=\n\t"
#else
"BNE.N L_fe_invert6_%=\n\t"
#endif
"ADD r2, sp, #0x40\n\t" "ADD r2, sp, #0x40\n\t"
"ADD r1, sp, #0x60\n\t" "ADD r1, sp, #0x60\n\t"
"ADD r0, sp, #0x40\n\t" "ADD r0, sp, #0x40\n\t"
@ -3441,7 +3553,11 @@ void fe_invert(fe r_p, const fe a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_invert7_%=\n\t" "BNE L_fe_invert7_%=\n\t"
#else
"BNE.N L_fe_invert7_%=\n\t"
#endif
"ADD r2, sp, #0x20\n\t" "ADD r2, sp, #0x20\n\t"
"ADD r1, sp, #0x40\n\t" "ADD r1, sp, #0x40\n\t"
"ADD r0, sp, #0x20\n\t" "ADD r0, sp, #0x20\n\t"
@ -3455,7 +3571,11 @@ void fe_invert(fe r_p, const fe a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_invert8_%=\n\t" "BNE L_fe_invert8_%=\n\t"
#else
"BNE.N L_fe_invert8_%=\n\t"
#endif
"MOV r2, sp\n\t" "MOV r2, sp\n\t"
"ADD r1, sp, #0x20\n\t" "ADD r1, sp, #0x20\n\t"
"LDR r0, [sp, #128]\n\t" "LDR r0, [sp, #128]\n\t"
@ -3981,7 +4101,11 @@ void fe_pow22523(fe r_p, const fe a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_pow22523_1_%=\n\t" "BNE L_fe_pow22523_1_%=\n\t"
#else
"BNE.N L_fe_pow22523_1_%=\n\t"
#endif
"MOV r2, sp\n\t" "MOV r2, sp\n\t"
"ADD r1, sp, #0x20\n\t" "ADD r1, sp, #0x20\n\t"
"MOV r0, sp\n\t" "MOV r0, sp\n\t"
@ -3998,7 +4122,11 @@ void fe_pow22523(fe r_p, const fe a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_pow22523_2_%=\n\t" "BNE L_fe_pow22523_2_%=\n\t"
#else
"BNE.N L_fe_pow22523_2_%=\n\t"
#endif
"MOV r2, sp\n\t" "MOV r2, sp\n\t"
"ADD r1, sp, #0x20\n\t" "ADD r1, sp, #0x20\n\t"
"ADD r0, sp, #0x20\n\t" "ADD r0, sp, #0x20\n\t"
@ -4015,7 +4143,11 @@ void fe_pow22523(fe r_p, const fe a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_pow22523_3_%=\n\t" "BNE L_fe_pow22523_3_%=\n\t"
#else
"BNE.N L_fe_pow22523_3_%=\n\t"
#endif
"ADD r2, sp, #0x20\n\t" "ADD r2, sp, #0x20\n\t"
"ADD r1, sp, #0x40\n\t" "ADD r1, sp, #0x40\n\t"
"ADD r0, sp, #0x20\n\t" "ADD r0, sp, #0x20\n\t"
@ -4029,7 +4161,11 @@ void fe_pow22523(fe r_p, const fe a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_pow22523_4_%=\n\t" "BNE L_fe_pow22523_4_%=\n\t"
#else
"BNE.N L_fe_pow22523_4_%=\n\t"
#endif
"MOV r2, sp\n\t" "MOV r2, sp\n\t"
"ADD r1, sp, #0x20\n\t" "ADD r1, sp, #0x20\n\t"
"MOV r0, sp\n\t" "MOV r0, sp\n\t"
@ -4046,7 +4182,11 @@ void fe_pow22523(fe r_p, const fe a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_pow22523_5_%=\n\t" "BNE L_fe_pow22523_5_%=\n\t"
#else
"BNE.N L_fe_pow22523_5_%=\n\t"
#endif
"MOV r2, sp\n\t" "MOV r2, sp\n\t"
"ADD r1, sp, #0x20\n\t" "ADD r1, sp, #0x20\n\t"
"ADD r0, sp, #0x20\n\t" "ADD r0, sp, #0x20\n\t"
@ -4063,7 +4203,11 @@ void fe_pow22523(fe r_p, const fe a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_pow22523_6_%=\n\t" "BNE L_fe_pow22523_6_%=\n\t"
#else
"BNE.N L_fe_pow22523_6_%=\n\t"
#endif
"ADD r2, sp, #0x20\n\t" "ADD r2, sp, #0x20\n\t"
"ADD r1, sp, #0x40\n\t" "ADD r1, sp, #0x40\n\t"
"ADD r0, sp, #0x20\n\t" "ADD r0, sp, #0x20\n\t"
@ -4077,7 +4221,11 @@ void fe_pow22523(fe r_p, const fe a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_pow22523_7_%=\n\t" "BNE L_fe_pow22523_7_%=\n\t"
#else
"BNE.N L_fe_pow22523_7_%=\n\t"
#endif
"MOV r2, sp\n\t" "MOV r2, sp\n\t"
"ADD r1, sp, #0x20\n\t" "ADD r1, sp, #0x20\n\t"
"MOV r0, sp\n\t" "MOV r0, sp\n\t"
@ -4091,7 +4239,11 @@ void fe_pow22523(fe r_p, const fe a_p)
"BL fe_sq_op\n\t" "BL fe_sq_op\n\t"
"POP {r12}\n\t" "POP {r12}\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_fe_pow22523_8_%=\n\t" "BNE L_fe_pow22523_8_%=\n\t"
#else
"BNE.N L_fe_pow22523_8_%=\n\t"
#endif
"LDR r2, [sp, #100]\n\t" "LDR r2, [sp, #100]\n\t"
"MOV r1, sp\n\t" "MOV r1, sp\n\t"
"LDR r0, [sp, #96]\n\t" "LDR r0, [sp, #96]\n\t"
@ -5289,7 +5441,7 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
"SUB sp, sp, #0x50\n\t" "SUB sp, sp, #0x50\n\t"
"ADD lr, sp, #0x44\n\t" "ADD lr, sp, #0x44\n\t"
"STM lr, {%[s], %[a], %[c]}\n\t" "STM lr, {%[s], %[a], %[c]}\n\t"
"MOV %[r], #0x0\n\t" "MOV %[s], #0x0\n\t"
"LDR r12, [%[a]]\n\t" "LDR r12, [%[a]]\n\t"
/* A[0] * B[0] */ /* A[0] * B[0] */
"LDR lr, [%[b]]\n\t" "LDR lr, [%[b]]\n\t"
@ -5306,25 +5458,25 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
"STR %[c], [sp]\n\t" "STR %[c], [sp]\n\t"
/* A[0] * B[1] */ /* A[0] * B[1] */
"LDR lr, [%[b], #4]\n\t" "LDR lr, [%[b], #4]\n\t"
"MOV r11, %[r]\n\t" "MOV r11, %[s]\n\t"
"UMLAL r4, r11, r12, lr\n\t" "UMLAL r4, r11, r12, lr\n\t"
"ADDS r5, r5, r11\n\t" "ADDS r5, r5, r11\n\t"
/* A[0] * B[3] */ /* A[0] * B[3] */
"LDR lr, [%[b], #12]\n\t" "LDR lr, [%[b], #12]\n\t"
"ADCS r6, r6, #0x0\n\t" "ADCS r6, r6, #0x0\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r6, r11, r12, lr\n\t" "UMLAL r6, r11, r12, lr\n\t"
"ADDS r7, r7, r11\n\t" "ADDS r7, r7, r11\n\t"
/* A[0] * B[5] */ /* A[0] * B[5] */
"LDR lr, [%[b], #20]\n\t" "LDR lr, [%[b], #20]\n\t"
"ADCS r8, r8, #0x0\n\t" "ADCS r8, r8, #0x0\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r8, r11, r12, lr\n\t" "UMLAL r8, r11, r12, lr\n\t"
"ADDS r9, r9, r11\n\t" "ADDS r9, r9, r11\n\t"
/* A[0] * B[7] */ /* A[0] * B[7] */
"LDR lr, [%[b], #28]\n\t" "LDR lr, [%[b], #28]\n\t"
"ADCS r10, r10, #0x0\n\t" "ADCS r10, r10, #0x0\n\t"
"ADC %[c], %[r], #0x0\n\t" "ADC %[c], %[s], #0x0\n\t"
"UMLAL r10, %[c], r12, lr\n\t" "UMLAL r10, %[c], r12, lr\n\t"
/* A[1] * B[0] */ /* A[1] * B[0] */
"LDR r12, [%[a], #4]\n\t" "LDR r12, [%[a], #4]\n\t"
@ -5335,37 +5487,37 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
"ADDS r5, r5, r11\n\t" "ADDS r5, r5, r11\n\t"
/* A[1] * B[1] */ /* A[1] * B[1] */
"LDR lr, [%[b], #4]\n\t" "LDR lr, [%[b], #4]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r5, r11, r12, lr\n\t" "UMLAL r5, r11, r12, lr\n\t"
"ADDS r6, r6, r11\n\t" "ADDS r6, r6, r11\n\t"
/* A[1] * B[2] */ /* A[1] * B[2] */
"LDR lr, [%[b], #8]\n\t" "LDR lr, [%[b], #8]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r6, r11, r12, lr\n\t" "UMLAL r6, r11, r12, lr\n\t"
"ADDS r7, r7, r11\n\t" "ADDS r7, r7, r11\n\t"
/* A[1] * B[3] */ /* A[1] * B[3] */
"LDR lr, [%[b], #12]\n\t" "LDR lr, [%[b], #12]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r7, r11, r12, lr\n\t" "UMLAL r7, r11, r12, lr\n\t"
"ADDS r8, r8, r11\n\t" "ADDS r8, r8, r11\n\t"
/* A[1] * B[4] */ /* A[1] * B[4] */
"LDR lr, [%[b], #16]\n\t" "LDR lr, [%[b], #16]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r8, r11, r12, lr\n\t" "UMLAL r8, r11, r12, lr\n\t"
"ADDS r9, r9, r11\n\t" "ADDS r9, r9, r11\n\t"
/* A[1] * B[5] */ /* A[1] * B[5] */
"LDR lr, [%[b], #20]\n\t" "LDR lr, [%[b], #20]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r9, r11, r12, lr\n\t" "UMLAL r9, r11, r12, lr\n\t"
"ADDS r10, r10, r11\n\t" "ADDS r10, r10, r11\n\t"
/* A[1] * B[6] */ /* A[1] * B[6] */
"LDR lr, [%[b], #24]\n\t" "LDR lr, [%[b], #24]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r10, r11, r12, lr\n\t" "UMLAL r10, r11, r12, lr\n\t"
"ADDS %[c], %[c], r11\n\t" "ADDS %[c], %[c], r11\n\t"
/* A[1] * B[7] */ /* A[1] * B[7] */
"LDR lr, [%[b], #28]\n\t" "LDR lr, [%[b], #28]\n\t"
"ADC r4, %[r], #0x0\n\t" "ADC r4, %[s], #0x0\n\t"
"UMLAL %[c], r4, r12, lr\n\t" "UMLAL %[c], r4, r12, lr\n\t"
/* A[2] * B[0] */ /* A[2] * B[0] */
"LDR r12, [%[a], #8]\n\t" "LDR r12, [%[a], #8]\n\t"
@ -5376,37 +5528,37 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
"ADDS r6, r6, r11\n\t" "ADDS r6, r6, r11\n\t"
/* A[2] * B[1] */ /* A[2] * B[1] */
"LDR lr, [%[b], #4]\n\t" "LDR lr, [%[b], #4]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r6, r11, r12, lr\n\t" "UMLAL r6, r11, r12, lr\n\t"
"ADDS r7, r7, r11\n\t" "ADDS r7, r7, r11\n\t"
/* A[2] * B[2] */ /* A[2] * B[2] */
"LDR lr, [%[b], #8]\n\t" "LDR lr, [%[b], #8]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r7, r11, r12, lr\n\t" "UMLAL r7, r11, r12, lr\n\t"
"ADDS r8, r8, r11\n\t" "ADDS r8, r8, r11\n\t"
/* A[2] * B[3] */ /* A[2] * B[3] */
"LDR lr, [%[b], #12]\n\t" "LDR lr, [%[b], #12]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r8, r11, r12, lr\n\t" "UMLAL r8, r11, r12, lr\n\t"
"ADDS r9, r9, r11\n\t" "ADDS r9, r9, r11\n\t"
/* A[2] * B[4] */ /* A[2] * B[4] */
"LDR lr, [%[b], #16]\n\t" "LDR lr, [%[b], #16]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r9, r11, r12, lr\n\t" "UMLAL r9, r11, r12, lr\n\t"
"ADDS r10, r10, r11\n\t" "ADDS r10, r10, r11\n\t"
/* A[2] * B[5] */ /* A[2] * B[5] */
"LDR lr, [%[b], #20]\n\t" "LDR lr, [%[b], #20]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r10, r11, r12, lr\n\t" "UMLAL r10, r11, r12, lr\n\t"
"ADDS %[c], %[c], r11\n\t" "ADDS %[c], %[c], r11\n\t"
/* A[2] * B[6] */ /* A[2] * B[6] */
"LDR lr, [%[b], #24]\n\t" "LDR lr, [%[b], #24]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL %[c], r11, r12, lr\n\t" "UMLAL %[c], r11, r12, lr\n\t"
"ADDS r4, r4, r11\n\t" "ADDS r4, r4, r11\n\t"
/* A[2] * B[7] */ /* A[2] * B[7] */
"LDR lr, [%[b], #28]\n\t" "LDR lr, [%[b], #28]\n\t"
"ADC r5, %[r], #0x0\n\t" "ADC r5, %[s], #0x0\n\t"
"UMLAL r4, r5, r12, lr\n\t" "UMLAL r4, r5, r12, lr\n\t"
/* A[3] * B[0] */ /* A[3] * B[0] */
"LDR r12, [%[a], #12]\n\t" "LDR r12, [%[a], #12]\n\t"
@ -5417,37 +5569,37 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
"ADDS r7, r7, r11\n\t" "ADDS r7, r7, r11\n\t"
/* A[3] * B[1] */ /* A[3] * B[1] */
"LDR lr, [%[b], #4]\n\t" "LDR lr, [%[b], #4]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r7, r11, r12, lr\n\t" "UMLAL r7, r11, r12, lr\n\t"
"ADDS r8, r8, r11\n\t" "ADDS r8, r8, r11\n\t"
/* A[3] * B[2] */ /* A[3] * B[2] */
"LDR lr, [%[b], #8]\n\t" "LDR lr, [%[b], #8]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r8, r11, r12, lr\n\t" "UMLAL r8, r11, r12, lr\n\t"
"ADDS r9, r9, r11\n\t" "ADDS r9, r9, r11\n\t"
/* A[3] * B[3] */ /* A[3] * B[3] */
"LDR lr, [%[b], #12]\n\t" "LDR lr, [%[b], #12]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r9, r11, r12, lr\n\t" "UMLAL r9, r11, r12, lr\n\t"
"ADDS r10, r10, r11\n\t" "ADDS r10, r10, r11\n\t"
/* A[3] * B[4] */ /* A[3] * B[4] */
"LDR lr, [%[b], #16]\n\t" "LDR lr, [%[b], #16]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r10, r11, r12, lr\n\t" "UMLAL r10, r11, r12, lr\n\t"
"ADDS %[c], %[c], r11\n\t" "ADDS %[c], %[c], r11\n\t"
/* A[3] * B[5] */ /* A[3] * B[5] */
"LDR lr, [%[b], #20]\n\t" "LDR lr, [%[b], #20]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL %[c], r11, r12, lr\n\t" "UMLAL %[c], r11, r12, lr\n\t"
"ADDS r4, r4, r11\n\t" "ADDS r4, r4, r11\n\t"
/* A[3] * B[6] */ /* A[3] * B[6] */
"LDR lr, [%[b], #24]\n\t" "LDR lr, [%[b], #24]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r4, r11, r12, lr\n\t" "UMLAL r4, r11, r12, lr\n\t"
"ADDS r5, r5, r11\n\t" "ADDS r5, r5, r11\n\t"
/* A[3] * B[7] */ /* A[3] * B[7] */
"LDR lr, [%[b], #28]\n\t" "LDR lr, [%[b], #28]\n\t"
"ADC r6, %[r], #0x0\n\t" "ADC r6, %[s], #0x0\n\t"
"UMLAL r5, r6, r12, lr\n\t" "UMLAL r5, r6, r12, lr\n\t"
/* A[4] * B[0] */ /* A[4] * B[0] */
"LDR r12, [%[a], #16]\n\t" "LDR r12, [%[a], #16]\n\t"
@ -5458,37 +5610,37 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
"ADDS r8, r8, r11\n\t" "ADDS r8, r8, r11\n\t"
/* A[4] * B[1] */ /* A[4] * B[1] */
"LDR lr, [%[b], #4]\n\t" "LDR lr, [%[b], #4]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r8, r11, r12, lr\n\t" "UMLAL r8, r11, r12, lr\n\t"
"ADDS r9, r9, r11\n\t" "ADDS r9, r9, r11\n\t"
/* A[4] * B[2] */ /* A[4] * B[2] */
"LDR lr, [%[b], #8]\n\t" "LDR lr, [%[b], #8]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r9, r11, r12, lr\n\t" "UMLAL r9, r11, r12, lr\n\t"
"ADDS r10, r10, r11\n\t" "ADDS r10, r10, r11\n\t"
/* A[4] * B[3] */ /* A[4] * B[3] */
"LDR lr, [%[b], #12]\n\t" "LDR lr, [%[b], #12]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r10, r11, r12, lr\n\t" "UMLAL r10, r11, r12, lr\n\t"
"ADDS %[c], %[c], r11\n\t" "ADDS %[c], %[c], r11\n\t"
/* A[4] * B[4] */ /* A[4] * B[4] */
"LDR lr, [%[b], #16]\n\t" "LDR lr, [%[b], #16]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL %[c], r11, r12, lr\n\t" "UMLAL %[c], r11, r12, lr\n\t"
"ADDS r4, r4, r11\n\t" "ADDS r4, r4, r11\n\t"
/* A[4] * B[5] */ /* A[4] * B[5] */
"LDR lr, [%[b], #20]\n\t" "LDR lr, [%[b], #20]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r4, r11, r12, lr\n\t" "UMLAL r4, r11, r12, lr\n\t"
"ADDS r5, r5, r11\n\t" "ADDS r5, r5, r11\n\t"
/* A[4] * B[6] */ /* A[4] * B[6] */
"LDR lr, [%[b], #24]\n\t" "LDR lr, [%[b], #24]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r5, r11, r12, lr\n\t" "UMLAL r5, r11, r12, lr\n\t"
"ADDS r6, r6, r11\n\t" "ADDS r6, r6, r11\n\t"
/* A[4] * B[7] */ /* A[4] * B[7] */
"LDR lr, [%[b], #28]\n\t" "LDR lr, [%[b], #28]\n\t"
"ADC r7, %[r], #0x0\n\t" "ADC r7, %[s], #0x0\n\t"
"UMLAL r6, r7, r12, lr\n\t" "UMLAL r6, r7, r12, lr\n\t"
/* A[5] * B[0] */ /* A[5] * B[0] */
"LDR r12, [%[a], #20]\n\t" "LDR r12, [%[a], #20]\n\t"
@ -5499,37 +5651,37 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
"ADDS r9, r9, r11\n\t" "ADDS r9, r9, r11\n\t"
/* A[5] * B[1] */ /* A[5] * B[1] */
"LDR lr, [%[b], #4]\n\t" "LDR lr, [%[b], #4]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r9, r11, r12, lr\n\t" "UMLAL r9, r11, r12, lr\n\t"
"ADDS r10, r10, r11\n\t" "ADDS r10, r10, r11\n\t"
/* A[5] * B[2] */ /* A[5] * B[2] */
"LDR lr, [%[b], #8]\n\t" "LDR lr, [%[b], #8]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r10, r11, r12, lr\n\t" "UMLAL r10, r11, r12, lr\n\t"
"ADDS %[c], %[c], r11\n\t" "ADDS %[c], %[c], r11\n\t"
/* A[5] * B[3] */ /* A[5] * B[3] */
"LDR lr, [%[b], #12]\n\t" "LDR lr, [%[b], #12]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL %[c], r11, r12, lr\n\t" "UMLAL %[c], r11, r12, lr\n\t"
"ADDS r4, r4, r11\n\t" "ADDS r4, r4, r11\n\t"
/* A[5] * B[4] */ /* A[5] * B[4] */
"LDR lr, [%[b], #16]\n\t" "LDR lr, [%[b], #16]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r4, r11, r12, lr\n\t" "UMLAL r4, r11, r12, lr\n\t"
"ADDS r5, r5, r11\n\t" "ADDS r5, r5, r11\n\t"
/* A[5] * B[5] */ /* A[5] * B[5] */
"LDR lr, [%[b], #20]\n\t" "LDR lr, [%[b], #20]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r5, r11, r12, lr\n\t" "UMLAL r5, r11, r12, lr\n\t"
"ADDS r6, r6, r11\n\t" "ADDS r6, r6, r11\n\t"
/* A[5] * B[6] */ /* A[5] * B[6] */
"LDR lr, [%[b], #24]\n\t" "LDR lr, [%[b], #24]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r6, r11, r12, lr\n\t" "UMLAL r6, r11, r12, lr\n\t"
"ADDS r7, r7, r11\n\t" "ADDS r7, r7, r11\n\t"
/* A[5] * B[7] */ /* A[5] * B[7] */
"LDR lr, [%[b], #28]\n\t" "LDR lr, [%[b], #28]\n\t"
"ADC r8, %[r], #0x0\n\t" "ADC r8, %[s], #0x0\n\t"
"UMLAL r7, r8, r12, lr\n\t" "UMLAL r7, r8, r12, lr\n\t"
/* A[6] * B[0] */ /* A[6] * B[0] */
"LDR r12, [%[a], #24]\n\t" "LDR r12, [%[a], #24]\n\t"
@ -5540,37 +5692,37 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
"ADDS r10, r10, r11\n\t" "ADDS r10, r10, r11\n\t"
/* A[6] * B[1] */ /* A[6] * B[1] */
"LDR lr, [%[b], #4]\n\t" "LDR lr, [%[b], #4]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r10, r11, r12, lr\n\t" "UMLAL r10, r11, r12, lr\n\t"
"ADDS %[c], %[c], r11\n\t" "ADDS %[c], %[c], r11\n\t"
/* A[6] * B[2] */ /* A[6] * B[2] */
"LDR lr, [%[b], #8]\n\t" "LDR lr, [%[b], #8]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL %[c], r11, r12, lr\n\t" "UMLAL %[c], r11, r12, lr\n\t"
"ADDS r4, r4, r11\n\t" "ADDS r4, r4, r11\n\t"
/* A[6] * B[3] */ /* A[6] * B[3] */
"LDR lr, [%[b], #12]\n\t" "LDR lr, [%[b], #12]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r4, r11, r12, lr\n\t" "UMLAL r4, r11, r12, lr\n\t"
"ADDS r5, r5, r11\n\t" "ADDS r5, r5, r11\n\t"
/* A[6] * B[4] */ /* A[6] * B[4] */
"LDR lr, [%[b], #16]\n\t" "LDR lr, [%[b], #16]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r5, r11, r12, lr\n\t" "UMLAL r5, r11, r12, lr\n\t"
"ADDS r6, r6, r11\n\t" "ADDS r6, r6, r11\n\t"
/* A[6] * B[5] */ /* A[6] * B[5] */
"LDR lr, [%[b], #20]\n\t" "LDR lr, [%[b], #20]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r6, r11, r12, lr\n\t" "UMLAL r6, r11, r12, lr\n\t"
"ADDS r7, r7, r11\n\t" "ADDS r7, r7, r11\n\t"
/* A[6] * B[6] */ /* A[6] * B[6] */
"LDR lr, [%[b], #24]\n\t" "LDR lr, [%[b], #24]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r7, r11, r12, lr\n\t" "UMLAL r7, r11, r12, lr\n\t"
"ADDS r8, r8, r11\n\t" "ADDS r8, r8, r11\n\t"
/* A[6] * B[7] */ /* A[6] * B[7] */
"LDR lr, [%[b], #28]\n\t" "LDR lr, [%[b], #28]\n\t"
"ADC r9, %[r], #0x0\n\t" "ADC r9, %[s], #0x0\n\t"
"UMLAL r8, r9, r12, lr\n\t" "UMLAL r8, r9, r12, lr\n\t"
/* A[7] * B[0] */ /* A[7] * B[0] */
"LDR r12, [%[a], #28]\n\t" "LDR r12, [%[a], #28]\n\t"
@ -5581,37 +5733,37 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
"ADDS %[c], %[c], r11\n\t" "ADDS %[c], %[c], r11\n\t"
/* A[7] * B[1] */ /* A[7] * B[1] */
"LDR lr, [%[b], #4]\n\t" "LDR lr, [%[b], #4]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL %[c], r11, r12, lr\n\t" "UMLAL %[c], r11, r12, lr\n\t"
"ADDS r4, r4, r11\n\t" "ADDS r4, r4, r11\n\t"
/* A[7] * B[2] */ /* A[7] * B[2] */
"LDR lr, [%[b], #8]\n\t" "LDR lr, [%[b], #8]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r4, r11, r12, lr\n\t" "UMLAL r4, r11, r12, lr\n\t"
"ADDS r5, r5, r11\n\t" "ADDS r5, r5, r11\n\t"
/* A[7] * B[3] */ /* A[7] * B[3] */
"LDR lr, [%[b], #12]\n\t" "LDR lr, [%[b], #12]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r5, r11, r12, lr\n\t" "UMLAL r5, r11, r12, lr\n\t"
"ADDS r6, r6, r11\n\t" "ADDS r6, r6, r11\n\t"
/* A[7] * B[4] */ /* A[7] * B[4] */
"LDR lr, [%[b], #16]\n\t" "LDR lr, [%[b], #16]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r6, r11, r12, lr\n\t" "UMLAL r6, r11, r12, lr\n\t"
"ADDS r7, r7, r11\n\t" "ADDS r7, r7, r11\n\t"
/* A[7] * B[5] */ /* A[7] * B[5] */
"LDR lr, [%[b], #20]\n\t" "LDR lr, [%[b], #20]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r7, r11, r12, lr\n\t" "UMLAL r7, r11, r12, lr\n\t"
"ADDS r8, r8, r11\n\t" "ADDS r8, r8, r11\n\t"
/* A[7] * B[6] */ /* A[7] * B[6] */
"LDR lr, [%[b], #24]\n\t" "LDR lr, [%[b], #24]\n\t"
"ADC r11, %[r], #0x0\n\t" "ADC r11, %[s], #0x0\n\t"
"UMLAL r8, r11, r12, lr\n\t" "UMLAL r8, r11, r12, lr\n\t"
"ADDS r9, r9, r11\n\t" "ADDS r9, r9, r11\n\t"
/* A[7] * B[7] */ /* A[7] * B[7] */
"LDR lr, [%[b], #28]\n\t" "LDR lr, [%[b], #28]\n\t"
"ADC r10, %[r], #0x0\n\t" "ADC r10, %[s], #0x0\n\t"
"UMLAL r9, r10, r12, lr\n\t" "UMLAL r9, r10, r12, lr\n\t"
"ADD lr, sp, #0x20\n\t" "ADD lr, sp, #0x20\n\t"
"STM lr, {%[c], r4, r5, r6, r7, r8, r9, r10}\n\t" "STM lr, {%[c], r4, r5, r6, r7, r8, r9, r10}\n\t"
@ -6505,4 +6657,7 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
#endif /* HAVE_CURVE25519 || HAVE_ED25519 */ #endif /* HAVE_CURVE25519 || HAVE_ED25519 */
#endif /* !__aarch64__ && __thumb__ */ #endif /* !__aarch64__ && __thumb__ */
#endif /* WOLFSSL_ARMASM */ #endif /* WOLFSSL_ARMASM */
#endif /* !defined(__aarch64__) && defined(__arm__) */
#endif /* WOLFSSL_ARMASM */
#endif /* WOLFSSL_ARMASM_INLINE */ #endif /* WOLFSSL_ARMASM_INLINE */

View File

@ -925,7 +925,11 @@ L_SHA256_transform_len_start:
STR r9, [sp, #60] STR r9, [sp, #60]
ADD r3, r3, #0x40 ADD r3, r3, #0x40
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_SHA256_transform_len_start BNE L_SHA256_transform_len_start
#else
BNE.N L_SHA256_transform_len_start
#endif
# Round 0 # Round 0
LDR r5, [r0, #16] LDR r5, [r0, #16]
LDR r6, [r0, #20] LDR r6, [r0, #20]
@ -1466,7 +1470,11 @@ L_SHA256_transform_len_start:
SUBS r2, r2, #0x40 SUBS r2, r2, #0x40
SUB r3, r3, #0xc0 SUB r3, r3, #0xc0
ADD r1, r1, #0x40 ADD r1, r1, #0x40
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_SHA256_transform_len_begin BNE L_SHA256_transform_len_begin
#else
BNE.N L_SHA256_transform_len_begin
#endif
ADD sp, sp, #0xc0 ADD sp, sp, #0xc0
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}
# Cycle Count = 1874 # Cycle Count = 1874

View File

@ -37,6 +37,18 @@
#endif /* HAVE_CONFIG_H */ #endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h> #include <wolfssl/wolfcrypt/settings.h>
#ifdef WOLFSSL_ARMASM_INLINE #ifdef WOLFSSL_ARMASM_INLINE
#ifdef WOLFSSL_ARMASM
#if !defined(__aarch64__) && defined(__arm__)
#ifdef __IAR_SYSTEMS_ICC__
#define __asm__ asm
#define __volatile__ volatile
#endif /* __IAR_SYSTEMS_ICC__ */
#ifdef __KEIL__
#define __asm__ __asm
#define __volatile__ volatile
#endif /* __KEIL__ */
#ifndef NO_SHA256 #ifndef NO_SHA256
#include <wolfssl/wolfcrypt/sha256.h> #include <wolfssl/wolfcrypt/sha256.h>
@ -885,7 +897,11 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"STR r9, [sp, #60]\n\t" "STR r9, [sp, #60]\n\t"
"ADD r3, r3, #0x40\n\t" "ADD r3, r3, #0x40\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_SHA256_transform_len_start_%=\n\t" "BNE L_SHA256_transform_len_start_%=\n\t"
#else
"BNE.N L_SHA256_transform_len_start_%=\n\t"
#endif
/* Round 0 */ /* Round 0 */
"LDR r5, [%[sha256], #16]\n\t" "LDR r5, [%[sha256], #16]\n\t"
"LDR r6, [%[sha256], #20]\n\t" "LDR r6, [%[sha256], #20]\n\t"
@ -1426,7 +1442,11 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
"SUBS %[len], %[len], #0x40\n\t" "SUBS %[len], %[len], #0x40\n\t"
"SUB r3, r3, #0xc0\n\t" "SUB r3, r3, #0xc0\n\t"
"ADD %[data], %[data], #0x40\n\t" "ADD %[data], %[data], #0x40\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_SHA256_transform_len_begin_%=\n\t" "BNE L_SHA256_transform_len_begin_%=\n\t"
#else
"BNE.N L_SHA256_transform_len_begin_%=\n\t"
#endif
"ADD sp, sp, #0xc0\n\t" "ADD sp, sp, #0xc0\n\t"
: [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len), [L_SHA256_transform_len_k] "+r" (L_SHA256_transform_len_k_c) : [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len), [L_SHA256_transform_len_k] "+r" (L_SHA256_transform_len_k_c)
: :
@ -1438,4 +1458,7 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
#endif /* !NO_SHA256 */ #endif /* !NO_SHA256 */
#endif /* !__aarch64__ && __thumb__ */ #endif /* !__aarch64__ && __thumb__ */
#endif /* WOLFSSL_ARMASM */ #endif /* WOLFSSL_ARMASM */
#endif /* !defined(__aarch64__) && defined(__arm__) */
#endif /* WOLFSSL_ARMASM */
#endif /* WOLFSSL_ARMASM_INLINE */ #endif /* WOLFSSL_ARMASM_INLINE */

View File

@ -2319,7 +2319,11 @@ L_SHA512_transform_len_start:
STRD r4, r5, [sp, #120] STRD r4, r5, [sp, #120]
ADD r3, r3, #0x80 ADD r3, r3, #0x80
SUBS r12, r12, #0x1 SUBS r12, r12, #0x1
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_SHA512_transform_len_start BNE L_SHA512_transform_len_start
#else
BNE.N L_SHA512_transform_len_start
#endif
# Round 0 # Round 0
LDRD r4, r5, [r0, #32] LDRD r4, r5, [r0, #32]
LSRS r6, r4, #14 LSRS r6, r4, #14
@ -3652,7 +3656,11 @@ L_SHA512_transform_len_start:
SUBS r2, r2, #0x80 SUBS r2, r2, #0x80
SUB r3, r3, #0x200 SUB r3, r3, #0x200
ADD r1, r1, #0x80 ADD r1, r1, #0x80
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
BNE L_SHA512_transform_len_begin BNE L_SHA512_transform_len_begin
#else
BNE.N L_SHA512_transform_len_begin
#endif
EOR r0, r0, r0 EOR r0, r0, r0
ADD sp, sp, #0xc0 ADD sp, sp, #0xc0
POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} POP {r4, r5, r6, r7, r8, r9, r10, r11, pc}

View File

@ -37,6 +37,18 @@
#endif /* HAVE_CONFIG_H */ #endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h> #include <wolfssl/wolfcrypt/settings.h>
#ifdef WOLFSSL_ARMASM_INLINE #ifdef WOLFSSL_ARMASM_INLINE
#ifdef WOLFSSL_ARMASM
#if !defined(__aarch64__) && defined(__arm__)
#ifdef __IAR_SYSTEMS_ICC__
#define __asm__ asm
#define __volatile__ volatile
#endif /* __IAR_SYSTEMS_ICC__ */
#ifdef __KEIL__
#define __asm__ __asm
#define __volatile__ volatile
#endif /* __KEIL__ */
#ifdef WOLFSSL_SHA512 #ifdef WOLFSSL_SHA512
#include <wolfssl/wolfcrypt/sha512.h> #include <wolfssl/wolfcrypt/sha512.h>
@ -2207,7 +2219,11 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
"STRD r4, r5, [sp, #120]\n\t" "STRD r4, r5, [sp, #120]\n\t"
"ADD r3, r3, #0x80\n\t" "ADD r3, r3, #0x80\n\t"
"SUBS r12, r12, #0x1\n\t" "SUBS r12, r12, #0x1\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_SHA512_transform_len_start_%=\n\t" "BNE L_SHA512_transform_len_start_%=\n\t"
#else
"BNE.N L_SHA512_transform_len_start_%=\n\t"
#endif
/* Round 0 */ /* Round 0 */
"LDRD r4, r5, [%[sha512], #32]\n\t" "LDRD r4, r5, [%[sha512], #32]\n\t"
"LSRS r6, r4, #14\n\t" "LSRS r6, r4, #14\n\t"
@ -3540,7 +3556,11 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
"SUBS %[len], %[len], #0x80\n\t" "SUBS %[len], %[len], #0x80\n\t"
"SUB r3, r3, #0x200\n\t" "SUB r3, r3, #0x200\n\t"
"ADD %[data], %[data], #0x80\n\t" "ADD %[data], %[data], #0x80\n\t"
#if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__)
"BNE L_SHA512_transform_len_begin_%=\n\t" "BNE L_SHA512_transform_len_begin_%=\n\t"
#else
"BNE.N L_SHA512_transform_len_begin_%=\n\t"
#endif
"EOR r0, r0, r0\n\t" "EOR r0, r0, r0\n\t"
"ADD sp, sp, #0xc0\n\t" "ADD sp, sp, #0xc0\n\t"
: [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len), [L_SHA512_transform_len_k] "+r" (L_SHA512_transform_len_k_c) : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len), [L_SHA512_transform_len_k] "+r" (L_SHA512_transform_len_k_c)
@ -3553,4 +3573,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
#endif /* WOLFSSL_SHA512 */ #endif /* WOLFSSL_SHA512 */
#endif /* !__aarch64__ && __thumb__ */ #endif /* !__aarch64__ && __thumb__ */
#endif /* WOLFSSL_ARMASM */ #endif /* WOLFSSL_ARMASM */
#endif /* !defined(__aarch64__) && defined(__arm__) */
#endif /* WOLFSSL_ARMASM */
#endif /* WOLFSSL_ARMASM_INLINE */ #endif /* WOLFSSL_ARMASM_INLINE */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1245,7 +1245,7 @@ static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
: [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \ : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
: "cc" \ : "cc" \
) )
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH >= 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 7)
/* Count leading zeros - instruction only available on ARMv7 and newer. */ /* Count leading zeros - instruction only available on ARMv7 and newer. */
#define SP_ASM_LZCNT(va, vn) \ #define SP_ASM_LZCNT(va, vn) \
__asm__ __volatile__ ( \ __asm__ __volatile__ ( \
@ -1272,7 +1272,7 @@ static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
sp_int_digit d) sp_int_digit d)
{ {
sp_int_digit r = 0; sp_int_digit r = 0;
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
static const char debruijn32[32] = { static const char debruijn32[32] = {
0, 31, 9, 30, 3, 8, 13, 29, 2, 5, 7, 21, 12, 24, 28, 19, 0, 31, 9, 30, 3, 8, 13, 29, 2, 5, 7, 21, 12, 24, 28, 19,
1, 10, 4, 14, 6, 22, 25, 20, 11, 15, 23, 26, 16, 27, 17, 18 1, 10, 4, 14, 6, 22, 25, 20, 11, 15, 23, 26, 16, 27, 17, 18
@ -1282,7 +1282,7 @@ static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
__asm__ __volatile__ ( __asm__ __volatile__ (
/* Shift d so that top bit is set. */ /* Shift d so that top bit is set. */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
"ldr r4, %[m]\n\t" "ldr r4, %[m]\n\t"
"mov r5, %[d]\n\t" "mov r5, %[d]\n\t"
"orr r5, r5, r5, lsr #1\n\t" "orr r5, r5, r5, lsr #1\n\t"
@ -1291,8 +1291,8 @@ static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
"orr r5, r5, r5, lsr #8\n\t" "orr r5, r5, r5, lsr #8\n\t"
"orr r5, r5, r5, lsr #16\n\t" "orr r5, r5, r5, lsr #16\n\t"
"add r5, r5, #1\n\t" "add r5, r5, #1\n\t"
"mul r5, r5, r4\n\t" "mul r6, r5, r4\n\t"
"lsr r5, r5, #27\n\t" "lsr r5, r6, #27\n\t"
"ldrb r5, [%[t], r5]\n\t" "ldrb r5, [%[t], r5]\n\t"
#else #else
"clz r5, %[d]\n\t" "clz r5, %[d]\n\t"
@ -1352,7 +1352,7 @@ static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
"sbc r8, r8, r8\n\t" "sbc r8, r8, r8\n\t"
"sub %[r], %[r], r8\n\t" "sub %[r], %[r], r8\n\t"
: [r] "+r" (r), [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d) : [r] "+r" (r), [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7)
: [t] "r" (debruijn32), [m] "m" (debruijn32_mul) : [t] "r" (debruijn32), [m] "m" (debruijn32_mul)
#else #else
: :

File diff suppressed because it is too large Load Diff

View File

@ -56741,52 +56741,6 @@ _sp_256_mont_sub_4:
#ifndef __APPLE__ #ifndef __APPLE__
.size sp_256_mont_sub_4,.-sp_256_mont_sub_4 .size sp_256_mont_sub_4,.-sp_256_mont_sub_4
#endif /* __APPLE__ */ #endif /* __APPLE__ */
/* Subtract two Montgomery form numbers (r = a - b % m).
*
* b is less than the modulus.
*
* r Result of subtration.
* a Number to subtract from in Montgomery form.
* b Number to subtract with in Montgomery form.
* m Modulus (prime).
*/
#ifndef __APPLE__
.text
.globl sp_256_mont_sub_lower_4
.type sp_256_mont_sub_lower_4,@function
.align 16
sp_256_mont_sub_lower_4:
#else
.section __TEXT,__text
.globl _sp_256_mont_sub_lower_4
.p2align 4
_sp_256_mont_sub_lower_4:
#endif /* __APPLE__ */
movq (%rsi), %rax
movq 8(%rsi), %rcx
movq 16(%rsi), %r8
movq 24(%rsi), %r9
subq (%rdx), %rax
movq $0xffffffff, %r10
sbbq 8(%rdx), %rcx
movq $0xffffffff00000001, %r11
sbbq 16(%rdx), %r8
sbbq 24(%rdx), %r9
sbbq %rsi, %rsi
andq %rsi, %r10
andq %rsi, %r11
addq %rsi, %rax
adcq %r10, %rcx
movq %rax, (%rdi)
adcq $0x00, %r8
movq %rcx, 8(%rdi)
adcq %r11, %r9
movq %r8, 16(%rdi)
movq %r9, 24(%rdi)
repz retq
#ifndef __APPLE__
.size sp_256_mont_sub_lower_4,.-sp_256_mont_sub_lower_4
#endif /* __APPLE__ */
/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
* *
* r Result of division by 2. * r Result of division by 2.
@ -56834,71 +56788,6 @@ _sp_256_div2_4:
#ifndef __APPLE__ #ifndef __APPLE__
.size sp_256_div2_4,.-sp_256_div2_4 .size sp_256_div2_4,.-sp_256_div2_4
#endif /* __APPLE__ */ #endif /* __APPLE__ */
/* Triple a Montgomery form number (r = a + a + a % m).
*
* a is less than m.
*
* r Result of Tripling.
* a Number to triple in Montgomery form.
* m Modulus (prime).
*/
#ifndef __APPLE__
.text
.globl sp_256_mont_tpl_lower_4
.type sp_256_mont_tpl_lower_4,@function
.align 16
sp_256_mont_tpl_lower_4:
#else
.section __TEXT,__text
.globl _sp_256_mont_tpl_lower_4
.p2align 4
_sp_256_mont_tpl_lower_4:
#endif /* __APPLE__ */
movq (%rsi), %rdx
movq 8(%rsi), %rax
movq 16(%rsi), %rcx
movq 24(%rsi), %r8
addq %rdx, %rdx
movq $0xffffffff, %r9
adcq %rax, %rax
movq $0xffffffff00000001, %r10
adcq %rcx, %rcx
adcq %r8, %r8
sbbq %r11, %r11
andq %r11, %r9
andq %r11, %r10
subq %r11, %rdx
sbbq %r9, %rax
sbbq $0x00, %rcx
sbbq %r10, %r8
addq (%rsi), %rdx
movq $0xffffffff, %r9
adcq 8(%rsi), %rax
movq $0xffffffff00000001, %r10
adcq 16(%rsi), %rcx
adcq 24(%rsi), %r8
sbbq %r11, %r11
andq %r11, %r9
andq %r11, %r10
subq %r11, %rdx
sbbq %r9, %rax
sbbq $0x00, %rcx
sbbq %r10, %r8
adcq $0x00, %r11
andq %r11, %r9
andq %r11, %r10
subq %r11, %rdx
sbbq %r9, %rax
movq %rdx, (%rdi)
sbbq $0x00, %rcx
movq %rax, 8(%rdi)
sbbq %r10, %r8
movq %rcx, 16(%rdi)
movq %r8, 24(%rdi)
repz retq
#ifndef __APPLE__
.size sp_256_mont_tpl_lower_4,.-sp_256_mont_tpl_lower_4
#endif /* __APPLE__ */
/* Two Montgomery numbers, subtract double second from first (r = a - 2.b % m). /* Two Montgomery numbers, subtract double second from first (r = a - 2.b % m).
* *
* r Result of subtration. * r Result of subtration.
@ -61241,68 +61130,6 @@ _sp_384_mont_sub_6:
#ifndef __APPLE__ #ifndef __APPLE__
.size sp_384_mont_sub_6,.-sp_384_mont_sub_6 .size sp_384_mont_sub_6,.-sp_384_mont_sub_6
#endif /* __APPLE__ */ #endif /* __APPLE__ */
/* Subtract two Montgomery form numbers (r = a - b % m).
*
* b is less than the modulus.
*
* r Result of subtration.
* a Number to subtract from in Montgomery form.
* b Number to subtract with in Montgomery form.
* m Modulus (prime).
*/
#ifndef __APPLE__
.text
.globl sp_384_mont_sub_lower_6
.type sp_384_mont_sub_lower_6,@function
.align 16
sp_384_mont_sub_lower_6:
#else
.section __TEXT,__text
.globl _sp_384_mont_sub_lower_6
.p2align 4
_sp_384_mont_sub_lower_6:
#endif /* __APPLE__ */
pushq %r12
pushq %r13
pushq %r14
movq (%rsi), %rax
movq 8(%rsi), %rcx
movq 16(%rsi), %r8
movq 24(%rsi), %r9
movq 32(%rsi), %r10
movq 40(%rsi), %r11
subq (%rdx), %rax
movq $0xffffffff, %r12
sbbq 8(%rdx), %rcx
movq $0xffffffff00000000, %r13
sbbq 16(%rdx), %r8
movq $0xfffffffffffffffe, %r14
sbbq 24(%rdx), %r9
sbbq 32(%rdx), %r10
sbbq 40(%rdx), %r11
sbbq %rsi, %rsi
andq %rsi, %r12
andq %rsi, %r13
andq %rsi, %r14
addq %r12, %rax
adcq %r13, %rcx
movq %rax, (%rdi)
adcq %r14, %r8
movq %rcx, 8(%rdi)
adcq %rsi, %r9
movq %r8, 16(%rdi)
adcq %rsi, %r10
movq %r9, 24(%rdi)
adcq %rsi, %r11
movq %r10, 32(%rdi)
movq %r11, 40(%rdi)
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size sp_384_mont_sub_lower_6,.-sp_384_mont_sub_lower_6
#endif /* __APPLE__ */
/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
* *
* r Result of division by 2. * r Result of division by 2.
@ -61380,158 +61207,6 @@ _sp_384_div2_6:
#ifndef __APPLE__ #ifndef __APPLE__
.size sp_384_div2_6,.-sp_384_div2_6 .size sp_384_div2_6,.-sp_384_div2_6
#endif /* __APPLE__ */ #endif /* __APPLE__ */
/* Double a Montgomery form number (r = a + a % m).
*
* a is less than m.
*
* r Result of doubling.
* a Number to double in Montgomery form.
* m Modulus (prime).
*/
#ifndef __APPLE__
.text
.globl sp_384_mont_dbl_lower_6
.type sp_384_mont_dbl_lower_6,@function
.align 16
sp_384_mont_dbl_lower_6:
#else
.section __TEXT,__text
.globl _sp_384_mont_dbl_lower_6
.p2align 4
_sp_384_mont_dbl_lower_6:
#endif /* __APPLE__ */
pushq %r12
pushq %r13
pushq %r14
movq (%rsi), %rdx
movq 8(%rsi), %rax
movq 16(%rsi), %rcx
movq 24(%rsi), %r8
movq 32(%rsi), %r9
movq 40(%rsi), %r10
addq %rdx, %rdx
movq $0xffffffff, %r11
adcq %rax, %rax
movq $0xffffffff00000000, %r12
adcq %rcx, %rcx
movq $0xfffffffffffffffe, %r13
adcq %r8, %r8
adcq %r9, %r9
adcq %r10, %r10
sbbq %r14, %r14
andq %r14, %r11
andq %r14, %r12
andq %r14, %r13
subq %r11, %rdx
sbbq %r12, %rax
movq %rdx, (%rdi)
sbbq %r13, %rcx
movq %rax, 8(%rdi)
sbbq %r14, %r8
movq %rcx, 16(%rdi)
sbbq %r14, %r9
movq %r8, 24(%rdi)
sbbq %r14, %r10
movq %r9, 32(%rdi)
movq %r10, 40(%rdi)
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size sp_384_mont_dbl_lower_6,.-sp_384_mont_dbl_lower_6
#endif /* __APPLE__ */
/* Double a Montgomery form number (r = a + a % m).
*
* a is less than m.
*
* r Result of doubling.
* a Number to double in Montgomery form.
* m Modulus (prime).
*/
#ifndef __APPLE__
.text
.globl sp_384_mont_tpl_lower_6
.type sp_384_mont_tpl_lower_6,@function
.align 16
sp_384_mont_tpl_lower_6:
#else
.section __TEXT,__text
.globl _sp_384_mont_tpl_lower_6
.p2align 4
_sp_384_mont_tpl_lower_6:
#endif /* __APPLE__ */
pushq %r12
pushq %r13
pushq %r14
movq (%rsi), %rdx
movq 8(%rsi), %rax
movq 16(%rsi), %rcx
movq 24(%rsi), %r8
movq 32(%rsi), %r9
movq 40(%rsi), %r10
addq %rdx, %rdx
movq $0xffffffff, %r11
adcq %rax, %rax
movq $0xffffffff00000000, %r12
adcq %rcx, %rcx
movq $0xfffffffffffffffe, %r13
adcq %r8, %r8
adcq %r9, %r9
adcq %r10, %r10
sbbq %r14, %r14
andq %r14, %r11
andq %r14, %r12
andq %r14, %r13
subq %r11, %rdx
sbbq %r12, %rax
movq %rdx, (%rdi)
sbbq %r13, %rcx
sbbq %r14, %r8
sbbq %r14, %r9
sbbq %r14, %r10
addq (%rsi), %rdx
movq $0xffffffff, %r11
adcq 8(%rsi), %rax
movq $0xffffffff00000000, %r12
adcq 16(%rsi), %rcx
movq $0xfffffffffffffffe, %r13
adcq 24(%rsi), %r8
adcq 32(%rsi), %r9
adcq 40(%rsi), %r10
sbbq %r14, %r14
andq %r14, %r11
andq %r14, %r12
andq %r14, %r13
subq %r11, %rdx
sbbq %r12, %rax
sbbq %r13, %rcx
sbbq %r14, %r8
sbbq %r14, %r9
sbbq %r14, %r10
adcq $0x00, %r14
andq %r14, %r11
andq %r14, %r12
andq %r14, %r13
subq %r11, %rdx
sbbq %r12, %rax
movq %rdx, (%rdi)
sbbq %r13, %rcx
movq %rax, 8(%rdi)
sbbq %r14, %r8
movq %rcx, 16(%rdi)
sbbq %r14, %r9
movq %r8, 24(%rdi)
sbbq %r14, %r10
movq %r9, 32(%rdi)
movq %r10, 40(%rdi)
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size sp_384_mont_tpl_lower_6,.-sp_384_mont_tpl_lower_6
#endif /* __APPLE__ */
#ifndef WC_NO_CACHE_RESISTANT #ifndef WC_NO_CACHE_RESISTANT
/* Touch each possible point that could be being copied. /* Touch each possible point that could be being copied.
* *

View File

@ -55582,45 +55582,6 @@ sp_256_mont_sub_4 PROC
ret ret
sp_256_mont_sub_4 ENDP sp_256_mont_sub_4 ENDP
_text ENDS _text ENDS
; /* Subtract two Montgomery form numbers (r = a - b % m).
; *
; * b is less than the modulus.
; *
; * r Result of subtration.
; * a Number to subtract from in Montgomery form.
; * b Number to subtract with in Montgomery form.
; * m Modulus (prime).
; */
_text SEGMENT READONLY PARA
sp_256_mont_sub_lower_4 PROC
push r12
push r13
mov rax, QWORD PTR [rdx]
mov r9, QWORD PTR [rdx+8]
mov r10, QWORD PTR [rdx+16]
mov r11, QWORD PTR [rdx+24]
sub rax, QWORD PTR [r8]
mov r12, 4294967295
sbb r9, QWORD PTR [r8+8]
mov r13, 18446744069414584321
sbb r10, QWORD PTR [r8+16]
sbb r11, QWORD PTR [r8+24]
sbb rdx, rdx
and r12, rdx
and r13, rdx
add rax, rdx
adc r9, r12
mov QWORD PTR [rcx], rax
adc r10, 0
mov QWORD PTR [rcx+8], r9
adc r11, r13
mov QWORD PTR [rcx+16], r10
mov QWORD PTR [rcx+24], r11
pop r13
pop r12
ret
sp_256_mont_sub_lower_4 ENDP
_text ENDS
; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) ; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
; * ; *
; * r Result of division by 2. ; * r Result of division by 2.
@ -55661,64 +55622,6 @@ sp_256_div2_4 PROC
ret ret
sp_256_div2_4 ENDP sp_256_div2_4 ENDP
_text ENDS _text ENDS
; /* Triple a Montgomery form number (r = a + a + a % m).
; *
; * a is less than m.
; *
; * r Result of Tripling.
; * a Number to triple in Montgomery form.
; * m Modulus (prime).
; */
_text SEGMENT READONLY PARA
sp_256_mont_tpl_lower_4 PROC
push r12
push r13
mov rax, QWORD PTR [rdx]
mov r8, QWORD PTR [rdx+8]
mov r9, QWORD PTR [rdx+16]
mov r10, QWORD PTR [rdx+24]
add rax, rax
mov r11, 4294967295
adc r8, r8
mov r12, 18446744069414584321
adc r9, r9
adc r10, r10
sbb r13, r13
and r11, r13
and r12, r13
sub rax, r13
sbb r8, r11
sbb r9, 0
sbb r10, r12
add rax, QWORD PTR [rdx]
mov r11, 4294967295
adc r8, QWORD PTR [rdx+8]
mov r12, 18446744069414584321
adc r9, QWORD PTR [rdx+16]
adc r10, QWORD PTR [rdx+24]
sbb r13, r13
and r11, r13
and r12, r13
sub rax, r13
sbb r8, r11
sbb r9, 0
sbb r10, r12
adc r13, 0
and r11, r13
and r12, r13
sub rax, r13
sbb r8, r11
mov QWORD PTR [rcx], rax
sbb r9, 0
mov QWORD PTR [rcx+8], r8
sbb r10, r12
mov QWORD PTR [rcx+16], r9
mov QWORD PTR [rcx+24], r10
pop r13
pop r12
ret
sp_256_mont_tpl_lower_4 ENDP
_text ENDS
; /* Two Montgomery numbers, subtract double second from first (r = a - 2.b % m). ; /* Two Montgomery numbers, subtract double second from first (r = a - 2.b % m).
; * ; *
; * r Result of subtration. ; * r Result of subtration.
@ -59792,61 +59695,6 @@ sp_384_mont_sub_6 PROC
ret ret
sp_384_mont_sub_6 ENDP sp_384_mont_sub_6 ENDP
_text ENDS _text ENDS
; /* Subtract two Montgomery form numbers (r = a - b % m).
; *
; * b is less than the modulus.
; *
; * r Result of subtration.
; * a Number to subtract from in Montgomery form.
; * b Number to subtract with in Montgomery form.
; * m Modulus (prime).
; */
_text SEGMENT READONLY PARA
sp_384_mont_sub_lower_6 PROC
push r12
push r13
push r14
push r15
push rdi
mov rax, QWORD PTR [rdx]
mov r9, QWORD PTR [rdx+8]
mov r10, QWORD PTR [rdx+16]
mov r11, QWORD PTR [rdx+24]
mov r12, QWORD PTR [rdx+32]
mov r13, QWORD PTR [rdx+40]
sub rax, QWORD PTR [r8]
mov r14, 4294967295
sbb r9, QWORD PTR [r8+8]
mov r15, 18446744069414584320
sbb r10, QWORD PTR [r8+16]
mov rdi, 18446744073709551614
sbb r11, QWORD PTR [r8+24]
sbb r12, QWORD PTR [r8+32]
sbb r13, QWORD PTR [r8+40]
sbb rdx, rdx
and r14, rdx
and r15, rdx
and rdi, rdx
add rax, r14
adc r9, r15
mov QWORD PTR [rcx], rax
adc r10, rdi
mov QWORD PTR [rcx+8], r9
adc r11, rdx
mov QWORD PTR [rcx+16], r10
adc r12, rdx
mov QWORD PTR [rcx+24], r11
adc r13, rdx
mov QWORD PTR [rcx+32], r12
mov QWORD PTR [rcx+40], r13
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_384_mont_sub_lower_6 ENDP
_text ENDS
; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) ; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
; * ; *
; * r Result of division by 2. ; * r Result of division by 2.
@ -59917,144 +59765,6 @@ sp_384_div2_6 PROC
ret ret
sp_384_div2_6 ENDP sp_384_div2_6 ENDP
_text ENDS _text ENDS
; /* Double a Montgomery form number (r = a + a % m).
; *
; * a is less than m.
; *
; * r Result of doubling.
; * a Number to double in Montgomery form.
; * m Modulus (prime).
; */
_text SEGMENT READONLY PARA
sp_384_mont_dbl_lower_6 PROC
; Microsoft x64 ABI: rcx = r, rdx = a.
; Doubles a and, when the sum carries out of 2^384, subtracts the P-384
; prime under a mask - branch-free (constant time).
push r12
push r13
push r14
push r15
push rdi
; Load the six 64-bit limbs of a.
mov rax, QWORD PTR [rdx]
mov r8, QWORD PTR [rdx+8]
mov r9, QWORD PTR [rdx+16]
mov r10, QWORD PTR [rdx+24]
mov r11, QWORD PTR [rdx+32]
mov r12, QWORD PTR [rdx+40]
; t = a + a, interleaved with loading the low three limbs of the
; P-384 prime p = 2^384 - 2^128 - 2^96 + 2^32 - 1.
add rax, rax
mov r13, 4294967295                   ; p[0] = 0x00000000ffffffff
adc r8, r8
mov r14, 18446744069414584320         ; p[1] = 0xffffffff00000000
adc r9, r9
mov r15, 18446744073709551614         ; p[2] = 0xfffffffffffffffe
adc r10, r10
adc r11, r11
adc r12, r12
; rdi = all-ones when the doubling carried out of 2^384, else 0.
sbb rdi, rdi
; Mask the low prime limbs; p[3..5] are all-ones, so the mask itself
; serves as the upper limbs in the subtraction below.
and r13, rdi
and r14, rdi
and r15, rdi
; r = t - (p & mask), storing limbs as they are finalized.
sub rax, r13
sbb r8, r14
mov QWORD PTR [rcx], rax
sbb r9, r15
mov QWORD PTR [rcx+8], r8
sbb r10, rdi
mov QWORD PTR [rcx+16], r9
sbb r11, rdi
mov QWORD PTR [rcx+24], r10
sbb r12, rdi
mov QWORD PTR [rcx+32], r11
mov QWORD PTR [rcx+40], r12
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_384_mont_dbl_lower_6 ENDP
_text ENDS
; /* Triple a Montgomery form number (r = a + a + a % m).
; *
; * a is less than m.
; *
; * r Result of tripling.
; * a Number to triple in Montgomery form.
; * m Modulus (prime).
; */
_text SEGMENT READONLY PARA
sp_384_mont_tpl_lower_6 PROC
; Microsoft x64 ABI: rcx = r, rdx = a.
; Computes 2a with a masked reduction, adds a again with a masked
; reduction, then applies one final masked reduction - 3a can exceed
; the modulus by up to two multiples of p. All reductions are
; branch-free (constant time).
push r12
push r13
push r14
push r15
push rdi
; Load the six 64-bit limbs of a.
mov rax, QWORD PTR [rdx]
mov r8, QWORD PTR [rdx+8]
mov r9, QWORD PTR [rdx+16]
mov r10, QWORD PTR [rdx+24]
mov r11, QWORD PTR [rdx+32]
mov r12, QWORD PTR [rdx+40]
; t = a + a, interleaved with loading the low three limbs of the
; P-384 prime p = 2^384 - 2^128 - 2^96 + 2^32 - 1.
add rax, rax
mov r13, 4294967295                   ; p[0] = 0x00000000ffffffff
adc r8, r8
mov r14, 18446744069414584320         ; p[1] = 0xffffffff00000000
adc r9, r9
mov r15, 18446744073709551614         ; p[2] = 0xfffffffffffffffe
adc r10, r10
adc r11, r11
adc r12, r12
; rdi = all-ones when the doubling carried out of 2^384, else 0.
sbb rdi, rdi
; p[3..5] are all-ones, so the mask itself serves as the upper limbs
; in the subtractions below.
and r13, rdi
and r14, rdi
and r15, rdi
; t -= p & mask.
sub rax, r13
sbb r8, r14
mov QWORD PTR [rcx], rax              ; interim store; [rcx] overwritten below
sbb r9, r15
sbb r10, rdi
sbb r11, rdi
sbb r12, rdi
; t += a (third addend), reloading the unmasked prime limbs.
add rax, QWORD PTR [rdx]
mov r13, 4294967295                   ; p[0]
adc r8, QWORD PTR [rdx+8]
mov r14, 18446744069414584320         ; p[1]
adc r9, QWORD PTR [rdx+16]
mov r15, 18446744073709551614         ; p[2]
adc r10, QWORD PTR [rdx+24]
adc r11, QWORD PTR [rdx+32]
adc r12, QWORD PTR [rdx+40]
; rdi = all-ones when the addition carried out of 2^384, else 0.
sbb rdi, rdi
and r13, rdi
and r14, rdi
and r15, rdi
; t -= p & mask.
sub rax, r13
sbb r8, r14
sbb r9, r15
sbb r10, rdi
sbb r11, rdi
sbb r12, rdi
; rdi += CF: the mask is kept (all-ones) only when it was set and the
; masked subtraction did not borrow, i.e. one more subtraction of p is
; still required; otherwise it becomes 0.
adc rdi, 0
and r13, rdi
and r14, rdi
and r15, rdi
; t -= p & mask, storing limbs as they are finalized.
sub rax, r13
sbb r8, r14
mov QWORD PTR [rcx], rax
sbb r9, r15
mov QWORD PTR [rcx+8], r8
sbb r10, rdi
mov QWORD PTR [rcx+16], r9
sbb r11, rdi
mov QWORD PTR [rcx+24], r10
sbb r12, rdi
mov QWORD PTR [rcx+32], r11
mov QWORD PTR [rcx+40], r12
pop rdi
pop r15
pop r14
pop r13
pop r12
ret
sp_384_mont_tpl_lower_6 ENDP
_text ENDS
IFNDEF WC_NO_CACHE_RESISTANT IFNDEF WC_NO_CACHE_RESISTANT
; /* Touch each possible point that could be being copied. ; /* Touch each possible point that could be being copied.
; * ; *