Merge pull request #2275 from SparkiDev/sha512_arm32

Add ARM32 NEON code for SHA-512/384
toddouska committed 2019-06-12 16:19:20 -07:00 (committed by GitHub)
11 changed files with 16449 additions and 6398 deletions

src/include.am

@@ -236,6 +236,7 @@ if BUILD_SHA512
 if BUILD_ARMASM
 src_libwolfssl_la_SOURCES += wolfcrypt/src/port/arm/armv8-sha512.c
 src_libwolfssl_la_SOURCES += wolfcrypt/src/port/arm/armv8-sha512-asm.S
+src_libwolfssl_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-sha512-asm.S
 else
 src_libwolfssl_la_SOURCES += wolfcrypt/src/sha512.c
 if BUILD_INTELASM
@@ -387,7 +388,7 @@ if BUILD_INTELASM
 src_libwolfssl_la_SOURCES += wolfcrypt/src/fe_x25519_asm.S
 else
 if BUILD_ARMASM
-src_libwolfssl_la_SOURCES += wolfcrypt/src/port/arm/armv7-curve25519.S
+src_libwolfssl_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-curve25519.S
 src_libwolfssl_la_SOURCES += wolfcrypt/src/port/arm/armv8-curve25519.S
 else
 src_libwolfssl_la_SOURCES += wolfcrypt/src/fe_operations.c

wolfcrypt/src/include.am

@@ -52,8 +52,9 @@ EXTRA_DIST += wolfcrypt/src/port/ti/ti-aes.c \
              wolfcrypt/src/port/arm/armv8-sha256.c \
              wolfcrypt/src/port/arm/armv8-chacha.c \
              wolfcrypt/src/port/arm/armv8-curve25519.c \
-             wolfcrypt/src/port/arm/armv7-curve25519.c \
+             wolfcrypt/src/port/arm/armv8-32-curve25519.c \
              wolfcrypt/src/port/arm/armv8-sha512-asm.c \
+             wolfcrypt/src/port/arm/armv8-32-sha512-asm.c \
              wolfcrypt/src/port/nxp/ksdk_port.c \
              wolfcrypt/src/port/atmel/README.md \
              wolfcrypt/src/port/xilinx/xil-sha3.c \

File diff suppressed because it is too large

wolfcrypt/src/port/arm/armv8-32-curve25519.S (renamed from wolfcrypt/src/port/arm/armv7-curve25519.S)

@@ -1,4 +1,4 @@
-/* armv7-curve25519
+/* armv8-32-curve25519
  *
  * Copyright (C) 2006-2019 wolfSSL Inc.
  *
@@ -19,18 +19,22 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
  */
+/* Generated using (from wolfssl):
+ * cd ../scripts
+ * ruby ./x25519/x25519.rb arm32 ../wolfssl/wolfcrypt/src/port/arm/armv8-32-curve25519.S
+ */
 #ifndef __aarch64__
 .text
+.align 2
 .globl fe_init
 .type fe_init, %function
-.align 2
 fe_init:
         bx lr
 .size fe_init,.-fe_init
 .text
+.align 2
 .globl fe_frombytes
 .type fe_frombytes, %function
-.align 2
 fe_frombytes:
         push {r4, r5, r6, r7, lr}
         ldrd r2, r3, [r1]
@@ -45,11 +49,11 @@ fe_frombytes:
         strd r4, r5, [r0, #16]
         strd r6, r7, [r0, #24]
         pop {r4, r5, r6, r7, pc}
 .size fe_frombytes,.-fe_frombytes
 .text
+.align 2
 .globl fe_tobytes
 .type fe_tobytes, %function
-.align 2
 fe_tobytes:
         push {r4, r5, r6, r7, r8, lr}
         ldrd r2, r3, [r1]
@@ -82,11 +86,11 @@ fe_tobytes:
         strd r4, r5, [r0, #16]
         strd r6, r7, [r0, #24]
         pop {r4, r5, r6, r7, r8, pc}
 .size fe_tobytes,.-fe_tobytes
 .text
+.align 2
 .globl fe_1
 .type fe_1, %function
-.align 2
 fe_1:
         # Set one
         mov r2, #1
@@ -100,11 +104,11 @@ fe_1:
         str r1, [r0, #24]
         str r1, [r0, #28]
         bx lr
 .size fe_1,.-fe_1
 .text
+.align 2
 .globl fe_0
 .type fe_0, %function
-.align 2
 fe_0:
         # Set zero
         mov r1, #0
@@ -117,11 +121,11 @@ fe_0:
         str r1, [r0, #24]
         str r1, [r0, #28]
         bx lr
 .size fe_0,.-fe_0
 .text
+.align 2
 .globl fe_copy
 .type fe_copy, %function
-.align 2
 fe_copy:
         push {lr}
         # Copy
@@ -138,11 +142,11 @@ fe_copy:
         str r12, [r0, #24]
         str lr, [r0, #28]
         pop {pc}
 .size fe_copy,.-fe_copy
 .text
+.align 2
 .globl fe_sub
 .type fe_sub, %function
-.align 2
 fe_sub:
         push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
         # Sub
@@ -189,11 +193,11 @@ fe_sub:
         strd r6, r7, [r0, #16]
         strd r8, r9, [r0, #24]
         pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 .size fe_sub,.-fe_sub
 .text
+.align 2
 .globl fe_add
 .type fe_add, %function
-.align 2
 fe_add:
         push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
         # Add
@@ -240,11 +244,11 @@ fe_add:
         strd r6, r7, [r0, #16]
         strd r8, r9, [r0, #24]
         pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 .size fe_add,.-fe_add
 .text
+.align 2
 .globl fe_neg
 .type fe_neg, %function
-.align 2
 fe_neg:
         push {r4, r5, lr}
         mov r5, #-1
@@ -271,11 +275,11 @@ fe_neg:
         str r12, [r0, #24]
         str lr, [r0, #28]
         pop {r4, r5, pc}
 .size fe_neg,.-fe_neg
 .text
+.align 2
 .globl fe_isnonzero
 .type fe_isnonzero, %function
-.align 2
 fe_isnonzero:
         push {r4, r5, r6, r7, r8, lr}
         ldrd r2, r3, [r0]
@@ -310,11 +314,11 @@ fe_isnonzero:
         orr r2, r2, r6
         orr r0, r2, r12
         pop {r4, r5, r6, r7, r8, pc}
 .size fe_isnonzero,.-fe_isnonzero
 .text
+.align 2
 .globl fe_isnegative
 .type fe_isnegative, %function
-.align 2
 fe_isnegative:
         push {lr}
         ldrd r2, r3, [r0]
@@ -336,11 +340,11 @@ fe_isnegative:
         lsr r1, r1, #31
         eor r0, r0, r1
         pop {pc}
 .size fe_isnegative,.-fe_isnegative
 .text
+.align 2
 .globl fe_cmov_table
 .type fe_cmov_table, %function
-.align 2
 fe_cmov_table:
         push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
         sxtb r2, r2
@@ -1319,11 +1323,11 @@ fe_cmov_table:
         str r5, [r0, #88]
         str r6, [r0, #92]
         pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 .size fe_cmov_table,.-fe_cmov_table
 .text
+.align 2
 .globl fe_mul
 .type fe_mul, %function
-.align 2
 fe_mul:
         push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
         sub sp, sp, #0x40
@@ -1848,11 +1852,11 @@ fe_mul:
         strd r10, r11, [r0, #24]
         add sp, sp, #0x40
         pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 .size fe_mul,.-fe_mul
 .text
+.align 2
 .globl fe_sq
 .type fe_sq, %function
-.align 2
 fe_sq:
         push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
         sub sp, sp, #0x40
@@ -2269,11 +2273,11 @@ fe_sq:
         strd r10, r11, [r0, #24]
         add sp, sp, #0x40
         pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 .size fe_sq,.-fe_sq
 .text
+.align 2
 .globl fe_mul121666
 .type fe_mul121666, %function
-.align 2
 fe_mul121666:
         push {r4, r5, r6, r7, r8, r9, r10, lr}
         # Multiply by 121666
@@ -2323,11 +2327,11 @@ fe_mul121666:
         strd r6, r7, [r0, #16]
         strd r8, r9, [r0, #24]
         pop {r4, r5, r6, r7, r8, r9, r10, pc}
 .size fe_mul121666,.-fe_mul121666
 .text
+.align 2
 .globl fe_sq2
 .type fe_sq2, %function
-.align 2
 fe_sq2:
         push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
         sub sp, sp, #0x40
@@ -2759,11 +2763,11 @@ fe_sq2:
         strd r10, r11, [r0, #24]
         add sp, sp, #0x40
         pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 .size fe_sq2,.-fe_sq2
 .text
+.align 2
 .globl fe_invert
 .type fe_invert, %function
-.align 2
 fe_invert:
         push {r4, lr}
         sub sp, sp, #0x88
@@ -2787,110 +2791,110 @@ fe_invert:
         mov r1, sp
         add r2, sp, #32
         bl fe_mul
-        add r0, sp, #64
+        add r0, sp, #0x40
         mov r1, sp
         bl fe_sq
         add r0, sp, #32
         add r1, sp, #32
-        add r2, sp, #64
+        add r2, sp, #0x40
         bl fe_mul
-        add r0, sp, #64
+        add r0, sp, #0x40
         add r1, sp, #32
         bl fe_sq
         mov r4, #4
 L_fe_invert1:
-        add r0, sp, #64
-        add r1, sp, #64
+        add r0, sp, #0x40
+        add r1, sp, #0x40
         bl fe_sq
         sub r4, r4, #1
         cmp r4, #0
         bne L_fe_invert1
         add r0, sp, #32
-        add r1, sp, #64
+        add r1, sp, #0x40
         add r2, sp, #32
         bl fe_mul
-        add r0, sp, #64
+        add r0, sp, #0x40
         add r1, sp, #32
         bl fe_sq
         mov r4, #9
 L_fe_invert2:
-        add r0, sp, #64
-        add r1, sp, #64
+        add r0, sp, #0x40
+        add r1, sp, #0x40
         bl fe_sq
         sub r4, r4, #1
         cmp r4, #0
         bne L_fe_invert2
-        add r0, sp, #64
-        add r1, sp, #64
+        add r0, sp, #0x40
+        add r1, sp, #0x40
         add r2, sp, #32
         bl fe_mul
-        add r0, sp, #96
-        add r1, sp, #64
+        add r0, sp, #0x60
+        add r1, sp, #0x40
         bl fe_sq
         mov r4, #19
 L_fe_invert3:
-        add r0, sp, #96
-        add r1, sp, #96
+        add r0, sp, #0x60
+        add r1, sp, #0x60
         bl fe_sq
         sub r4, r4, #1
         cmp r4, #0
         bne L_fe_invert3
-        add r0, sp, #64
-        add r1, sp, #96
-        add r2, sp, #64
+        add r0, sp, #0x40
+        add r1, sp, #0x60
+        add r2, sp, #0x40
         bl fe_mul
         mov r4, #10
 L_fe_invert4:
-        add r0, sp, #64
-        add r1, sp, #64
+        add r0, sp, #0x40
+        add r1, sp, #0x40
         bl fe_sq
         sub r4, r4, #1
         cmp r4, #0
         bne L_fe_invert4
         add r0, sp, #32
-        add r1, sp, #64
+        add r1, sp, #0x40
         add r2, sp, #32
         bl fe_mul
-        add r0, sp, #64
+        add r0, sp, #0x40
         add r1, sp, #32
         bl fe_sq
-        mov r4, #0x31
+        mov r4, #49
 L_fe_invert5:
-        add r0, sp, #64
-        add r1, sp, #64
+        add r0, sp, #0x40
+        add r1, sp, #0x40
         bl fe_sq
         sub r4, r4, #1
         cmp r4, #0
         bne L_fe_invert5
-        add r0, sp, #64
-        add r1, sp, #64
+        add r0, sp, #0x40
+        add r1, sp, #0x40
         add r2, sp, #32
         bl fe_mul
-        add r0, sp, #96
-        add r1, sp, #64
+        add r0, sp, #0x60
+        add r1, sp, #0x40
         bl fe_sq
         mov r4, #0x63
 L_fe_invert6:
-        add r0, sp, #96
-        add r1, sp, #96
+        add r0, sp, #0x60
+        add r1, sp, #0x60
         bl fe_sq
         sub r4, r4, #1
         cmp r4, #0
         bne L_fe_invert6
-        add r0, sp, #64
-        add r1, sp, #96
-        add r2, sp, #64
+        add r0, sp, #0x40
+        add r1, sp, #0x60
+        add r2, sp, #0x40
         bl fe_mul
-        mov r4, #0x32
+        mov r4, #50
 L_fe_invert7:
-        add r0, sp, #64
-        add r1, sp, #64
+        add r0, sp, #0x40
+        add r1, sp, #0x40
         bl fe_sq
         sub r4, r4, #1
         cmp r4, #0
         bne L_fe_invert7
         add r0, sp, #32
-        add r1, sp, #64
+        add r1, sp, #0x40
         add r2, sp, #32
         bl fe_mul
         mov r4, #5
@@ -2909,11 +2913,11 @@ L_fe_invert8:
         ldr r0, [sp, #128]
         add sp, sp, #0x88
         pop {r4, pc}
 .size fe_invert,.-fe_invert
 .text
+.align 2
 .globl curve25519
 .type curve25519, %function
-.align 2
 curve25519:
         push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
         sub sp, sp, #0xbc
@@ -3282,7 +3286,7 @@ L_curve25519_bits:
         strd r10, r11, [sp, #120]
         ldr r2, [sp, #160]
         add r1, sp, #0x60
-        add r0, sp, #0x20
+        add r0, sp, #32
         bl fe_mul
         add r2, sp, #0x80
         add r1, sp, #0
@@ -3529,7 +3533,7 @@ L_curve25519_bits:
         strd r10, r11, [sp, #120]
         add r2, sp, #0
         ldr r1, [sp, #168]
-        add r0, sp, #0x20
+        add r0, sp, #32
         bl fe_mul
         add r2, sp, #0x60
         add r1, sp, #0x80
@@ -3549,136 +3553,136 @@ L_curve25519_bits:
         add r0, sp, #32
         add r1, sp, #0
         bl fe_sq
-        add r0, sp, #64
+        add r0, sp, #0x40
         add r1, sp, #32
         bl fe_sq
-        add r0, sp, #64
-        add r1, sp, #64
+        add r0, sp, #0x40
+        add r1, sp, #0x40
         bl fe_sq
-        add r0, sp, #64
+        add r0, sp, #0x40
         add r1, sp, #0
-        add r2, sp, #64
+        add r2, sp, #0x40
         bl fe_mul
         add r0, sp, #32
         add r1, sp, #32
-        add r2, sp, #64
+        add r2, sp, #0x40
         bl fe_mul
-        add r0, sp, #96
+        add r0, sp, #0x60
         add r1, sp, #32
         bl fe_sq
-        add r0, sp, #64
-        add r1, sp, #64
-        add r2, sp, #96
+        add r0, sp, #0x40
+        add r1, sp, #0x40
+        add r2, sp, #0x60
         bl fe_mul
-        add r0, sp, #96
-        add r1, sp, #64
+        add r0, sp, #0x60
+        add r1, sp, #0x40
         bl fe_sq
         mov r4, #4
 L_curve25519_inv_1:
-        add r0, sp, #96
-        add r1, sp, #96
+        add r0, sp, #0x60
+        add r1, sp, #0x60
         bl fe_sq
         sub r4, r4, #1
         cmp r4, #0
         bne L_curve25519_inv_1
-        add r0, sp, #64
-        add r1, sp, #96
-        add r2, sp, #64
+        add r0, sp, #0x40
+        add r1, sp, #0x60
+        add r2, sp, #0x40
         bl fe_mul
-        add r0, sp, #96
-        add r1, sp, #64
+        add r0, sp, #0x60
+        add r1, sp, #0x40
         bl fe_sq
         mov r4, #9
 L_curve25519_inv_2:
-        add r0, sp, #96
-        add r1, sp, #96
+        add r0, sp, #0x60
+        add r1, sp, #0x60
         bl fe_sq
         sub r4, r4, #1
         cmp r4, #0
         bne L_curve25519_inv_2
-        add r0, sp, #96
-        add r1, sp, #96
-        add r2, sp, #64
+        add r0, sp, #0x60
+        add r1, sp, #0x60
+        add r2, sp, #0x40
         bl fe_mul
-        add r0, sp, #128
-        add r1, sp, #96
+        add r0, sp, #0x80
+        add r1, sp, #0x60
         bl fe_sq
         mov r4, #19
 L_curve25519_inv_3:
-        add r0, sp, #128
-        add r1, sp, #128
+        add r0, sp, #0x80
+        add r1, sp, #0x80
         bl fe_sq
         sub r4, r4, #1
         cmp r4, #0
         bne L_curve25519_inv_3
-        add r0, sp, #96
-        add r1, sp, #128
-        add r2, sp, #96
+        add r0, sp, #0x60
+        add r1, sp, #0x80
+        add r2, sp, #0x60
         bl fe_mul
         mov r4, #10
 L_curve25519_inv_4:
-        add r0, sp, #96
-        add r1, sp, #96
+        add r0, sp, #0x60
+        add r1, sp, #0x60
         bl fe_sq
         sub r4, r4, #1
         cmp r4, #0
         bne L_curve25519_inv_4
-        add r0, sp, #64
-        add r1, sp, #96
-        add r2, sp, #64
+        add r0, sp, #0x40
+        add r1, sp, #0x60
+        add r2, sp, #0x40
         bl fe_mul
-        add r0, sp, #96
-        add r1, sp, #64
+        add r0, sp, #0x60
+        add r1, sp, #0x40
         bl fe_sq
-        mov r4, #0x31
+        mov r4, #49
 L_curve25519_inv_5:
-        add r0, sp, #96
-        add r1, sp, #96
+        add r0, sp, #0x60
+        add r1, sp, #0x60
         bl fe_sq
         sub r4, r4, #1
         cmp r4, #0
         bne L_curve25519_inv_5
-        add r0, sp, #96
-        add r1, sp, #96
-        add r2, sp, #64
+        add r0, sp, #0x60
+        add r1, sp, #0x60
+        add r2, sp, #0x40
         bl fe_mul
-        add r0, sp, #128
-        add r1, sp, #96
+        add r0, sp, #0x80
+        add r1, sp, #0x60
         bl fe_sq
         mov r4, #0x63
 L_curve25519_inv_6:
-        add r0, sp, #128
-        add r1, sp, #128
+        add r0, sp, #0x80
+        add r1, sp, #0x80
         bl fe_sq
         sub r4, r4, #1
         cmp r4, #0
         bne L_curve25519_inv_6
-        add r0, sp, #96
-        add r1, sp, #128
-        add r2, sp, #96
+        add r0, sp, #0x60
+        add r1, sp, #0x80
+        add r2, sp, #0x60
         bl fe_mul
-        mov r4, #0x32
+        mov r4, #50
 L_curve25519_inv_7:
-        add r0, sp, #96
-        add r1, sp, #96
+        add r0, sp, #0x60
+        add r1, sp, #0x60
         bl fe_sq
         sub r4, r4, #1
         cmp r4, #0
         bne L_curve25519_inv_7
-        add r0, sp, #64
-        add r1, sp, #96
-        add r2, sp, #64
+        add r0, sp, #0x40
+        add r1, sp, #0x60
+        add r2, sp, #0x40
         bl fe_mul
         mov r4, #5
 L_curve25519_inv_8:
-        add r0, sp, #64
-        add r1, sp, #64
+        add r0, sp, #0x40
+        add r1, sp, #0x40
         bl fe_sq
         sub r4, r4, #1
         cmp r4, #0
         bne L_curve25519_inv_8
         add r0, sp, #0
-        add r1, sp, #64
+        add r1, sp, #0x40
         add r2, sp, #32
         bl fe_mul
         add r2, sp, #0
@@ -3688,11 +3692,11 @@ L_curve25519_inv_8:
         mov r0, #0
         add sp, sp, #0xbc
         pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 .size curve25519,.-curve25519
 .text
+.align 2
 .globl fe_pow22523
 .type fe_pow22523, %function
-.align 2
 fe_pow22523:
         push {r4, lr}
         sub sp, sp, #0x68
@@ -3753,19 +3757,19 @@ L_fe_pow22523_2:
         add r1, sp, #32
         mov r2, sp
         bl fe_mul
-        add r0, sp, #64
+        add r0, sp, #0x40
         add r1, sp, #32
         bl fe_sq
         mov r4, #19
 L_fe_pow22523_3:
-        add r0, sp, #64
-        add r1, sp, #64
+        add r0, sp, #0x40
+        add r1, sp, #0x40
         bl fe_sq
         sub r4, r4, #1
         cmp r4, #0
         bne L_fe_pow22523_3
         add r0, sp, #32
-        add r1, sp, #64
+        add r1, sp, #0x40
         add r2, sp, #32
         bl fe_mul
         mov r4, #10
@@ -3783,7 +3787,7 @@ L_fe_pow22523_4:
         add r0, sp, #32
         mov r1, sp
         bl fe_sq
-        mov r4, #0x31
+        mov r4, #49
 L_fe_pow22523_5:
         add r0, sp, #32
         add r1, sp, #32
@@ -3795,22 +3799,22 @@ L_fe_pow22523_5:
         add r1, sp, #32
         mov r2, sp
         bl fe_mul
-        add r0, sp, #64
+        add r0, sp, #0x40
         add r1, sp, #32
         bl fe_sq
         mov r4, #0x63
 L_fe_pow22523_6:
-        add r0, sp, #64
-        add r1, sp, #64
+        add r0, sp, #0x40
+        add r1, sp, #0x40
         bl fe_sq
         sub r4, r4, #1
         cmp r4, #0
         bne L_fe_pow22523_6
         add r0, sp, #32
-        add r1, sp, #64
+        add r1, sp, #0x40
         add r2, sp, #32
         bl fe_mul
-        mov r4, #0x32
+        mov r4, #50
 L_fe_pow22523_7:
         add r0, sp, #32
         add r1, sp, #32
@@ -3838,11 +3842,11 @@ L_fe_pow22523_8:
         ldr r0, [sp, #96]
         add sp, sp, #0x68
         pop {r4, pc}
 .size fe_pow22523,.-fe_pow22523
 .text
+.align 2
 .globl fe_ge_to_p2
 .type fe_ge_to_p2, %function
-.align 2
 fe_ge_to_p2:
         push {lr}
         sub sp, sp, #16
@@ -3864,11 +3868,11 @@ fe_ge_to_p2:
         bl fe_mul
         add sp, sp, #16
         pop {pc}
 .size fe_ge_to_p2,.-fe_ge_to_p2
 .text
+.align 2
 .globl fe_ge_to_p3
 .type fe_ge_to_p3, %function
-.align 2
 fe_ge_to_p3:
         push {lr}
         sub sp, sp, #16
@@ -3894,11 +3898,11 @@ fe_ge_to_p3:
         bl fe_mul
         add sp, sp, #16
         pop {pc}
 .size fe_ge_to_p3,.-fe_ge_to_p3
 .text
+.align 2
 .globl fe_ge_dbl
 .type fe_ge_dbl, %function
-.align 2
 fe_ge_dbl:
         push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
         sub sp, sp, #16
@@ -4223,14 +4227,14 @@ fe_ge_dbl:
         str r10, [r0, #28]
         add sp, sp, #16
         pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 .size fe_ge_dbl,.-fe_ge_dbl
 .text
+.align 2
 .globl fe_ge_madd
 .type fe_ge_madd, %function
-.align 2
 fe_ge_madd:
         push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-        sub sp, sp, #0x20
+        sub sp, sp, #32
         str r0, [sp]
         str r1, [sp, #4]
         str r2, [sp, #8]
@@ -4663,16 +4667,16 @@ fe_ge_madd:
         adc r10, r10, lr
         str r9, [r1, #24]
         str r10, [r1, #28]
-        add sp, sp, #0x20
+        add sp, sp, #32
         pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 .size fe_ge_madd,.-fe_ge_madd
 .text
+.align 2
 .globl fe_ge_msub
 .type fe_ge_msub, %function
-.align 2
 fe_ge_msub:
         push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-        sub sp, sp, #0x20
+        sub sp, sp, #32
         str r0, [sp]
         str r1, [sp, #4]
         str r2, [sp, #8]
@@ -5105,13 +5109,13 @@ fe_ge_msub:
         adc r10, r10, lr
         str r9, [r1, #24]
         str r10, [r1, #28]
-        add sp, sp, #0x20
+        add sp, sp, #32
         pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 .size fe_ge_msub,.-fe_ge_msub
 .text
+.align 2
 .globl fe_ge_add
 .type fe_ge_add, %function
-.align 2
 fe_ge_add:
         push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
         sub sp, sp, #0x60
@@ -5554,11 +5558,11 @@ fe_ge_add:
         str r10, [r1, #28]
         add sp, sp, #0x60
         pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 .size fe_ge_add,.-fe_ge_add
 .text
+.align 2
 .globl fe_ge_sub
 .type fe_ge_sub, %function
-.align 2
 fe_ge_sub:
         push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
         sub sp, sp, #0x60
@@ -6001,5 +6005,5 @@ fe_ge_sub:
         str r10, [r1, #28]
         add sp, sp, #0x60
         pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 .size fe_ge_sub,.-fe_ge_sub
-#endif /* __aarch64__ */
+#endif /* !__aarch64__ */
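
The square-and-multiply runs in fe_invert and the curve25519 inversion tail above (loop counts 4, 9, 19, 10, 49, 0x63 = 99, 50, 5) compute z^(p-2) mod p with p = 2^255 - 19, i.e. Fermat inversion over the usual Curve25519 addition chain. Below is a C sketch of that chain reconstructed from the loop counters visible in this diff; the fe typedef, the fe_invert_sketch name, and the temporaries are illustrative only, while fe_sq and fe_mul stand in for the assembly routines of the same names.

/* Fermat inversion sketch: out = z^(p-2) mod p, p = 2^255 - 19.
 * Assumes fe_sq(r, a): r = a^2 mod p and fe_mul(r, a, b): r = a*b mod p,
 * matching the assembly helpers above; the fe typedef is illustrative. */
typedef unsigned char fe[32];
void fe_sq(fe r, const fe a);
void fe_mul(fe r, const fe a, const fe b);

void fe_invert_sketch(fe out, const fe z)
{
    fe t0, t1, t2, t3;
    int i;

    fe_sq(t0, z);                                  /* z^2            */
    fe_sq(t1, t0);
    fe_sq(t1, t1);                                 /* z^8            */
    fe_mul(t1, z, t1);                             /* z^9            */
    fe_mul(t0, t0, t1);                            /* z^11           */
    fe_sq(t2, t0);                                 /* z^22           */
    fe_mul(t1, t1, t2);                            /* z^(2^5 - 1)    */
    fe_sq(t2, t1);
    for (i = 0; i < 4; i++)  fe_sq(t2, t2);        /* 5 squarings    */
    fe_mul(t1, t2, t1);                            /* z^(2^10 - 1)   */
    fe_sq(t2, t1);
    for (i = 0; i < 9; i++)  fe_sq(t2, t2);        /* 10 squarings   */
    fe_mul(t2, t2, t1);                            /* z^(2^20 - 1)   */
    fe_sq(t3, t2);
    for (i = 0; i < 19; i++) fe_sq(t3, t3);        /* 20 squarings   */
    fe_mul(t2, t3, t2);                            /* z^(2^40 - 1)   */
    for (i = 0; i < 10; i++) fe_sq(t2, t2);        /* 10 squarings   */
    fe_mul(t1, t2, t1);                            /* z^(2^50 - 1)   */
    fe_sq(t2, t1);
    for (i = 0; i < 49; i++) fe_sq(t2, t2);        /* 50 squarings   */
    fe_mul(t2, t2, t1);                            /* z^(2^100 - 1)  */
    fe_sq(t3, t2);
    for (i = 0; i < 99; i++) fe_sq(t3, t3);        /* 100 squarings  */
    fe_mul(t2, t3, t2);                            /* z^(2^200 - 1)  */
    for (i = 0; i < 50; i++) fe_sq(t2, t2);        /* 50 squarings   */
    fe_mul(t1, t2, t1);                            /* z^(2^250 - 1)  */
    for (i = 0; i < 5; i++)  fe_sq(t1, t1);        /* z^(2^255 - 32) */
    fe_mul(out, t1, t0);                           /* z^(2^255 - 21) = z^(p-2) */
}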

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

wolfcrypt/src/sha512.c

@@ -93,7 +93,7 @@ int wc_InitSha512_ex(wc_Sha512* sha512, void* heap, int devId)
 #endif /* WOLFSSL_SHA512 */
-#if !defined(WOLFSSL_ARMASM) || !defined(__aarch64__)
+#ifndef WOLFSSL_ARMASM
 static const word64 K512[80] = {
     W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd),
     W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc),
@@ -177,7 +177,7 @@ static const word64 K512[80] = {
     h(i) += S0(a(i)) + Maj(a(i),b(i),c(i))
 #define DATA sha512->buffer
-static int Transform_Sha512(wc_Sha512* sha512)
+static void Transform_Sha512(wc_Sha512* sha512)
 {
     const word64* K = K512;
     word32 j;
@@ -222,7 +222,7 @@ static int Transform_Sha512(wc_Sha512* sha512)
 #undef DATA
 #define DATA ((word64*)data)
-static int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
+static void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
 {
     const word64* K = K512;
     word32 j;
@@ -324,16 +324,13 @@ static WC_INLINE int Sha512Update(wc_Sha512* sha512, const byte* data, word32 le
         }
         if (sha512->buffLen == WC_SHA512_BLOCK_SIZE) {
-#if !defined(WOLFSSL_ARMASM) || !defined(__aarch64__)
-            ret = Transform_Sha512(sha512);
+#ifndef WOLFSSL_ARMASM
+            Transform_Sha512(sha512);
 #else
-            ret = Transform_Sha512_Len(sha512, (const byte*)sha512->buffer,
+            Transform_Sha512_Len(sha512, (const byte*)sha512->buffer,
                                        WC_SHA512_BLOCK_SIZE);
 #endif
-            if (ret == 0)
-                sha512->buffLen = 0;
-            else
-                len = 0;
+            sha512->buffLen = 0;
         }
     }
@@ -369,7 +366,6 @@ int wc_Sha512Update(wc_Sha512* sha512, const byte* data, word32 len)
 static WC_INLINE int Sha512Final(wc_Sha512* sha512)
 {
     byte* local = (byte*)sha512->buffer;
-    int ret;
     if (sha512 == NULL) {
         return BAD_FUNC_ARG;
@@ -382,14 +378,12 @@ static WC_INLINE int Sha512Final(wc_Sha512* sha512)
         XMEMSET(&local[sha512->buffLen], 0, WC_SHA512_BLOCK_SIZE -
                                             sha512->buffLen);
         sha512->buffLen += WC_SHA512_BLOCK_SIZE - sha512->buffLen;
-#if !defined(WOLFSSL_ARMASM) || !defined(__aarch64__)
-        ret = Transform_Sha512(sha512);
+#ifndef WOLFSSL_ARMASM
+        Transform_Sha512(sha512);
 #else
-        ret = Transform_Sha512_Len(sha512, (const byte*)sha512->buffer,
+        Transform_Sha512_Len(sha512, (const byte*)sha512->buffer,
                                    WC_SHA512_BLOCK_SIZE);
 #endif
-        if (ret != 0)
-            return ret;
         sha512->buffLen = 0;
     }
@@ -410,14 +404,12 @@ static WC_INLINE int Sha512Final(wc_Sha512* sha512)
         &(sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 2]),
         &(sha512->buffer[WC_SHA512_BLOCK_SIZE / sizeof(word64) - 2]),
         WC_SHA512_BLOCK_SIZE - WC_SHA512_PAD_SIZE);
-#if !defined(WOLFSSL_ARMASM) || !defined(__aarch64__)
-    ret = Transform_Sha512(sha512);
+#ifndef WOLFSSL_ARMASM
+    Transform_Sha512(sha512);
 #else
-    ret = Transform_Sha512_Len(sha512, (const byte*)sha512->buffer,
+    Transform_Sha512_Len(sha512, (const byte*)sha512->buffer,
                                WC_SHA512_BLOCK_SIZE);
 #endif
-    if (ret != 0)
-        return ret;
 #ifdef LITTLE_ENDIAN_ORDER
     ByteReverseWords64(sha512->digest, sha512->digest, WC_SHA512_DIGEST_SIZE);
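
With the guard changed to #ifndef WOLFSSL_ARMASM, both ARM32 and ARM64 assembly builds now route block processing through Transform_Sha512_Len, and the transforms no longer return a status. Callers are unaffected because they go through the public wolfCrypt API; a minimal usage sketch follows (the sha512_digest helper name is illustrative, not part of wolfSSL).

#include <wolfssl/wolfcrypt/sha512.h>

/* Hash a buffer with SHA-512 via the public wolfCrypt API; the
 * Transform_Sha512* dispatch above is an internal detail. */
int sha512_digest(const byte* data, word32 len,
                  byte out[WC_SHA512_DIGEST_SIZE])
{
    wc_Sha512 sha512;
    int ret = wc_InitSha512(&sha512);
    if (ret == 0)
        ret = wc_Sha512Update(&sha512, data, len);
    if (ret == 0)
        ret = wc_Sha512Final(&sha512, out);  /* writes the 64-byte digest */
    wc_Sha512Free(&sha512);
    return ret;
}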

wolfssl/wolfcrypt/sha512.h

@@ -141,9 +141,9 @@ typedef struct wc_Sha512 {
 #endif /* HAVE_FIPS */
-#if defined(WOLFSSL_ARMASM) && defined(__aarch64__)
-WOLFSSL_LOCAL int Transform_Sha512_Len(wc_Sha512* sha512, const byte* data,
+#ifdef WOLFSSL_ARMASM
+WOLFSSL_LOCAL void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data,
                                        word32 len);
 #endif
 #ifdef WOLFSSL_SHA512