diff --git a/configure.ac b/configure.ac index eca16ae45..997c51a1a 100644 --- a/configure.ac +++ b/configure.ac @@ -2582,7 +2582,7 @@ then AC_MSG_NOTICE([64bit ARMv8 found, setting mcpu to generic+crypto]) ;; armv7a*) - AM_CPPFLAGS="$AM_CPPFLAGS -march=armv7-a -mfpu=neon -DWOLFSSL_ARM_ARCH=7" + AM_CPPFLAGS="$AM_CPPFLAGS -march=armv7-a -mfpu=neon -DWOLFSSL_ARM_ARCH=7 -marm" # Include options.h AM_CCASFLAGS="$AM_CCASFLAGS -DEXTERNAL_OPTS_OPENVPN" ENABLED_ARMASM_CRYPTO=no diff --git a/src/include.am b/src/include.am index 8e5a42af0..88cfdb02e 100644 --- a/src/include.am +++ b/src/include.am @@ -157,7 +157,9 @@ endif if BUILD_AES src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes.c +if BUILD_ARMASM src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-aes.c +endif BUILD_ARMASM if BUILD_ARMASM_NEON if !BUILD_ARMASM_CRYPTO if BUILD_ARMASM_INLINE @@ -409,7 +411,9 @@ endif if !BUILD_FIPS_CURRENT if BUILD_AES src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes.c +if BUILD_ARMASM src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-aes.c +endif BUILD_ARMASM if BUILD_ARMASM_NEON if !BUILD_ARMASM_CRYPTO if BUILD_ARMASM_INLINE diff --git a/wolfcrypt/src/curve25519.c b/wolfcrypt/src/curve25519.c index fea010599..4d4e6db17 100644 --- a/wolfcrypt/src/curve25519.c +++ b/wolfcrypt/src/curve25519.c @@ -58,7 +58,16 @@ const curve25519_set_type curve25519_sets[] = { } }; -static const unsigned char kCurve25519BasePoint[CURVE25519_KEYSIZE] = {9}; +/* Curve25519 base point u = 9. The array is handed to curve25519() through a + * byte pointer, so the first byte in memory must be 9; on big-endian targets + * the first word is therefore stored byte-swapped. */ +static const word32 kCurve25519BasePoint[CURVE25519_KEYSIZE/sizeof(word32)] = { +#ifdef BIG_ENDIAN_ORDER + 0x09000000 +#else + 9 +#endif +}; /* Curve25519 private key must be less than order */ /* These functions clamp private k and check it */ @@ -133,7 +142,7 @@ int wc_curve25519_make_pub(int public_size, byte* pub, int private_size, SAVE_VECTOR_REGISTERS(return _svr_ret;); - ret = curve25519(pub, priv, kCurve25519BasePoint); + ret = curve25519(pub, priv, (const byte*)kCurve25519BasePoint); RESTORE_VECTOR_REGISTERS(); #endif diff --git 
a/wolfcrypt/src/port/arm/armv8-32-aes-asm.S b/wolfcrypt/src/port/arm/armv8-32-aes-asm.S index cc6830f95..365d0c342 100644 --- a/wolfcrypt/src/port/arm/armv8-32-aes-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-aes-asm.S @@ -1566,49 +1566,49 @@ AES_decrypt_block: L_AES_decrypt_block_nr: ubfx r8, r7, #16, #8 lsr r11, r4, #24 - ubfx lr, r6, #8, #8 - ubfx r2, r5, #0, #8 + ubfx r12, r6, #8, #8 + ubfx lr, r5, #0, #8 ldr r8, [r0, r8, lsl #2] ldr r11, [r0, r11, lsl #2] + ldr r12, [r0, r12, lsl #2] ldr lr, [r0, lr, lsl #2] - ldr r2, [r0, r2, lsl #2] ubfx r9, r4, #16, #8 eor r8, r8, r11, ror #24 lsr r11, r5, #24 - eor r8, r8, lr, ror #8 - ubfx lr, r7, #8, #8 - eor r8, r8, r2, ror #16 - ubfx r2, r6, #0, #8 + eor r8, r8, r12, ror #8 + ubfx r12, r7, #8, #8 + eor r8, r8, lr, ror #16 + ubfx lr, r6, #0, #8 ldr r9, [r0, r9, lsl #2] ldr r11, [r0, r11, lsl #2] + ldr r12, [r0, r12, lsl #2] ldr lr, [r0, lr, lsl #2] - ldr r2, [r0, r2, lsl #2] ubfx r10, r5, #16, #8 eor r9, r9, r11, ror #24 lsr r11, r6, #24 - eor r9, r9, lr, ror #8 - ubfx lr, r4, #8, #8 - eor r9, r9, r2, ror #16 - ubfx r2, r7, #0, #8 + eor r9, r9, r12, ror #8 + ubfx r12, r4, #8, #8 + eor r9, r9, lr, ror #16 + ubfx lr, r7, #0, #8 ldr r10, [r0, r10, lsl #2] ldr r11, [r0, r11, lsl #2] + ldr r12, [r0, r12, lsl #2] ldr lr, [r0, lr, lsl #2] - ldr r2, [r0, r2, lsl #2] ubfx r4, r4, #0, #8 eor r10, r10, r11, ror #24 ubfx r11, r6, #16, #8 - eor r10, r10, lr, ror #8 - lsr lr, r7, #24 - eor r10, r10, r2, ror #16 - ubfx r2, r5, #8, #8 + eor r10, r10, r12, ror #8 + lsr r12, r7, #24 + eor r10, r10, lr, ror #16 + ubfx lr, r5, #8, #8 ldr r4, [r0, r4, lsl #2] - ldr lr, [r0, lr, lsl #2] + ldr r12, [r0, r12, lsl #2] ldr r11, [r0, r11, lsl #2] - ldr r2, [r0, r2, lsl #2] - eor lr, lr, r4, ror #24 + ldr lr, [r0, lr, lsl #2] + eor r12, r12, r4, ror #24 ldm r3!, {r4, r5, r6, r7} - eor r11, r11, r2, ror #8 - eor r11, r11, lr, ror #24 + eor r11, r11, lr, ror #8 + eor r11, r11, r12, ror #24 # XOR in Key Schedule eor r8, r8, r4 eor r9, r9, r5 @@ 
-1616,49 +1616,49 @@ L_AES_decrypt_block_nr: eor r11, r11, r7 ubfx r4, r11, #16, #8 lsr r7, r8, #24 - ubfx lr, r10, #8, #8 - ubfx r2, r9, #0, #8 + ubfx r12, r10, #8, #8 + ubfx lr, r9, #0, #8 ldr r4, [r0, r4, lsl #2] ldr r7, [r0, r7, lsl #2] + ldr r12, [r0, r12, lsl #2] ldr lr, [r0, lr, lsl #2] - ldr r2, [r0, r2, lsl #2] ubfx r5, r8, #16, #8 eor r4, r4, r7, ror #24 lsr r7, r9, #24 - eor r4, r4, lr, ror #8 - ubfx lr, r11, #8, #8 - eor r4, r4, r2, ror #16 - ubfx r2, r10, #0, #8 + eor r4, r4, r12, ror #8 + ubfx r12, r11, #8, #8 + eor r4, r4, lr, ror #16 + ubfx lr, r10, #0, #8 ldr r5, [r0, r5, lsl #2] ldr r7, [r0, r7, lsl #2] + ldr r12, [r0, r12, lsl #2] ldr lr, [r0, lr, lsl #2] - ldr r2, [r0, r2, lsl #2] ubfx r6, r9, #16, #8 eor r5, r5, r7, ror #24 lsr r7, r10, #24 - eor r5, r5, lr, ror #8 - ubfx lr, r8, #8, #8 - eor r5, r5, r2, ror #16 - ubfx r2, r11, #0, #8 + eor r5, r5, r12, ror #8 + ubfx r12, r8, #8, #8 + eor r5, r5, lr, ror #16 + ubfx lr, r11, #0, #8 ldr r6, [r0, r6, lsl #2] ldr r7, [r0, r7, lsl #2] + ldr r12, [r0, r12, lsl #2] ldr lr, [r0, lr, lsl #2] - ldr r2, [r0, r2, lsl #2] ubfx r8, r8, #0, #8 eor r6, r6, r7, ror #24 ubfx r7, r10, #16, #8 - eor r6, r6, lr, ror #8 - lsr lr, r11, #24 - eor r6, r6, r2, ror #16 - ubfx r2, r9, #8, #8 + eor r6, r6, r12, ror #8 + lsr r12, r11, #24 + eor r6, r6, lr, ror #16 + ubfx lr, r9, #8, #8 ldr r8, [r0, r8, lsl #2] - ldr lr, [r0, lr, lsl #2] + ldr r12, [r0, r12, lsl #2] ldr r7, [r0, r7, lsl #2] - ldr r2, [r0, r2, lsl #2] - eor lr, lr, r8, ror #24 + ldr lr, [r0, lr, lsl #2] + eor r12, r12, r8, ror #24 ldm r3!, {r8, r9, r10, r11} - eor r7, r7, r2, ror #8 - eor r7, r7, lr, ror #24 + eor r7, r7, lr, ror #8 + eor r7, r7, r12, ror #24 # XOR in Key Schedule eor r4, r4, r8 eor r5, r5, r9 @@ -1668,49 +1668,49 @@ L_AES_decrypt_block_nr: bne L_AES_decrypt_block_nr ubfx r8, r7, #16, #8 lsr r11, r4, #24 - ubfx lr, r6, #8, #8 - ubfx r2, r5, #0, #8 + ubfx r12, r6, #8, #8 + ubfx lr, r5, #0, #8 ldr r8, [r0, r8, lsl #2] ldr r11, [r0, r11, lsl #2] 
+ ldr r12, [r0, r12, lsl #2] ldr lr, [r0, lr, lsl #2] - ldr r2, [r0, r2, lsl #2] ubfx r9, r4, #16, #8 eor r8, r8, r11, ror #24 lsr r11, r5, #24 - eor r8, r8, lr, ror #8 - ubfx lr, r7, #8, #8 - eor r8, r8, r2, ror #16 - ubfx r2, r6, #0, #8 + eor r8, r8, r12, ror #8 + ubfx r12, r7, #8, #8 + eor r8, r8, lr, ror #16 + ubfx lr, r6, #0, #8 ldr r9, [r0, r9, lsl #2] ldr r11, [r0, r11, lsl #2] + ldr r12, [r0, r12, lsl #2] ldr lr, [r0, lr, lsl #2] - ldr r2, [r0, r2, lsl #2] ubfx r10, r5, #16, #8 eor r9, r9, r11, ror #24 lsr r11, r6, #24 - eor r9, r9, lr, ror #8 - ubfx lr, r4, #8, #8 - eor r9, r9, r2, ror #16 - ubfx r2, r7, #0, #8 + eor r9, r9, r12, ror #8 + ubfx r12, r4, #8, #8 + eor r9, r9, lr, ror #16 + ubfx lr, r7, #0, #8 ldr r10, [r0, r10, lsl #2] ldr r11, [r0, r11, lsl #2] + ldr r12, [r0, r12, lsl #2] ldr lr, [r0, lr, lsl #2] - ldr r2, [r0, r2, lsl #2] ubfx r4, r4, #0, #8 eor r10, r10, r11, ror #24 ubfx r11, r6, #16, #8 - eor r10, r10, lr, ror #8 - lsr lr, r7, #24 - eor r10, r10, r2, ror #16 - ubfx r2, r5, #8, #8 + eor r10, r10, r12, ror #8 + lsr r12, r7, #24 + eor r10, r10, lr, ror #16 + ubfx lr, r5, #8, #8 ldr r4, [r0, r4, lsl #2] - ldr lr, [r0, lr, lsl #2] + ldr r12, [r0, r12, lsl #2] ldr r11, [r0, r11, lsl #2] - ldr r2, [r0, r2, lsl #2] - eor lr, lr, r4, ror #24 + ldr lr, [r0, lr, lsl #2] + eor r12, r12, r4, ror #24 ldm r3!, {r4, r5, r6, r7} - eor r11, r11, r2, ror #8 - eor r11, r11, lr, ror #24 + eor r11, r11, lr, ror #8 + eor r11, r11, r12, ror #24 # XOR in Key Schedule eor r8, r8, r4 eor r9, r9, r5 @@ -1718,49 +1718,49 @@ L_AES_decrypt_block_nr: eor r11, r11, r7 ubfx r4, r9, #0, #8 ubfx r7, r10, #8, #8 - ubfx lr, r11, #16, #8 - lsr r2, r8, #24 - ldrb r4, [r12, r4] - ldrb r7, [r12, r7] - ldrb lr, [r12, lr] - ldrb r2, [r12, r2] + ubfx r12, r11, #16, #8 + lsr lr, r8, #24 + ldrb r4, [r2, r4] + ldrb r7, [r2, r7] + ldrb r12, [r2, r12] + ldrb lr, [r2, lr] ubfx r5, r10, #0, #8 eor r4, r4, r7, lsl #8 ubfx r7, r11, #8, #8 - eor r4, r4, lr, lsl #16 - ubfx lr, r8, #16, #8 - 
eor r4, r4, r2, lsl #24 - lsr r2, r9, #24 - ldrb r7, [r12, r7] - ldrb r2, [r12, r2] - ldrb r5, [r12, r5] - ldrb lr, [r12, lr] + eor r4, r4, r12, lsl #16 + ubfx r12, r8, #16, #8 + eor r4, r4, lr, lsl #24 + lsr lr, r9, #24 + ldrb r7, [r2, r7] + ldrb lr, [r2, lr] + ldrb r5, [r2, r5] + ldrb r12, [r2, r12] ubfx r6, r11, #0, #8 eor r5, r5, r7, lsl #8 ubfx r7, r8, #8, #8 - eor r5, r5, lr, lsl #16 - ubfx lr, r9, #16, #8 - eor r5, r5, r2, lsl #24 - lsr r2, r10, #24 - ldrb r7, [r12, r7] - ldrb r2, [r12, r2] - ldrb r6, [r12, r6] - ldrb lr, [r12, lr] + eor r5, r5, r12, lsl #16 + ubfx r12, r9, #16, #8 + eor r5, r5, lr, lsl #24 + lsr lr, r10, #24 + ldrb r7, [r2, r7] + ldrb lr, [r2, lr] + ldrb r6, [r2, r6] + ldrb r12, [r2, r12] lsr r11, r11, #24 eor r6, r6, r7, lsl #8 ubfx r7, r8, #0, #8 - eor r6, r6, lr, lsl #16 - ubfx lr, r9, #8, #8 - eor r6, r6, r2, lsl #24 - ubfx r2, r10, #16, #8 - ldrb r11, [r12, r11] - ldrb lr, [r12, lr] - ldrb r7, [r12, r7] - ldrb r2, [r12, r2] - eor lr, lr, r11, lsl #16 + eor r6, r6, r12, lsl #16 + ubfx r12, r9, #8, #8 + eor r6, r6, lr, lsl #24 + ubfx lr, r10, #16, #8 + ldrb r11, [r2, r11] + ldrb r12, [r2, r12] + ldrb r7, [r2, r7] + ldrb lr, [r2, lr] + eor r12, r12, r11, lsl #16 ldm r3, {r8, r9, r10, r11} - eor r7, r7, lr, lsl #8 - eor r7, r7, r2, lsl #16 + eor r7, r7, r12, lsl #8 + eor r7, r7, lr, lsl #16 # XOR in Key Schedule eor r4, r4, r8 eor r5, r5, r9 @@ -2046,7 +2046,8 @@ AES_ECB_decrypt: mov lr, r0 adr r0, L_AES_ARM32_td_ecb ldr r0, [r0] - adr r12, L_AES_ARM32_td4 + mov r12, r2 + adr r2, L_AES_ARM32_td4 cmp r8, #10 beq L_AES_ECB_decrypt_start_block_128 cmp r8, #12 @@ -2060,7 +2061,7 @@ L_AES_ECB_decrypt_loop_block_256: rev r5, r5 rev r6, r6 rev r7, r7 - push {r1, r2, r3, lr} + push {r1, r3, r12, lr} ldm r3!, {r8, r9, r10, r11} # Round: 0 - XOR in key schedule eor r4, r4, r8 @@ -2069,7 +2070,7 @@ L_AES_ECB_decrypt_loop_block_256: eor r7, r7, r11 mov r1, #6 bl AES_decrypt_block - pop {r1, r2, r3, lr} + pop {r1, r3, r12, lr} rev r4, r4 rev r5, r5 rev 
r6, r6 @@ -2078,7 +2079,7 @@ L_AES_ECB_decrypt_loop_block_256: str r5, [r1, #4] str r6, [r1, #8] str r7, [r1, #12] - subs r2, r2, #16 + subs r12, r12, #16 add lr, lr, #16 add r1, r1, #16 bne L_AES_ECB_decrypt_loop_block_256 @@ -2093,7 +2094,7 @@ L_AES_ECB_decrypt_loop_block_192: rev r5, r5 rev r6, r6 rev r7, r7 - push {r1, r2, r3, lr} + push {r1, r3, r12, lr} ldm r3!, {r8, r9, r10, r11} # Round: 0 - XOR in key schedule eor r4, r4, r8 @@ -2102,7 +2103,7 @@ L_AES_ECB_decrypt_loop_block_192: eor r7, r7, r11 mov r1, #5 bl AES_decrypt_block - pop {r1, r2, r3, lr} + pop {r1, r3, r12, lr} rev r4, r4 rev r5, r5 rev r6, r6 @@ -2111,7 +2112,7 @@ L_AES_ECB_decrypt_loop_block_192: str r5, [r1, #4] str r6, [r1, #8] str r7, [r1, #12] - subs r2, r2, #16 + subs r12, r12, #16 add lr, lr, #16 add r1, r1, #16 bne L_AES_ECB_decrypt_loop_block_192 @@ -2126,7 +2127,7 @@ L_AES_ECB_decrypt_loop_block_128: rev r5, r5 rev r6, r6 rev r7, r7 - push {r1, r2, r3, lr} + push {r1, r3, r12, lr} ldm r3!, {r8, r9, r10, r11} # Round: 0 - XOR in key schedule eor r4, r4, r8 @@ -2135,7 +2136,7 @@ L_AES_ECB_decrypt_loop_block_128: eor r7, r7, r11 mov r1, #4 bl AES_decrypt_block - pop {r1, r2, r3, lr} + pop {r1, r3, r12, lr} rev r4, r4 rev r5, r5 rev r6, r6 @@ -2144,7 +2145,7 @@ L_AES_ECB_decrypt_loop_block_128: str r5, [r1, #4] str r6, [r1, #8] str r7, [r1, #12] - subs r2, r2, #16 + subs r12, r12, #16 add lr, lr, #16 add r1, r1, #16 bne L_AES_ECB_decrypt_loop_block_128 @@ -2164,14 +2165,15 @@ AES_CBC_decrypt: mov lr, r0 adr r0, L_AES_ARM32_td_ecb ldr r0, [r0] - adr r12, L_AES_ARM32_td4 + mov r12, r2 + adr r2, L_AES_ARM32_td4 push {r3, r4} cmp r8, #10 beq L_AES_CBC_decrypt_loop_block_128 cmp r8, #12 beq L_AES_CBC_decrypt_loop_block_192 L_AES_CBC_decrypt_loop_block_256: - push {r1, r2, lr} + push {r1, r12, lr} ldr r4, [lr] ldr r5, [lr, #4] ldr r6, [lr, #8] @@ -2207,7 +2209,7 @@ L_AES_CBC_decrypt_loop_block_256: rev r6, r6 rev r7, r7 ldm lr, {r8, r9, r10, r11} - pop {r1, r2, lr} + pop {r1, r12, lr} ldr r3, 
[sp] eor r4, r4, r8 eor r5, r5, r9 @@ -2217,11 +2219,11 @@ L_AES_CBC_decrypt_loop_block_256: str r5, [r1, #4] str r6, [r1, #8] str r7, [r1, #12] - subs r2, r2, #16 + subs r12, r12, #16 add lr, lr, #16 add r1, r1, #16 beq L_AES_CBC_decrypt_end_odd - push {r1, r2, lr} + push {r1, r12, lr} ldr r4, [lr] ldr r5, [lr, #4] ldr r6, [lr, #8] @@ -2268,7 +2270,7 @@ L_AES_CBC_decrypt_loop_block_256: #else ldrd r10, r11, [lr, #24] #endif - pop {r1, r2, lr} + pop {r1, r12, lr} ldr r3, [sp] eor r4, r4, r8 eor r5, r5, r9 @@ -2278,13 +2280,13 @@ L_AES_CBC_decrypt_loop_block_256: str r5, [r1, #4] str r6, [r1, #8] str r7, [r1, #12] - subs r2, r2, #16 + subs r12, r12, #16 add lr, lr, #16 add r1, r1, #16 bne L_AES_CBC_decrypt_loop_block_256 b L_AES_CBC_decrypt_end L_AES_CBC_decrypt_loop_block_192: - push {r1, r2, lr} + push {r1, r12, lr} ldr r4, [lr] ldr r5, [lr, #4] ldr r6, [lr, #8] @@ -2320,7 +2322,7 @@ L_AES_CBC_decrypt_loop_block_192: rev r6, r6 rev r7, r7 ldm lr, {r8, r9, r10, r11} - pop {r1, r2, lr} + pop {r1, r12, lr} ldr r3, [sp] eor r4, r4, r8 eor r5, r5, r9 @@ -2330,11 +2332,11 @@ L_AES_CBC_decrypt_loop_block_192: str r5, [r1, #4] str r6, [r1, #8] str r7, [r1, #12] - subs r2, r2, #16 + subs r12, r12, #16 add lr, lr, #16 add r1, r1, #16 beq L_AES_CBC_decrypt_end_odd - push {r1, r2, lr} + push {r1, r12, lr} ldr r4, [lr] ldr r5, [lr, #4] ldr r6, [lr, #8] @@ -2381,7 +2383,7 @@ L_AES_CBC_decrypt_loop_block_192: #else ldrd r10, r11, [lr, #24] #endif - pop {r1, r2, lr} + pop {r1, r12, lr} ldr r3, [sp] eor r4, r4, r8 eor r5, r5, r9 @@ -2391,13 +2393,13 @@ L_AES_CBC_decrypt_loop_block_192: str r5, [r1, #4] str r6, [r1, #8] str r7, [r1, #12] - subs r2, r2, #16 + subs r12, r12, #16 add lr, lr, #16 add r1, r1, #16 bne L_AES_CBC_decrypt_loop_block_192 b L_AES_CBC_decrypt_end L_AES_CBC_decrypt_loop_block_128: - push {r1, r2, lr} + push {r1, r12, lr} ldr r4, [lr] ldr r5, [lr, #4] ldr r6, [lr, #8] @@ -2433,7 +2435,7 @@ L_AES_CBC_decrypt_loop_block_128: rev r6, r6 rev r7, r7 ldm lr, {r8, r9, 
r10, r11} - pop {r1, r2, lr} + pop {r1, r12, lr} ldr r3, [sp] eor r4, r4, r8 eor r5, r5, r9 @@ -2443,11 +2445,11 @@ L_AES_CBC_decrypt_loop_block_128: str r5, [r1, #4] str r6, [r1, #8] str r7, [r1, #12] - subs r2, r2, #16 + subs r12, r12, #16 add lr, lr, #16 add r1, r1, #16 beq L_AES_CBC_decrypt_end_odd - push {r1, r2, lr} + push {r1, r12, lr} ldr r4, [lr] ldr r5, [lr, #4] ldr r6, [lr, #8] @@ -2494,7 +2496,7 @@ L_AES_CBC_decrypt_loop_block_128: #else ldrd r10, r11, [lr, #24] #endif - pop {r1, r2, lr} + pop {r1, r12, lr} ldr r3, [sp] eor r4, r4, r8 eor r5, r5, r9 @@ -2504,7 +2506,7 @@ L_AES_CBC_decrypt_loop_block_128: str r5, [r1, #4] str r6, [r1, #8] str r7, [r1, #12] - subs r2, r2, #16 + subs r12, r12, #16 add lr, lr, #16 add r1, r1, #16 bne L_AES_CBC_decrypt_loop_block_128 diff --git a/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c index 39959155e..cfa532a58 100644 --- a/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c @@ -1231,60 +1231,61 @@ void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l #endif /* WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_CBC) -void AES_decrypt_block(const uint32_t* td, int nr); -void AES_decrypt_block(const uint32_t* td_p, int nr_p) +void AES_decrypt_block(const uint32_t* td, int nr, const uint8_t* td4); +void AES_decrypt_block(const uint32_t* td_p, int nr_p, const uint8_t* td4_p) { register const uint32_t* td asm ("r0") = (const uint32_t*)td_p; register int nr asm ("r1") = (int)nr_p; + register const uint8_t* td4 asm ("r2") = (const uint8_t*)td4_p; __asm__ __volatile__ ( "\n" "L_AES_decrypt_block_nr_%=: \n\t" "ubfx r8, r7, #16, #8\n\t" "lsr r11, r4, #24\n\t" - "ubfx lr, r6, #8, #8\n\t" - "ubfx r2, r5, #0, #8\n\t" + "ubfx r12, r6, #8, #8\n\t" + "ubfx lr, r5, #0, #8\n\t" "ldr r8, [%[td], r8, lsl #2]\n\t" "ldr r11, [%[td], r11, lsl #2]\n\t" 
+ "ldr r12, [%[td], r12, lsl #2]\n\t" "ldr lr, [%[td], lr, lsl #2]\n\t" - "ldr r2, [%[td], r2, lsl #2]\n\t" "ubfx r9, r4, #16, #8\n\t" "eor r8, r8, r11, ror #24\n\t" "lsr r11, r5, #24\n\t" - "eor r8, r8, lr, ror #8\n\t" - "ubfx lr, r7, #8, #8\n\t" - "eor r8, r8, r2, ror #16\n\t" - "ubfx r2, r6, #0, #8\n\t" + "eor r8, r8, r12, ror #8\n\t" + "ubfx r12, r7, #8, #8\n\t" + "eor r8, r8, lr, ror #16\n\t" + "ubfx lr, r6, #0, #8\n\t" "ldr r9, [%[td], r9, lsl #2]\n\t" "ldr r11, [%[td], r11, lsl #2]\n\t" + "ldr r12, [%[td], r12, lsl #2]\n\t" "ldr lr, [%[td], lr, lsl #2]\n\t" - "ldr r2, [%[td], r2, lsl #2]\n\t" "ubfx r10, r5, #16, #8\n\t" "eor r9, r9, r11, ror #24\n\t" "lsr r11, r6, #24\n\t" - "eor r9, r9, lr, ror #8\n\t" - "ubfx lr, r4, #8, #8\n\t" - "eor r9, r9, r2, ror #16\n\t" - "ubfx r2, r7, #0, #8\n\t" + "eor r9, r9, r12, ror #8\n\t" + "ubfx r12, r4, #8, #8\n\t" + "eor r9, r9, lr, ror #16\n\t" + "ubfx lr, r7, #0, #8\n\t" "ldr r10, [%[td], r10, lsl #2]\n\t" "ldr r11, [%[td], r11, lsl #2]\n\t" + "ldr r12, [%[td], r12, lsl #2]\n\t" "ldr lr, [%[td], lr, lsl #2]\n\t" - "ldr r2, [%[td], r2, lsl #2]\n\t" "ubfx r4, r4, #0, #8\n\t" "eor r10, r10, r11, ror #24\n\t" "ubfx r11, r6, #16, #8\n\t" - "eor r10, r10, lr, ror #8\n\t" - "lsr lr, r7, #24\n\t" - "eor r10, r10, r2, ror #16\n\t" - "ubfx r2, r5, #8, #8\n\t" + "eor r10, r10, r12, ror #8\n\t" + "lsr r12, r7, #24\n\t" + "eor r10, r10, lr, ror #16\n\t" + "ubfx lr, r5, #8, #8\n\t" "ldr r4, [%[td], r4, lsl #2]\n\t" - "ldr lr, [%[td], lr, lsl #2]\n\t" + "ldr r12, [%[td], r12, lsl #2]\n\t" "ldr r11, [%[td], r11, lsl #2]\n\t" - "ldr r2, [%[td], r2, lsl #2]\n\t" - "eor lr, lr, r4, ror #24\n\t" + "ldr lr, [%[td], lr, lsl #2]\n\t" + "eor r12, r12, r4, ror #24\n\t" "ldm r3!, {r4, r5, r6, r7}\n\t" - "eor r11, r11, r2, ror #8\n\t" - "eor r11, r11, lr, ror #24\n\t" + "eor r11, r11, lr, ror #8\n\t" + "eor r11, r11, r12, ror #24\n\t" /* XOR in Key Schedule */ "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" @@ -1292,49 +1293,49 @@ void 
AES_decrypt_block(const uint32_t* td_p, int nr_p) "eor r11, r11, r7\n\t" "ubfx r4, r11, #16, #8\n\t" "lsr r7, r8, #24\n\t" - "ubfx lr, r10, #8, #8\n\t" - "ubfx r2, r9, #0, #8\n\t" + "ubfx r12, r10, #8, #8\n\t" + "ubfx lr, r9, #0, #8\n\t" "ldr r4, [%[td], r4, lsl #2]\n\t" "ldr r7, [%[td], r7, lsl #2]\n\t" + "ldr r12, [%[td], r12, lsl #2]\n\t" "ldr lr, [%[td], lr, lsl #2]\n\t" - "ldr r2, [%[td], r2, lsl #2]\n\t" "ubfx r5, r8, #16, #8\n\t" "eor r4, r4, r7, ror #24\n\t" "lsr r7, r9, #24\n\t" - "eor r4, r4, lr, ror #8\n\t" - "ubfx lr, r11, #8, #8\n\t" - "eor r4, r4, r2, ror #16\n\t" - "ubfx r2, r10, #0, #8\n\t" + "eor r4, r4, r12, ror #8\n\t" + "ubfx r12, r11, #8, #8\n\t" + "eor r4, r4, lr, ror #16\n\t" + "ubfx lr, r10, #0, #8\n\t" "ldr r5, [%[td], r5, lsl #2]\n\t" "ldr r7, [%[td], r7, lsl #2]\n\t" + "ldr r12, [%[td], r12, lsl #2]\n\t" "ldr lr, [%[td], lr, lsl #2]\n\t" - "ldr r2, [%[td], r2, lsl #2]\n\t" "ubfx r6, r9, #16, #8\n\t" "eor r5, r5, r7, ror #24\n\t" "lsr r7, r10, #24\n\t" - "eor r5, r5, lr, ror #8\n\t" - "ubfx lr, r8, #8, #8\n\t" - "eor r5, r5, r2, ror #16\n\t" - "ubfx r2, r11, #0, #8\n\t" + "eor r5, r5, r12, ror #8\n\t" + "ubfx r12, r8, #8, #8\n\t" + "eor r5, r5, lr, ror #16\n\t" + "ubfx lr, r11, #0, #8\n\t" "ldr r6, [%[td], r6, lsl #2]\n\t" "ldr r7, [%[td], r7, lsl #2]\n\t" + "ldr r12, [%[td], r12, lsl #2]\n\t" "ldr lr, [%[td], lr, lsl #2]\n\t" - "ldr r2, [%[td], r2, lsl #2]\n\t" "ubfx r8, r8, #0, #8\n\t" "eor r6, r6, r7, ror #24\n\t" "ubfx r7, r10, #16, #8\n\t" - "eor r6, r6, lr, ror #8\n\t" - "lsr lr, r11, #24\n\t" - "eor r6, r6, r2, ror #16\n\t" - "ubfx r2, r9, #8, #8\n\t" + "eor r6, r6, r12, ror #8\n\t" + "lsr r12, r11, #24\n\t" + "eor r6, r6, lr, ror #16\n\t" + "ubfx lr, r9, #8, #8\n\t" "ldr r8, [%[td], r8, lsl #2]\n\t" - "ldr lr, [%[td], lr, lsl #2]\n\t" + "ldr r12, [%[td], r12, lsl #2]\n\t" "ldr r7, [%[td], r7, lsl #2]\n\t" - "ldr r2, [%[td], r2, lsl #2]\n\t" - "eor lr, lr, r8, ror #24\n\t" + "ldr lr, [%[td], lr, lsl #2]\n\t" + "eor r12, r12, r8, ror 
#24\n\t" "ldm r3!, {r8, r9, r10, r11}\n\t" - "eor r7, r7, r2, ror #8\n\t" - "eor r7, r7, lr, ror #24\n\t" + "eor r7, r7, lr, ror #8\n\t" + "eor r7, r7, r12, ror #24\n\t" /* XOR in Key Schedule */ "eor r4, r4, r8\n\t" "eor r5, r5, r9\n\t" @@ -1344,49 +1345,49 @@ void AES_decrypt_block(const uint32_t* td_p, int nr_p) "bne L_AES_decrypt_block_nr_%=\n\t" "ubfx r8, r7, #16, #8\n\t" "lsr r11, r4, #24\n\t" - "ubfx lr, r6, #8, #8\n\t" - "ubfx r2, r5, #0, #8\n\t" + "ubfx r12, r6, #8, #8\n\t" + "ubfx lr, r5, #0, #8\n\t" "ldr r8, [%[td], r8, lsl #2]\n\t" "ldr r11, [%[td], r11, lsl #2]\n\t" + "ldr r12, [%[td], r12, lsl #2]\n\t" "ldr lr, [%[td], lr, lsl #2]\n\t" - "ldr r2, [%[td], r2, lsl #2]\n\t" "ubfx r9, r4, #16, #8\n\t" "eor r8, r8, r11, ror #24\n\t" "lsr r11, r5, #24\n\t" - "eor r8, r8, lr, ror #8\n\t" - "ubfx lr, r7, #8, #8\n\t" - "eor r8, r8, r2, ror #16\n\t" - "ubfx r2, r6, #0, #8\n\t" + "eor r8, r8, r12, ror #8\n\t" + "ubfx r12, r7, #8, #8\n\t" + "eor r8, r8, lr, ror #16\n\t" + "ubfx lr, r6, #0, #8\n\t" "ldr r9, [%[td], r9, lsl #2]\n\t" "ldr r11, [%[td], r11, lsl #2]\n\t" + "ldr r12, [%[td], r12, lsl #2]\n\t" "ldr lr, [%[td], lr, lsl #2]\n\t" - "ldr r2, [%[td], r2, lsl #2]\n\t" "ubfx r10, r5, #16, #8\n\t" "eor r9, r9, r11, ror #24\n\t" "lsr r11, r6, #24\n\t" - "eor r9, r9, lr, ror #8\n\t" - "ubfx lr, r4, #8, #8\n\t" - "eor r9, r9, r2, ror #16\n\t" - "ubfx r2, r7, #0, #8\n\t" + "eor r9, r9, r12, ror #8\n\t" + "ubfx r12, r4, #8, #8\n\t" + "eor r9, r9, lr, ror #16\n\t" + "ubfx lr, r7, #0, #8\n\t" "ldr r10, [%[td], r10, lsl #2]\n\t" "ldr r11, [%[td], r11, lsl #2]\n\t" + "ldr r12, [%[td], r12, lsl #2]\n\t" "ldr lr, [%[td], lr, lsl #2]\n\t" - "ldr r2, [%[td], r2, lsl #2]\n\t" "ubfx r4, r4, #0, #8\n\t" "eor r10, r10, r11, ror #24\n\t" "ubfx r11, r6, #16, #8\n\t" - "eor r10, r10, lr, ror #8\n\t" - "lsr lr, r7, #24\n\t" - "eor r10, r10, r2, ror #16\n\t" - "ubfx r2, r5, #8, #8\n\t" + "eor r10, r10, r12, ror #8\n\t" + "lsr r12, r7, #24\n\t" + "eor r10, r10, lr, ror #16\n\t" + 
"ubfx lr, r5, #8, #8\n\t" "ldr r4, [%[td], r4, lsl #2]\n\t" - "ldr lr, [%[td], lr, lsl #2]\n\t" + "ldr r12, [%[td], r12, lsl #2]\n\t" "ldr r11, [%[td], r11, lsl #2]\n\t" - "ldr r2, [%[td], r2, lsl #2]\n\t" - "eor lr, lr, r4, ror #24\n\t" + "ldr lr, [%[td], lr, lsl #2]\n\t" + "eor r12, r12, r4, ror #24\n\t" "ldm r3!, {r4, r5, r6, r7}\n\t" - "eor r11, r11, r2, ror #8\n\t" - "eor r11, r11, lr, ror #24\n\t" + "eor r11, r11, lr, ror #8\n\t" + "eor r11, r11, r12, ror #24\n\t" /* XOR in Key Schedule */ "eor r8, r8, r4\n\t" "eor r9, r9, r5\n\t" @@ -1394,55 +1395,55 @@ void AES_decrypt_block(const uint32_t* td_p, int nr_p) "eor r11, r11, r7\n\t" "ubfx r4, r9, #0, #8\n\t" "ubfx r7, r10, #8, #8\n\t" - "ubfx lr, r11, #16, #8\n\t" - "lsr r2, r8, #24\n\t" - "ldrb r4, [r12, r4]\n\t" - "ldrb r7, [r12, r7]\n\t" - "ldrb lr, [r12, lr]\n\t" - "ldrb r2, [r12, r2]\n\t" + "ubfx r12, r11, #16, #8\n\t" + "lsr lr, r8, #24\n\t" + "ldrb r4, [%[td4], r4]\n\t" + "ldrb r7, [%[td4], r7]\n\t" + "ldrb r12, [%[td4], r12]\n\t" + "ldrb lr, [%[td4], lr]\n\t" "ubfx r5, r10, #0, #8\n\t" "eor r4, r4, r7, lsl #8\n\t" "ubfx r7, r11, #8, #8\n\t" - "eor r4, r4, lr, lsl #16\n\t" - "ubfx lr, r8, #16, #8\n\t" - "eor r4, r4, r2, lsl #24\n\t" - "lsr r2, r9, #24\n\t" - "ldrb r7, [r12, r7]\n\t" - "ldrb r2, [r12, r2]\n\t" - "ldrb r5, [r12, r5]\n\t" - "ldrb lr, [r12, lr]\n\t" + "eor r4, r4, r12, lsl #16\n\t" + "ubfx r12, r8, #16, #8\n\t" + "eor r4, r4, lr, lsl #24\n\t" + "lsr lr, r9, #24\n\t" + "ldrb r7, [%[td4], r7]\n\t" + "ldrb lr, [%[td4], lr]\n\t" + "ldrb r5, [%[td4], r5]\n\t" + "ldrb r12, [%[td4], r12]\n\t" "ubfx r6, r11, #0, #8\n\t" "eor r5, r5, r7, lsl #8\n\t" "ubfx r7, r8, #8, #8\n\t" - "eor r5, r5, lr, lsl #16\n\t" - "ubfx lr, r9, #16, #8\n\t" - "eor r5, r5, r2, lsl #24\n\t" - "lsr r2, r10, #24\n\t" - "ldrb r7, [r12, r7]\n\t" - "ldrb r2, [r12, r2]\n\t" - "ldrb r6, [r12, r6]\n\t" - "ldrb lr, [r12, lr]\n\t" + "eor r5, r5, r12, lsl #16\n\t" + "ubfx r12, r9, #16, #8\n\t" + "eor r5, r5, lr, lsl #24\n\t" + "lsr lr, 
r10, #24\n\t" + "ldrb r7, [%[td4], r7]\n\t" + "ldrb lr, [%[td4], lr]\n\t" + "ldrb r6, [%[td4], r6]\n\t" + "ldrb r12, [%[td4], r12]\n\t" "lsr r11, r11, #24\n\t" "eor r6, r6, r7, lsl #8\n\t" "ubfx r7, r8, #0, #8\n\t" - "eor r6, r6, lr, lsl #16\n\t" - "ubfx lr, r9, #8, #8\n\t" - "eor r6, r6, r2, lsl #24\n\t" - "ubfx r2, r10, #16, #8\n\t" - "ldrb r11, [r12, r11]\n\t" - "ldrb lr, [r12, lr]\n\t" - "ldrb r7, [r12, r7]\n\t" - "ldrb r2, [r12, r2]\n\t" - "eor lr, lr, r11, lsl #16\n\t" + "eor r6, r6, r12, lsl #16\n\t" + "ubfx r12, r9, #8, #8\n\t" + "eor r6, r6, lr, lsl #24\n\t" + "ubfx lr, r10, #16, #8\n\t" + "ldrb r11, [%[td4], r11]\n\t" + "ldrb r12, [%[td4], r12]\n\t" + "ldrb r7, [%[td4], r7]\n\t" + "ldrb lr, [%[td4], lr]\n\t" + "eor r12, r12, r11, lsl #16\n\t" "ldm r3, {r8, r9, r10, r11}\n\t" - "eor r7, r7, lr, lsl #8\n\t" - "eor r7, r7, r2, lsl #16\n\t" + "eor r7, r7, r12, lsl #8\n\t" + "eor r7, r7, lr, lsl #16\n\t" /* XOR in Key Schedule */ "eor r4, r4, r8\n\t" "eor r5, r5, r9\n\t" "eor r6, r6, r10\n\t" "eor r7, r7, r11\n\t" - : [td] "+r" (td), [nr] "+r" (nr) + : [td] "+r" (td), [nr] "+r" (nr), [td4] "+r" (td4) : : "memory", "lr" ); @@ -1500,7 +1501,8 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "mov r8, r4\n\t" "mov lr, %[in]\n\t" "mov r0, %[L_AES_ARM32_td_ecb]\n\t" - "mov r12, %[L_AES_ARM32_td4]\n\t" + "mov r12, %[len]\n\t" + "mov r2, %[L_AES_ARM32_td4]\n\t" "cmp r8, #10\n\t" "beq L_AES_ECB_decrypt_start_block_128_%=\n\t" "cmp r8, #12\n\t" @@ -1515,7 +1517,7 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "rev r5, r5\n\t" "rev r6, r6\n\t" "rev r7, r7\n\t" - "push {r1, r2, %[ks], lr}\n\t" + "push {r1, %[ks], r12, lr}\n\t" "ldm %[ks]!, {r8, r9, r10, r11}\n\t" /* Round: 0 - XOR in key schedule */ "eor r4, r4, r8\n\t" @@ -1524,7 +1526,7 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "eor r7, r7, r11\n\t" "mov r1, #6\n\t" "bl AES_decrypt_block\n\t" - "pop {r1, r2, 
%[ks], lr}\n\t" + "pop {r1, %[ks], r12, lr}\n\t" "rev r4, r4\n\t" "rev r5, r5\n\t" "rev r6, r6\n\t" @@ -1533,7 +1535,7 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "str r5, [%[out], #4]\n\t" "str r6, [%[out], #8]\n\t" "str r7, [%[out], #12]\n\t" - "subs %[len], %[len], #16\n\t" + "subs r12, r12, #16\n\t" "add lr, lr, #16\n\t" "add %[out], %[out], #16\n\t" "bne L_AES_ECB_decrypt_loop_block_256_%=\n\t" @@ -1550,7 +1552,7 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "rev r5, r5\n\t" "rev r6, r6\n\t" "rev r7, r7\n\t" - "push {r1, r2, %[ks], lr}\n\t" + "push {r1, %[ks], r12, lr}\n\t" "ldm %[ks]!, {r8, r9, r10, r11}\n\t" /* Round: 0 - XOR in key schedule */ "eor r4, r4, r8\n\t" @@ -1559,7 +1561,7 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "eor r7, r7, r11\n\t" "mov r1, #5\n\t" "bl AES_decrypt_block\n\t" - "pop {r1, r2, %[ks], lr}\n\t" + "pop {r1, %[ks], r12, lr}\n\t" "rev r4, r4\n\t" "rev r5, r5\n\t" "rev r6, r6\n\t" @@ -1568,7 +1570,7 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "str r5, [%[out], #4]\n\t" "str r6, [%[out], #8]\n\t" "str r7, [%[out], #12]\n\t" - "subs %[len], %[len], #16\n\t" + "subs r12, r12, #16\n\t" "add lr, lr, #16\n\t" "add %[out], %[out], #16\n\t" "bne L_AES_ECB_decrypt_loop_block_192_%=\n\t" @@ -1585,7 +1587,7 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "rev r5, r5\n\t" "rev r6, r6\n\t" "rev r7, r7\n\t" - "push {r1, r2, %[ks], lr}\n\t" + "push {r1, %[ks], r12, lr}\n\t" "ldm %[ks]!, {r8, r9, r10, r11}\n\t" /* Round: 0 - XOR in key schedule */ "eor r4, r4, r8\n\t" @@ -1594,7 +1596,7 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "eor r7, r7, r11\n\t" "mov r1, #4\n\t" "bl AES_decrypt_block\n\t" - "pop {r1, r2, %[ks], lr}\n\t" + "pop {r1, %[ks], r12, lr}\n\t" "rev r4, r4\n\t" "rev r5, r5\n\t" "rev r6, r6\n\t" @@ -1603,7 
+1605,7 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "str r5, [%[out], #4]\n\t" "str r6, [%[out], #8]\n\t" "str r7, [%[out], #12]\n\t" - "subs %[len], %[len], #16\n\t" + "subs r12, r12, #16\n\t" "add lr, lr, #16\n\t" "add %[out], %[out], #16\n\t" "bne L_AES_ECB_decrypt_loop_block_128_%=\n\t" @@ -1635,7 +1637,8 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "mov r4, r5\n\t" "mov lr, %[in]\n\t" "mov r0, %[L_AES_ARM32_td_ecb]\n\t" - "mov r12, %[L_AES_ARM32_td4]\n\t" + "mov r12, %[len]\n\t" + "mov r2, %[L_AES_ARM32_td4]\n\t" "push {%[ks]-r4}\n\t" "cmp r8, #10\n\t" "beq L_AES_CBC_decrypt_loop_block_128_%=\n\t" @@ -1643,7 +1646,7 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "beq L_AES_CBC_decrypt_loop_block_192_%=\n\t" "\n" "L_AES_CBC_decrypt_loop_block_256_%=: \n\t" - "push {r1, r2, lr}\n\t" + "push {r1, r12, lr}\n\t" "ldr r4, [lr]\n\t" "ldr r5, [lr, #4]\n\t" "ldr r6, [lr, #8]\n\t" @@ -1679,7 +1682,7 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "rev r6, r6\n\t" "rev r7, r7\n\t" "ldm lr, {r8, r9, r10, r11}\n\t" - "pop {r1, r2, lr}\n\t" + "pop {r1, r12, lr}\n\t" "ldr %[ks], [sp]\n\t" "eor r4, r4, r8\n\t" "eor r5, r5, r9\n\t" @@ -1689,11 +1692,11 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "str r5, [%[out], #4]\n\t" "str r6, [%[out], #8]\n\t" "str r7, [%[out], #12]\n\t" - "subs %[len], %[len], #16\n\t" + "subs r12, r12, #16\n\t" "add lr, lr, #16\n\t" "add %[out], %[out], #16\n\t" "beq L_AES_CBC_decrypt_end_odd_%=\n\t" - "push {r1, r2, lr}\n\t" + "push {r1, r12, lr}\n\t" "ldr r4, [lr]\n\t" "ldr r5, [lr, #4]\n\t" "ldr r6, [lr, #8]\n\t" @@ -1740,7 +1743,7 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l #else "ldrd r10, r11, [lr, #24]\n\t" #endif - "pop {r1, r2, lr}\n\t" + "pop {r1, r12, lr}\n\t" "ldr %[ks], [sp]\n\t" "eor r4, r4, r8\n\t" "eor r5, r5, 
r9\n\t" @@ -1750,14 +1753,14 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "str r5, [%[out], #4]\n\t" "str r6, [%[out], #8]\n\t" "str r7, [%[out], #12]\n\t" - "subs %[len], %[len], #16\n\t" + "subs r12, r12, #16\n\t" "add lr, lr, #16\n\t" "add %[out], %[out], #16\n\t" "bne L_AES_CBC_decrypt_loop_block_256_%=\n\t" "b L_AES_CBC_decrypt_end_%=\n\t" "\n" "L_AES_CBC_decrypt_loop_block_192_%=: \n\t" - "push {r1, r2, lr}\n\t" + "push {r1, r12, lr}\n\t" "ldr r4, [lr]\n\t" "ldr r5, [lr, #4]\n\t" "ldr r6, [lr, #8]\n\t" @@ -1793,7 +1796,7 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "rev r6, r6\n\t" "rev r7, r7\n\t" "ldm lr, {r8, r9, r10, r11}\n\t" - "pop {r1, r2, lr}\n\t" + "pop {r1, r12, lr}\n\t" "ldr %[ks], [sp]\n\t" "eor r4, r4, r8\n\t" "eor r5, r5, r9\n\t" @@ -1803,11 +1806,11 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "str r5, [%[out], #4]\n\t" "str r6, [%[out], #8]\n\t" "str r7, [%[out], #12]\n\t" - "subs %[len], %[len], #16\n\t" + "subs r12, r12, #16\n\t" "add lr, lr, #16\n\t" "add %[out], %[out], #16\n\t" "beq L_AES_CBC_decrypt_end_odd_%=\n\t" - "push {r1, r2, lr}\n\t" + "push {r1, r12, lr}\n\t" "ldr r4, [lr]\n\t" "ldr r5, [lr, #4]\n\t" "ldr r6, [lr, #8]\n\t" @@ -1854,7 +1857,7 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l #else "ldrd r10, r11, [lr, #24]\n\t" #endif - "pop {r1, r2, lr}\n\t" + "pop {r1, r12, lr}\n\t" "ldr %[ks], [sp]\n\t" "eor r4, r4, r8\n\t" "eor r5, r5, r9\n\t" @@ -1864,14 +1867,14 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "str r5, [%[out], #4]\n\t" "str r6, [%[out], #8]\n\t" "str r7, [%[out], #12]\n\t" - "subs %[len], %[len], #16\n\t" + "subs r12, r12, #16\n\t" "add lr, lr, #16\n\t" "add %[out], %[out], #16\n\t" "bne L_AES_CBC_decrypt_loop_block_192_%=\n\t" "b L_AES_CBC_decrypt_end_%=\n\t" "\n" "L_AES_CBC_decrypt_loop_block_128_%=: \n\t" - "push {r1, r2, 
lr}\n\t" + "push {r1, r12, lr}\n\t" "ldr r4, [lr]\n\t" "ldr r5, [lr, #4]\n\t" "ldr r6, [lr, #8]\n\t" @@ -1907,7 +1910,7 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "rev r6, r6\n\t" "rev r7, r7\n\t" "ldm lr, {r8, r9, r10, r11}\n\t" - "pop {r1, r2, lr}\n\t" + "pop {r1, r12, lr}\n\t" "ldr %[ks], [sp]\n\t" "eor r4, r4, r8\n\t" "eor r5, r5, r9\n\t" @@ -1917,11 +1920,11 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "str r5, [%[out], #4]\n\t" "str r6, [%[out], #8]\n\t" "str r7, [%[out], #12]\n\t" - "subs %[len], %[len], #16\n\t" + "subs r12, r12, #16\n\t" "add lr, lr, #16\n\t" "add %[out], %[out], #16\n\t" "beq L_AES_CBC_decrypt_end_odd_%=\n\t" - "push {r1, r2, lr}\n\t" + "push {r1, r12, lr}\n\t" "ldr r4, [lr]\n\t" "ldr r5, [lr, #4]\n\t" "ldr r6, [lr, #8]\n\t" @@ -1968,7 +1971,7 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l #else "ldrd r10, r11, [lr, #24]\n\t" #endif - "pop {r1, r2, lr}\n\t" + "pop {r1, r12, lr}\n\t" "ldr %[ks], [sp]\n\t" "eor r4, r4, r8\n\t" "eor r5, r5, r9\n\t" @@ -1978,7 +1981,7 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "str r5, [%[out], #4]\n\t" "str r6, [%[out], #8]\n\t" "str r7, [%[out], #12]\n\t" - "subs %[len], %[len], #16\n\t" + "subs r12, r12, #16\n\t" "add lr, lr, #16\n\t" "add %[out], %[out], #16\n\t" "bne L_AES_CBC_decrypt_loop_block_128_%=\n\t" diff --git a/wolfcrypt/src/port/arm/armv8-aes.c b/wolfcrypt/src/port/arm/armv8-aes.c index c71df72bb..fa07d4372 100644 --- a/wolfcrypt/src/port/arm/armv8-aes.c +++ b/wolfcrypt/src/port/arm/armv8-aes.c @@ -5622,6 +5622,13 @@ int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) if (sz == 0) { return 0; } + if (sz % AES_BLOCK_SIZE) { +#ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS + return BAD_LENGTH_E; +#else + return BAD_FUNC_ARG; +#endif + } AES_CBC_encrypt(in, out, sz, (const unsigned char*)aes->key, aes->rounds, (unsigned char*)aes->reg); 
@@ -5644,6 +5651,13 @@ int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) if (sz == 0) { return 0; } + if (sz % AES_BLOCK_SIZE) { +#ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS + return BAD_LENGTH_E; +#else + return BAD_FUNC_ARG; +#endif + } AES_CBC_decrypt(in, out, sz, (const unsigned char*)aes->key, aes->rounds, (unsigned char*)aes->reg); diff --git a/wolfcrypt/src/port/arm/thumb2-aes-asm.S b/wolfcrypt/src/port/arm/thumb2-aes-asm.S index 1401be432..2df54f6e7 100644 --- a/wolfcrypt/src/port/arm/thumb2-aes-asm.S +++ b/wolfcrypt/src/port/arm/thumb2-aes-asm.S @@ -1518,49 +1518,49 @@ AES_decrypt_block: L_AES_decrypt_block_nr: UBFX r8, r7, #16, #8 LSR r11, r4, #24 - UBFX lr, r6, #8, #8 - UBFX r2, r5, #0, #8 + UBFX r12, r6, #8, #8 + UBFX lr, r5, #0, #8 LDR r8, [r0, r8, LSL #2] LDR r11, [r0, r11, LSL #2] + LDR r12, [r0, r12, LSL #2] LDR lr, [r0, lr, LSL #2] - LDR r2, [r0, r2, LSL #2] UBFX r9, r4, #16, #8 EOR r8, r8, r11, ROR #24 LSR r11, r5, #24 - EOR r8, r8, lr, ROR #8 - UBFX lr, r7, #8, #8 - EOR r8, r8, r2, ROR #16 - UBFX r2, r6, #0, #8 + EOR r8, r8, r12, ROR #8 + UBFX r12, r7, #8, #8 + EOR r8, r8, lr, ROR #16 + UBFX lr, r6, #0, #8 LDR r9, [r0, r9, LSL #2] LDR r11, [r0, r11, LSL #2] + LDR r12, [r0, r12, LSL #2] LDR lr, [r0, lr, LSL #2] - LDR r2, [r0, r2, LSL #2] UBFX r10, r5, #16, #8 EOR r9, r9, r11, ROR #24 LSR r11, r6, #24 - EOR r9, r9, lr, ROR #8 - UBFX lr, r4, #8, #8 - EOR r9, r9, r2, ROR #16 - UBFX r2, r7, #0, #8 + EOR r9, r9, r12, ROR #8 + UBFX r12, r4, #8, #8 + EOR r9, r9, lr, ROR #16 + UBFX lr, r7, #0, #8 LDR r10, [r0, r10, LSL #2] LDR r11, [r0, r11, LSL #2] + LDR r12, [r0, r12, LSL #2] LDR lr, [r0, lr, LSL #2] - LDR r2, [r0, r2, LSL #2] UBFX r4, r4, #0, #8 EOR r10, r10, r11, ROR #24 UBFX r11, r6, #16, #8 - EOR r10, r10, lr, ROR #8 - LSR lr, r7, #24 - EOR r10, r10, r2, ROR #16 - UBFX r2, r5, #8, #8 + EOR r10, r10, r12, ROR #8 + LSR r12, r7, #24 + EOR r10, r10, lr, ROR #16 + UBFX lr, r5, #8, #8 LDR r4, [r0, r4, LSL #2] - LDR lr, [r0, lr, LSL #2] + LDR 
r12, [r0, r12, LSL #2] LDR r11, [r0, r11, LSL #2] - LDR r2, [r0, r2, LSL #2] - EOR lr, lr, r4, ROR #24 + LDR lr, [r0, lr, LSL #2] + EOR r12, r12, r4, ROR #24 LDM r3!, {r4, r5, r6, r7} - EOR r11, r11, r2, ROR #8 - EOR r11, r11, lr, ROR #24 + EOR r11, r11, lr, ROR #8 + EOR r11, r11, r12, ROR #24 # XOR in Key Schedule EOR r8, r8, r4 EOR r9, r9, r5 @@ -1568,49 +1568,49 @@ L_AES_decrypt_block_nr: EOR r11, r11, r7 UBFX r4, r11, #16, #8 LSR r7, r8, #24 - UBFX lr, r10, #8, #8 - UBFX r2, r9, #0, #8 + UBFX r12, r10, #8, #8 + UBFX lr, r9, #0, #8 LDR r4, [r0, r4, LSL #2] LDR r7, [r0, r7, LSL #2] + LDR r12, [r0, r12, LSL #2] LDR lr, [r0, lr, LSL #2] - LDR r2, [r0, r2, LSL #2] UBFX r5, r8, #16, #8 EOR r4, r4, r7, ROR #24 LSR r7, r9, #24 - EOR r4, r4, lr, ROR #8 - UBFX lr, r11, #8, #8 - EOR r4, r4, r2, ROR #16 - UBFX r2, r10, #0, #8 + EOR r4, r4, r12, ROR #8 + UBFX r12, r11, #8, #8 + EOR r4, r4, lr, ROR #16 + UBFX lr, r10, #0, #8 LDR r5, [r0, r5, LSL #2] LDR r7, [r0, r7, LSL #2] + LDR r12, [r0, r12, LSL #2] LDR lr, [r0, lr, LSL #2] - LDR r2, [r0, r2, LSL #2] UBFX r6, r9, #16, #8 EOR r5, r5, r7, ROR #24 LSR r7, r10, #24 - EOR r5, r5, lr, ROR #8 - UBFX lr, r8, #8, #8 - EOR r5, r5, r2, ROR #16 - UBFX r2, r11, #0, #8 + EOR r5, r5, r12, ROR #8 + UBFX r12, r8, #8, #8 + EOR r5, r5, lr, ROR #16 + UBFX lr, r11, #0, #8 LDR r6, [r0, r6, LSL #2] LDR r7, [r0, r7, LSL #2] + LDR r12, [r0, r12, LSL #2] LDR lr, [r0, lr, LSL #2] - LDR r2, [r0, r2, LSL #2] UBFX r8, r8, #0, #8 EOR r6, r6, r7, ROR #24 UBFX r7, r10, #16, #8 - EOR r6, r6, lr, ROR #8 - LSR lr, r11, #24 - EOR r6, r6, r2, ROR #16 - UBFX r2, r9, #8, #8 + EOR r6, r6, r12, ROR #8 + LSR r12, r11, #24 + EOR r6, r6, lr, ROR #16 + UBFX lr, r9, #8, #8 LDR r8, [r0, r8, LSL #2] - LDR lr, [r0, lr, LSL #2] + LDR r12, [r0, r12, LSL #2] LDR r7, [r0, r7, LSL #2] - LDR r2, [r0, r2, LSL #2] - EOR lr, lr, r8, ROR #24 + LDR lr, [r0, lr, LSL #2] + EOR r12, r12, r8, ROR #24 LDM r3!, {r8, r9, r10, r11} - EOR r7, r7, r2, ROR #8 - EOR r7, r7, lr, ROR #24 + EOR 
r7, r7, lr, ROR #8 + EOR r7, r7, r12, ROR #24 # XOR in Key Schedule EOR r4, r4, r8 EOR r5, r5, r9 @@ -1620,49 +1620,49 @@ L_AES_decrypt_block_nr: BNE L_AES_decrypt_block_nr UBFX r8, r7, #16, #8 LSR r11, r4, #24 - UBFX lr, r6, #8, #8 - UBFX r2, r5, #0, #8 + UBFX r12, r6, #8, #8 + UBFX lr, r5, #0, #8 LDR r8, [r0, r8, LSL #2] LDR r11, [r0, r11, LSL #2] + LDR r12, [r0, r12, LSL #2] LDR lr, [r0, lr, LSL #2] - LDR r2, [r0, r2, LSL #2] UBFX r9, r4, #16, #8 EOR r8, r8, r11, ROR #24 LSR r11, r5, #24 - EOR r8, r8, lr, ROR #8 - UBFX lr, r7, #8, #8 - EOR r8, r8, r2, ROR #16 - UBFX r2, r6, #0, #8 + EOR r8, r8, r12, ROR #8 + UBFX r12, r7, #8, #8 + EOR r8, r8, lr, ROR #16 + UBFX lr, r6, #0, #8 LDR r9, [r0, r9, LSL #2] LDR r11, [r0, r11, LSL #2] + LDR r12, [r0, r12, LSL #2] LDR lr, [r0, lr, LSL #2] - LDR r2, [r0, r2, LSL #2] UBFX r10, r5, #16, #8 EOR r9, r9, r11, ROR #24 LSR r11, r6, #24 - EOR r9, r9, lr, ROR #8 - UBFX lr, r4, #8, #8 - EOR r9, r9, r2, ROR #16 - UBFX r2, r7, #0, #8 + EOR r9, r9, r12, ROR #8 + UBFX r12, r4, #8, #8 + EOR r9, r9, lr, ROR #16 + UBFX lr, r7, #0, #8 LDR r10, [r0, r10, LSL #2] LDR r11, [r0, r11, LSL #2] + LDR r12, [r0, r12, LSL #2] LDR lr, [r0, lr, LSL #2] - LDR r2, [r0, r2, LSL #2] UBFX r4, r4, #0, #8 EOR r10, r10, r11, ROR #24 UBFX r11, r6, #16, #8 - EOR r10, r10, lr, ROR #8 - LSR lr, r7, #24 - EOR r10, r10, r2, ROR #16 - UBFX r2, r5, #8, #8 + EOR r10, r10, r12, ROR #8 + LSR r12, r7, #24 + EOR r10, r10, lr, ROR #16 + UBFX lr, r5, #8, #8 LDR r4, [r0, r4, LSL #2] - LDR lr, [r0, lr, LSL #2] + LDR r12, [r0, r12, LSL #2] LDR r11, [r0, r11, LSL #2] - LDR r2, [r0, r2, LSL #2] - EOR lr, lr, r4, ROR #24 + LDR lr, [r0, lr, LSL #2] + EOR r12, r12, r4, ROR #24 LDM r3!, {r4, r5, r6, r7} - EOR r11, r11, r2, ROR #8 - EOR r11, r11, lr, ROR #24 + EOR r11, r11, lr, ROR #8 + EOR r11, r11, r12, ROR #24 # XOR in Key Schedule EOR r8, r8, r4 EOR r9, r9, r5 @@ -1670,49 +1670,49 @@ L_AES_decrypt_block_nr: EOR r11, r11, r7 UBFX r4, r9, #0, #8 UBFX r7, r10, #8, #8 - UBFX lr, r11, 
#16, #8 - LSR r2, r8, #24 - LDRB r4, [r12, r4] - LDRB r7, [r12, r7] - LDRB lr, [r12, lr] - LDRB r2, [r12, r2] + UBFX r12, r11, #16, #8 + LSR lr, r8, #24 + LDRB r4, [r2, r4] + LDRB r7, [r2, r7] + LDRB r12, [r2, r12] + LDRB lr, [r2, lr] UBFX r5, r10, #0, #8 EOR r4, r4, r7, LSL #8 UBFX r7, r11, #8, #8 - EOR r4, r4, lr, LSL #16 - UBFX lr, r8, #16, #8 - EOR r4, r4, r2, LSL #24 - LSR r2, r9, #24 - LDRB r7, [r12, r7] - LDRB r2, [r12, r2] - LDRB r5, [r12, r5] - LDRB lr, [r12, lr] + EOR r4, r4, r12, LSL #16 + UBFX r12, r8, #16, #8 + EOR r4, r4, lr, LSL #24 + LSR lr, r9, #24 + LDRB r7, [r2, r7] + LDRB lr, [r2, lr] + LDRB r5, [r2, r5] + LDRB r12, [r2, r12] UBFX r6, r11, #0, #8 EOR r5, r5, r7, LSL #8 UBFX r7, r8, #8, #8 - EOR r5, r5, lr, LSL #16 - UBFX lr, r9, #16, #8 - EOR r5, r5, r2, LSL #24 - LSR r2, r10, #24 - LDRB r7, [r12, r7] - LDRB r2, [r12, r2] - LDRB r6, [r12, r6] - LDRB lr, [r12, lr] + EOR r5, r5, r12, LSL #16 + UBFX r12, r9, #16, #8 + EOR r5, r5, lr, LSL #24 + LSR lr, r10, #24 + LDRB r7, [r2, r7] + LDRB lr, [r2, lr] + LDRB r6, [r2, r6] + LDRB r12, [r2, r12] LSR r11, r11, #24 EOR r6, r6, r7, LSL #8 UBFX r7, r8, #0, #8 - EOR r6, r6, lr, LSL #16 - UBFX lr, r9, #8, #8 - EOR r6, r6, r2, LSL #24 - UBFX r2, r10, #16, #8 - LDRB r11, [r12, r11] - LDRB lr, [r12, lr] - LDRB r7, [r12, r7] - LDRB r2, [r12, r2] - EOR lr, lr, r11, LSL #16 + EOR r6, r6, r12, LSL #16 + UBFX r12, r9, #8, #8 + EOR r6, r6, lr, LSL #24 + UBFX lr, r10, #16, #8 + LDRB r11, [r2, r11] + LDRB r12, [r2, r12] + LDRB r7, [r2, r7] + LDRB lr, [r2, lr] + EOR r12, r12, r11, LSL #16 LDM r3, {r8, r9, r10, r11} - EOR r7, r7, lr, LSL #8 - EOR r7, r7, r2, LSL #16 + EOR r7, r7, r12, LSL #8 + EOR r7, r7, lr, LSL #16 # XOR in Key Schedule EOR r4, r4, r8 EOR r5, r5, r9 @@ -1998,7 +1998,8 @@ AES_ECB_decrypt: LDR r8, [sp, #36] MOV lr, r0 LDR r0, L_AES_Thumb2_td_ecb - ADR r12, L_AES_Thumb2_td4 + MOV r12, r2 + ADR r2, L_AES_Thumb2_td4 CMP r8, #0xa BEQ L_AES_ECB_decrypt_start_block_128 CMP r8, #0xc @@ -2012,7 +2013,7 @@ 
L_AES_ECB_decrypt_loop_block_256: REV r5, r5 REV r6, r6 REV r7, r7 - PUSH {r1, r2, r3, lr} + PUSH {r1, r3, r12, lr} LDM r3!, {r8, r9, r10, r11} # Round: 0 - XOR in key schedule EOR r4, r4, r8 @@ -2021,7 +2022,7 @@ L_AES_ECB_decrypt_loop_block_256: EOR r7, r7, r11 MOV r1, #0x6 BL AES_decrypt_block - POP {r1, r2, r3, lr} + POP {r1, r3, r12, lr} REV r4, r4 REV r5, r5 REV r6, r6 @@ -2030,7 +2031,7 @@ L_AES_ECB_decrypt_loop_block_256: STR r5, [r1, #4] STR r6, [r1, #8] STR r7, [r1, #12] - SUBS r2, r2, #0x10 + SUBS r12, r12, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 BNE L_AES_ECB_decrypt_loop_block_256 @@ -2045,7 +2046,7 @@ L_AES_ECB_decrypt_loop_block_192: REV r5, r5 REV r6, r6 REV r7, r7 - PUSH {r1, r2, r3, lr} + PUSH {r1, r3, r12, lr} LDM r3!, {r8, r9, r10, r11} # Round: 0 - XOR in key schedule EOR r4, r4, r8 @@ -2054,7 +2055,7 @@ L_AES_ECB_decrypt_loop_block_192: EOR r7, r7, r11 MOV r1, #0x5 BL AES_decrypt_block - POP {r1, r2, r3, lr} + POP {r1, r3, r12, lr} REV r4, r4 REV r5, r5 REV r6, r6 @@ -2063,7 +2064,7 @@ L_AES_ECB_decrypt_loop_block_192: STR r5, [r1, #4] STR r6, [r1, #8] STR r7, [r1, #12] - SUBS r2, r2, #0x10 + SUBS r12, r12, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 BNE L_AES_ECB_decrypt_loop_block_192 @@ -2078,7 +2079,7 @@ L_AES_ECB_decrypt_loop_block_128: REV r5, r5 REV r6, r6 REV r7, r7 - PUSH {r1, r2, r3, lr} + PUSH {r1, r3, r12, lr} LDM r3!, {r8, r9, r10, r11} # Round: 0 - XOR in key schedule EOR r4, r4, r8 @@ -2087,7 +2088,7 @@ L_AES_ECB_decrypt_loop_block_128: EOR r7, r7, r11 MOV r1, #0x4 BL AES_decrypt_block - POP {r1, r2, r3, lr} + POP {r1, r3, r12, lr} REV r4, r4 REV r5, r5 REV r6, r6 @@ -2096,13 +2097,13 @@ L_AES_ECB_decrypt_loop_block_128: STR r5, [r1, #4] STR r6, [r1, #8] STR r7, [r1, #12] - SUBS r2, r2, #0x10 + SUBS r12, r12, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 BNE L_AES_ECB_decrypt_loop_block_128 L_AES_ECB_decrypt_end: POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 209 + # Cycle Count = 210 .size 
AES_ECB_decrypt,.-AES_ECB_decrypt #endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_CBC @@ -2116,14 +2117,15 @@ AES_CBC_decrypt: LDR r4, [sp, #40] MOV lr, r0 LDR r0, L_AES_Thumb2_td_ecb - ADR r12, L_AES_Thumb2_td4 + MOV r12, r2 + ADR r2, L_AES_Thumb2_td4 PUSH {r3, r4} CMP r8, #0xa BEQ L_AES_CBC_decrypt_loop_block_128 CMP r8, #0xc BEQ L_AES_CBC_decrypt_loop_block_192 L_AES_CBC_decrypt_loop_block_256: - PUSH {r1, r2, lr} + PUSH {r1, r12, lr} LDR r4, [lr] LDR r5, [lr, #4] LDR r6, [lr, #8] @@ -2149,7 +2151,7 @@ L_AES_CBC_decrypt_loop_block_256: REV r6, r6 REV r7, r7 LDM lr, {r8, r9, r10, r11} - POP {r1, r2, lr} + POP {r1, r12, lr} LDR r3, [sp] EOR r4, r4, r8 EOR r5, r5, r9 @@ -2159,11 +2161,11 @@ L_AES_CBC_decrypt_loop_block_256: STR r5, [r1, #4] STR r6, [r1, #8] STR r7, [r1, #12] - SUBS r2, r2, #0x10 + SUBS r12, r12, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 BEQ L_AES_CBC_decrypt_end_odd - PUSH {r1, r2, lr} + PUSH {r1, r12, lr} LDR r4, [lr] LDR r5, [lr, #4] LDR r6, [lr, #8] @@ -2190,7 +2192,7 @@ L_AES_CBC_decrypt_loop_block_256: REV r7, r7 LDRD r8, r9, [lr, #16] LDRD r10, r11, [lr, #24] - POP {r1, r2, lr} + POP {r1, r12, lr} LDR r3, [sp] EOR r4, r4, r8 EOR r5, r5, r9 @@ -2200,13 +2202,13 @@ L_AES_CBC_decrypt_loop_block_256: STR r5, [r1, #4] STR r6, [r1, #8] STR r7, [r1, #12] - SUBS r2, r2, #0x10 + SUBS r12, r12, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 BNE L_AES_CBC_decrypt_loop_block_256 B L_AES_CBC_decrypt_end L_AES_CBC_decrypt_loop_block_192: - PUSH {r1, r2, lr} + PUSH {r1, r12, lr} LDR r4, [lr] LDR r5, [lr, #4] LDR r6, [lr, #8] @@ -2232,7 +2234,7 @@ L_AES_CBC_decrypt_loop_block_192: REV r6, r6 REV r7, r7 LDM lr, {r8, r9, r10, r11} - POP {r1, r2, lr} + POP {r1, r12, lr} LDR r3, [sp] EOR r4, r4, r8 EOR r5, r5, r9 @@ -2242,11 +2244,11 @@ L_AES_CBC_decrypt_loop_block_192: STR r5, [r1, #4] STR r6, [r1, #8] STR r7, [r1, #12] - SUBS r2, r2, #0x10 + SUBS r12, r12, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 BEQ L_AES_CBC_decrypt_end_odd - PUSH {r1, r2, 
lr} + PUSH {r1, r12, lr} LDR r4, [lr] LDR r5, [lr, #4] LDR r6, [lr, #8] @@ -2273,7 +2275,7 @@ L_AES_CBC_decrypt_loop_block_192: REV r7, r7 LDRD r8, r9, [lr, #16] LDRD r10, r11, [lr, #24] - POP {r1, r2, lr} + POP {r1, r12, lr} LDR r3, [sp] EOR r4, r4, r8 EOR r5, r5, r9 @@ -2283,13 +2285,13 @@ L_AES_CBC_decrypt_loop_block_192: STR r5, [r1, #4] STR r6, [r1, #8] STR r7, [r1, #12] - SUBS r2, r2, #0x10 + SUBS r12, r12, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 BNE L_AES_CBC_decrypt_loop_block_192 B L_AES_CBC_decrypt_end L_AES_CBC_decrypt_loop_block_128: - PUSH {r1, r2, lr} + PUSH {r1, r12, lr} LDR r4, [lr] LDR r5, [lr, #4] LDR r6, [lr, #8] @@ -2315,7 +2317,7 @@ L_AES_CBC_decrypt_loop_block_128: REV r6, r6 REV r7, r7 LDM lr, {r8, r9, r10, r11} - POP {r1, r2, lr} + POP {r1, r12, lr} LDR r3, [sp] EOR r4, r4, r8 EOR r5, r5, r9 @@ -2325,11 +2327,11 @@ L_AES_CBC_decrypt_loop_block_128: STR r5, [r1, #4] STR r6, [r1, #8] STR r7, [r1, #12] - SUBS r2, r2, #0x10 + SUBS r12, r12, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 BEQ L_AES_CBC_decrypt_end_odd - PUSH {r1, r2, lr} + PUSH {r1, r12, lr} LDR r4, [lr] LDR r5, [lr, #4] LDR r6, [lr, #8] @@ -2356,7 +2358,7 @@ L_AES_CBC_decrypt_loop_block_128: REV r7, r7 LDRD r8, r9, [lr, #16] LDRD r10, r11, [lr, #24] - POP {r1, r2, lr} + POP {r1, r12, lr} LDR r3, [sp] EOR r4, r4, r8 EOR r5, r5, r9 @@ -2366,7 +2368,7 @@ L_AES_CBC_decrypt_loop_block_128: STR r5, [r1, #4] STR r6, [r1, #8] STR r7, [r1, #12] - SUBS r2, r2, #0x10 + SUBS r12, r12, #0x10 ADD lr, lr, #0x10 ADD r1, r1, #0x10 BNE L_AES_CBC_decrypt_loop_block_128 @@ -2380,7 +2382,7 @@ L_AES_CBC_decrypt_end_odd: L_AES_CBC_decrypt_end: POP {r3, r4} POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 517 + # Cycle Count = 518 .size AES_CBC_decrypt,.-AES_CBC_decrypt #endif /* HAVE_AES_CBC */ #endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER || HAVE_AES_CBC */ diff --git a/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c b/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c index 0a29de362..ec9f3f38c 
100644 --- a/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c @@ -1183,60 +1183,61 @@ void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l #endif /* WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_CBC) -void AES_decrypt_block(const uint32_t* td, int nr); -void AES_decrypt_block(const uint32_t* td_p, int nr_p) +void AES_decrypt_block(const uint32_t* td, int nr, const uint8_t* td4); +void AES_decrypt_block(const uint32_t* td_p, int nr_p, const uint8_t* td4_p) { register const uint32_t* td asm ("r0") = (const uint32_t*)td_p; register int nr asm ("r1") = (int)nr_p; + register const uint8_t* td4 asm ("r2") = (const uint8_t*)td4_p; __asm__ __volatile__ ( "\n" "L_AES_decrypt_block_nr_%=:\n\t" "UBFX r8, r7, #16, #8\n\t" "LSR r11, r4, #24\n\t" - "UBFX lr, r6, #8, #8\n\t" - "UBFX r2, r5, #0, #8\n\t" + "UBFX r12, r6, #8, #8\n\t" + "UBFX lr, r5, #0, #8\n\t" "LDR r8, [%[td], r8, LSL #2]\n\t" "LDR r11, [%[td], r11, LSL #2]\n\t" + "LDR r12, [%[td], r12, LSL #2]\n\t" "LDR lr, [%[td], lr, LSL #2]\n\t" - "LDR r2, [%[td], r2, LSL #2]\n\t" "UBFX r9, r4, #16, #8\n\t" "EOR r8, r8, r11, ROR #24\n\t" "LSR r11, r5, #24\n\t" - "EOR r8, r8, lr, ROR #8\n\t" - "UBFX lr, r7, #8, #8\n\t" - "EOR r8, r8, r2, ROR #16\n\t" - "UBFX r2, r6, #0, #8\n\t" + "EOR r8, r8, r12, ROR #8\n\t" + "UBFX r12, r7, #8, #8\n\t" + "EOR r8, r8, lr, ROR #16\n\t" + "UBFX lr, r6, #0, #8\n\t" "LDR r9, [%[td], r9, LSL #2]\n\t" "LDR r11, [%[td], r11, LSL #2]\n\t" + "LDR r12, [%[td], r12, LSL #2]\n\t" "LDR lr, [%[td], lr, LSL #2]\n\t" - "LDR r2, [%[td], r2, LSL #2]\n\t" "UBFX r10, r5, #16, #8\n\t" "EOR r9, r9, r11, ROR #24\n\t" "LSR r11, r6, #24\n\t" - "EOR r9, r9, lr, ROR #8\n\t" - "UBFX lr, r4, #8, #8\n\t" - "EOR r9, r9, r2, ROR #16\n\t" - "UBFX r2, r7, #0, #8\n\t" + "EOR r9, r9, r12, ROR #8\n\t" + "UBFX r12, r4, #8, #8\n\t" + "EOR r9, r9, lr, ROR #16\n\t" + "UBFX lr, r7, 
#0, #8\n\t" "LDR r10, [%[td], r10, LSL #2]\n\t" "LDR r11, [%[td], r11, LSL #2]\n\t" + "LDR r12, [%[td], r12, LSL #2]\n\t" "LDR lr, [%[td], lr, LSL #2]\n\t" - "LDR r2, [%[td], r2, LSL #2]\n\t" "UBFX r4, r4, #0, #8\n\t" "EOR r10, r10, r11, ROR #24\n\t" "UBFX r11, r6, #16, #8\n\t" - "EOR r10, r10, lr, ROR #8\n\t" - "LSR lr, r7, #24\n\t" - "EOR r10, r10, r2, ROR #16\n\t" - "UBFX r2, r5, #8, #8\n\t" + "EOR r10, r10, r12, ROR #8\n\t" + "LSR r12, r7, #24\n\t" + "EOR r10, r10, lr, ROR #16\n\t" + "UBFX lr, r5, #8, #8\n\t" "LDR r4, [%[td], r4, LSL #2]\n\t" - "LDR lr, [%[td], lr, LSL #2]\n\t" + "LDR r12, [%[td], r12, LSL #2]\n\t" "LDR r11, [%[td], r11, LSL #2]\n\t" - "LDR r2, [%[td], r2, LSL #2]\n\t" - "EOR lr, lr, r4, ROR #24\n\t" + "LDR lr, [%[td], lr, LSL #2]\n\t" + "EOR r12, r12, r4, ROR #24\n\t" "LDM r3!, {r4, r5, r6, r7}\n\t" - "EOR r11, r11, r2, ROR #8\n\t" - "EOR r11, r11, lr, ROR #24\n\t" + "EOR r11, r11, lr, ROR #8\n\t" + "EOR r11, r11, r12, ROR #24\n\t" /* XOR in Key Schedule */ "EOR r8, r8, r4\n\t" "EOR r9, r9, r5\n\t" @@ -1244,49 +1245,49 @@ void AES_decrypt_block(const uint32_t* td_p, int nr_p) "EOR r11, r11, r7\n\t" "UBFX r4, r11, #16, #8\n\t" "LSR r7, r8, #24\n\t" - "UBFX lr, r10, #8, #8\n\t" - "UBFX r2, r9, #0, #8\n\t" + "UBFX r12, r10, #8, #8\n\t" + "UBFX lr, r9, #0, #8\n\t" "LDR r4, [%[td], r4, LSL #2]\n\t" "LDR r7, [%[td], r7, LSL #2]\n\t" + "LDR r12, [%[td], r12, LSL #2]\n\t" "LDR lr, [%[td], lr, LSL #2]\n\t" - "LDR r2, [%[td], r2, LSL #2]\n\t" "UBFX r5, r8, #16, #8\n\t" "EOR r4, r4, r7, ROR #24\n\t" "LSR r7, r9, #24\n\t" - "EOR r4, r4, lr, ROR #8\n\t" - "UBFX lr, r11, #8, #8\n\t" - "EOR r4, r4, r2, ROR #16\n\t" - "UBFX r2, r10, #0, #8\n\t" + "EOR r4, r4, r12, ROR #8\n\t" + "UBFX r12, r11, #8, #8\n\t" + "EOR r4, r4, lr, ROR #16\n\t" + "UBFX lr, r10, #0, #8\n\t" "LDR r5, [%[td], r5, LSL #2]\n\t" "LDR r7, [%[td], r7, LSL #2]\n\t" + "LDR r12, [%[td], r12, LSL #2]\n\t" "LDR lr, [%[td], lr, LSL #2]\n\t" - "LDR r2, [%[td], r2, LSL #2]\n\t" "UBFX r6, r9, #16, 
#8\n\t" "EOR r5, r5, r7, ROR #24\n\t" "LSR r7, r10, #24\n\t" - "EOR r5, r5, lr, ROR #8\n\t" - "UBFX lr, r8, #8, #8\n\t" - "EOR r5, r5, r2, ROR #16\n\t" - "UBFX r2, r11, #0, #8\n\t" + "EOR r5, r5, r12, ROR #8\n\t" + "UBFX r12, r8, #8, #8\n\t" + "EOR r5, r5, lr, ROR #16\n\t" + "UBFX lr, r11, #0, #8\n\t" "LDR r6, [%[td], r6, LSL #2]\n\t" "LDR r7, [%[td], r7, LSL #2]\n\t" + "LDR r12, [%[td], r12, LSL #2]\n\t" "LDR lr, [%[td], lr, LSL #2]\n\t" - "LDR r2, [%[td], r2, LSL #2]\n\t" "UBFX r8, r8, #0, #8\n\t" "EOR r6, r6, r7, ROR #24\n\t" "UBFX r7, r10, #16, #8\n\t" - "EOR r6, r6, lr, ROR #8\n\t" - "LSR lr, r11, #24\n\t" - "EOR r6, r6, r2, ROR #16\n\t" - "UBFX r2, r9, #8, #8\n\t" + "EOR r6, r6, r12, ROR #8\n\t" + "LSR r12, r11, #24\n\t" + "EOR r6, r6, lr, ROR #16\n\t" + "UBFX lr, r9, #8, #8\n\t" "LDR r8, [%[td], r8, LSL #2]\n\t" - "LDR lr, [%[td], lr, LSL #2]\n\t" + "LDR r12, [%[td], r12, LSL #2]\n\t" "LDR r7, [%[td], r7, LSL #2]\n\t" - "LDR r2, [%[td], r2, LSL #2]\n\t" - "EOR lr, lr, r8, ROR #24\n\t" + "LDR lr, [%[td], lr, LSL #2]\n\t" + "EOR r12, r12, r8, ROR #24\n\t" "LDM r3!, {r8, r9, r10, r11}\n\t" - "EOR r7, r7, r2, ROR #8\n\t" - "EOR r7, r7, lr, ROR #24\n\t" + "EOR r7, r7, lr, ROR #8\n\t" + "EOR r7, r7, r12, ROR #24\n\t" /* XOR in Key Schedule */ "EOR r4, r4, r8\n\t" "EOR r5, r5, r9\n\t" @@ -1296,49 +1297,49 @@ void AES_decrypt_block(const uint32_t* td_p, int nr_p) "BNE L_AES_decrypt_block_nr_%=\n\t" "UBFX r8, r7, #16, #8\n\t" "LSR r11, r4, #24\n\t" - "UBFX lr, r6, #8, #8\n\t" - "UBFX r2, r5, #0, #8\n\t" + "UBFX r12, r6, #8, #8\n\t" + "UBFX lr, r5, #0, #8\n\t" "LDR r8, [%[td], r8, LSL #2]\n\t" "LDR r11, [%[td], r11, LSL #2]\n\t" + "LDR r12, [%[td], r12, LSL #2]\n\t" "LDR lr, [%[td], lr, LSL #2]\n\t" - "LDR r2, [%[td], r2, LSL #2]\n\t" "UBFX r9, r4, #16, #8\n\t" "EOR r8, r8, r11, ROR #24\n\t" "LSR r11, r5, #24\n\t" - "EOR r8, r8, lr, ROR #8\n\t" - "UBFX lr, r7, #8, #8\n\t" - "EOR r8, r8, r2, ROR #16\n\t" - "UBFX r2, r6, #0, #8\n\t" + "EOR r8, r8, r12, ROR #8\n\t" + 
"UBFX r12, r7, #8, #8\n\t" + "EOR r8, r8, lr, ROR #16\n\t" + "UBFX lr, r6, #0, #8\n\t" "LDR r9, [%[td], r9, LSL #2]\n\t" "LDR r11, [%[td], r11, LSL #2]\n\t" + "LDR r12, [%[td], r12, LSL #2]\n\t" "LDR lr, [%[td], lr, LSL #2]\n\t" - "LDR r2, [%[td], r2, LSL #2]\n\t" "UBFX r10, r5, #16, #8\n\t" "EOR r9, r9, r11, ROR #24\n\t" "LSR r11, r6, #24\n\t" - "EOR r9, r9, lr, ROR #8\n\t" - "UBFX lr, r4, #8, #8\n\t" - "EOR r9, r9, r2, ROR #16\n\t" - "UBFX r2, r7, #0, #8\n\t" + "EOR r9, r9, r12, ROR #8\n\t" + "UBFX r12, r4, #8, #8\n\t" + "EOR r9, r9, lr, ROR #16\n\t" + "UBFX lr, r7, #0, #8\n\t" "LDR r10, [%[td], r10, LSL #2]\n\t" "LDR r11, [%[td], r11, LSL #2]\n\t" + "LDR r12, [%[td], r12, LSL #2]\n\t" "LDR lr, [%[td], lr, LSL #2]\n\t" - "LDR r2, [%[td], r2, LSL #2]\n\t" "UBFX r4, r4, #0, #8\n\t" "EOR r10, r10, r11, ROR #24\n\t" "UBFX r11, r6, #16, #8\n\t" - "EOR r10, r10, lr, ROR #8\n\t" - "LSR lr, r7, #24\n\t" - "EOR r10, r10, r2, ROR #16\n\t" - "UBFX r2, r5, #8, #8\n\t" + "EOR r10, r10, r12, ROR #8\n\t" + "LSR r12, r7, #24\n\t" + "EOR r10, r10, lr, ROR #16\n\t" + "UBFX lr, r5, #8, #8\n\t" "LDR r4, [%[td], r4, LSL #2]\n\t" - "LDR lr, [%[td], lr, LSL #2]\n\t" + "LDR r12, [%[td], r12, LSL #2]\n\t" "LDR r11, [%[td], r11, LSL #2]\n\t" - "LDR r2, [%[td], r2, LSL #2]\n\t" - "EOR lr, lr, r4, ROR #24\n\t" + "LDR lr, [%[td], lr, LSL #2]\n\t" + "EOR r12, r12, r4, ROR #24\n\t" "LDM r3!, {r4, r5, r6, r7}\n\t" - "EOR r11, r11, r2, ROR #8\n\t" - "EOR r11, r11, lr, ROR #24\n\t" + "EOR r11, r11, lr, ROR #8\n\t" + "EOR r11, r11, r12, ROR #24\n\t" /* XOR in Key Schedule */ "EOR r8, r8, r4\n\t" "EOR r9, r9, r5\n\t" @@ -1346,55 +1347,55 @@ void AES_decrypt_block(const uint32_t* td_p, int nr_p) "EOR r11, r11, r7\n\t" "UBFX r4, r9, #0, #8\n\t" "UBFX r7, r10, #8, #8\n\t" - "UBFX lr, r11, #16, #8\n\t" - "LSR r2, r8, #24\n\t" - "LDRB r4, [r12, r4]\n\t" - "LDRB r7, [r12, r7]\n\t" - "LDRB lr, [r12, lr]\n\t" - "LDRB r2, [r12, r2]\n\t" + "UBFX r12, r11, #16, #8\n\t" + "LSR lr, r8, #24\n\t" + "LDRB r4, 
[%[td4], r4]\n\t" + "LDRB r7, [%[td4], r7]\n\t" + "LDRB r12, [%[td4], r12]\n\t" + "LDRB lr, [%[td4], lr]\n\t" "UBFX r5, r10, #0, #8\n\t" "EOR r4, r4, r7, LSL #8\n\t" "UBFX r7, r11, #8, #8\n\t" - "EOR r4, r4, lr, LSL #16\n\t" - "UBFX lr, r8, #16, #8\n\t" - "EOR r4, r4, r2, LSL #24\n\t" - "LSR r2, r9, #24\n\t" - "LDRB r7, [r12, r7]\n\t" - "LDRB r2, [r12, r2]\n\t" - "LDRB r5, [r12, r5]\n\t" - "LDRB lr, [r12, lr]\n\t" + "EOR r4, r4, r12, LSL #16\n\t" + "UBFX r12, r8, #16, #8\n\t" + "EOR r4, r4, lr, LSL #24\n\t" + "LSR lr, r9, #24\n\t" + "LDRB r7, [%[td4], r7]\n\t" + "LDRB lr, [%[td4], lr]\n\t" + "LDRB r5, [%[td4], r5]\n\t" + "LDRB r12, [%[td4], r12]\n\t" "UBFX r6, r11, #0, #8\n\t" "EOR r5, r5, r7, LSL #8\n\t" "UBFX r7, r8, #8, #8\n\t" - "EOR r5, r5, lr, LSL #16\n\t" - "UBFX lr, r9, #16, #8\n\t" - "EOR r5, r5, r2, LSL #24\n\t" - "LSR r2, r10, #24\n\t" - "LDRB r7, [r12, r7]\n\t" - "LDRB r2, [r12, r2]\n\t" - "LDRB r6, [r12, r6]\n\t" - "LDRB lr, [r12, lr]\n\t" + "EOR r5, r5, r12, LSL #16\n\t" + "UBFX r12, r9, #16, #8\n\t" + "EOR r5, r5, lr, LSL #24\n\t" + "LSR lr, r10, #24\n\t" + "LDRB r7, [%[td4], r7]\n\t" + "LDRB lr, [%[td4], lr]\n\t" + "LDRB r6, [%[td4], r6]\n\t" + "LDRB r12, [%[td4], r12]\n\t" "LSR r11, r11, #24\n\t" "EOR r6, r6, r7, LSL #8\n\t" "UBFX r7, r8, #0, #8\n\t" - "EOR r6, r6, lr, LSL #16\n\t" - "UBFX lr, r9, #8, #8\n\t" - "EOR r6, r6, r2, LSL #24\n\t" - "UBFX r2, r10, #16, #8\n\t" - "LDRB r11, [r12, r11]\n\t" - "LDRB lr, [r12, lr]\n\t" - "LDRB r7, [r12, r7]\n\t" - "LDRB r2, [r12, r2]\n\t" - "EOR lr, lr, r11, LSL #16\n\t" + "EOR r6, r6, r12, LSL #16\n\t" + "UBFX r12, r9, #8, #8\n\t" + "EOR r6, r6, lr, LSL #24\n\t" + "UBFX lr, r10, #16, #8\n\t" + "LDRB r11, [%[td4], r11]\n\t" + "LDRB r12, [%[td4], r12]\n\t" + "LDRB r7, [%[td4], r7]\n\t" + "LDRB lr, [%[td4], lr]\n\t" + "EOR r12, r12, r11, LSL #16\n\t" "LDM r3, {r8, r9, r10, r11}\n\t" - "EOR r7, r7, lr, LSL #8\n\t" - "EOR r7, r7, r2, LSL #16\n\t" + "EOR r7, r7, r12, LSL #8\n\t" + "EOR r7, r7, lr, LSL #16\n\t" /* 
XOR in Key Schedule */ "EOR r4, r4, r8\n\t" "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" - : [td] "+r" (td), [nr] "+r" (nr) + : [td] "+r" (td), [nr] "+r" (nr), [td4] "+r" (td4) : : "memory", "lr" ); @@ -1453,7 +1454,8 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "MOV r8, r4\n\t" "MOV lr, %[in]\n\t" "MOV r0, %[L_AES_Thumb2_td_ecb]\n\t" - "MOV r12, %[L_AES_Thumb2_td4]\n\t" + "MOV r12, %[len]\n\t" + "MOV r2, %[L_AES_Thumb2_td4]\n\t" "CMP r8, #0xa\n\t" "BEQ L_AES_ECB_decrypt_start_block_128_%=\n\t" "CMP r8, #0xc\n\t" @@ -1468,7 +1470,7 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "REV r5, r5\n\t" "REV r6, r6\n\t" "REV r7, r7\n\t" - "PUSH {r1, r2, %[ks], lr}\n\t" + "PUSH {r1, %[ks], r12, lr}\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t" /* Round: 0 - XOR in key schedule */ "EOR r4, r4, r8\n\t" @@ -1477,7 +1479,7 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "EOR r7, r7, r11\n\t" "MOV r1, #0x6\n\t" "BL AES_decrypt_block\n\t" - "POP {r1, r2, %[ks], lr}\n\t" + "POP {r1, %[ks], r12, lr}\n\t" "REV r4, r4\n\t" "REV r5, r5\n\t" "REV r6, r6\n\t" @@ -1486,7 +1488,7 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "SUBS %[len], %[len], #0x10\n\t" + "SUBS r12, r12, #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" "BNE L_AES_ECB_decrypt_loop_block_256_%=\n\t" @@ -1503,7 +1505,7 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "REV r5, r5\n\t" "REV r6, r6\n\t" "REV r7, r7\n\t" - "PUSH {r1, r2, %[ks], lr}\n\t" + "PUSH {r1, %[ks], r12, lr}\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t" /* Round: 0 - XOR in key schedule */ "EOR r4, r4, r8\n\t" @@ -1512,7 +1514,7 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "EOR r7, r7, r11\n\t" "MOV r1, #0x5\n\t" "BL 
AES_decrypt_block\n\t" - "POP {r1, r2, %[ks], lr}\n\t" + "POP {r1, %[ks], r12, lr}\n\t" "REV r4, r4\n\t" "REV r5, r5\n\t" "REV r6, r6\n\t" @@ -1521,7 +1523,7 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "SUBS %[len], %[len], #0x10\n\t" + "SUBS r12, r12, #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" "BNE L_AES_ECB_decrypt_loop_block_192_%=\n\t" @@ -1538,7 +1540,7 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "REV r5, r5\n\t" "REV r6, r6\n\t" "REV r7, r7\n\t" - "PUSH {r1, r2, %[ks], lr}\n\t" + "PUSH {r1, %[ks], r12, lr}\n\t" "LDM %[ks]!, {r8, r9, r10, r11}\n\t" /* Round: 0 - XOR in key schedule */ "EOR r4, r4, r8\n\t" @@ -1547,7 +1549,7 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "EOR r7, r7, r11\n\t" "MOV r1, #0x4\n\t" "BL AES_decrypt_block\n\t" - "POP {r1, r2, %[ks], lr}\n\t" + "POP {r1, %[ks], r12, lr}\n\t" "REV r4, r4\n\t" "REV r5, r5\n\t" "REV r6, r6\n\t" @@ -1556,7 +1558,7 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "SUBS %[len], %[len], #0x10\n\t" + "SUBS r12, r12, #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" "BNE L_AES_ECB_decrypt_loop_block_128_%=\n\t" @@ -1589,7 +1591,8 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "MOV r4, r5\n\t" "MOV lr, %[in]\n\t" "MOV r0, %[L_AES_Thumb2_td_ecb]\n\t" - "MOV r12, %[L_AES_Thumb2_td4]\n\t" + "MOV r12, %[len]\n\t" + "MOV r2, %[L_AES_Thumb2_td4]\n\t" "PUSH {%[ks], r4}\n\t" "CMP r8, #0xa\n\t" "BEQ L_AES_CBC_decrypt_loop_block_128_%=\n\t" @@ -1597,7 +1600,7 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "BEQ L_AES_CBC_decrypt_loop_block_192_%=\n\t" "\n" 
"L_AES_CBC_decrypt_loop_block_256_%=:\n\t" - "PUSH {r1, r2, lr}\n\t" + "PUSH {r1, r12, lr}\n\t" "LDR r4, [lr]\n\t" "LDR r5, [lr, #4]\n\t" "LDR r6, [lr, #8]\n\t" @@ -1623,7 +1626,7 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "REV r6, r6\n\t" "REV r7, r7\n\t" "LDM lr, {r8, r9, r10, r11}\n\t" - "POP {r1, r2, lr}\n\t" + "POP {r1, r12, lr}\n\t" "LDR %[ks], [sp]\n\t" "EOR r4, r4, r8\n\t" "EOR r5, r5, r9\n\t" @@ -1633,11 +1636,11 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "SUBS %[len], %[len], #0x10\n\t" + "SUBS r12, r12, #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" "BEQ L_AES_CBC_decrypt_end_odd_%=\n\t" - "PUSH {r1, r2, lr}\n\t" + "PUSH {r1, r12, lr}\n\t" "LDR r4, [lr]\n\t" "LDR r5, [lr, #4]\n\t" "LDR r6, [lr, #8]\n\t" @@ -1664,7 +1667,7 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "REV r7, r7\n\t" "LDRD r8, r9, [lr, #16]\n\t" "LDRD r10, r11, [lr, #24]\n\t" - "POP {r1, r2, lr}\n\t" + "POP {r1, r12, lr}\n\t" "LDR %[ks], [sp]\n\t" "EOR r4, r4, r8\n\t" "EOR r5, r5, r9\n\t" @@ -1674,14 +1677,14 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "SUBS %[len], %[len], #0x10\n\t" + "SUBS r12, r12, #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" "BNE L_AES_CBC_decrypt_loop_block_256_%=\n\t" "B L_AES_CBC_decrypt_end_%=\n\t" "\n" "L_AES_CBC_decrypt_loop_block_192_%=:\n\t" - "PUSH {r1, r2, lr}\n\t" + "PUSH {r1, r12, lr}\n\t" "LDR r4, [lr]\n\t" "LDR r5, [lr, #4]\n\t" "LDR r6, [lr, #8]\n\t" @@ -1707,7 +1710,7 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "REV r6, r6\n\t" "REV r7, r7\n\t" "LDM lr, {r8, r9, r10, r11}\n\t" - "POP {r1, r2, lr}\n\t" + "POP {r1, r12, lr}\n\t" "LDR %[ks], [sp]\n\t" 
"EOR r4, r4, r8\n\t" "EOR r5, r5, r9\n\t" @@ -1717,11 +1720,11 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "SUBS %[len], %[len], #0x10\n\t" + "SUBS r12, r12, #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" "BEQ L_AES_CBC_decrypt_end_odd_%=\n\t" - "PUSH {r1, r2, lr}\n\t" + "PUSH {r1, r12, lr}\n\t" "LDR r4, [lr]\n\t" "LDR r5, [lr, #4]\n\t" "LDR r6, [lr, #8]\n\t" @@ -1748,7 +1751,7 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "REV r7, r7\n\t" "LDRD r8, r9, [lr, #16]\n\t" "LDRD r10, r11, [lr, #24]\n\t" - "POP {r1, r2, lr}\n\t" + "POP {r1, r12, lr}\n\t" "LDR %[ks], [sp]\n\t" "EOR r4, r4, r8\n\t" "EOR r5, r5, r9\n\t" @@ -1758,14 +1761,14 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "SUBS %[len], %[len], #0x10\n\t" + "SUBS r12, r12, #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" "BNE L_AES_CBC_decrypt_loop_block_192_%=\n\t" "B L_AES_CBC_decrypt_end_%=\n\t" "\n" "L_AES_CBC_decrypt_loop_block_128_%=:\n\t" - "PUSH {r1, r2, lr}\n\t" + "PUSH {r1, r12, lr}\n\t" "LDR r4, [lr]\n\t" "LDR r5, [lr, #4]\n\t" "LDR r6, [lr, #8]\n\t" @@ -1791,7 +1794,7 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "REV r6, r6\n\t" "REV r7, r7\n\t" "LDM lr, {r8, r9, r10, r11}\n\t" - "POP {r1, r2, lr}\n\t" + "POP {r1, r12, lr}\n\t" "LDR %[ks], [sp]\n\t" "EOR r4, r4, r8\n\t" "EOR r5, r5, r9\n\t" @@ -1801,11 +1804,11 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "SUBS %[len], %[len], #0x10\n\t" + "SUBS r12, r12, #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" "BEQ L_AES_CBC_decrypt_end_odd_%=\n\t" - "PUSH {r1, 
r2, lr}\n\t" + "PUSH {r1, r12, lr}\n\t" "LDR r4, [lr]\n\t" "LDR r5, [lr, #4]\n\t" "LDR r6, [lr, #8]\n\t" @@ -1832,7 +1835,7 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "REV r7, r7\n\t" "LDRD r8, r9, [lr, #16]\n\t" "LDRD r10, r11, [lr, #24]\n\t" - "POP {r1, r2, lr}\n\t" + "POP {r1, r12, lr}\n\t" "LDR %[ks], [sp]\n\t" "EOR r4, r4, r8\n\t" "EOR r5, r5, r9\n\t" @@ -1842,7 +1845,7 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "STR r5, [%[out], #4]\n\t" "STR r6, [%[out], #8]\n\t" "STR r7, [%[out], #12]\n\t" - "SUBS %[len], %[len], #0x10\n\t" + "SUBS r12, r12, #0x10\n\t" "ADD lr, lr, #0x10\n\t" "ADD %[out], %[out], #0x10\n\t" "BNE L_AES_CBC_decrypt_loop_block_128_%=\n\t" diff --git a/wolfcrypt/src/port/arm/thumb2-curve25519.S b/wolfcrypt/src/port/arm/thumb2-curve25519.S index 65911ca40..d46e13d05 100644 --- a/wolfcrypt/src/port/arm/thumb2-curve25519.S +++ b/wolfcrypt/src/port/arm/thumb2-curve25519.S @@ -277,33 +277,38 @@ fe_tobytes: .globl fe_1 .type fe_1, %function fe_1: + PUSH {r4, r5, r6, r7, r8, r9, lr} # Set one MOV r2, #0x1 MOV r3, #0x0 - STM r0!, {r2, r3} - MOV r2, #0x0 - STM r0!, {r2, r3} - STM r0!, {r2, r3} - STM r0!, {r2, r3} - SUB r0, r0, #0x20 - BX lr - # Cycle Count = 20 + MOV r4, #0x0 + MOV r5, #0x0 + MOV r6, #0x0 + MOV r7, #0x0 + MOV r8, #0x0 + MOV r9, #0x0 + STM r0, {r2, r3, r4, r5, r6, r7, r8, r9} + POP {r4, r5, r6, r7, r8, r9, pc} + # Cycle Count = 33 .size fe_1,.-fe_1 .text .align 4 .globl fe_0 .type fe_0, %function fe_0: + PUSH {r4, r5, r6, r7, r8, r9, lr} # Set zero MOV r2, #0x0 MOV r3, #0x0 - STM r0!, {r2, r3} - STM r0!, {r2, r3} - STM r0!, {r2, r3} - STM r0!, {r2, r3} - SUB r0, r0, #0x20 - BX lr - # Cycle Count = 19 + MOV r4, #0x0 + MOV r5, #0x0 + MOV r6, #0x0 + MOV r7, #0x0 + MOV r8, #0x0 + MOV r9, #0x0 + STM r0, {r2, r3, r4, r5, r6, r7, r8, r9} + POP {r4, r5, r6, r7, r8, r9, pc} + # Cycle Count = 33 .size fe_0,.-fe_0 .text .align 4 @@ -406,6 +411,7 @@ fe_isnegative: POP 
{r4, r5, pc} # Cycle Count = 31 .size fe_isnegative,.-fe_isnegative +#if defined(HAVE_ED25519_MAKE_KEY) || defined(HAVE_ED25519_SIGN) #ifndef WC_NO_CACHE_RESISTANT .text .align 4 @@ -1482,6 +1488,7 @@ fe_cmov_table: # Cycle Count = 160 .size fe_cmov_table,.-fe_cmov_table #endif /* WC_NO_CACHE_RESISTANT */ +#endif /* HAVE_ED25519_MAKE_KEY || HAVE_ED25519_SIGN */ #endif /* HAVE_ED25519 */ .text .align 4 @@ -1803,34 +1810,20 @@ curve25519: STR r2, [sp, #168] MOV r1, #0x0 STR r1, [sp, #172] - # Set one - MOV r10, #0x1 - MOV r11, #0x0 - STM r0!, {r10, r11} - MOV r10, #0x0 - STM r0!, {r10, r11} - STM r0!, {r10, r11} - STM r0!, {r10, r11} - SUB r0, r0, #0x20 - MOV r3, sp - # Set zero + MOV r4, #0x1 + MOV r5, #0x0 + MOV r6, #0x0 + MOV r7, #0x0 + MOV r8, #0x0 + MOV r9, #0x0 MOV r10, #0x0 MOV r11, #0x0 - STM r3!, {r10, r11} - STM r3!, {r10, r11} - STM r3!, {r10, r11} - STM r3!, {r10, r11} - SUB r3, r3, #0x20 + STM r0, {r4, r5, r6, r7, r8, r9, r10, r11} ADD r3, sp, #0x20 - # Set one - MOV r10, #0x1 - MOV r11, #0x0 - STM r3!, {r10, r11} - MOV r10, #0x0 - STM r3!, {r10, r11} - STM r3!, {r10, r11} - STM r3!, {r10, r11} - SUB r3, r3, #0x20 + STM r3, {r4, r5, r6, r7, r8, r9, r10, r11} + MOV r4, #0x0 + MOV r3, sp + STM r3, {r4, r5, r6, r7, r8, r9, r10, r11} ADD r3, sp, #0x40 # Copy LDM r2, {r4, r5, r6, r7, r8, r9, r10, r11} @@ -2177,7 +2170,7 @@ L_curve25519_inv_8: MOV r0, #0x0 ADD sp, sp, #0xbc POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 693 + # Cycle Count = 682 .size curve25519,.-curve25519 #else .text @@ -2197,34 +2190,20 @@ curve25519: STR r4, [sp, #188] MOV r1, #0x0 STR r1, [sp, #164] - # Set one - MOV r10, #0x1 - MOV r11, #0x0 - STM r0!, {r10, r11} - MOV r10, #0x0 - STM r0!, {r10, r11} - STM r0!, {r10, r11} - STM r0!, {r10, r11} - SUB r0, r0, #0x20 - MOV r3, sp - # Set zero + MOV r4, #0x1 + MOV r5, #0x0 + MOV r6, #0x0 + MOV r7, #0x0 + MOV r8, #0x0 + MOV r9, #0x0 MOV r10, #0x0 MOV r11, #0x0 - STM r3!, {r10, r11} - STM r3!, {r10, r11} - STM r3!, {r10, r11} - 
STM r3!, {r10, r11} - SUB r3, r3, #0x20 + STM r0, {r4, r5, r6, r7, r8, r9, r10, r11} ADD r3, sp, #0x20 - # Set one - MOV r10, #0x1 - MOV r11, #0x0 - STM r3!, {r10, r11} - MOV r10, #0x0 - STM r3!, {r10, r11} - STM r3!, {r10, r11} - STM r3!, {r10, r11} - SUB r3, r3, #0x20 + STM r3, {r4, r5, r6, r7, r8, r9, r10, r11} + MOV r4, #0x0 + MOV r3, sp + STM r3, {r4, r5, r6, r7, r8, r9, r10, r11} ADD r3, sp, #0x40 # Copy LDM r2, {r4, r5, r6, r7, r8, r9, r10, r11} @@ -2487,7 +2466,7 @@ L_curve25519_inv_8: MOV r0, #0x0 ADD sp, sp, #0xc0 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 600 + # Cycle Count = 589 .size curve25519,.-curve25519 #endif /* WC_NO_CACHE_RESISTANT */ #endif /* HAVE_CURVE25519 */ @@ -3659,6 +3638,7 @@ sc_reduce: POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} # Cycle Count = 482 .size sc_reduce,.-sc_reduce +#ifdef HAVE_ED25519_SIGN .text .align 4 .globl sc_muladd @@ -4061,6 +4041,7 @@ sc_muladd: POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} # Cycle Count = 728 .size sc_muladd,.-sc_muladd +#endif /* HAVE_ED25519_SIGN */ #endif /* HAVE_ED25519 */ #endif /* !CURVE25519_SMALL || !ED25519_SMALL */ diff --git a/wolfcrypt/src/port/arm/thumb2-curve25519_c.c b/wolfcrypt/src/port/arm/thumb2-curve25519_c.c index 531137ba7..43c79b5ed 100644 --- a/wolfcrypt/src/port/arm/thumb2-curve25519_c.c +++ b/wolfcrypt/src/port/arm/thumb2-curve25519_c.c @@ -310,15 +310,16 @@ void fe_1(fe n_p) /* Set one */ "MOV r2, #0x1\n\t" "MOV r3, #0x0\n\t" - "STM %[n]!, {r2, r3}\n\t" - "MOV r2, #0x0\n\t" - "STM %[n]!, {r2, r3}\n\t" - "STM %[n]!, {r2, r3}\n\t" - "STM %[n]!, {r2, r3}\n\t" - "SUB %[n], %[n], #0x20\n\t" + "MOV r4, #0x0\n\t" + "MOV r5, #0x0\n\t" + "MOV r6, #0x0\n\t" + "MOV r7, #0x0\n\t" + "MOV r8, #0x0\n\t" + "MOV r9, #0x0\n\t" + "STM %[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" : [n] "+r" (n) : - : "memory", "r2", "r3" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -330,14 +331,16 @@ void fe_0(fe n_p) /* Set zero */ "MOV r2, #0x0\n\t" "MOV r3, #0x0\n\t" - 
"STM %[n]!, {r2, r3}\n\t" - "STM %[n]!, {r2, r3}\n\t" - "STM %[n]!, {r2, r3}\n\t" - "STM %[n]!, {r2, r3}\n\t" - "SUB %[n], %[n], #0x20\n\t" + "MOV r4, #0x0\n\t" + "MOV r5, #0x0\n\t" + "MOV r6, #0x0\n\t" + "MOV r7, #0x0\n\t" + "MOV r8, #0x0\n\t" + "MOV r9, #0x0\n\t" + "STM %[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" : [n] "+r" (n) : - : "memory", "r2", "r3" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -454,6 +457,7 @@ int fe_isnegative(const fe a_p) return (uint32_t)(size_t)a; } +#if defined(HAVE_ED25519_MAKE_KEY) || defined(HAVE_ED25519_SIGN) #ifndef WC_NO_CACHE_RESISTANT void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) { @@ -1538,6 +1542,7 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) } #endif /* WC_NO_CACHE_RESISTANT */ +#endif /* HAVE_ED25519_MAKE_KEY || HAVE_ED25519_SIGN */ #endif /* HAVE_ED25519 */ void fe_mul_op(void); void fe_mul_op() @@ -1872,34 +1877,20 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) "STR %[a], [sp, #168]\n\t" "MOV %[n], #0x0\n\t" "STR %[n], [sp, #172]\n\t" - /* Set one */ - "MOV r10, #0x1\n\t" - "MOV r11, #0x0\n\t" - "STM %[r]!, {r10, r11}\n\t" - "MOV r10, #0x0\n\t" - "STM %[r]!, {r10, r11}\n\t" - "STM %[r]!, {r10, r11}\n\t" - "STM %[r]!, {r10, r11}\n\t" - "SUB %[r], %[r], #0x20\n\t" - "MOV r3, sp\n\t" - /* Set zero */ + "MOV r4, #0x1\n\t" + "MOV r5, #0x0\n\t" + "MOV r6, #0x0\n\t" + "MOV r7, #0x0\n\t" + "MOV r8, #0x0\n\t" + "MOV r9, #0x0\n\t" "MOV r10, #0x0\n\t" "MOV r11, #0x0\n\t" - "STM r3!, {r10, r11}\n\t" - "STM r3!, {r10, r11}\n\t" - "STM r3!, {r10, r11}\n\t" - "STM r3!, {r10, r11}\n\t" - "SUB r3, r3, #0x20\n\t" + "STM %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "ADD r3, sp, #0x20\n\t" - /* Set one */ - "MOV r10, #0x1\n\t" - "MOV r11, #0x0\n\t" - "STM r3!, {r10, r11}\n\t" - "MOV r10, #0x0\n\t" - "STM r3!, {r10, r11}\n\t" - "STM r3!, {r10, r11}\n\t" - "STM r3!, {r10, r11}\n\t" - "SUB r3, r3, #0x20\n\t" + "STM r3, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "MOV r4, #0x0\n\t" 
+ "MOV r3, sp\n\t" + "STM r3, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "ADD r3, sp, #0x40\n\t" /* Copy */ "LDM r2, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" @@ -2281,34 +2272,20 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) "STR r4, [sp, #188]\n\t" "MOV %[n], #0x0\n\t" "STR %[n], [sp, #164]\n\t" - /* Set one */ - "MOV r10, #0x1\n\t" - "MOV r11, #0x0\n\t" - "STM %[r]!, {r10, r11}\n\t" - "MOV r10, #0x0\n\t" - "STM %[r]!, {r10, r11}\n\t" - "STM %[r]!, {r10, r11}\n\t" - "STM %[r]!, {r10, r11}\n\t" - "SUB %[r], %[r], #0x20\n\t" - "MOV r3, sp\n\t" - /* Set zero */ + "MOV r4, #0x1\n\t" + "MOV r5, #0x0\n\t" + "MOV r6, #0x0\n\t" + "MOV r7, #0x0\n\t" + "MOV r8, #0x0\n\t" + "MOV r9, #0x0\n\t" "MOV r10, #0x0\n\t" "MOV r11, #0x0\n\t" - "STM r3!, {r10, r11}\n\t" - "STM r3!, {r10, r11}\n\t" - "STM r3!, {r10, r11}\n\t" - "STM r3!, {r10, r11}\n\t" - "SUB r3, r3, #0x20\n\t" + "STM %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "ADD r3, sp, #0x20\n\t" - /* Set one */ - "MOV r10, #0x1\n\t" - "MOV r11, #0x0\n\t" - "STM r3!, {r10, r11}\n\t" - "MOV r10, #0x0\n\t" - "STM r3!, {r10, r11}\n\t" - "STM r3!, {r10, r11}\n\t" - "STM r3!, {r10, r11}\n\t" - "SUB r3, r3, #0x20\n\t" + "STM r3, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "MOV r4, #0x0\n\t" + "MOV r3, sp\n\t" + "STM r3, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "ADD r3, sp, #0x40\n\t" /* Copy */ "LDM r2, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" @@ -3808,6 +3785,7 @@ void sc_reduce(byte* s_p) ); } +#ifdef HAVE_ED25519_SIGN void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) { register byte* s asm ("r0") = (byte*)s_p; @@ -4215,6 +4193,7 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) ); } +#endif /* HAVE_ED25519_SIGN */ #endif /* HAVE_ED25519 */ #endif /* !CURVE25519_SMALL || !ED25519_SMALL */