diff --git a/configure.ac b/configure.ac index 16f0ba138..eca16ae45 100644 --- a/configure.ac +++ b/configure.ac @@ -2268,7 +2268,7 @@ AC_ARG_ENABLE([aescbc], if test "$ENABLED_AESCBC" = "no" then AM_CFLAGS="$AM_CFLAGS -DNO_AES_CBC" - AM_CCASFLAGS="$AM_CCASFLAGS -DHAVE_AES_CBC" + AM_CCASFLAGS="$AM_CCASFLAGS -DNO_AES_CBC" fi # AES-CBC length checks (checks that input lengths are multiples of block size) diff --git a/src/include.am b/src/include.am index 7efe27e39..8e5a42af0 100644 --- a/src/include.am +++ b/src/include.am @@ -157,8 +157,8 @@ endif if BUILD_AES src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes.c -if BUILD_ARMASM_NEON src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-aes.c +if BUILD_ARMASM_NEON if !BUILD_ARMASM_CRYPTO if BUILD_ARMASM_INLINE src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c @@ -166,7 +166,15 @@ else src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-aes-asm.S endif !BUILD_ARMASM_INLINE endif !BUILD_ARMASM_CRYPTO -endif BUILD_ARMASM_NEON +else +if BUILD_ARMASM +if BUILD_ARMASM_INLINE +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm_c.c +else +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm.S +endif !BUILD_ARMASM_INLINE +endif BUILD_ARMASM +endif !BUILD_ARMASM_NEON endif BUILD_AES if BUILD_AESNI @@ -401,16 +409,26 @@ endif if !BUILD_FIPS_CURRENT if BUILD_AES src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/aes.c -if BUILD_ARMASM_NEON src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-aes.c +if BUILD_ARMASM_NEON if !BUILD_ARMASM_CRYPTO if BUILD_ARMASM_INLINE src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm_c.c else src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-aes-asm.S +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm.S endif !BUILD_ARMASM_INLINE endif !BUILD_ARMASM_CRYPTO -endif BUILD_ARMASM_NEON +else +if BUILD_ARMASM +if BUILD_ARMASM_INLINE +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm_c.c +else +src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-aes-asm.S +endif !BUILD_ARMASM_INLINE +endif BUILD_ARMASM +endif !BUILD_ARMASM_NEON if BUILD_AFALG src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/af_alg/afalg_aes.c endif BUILD_AFALG diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index a615488f1..17da9d652 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -97,7 +97,7 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits #include #endif -#if !defined(WOLFSSL_ARMASM) || defined(WOLFSSL_ARMASM_NO_NEON) +#ifndef WOLFSSL_ARMASM #ifdef WOLFSSL_IMX6_CAAM_BLOB /* case of possibly not using hardware acceleration for AES but using key @@ -4573,7 +4573,7 @@ int wc_AesSetIV(Aes* aes, const byte* iv) #endif /* NEED_AES_CTR_SOFT */ #endif /* WOLFSSL_AES_COUNTER */ -#endif /* !WOLFSSL_ARMASM || WOLFSSL_ARMASM_NO_NEON */ +#endif /* !WOLFSSL_ARMASM */ /* @@ -4620,7 +4620,7 @@ static WC_INLINE void IncCtr(byte* ctr, word32 ctrSz) #endif -#if defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_ARMASM_NO_NEON) +#ifdef WOLFSSL_ARMASM /* implementation is located in wolfcrypt/src/port/arm/armv8-aes.c */ #elif defined(WOLFSSL_AFALG) @@ -8851,7 +8851,7 @@ int wc_AesCcmCheckTagSize(int sz) return 0; } -#if defined(WOLFSSL_ARMASM) && 
!defined(WOLFSSL_ARMASM_NO_NEON) +#ifdef WOLFSSL_ARMASM /* implementation located in wolfcrypt/src/port/arm/armv8-aes.c */ #elif defined(HAVE_COLDFIRE_SEC) diff --git a/wolfcrypt/src/ge_operations.c b/wolfcrypt/src/ge_operations.c index 39a709db2..995e1c59c 100644 --- a/wolfcrypt/src/ge_operations.c +++ b/wolfcrypt/src/ge_operations.c @@ -921,10 +921,14 @@ int ge_compress_key(byte* out, const byte* xIn, const byte* yIn, word32 keySz) { ge_p2 g; byte bArray[ED25519_KEY_SIZE]; + byte x[ED25519_KEY_SIZE]; + byte y[ED25519_KEY_SIZE]; word32 i; - fe_frombytes(g.X, xIn); - fe_frombytes(g.Y, yIn); + XMEMCPY(x, xIn, ED25519_KEY_SIZE); + XMEMCPY(y, yIn, ED25519_KEY_SIZE); + fe_frombytes(g.X, x); + fe_frombytes(g.Y, y); fe_1(g.Z); ge_tobytes(bArray, &g); diff --git a/wolfcrypt/src/port/arm/armv8-32-aes-asm.S b/wolfcrypt/src/port/arm/armv8-32-aes-asm.S index ccfbdeb7e..cc6830f95 100644 --- a/wolfcrypt/src/port/arm/armv8-32-aes-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-aes-asm.S @@ -33,6 +33,7 @@ #if !defined(__aarch64__) && defined(__arm__) #ifndef WOLFSSL_ARMASM_INLINE #ifndef NO_AES +#ifdef HAVE_AES_DECRYPT .text .type L_AES_ARM32_td_data, %object .size L_AES_ARM32_td_data, 1024 @@ -294,6 +295,8 @@ L_AES_ARM32_td_data: .word 0x70d532b6 .word 0x74486c5c .word 0x42d0b857 +#endif /* HAVE_AES_DECRYPT */ +#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) .text .type L_AES_ARM32_te_data, %object .size L_AES_ARM32_te_data, 1024 @@ -555,18 +558,23 @@ L_AES_ARM32_te_data: .word 0xfca85454 .word 0xd66dbbbb .word 0x3a2c1616 +#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#ifdef HAVE_AES_DECRYPT .text .type L_AES_ARM32_td, %object .size L_AES_ARM32_td, 12 .align 4 L_AES_ARM32_td: .word L_AES_ARM32_td_data +#endif /* HAVE_AES_DECRYPT */ +#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) .text .type L_AES_ARM32_te, %object .size L_AES_ARM32_te, 12 .align 4 L_AES_ARM32_te: .word L_AES_ARM32_te_data +#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT .text .align 4 @@ -574,8 +582,10 @@ L_AES_ARM32_te: .type AES_invert_key, %function AES_invert_key: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} - ldr r12, L_AES_ARM32_te - ldr lr, L_AES_ARM32_td + adr r12, L_AES_ARM32_te + ldr r12, [r12] + adr lr, L_AES_ARM32_td + ldr lr, [lr] add r10, r0, r1, lsl #4 mov r11, r1 L_AES_invert_key_loop: @@ -681,7 +691,8 @@ L_AES_ARM32_rcon: .type AES_set_encrypt_key, %function AES_set_encrypt_key: push {r4, r5, r6, r7, r8, lr} - ldr r8, L_AES_ARM32_te + adr r8, L_AES_ARM32_te + ldr r8, [r8] adr lr, L_AES_ARM32_rcon cmp r1, #0x80 beq L_AES_set_encrypt_key_start_128 @@ -911,7 +922,6 @@ L_AES_set_encrypt_key_loop_128: L_AES_set_encrypt_key_end: pop {r4, r5, r6, r7, r8, pc} .size AES_set_encrypt_key,.-AES_set_encrypt_key -#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) .text .align 4 .globl AES_encrypt_block @@ -1123,12 +1133,14 @@ L_AES_encrypt_block_nr: eor r7, r7, r11 pop {pc} .size AES_encrypt_block,.-AES_encrypt_block +#if defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) .text .type 
L_AES_ARM32_te_ecb, %object .size L_AES_ARM32_te_ecb, 12 .align 4 L_AES_ARM32_te_ecb: .word L_AES_ARM32_te_data +#endif /* HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) .text .align 4 @@ -1137,7 +1149,8 @@ L_AES_ARM32_te_ecb: AES_ECB_encrypt: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} mov lr, r0 - ldr r0, L_AES_ARM32_te_ecb + adr r0, L_AES_ARM32_te_ecb + ldr r0, [r0] ldr r12, [sp, #36] push {r3} cmp r12, #10 @@ -1259,7 +1272,8 @@ AES_CBC_encrypt: ldr r8, [sp, #36] ldr r9, [sp, #40] mov lr, r0 - ldr r0, L_AES_ARM32_te_ecb + adr r0, L_AES_ARM32_te_ecb + ldr r0, [r0] ldm r9, {r4, r5, r6, r7} push {r3, r9} cmp r8, #10 @@ -1394,7 +1408,8 @@ AES_CTR_encrypt: ldr r12, [sp, #36] ldr r8, [sp, #40] mov lr, r0 - ldr r0, L_AES_ARM32_te_ecb + adr r0, L_AES_ARM32_te_ecb + ldr r0, [r0] ldm r8, {r4, r5, r6, r7} rev r4, r4 rev r5, r5 @@ -1540,7 +1555,6 @@ L_AES_CTR_encrypt_end: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} .size AES_CTR_encrypt,.-AES_CTR_encrypt #endif /* WOLFSSL_AES_COUNTER */ -#endif /* HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_CBC) .text @@ -2030,7 +2044,8 @@ AES_ECB_decrypt: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ldr r8, [sp, #36] mov lr, r0 - ldr r0, L_AES_ARM32_td_ecb + adr r0, L_AES_ARM32_td_ecb + ldr r0, [r0] adr r12, L_AES_ARM32_td4 cmp r8, #10 beq L_AES_ECB_decrypt_start_block_128 @@ -2147,7 +2162,8 @@ AES_CBC_decrypt: ldr r8, [sp, #36] ldr r4, [sp, #40] mov lr, r0 - ldr r0, L_AES_ARM32_td_ecb + adr r0, L_AES_ARM32_td_ecb + ldr r0, [r0] adr r12, L_AES_ARM32_td4 push {r3, r4} cmp r8, #10 @@ -3118,7 +3134,8 @@ AES_GCM_encrypt: ldr r12, [sp, #36] ldr r8, [sp, #40] mov lr, r0 - ldr r0, L_AES_ARM32_te_gcm + adr r0, L_AES_ARM32_te_gcm + ldr r0, [r0] ldm r8, {r4, r5, r6, r7} rev r4, r4 rev r5, r5 diff --git a/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c index 84e0ef62c..39959155e 100644 --- a/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c @@ -28,6 +28,7 @@ #include #endif /* HAVE_CONFIG_H */ #include +#include #ifdef WOLFSSL_ARMASM #if !defined(__aarch64__) && defined(__arm__) @@ -36,10 +37,12 @@ #include #endif /* HAVE_CONFIG_H */ #include +#include #ifdef WOLFSSL_ARMASM_INLINE #ifndef NO_AES #include +#ifdef HAVE_AES_DECRYPT static const uint32_t L_AES_ARM32_td_data[] = { 0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e, 0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303, @@ -107,6 +110,8 @@ static const uint32_t L_AES_ARM32_td_data[] = { 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857, }; +#endif /* HAVE_AES_DECRYPT */ +#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) static const uint32_t L_AES_ARM32_te_data[] = { 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5, @@ -174,18 +179,25 @@ static const uint32_t L_AES_ARM32_te_data[] = { 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616, }; +#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#ifdef HAVE_AES_DECRYPT static const uint32_t* L_AES_ARM32_td = L_AES_ARM32_td_data; +#endif /* HAVE_AES_DECRYPT */ +#if 
defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) static const uint32_t* L_AES_ARM32_te = L_AES_ARM32_te_data; +#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT void AES_invert_key(unsigned char* ks, word32 rounds); void AES_invert_key(unsigned char* ks_p, word32 rounds_p) { - register unsigned char* ks asm ("r0") = ks_p; - register word32 rounds asm ("r1") = rounds_p; + register unsigned char* ks asm ("r0") = (unsigned char*)ks_p; + register word32 rounds asm ("r1") = (word32)rounds_p; + register uint32_t* L_AES_ARM32_te_c asm ("r2") = (uint32_t*)L_AES_ARM32_te; + register uint32_t* L_AES_ARM32_td_c asm ("r3") = (uint32_t*)L_AES_ARM32_td; __asm__ __volatile__ ( - "ldr r12, %[L_AES_ARM32_te]\n\t" - "ldr lr, %[L_AES_ARM32_td]\n\t" + "mov r12, %[L_AES_ARM32_te]\n\t" + "mov lr, %[L_AES_ARM32_td]\n\t" "add r10, %[ks], %[rounds], lsl #4\n\t" "mov r11, %[rounds]\n\t" "\n" @@ -269,9 +281,9 @@ void AES_invert_key(unsigned char* ks_p, word32 rounds_p) "str r8, [%[ks]], #4\n\t" "subs r11, r11, #1\n\t" "bne L_AES_invert_key_mix_loop_%=\n\t" - : [ks] "+r" (ks), [rounds] "+r" (rounds) - : [L_AES_ARM32_te] "g" (L_AES_ARM32_te), [L_AES_ARM32_td] "g" (L_AES_ARM32_td) - : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : [ks] "+r" (ks), [rounds] "+r" (rounds), [L_AES_ARM32_te] "+r" (L_AES_ARM32_te_c), [L_AES_ARM32_td] "+r" (L_AES_ARM32_td_c) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } @@ -285,9 +297,11 @@ static const uint32_t L_AES_ARM32_rcon[] = { void AES_set_encrypt_key(const unsigned char* key, word32 len, unsigned char* ks); void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, unsigned char* ks_p) { - register const unsigned char* key asm ("r0") = key_p; - register word32 len asm ("r1") = len_p; - register unsigned char* ks asm ("r2") = ks_p; + register const unsigned char* key asm ("r0") = (const unsigned char*)key_p; + register word32 len asm ("r1") = (word32)len_p; + register unsigned char* ks asm ("r2") = (unsigned char*)ks_p; + register uint32_t* L_AES_ARM32_te_c asm ("r3") = (uint32_t*)L_AES_ARM32_te; + register uint32_t* L_AES_ARM32_rcon_c asm ("r4") = (uint32_t*)&L_AES_ARM32_rcon; __asm__ __volatile__ ( "mov r8, %[L_AES_ARM32_te]\n\t" @@ -524,20 +538,19 @@ void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, unsigned char "bne L_AES_set_encrypt_key_loop_128_%=\n\t" "\n" "L_AES_set_encrypt_key_end_%=: \n\t" - : [key] "+r" (key), [len] "+r" (len), [ks] "+r" (ks) - : [L_AES_ARM32_te] "g" (L_AES_ARM32_te), [L_AES_ARM32_rcon] "g" (L_AES_ARM32_rcon) - : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" + : [key] "+r" (key), [len] "+r" (len), [ks] "+r" (ks), [L_AES_ARM32_te] "+r" (L_AES_ARM32_te_c), [L_AES_ARM32_rcon] "+r" (L_AES_ARM32_rcon_c) + : + : "memory", "r12", "lr", "r5", "r6", "r7", "r8" ); } -#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) void AES_encrypt_block(const uint32_t* te, int nr, int len, const uint32_t* ks); void AES_encrypt_block(const uint32_t* te_p, int nr_p, int len_p, const uint32_t* ks_p) { - register const uint32_t* te asm ("r0") = te_p; - register int nr asm ("r1") = nr_p; - register int len asm ("r2") = len_p; - register const uint32_t* ks asm ("r3") = ks_p; + register const uint32_t* te 
asm ("r0") = (const uint32_t*)te_p; + register int nr asm ("r1") = (int)nr_p; + register int len asm ("r2") = (int)len_p; + register const uint32_t* ks asm ("r3") = (const uint32_t*)ks_p; __asm__ __volatile__ ( "\n" @@ -750,20 +763,23 @@ void AES_encrypt_block(const uint32_t* te_p, int nr_p, int len_p, const uint32_t ); } +#if defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) static const uint32_t* L_AES_ARM32_te_ecb = L_AES_ARM32_te_data; +#endif /* HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr); void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p) { - register const unsigned char* in asm ("r0") = in_p; - register unsigned char* out asm ("r1") = out_p; - register unsigned long len asm ("r2") = len_p; - register const unsigned char* ks asm ("r3") = ks_p; - register int nr asm ("r4") = nr_p; + register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; + register unsigned char* out asm ("r1") = (unsigned char*)out_p; + register unsigned long len asm ("r2") = (unsigned long)len_p; + register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; + register int nr asm ("r4") = (int)nr_p; + register uint32_t* L_AES_ARM32_te_ecb_c asm ("r5") = (uint32_t*)L_AES_ARM32_te_ecb; __asm__ __volatile__ ( "mov lr, %[in]\n\t" - "ldr r0, %[L_AES_ARM32_te_ecb]\n\t" + "mov r0, %[L_AES_ARM32_te_ecb]\n\t" "mov r12, r4\n\t" "push {%[ks]}\n\t" "cmp r12, #10\n\t" @@ -878,9 +894,9 @@ void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "\n" "L_AES_ECB_encrypt_end_%=: \n\t" "pop {%[ks]}\n\t" - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr) - : [L_AES_ARM32_te_ecb] "g" (L_AES_ARM32_te_ecb) - : "memory", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [L_AES_ARM32_te_ecb] "+r" (L_AES_ARM32_te_ecb_c) + : + : "memory", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11" ); (void)nr; } @@ -890,18 +906,19 @@ void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv); void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* iv_p) { - register const unsigned char* in asm ("r0") = in_p; - register unsigned char* out asm ("r1") = out_p; - register unsigned long len asm ("r2") = len_p; - register const unsigned char* ks asm ("r3") = ks_p; - register int nr asm ("r4") = nr_p; - register unsigned char* iv asm ("r5") = iv_p; + register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; + register unsigned char* out asm ("r1") = (unsigned char*)out_p; + register unsigned long len asm ("r2") = (unsigned long)len_p; + register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; + register int nr asm ("r4") = (int)nr_p; + register unsigned char* iv asm ("r5") = (unsigned char*)iv_p; + register uint32_t* L_AES_ARM32_te_ecb_c asm ("r6") = 
(uint32_t*)L_AES_ARM32_te_ecb; __asm__ __volatile__ ( "mov r8, r4\n\t" "mov r9, r5\n\t" "mov lr, %[in]\n\t" - "ldr r0, %[L_AES_ARM32_te_ecb]\n\t" + "mov r0, %[L_AES_ARM32_te_ecb]\n\t" "ldm r9, {r4, r5, r6, r7}\n\t" "push {%[ks], r9}\n\t" "cmp r8, #10\n\t" @@ -1029,9 +1046,9 @@ void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "L_AES_CBC_encrypt_end_%=: \n\t" "pop {%[ks], r9}\n\t" "stm r9, {r4, r5, r6, r7}\n\t" - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv) - : [L_AES_ARM32_te_ecb] "g" (L_AES_ARM32_te_ecb) - : "memory", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11" + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv), [L_AES_ARM32_te_ecb] "+r" (L_AES_ARM32_te_ecb_c) + : + : "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11" ); (void)nr; (void)iv; @@ -1042,18 +1059,19 @@ void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr); void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* ctr_p) { - register const unsigned char* in asm ("r0") = in_p; - register unsigned char* out asm ("r1") = out_p; - register unsigned long len asm ("r2") = len_p; - register const unsigned char* ks asm ("r3") = ks_p; - register int nr asm ("r4") = nr_p; - register unsigned char* ctr asm ("r5") = ctr_p; + register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; + register unsigned char* out asm ("r1") = (unsigned char*)out_p; + register unsigned long len asm ("r2") = (unsigned long)len_p; + register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; + register int nr asm ("r4") = (int)nr_p; + register unsigned char* ctr asm ("r5") = (unsigned char*)ctr_p; + register uint32_t* L_AES_ARM32_te_ecb_c asm ("r6") = (uint32_t*)L_AES_ARM32_te_ecb; __asm__ __volatile__ ( "mov r12, r4\n\t" "mov r8, r5\n\t" "mov lr, %[in]\n\t" - "ldr r0, %[L_AES_ARM32_te_ecb]\n\t" + "mov r0, %[L_AES_ARM32_te_ecb]\n\t" "ldm r8, {r4, r5, r6, r7}\n\t" "rev r4, r4\n\t" "rev r5, r5\n\t" @@ -1202,23 +1220,22 @@ void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "rev r6, r6\n\t" "rev r7, r7\n\t" "stm r8, {r4, r5, r6, r7}\n\t" - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr) - : [L_AES_ARM32_te_ecb] "g" (L_AES_ARM32_te_ecb) - : "memory", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11" + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr), [L_AES_ARM32_te_ecb] "+r" (L_AES_ARM32_te_ecb_c) + : + : "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11" ); (void)nr; (void)ctr; } #endif /* WOLFSSL_AES_COUNTER */ -#endif /* HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #ifdef HAVE_AES_DECRYPT #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_CBC) void AES_decrypt_block(const uint32_t* td, int nr); void AES_decrypt_block(const uint32_t* td_p, int nr_p) { - register const uint32_t* td asm ("r0") = td_p; - register int nr asm ("r1") = nr_p; + register const uint32_t* td asm ("r0") = (const uint32_t*)td_p; + register int nr asm ("r1") = (int)nr_p; __asm__ __volatile__ ( "\n" @@ -1471,17 +1488,19 @@ static const unsigned char L_AES_ARM32_td4[] = { 
void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr); void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p) { - register const unsigned char* in asm ("r0") = in_p; - register unsigned char* out asm ("r1") = out_p; - register unsigned long len asm ("r2") = len_p; - register const unsigned char* ks asm ("r3") = ks_p; - register int nr asm ("r4") = nr_p; + register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; + register unsigned char* out asm ("r1") = (unsigned char*)out_p; + register unsigned long len asm ("r2") = (unsigned long)len_p; + register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; + register int nr asm ("r4") = (int)nr_p; + register uint32_t* L_AES_ARM32_td_ecb_c asm ("r5") = (uint32_t*)L_AES_ARM32_td_ecb; + register unsigned char* L_AES_ARM32_td4_c asm ("r6") = (unsigned char*)&L_AES_ARM32_td4; __asm__ __volatile__ ( "mov r8, r4\n\t" "mov lr, %[in]\n\t" - "ldr r0, %[L_AES_ARM32_td_ecb]\n\t" - "ldr r12, %[L_AES_ARM32_td4]\n\t" + "mov r0, %[L_AES_ARM32_td_ecb]\n\t" + "mov r12, %[L_AES_ARM32_td4]\n\t" "cmp r8, #10\n\t" "beq L_AES_ECB_decrypt_start_block_128_%=\n\t" "cmp r8, #12\n\t" @@ -1590,9 +1609,9 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "bne L_AES_ECB_decrypt_loop_block_128_%=\n\t" "\n" "L_AES_ECB_decrypt_end_%=: \n\t" - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr) - : [L_AES_ARM32_td_ecb] "g" (L_AES_ARM32_td_ecb), [L_AES_ARM32_td4] "g" (L_AES_ARM32_td4) - : "memory", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [L_AES_ARM32_td_ecb] "+r" (L_AES_ARM32_td_ecb_c), [L_AES_ARM32_td4] "+r" (L_AES_ARM32_td4_c) + : + : "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11" ); (void)nr; } @@ -1602,19 +1621,21 @@ void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* iv); void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* iv_p) { - register const unsigned char* in asm ("r0") = in_p; - register unsigned char* out asm ("r1") = out_p; - register unsigned long len asm ("r2") = len_p; - register const unsigned char* ks asm ("r3") = ks_p; - register int nr asm ("r4") = nr_p; - register unsigned char* iv asm ("r5") = iv_p; + register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; + register unsigned char* out asm ("r1") = (unsigned char*)out_p; + register unsigned long len asm ("r2") = (unsigned long)len_p; + register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; + register int nr asm ("r4") = (int)nr_p; + register unsigned char* iv asm ("r5") = (unsigned char*)iv_p; + register uint32_t* L_AES_ARM32_td_ecb_c asm ("r6") = (uint32_t*)L_AES_ARM32_td_ecb; + register unsigned char* L_AES_ARM32_td4_c asm ("r7") = (unsigned char*)&L_AES_ARM32_td4; __asm__ __volatile__ ( "mov r8, r4\n\t" "mov r4, r5\n\t" "mov lr, %[in]\n\t" - "ldr r0, %[L_AES_ARM32_td_ecb]\n\t" - "ldr r12, %[L_AES_ARM32_td4]\n\t" + "mov r0, %[L_AES_ARM32_td_ecb]\n\t" + "mov r12, %[L_AES_ARM32_td4]\n\t" "push {%[ks]-r4}\n\t" "cmp r8, #10\n\t" "beq L_AES_CBC_decrypt_loop_block_128_%=\n\t" @@ -1992,9 
+2013,9 @@ void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "\n" "L_AES_CBC_decrypt_end_%=: \n\t" "pop {%[ks]-r4}\n\t" - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv) - : [L_AES_ARM32_td_ecb] "g" (L_AES_ARM32_td_ecb), [L_AES_ARM32_td4] "g" (L_AES_ARM32_td4) - : "memory", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11" + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv), [L_AES_ARM32_td_ecb] "+r" (L_AES_ARM32_td_ecb_c), [L_AES_ARM32_td4] "+r" (L_AES_ARM32_td4_c) + : + : "memory", "r12", "lr", "r8", "r9", "r10", "r11" ); (void)nr; (void)iv; @@ -2014,13 +2035,14 @@ static const uint32_t L_GCM_gmult_len_r[] = { void GCM_gmult_len(unsigned char* x, const unsigned char** m, const unsigned char* data, unsigned long len); void GCM_gmult_len(unsigned char* x_p, const unsigned char** m_p, const unsigned char* data_p, unsigned long len_p) { - register unsigned char* x asm ("r0") = x_p; - register const unsigned char** m asm ("r1") = m_p; - register const unsigned char* data asm ("r2") = data_p; - register unsigned long len asm ("r3") = len_p; + register unsigned char* x asm ("r0") = (unsigned char*)x_p; + register const unsigned char** m asm ("r1") = (const unsigned char**)m_p; + register const unsigned char* data asm ("r2") = (const unsigned char*)data_p; + register unsigned long len asm ("r3") = (unsigned long)len_p; + register uint32_t* L_GCM_gmult_len_r_c asm ("r4") = (uint32_t*)&L_GCM_gmult_len_r; __asm__ __volatile__ ( - "ldr lr, %[L_GCM_gmult_len_r]\n\t" + "mov lr, %[L_GCM_gmult_len_r]\n\t" "\n" "L_GCM_gmult_len_start_block_%=: \n\t" "push {r3}\n\t" @@ -2568,9 +2590,9 @@ void GCM_gmult_len(unsigned char* x_p, const unsigned char** m_p, const unsigned "subs %[len], %[len], #16\n\t" "add %[data], %[data], #16\n\t" "bne L_GCM_gmult_len_start_block_%=\n\t" - : [x] "+r" (x), [m] "+r" (m), [data] "+r" (data), [len] "+r" (len) - : [L_AES_ARM32_td_ecb] "g" (L_AES_ARM32_td_ecb), [L_AES_ARM32_td4] "g" (L_AES_ARM32_td4), [L_GCM_gmult_len_r] "g" (L_GCM_gmult_len_r) - : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : [x] "+r" (x), [m] "+r" (m), [data] "+r" (data), [len] "+r" (len), [L_GCM_gmult_len_r] "+r" (L_GCM_gmult_len_r_c) + : + : "memory", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } @@ -2578,18 +2600,19 @@ static const uint32_t* L_AES_ARM32_te_gcm = L_AES_ARM32_te_data; void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr); void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* ctr_p) { - register const unsigned char* in asm ("r0") = in_p; - register unsigned char* out asm ("r1") = out_p; - register unsigned long len asm ("r2") = len_p; - register const unsigned char* ks asm ("r3") = ks_p; - register int nr asm ("r4") = nr_p; - register unsigned char* ctr asm ("r5") = ctr_p; + register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; + register unsigned char* out asm ("r1") = (unsigned char*)out_p; + register unsigned long len asm ("r2") = (unsigned long)len_p; + register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; + register int nr asm ("r4") = (int)nr_p; + register unsigned char* ctr asm ("r5") = (unsigned char*)ctr_p; + register uint32_t* L_AES_ARM32_te_gcm_c asm ("r6") = (uint32_t*)L_AES_ARM32_te_gcm; __asm__ 
__volatile__ ( "mov r12, r4\n\t" "mov r8, r5\n\t" "mov lr, %[in]\n\t" - "ldr r0, %[L_AES_ARM32_te_gcm]\n\t" + "mov r0, %[L_AES_ARM32_te_gcm]\n\t" "ldm r8, {r4, r5, r6, r7}\n\t" "rev r4, r4\n\t" "rev r5, r5\n\t" @@ -2729,9 +2752,9 @@ void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l "rev r6, r6\n\t" "rev r7, r7\n\t" "stm r8, {r4, r5, r6, r7}\n\t" - : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr) - : [L_AES_ARM32_te_gcm] "g" (L_AES_ARM32_te_gcm) - : "memory", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11" + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr), [L_AES_ARM32_te_gcm] "+r" (L_AES_ARM32_te_gcm_c) + : + : "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11" ); (void)nr; (void)ctr; diff --git a/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c b/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c index 403c8c5ef..627b74edb 100644 --- a/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c @@ -28,6 +28,7 @@ #include #endif /* HAVE_CONFIG_H */ #include +#include #ifdef WOLFSSL_ARMASM #if !defined(__aarch64__) && defined(__arm__) @@ -36,6 +37,7 @@ #include #endif /* HAVE_CONFIG_H */ #include +#include #ifdef WOLFSSL_ARMASM_INLINE /* Based on work by: Emil Lenngren * https://github.com/pornin/X25519-Cortex-M4 @@ -50,7 +52,6 @@ void fe_init() { - __asm__ __volatile__ ( "\n\t" : @@ -62,7 +63,6 @@ void fe_init() void fe_add_sub_op(void); void fe_add_sub_op() { - __asm__ __volatile__ ( /* Add-Sub */ #if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) @@ -269,7 +269,6 @@ void fe_add_sub_op() void fe_sub_op(void); void fe_sub_op() { - __asm__ __volatile__ ( /* Sub */ "ldm r2!, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t" @@ -307,9 +306,9 @@ void fe_sub_op() void fe_sub(fe r_p, const fe a_p, const fe b_p) { - register fe r asm ("r0") = r_p; - register const fe a asm ("r1") = a_p; - register const fe b asm ("r2") = b_p; + register sword32* r asm ("r0") = (sword32*)r_p; + register const sword32* a asm ("r1") = (const sword32*)a_p; + register const sword32* b asm ("r2") = (const sword32*)b_p; __asm__ __volatile__ ( "bl fe_sub_op\n\t" @@ -322,7 +321,6 @@ void fe_sub(fe r_p, const fe a_p, const fe b_p) void fe_add_op(void); void fe_add_op() { - __asm__ __volatile__ ( /* Add */ "ldm r2!, {r6, r7, r8, r9, r10, r11, r12, lr}\n\t" @@ -361,9 +359,9 @@ void fe_add_op() void fe_add(fe r_p, const fe a_p, const fe b_p) { - register fe r asm ("r0") = r_p; - register const fe a asm ("r1") = a_p; - register const fe b asm ("r2") = b_p; + register sword32* r asm ("r0") = (sword32*)r_p; + register const sword32* a asm ("r1") = (const sword32*)a_p; + register const sword32* b asm ("r2") = (const sword32*)b_p; __asm__ __volatile__ ( "bl fe_add_op\n\t" @@ -376,8 +374,8 @@ void fe_add(fe r_p, const fe a_p, const fe b_p) #ifdef HAVE_ED25519 void fe_frombytes(fe out_p, const unsigned char* in_p) { - register fe out asm ("r0") = out_p; - register const unsigned char* in asm ("r1") = in_p; + register sword32* out asm ("r0") = (sword32*)out_p; + register const unsigned char* in asm ("r1") = (const unsigned char*)in_p; __asm__ __volatile__ ( "ldm %[in], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" @@ -391,8 +389,8 @@ void fe_frombytes(fe out_p, const unsigned char* in_p) void fe_tobytes(unsigned char* out_p, const fe n_p) { - register unsigned char* out asm ("r0") = out_p; - register const fe n asm ("r1") = n_p; + register unsigned char* out asm 
("r0") = (unsigned char*)out_p; + register const sword32* n asm ("r1") = (const sword32*)n_p; __asm__ __volatile__ ( "ldm %[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" @@ -424,7 +422,7 @@ void fe_tobytes(unsigned char* out_p, const fe n_p) void fe_1(fe n_p) { - register fe n asm ("r0") = n_p; + register sword32* n asm ("r0") = (sword32*)n_p; __asm__ __volatile__ ( /* Set one */ @@ -463,7 +461,7 @@ void fe_1(fe n_p) void fe_0(fe n_p) { - register fe n asm ("r0") = n_p; + register sword32* n asm ("r0") = (sword32*)n_p; __asm__ __volatile__ ( /* Set zero */ @@ -501,8 +499,8 @@ void fe_0(fe n_p) void fe_copy(fe r_p, const fe a_p) { - register fe r asm ("r0") = r_p; - register const fe a asm ("r1") = a_p; + register sword32* r asm ("r0") = (sword32*)r_p; + register const sword32* a asm ("r1") = (const sword32*)a_p; __asm__ __volatile__ ( /* Copy */ @@ -562,8 +560,8 @@ void fe_copy(fe r_p, const fe a_p) void fe_neg(fe r_p, const fe a_p) { - register fe r asm ("r0") = r_p; - register const fe a asm ("r1") = a_p; + register sword32* r asm ("r0") = (sword32*)r_p; + register const sword32* a asm ("r1") = (const sword32*)a_p; __asm__ __volatile__ ( "mvn lr, #0\n\t" @@ -589,7 +587,7 @@ void fe_neg(fe r_p, const fe a_p) int fe_isnonzero(const fe a_p) { - register const fe a asm ("r0") = a_p; + register const sword32* a asm ("r0") = (const sword32*)a_p; __asm__ __volatile__ ( "ldm %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" @@ -628,7 +626,7 @@ int fe_isnonzero(const fe a_p) int fe_isnegative(const fe a_p) { - register const fe a asm ("r0") = a_p; + register const sword32* a asm ("r0") = (const sword32*)a_p; __asm__ __volatile__ ( "ldm %[a]!, {r2, r3, r4, r5}\n\t" @@ -655,9 +653,9 @@ int fe_isnegative(const fe a_p) #ifndef WC_NO_CACHE_RESISTANT void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) { - register fe* r asm ("r0") = r_p; - register fe* base asm ("r1") = base_p; - register signed char b asm ("r2") = b_p; + register fe* r asm ("r0") = (fe*)r_p; + register fe* base asm ("r1") = (fe*)base_p; + register signed char b asm ("r2") = (signed char)b_p; __asm__ __volatile__ ( "sxtb %[b], %[b]\n\t" @@ -2364,9 +2362,9 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) #else void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) { - register fe* r asm ("r0") = r_p; - register fe* base asm ("r1") = base_p; - register signed char b asm ("r2") = b_p; + register fe* r asm ("r0") = (fe*)r_p; + register fe* base asm ("r1") = (fe*)base_p; + register signed char b asm ("r2") = (signed char)b_p; __asm__ __volatile__ ( "sxtb %[b], %[b]\n\t" @@ -2472,7 +2470,6 @@ void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) void fe_mul_op(void); void fe_mul_op() { - __asm__ __volatile__ ( "sub sp, sp, #44\n\t" #if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) @@ -2610,9 +2607,9 @@ void fe_mul_op() void fe_mul(fe r_p, const fe a_p, const fe b_p) { - register fe r asm ("r0") = r_p; - register const fe a asm ("r1") = a_p; - register const fe b asm ("r2") = b_p; + register sword32* r asm ("r0") = (sword32*)r_p; + register const sword32* a asm ("r1") = (const sword32*)a_p; + register const sword32* b asm ("r2") = (const sword32*)b_p; __asm__ __volatile__ ( "bl fe_mul_op\n\t" @@ -2625,7 +2622,6 @@ void fe_mul(fe r_p, const fe a_p, const fe b_p) void fe_sq_op(void); void fe_sq_op() { - __asm__ __volatile__ ( "sub sp, sp, #32\n\t" "str r0, [sp, #28]\n\t" @@ -2749,8 +2745,8 @@ void fe_sq_op() void fe_sq(fe r_p, const fe a_p) { - register fe r asm ("r0") = r_p; - register const fe a asm ("r1") = a_p; + 
register sword32* r asm ("r0") = (sword32*)r_p; + register const sword32* a asm ("r1") = (const sword32*)a_p; __asm__ __volatile__ ( "bl fe_sq_op\n\t" @@ -2762,8 +2758,8 @@ void fe_sq(fe r_p, const fe a_p) void fe_mul121666(fe r_p, fe a_p) { - register fe r asm ("r0") = r_p; - register fe a asm ("r1") = a_p; + register sword32* r asm ("r0") = (sword32*)r_p; + register sword32* a asm ("r1") = (sword32*)a_p; __asm__ __volatile__ ( /* Multiply by 121666 */ @@ -2808,9 +2804,9 @@ void fe_mul121666(fe r_p, fe a_p) #ifndef WC_NO_CACHE_RESISTANT int curve25519(byte* r_p, const byte* n_p, const byte* a_p) { - register byte* r asm ("r0") = r_p; - register const byte* n asm ("r1") = n_p; - register const byte* a asm ("r2") = a_p; + register byte* r asm ("r0") = (byte*)r_p; + register const byte* n asm ("r1") = (const byte*)n_p; + register const byte* a asm ("r2") = (const byte*)a_p; __asm__ __volatile__ ( "sub sp, sp, #0xbc\n\t" @@ -3423,9 +3419,9 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) #else int curve25519(byte* r_p, const byte* n_p, const byte* a_p) { - register byte* r asm ("r0") = r_p; - register const byte* n asm ("r1") = n_p; - register const byte* a asm ("r2") = a_p; + register byte* r asm ("r0") = (byte*)r_p; + register const byte* n asm ("r1") = (const byte*)n_p; + register const byte* a asm ("r2") = (const byte*)a_p; __asm__ __volatile__ ( "sub sp, sp, #0xc0\n\t" @@ -3802,8 +3798,8 @@ int curve25519(byte* r_p, const byte* n_p, const byte* a_p) #ifdef HAVE_ED25519 void fe_invert(fe r_p, const fe a_p) { - register fe r asm ("r0") = r_p; - register const fe a asm ("r1") = a_p; + register sword32* r asm ("r0") = (sword32*)r_p; + register const sword32* a asm ("r1") = (const sword32*)a_p; __asm__ __volatile__ ( "sub sp, sp, #0x88\n\t" @@ -3972,8 +3968,8 @@ void fe_invert(fe r_p, const fe a_p) void fe_sq2(fe r_p, const fe a_p) { - register fe r asm ("r0") = r_p; - register const fe a asm ("r1") = a_p; + register sword32* r asm ("r0") = (sword32*)r_p; + register const sword32* a asm ("r1") = (const sword32*)a_p; __asm__ __volatile__ ( "sub sp, sp, #36\n\t" @@ -4138,8 +4134,8 @@ void fe_sq2(fe r_p, const fe a_p) void fe_pow22523(fe r_p, const fe a_p) { - register fe r asm ("r0") = r_p; - register const fe a asm ("r1") = a_p; + register sword32* r asm ("r0") = (sword32*)r_p; + register const sword32* a asm ("r1") = (const sword32*)a_p; __asm__ __volatile__ ( "sub sp, sp, #0x68\n\t" @@ -4308,8 +4304,8 @@ void fe_pow22523(fe r_p, const fe a_p) void ge_p1p1_to_p2(ge_p2 * r_p, const ge_p1p1 * p_p) { - register ge_p2 * r asm ("r0") = r_p; - register const ge_p1p1 * p asm ("r1") = p_p; + register ge_p2 * r asm ("r0") = (ge_p2 *)r_p; + register const ge_p1p1 * p asm ("r1") = (const ge_p1p1 *)p_p; __asm__ __volatile__ ( "sub sp, sp, #8\n\t" @@ -4338,8 +4334,8 @@ void ge_p1p1_to_p2(ge_p2 * r_p, const ge_p1p1 * p_p) void ge_p1p1_to_p3(ge_p3 * r_p, const ge_p1p1 * p_p) { - register ge_p3 * r asm ("r0") = r_p; - register const ge_p1p1 * p asm ("r1") = p_p; + register ge_p3 * r asm ("r0") = (ge_p3 *)r_p; + register const ge_p1p1 * p asm ("r1") = (const ge_p1p1 *)p_p; __asm__ __volatile__ ( "sub sp, sp, #8\n\t" @@ -4373,8 +4369,8 @@ void ge_p1p1_to_p3(ge_p3 * r_p, const ge_p1p1 * p_p) void ge_p2_dbl(ge_p1p1 * r_p, const ge_p2 * p_p) { - register ge_p1p1 * r asm ("r0") = r_p; - register const ge_p2 * p asm ("r1") = p_p; + register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; + register const ge_p2 * p asm ("r1") = (const ge_p2 *)p_p; __asm__ __volatile__ ( "sub sp, sp, #8\n\t" @@ -4420,9 +4416,9 
@@ void ge_p2_dbl(ge_p1p1 * r_p, const ge_p2 * p_p) void ge_madd(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p) { - register ge_p1p1 * r asm ("r0") = r_p; - register const ge_p3 * p asm ("r1") = p_p; - register const ge_precomp * q asm ("r2") = q_p; + register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; + register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p; + register const ge_precomp * q asm ("r2") = (const ge_precomp *)q_p; __asm__ __volatile__ ( "sub sp, sp, #12\n\t" @@ -4502,9 +4498,9 @@ void ge_madd(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p) void ge_msub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p) { - register ge_p1p1 * r asm ("r0") = r_p; - register const ge_p3 * p asm ("r1") = p_p; - register const ge_precomp * q asm ("r2") = q_p; + register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; + register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p; + register const ge_precomp * q asm ("r2") = (const ge_precomp *)q_p; __asm__ __volatile__ ( "sub sp, sp, #12\n\t" @@ -4585,9 +4581,9 @@ void ge_msub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p) void ge_add(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p) { - register ge_p1p1 * r asm ("r0") = r_p; - register const ge_p3 * p asm ("r1") = p_p; - register const ge_cached* q asm ("r2") = q_p; + register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; + register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p; + register const ge_cached* q asm ("r2") = (const ge_cached*)q_p; __asm__ __volatile__ ( "sub sp, sp, #44\n\t" @@ -4668,9 +4664,9 @@ void ge_add(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p) void ge_sub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p) { - register ge_p1p1 * r asm ("r0") = r_p; - register const ge_p3 * p asm ("r1") = p_p; - register const ge_cached* q asm ("r2") = q_p; + register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; + register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p; + register const ge_cached* q asm ("r2") = (const ge_cached*)q_p; __asm__ __volatile__ ( "sub sp, sp, #44\n\t" @@ -4751,7 +4747,7 @@ void ge_sub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p) void sc_reduce(byte* s_p) { - register byte* s asm ("r0") = s_p; + register byte* s asm ("r0") = (byte*)s_p; __asm__ __volatile__ ( "sub sp, sp, #52\n\t" @@ -5163,10 +5159,10 @@ void sc_reduce(byte* s_p) void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) { - register byte* s asm ("r0") = s_p; - register const byte* a asm ("r1") = a_p; - register const byte* b asm ("r2") = b_p; - register const byte* c asm ("r3") = c_p; + register byte* s asm ("r0") = (byte*)s_p; + register const byte* a asm ("r1") = (const byte*)a_p; + register const byte* b asm ("r2") = (const byte*)b_p; + register const byte* c asm ("r3") = (const byte*)c_p; __asm__ __volatile__ ( "sub sp, sp, #0x50\n\t" diff --git a/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c index 97b53420a..49301d7dc 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c @@ -28,6 +28,7 @@ #include #endif /* HAVE_CONFIG_H */ #include +#include #ifdef WOLFSSL_ARMASM #if !defined(__aarch64__) && defined(__arm__) @@ -36,6 +37,7 @@ #include #endif /* HAVE_CONFIG_H */ #include +#include #ifdef WOLFSSL_ARMASM_INLINE #ifndef NO_SHA256 #include @@ -63,13 +65,13 @@ static const uint32_t L_SHA256_transform_len_k[] = { void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len); void 
Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) { - register wc_Sha256* sha256 asm ("r0") = sha256_p; - register const byte* data asm ("r1") = data_p; - register word32 len asm ("r2") = len_p; + register wc_Sha256* sha256 asm ("r0") = (wc_Sha256*)sha256_p; + register const byte* data asm ("r1") = (const byte*)data_p; + register word32 len asm ("r2") = (word32)len_p; + register uint32_t* L_SHA256_transform_len_k_c asm ("r3") = (uint32_t*)&L_SHA256_transform_len_k; __asm__ __volatile__ ( "sub sp, sp, #0xc0\n\t" - "mov r3, %[L_SHA256_transform_len_k]\n\t" /* Copy digest to add in at end */ #if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) "ldr r4, [%[sha256]]\n\t" @@ -1587,9 +1589,9 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) "add %[data], %[data], #0x40\n\t" "bne L_SHA256_transform_len_begin_%=\n\t" "add sp, sp, #0xc0\n\t" - : [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len) - : [L_SHA256_transform_len_k] "g" (L_SHA256_transform_len_k) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + : [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len), [L_SHA256_transform_len_k] "+r" (L_SHA256_transform_len_k_c) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); } @@ -1619,9 +1621,10 @@ static const uint32_t L_SHA256_transform_neon_len_k[] = { void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len); void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) { - register wc_Sha256* sha256 asm ("r0") = sha256_p; - register const byte* data asm ("r1") = data_p; - register word32 len asm ("r2") = len_p; + register wc_Sha256* sha256 asm ("r0") = (wc_Sha256*)sha256_p; + register const byte* data asm ("r1") = (const byte*)data_p; + register word32 len asm ("r2") = (word32)len_p; + register uint32_t* L_SHA256_transform_neon_len_k_c asm ("r3") = (uint32_t*)&L_SHA256_transform_neon_len_k; __asm__ __volatile__ ( "sub sp, sp, #24\n\t" @@ -2648,9 +2651,9 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) "str r10, [sp, #8]\n\t" "bne L_SHA256_transform_neon_len_begin_%=\n\t" "add sp, sp, #24\n\t" - : [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len) - : [L_SHA256_transform_neon_len_k] "g" (L_SHA256_transform_neon_len_k) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11" + : [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len), [L_SHA256_transform_neon_len_k] "+r" (L_SHA256_transform_neon_len_k_c) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11" ); } diff --git a/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S b/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S index 1c4a7176f..ba50a88b8 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S @@ -7679,6 +7679,7 @@ L_SHA512_transform_neon_len_k: .type Transform_Sha512_Len, %function Transform_Sha512_Len: vpush {d8-d15} + adr r3, L_SHA512_transform_neon_len_k # Load digest into working vars vldm.64 r0, {d0-d7} # Start of loop processing a block @@ -7715,7 +7716,6 @@ L_SHA512_transform_neon_len_begin: vrev64.8 d30, d30 vrev64.8 d31, d31 #endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */ - adr r3, L_SHA512_transform_neon_len_k mov r12, #4 # Start of 16 rounds L_SHA512_transform_neon_len_start: @@ -9164,6 
+9164,7 @@ L_SHA512_transform_neon_len_start: #endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */ vstm.64 r0, {d0-d7} subs r2, r2, #0x80 + sub r3, r3, #0x280 bne L_SHA512_transform_neon_len_begin vpop {d8-d15} bx lr diff --git a/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c index 0a513ac04..0171ea883 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c @@ -28,6 +28,7 @@ #include #endif /* HAVE_CONFIG_H */ #include +#include #ifdef WOLFSSL_ARMASM #if !defined(__aarch64__) && defined(__arm__) @@ -36,64 +37,65 @@ #include #endif /* HAVE_CONFIG_H */ #include +#include #ifdef WOLFSSL_ARMASM_INLINE #ifdef WOLFSSL_SHA512 #include #ifdef WOLFSSL_ARMASM_NO_NEON static const uint64_t L_SHA512_transform_len_k[] = { - 0x428a2f98d728ae22, 0x7137449123ef65cd, - 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc, - 0x3956c25bf348b538, 0x59f111f1b605d019, - 0x923f82a4af194f9b, 0xab1c5ed5da6d8118, - 0xd807aa98a3030242, 0x12835b0145706fbe, - 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2, - 0x72be5d74f27b896f, 0x80deb1fe3b1696b1, - 0x9bdc06a725c71235, 0xc19bf174cf692694, - 0xe49b69c19ef14ad2, 0xefbe4786384f25e3, - 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65, - 0x2de92c6f592b0275, 0x4a7484aa6ea6e483, - 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5, - 0x983e5152ee66dfab, 0xa831c66d2db43210, - 0xb00327c898fb213f, 0xbf597fc7beef0ee4, - 0xc6e00bf33da88fc2, 0xd5a79147930aa725, - 0x06ca6351e003826f, 0x142929670a0e6e70, - 0x27b70a8546d22ffc, 0x2e1b21385c26c926, - 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df, - 0x650a73548baf63de, 0x766a0abb3c77b2a8, - 0x81c2c92e47edaee6, 0x92722c851482353b, - 0xa2bfe8a14cf10364, 0xa81a664bbc423001, - 0xc24b8b70d0f89791, 0xc76c51a30654be30, - 0xd192e819d6ef5218, 0xd69906245565a910, - 0xf40e35855771202a, 0x106aa07032bbd1b8, - 0x19a4c116b8d2d0c8, 0x1e376c085141ab53, - 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8, - 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb, - 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3, - 0x748f82ee5defb2fc, 0x78a5636f43172f60, - 0x84c87814a1f0ab72, 0x8cc702081a6439ec, - 0x90befffa23631e28, 0xa4506cebde82bde9, - 0xbef9a3f7b2c67915, 0xc67178f2e372532b, - 0xca273eceea26619c, 0xd186b8c721c0c207, - 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178, - 0x06f067aa72176fba, 0x0a637dc5a2c898a6, - 0x113f9804bef90dae, 0x1b710b35131c471b, - 0x28db77f523047d84, 0x32caab7b40c72493, - 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c, - 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, - 0x5fcb6fab3ad6faec, 0x6c44198c4a475817, + 0x428a2f98d728ae22UL, 0x7137449123ef65cdUL, + 0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL, + 0x3956c25bf348b538UL, 0x59f111f1b605d019UL, + 0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL, + 0xd807aa98a3030242UL, 0x12835b0145706fbeUL, + 0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL, + 0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL, + 0x9bdc06a725c71235UL, 0xc19bf174cf692694UL, + 0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL, + 0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL, + 0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL, + 0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL, + 0x983e5152ee66dfabUL, 0xa831c66d2db43210UL, + 0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL, + 0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL, + 0x06ca6351e003826fUL, 0x142929670a0e6e70UL, + 0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL, + 0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL, + 0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL, + 0x81c2c92e47edaee6UL, 0x92722c851482353bUL, + 0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL, + 0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL, + 0xd192e819d6ef5218UL, 0xd69906245565a910UL, + 
0xf40e35855771202aUL, 0x106aa07032bbd1b8UL, + 0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL, + 0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL, + 0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL, + 0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL, + 0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL, + 0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL, + 0x90befffa23631e28UL, 0xa4506cebde82bde9UL, + 0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL, + 0xca273eceea26619cUL, 0xd186b8c721c0c207UL, + 0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL, + 0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL, + 0x113f9804bef90daeUL, 0x1b710b35131c471bUL, + 0x28db77f523047d84UL, 0x32caab7b40c72493UL, + 0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL, + 0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL, + 0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL, }; void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len); void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) { - register wc_Sha512* sha512 asm ("r0") = sha512_p; - register const byte* data asm ("r1") = data_p; - register word32 len asm ("r2") = len_p; + register wc_Sha512* sha512 asm ("r0") = (wc_Sha512*)sha512_p; + register const byte* data asm ("r1") = (const byte*)data_p; + register word32 len asm ("r2") = (word32)len_p; + register uint64_t* L_SHA512_transform_len_k_c asm ("r3") = (uint64_t*)&L_SHA512_transform_len_k; __asm__ __volatile__ ( "sub sp, sp, #0xc0\n\t" - "mov r3, %[L_SHA512_transform_len_k]\n\t" /* Copy digest to add in at end */ #if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) "ldr r4, [%[sha512]]\n\t" @@ -7392,9 +7394,9 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "bne L_SHA512_transform_len_begin_%=\n\t" "eor r0, r0, r0\n\t" "add sp, sp, #0xc0\n\t" - : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len) - : [L_SHA512_transform_len_k] "g" (L_SHA512_transform_len_k) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len), [L_SHA512_transform_len_k] "+r" (L_SHA512_transform_len_k_c) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); } @@ -7403,54 +7405,55 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) #ifndef WOLFSSL_ARMASM_NO_NEON static const uint64_t L_SHA512_transform_neon_len_k[] = { - 0x428a2f98d728ae22, 0x7137449123ef65cd, - 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc, - 0x3956c25bf348b538, 0x59f111f1b605d019, - 0x923f82a4af194f9b, 0xab1c5ed5da6d8118, - 0xd807aa98a3030242, 0x12835b0145706fbe, - 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2, - 0x72be5d74f27b896f, 0x80deb1fe3b1696b1, - 0x9bdc06a725c71235, 0xc19bf174cf692694, - 0xe49b69c19ef14ad2, 0xefbe4786384f25e3, - 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65, - 0x2de92c6f592b0275, 0x4a7484aa6ea6e483, - 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5, - 0x983e5152ee66dfab, 0xa831c66d2db43210, - 0xb00327c898fb213f, 0xbf597fc7beef0ee4, - 0xc6e00bf33da88fc2, 0xd5a79147930aa725, - 0x06ca6351e003826f, 0x142929670a0e6e70, - 0x27b70a8546d22ffc, 0x2e1b21385c26c926, - 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df, - 0x650a73548baf63de, 0x766a0abb3c77b2a8, - 0x81c2c92e47edaee6, 0x92722c851482353b, - 0xa2bfe8a14cf10364, 0xa81a664bbc423001, - 0xc24b8b70d0f89791, 0xc76c51a30654be30, - 0xd192e819d6ef5218, 0xd69906245565a910, - 0xf40e35855771202a, 0x106aa07032bbd1b8, - 0x19a4c116b8d2d0c8, 0x1e376c085141ab53, - 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8, - 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb, - 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3, - 
0x748f82ee5defb2fc, 0x78a5636f43172f60, - 0x84c87814a1f0ab72, 0x8cc702081a6439ec, - 0x90befffa23631e28, 0xa4506cebde82bde9, - 0xbef9a3f7b2c67915, 0xc67178f2e372532b, - 0xca273eceea26619c, 0xd186b8c721c0c207, - 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178, - 0x06f067aa72176fba, 0x0a637dc5a2c898a6, - 0x113f9804bef90dae, 0x1b710b35131c471b, - 0x28db77f523047d84, 0x32caab7b40c72493, - 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c, - 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, - 0x5fcb6fab3ad6faec, 0x6c44198c4a475817, + 0x428a2f98d728ae22UL, 0x7137449123ef65cdUL, + 0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL, + 0x3956c25bf348b538UL, 0x59f111f1b605d019UL, + 0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL, + 0xd807aa98a3030242UL, 0x12835b0145706fbeUL, + 0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL, + 0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL, + 0x9bdc06a725c71235UL, 0xc19bf174cf692694UL, + 0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL, + 0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL, + 0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL, + 0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL, + 0x983e5152ee66dfabUL, 0xa831c66d2db43210UL, + 0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL, + 0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL, + 0x06ca6351e003826fUL, 0x142929670a0e6e70UL, + 0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL, + 0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL, + 0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL, + 0x81c2c92e47edaee6UL, 0x92722c851482353bUL, + 0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL, + 0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL, + 0xd192e819d6ef5218UL, 0xd69906245565a910UL, + 0xf40e35855771202aUL, 0x106aa07032bbd1b8UL, + 0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL, + 0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL, + 0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL, + 0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL, + 0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL, + 0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL, + 0x90befffa23631e28UL, 0xa4506cebde82bde9UL, + 0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL, + 0xca273eceea26619cUL, 0xd186b8c721c0c207UL, + 0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL, + 0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL, + 0x113f9804bef90daeUL, 0x1b710b35131c471bUL, + 0x28db77f523047d84UL, 0x32caab7b40c72493UL, + 0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL, + 0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL, + 0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL, }; void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len); void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) { - register wc_Sha512* sha512 asm ("r0") = sha512_p; - register const byte* data asm ("r1") = data_p; - register word32 len asm ("r2") = len_p; + register wc_Sha512* sha512 asm ("r0") = (wc_Sha512*)sha512_p; + register const byte* data asm ("r1") = (const byte*)data_p; + register word32 len asm ("r2") = (word32)len_p; + register uint64_t* L_SHA512_transform_neon_len_k_c asm ("r3") = (uint64_t*)&L_SHA512_transform_neon_len_k; __asm__ __volatile__ ( /* Load digest into working vars */ @@ -7490,7 +7493,6 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) "vrev64.8 d30, d30\n\t" "vrev64.8 d31, d31\n\t" #endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */ - "mov r3, %[L_SHA512_transform_neon_len_k]\n\t" "mov r12, #4\n\t" /* Start of 16 rounds */ "\n" @@ -8940,10 +8942,11 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) #endif /* WOLFSSL_ARM_ARCH_NEON_64BIT */ "vstm.64 %[sha512], {d0-d7}\n\t" "subs %[len], %[len], #0x80\n\t" + "sub r3, r3, #0x280\n\t" "bne 
L_SHA512_transform_neon_len_begin_%=\n\t" - : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len) - : [L_SHA512_transform_neon_len_k] "g" (L_SHA512_transform_neon_len_k) - : "memory", "r3", "r12", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" + : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len), [L_SHA512_transform_neon_len_k] "+r" (L_SHA512_transform_neon_len_k_c) + : + : "memory", "r12", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); } diff --git a/wolfcrypt/src/port/arm/armv8-aes.c b/wolfcrypt/src/port/arm/armv8-aes.c index e3a0ebaa4..c71df72bb 100644 --- a/wolfcrypt/src/port/arm/armv8-aes.c +++ b/wolfcrypt/src/port/arm/armv8-aes.c @@ -31,6 +31,7 @@ #endif #include +#include #if !defined(NO_AES) && defined(WOLFSSL_ARMASM) @@ -41,7 +42,6 @@ #ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO #include -#include #include #ifdef NO_INLINE #include @@ -5467,7 +5467,6 @@ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) #else /* !WOLFSSL_ARMASM_NO_HW_CRYPTO */ #include -#include #include #ifdef NO_INLINE #include diff --git a/wolfcrypt/src/port/arm/armv8-curve25519_c.c b/wolfcrypt/src/port/arm/armv8-curve25519_c.c index 2c9717519..cbfb69b7e 100644 --- a/wolfcrypt/src/port/arm/armv8-curve25519_c.c +++ b/wolfcrypt/src/port/arm/armv8-curve25519_c.c @@ -23,6 +23,7 @@ #include #endif /* HAVE_CONFIG_H */ #include +#include /* Generated using (from wolfssl): * cd ../scripts @@ -6312,9 +6313,9 @@ void ge_msub(ge_p1p1* r, const ge_p3* p, const ge_precomp* q) __asm__ __volatile__ ( "stp x29, x30, [sp, #-48]!\n\t" "add x29, sp, #0\n\t" - "str %w[r], [x29, #16]\n\t" - "str %w[p], [x29, #24]\n\t" - "str %w[q], [x29, #32]\n\t" + "str %x[r], [x29, #16]\n\t" + "str %x[p], [x29, #24]\n\t" + "str %x[q], [x29, #32]\n\t" "mov x3, x1\n\t" "add x2, x1, #32\n\t" "add x1, x0, #32\n\t" @@ -6808,9 +6809,9 @@ void ge_add(ge_p1p1* r, const ge_p3* p, const ge_cached* q) __asm__ __volatile__ ( "stp x29, x30, [sp, #-48]!\n\t" "add x29, sp, #0\n\t" - "str %w[r], [x29, #16]\n\t" - "str %w[p], [x29, #24]\n\t" - "str %w[q], [x29, #32]\n\t" + "str %x[r], [x29, #16]\n\t" + "str %x[p], [x29, #24]\n\t" + "str %x[q], [x29, #32]\n\t" "mov x3, x1\n\t" "add x2, x1, #32\n\t" "add x1, x0, #32\n\t" @@ -7430,9 +7431,9 @@ void ge_sub(ge_p1p1* r, const ge_p3* p, const ge_cached* q) __asm__ __volatile__ ( "stp x29, x30, [sp, #-48]!\n\t" "add x29, sp, #0\n\t" - "str %w[r], [x29, #16]\n\t" - "str %w[p], [x29, #24]\n\t" - "str %w[q], [x29, #32]\n\t" + "str %x[r], [x29, #16]\n\t" + "str %x[p], [x29, #24]\n\t" + "str %x[q], [x29, #32]\n\t" "mov x3, x1\n\t" "add x2, x1, #32\n\t" "add x1, x0, #32\n\t" diff --git a/wolfcrypt/src/port/arm/armv8-sha3-asm_c.c b/wolfcrypt/src/port/arm/armv8-sha3-asm_c.c index 0e6dc056e..3ac7e65eb 100644 --- a/wolfcrypt/src/port/arm/armv8-sha3-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-sha3-asm_c.c @@ -23,6 +23,7 @@ #include #endif /* HAVE_CONFIG_H */ #include +#include /* Generated using (from wolfssl): * cd ../scripts diff --git a/wolfcrypt/src/port/arm/armv8-sha512-asm_c.c b/wolfcrypt/src/port/arm/armv8-sha512-asm_c.c index 62f2ecbea..35053198f 100644 --- a/wolfcrypt/src/port/arm/armv8-sha512-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-sha512-asm_c.c @@ -23,6 +23,7 @@ #include #endif /* HAVE_CONFIG_H */ #include +#include /* Generated using (from wolfssl): * cd ../scripts diff --git 
a/wolfcrypt/src/port/arm/thumb2-aes-asm.S b/wolfcrypt/src/port/arm/thumb2-aes-asm.S new file mode 100644 index 000000000..1401be432 --- /dev/null +++ b/wolfcrypt/src/port/arm/thumb2-aes-asm.S @@ -0,0 +1,3126 @@ +/* thumb2-aes-asm + * + * Copyright (C) 2006-2023 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +/* Generated using (from wolfssl): + * cd ../scripts + * ruby ./aes/aes.rb thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-aes-asm.S + */ + +#ifdef HAVE_CONFIG_H + #include +#endif /* HAVE_CONFIG_H */ +#include + +#ifdef WOLFSSL_ARMASM +#if !defined(__aarch64__) && defined(__thumb__) +#ifndef WOLFSSL_ARMASM_INLINE + .thumb + .syntax unified +#ifndef NO_AES +#ifdef HAVE_AES_DECRYPT + .text + .type L_AES_Thumb2_td_data, %object + .size L_AES_Thumb2_td_data, 1024 + .align 4 +L_AES_Thumb2_td_data: + .word 0x5051f4a7 + .word 0x537e4165 + .word 0xc31a17a4 + .word 0x963a275e + .word 0xcb3bab6b + .word 0xf11f9d45 + .word 0xabacfa58 + .word 0x934be303 + .word 0x552030fa + .word 0xf6ad766d + .word 0x9188cc76 + .word 0x25f5024c + .word 0xfc4fe5d7 + .word 0xd7c52acb + .word 0x80263544 + .word 0x8fb562a3 + .word 0x49deb15a + .word 0x6725ba1b + .word 0x9845ea0e + .word 0xe15dfec0 + .word 0x2c32f75 + .word 0x12814cf0 + .word 0xa38d4697 + .word 0xc66bd3f9 + .word 0xe7038f5f + .word 0x9515929c + .word 0xebbf6d7a + .word 0xda955259 + .word 0x2dd4be83 + .word 0xd3587421 + .word 0x2949e069 + .word 0x448ec9c8 + .word 0x6a75c289 + .word 0x78f48e79 + .word 0x6b99583e + .word 0xdd27b971 + .word 0xb6bee14f + .word 0x17f088ad + .word 0x66c920ac + .word 0xb47dce3a + .word 0x1863df4a + .word 0x82e51a31 + .word 0x60975133 + .word 0x4562537f + .word 0xe0b16477 + .word 0x84bb6bae + .word 0x1cfe81a0 + .word 0x94f9082b + .word 0x58704868 + .word 0x198f45fd + .word 0x8794de6c + .word 0xb7527bf8 + .word 0x23ab73d3 + .word 0xe2724b02 + .word 0x57e31f8f + .word 0x2a6655ab + .word 0x7b2eb28 + .word 0x32fb5c2 + .word 0x9a86c57b + .word 0xa5d33708 + .word 0xf2302887 + .word 0xb223bfa5 + .word 0xba02036a + .word 0x5ced1682 + .word 0x2b8acf1c + .word 0x92a779b4 + .word 0xf0f307f2 + .word 0xa14e69e2 + .word 0xcd65daf4 + .word 0xd50605be + .word 0x1fd13462 + .word 0x8ac4a6fe + .word 0x9d342e53 + .word 0xa0a2f355 + .word 0x32058ae1 + .word 0x75a4f6eb + .word 0x390b83ec + .word 0xaa4060ef + .word 0x65e719f + .word 0x51bd6e10 + .word 0xf93e218a + .word 0x3d96dd06 + .word 0xaedd3e05 + .word 0x464de6bd + .word 0xb591548d + .word 0x571c45d + .word 0x6f0406d4 + .word 0xff605015 + .word 0x241998fb + .word 0x97d6bde9 + .word 0xcc894043 + .word 0x7767d99e + .word 0xbdb0e842 + .word 0x8807898b + .word 0x38e7195b + .word 0xdb79c8ee + .word 0x47a17c0a + .word 0xe97c420f + .word 0xc9f8841e + .word 0x0 + .word 0x83098086 + .word 0x48322bed + .word 0xac1e1170 + .word 0x4e6c5a72 + .word 0xfbfd0eff + .word 0x560f8538 + .word 
0x1e3daed5 + .word 0x27362d39 + .word 0x640a0fd9 + .word 0x21685ca6 + .word 0xd19b5b54 + .word 0x3a24362e + .word 0xb10c0a67 + .word 0xf9357e7 + .word 0xd2b4ee96 + .word 0x9e1b9b91 + .word 0x4f80c0c5 + .word 0xa261dc20 + .word 0x695a774b + .word 0x161c121a + .word 0xae293ba + .word 0xe5c0a02a + .word 0x433c22e0 + .word 0x1d121b17 + .word 0xb0e090d + .word 0xadf28bc7 + .word 0xb92db6a8 + .word 0xc8141ea9 + .word 0x8557f119 + .word 0x4caf7507 + .word 0xbbee99dd + .word 0xfda37f60 + .word 0x9ff70126 + .word 0xbc5c72f5 + .word 0xc544663b + .word 0x345bfb7e + .word 0x768b4329 + .word 0xdccb23c6 + .word 0x68b6edfc + .word 0x63b8e4f1 + .word 0xcad731dc + .word 0x10426385 + .word 0x40139722 + .word 0x2084c611 + .word 0x7d854a24 + .word 0xf8d2bb3d + .word 0x11aef932 + .word 0x6dc729a1 + .word 0x4b1d9e2f + .word 0xf3dcb230 + .word 0xec0d8652 + .word 0xd077c1e3 + .word 0x6c2bb316 + .word 0x99a970b9 + .word 0xfa119448 + .word 0x2247e964 + .word 0xc4a8fc8c + .word 0x1aa0f03f + .word 0xd8567d2c + .word 0xef223390 + .word 0xc787494e + .word 0xc1d938d1 + .word 0xfe8ccaa2 + .word 0x3698d40b + .word 0xcfa6f581 + .word 0x28a57ade + .word 0x26dab78e + .word 0xa43fadbf + .word 0xe42c3a9d + .word 0xd507892 + .word 0x9b6a5fcc + .word 0x62547e46 + .word 0xc2f68d13 + .word 0xe890d8b8 + .word 0x5e2e39f7 + .word 0xf582c3af + .word 0xbe9f5d80 + .word 0x7c69d093 + .word 0xa96fd52d + .word 0xb3cf2512 + .word 0x3bc8ac99 + .word 0xa710187d + .word 0x6ee89c63 + .word 0x7bdb3bbb + .word 0x9cd2678 + .word 0xf46e5918 + .word 0x1ec9ab7 + .word 0xa8834f9a + .word 0x65e6956e + .word 0x7eaaffe6 + .word 0x821bccf + .word 0xe6ef15e8 + .word 0xd9bae79b + .word 0xce4a6f36 + .word 0xd4ea9f09 + .word 0xd629b07c + .word 0xaf31a4b2 + .word 0x312a3f23 + .word 0x30c6a594 + .word 0xc035a266 + .word 0x37744ebc + .word 0xa6fc82ca + .word 0xb0e090d0 + .word 0x1533a7d8 + .word 0x4af10498 + .word 0xf741ecda + .word 0xe7fcd50 + .word 0x2f1791f6 + .word 0x8d764dd6 + .word 0x4d43efb0 + .word 0x54ccaa4d + .word 0xdfe49604 + .word 0xe39ed1b5 + .word 0x1b4c6a88 + .word 0xb8c12c1f + .word 0x7f466551 + .word 0x49d5eea + .word 0x5d018c35 + .word 0x73fa8774 + .word 0x2efb0b41 + .word 0x5ab3671d + .word 0x5292dbd2 + .word 0x33e91056 + .word 0x136dd647 + .word 0x8c9ad761 + .word 0x7a37a10c + .word 0x8e59f814 + .word 0x89eb133c + .word 0xeecea927 + .word 0x35b761c9 + .word 0xede11ce5 + .word 0x3c7a47b1 + .word 0x599cd2df + .word 0x3f55f273 + .word 0x791814ce + .word 0xbf73c737 + .word 0xea53f7cd + .word 0x5b5ffdaa + .word 0x14df3d6f + .word 0x867844db + .word 0x81caaff3 + .word 0x3eb968c4 + .word 0x2c382434 + .word 0x5fc2a340 + .word 0x72161dc3 + .word 0xcbce225 + .word 0x8b283c49 + .word 0x41ff0d95 + .word 0x7139a801 + .word 0xde080cb3 + .word 0x9cd8b4e4 + .word 0x906456c1 + .word 0x617bcb84 + .word 0x70d532b6 + .word 0x74486c5c + .word 0x42d0b857 +#endif /* HAVE_AES_DECRYPT */ +#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) + .text + .type L_AES_Thumb2_te_data, %object + .size L_AES_Thumb2_te_data, 1024 + .align 4 +L_AES_Thumb2_te_data: + .word 0xa5c66363 + .word 0x84f87c7c + .word 0x99ee7777 + .word 0x8df67b7b + .word 0xdfff2f2 + .word 0xbdd66b6b + .word 0xb1de6f6f + .word 0x5491c5c5 + .word 0x50603030 + .word 0x3020101 + .word 0xa9ce6767 + .word 0x7d562b2b + .word 0x19e7fefe + .word 0x62b5d7d7 + .word 0xe64dabab + .word 0x9aec7676 + .word 0x458fcaca + .word 0x9d1f8282 + .word 0x4089c9c9 + .word 0x87fa7d7d + .word 0x15effafa + .word 
0xebb25959 + .word 0xc98e4747 + .word 0xbfbf0f0 + .word 0xec41adad + .word 0x67b3d4d4 + .word 0xfd5fa2a2 + .word 0xea45afaf + .word 0xbf239c9c + .word 0xf753a4a4 + .word 0x96e47272 + .word 0x5b9bc0c0 + .word 0xc275b7b7 + .word 0x1ce1fdfd + .word 0xae3d9393 + .word 0x6a4c2626 + .word 0x5a6c3636 + .word 0x417e3f3f + .word 0x2f5f7f7 + .word 0x4f83cccc + .word 0x5c683434 + .word 0xf451a5a5 + .word 0x34d1e5e5 + .word 0x8f9f1f1 + .word 0x93e27171 + .word 0x73abd8d8 + .word 0x53623131 + .word 0x3f2a1515 + .word 0xc080404 + .word 0x5295c7c7 + .word 0x65462323 + .word 0x5e9dc3c3 + .word 0x28301818 + .word 0xa1379696 + .word 0xf0a0505 + .word 0xb52f9a9a + .word 0x90e0707 + .word 0x36241212 + .word 0x9b1b8080 + .word 0x3ddfe2e2 + .word 0x26cdebeb + .word 0x694e2727 + .word 0xcd7fb2b2 + .word 0x9fea7575 + .word 0x1b120909 + .word 0x9e1d8383 + .word 0x74582c2c + .word 0x2e341a1a + .word 0x2d361b1b + .word 0xb2dc6e6e + .word 0xeeb45a5a + .word 0xfb5ba0a0 + .word 0xf6a45252 + .word 0x4d763b3b + .word 0x61b7d6d6 + .word 0xce7db3b3 + .word 0x7b522929 + .word 0x3edde3e3 + .word 0x715e2f2f + .word 0x97138484 + .word 0xf5a65353 + .word 0x68b9d1d1 + .word 0x0 + .word 0x2cc1eded + .word 0x60402020 + .word 0x1fe3fcfc + .word 0xc879b1b1 + .word 0xedb65b5b + .word 0xbed46a6a + .word 0x468dcbcb + .word 0xd967bebe + .word 0x4b723939 + .word 0xde944a4a + .word 0xd4984c4c + .word 0xe8b05858 + .word 0x4a85cfcf + .word 0x6bbbd0d0 + .word 0x2ac5efef + .word 0xe54faaaa + .word 0x16edfbfb + .word 0xc5864343 + .word 0xd79a4d4d + .word 0x55663333 + .word 0x94118585 + .word 0xcf8a4545 + .word 0x10e9f9f9 + .word 0x6040202 + .word 0x81fe7f7f + .word 0xf0a05050 + .word 0x44783c3c + .word 0xba259f9f + .word 0xe34ba8a8 + .word 0xf3a25151 + .word 0xfe5da3a3 + .word 0xc0804040 + .word 0x8a058f8f + .word 0xad3f9292 + .word 0xbc219d9d + .word 0x48703838 + .word 0x4f1f5f5 + .word 0xdf63bcbc + .word 0xc177b6b6 + .word 0x75afdada + .word 0x63422121 + .word 0x30201010 + .word 0x1ae5ffff + .word 0xefdf3f3 + .word 0x6dbfd2d2 + .word 0x4c81cdcd + .word 0x14180c0c + .word 0x35261313 + .word 0x2fc3ecec + .word 0xe1be5f5f + .word 0xa2359797 + .word 0xcc884444 + .word 0x392e1717 + .word 0x5793c4c4 + .word 0xf255a7a7 + .word 0x82fc7e7e + .word 0x477a3d3d + .word 0xacc86464 + .word 0xe7ba5d5d + .word 0x2b321919 + .word 0x95e67373 + .word 0xa0c06060 + .word 0x98198181 + .word 0xd19e4f4f + .word 0x7fa3dcdc + .word 0x66442222 + .word 0x7e542a2a + .word 0xab3b9090 + .word 0x830b8888 + .word 0xca8c4646 + .word 0x29c7eeee + .word 0xd36bb8b8 + .word 0x3c281414 + .word 0x79a7dede + .word 0xe2bc5e5e + .word 0x1d160b0b + .word 0x76addbdb + .word 0x3bdbe0e0 + .word 0x56643232 + .word 0x4e743a3a + .word 0x1e140a0a + .word 0xdb924949 + .word 0xa0c0606 + .word 0x6c482424 + .word 0xe4b85c5c + .word 0x5d9fc2c2 + .word 0x6ebdd3d3 + .word 0xef43acac + .word 0xa6c46262 + .word 0xa8399191 + .word 0xa4319595 + .word 0x37d3e4e4 + .word 0x8bf27979 + .word 0x32d5e7e7 + .word 0x438bc8c8 + .word 0x596e3737 + .word 0xb7da6d6d + .word 0x8c018d8d + .word 0x64b1d5d5 + .word 0xd29c4e4e + .word 0xe049a9a9 + .word 0xb4d86c6c + .word 0xfaac5656 + .word 0x7f3f4f4 + .word 0x25cfeaea + .word 0xafca6565 + .word 0x8ef47a7a + .word 0xe947aeae + .word 0x18100808 + .word 0xd56fbaba + .word 0x88f07878 + .word 0x6f4a2525 + .word 0x725c2e2e + .word 0x24381c1c + .word 0xf157a6a6 + .word 0xc773b4b4 + .word 0x5197c6c6 + .word 0x23cbe8e8 + .word 0x7ca1dddd + .word 0x9ce87474 + .word 0x213e1f1f + .word 0xdd964b4b + .word 0xdc61bdbd + .word 0x860d8b8b + .word 0x850f8a8a + .word 0x90e07070 + .word 
0x427c3e3e + .word 0xc471b5b5 + .word 0xaacc6666 + .word 0xd8904848 + .word 0x5060303 + .word 0x1f7f6f6 + .word 0x121c0e0e + .word 0xa3c26161 + .word 0x5f6a3535 + .word 0xf9ae5757 + .word 0xd069b9b9 + .word 0x91178686 + .word 0x5899c1c1 + .word 0x273a1d1d + .word 0xb9279e9e + .word 0x38d9e1e1 + .word 0x13ebf8f8 + .word 0xb32b9898 + .word 0x33221111 + .word 0xbbd26969 + .word 0x70a9d9d9 + .word 0x89078e8e + .word 0xa7339494 + .word 0xb62d9b9b + .word 0x223c1e1e + .word 0x92158787 + .word 0x20c9e9e9 + .word 0x4987cece + .word 0xffaa5555 + .word 0x78502828 + .word 0x7aa5dfdf + .word 0x8f038c8c + .word 0xf859a1a1 + .word 0x80098989 + .word 0x171a0d0d + .word 0xda65bfbf + .word 0x31d7e6e6 + .word 0xc6844242 + .word 0xb8d06868 + .word 0xc3824141 + .word 0xb0299999 + .word 0x775a2d2d + .word 0x111e0f0f + .word 0xcb7bb0b0 + .word 0xfca85454 + .word 0xd66dbbbb + .word 0x3a2c1616 +#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#ifdef HAVE_AES_DECRYPT + .text + .type L_AES_Thumb2_td, %object + .size L_AES_Thumb2_td, 12 + .align 4 +L_AES_Thumb2_td: + .word L_AES_Thumb2_td_data +#endif /* HAVE_AES_DECRYPT */ +#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) + .text + .type L_AES_Thumb2_te, %object + .size L_AES_Thumb2_te, 12 + .align 4 +L_AES_Thumb2_te: + .word L_AES_Thumb2_te_data +#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#ifdef HAVE_AES_DECRYPT + .text + .align 4 + .globl AES_invert_key + .type AES_invert_key, %function +AES_invert_key: + PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} + LDR r12, L_AES_Thumb2_te + LDR lr, L_AES_Thumb2_td + ADD r10, r0, r1, LSL #4 + MOV r11, r1 +L_AES_invert_key_loop: + LDM r0, {r2, r3, r4, r5} + LDM r10, {r6, r7, r8, r9} + STM r10, {r2, r3, r4, r5} + STM r0!, {r6, r7, r8, r9} + SUBS r11, r11, #0x2 + SUB r10, r10, #0x10 + BNE L_AES_invert_key_loop + SUB r0, r0, r1, LSL #3 + ADD r0, r0, #0x10 + SUB r11, r1, #0x1 +L_AES_invert_key_mix_loop: + LDM r0, {r2, r3, r4, r5} + UBFX r6, r2, #0, #8 + UBFX r7, r2, #8, #8 + UBFX r8, r2, #16, #8 + LSR r9, r2, #24 + LDRB r6, [r12, r6, LSL #2] + LDRB r7, [r12, r7, LSL #2] + LDRB r8, [r12, r8, LSL #2] + LDRB r9, [r12, r9, LSL #2] + LDR r6, [lr, r6, LSL #2] + LDR r7, [lr, r7, LSL #2] + LDR r8, [lr, r8, LSL #2] + LDR r9, [lr, r9, LSL #2] + EOR r8, r8, r6, ROR #16 + EOR r8, r8, r7, ROR #8 + EOR r8, r8, r9, ROR #24 + STR r8, [r0], #4 + UBFX r6, r3, #0, #8 + UBFX r7, r3, #8, #8 + UBFX r8, r3, #16, #8 + LSR r9, r3, #24 + LDRB r6, [r12, r6, LSL #2] + LDRB r7, [r12, r7, LSL #2] + LDRB r8, [r12, r8, LSL #2] + LDRB r9, [r12, r9, LSL #2] + LDR r6, [lr, r6, LSL #2] + LDR r7, [lr, r7, LSL #2] + LDR r8, [lr, r8, LSL #2] + LDR r9, [lr, r9, LSL #2] + EOR r8, r8, r6, ROR #16 + EOR r8, r8, r7, ROR #8 + EOR r8, r8, r9, ROR #24 + STR r8, [r0], #4 + UBFX r6, r4, #0, #8 + UBFX r7, r4, #8, #8 + UBFX r8, r4, #16, #8 + LSR r9, r4, #24 + LDRB r6, [r12, r6, LSL #2] + LDRB r7, [r12, r7, LSL #2] + LDRB r8, [r12, r8, LSL #2] + LDRB r9, [r12, r9, LSL #2] + LDR r6, [lr, r6, LSL #2] + LDR r7, [lr, r7, LSL #2] + LDR r8, [lr, r8, LSL #2] + LDR r9, [lr, r9, LSL #2] + EOR r8, r8, r6, ROR #16 + EOR r8, r8, r7, ROR #8 + EOR r8, r8, r9, ROR #24 + STR r8, [r0], #4 + UBFX r6, r5, #0, #8 + UBFX r7, r5, #8, #8 + UBFX r8, r5, #16, #8 + LSR r9, r5, #24 + LDRB r6, [r12, r6, LSL #2] + LDRB r7, [r12, r7, LSL #2] + LDRB 
r8, [r12, r8, LSL #2] + LDRB r9, [r12, r9, LSL #2] + LDR r6, [lr, r6, LSL #2] + LDR r7, [lr, r7, LSL #2] + LDR r8, [lr, r8, LSL #2] + LDR r9, [lr, r9, LSL #2] + EOR r8, r8, r6, ROR #16 + EOR r8, r8, r7, ROR #8 + EOR r8, r8, r9, ROR #24 + STR r8, [r0], #4 + SUBS r11, r11, #0x1 + BNE L_AES_invert_key_mix_loop + POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} + # Cycle Count = 165 + .size AES_invert_key,.-AES_invert_key +#endif /* HAVE_AES_DECRYPT */ + .text + .type L_AES_Thumb2_rcon, %object + .size L_AES_Thumb2_rcon, 40 + .align 4 +L_AES_Thumb2_rcon: + .word 0x1000000 + .word 0x2000000 + .word 0x4000000 + .word 0x8000000 + .word 0x10000000 + .word 0x20000000 + .word 0x40000000 + .word 0x80000000 + .word 0x1b000000 + .word 0x36000000 + .text + .align 4 + .globl AES_set_encrypt_key + .type AES_set_encrypt_key, %function +AES_set_encrypt_key: + PUSH {r4, r5, r6, r7, r8, lr} + LDR r8, L_AES_Thumb2_te + ADR lr, L_AES_Thumb2_rcon + CMP r1, #0x80 + BEQ L_AES_set_encrypt_key_start_128 + CMP r1, #0xc0 + BEQ L_AES_set_encrypt_key_start_192 + LDRD r4, r5, [r0] + LDRD r6, r7, [r0, #8] + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + STM r2!, {r4, r5, r6, r7} + LDRD r4, r5, [r0, #16] + LDRD r6, r7, [r0, #24] + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + STM r2, {r4, r5, r6, r7} + SUB r2, r2, #0x10 + MOV r12, #0x6 +L_AES_set_encrypt_key_loop_256: + UBFX r4, r7, #0, #8 + UBFX r5, r7, #8, #8 + UBFX r6, r7, #16, #8 + LSR r7, r7, #24 + LDRB r4, [r8, r4, LSL #2] + LDRB r5, [r8, r5, LSL #2] + LDRB r6, [r8, r6, LSL #2] + LDRB r7, [r8, r7, LSL #2] + EOR r3, r7, r4, LSL #8 + EOR r3, r3, r5, LSL #16 + EOR r3, r3, r6, LSL #24 + LDM r2!, {r4, r5, r6, r7} + EOR r4, r4, r3 + LDM lr!, {r3} + EOR r4, r4, r3 + EOR r5, r5, r4 + EOR r6, r6, r5 + EOR r7, r7, r6 + ADD r2, r2, #0x10 + STM r2, {r4, r5, r6, r7} + SUB r2, r2, #0x10 + MOV r3, r7 + UBFX r4, r3, #8, #8 + UBFX r5, r3, #16, #8 + LSR r6, r3, #24 + UBFX r3, r3, #0, #8 + LDRB r4, [r8, r4, LSL #2] + LDRB r6, [r8, r6, LSL #2] + LDRB r5, [r8, r5, LSL #2] + LDRB r3, [r8, r3, LSL #2] + EOR r3, r3, r4, LSL #8 + EOR r3, r3, r5, LSL #16 + EOR r3, r3, r6, LSL #24 + LDM r2!, {r4, r5, r6, r7} + EOR r4, r4, r3 + EOR r5, r5, r4 + EOR r6, r6, r5 + EOR r7, r7, r6 + ADD r2, r2, #0x10 + STM r2, {r4, r5, r6, r7} + SUB r2, r2, #0x10 + SUBS r12, r12, #0x1 + BNE L_AES_set_encrypt_key_loop_256 + UBFX r4, r7, #0, #8 + UBFX r5, r7, #8, #8 + UBFX r6, r7, #16, #8 + LSR r7, r7, #24 + LDRB r4, [r8, r4, LSL #2] + LDRB r5, [r8, r5, LSL #2] + LDRB r6, [r8, r6, LSL #2] + LDRB r7, [r8, r7, LSL #2] + EOR r3, r7, r4, LSL #8 + EOR r3, r3, r5, LSL #16 + EOR r3, r3, r6, LSL #24 + LDM r2!, {r4, r5, r6, r7} + EOR r4, r4, r3 + LDM lr!, {r3} + EOR r4, r4, r3 + EOR r5, r5, r4 + EOR r6, r6, r5 + EOR r7, r7, r6 + ADD r2, r2, #0x10 + STM r2, {r4, r5, r6, r7} + SUB r2, r2, #0x10 + B L_AES_set_encrypt_key_end +L_AES_set_encrypt_key_start_192: + LDRD r4, r5, [r0] + LDRD r6, r7, [r0, #8] + LDRD r0, r1, [r0, #16] + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + REV r0, r0 + REV r1, r1 + STM r2, {r4, r5, r6, r7} + STRD r0, r1, [r2, #16] + MOV r7, r1 + MOV r12, #0x7 +L_AES_set_encrypt_key_loop_192: + UBFX r0, r7, #0, #8 + UBFX r1, r7, #8, #8 + UBFX r4, r7, #16, #8 + LSR r7, r7, #24 + LDRB r0, [r8, r0, LSL #2] + LDRB r1, [r8, r1, LSL #2] + LDRB r4, [r8, r4, LSL #2] + LDRB r7, [r8, r7, LSL #2] + EOR r3, r7, r0, LSL #8 + EOR r3, r3, r1, LSL #16 + EOR r3, r3, r4, LSL #24 + LDM r2!, {r0, r1, r4, r5, r6, r7} + EOR r0, r0, r3 + LDM lr!, {r3} + EOR r0, r0, r3 + EOR r1, r1, r0 + EOR r4, r4, r1 + EOR r5, r5, r4 + EOR 
r6, r6, r5 + EOR r7, r7, r6 + STM r2, {r0, r1, r4, r5, r6, r7} + SUBS r12, r12, #0x1 + BNE L_AES_set_encrypt_key_loop_192 + UBFX r0, r7, #0, #8 + UBFX r1, r7, #8, #8 + UBFX r4, r7, #16, #8 + LSR r7, r7, #24 + LDRB r0, [r8, r0, LSL #2] + LDRB r1, [r8, r1, LSL #2] + LDRB r4, [r8, r4, LSL #2] + LDRB r7, [r8, r7, LSL #2] + EOR r3, r7, r0, LSL #8 + EOR r3, r3, r1, LSL #16 + EOR r3, r3, r4, LSL #24 + LDM r2!, {r0, r1, r4, r5, r6, r7} + EOR r0, r0, r3 + LDM lr!, {r3} + EOR r0, r0, r3 + EOR r1, r1, r0 + EOR r4, r4, r1 + EOR r5, r5, r4 + STM r2, {r0, r1, r4, r5} + B L_AES_set_encrypt_key_end +L_AES_set_encrypt_key_start_128: + LDRD r4, r5, [r0] + LDRD r6, r7, [r0, #8] + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + STM r2, {r4, r5, r6, r7} + MOV r12, #0xa +L_AES_set_encrypt_key_loop_128: + UBFX r4, r7, #0, #8 + UBFX r5, r7, #8, #8 + UBFX r6, r7, #16, #8 + LSR r7, r7, #24 + LDRB r4, [r8, r4, LSL #2] + LDRB r5, [r8, r5, LSL #2] + LDRB r6, [r8, r6, LSL #2] + LDRB r7, [r8, r7, LSL #2] + EOR r3, r7, r4, LSL #8 + EOR r3, r3, r5, LSL #16 + EOR r3, r3, r6, LSL #24 + LDM r2!, {r4, r5, r6, r7} + EOR r4, r4, r3 + LDM lr!, {r3} + EOR r4, r4, r3 + EOR r5, r5, r4 + EOR r6, r6, r5 + EOR r7, r7, r6 + STM r2, {r4, r5, r6, r7} + SUBS r12, r12, #0x1 + BNE L_AES_set_encrypt_key_loop_128 +L_AES_set_encrypt_key_end: + POP {r4, r5, r6, r7, r8, pc} + # Cycle Count = 327 + .size AES_set_encrypt_key,.-AES_set_encrypt_key + .text + .align 4 + .globl AES_encrypt_block + .type AES_encrypt_block, %function +AES_encrypt_block: + PUSH {lr} +L_AES_encrypt_block_nr: + UBFX r8, r5, #16, #8 + LSR r11, r4, #24 + UBFX lr, r6, #8, #8 + UBFX r2, r7, #0, #8 + LDR r8, [r0, r8, LSL #2] + LDR r11, [r0, r11, LSL #2] + LDR lr, [r0, lr, LSL #2] + LDR r2, [r0, r2, LSL #2] + UBFX r9, r6, #16, #8 + EOR r8, r8, r11, ROR #24 + LSR r11, r5, #24 + EOR r8, r8, lr, ROR #8 + UBFX lr, r7, #8, #8 + EOR r8, r8, r2, ROR #16 + UBFX r2, r4, #0, #8 + LDR r9, [r0, r9, LSL #2] + LDR r11, [r0, r11, LSL #2] + LDR lr, [r0, lr, LSL #2] + LDR r2, [r0, r2, LSL #2] + UBFX r10, r7, #16, #8 + EOR r9, r9, r11, ROR #24 + LSR r11, r6, #24 + EOR r9, r9, lr, ROR #8 + UBFX lr, r4, #8, #8 + EOR r9, r9, r2, ROR #16 + UBFX r2, r5, #0, #8 + LDR r10, [r0, r10, LSL #2] + LDR r11, [r0, r11, LSL #2] + LDR lr, [r0, lr, LSL #2] + LDR r2, [r0, r2, LSL #2] + UBFX r6, r6, #0, #8 + EOR r10, r10, r11, ROR #24 + UBFX r11, r4, #16, #8 + EOR r10, r10, lr, ROR #8 + LSR lr, r7, #24 + EOR r10, r10, r2, ROR #16 + UBFX r2, r5, #8, #8 + LDR r6, [r0, r6, LSL #2] + LDR lr, [r0, lr, LSL #2] + LDR r11, [r0, r11, LSL #2] + LDR r2, [r0, r2, LSL #2] + EOR lr, lr, r6, ROR #24 + LDM r3!, {r4, r5, r6, r7} + EOR r11, r11, lr, ROR #24 + EOR r11, r11, r2, ROR #8 + # XOR in Key Schedule + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + UBFX r4, r9, #16, #8 + LSR r7, r8, #24 + UBFX lr, r10, #8, #8 + UBFX r2, r11, #0, #8 + LDR r4, [r0, r4, LSL #2] + LDR r7, [r0, r7, LSL #2] + LDR lr, [r0, lr, LSL #2] + LDR r2, [r0, r2, LSL #2] + UBFX r5, r10, #16, #8 + EOR r4, r4, r7, ROR #24 + LSR r7, r9, #24 + EOR r4, r4, lr, ROR #8 + UBFX lr, r11, #8, #8 + EOR r4, r4, r2, ROR #16 + UBFX r2, r8, #0, #8 + LDR r5, [r0, r5, LSL #2] + LDR r7, [r0, r7, LSL #2] + LDR lr, [r0, lr, LSL #2] + LDR r2, [r0, r2, LSL #2] + UBFX r6, r11, #16, #8 + EOR r5, r5, r7, ROR #24 + LSR r7, r10, #24 + EOR r5, r5, lr, ROR #8 + UBFX lr, r8, #8, #8 + EOR r5, r5, r2, ROR #16 + UBFX r2, r9, #0, #8 + LDR r6, [r0, r6, LSL #2] + LDR r7, [r0, r7, LSL #2] + LDR lr, [r0, lr, LSL #2] + LDR r2, [r0, r2, LSL #2] + UBFX r10, r10, #0, #8 + 
EOR r6, r6, r7, ROR #24 + UBFX r7, r8, #16, #8 + EOR r6, r6, lr, ROR #8 + LSR lr, r11, #24 + EOR r6, r6, r2, ROR #16 + UBFX r2, r9, #8, #8 + LDR r10, [r0, r10, LSL #2] + LDR lr, [r0, lr, LSL #2] + LDR r7, [r0, r7, LSL #2] + LDR r2, [r0, r2, LSL #2] + EOR lr, lr, r10, ROR #24 + LDM r3!, {r8, r9, r10, r11} + EOR r7, r7, lr, ROR #24 + EOR r7, r7, r2, ROR #8 + # XOR in Key Schedule + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + SUBS r1, r1, #0x1 + BNE L_AES_encrypt_block_nr + UBFX r8, r5, #16, #8 + LSR r11, r4, #24 + UBFX lr, r6, #8, #8 + UBFX r2, r7, #0, #8 + LDR r8, [r0, r8, LSL #2] + LDR r11, [r0, r11, LSL #2] + LDR lr, [r0, lr, LSL #2] + LDR r2, [r0, r2, LSL #2] + UBFX r9, r6, #16, #8 + EOR r8, r8, r11, ROR #24 + LSR r11, r5, #24 + EOR r8, r8, lr, ROR #8 + UBFX lr, r7, #8, #8 + EOR r8, r8, r2, ROR #16 + UBFX r2, r4, #0, #8 + LDR r9, [r0, r9, LSL #2] + LDR r11, [r0, r11, LSL #2] + LDR lr, [r0, lr, LSL #2] + LDR r2, [r0, r2, LSL #2] + UBFX r10, r7, #16, #8 + EOR r9, r9, r11, ROR #24 + LSR r11, r6, #24 + EOR r9, r9, lr, ROR #8 + UBFX lr, r4, #8, #8 + EOR r9, r9, r2, ROR #16 + UBFX r2, r5, #0, #8 + LDR r10, [r0, r10, LSL #2] + LDR r11, [r0, r11, LSL #2] + LDR lr, [r0, lr, LSL #2] + LDR r2, [r0, r2, LSL #2] + UBFX r6, r6, #0, #8 + EOR r10, r10, r11, ROR #24 + UBFX r11, r4, #16, #8 + EOR r10, r10, lr, ROR #8 + LSR lr, r7, #24 + EOR r10, r10, r2, ROR #16 + UBFX r2, r5, #8, #8 + LDR r6, [r0, r6, LSL #2] + LDR lr, [r0, lr, LSL #2] + LDR r11, [r0, r11, LSL #2] + LDR r2, [r0, r2, LSL #2] + EOR lr, lr, r6, ROR #24 + LDM r3!, {r4, r5, r6, r7} + EOR r11, r11, lr, ROR #24 + EOR r11, r11, r2, ROR #8 + # XOR in Key Schedule + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + UBFX r4, r11, #0, #8 + UBFX r7, r10, #8, #8 + UBFX lr, r9, #16, #8 + LSR r2, r8, #24 + LDRB r4, [r0, r4, LSL #2] + LDRB r7, [r0, r7, LSL #2] + LDRB lr, [r0, lr, LSL #2] + LDRB r2, [r0, r2, LSL #2] + UBFX r5, r8, #0, #8 + EOR r4, r4, r7, LSL #8 + UBFX r7, r11, #8, #8 + EOR r4, r4, lr, LSL #16 + UBFX lr, r10, #16, #8 + EOR r4, r4, r2, LSL #24 + LSR r2, r9, #24 + LDRB r5, [r0, r5, LSL #2] + LDRB r7, [r0, r7, LSL #2] + LDRB lr, [r0, lr, LSL #2] + LDRB r2, [r0, r2, LSL #2] + UBFX r6, r9, #0, #8 + EOR r5, r5, r7, LSL #8 + UBFX r7, r8, #8, #8 + EOR r5, r5, lr, LSL #16 + UBFX lr, r11, #16, #8 + EOR r5, r5, r2, LSL #24 + LSR r2, r10, #24 + LDRB r6, [r0, r6, LSL #2] + LDRB r7, [r0, r7, LSL #2] + LDRB lr, [r0, lr, LSL #2] + LDRB r2, [r0, r2, LSL #2] + LSR r11, r11, #24 + EOR r6, r6, r7, LSL #8 + UBFX r7, r10, #0, #8 + EOR r6, r6, lr, LSL #16 + UBFX lr, r9, #8, #8 + EOR r6, r6, r2, LSL #24 + UBFX r2, r8, #16, #8 + LDRB r11, [r0, r11, LSL #2] + LDRB r7, [r0, r7, LSL #2] + LDRB lr, [r0, lr, LSL #2] + LDRB r2, [r0, r2, LSL #2] + EOR lr, lr, r11, LSL #16 + LDM r3, {r8, r9, r10, r11} + EOR r7, r7, lr, LSL #8 + EOR r7, r7, r2, LSL #16 + # XOR in Key Schedule + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + POP {pc} + # Cycle Count = 285 + .size AES_encrypt_block,.-AES_encrypt_block +#if defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) + .text + .type L_AES_Thumb2_te_ecb, %object + .size L_AES_Thumb2_te_ecb, 12 + .align 4 +L_AES_Thumb2_te_ecb: + .word L_AES_Thumb2_te_data +#endif /* HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) + .text + .align 4 + 
.globl AES_ECB_encrypt + .type AES_ECB_encrypt, %function +AES_ECB_encrypt: + PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} + MOV lr, r0 + LDR r0, L_AES_Thumb2_te_ecb + LDR r12, [sp, #36] + PUSH {r3} + CMP r12, #0xa + BEQ L_AES_ECB_encrypt_start_block_128 + CMP r12, #0xc + BEQ L_AES_ECB_encrypt_start_block_192 +L_AES_ECB_encrypt_loop_block_256: + LDR r4, [lr] + LDR r5, [lr, #4] + LDR r6, [lr, #8] + LDR r7, [lr, #12] + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + PUSH {r1, r2, lr} + LDM r3!, {r8, r9, r10, r11} + # Round: 0 - XOR in key schedule + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + MOV r1, #0x6 + BL AES_encrypt_block + POP {r1, r2, lr} + LDR r3, [sp] + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + STR r4, [r1] + STR r5, [r1, #4] + STR r6, [r1, #8] + STR r7, [r1, #12] + SUBS r2, r2, #0x10 + ADD lr, lr, #0x10 + ADD r1, r1, #0x10 + BNE L_AES_ECB_encrypt_loop_block_256 + B L_AES_ECB_encrypt_end +L_AES_ECB_encrypt_start_block_192: +L_AES_ECB_encrypt_loop_block_192: + LDR r4, [lr] + LDR r5, [lr, #4] + LDR r6, [lr, #8] + LDR r7, [lr, #12] + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + PUSH {r1, r2, lr} + LDM r3!, {r8, r9, r10, r11} + # Round: 0 - XOR in key schedule + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + MOV r1, #0x5 + BL AES_encrypt_block + POP {r1, r2, lr} + LDR r3, [sp] + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + STR r4, [r1] + STR r5, [r1, #4] + STR r6, [r1, #8] + STR r7, [r1, #12] + SUBS r2, r2, #0x10 + ADD lr, lr, #0x10 + ADD r1, r1, #0x10 + BNE L_AES_ECB_encrypt_loop_block_192 + B L_AES_ECB_encrypt_end +L_AES_ECB_encrypt_start_block_128: +L_AES_ECB_encrypt_loop_block_128: + LDR r4, [lr] + LDR r5, [lr, #4] + LDR r6, [lr, #8] + LDR r7, [lr, #12] + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + PUSH {r1, r2, lr} + LDM r3!, {r8, r9, r10, r11} + # Round: 0 - XOR in key schedule + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + MOV r1, #0x4 + BL AES_encrypt_block + POP {r1, r2, lr} + LDR r3, [sp] + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + STR r4, [r1] + STR r5, [r1, #4] + STR r6, [r1, #8] + STR r7, [r1, #12] + SUBS r2, r2, #0x10 + ADD lr, lr, #0x10 + ADD r1, r1, #0x10 + BNE L_AES_ECB_encrypt_loop_block_128 +L_AES_ECB_encrypt_end: + POP {r3} + POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} + # Cycle Count = 212 + .size AES_ECB_encrypt,.-AES_ECB_encrypt +#endif /* HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#ifdef HAVE_AES_CBC + .text + .align 4 + .globl AES_CBC_encrypt + .type AES_CBC_encrypt, %function +AES_CBC_encrypt: + PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} + LDR r8, [sp, #36] + LDR r9, [sp, #40] + MOV lr, r0 + LDR r0, L_AES_Thumb2_te_ecb + LDM r9, {r4, r5, r6, r7} + PUSH {r3, r9} + CMP r8, #0xa + BEQ L_AES_CBC_encrypt_start_block_128 + CMP r8, #0xc + BEQ L_AES_CBC_encrypt_start_block_192 +L_AES_CBC_encrypt_loop_block_256: + LDR r8, [lr] + LDR r9, [lr, #4] + LDR r10, [lr, #8] + LDR r11, [lr, #12] + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + PUSH {r1, r2, lr} + LDM r3!, {r8, r9, r10, r11} + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + # Round: 0 - XOR in key schedule + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + MOV r1, #0x6 + BL AES_encrypt_block + POP {r1, r2, lr} + LDR r3, [sp] + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + STR r4, [r1] + STR r5, [r1, #4] + STR r6, [r1, #8] + STR r7, [r1, #12] + SUBS r2, r2, #0x10 + ADD lr, lr, #0x10 + ADD r1, r1, #0x10 + BNE 
L_AES_CBC_encrypt_loop_block_256 + B L_AES_CBC_encrypt_end +L_AES_CBC_encrypt_start_block_192: +L_AES_CBC_encrypt_loop_block_192: + LDR r8, [lr] + LDR r9, [lr, #4] + LDR r10, [lr, #8] + LDR r11, [lr, #12] + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + PUSH {r1, r2, lr} + LDM r3!, {r8, r9, r10, r11} + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + # Round: 0 - XOR in key schedule + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + MOV r1, #0x5 + BL AES_encrypt_block + POP {r1, r2, lr} + LDR r3, [sp] + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + STR r4, [r1] + STR r5, [r1, #4] + STR r6, [r1, #8] + STR r7, [r1, #12] + SUBS r2, r2, #0x10 + ADD lr, lr, #0x10 + ADD r1, r1, #0x10 + BNE L_AES_CBC_encrypt_loop_block_192 + B L_AES_CBC_encrypt_end +L_AES_CBC_encrypt_start_block_128: +L_AES_CBC_encrypt_loop_block_128: + LDR r8, [lr] + LDR r9, [lr, #4] + LDR r10, [lr, #8] + LDR r11, [lr, #12] + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + PUSH {r1, r2, lr} + LDM r3!, {r8, r9, r10, r11} + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + # Round: 0 - XOR in key schedule + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + MOV r1, #0x4 + BL AES_encrypt_block + POP {r1, r2, lr} + LDR r3, [sp] + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + STR r4, [r1] + STR r5, [r1, #4] + STR r6, [r1, #8] + STR r7, [r1, #12] + SUBS r2, r2, #0x10 + ADD lr, lr, #0x10 + ADD r1, r1, #0x10 + BNE L_AES_CBC_encrypt_loop_block_128 +L_AES_CBC_encrypt_end: + POP {r3, r9} + STM r9, {r4, r5, r6, r7} + POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} + # Cycle Count = 238 + .size AES_CBC_encrypt,.-AES_CBC_encrypt +#endif /* HAVE_AES_CBC */ +#ifdef WOLFSSL_AES_COUNTER + .text + .align 4 + .globl AES_CTR_encrypt + .type AES_CTR_encrypt, %function +AES_CTR_encrypt: + PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} + LDR r12, [sp, #36] + LDR r8, [sp, #40] + MOV lr, r0 + LDR r0, L_AES_Thumb2_te_ecb + LDM r8, {r4, r5, r6, r7} + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + STM r8, {r4, r5, r6, r7} + PUSH {r3, r8} + CMP r12, #0xa + BEQ L_AES_CTR_encrypt_start_block_128 + CMP r12, #0xc + BEQ L_AES_CTR_encrypt_start_block_192 +L_AES_CTR_encrypt_loop_block_256: + PUSH {r1, r2, lr} + LDR lr, [sp, #16] + ADDS r11, r7, #0x1 + ADCS r10, r6, #0x0 + ADCS r9, r5, #0x0 + ADC r8, r4, #0x0 + STM lr, {r8, r9, r10, r11} + LDM r3!, {r8, r9, r10, r11} + # Round: 0 - XOR in key schedule + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + MOV r1, #0x6 + BL AES_encrypt_block + POP {r1, r2, lr} + LDR r3, [sp] + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + LDR r8, [lr] + LDR r9, [lr, #4] + LDR r10, [lr, #8] + LDR r11, [lr, #12] + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + LDR r8, [sp, #4] + STR r4, [r1] + STR r5, [r1, #4] + STR r6, [r1, #8] + STR r7, [r1, #12] + LDM r8, {r4, r5, r6, r7} + SUBS r2, r2, #0x10 + ADD lr, lr, #0x10 + ADD r1, r1, #0x10 + BNE L_AES_CTR_encrypt_loop_block_256 + B L_AES_CTR_encrypt_end +L_AES_CTR_encrypt_start_block_192: +L_AES_CTR_encrypt_loop_block_192: + PUSH {r1, r2, lr} + LDR lr, [sp, #16] + ADDS r11, r7, #0x1 + ADCS r10, r6, #0x0 + ADCS r9, r5, #0x0 + ADC r8, r4, #0x0 + STM lr, {r8, r9, r10, r11} + LDM r3!, {r8, r9, r10, r11} + # Round: 0 - XOR in key schedule + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + MOV r1, #0x5 + BL AES_encrypt_block + POP {r1, r2, lr} + LDR r3, [sp] + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + LDR r8, [lr] + LDR r9, 
[lr, #4] + LDR r10, [lr, #8] + LDR r11, [lr, #12] + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + LDR r8, [sp, #4] + STR r4, [r1] + STR r5, [r1, #4] + STR r6, [r1, #8] + STR r7, [r1, #12] + LDM r8, {r4, r5, r6, r7} + SUBS r2, r2, #0x10 + ADD lr, lr, #0x10 + ADD r1, r1, #0x10 + BNE L_AES_CTR_encrypt_loop_block_192 + B L_AES_CTR_encrypt_end +L_AES_CTR_encrypt_start_block_128: +L_AES_CTR_encrypt_loop_block_128: + PUSH {r1, r2, lr} + LDR lr, [sp, #16] + ADDS r11, r7, #0x1 + ADCS r10, r6, #0x0 + ADCS r9, r5, #0x0 + ADC r8, r4, #0x0 + STM lr, {r8, r9, r10, r11} + LDM r3!, {r8, r9, r10, r11} + # Round: 0 - XOR in key schedule + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + MOV r1, #0x4 + BL AES_encrypt_block + POP {r1, r2, lr} + LDR r3, [sp] + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + LDR r8, [lr] + LDR r9, [lr, #4] + LDR r10, [lr, #8] + LDR r11, [lr, #12] + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + LDR r8, [sp, #4] + STR r4, [r1] + STR r5, [r1, #4] + STR r6, [r1, #8] + STR r7, [r1, #12] + LDM r8, {r4, r5, r6, r7} + SUBS r2, r2, #0x10 + ADD lr, lr, #0x10 + ADD r1, r1, #0x10 + BNE L_AES_CTR_encrypt_loop_block_128 +L_AES_CTR_encrypt_end: + POP {r3, r8} + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + STM r8, {r4, r5, r6, r7} + POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} + # Cycle Count = 293 + .size AES_CTR_encrypt,.-AES_CTR_encrypt +#endif /* WOLFSSL_AES_COUNTER */ +#ifdef HAVE_AES_DECRYPT +#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_CBC) + .text + .align 4 + .globl AES_decrypt_block + .type AES_decrypt_block, %function +AES_decrypt_block: + PUSH {lr} +L_AES_decrypt_block_nr: + UBFX r8, r7, #16, #8 + LSR r11, r4, #24 + UBFX lr, r6, #8, #8 + UBFX r2, r5, #0, #8 + LDR r8, [r0, r8, LSL #2] + LDR r11, [r0, r11, LSL #2] + LDR lr, [r0, lr, LSL #2] + LDR r2, [r0, r2, LSL #2] + UBFX r9, r4, #16, #8 + EOR r8, r8, r11, ROR #24 + LSR r11, r5, #24 + EOR r8, r8, lr, ROR #8 + UBFX lr, r7, #8, #8 + EOR r8, r8, r2, ROR #16 + UBFX r2, r6, #0, #8 + LDR r9, [r0, r9, LSL #2] + LDR r11, [r0, r11, LSL #2] + LDR lr, [r0, lr, LSL #2] + LDR r2, [r0, r2, LSL #2] + UBFX r10, r5, #16, #8 + EOR r9, r9, r11, ROR #24 + LSR r11, r6, #24 + EOR r9, r9, lr, ROR #8 + UBFX lr, r4, #8, #8 + EOR r9, r9, r2, ROR #16 + UBFX r2, r7, #0, #8 + LDR r10, [r0, r10, LSL #2] + LDR r11, [r0, r11, LSL #2] + LDR lr, [r0, lr, LSL #2] + LDR r2, [r0, r2, LSL #2] + UBFX r4, r4, #0, #8 + EOR r10, r10, r11, ROR #24 + UBFX r11, r6, #16, #8 + EOR r10, r10, lr, ROR #8 + LSR lr, r7, #24 + EOR r10, r10, r2, ROR #16 + UBFX r2, r5, #8, #8 + LDR r4, [r0, r4, LSL #2] + LDR lr, [r0, lr, LSL #2] + LDR r11, [r0, r11, LSL #2] + LDR r2, [r0, r2, LSL #2] + EOR lr, lr, r4, ROR #24 + LDM r3!, {r4, r5, r6, r7} + EOR r11, r11, r2, ROR #8 + EOR r11, r11, lr, ROR #24 + # XOR in Key Schedule + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + UBFX r4, r11, #16, #8 + LSR r7, r8, #24 + UBFX lr, r10, #8, #8 + UBFX r2, r9, #0, #8 + LDR r4, [r0, r4, LSL #2] + LDR r7, [r0, r7, LSL #2] + LDR lr, [r0, lr, LSL #2] + LDR r2, [r0, r2, LSL #2] + UBFX r5, r8, #16, #8 + EOR r4, r4, r7, ROR #24 + LSR r7, r9, #24 + EOR r4, r4, lr, ROR #8 + UBFX lr, r11, #8, #8 + EOR r4, r4, r2, ROR #16 + UBFX r2, r10, #0, #8 + LDR r5, [r0, r5, LSL #2] + LDR r7, [r0, r7, LSL #2] + LDR lr, [r0, lr, LSL #2] + LDR r2, [r0, r2, LSL #2] + UBFX r6, r9, #16, #8 + EOR r5, r5, r7, ROR #24 + LSR r7, r10, #24 + EOR r5, r5, lr, ROR #8 + UBFX lr, r8, #8, #8 + EOR r5, r5, 
r2, ROR #16 + UBFX r2, r11, #0, #8 + LDR r6, [r0, r6, LSL #2] + LDR r7, [r0, r7, LSL #2] + LDR lr, [r0, lr, LSL #2] + LDR r2, [r0, r2, LSL #2] + UBFX r8, r8, #0, #8 + EOR r6, r6, r7, ROR #24 + UBFX r7, r10, #16, #8 + EOR r6, r6, lr, ROR #8 + LSR lr, r11, #24 + EOR r6, r6, r2, ROR #16 + UBFX r2, r9, #8, #8 + LDR r8, [r0, r8, LSL #2] + LDR lr, [r0, lr, LSL #2] + LDR r7, [r0, r7, LSL #2] + LDR r2, [r0, r2, LSL #2] + EOR lr, lr, r8, ROR #24 + LDM r3!, {r8, r9, r10, r11} + EOR r7, r7, r2, ROR #8 + EOR r7, r7, lr, ROR #24 + # XOR in Key Schedule + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + SUBS r1, r1, #0x1 + BNE L_AES_decrypt_block_nr + UBFX r8, r7, #16, #8 + LSR r11, r4, #24 + UBFX lr, r6, #8, #8 + UBFX r2, r5, #0, #8 + LDR r8, [r0, r8, LSL #2] + LDR r11, [r0, r11, LSL #2] + LDR lr, [r0, lr, LSL #2] + LDR r2, [r0, r2, LSL #2] + UBFX r9, r4, #16, #8 + EOR r8, r8, r11, ROR #24 + LSR r11, r5, #24 + EOR r8, r8, lr, ROR #8 + UBFX lr, r7, #8, #8 + EOR r8, r8, r2, ROR #16 + UBFX r2, r6, #0, #8 + LDR r9, [r0, r9, LSL #2] + LDR r11, [r0, r11, LSL #2] + LDR lr, [r0, lr, LSL #2] + LDR r2, [r0, r2, LSL #2] + UBFX r10, r5, #16, #8 + EOR r9, r9, r11, ROR #24 + LSR r11, r6, #24 + EOR r9, r9, lr, ROR #8 + UBFX lr, r4, #8, #8 + EOR r9, r9, r2, ROR #16 + UBFX r2, r7, #0, #8 + LDR r10, [r0, r10, LSL #2] + LDR r11, [r0, r11, LSL #2] + LDR lr, [r0, lr, LSL #2] + LDR r2, [r0, r2, LSL #2] + UBFX r4, r4, #0, #8 + EOR r10, r10, r11, ROR #24 + UBFX r11, r6, #16, #8 + EOR r10, r10, lr, ROR #8 + LSR lr, r7, #24 + EOR r10, r10, r2, ROR #16 + UBFX r2, r5, #8, #8 + LDR r4, [r0, r4, LSL #2] + LDR lr, [r0, lr, LSL #2] + LDR r11, [r0, r11, LSL #2] + LDR r2, [r0, r2, LSL #2] + EOR lr, lr, r4, ROR #24 + LDM r3!, {r4, r5, r6, r7} + EOR r11, r11, r2, ROR #8 + EOR r11, r11, lr, ROR #24 + # XOR in Key Schedule + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + UBFX r4, r9, #0, #8 + UBFX r7, r10, #8, #8 + UBFX lr, r11, #16, #8 + LSR r2, r8, #24 + LDRB r4, [r12, r4] + LDRB r7, [r12, r7] + LDRB lr, [r12, lr] + LDRB r2, [r12, r2] + UBFX r5, r10, #0, #8 + EOR r4, r4, r7, LSL #8 + UBFX r7, r11, #8, #8 + EOR r4, r4, lr, LSL #16 + UBFX lr, r8, #16, #8 + EOR r4, r4, r2, LSL #24 + LSR r2, r9, #24 + LDRB r7, [r12, r7] + LDRB r2, [r12, r2] + LDRB r5, [r12, r5] + LDRB lr, [r12, lr] + UBFX r6, r11, #0, #8 + EOR r5, r5, r7, LSL #8 + UBFX r7, r8, #8, #8 + EOR r5, r5, lr, LSL #16 + UBFX lr, r9, #16, #8 + EOR r5, r5, r2, LSL #24 + LSR r2, r10, #24 + LDRB r7, [r12, r7] + LDRB r2, [r12, r2] + LDRB r6, [r12, r6] + LDRB lr, [r12, lr] + LSR r11, r11, #24 + EOR r6, r6, r7, LSL #8 + UBFX r7, r8, #0, #8 + EOR r6, r6, lr, LSL #16 + UBFX lr, r9, #8, #8 + EOR r6, r6, r2, LSL #24 + UBFX r2, r10, #16, #8 + LDRB r11, [r12, r11] + LDRB lr, [r12, lr] + LDRB r7, [r12, r7] + LDRB r2, [r12, r2] + EOR lr, lr, r11, LSL #16 + LDM r3, {r8, r9, r10, r11} + EOR r7, r7, lr, LSL #8 + EOR r7, r7, r2, LSL #16 + # XOR in Key Schedule + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + POP {pc} + # Cycle Count = 285 + .size AES_decrypt_block,.-AES_decrypt_block + .text + .type L_AES_Thumb2_td_ecb, %object + .size L_AES_Thumb2_td_ecb, 12 + .align 4 +L_AES_Thumb2_td_ecb: + .word L_AES_Thumb2_td_data + .text + .type L_AES_Thumb2_td4, %object + .size L_AES_Thumb2_td4, 256 + .align 4 +L_AES_Thumb2_td4: + .byte 0x52 + .byte 0x9 + .byte 0x6a + .byte 0xd5 + .byte 0x30 + .byte 0x36 + .byte 0xa5 + .byte 0x38 + .byte 0xbf + .byte 0x40 + .byte 0xa3 + .byte 0x9e + .byte 0x81 + .byte 0xf3 + .byte 0xd7 + .byte 0xfb + .byte 
0x7c + .byte 0xe3 + .byte 0x39 + .byte 0x82 + .byte 0x9b + .byte 0x2f + .byte 0xff + .byte 0x87 + .byte 0x34 + .byte 0x8e + .byte 0x43 + .byte 0x44 + .byte 0xc4 + .byte 0xde + .byte 0xe9 + .byte 0xcb + .byte 0x54 + .byte 0x7b + .byte 0x94 + .byte 0x32 + .byte 0xa6 + .byte 0xc2 + .byte 0x23 + .byte 0x3d + .byte 0xee + .byte 0x4c + .byte 0x95 + .byte 0xb + .byte 0x42 + .byte 0xfa + .byte 0xc3 + .byte 0x4e + .byte 0x8 + .byte 0x2e + .byte 0xa1 + .byte 0x66 + .byte 0x28 + .byte 0xd9 + .byte 0x24 + .byte 0xb2 + .byte 0x76 + .byte 0x5b + .byte 0xa2 + .byte 0x49 + .byte 0x6d + .byte 0x8b + .byte 0xd1 + .byte 0x25 + .byte 0x72 + .byte 0xf8 + .byte 0xf6 + .byte 0x64 + .byte 0x86 + .byte 0x68 + .byte 0x98 + .byte 0x16 + .byte 0xd4 + .byte 0xa4 + .byte 0x5c + .byte 0xcc + .byte 0x5d + .byte 0x65 + .byte 0xb6 + .byte 0x92 + .byte 0x6c + .byte 0x70 + .byte 0x48 + .byte 0x50 + .byte 0xfd + .byte 0xed + .byte 0xb9 + .byte 0xda + .byte 0x5e + .byte 0x15 + .byte 0x46 + .byte 0x57 + .byte 0xa7 + .byte 0x8d + .byte 0x9d + .byte 0x84 + .byte 0x90 + .byte 0xd8 + .byte 0xab + .byte 0x0 + .byte 0x8c + .byte 0xbc + .byte 0xd3 + .byte 0xa + .byte 0xf7 + .byte 0xe4 + .byte 0x58 + .byte 0x5 + .byte 0xb8 + .byte 0xb3 + .byte 0x45 + .byte 0x6 + .byte 0xd0 + .byte 0x2c + .byte 0x1e + .byte 0x8f + .byte 0xca + .byte 0x3f + .byte 0xf + .byte 0x2 + .byte 0xc1 + .byte 0xaf + .byte 0xbd + .byte 0x3 + .byte 0x1 + .byte 0x13 + .byte 0x8a + .byte 0x6b + .byte 0x3a + .byte 0x91 + .byte 0x11 + .byte 0x41 + .byte 0x4f + .byte 0x67 + .byte 0xdc + .byte 0xea + .byte 0x97 + .byte 0xf2 + .byte 0xcf + .byte 0xce + .byte 0xf0 + .byte 0xb4 + .byte 0xe6 + .byte 0x73 + .byte 0x96 + .byte 0xac + .byte 0x74 + .byte 0x22 + .byte 0xe7 + .byte 0xad + .byte 0x35 + .byte 0x85 + .byte 0xe2 + .byte 0xf9 + .byte 0x37 + .byte 0xe8 + .byte 0x1c + .byte 0x75 + .byte 0xdf + .byte 0x6e + .byte 0x47 + .byte 0xf1 + .byte 0x1a + .byte 0x71 + .byte 0x1d + .byte 0x29 + .byte 0xc5 + .byte 0x89 + .byte 0x6f + .byte 0xb7 + .byte 0x62 + .byte 0xe + .byte 0xaa + .byte 0x18 + .byte 0xbe + .byte 0x1b + .byte 0xfc + .byte 0x56 + .byte 0x3e + .byte 0x4b + .byte 0xc6 + .byte 0xd2 + .byte 0x79 + .byte 0x20 + .byte 0x9a + .byte 0xdb + .byte 0xc0 + .byte 0xfe + .byte 0x78 + .byte 0xcd + .byte 0x5a + .byte 0xf4 + .byte 0x1f + .byte 0xdd + .byte 0xa8 + .byte 0x33 + .byte 0x88 + .byte 0x7 + .byte 0xc7 + .byte 0x31 + .byte 0xb1 + .byte 0x12 + .byte 0x10 + .byte 0x59 + .byte 0x27 + .byte 0x80 + .byte 0xec + .byte 0x5f + .byte 0x60 + .byte 0x51 + .byte 0x7f + .byte 0xa9 + .byte 0x19 + .byte 0xb5 + .byte 0x4a + .byte 0xd + .byte 0x2d + .byte 0xe5 + .byte 0x7a + .byte 0x9f + .byte 0x93 + .byte 0xc9 + .byte 0x9c + .byte 0xef + .byte 0xa0 + .byte 0xe0 + .byte 0x3b + .byte 0x4d + .byte 0xae + .byte 0x2a + .byte 0xf5 + .byte 0xb0 + .byte 0xc8 + .byte 0xeb + .byte 0xbb + .byte 0x3c + .byte 0x83 + .byte 0x53 + .byte 0x99 + .byte 0x61 + .byte 0x17 + .byte 0x2b + .byte 0x4 + .byte 0x7e + .byte 0xba + .byte 0x77 + .byte 0xd6 + .byte 0x26 + .byte 0xe1 + .byte 0x69 + .byte 0x14 + .byte 0x63 + .byte 0x55 + .byte 0x21 + .byte 0xc + .byte 0x7d +#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) + .text + .align 4 + .globl AES_ECB_decrypt + .type AES_ECB_decrypt, %function +AES_ECB_decrypt: + PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} + LDR r8, [sp, #36] + MOV lr, r0 + LDR r0, L_AES_Thumb2_td_ecb + ADR r12, L_AES_Thumb2_td4 + CMP r8, #0xa + BEQ L_AES_ECB_decrypt_start_block_128 + CMP r8, #0xc + BEQ L_AES_ECB_decrypt_start_block_192 +L_AES_ECB_decrypt_loop_block_256: + LDR r4, 
[lr] + LDR r5, [lr, #4] + LDR r6, [lr, #8] + LDR r7, [lr, #12] + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + PUSH {r1, r2, r3, lr} + LDM r3!, {r8, r9, r10, r11} + # Round: 0 - XOR in key schedule + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + MOV r1, #0x6 + BL AES_decrypt_block + POP {r1, r2, r3, lr} + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + STR r4, [r1] + STR r5, [r1, #4] + STR r6, [r1, #8] + STR r7, [r1, #12] + SUBS r2, r2, #0x10 + ADD lr, lr, #0x10 + ADD r1, r1, #0x10 + BNE L_AES_ECB_decrypt_loop_block_256 + B L_AES_ECB_decrypt_end +L_AES_ECB_decrypt_start_block_192: +L_AES_ECB_decrypt_loop_block_192: + LDR r4, [lr] + LDR r5, [lr, #4] + LDR r6, [lr, #8] + LDR r7, [lr, #12] + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + PUSH {r1, r2, r3, lr} + LDM r3!, {r8, r9, r10, r11} + # Round: 0 - XOR in key schedule + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + MOV r1, #0x5 + BL AES_decrypt_block + POP {r1, r2, r3, lr} + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + STR r4, [r1] + STR r5, [r1, #4] + STR r6, [r1, #8] + STR r7, [r1, #12] + SUBS r2, r2, #0x10 + ADD lr, lr, #0x10 + ADD r1, r1, #0x10 + BNE L_AES_ECB_decrypt_loop_block_192 + B L_AES_ECB_decrypt_end +L_AES_ECB_decrypt_start_block_128: +L_AES_ECB_decrypt_loop_block_128: + LDR r4, [lr] + LDR r5, [lr, #4] + LDR r6, [lr, #8] + LDR r7, [lr, #12] + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + PUSH {r1, r2, r3, lr} + LDM r3!, {r8, r9, r10, r11} + # Round: 0 - XOR in key schedule + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + MOV r1, #0x4 + BL AES_decrypt_block + POP {r1, r2, r3, lr} + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + STR r4, [r1] + STR r5, [r1, #4] + STR r6, [r1, #8] + STR r7, [r1, #12] + SUBS r2, r2, #0x10 + ADD lr, lr, #0x10 + ADD r1, r1, #0x10 + BNE L_AES_ECB_decrypt_loop_block_128 +L_AES_ECB_decrypt_end: + POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} + # Cycle Count = 209 + .size AES_ECB_decrypt,.-AES_ECB_decrypt +#endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#ifdef HAVE_AES_CBC + .text + .align 4 + .globl AES_CBC_decrypt + .type AES_CBC_decrypt, %function +AES_CBC_decrypt: + PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} + LDR r8, [sp, #36] + LDR r4, [sp, #40] + MOV lr, r0 + LDR r0, L_AES_Thumb2_td_ecb + ADR r12, L_AES_Thumb2_td4 + PUSH {r3, r4} + CMP r8, #0xa + BEQ L_AES_CBC_decrypt_loop_block_128 + CMP r8, #0xc + BEQ L_AES_CBC_decrypt_loop_block_192 +L_AES_CBC_decrypt_loop_block_256: + PUSH {r1, r2, lr} + LDR r4, [lr] + LDR r5, [lr, #4] + LDR r6, [lr, #8] + LDR r7, [lr, #12] + LDR lr, [sp, #16] + STRD r4, r5, [lr, #16] + STRD r6, r7, [lr, #24] + LDM r3!, {r8, r9, r10, r11} + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + # Round: 0 - XOR in key schedule + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + MOV r1, #0x6 + BL AES_decrypt_block + LDR lr, [sp, #16] + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + LDM lr, {r8, r9, r10, r11} + POP {r1, r2, lr} + LDR r3, [sp] + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + STR r4, [r1] + STR r5, [r1, #4] + STR r6, [r1, #8] + STR r7, [r1, #12] + SUBS r2, r2, #0x10 + ADD lr, lr, #0x10 + ADD r1, r1, #0x10 + BEQ L_AES_CBC_decrypt_end_odd + PUSH {r1, r2, lr} + LDR r4, [lr] + LDR r5, [lr, #4] + LDR r6, [lr, #8] + LDR r7, [lr, #12] + LDR lr, [sp, #16] + STRD r4, r5, [lr] + STRD r6, r7, [lr, #8] + LDM r3!, {r8, r9, r10, r11} + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + # Round: 0 - XOR in key schedule + 
EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + MOV r1, #0x6 + BL AES_decrypt_block + LDR lr, [sp, #16] + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + LDRD r8, r9, [lr, #16] + LDRD r10, r11, [lr, #24] + POP {r1, r2, lr} + LDR r3, [sp] + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + STR r4, [r1] + STR r5, [r1, #4] + STR r6, [r1, #8] + STR r7, [r1, #12] + SUBS r2, r2, #0x10 + ADD lr, lr, #0x10 + ADD r1, r1, #0x10 + BNE L_AES_CBC_decrypt_loop_block_256 + B L_AES_CBC_decrypt_end +L_AES_CBC_decrypt_loop_block_192: + PUSH {r1, r2, lr} + LDR r4, [lr] + LDR r5, [lr, #4] + LDR r6, [lr, #8] + LDR r7, [lr, #12] + LDR lr, [sp, #16] + STRD r4, r5, [lr, #16] + STRD r6, r7, [lr, #24] + LDM r3!, {r8, r9, r10, r11} + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + # Round: 0 - XOR in key schedule + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + MOV r1, #0x5 + BL AES_decrypt_block + LDR lr, [sp, #16] + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + LDM lr, {r8, r9, r10, r11} + POP {r1, r2, lr} + LDR r3, [sp] + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + STR r4, [r1] + STR r5, [r1, #4] + STR r6, [r1, #8] + STR r7, [r1, #12] + SUBS r2, r2, #0x10 + ADD lr, lr, #0x10 + ADD r1, r1, #0x10 + BEQ L_AES_CBC_decrypt_end_odd + PUSH {r1, r2, lr} + LDR r4, [lr] + LDR r5, [lr, #4] + LDR r6, [lr, #8] + LDR r7, [lr, #12] + LDR lr, [sp, #16] + STRD r4, r5, [lr] + STRD r6, r7, [lr, #8] + LDM r3!, {r8, r9, r10, r11} + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + # Round: 0 - XOR in key schedule + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + MOV r1, #0x5 + BL AES_decrypt_block + LDR lr, [sp, #16] + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + LDRD r8, r9, [lr, #16] + LDRD r10, r11, [lr, #24] + POP {r1, r2, lr} + LDR r3, [sp] + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + STR r4, [r1] + STR r5, [r1, #4] + STR r6, [r1, #8] + STR r7, [r1, #12] + SUBS r2, r2, #0x10 + ADD lr, lr, #0x10 + ADD r1, r1, #0x10 + BNE L_AES_CBC_decrypt_loop_block_192 + B L_AES_CBC_decrypt_end +L_AES_CBC_decrypt_loop_block_128: + PUSH {r1, r2, lr} + LDR r4, [lr] + LDR r5, [lr, #4] + LDR r6, [lr, #8] + LDR r7, [lr, #12] + LDR lr, [sp, #16] + STRD r4, r5, [lr, #16] + STRD r6, r7, [lr, #24] + LDM r3!, {r8, r9, r10, r11} + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + # Round: 0 - XOR in key schedule + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + MOV r1, #0x4 + BL AES_decrypt_block + LDR lr, [sp, #16] + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + LDM lr, {r8, r9, r10, r11} + POP {r1, r2, lr} + LDR r3, [sp] + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + STR r4, [r1] + STR r5, [r1, #4] + STR r6, [r1, #8] + STR r7, [r1, #12] + SUBS r2, r2, #0x10 + ADD lr, lr, #0x10 + ADD r1, r1, #0x10 + BEQ L_AES_CBC_decrypt_end_odd + PUSH {r1, r2, lr} + LDR r4, [lr] + LDR r5, [lr, #4] + LDR r6, [lr, #8] + LDR r7, [lr, #12] + LDR lr, [sp, #16] + STRD r4, r5, [lr] + STRD r6, r7, [lr, #8] + LDM r3!, {r8, r9, r10, r11} + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + # Round: 0 - XOR in key schedule + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + MOV r1, #0x4 + BL AES_decrypt_block + LDR lr, [sp, #16] + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + LDRD r8, r9, [lr, #16] + LDRD r10, r11, [lr, #24] + POP {r1, r2, lr} + LDR r3, [sp] + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + STR r4, [r1] 
+ STR r5, [r1, #4] + STR r6, [r1, #8] + STR r7, [r1, #12] + SUBS r2, r2, #0x10 + ADD lr, lr, #0x10 + ADD r1, r1, #0x10 + BNE L_AES_CBC_decrypt_loop_block_128 + B L_AES_CBC_decrypt_end +L_AES_CBC_decrypt_end_odd: + LDR r4, [sp, #4] + LDRD r8, r9, [r4, #16] + LDRD r10, r11, [r4, #24] + STRD r8, r9, [r4] + STRD r10, r11, [r4, #8] +L_AES_CBC_decrypt_end: + POP {r3, r4} + POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} + # Cycle Count = 517 + .size AES_CBC_decrypt,.-AES_CBC_decrypt +#endif /* HAVE_AES_CBC */ +#endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER || HAVE_AES_CBC */ +#endif /* HAVE_AES_DECRYPT */ +#ifdef HAVE_AESGCM + .text + .type L_GCM_gmult_len_r, %object + .size L_GCM_gmult_len_r, 64 + .align 4 +L_GCM_gmult_len_r: + .word 0x0 + .word 0x1c200000 + .word 0x38400000 + .word 0x24600000 + .word 0x70800000 + .word 0x6ca00000 + .word 0x48c00000 + .word 0x54e00000 + .word 0xe1000000 + .word 0xfd200000 + .word 0xd9400000 + .word 0xc5600000 + .word 0x91800000 + .word 0x8da00000 + .word 0xa9c00000 + .word 0xb5e00000 + .text + .align 4 + .globl GCM_gmult_len + .type GCM_gmult_len, %function +GCM_gmult_len: + PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} + ADR lr, L_GCM_gmult_len_r +L_GCM_gmult_len_start_block: + PUSH {r3} + LDR r12, [r0, #12] + LDR r3, [r2, #12] + EOR r12, r12, r3 + LSR r3, r12, #24 + AND r3, r3, #0xf + ADD r3, r1, r3, LSL #4 + LDM r3, {r8, r9, r10, r11} + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + LSR r4, r12, #28 + EOR r11, r11, r10, LSL #28 + LDR r3, [lr, r3, LSL #2] + ADD r4, r1, r4, LSL #4 + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + LDM r4, {r4, r5, r6, r7} + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + LSR r4, r12, #16 + EOR r11, r11, r10, LSL #28 + AND r4, r4, #0xf + LDR r3, [lr, r3, LSL #2] + ADD r4, r1, r4, LSL #4 + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + LDM r4, {r4, r5, r6, r7} + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + LSR r4, r12, #20 + EOR r11, r11, r10, LSL #28 + AND r4, r4, #0xf + LDR r3, [lr, r3, LSL #2] + ADD r4, r1, r4, LSL #4 + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + LDM r4, {r4, r5, r6, r7} + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + LSR r4, r12, #8 + EOR r11, r11, r10, LSL #28 + AND r4, r4, #0xf + LDR r3, [lr, r3, LSL #2] + ADD r4, r1, r4, LSL #4 + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + LDM r4, {r4, r5, r6, r7} + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + LSR r4, r12, #12 + EOR r11, r11, r10, LSL #28 + AND r4, r4, #0xf + LDR r3, [lr, r3, LSL #2] + ADD r4, r1, r4, LSL #4 + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + LDM r4, {r4, r5, r6, r7} + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + AND r4, r12, #0xf + EOR r11, r11, r10, LSL #28 + LDR r3, [lr, r3, LSL #2] + ADD r4, r1, r4, LSL #4 + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + LDM r4, {r4, r5, r6, r7} + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + 
EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + LSR r4, r12, #4 + EOR r11, r11, r10, LSL #28 + AND r4, r4, #0xf + LDR r3, [lr, r3, LSL #2] + ADD r4, r1, r4, LSL #4 + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + LDM r4, {r4, r5, r6, r7} + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + EOR r11, r11, r10, LSL #28 + LDR r3, [lr, r3, LSL #2] + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + LDR r12, [r0, #8] + LDR r3, [r2, #8] + EOR r12, r12, r3 + LSR r3, r12, #24 + AND r3, r3, #0xf + ADD r3, r1, r3, LSL #4 + LDM r3, {r4, r5, r6, r7} + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + LSR r4, r12, #28 + EOR r11, r11, r10, LSL #28 + LDR r3, [lr, r3, LSL #2] + ADD r4, r1, r4, LSL #4 + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + LDM r4, {r4, r5, r6, r7} + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + LSR r4, r12, #16 + EOR r11, r11, r10, LSL #28 + AND r4, r4, #0xf + LDR r3, [lr, r3, LSL #2] + ADD r4, r1, r4, LSL #4 + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + LDM r4, {r4, r5, r6, r7} + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + LSR r4, r12, #20 + EOR r11, r11, r10, LSL #28 + AND r4, r4, #0xf + LDR r3, [lr, r3, LSL #2] + ADD r4, r1, r4, LSL #4 + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + LDM r4, {r4, r5, r6, r7} + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + LSR r4, r12, #8 + EOR r11, r11, r10, LSL #28 + AND r4, r4, #0xf + LDR r3, [lr, r3, LSL #2] + ADD r4, r1, r4, LSL #4 + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + LDM r4, {r4, r5, r6, r7} + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + LSR r4, r12, #12 + EOR r11, r11, r10, LSL #28 + AND r4, r4, #0xf + LDR r3, [lr, r3, LSL #2] + ADD r4, r1, r4, LSL #4 + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + LDM r4, {r4, r5, r6, r7} + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + AND r4, r12, #0xf + EOR r11, r11, r10, LSL #28 + LDR r3, [lr, r3, LSL #2] + ADD r4, r1, r4, LSL #4 + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + LDM r4, {r4, r5, r6, r7} + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + LSR r4, r12, #4 + EOR r11, r11, r10, LSL #28 + AND r4, r4, #0xf + LDR r3, [lr, r3, LSL #2] + ADD r4, r1, r4, LSL #4 + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + LDM r4, {r4, r5, r6, r7} + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + EOR r11, r11, r10, LSL #28 + LDR r3, [lr, r3, LSL #2] + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + EOR r9, r9, r8, LSL #28 + EOR 
r8, r3, r8, LSR #4 + LDR r12, [r0, #4] + LDR r3, [r2, #4] + EOR r12, r12, r3 + LSR r3, r12, #24 + AND r3, r3, #0xf + ADD r3, r1, r3, LSL #4 + LDM r3, {r4, r5, r6, r7} + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + LSR r4, r12, #28 + EOR r11, r11, r10, LSL #28 + LDR r3, [lr, r3, LSL #2] + ADD r4, r1, r4, LSL #4 + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + LDM r4, {r4, r5, r6, r7} + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + LSR r4, r12, #16 + EOR r11, r11, r10, LSL #28 + AND r4, r4, #0xf + LDR r3, [lr, r3, LSL #2] + ADD r4, r1, r4, LSL #4 + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + LDM r4, {r4, r5, r6, r7} + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + LSR r4, r12, #20 + EOR r11, r11, r10, LSL #28 + AND r4, r4, #0xf + LDR r3, [lr, r3, LSL #2] + ADD r4, r1, r4, LSL #4 + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + LDM r4, {r4, r5, r6, r7} + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + LSR r4, r12, #8 + EOR r11, r11, r10, LSL #28 + AND r4, r4, #0xf + LDR r3, [lr, r3, LSL #2] + ADD r4, r1, r4, LSL #4 + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + LDM r4, {r4, r5, r6, r7} + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + LSR r4, r12, #12 + EOR r11, r11, r10, LSL #28 + AND r4, r4, #0xf + LDR r3, [lr, r3, LSL #2] + ADD r4, r1, r4, LSL #4 + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + LDM r4, {r4, r5, r6, r7} + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + AND r4, r12, #0xf + EOR r11, r11, r10, LSL #28 + LDR r3, [lr, r3, LSL #2] + ADD r4, r1, r4, LSL #4 + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + LDM r4, {r4, r5, r6, r7} + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + LSR r4, r12, #4 + EOR r11, r11, r10, LSL #28 + AND r4, r4, #0xf + LDR r3, [lr, r3, LSL #2] + ADD r4, r1, r4, LSL #4 + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + LDM r4, {r4, r5, r6, r7} + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + EOR r11, r11, r10, LSL #28 + LDR r3, [lr, r3, LSL #2] + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + LDR r12, [r0] + LDR r3, [r2] + EOR r12, r12, r3 + LSR r3, r12, #24 + AND r3, r3, #0xf + ADD r3, r1, r3, LSL #4 + LDM r3, {r4, r5, r6, r7} + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + LSR r4, r12, #28 + EOR r11, r11, r10, LSL #28 + LDR r3, [lr, r3, LSL #2] + ADD r4, r1, r4, LSL #4 + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + LDM r4, {r4, r5, r6, r7} + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, 
r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + LSR r4, r12, #16 + EOR r11, r11, r10, LSL #28 + AND r4, r4, #0xf + LDR r3, [lr, r3, LSL #2] + ADD r4, r1, r4, LSL #4 + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + LDM r4, {r4, r5, r6, r7} + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + LSR r4, r12, #20 + EOR r11, r11, r10, LSL #28 + AND r4, r4, #0xf + LDR r3, [lr, r3, LSL #2] + ADD r4, r1, r4, LSL #4 + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + LDM r4, {r4, r5, r6, r7} + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + LSR r4, r12, #8 + EOR r11, r11, r10, LSL #28 + AND r4, r4, #0xf + LDR r3, [lr, r3, LSL #2] + ADD r4, r1, r4, LSL #4 + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + LDM r4, {r4, r5, r6, r7} + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + LSR r4, r12, #12 + EOR r11, r11, r10, LSL #28 + AND r4, r4, #0xf + LDR r3, [lr, r3, LSL #2] + ADD r4, r1, r4, LSL #4 + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + LDM r4, {r4, r5, r6, r7} + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + AND r4, r12, #0xf + EOR r11, r11, r10, LSL #28 + LDR r3, [lr, r3, LSL #2] + ADD r4, r1, r4, LSL #4 + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + LDM r4, {r4, r5, r6, r7} + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + LSR r6, r10, #4 + AND r3, r11, #0xf + LSR r11, r11, #4 + LSR r4, r12, #4 + EOR r11, r11, r10, LSL #28 + AND r4, r4, #0xf + LDR r3, [lr, r3, LSL #2] + ADD r4, r1, r4, LSL #4 + EOR r10, r6, r9, LSL #28 + LSR r9, r9, #4 + LDM r4, {r4, r5, r6, r7} + EOR r9, r9, r8, LSL #28 + EOR r8, r3, r8, LSR #4 + EOR r8, r8, r4 + EOR r9, r9, r5 + EOR r10, r10, r6 + EOR r11, r11, r7 + REV r8, r8 + REV r9, r9 + REV r10, r10 + REV r11, r11 + STM r0, {r8, r9, r10, r11} + POP {r3} + SUBS r3, r3, #0x10 + ADD r2, r2, #0x10 + BNE L_GCM_gmult_len_start_block + POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} + # Cycle Count = 742 + .size GCM_gmult_len,.-GCM_gmult_len + .text + .type L_AES_Thumb2_te_gcm, %object + .size L_AES_Thumb2_te_gcm, 12 + .align 4 +L_AES_Thumb2_te_gcm: + .word L_AES_Thumb2_te_data + .text + .align 4 + .globl AES_GCM_encrypt + .type AES_GCM_encrypt, %function +AES_GCM_encrypt: + PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} + LDR r12, [sp, #36] + LDR r8, [sp, #40] + MOV lr, r0 + LDR r0, L_AES_Thumb2_te_gcm + LDM r8, {r4, r5, r6, r7} + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + STM r8, {r4, r5, r6, r7} + PUSH {r3, r8} + CMP r12, #0xa + BEQ L_AES_GCM_encrypt_start_block_128 + CMP r12, #0xc + BEQ L_AES_GCM_encrypt_start_block_192 +L_AES_GCM_encrypt_loop_block_256: + PUSH {r1, r2, lr} + LDR lr, [sp, #16] + ADD r7, r7, #0x1 + LDM r3!, {r8, r9, r10, r11} + STR r7, [lr, #12] + # Round: 0 - XOR in key schedule + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + MOV r1, #0x6 + BL AES_encrypt_block + POP {r1, r2, lr} + LDR r3, [sp] + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + LDR r8, [lr] + LDR r9, [lr, #4] + LDR r10, [lr, #8] + LDR r11, [lr, #12] + EOR r4, r4, r8 + EOR 
r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + LDR r8, [sp, #4] + STR r4, [r1] + STR r5, [r1, #4] + STR r6, [r1, #8] + STR r7, [r1, #12] + LDM r8, {r4, r5, r6, r7} + SUBS r2, r2, #0x10 + ADD lr, lr, #0x10 + ADD r1, r1, #0x10 + BNE L_AES_GCM_encrypt_loop_block_256 + B L_AES_GCM_encrypt_end +L_AES_GCM_encrypt_start_block_192: +L_AES_GCM_encrypt_loop_block_192: + PUSH {r1, r2, lr} + LDR lr, [sp, #16] + ADD r7, r7, #0x1 + LDM r3!, {r8, r9, r10, r11} + STR r7, [lr, #12] + # Round: 0 - XOR in key schedule + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + MOV r1, #0x5 + BL AES_encrypt_block + POP {r1, r2, lr} + LDR r3, [sp] + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + LDR r8, [lr] + LDR r9, [lr, #4] + LDR r10, [lr, #8] + LDR r11, [lr, #12] + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + LDR r8, [sp, #4] + STR r4, [r1] + STR r5, [r1, #4] + STR r6, [r1, #8] + STR r7, [r1, #12] + LDM r8, {r4, r5, r6, r7} + SUBS r2, r2, #0x10 + ADD lr, lr, #0x10 + ADD r1, r1, #0x10 + BNE L_AES_GCM_encrypt_loop_block_192 + B L_AES_GCM_encrypt_end +L_AES_GCM_encrypt_start_block_128: +L_AES_GCM_encrypt_loop_block_128: + PUSH {r1, r2, lr} + LDR lr, [sp, #16] + ADD r7, r7, #0x1 + LDM r3!, {r8, r9, r10, r11} + STR r7, [lr, #12] + # Round: 0 - XOR in key schedule + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + MOV r1, #0x4 + BL AES_encrypt_block + POP {r1, r2, lr} + LDR r3, [sp] + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + LDR r8, [lr] + LDR r9, [lr, #4] + LDR r10, [lr, #8] + LDR r11, [lr, #12] + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r6, r6, r10 + EOR r7, r7, r11 + LDR r8, [sp, #4] + STR r4, [r1] + STR r5, [r1, #4] + STR r6, [r1, #8] + STR r7, [r1, #12] + LDM r8, {r4, r5, r6, r7} + SUBS r2, r2, #0x10 + ADD lr, lr, #0x10 + ADD r1, r1, #0x10 + BNE L_AES_GCM_encrypt_loop_block_128 +L_AES_GCM_encrypt_end: + POP {r3, r8} + REV r4, r4 + REV r5, r5 + REV r6, r6 + REV r7, r7 + STM r8, {r4, r5, r6, r7} + POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} + # Cycle Count = 275 + .size AES_GCM_encrypt,.-AES_GCM_encrypt +#endif /* HAVE_AESGCM */ +#endif /* !NO_AES */ +#endif /* !__aarch64__ && __thumb__ */ +#endif /* WOLFSSL_ARMASM */ + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif +#endif /* !WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c b/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c new file mode 100644 index 000000000..0a29de362 --- /dev/null +++ b/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c @@ -0,0 +1,2613 @@ +/* thumb2-aes-asm + * + * Copyright (C) 2006-2023 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
+ */
+
+/* Generated using (from wolfssl):
+ * cd ../scripts
+ * ruby ./aes/aes.rb thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-aes-asm.c
+ */
+
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif /* HAVE_CONFIG_H */
+#include <wolfssl/wolfcrypt/settings.h>
+
+#ifdef WOLFSSL_ARMASM
+#if !defined(__aarch64__) && defined(__thumb__)
+#include <stdint.h>
+#ifdef HAVE_CONFIG_H
+    #include <config.h>
+#endif /* HAVE_CONFIG_H */
+#include <wolfssl/wolfcrypt/settings.h>
+#ifdef WOLFSSL_ARMASM_INLINE
+#ifndef NO_AES
+#include <wolfssl/wolfcrypt/aes.h>
+
+#ifdef HAVE_AES_DECRYPT
+static const uint32_t L_AES_Thumb2_td_data[] = {
+    0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e,
+    0xcb3bab6b, 0xf11f9d45, 0xabacfa58, 0x934be303,
+    0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c,
+    0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3,
+    0x49deb15a, 0x6725ba1b, 0x9845ea0e, 0xe15dfec0,
+    0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9,
+    0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259,
+    0x2dd4be83, 0xd3587421, 0x2949e069, 0x448ec9c8,
+    0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971,
+    0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a,
+    0x1863df4a, 0x82e51a31, 0x60975133, 0x4562537f,
+    0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b,
+    0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8,
+    0x23ab73d3, 0xe2724b02, 0x57e31f8f, 0x2a6655ab,
+    0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708,
+    0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682,
+    0x2b8acf1c, 0x92a779b4, 0xf0f307f2, 0xa14e69e2,
+    0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe,
+    0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb,
+    0x390b83ec, 0xaa4060ef, 0x065e719f, 0x51bd6e10,
+    0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd,
+    0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015,
+    0x241998fb, 0x97d6bde9, 0xcc894043, 0x7767d99e,
+    0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee,
+    0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000,
+    0x83098086, 0x48322bed, 0xac1e1170, 0x4e6c5a72,
+    0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39,
+    0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e,
+    0xb10c0a67, 0x0f9357e7, 0xd2b4ee96, 0x9e1b9b91,
+    0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a,
+    0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17,
+    0x0b0e090d, 0xadf28bc7, 0xb92db6a8, 0xc8141ea9,
+    0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60,
+    0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e,
+    0x768b4329, 0xdccb23c6, 0x68b6edfc, 0x63b8e4f1,
+    0xcad731dc, 0x10426385, 0x40139722, 0x2084c611,
+    0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1,
+    0x4b1d9e2f, 0xf3dcb230, 0xec0d8652, 0xd077c1e3,
+    0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964,
+    0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390,
+    0xc787494e, 0xc1d938d1, 0xfe8ccaa2, 0x3698d40b,
+    0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf,
+    0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46,
+    0xc2f68d13, 0xe890d8b8, 0x5e2e39f7, 0xf582c3af,
+    0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512,
+    0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb,
+    0x09cd2678, 0xf46e5918, 0x01ec9ab7, 0xa8834f9a,
+    0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8,
+    0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c,
+    0xaf31a4b2, 0x312a3f23, 0x30c6a594, 0xc035a266,
+    0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8,
+    0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6,
+    0x8d764dd6, 0x4d43efb0, 0x54ccaa4d, 0xdfe49604,
+    0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551,
+    0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41,
+    0x5ab3671d, 0x5292dbd2, 0x33e91056, 0x136dd647,
+    0x8c9ad761, 0x7a37a10c,
0x8e59f814, 0x89eb133c, + 0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1, + 0x599cd2df, 0x3f55f273, 0x791814ce, 0xbf73c737, + 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db, + 0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340, + 0x72161dc3, 0x0cbce225, 0x8b283c49, 0x41ff0d95, + 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1, + 0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857, +}; + +#endif /* HAVE_AES_DECRYPT */ +#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) +static const uint32_t L_AES_Thumb2_te_data[] = { + 0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, + 0x0dfff2f2, 0xbdd66b6b, 0xb1de6f6f, 0x5491c5c5, + 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b, + 0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676, + 0x458fcaca, 0x9d1f8282, 0x4089c9c9, 0x87fa7d7d, + 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0, + 0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf, + 0xbf239c9c, 0xf753a4a4, 0x96e47272, 0x5b9bc0c0, + 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626, + 0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc, + 0x5c683434, 0xf451a5a5, 0x34d1e5e5, 0x08f9f1f1, + 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515, + 0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3, + 0x28301818, 0xa1379696, 0x0f0a0505, 0xb52f9a9a, + 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2, + 0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575, + 0x1b120909, 0x9e1d8383, 0x74582c2c, 0x2e341a1a, + 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0, + 0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3, + 0x7b522929, 0x3edde3e3, 0x715e2f2f, 0x97138484, + 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded, + 0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b, + 0xbed46a6a, 0x468dcbcb, 0xd967bebe, 0x4b723939, + 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf, + 0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb, + 0xc5864343, 0xd79a4d4d, 0x55663333, 0x94118585, + 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f, + 0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8, + 0xf3a25151, 0xfe5da3a3, 0xc0804040, 0x8a058f8f, + 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5, + 0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121, + 0x30201010, 0x1ae5ffff, 0x0efdf3f3, 0x6dbfd2d2, + 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec, + 0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717, + 0x5793c4c4, 0xf255a7a7, 0x82fc7e7e, 0x477a3d3d, + 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373, + 0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc, + 0x66442222, 0x7e542a2a, 0xab3b9090, 0x830b8888, + 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414, + 0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb, + 0x3bdbe0e0, 0x56643232, 0x4e743a3a, 0x1e140a0a, + 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c, + 0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262, + 0xa8399191, 0xa4319595, 0x37d3e4e4, 0x8bf27979, + 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d, + 0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9, + 0xb4d86c6c, 0xfaac5656, 0x07f3f4f4, 0x25cfeaea, + 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808, + 0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e, + 0x24381c1c, 0xf157a6a6, 0xc773b4b4, 0x5197c6c6, + 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f, + 0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a, + 0x90e07070, 0x427c3e3e, 0xc471b5b5, 0xaacc6666, + 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e, + 0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9, + 0x91178686, 0x5899c1c1, 0x273a1d1d, 0xb9279e9e, + 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111, + 0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494, + 0xb62d9b9b, 0x223c1e1e, 
0x92158787, 0x20c9e9e9, + 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf, + 0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d, + 0xda65bfbf, 0x31d7e6e6, 0xc6844242, 0xb8d06868, + 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f, + 0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616, +}; + +#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#ifdef HAVE_AES_DECRYPT +static const uint32_t* L_AES_Thumb2_td = L_AES_Thumb2_td_data; +#endif /* HAVE_AES_DECRYPT */ +#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) +static const uint32_t* L_AES_Thumb2_te = L_AES_Thumb2_te_data; +#endif /* HAVE_AES_DECRYPT || HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#ifdef HAVE_AES_DECRYPT +void AES_invert_key(unsigned char* ks, word32 rounds); +void AES_invert_key(unsigned char* ks_p, word32 rounds_p) +{ + register unsigned char* ks asm ("r0") = (unsigned char*)ks_p; + register word32 rounds asm ("r1") = (word32)rounds_p; + register uint32_t* L_AES_Thumb2_te_c asm ("r2") = (uint32_t*)L_AES_Thumb2_te; + register uint32_t* L_AES_Thumb2_td_c asm ("r3") = (uint32_t*)L_AES_Thumb2_td; + + __asm__ __volatile__ ( + "MOV r12, %[L_AES_Thumb2_te]\n\t" + "MOV lr, %[L_AES_Thumb2_td]\n\t" + "ADD r10, %[ks], %[rounds], LSL #4\n\t" + "MOV r11, %[rounds]\n\t" + "\n" + "L_AES_invert_key_loop_%=:\n\t" + "LDM %[ks], {r2, r3, r4, r5}\n\t" + "LDM r10, {r6, r7, r8, r9}\n\t" + "STM r10, {r2, r3, r4, r5}\n\t" + "STM %[ks]!, {r6, r7, r8, r9}\n\t" + "SUBS r11, r11, #0x2\n\t" + "SUB r10, r10, #0x10\n\t" + "BNE L_AES_invert_key_loop_%=\n\t" + "SUB %[ks], %[ks], %[rounds], LSL #3\n\t" + "ADD %[ks], %[ks], #0x10\n\t" + "SUB r11, %[rounds], #0x1\n\t" + "\n" + "L_AES_invert_key_mix_loop_%=:\n\t" + "LDM %[ks], {r2, r3, r4, r5}\n\t" + "UBFX r6, r2, #0, #8\n\t" + "UBFX r7, r2, #8, #8\n\t" + "UBFX r8, r2, #16, #8\n\t" + "LSR r9, r2, #24\n\t" + "LDRB r6, [r12, r6, LSL #2]\n\t" + "LDRB r7, [r12, r7, LSL #2]\n\t" + "LDRB r8, [r12, r8, LSL #2]\n\t" + "LDRB r9, [r12, r9, LSL #2]\n\t" + "LDR r6, [lr, r6, LSL #2]\n\t" + "LDR r7, [lr, r7, LSL #2]\n\t" + "LDR r8, [lr, r8, LSL #2]\n\t" + "LDR r9, [lr, r9, LSL #2]\n\t" + "EOR r8, r8, r6, ROR #16\n\t" + "EOR r8, r8, r7, ROR #8\n\t" + "EOR r8, r8, r9, ROR #24\n\t" + "STR r8, [%[ks]], #4\n\t" + "UBFX r6, r3, #0, #8\n\t" + "UBFX r7, r3, #8, #8\n\t" + "UBFX r8, r3, #16, #8\n\t" + "LSR r9, r3, #24\n\t" + "LDRB r6, [r12, r6, LSL #2]\n\t" + "LDRB r7, [r12, r7, LSL #2]\n\t" + "LDRB r8, [r12, r8, LSL #2]\n\t" + "LDRB r9, [r12, r9, LSL #2]\n\t" + "LDR r6, [lr, r6, LSL #2]\n\t" + "LDR r7, [lr, r7, LSL #2]\n\t" + "LDR r8, [lr, r8, LSL #2]\n\t" + "LDR r9, [lr, r9, LSL #2]\n\t" + "EOR r8, r8, r6, ROR #16\n\t" + "EOR r8, r8, r7, ROR #8\n\t" + "EOR r8, r8, r9, ROR #24\n\t" + "STR r8, [%[ks]], #4\n\t" + "UBFX r6, r4, #0, #8\n\t" + "UBFX r7, r4, #8, #8\n\t" + "UBFX r8, r4, #16, #8\n\t" + "LSR r9, r4, #24\n\t" + "LDRB r6, [r12, r6, LSL #2]\n\t" + "LDRB r7, [r12, r7, LSL #2]\n\t" + "LDRB r8, [r12, r8, LSL #2]\n\t" + "LDRB r9, [r12, r9, LSL #2]\n\t" + "LDR r6, [lr, r6, LSL #2]\n\t" + "LDR r7, [lr, r7, LSL #2]\n\t" + "LDR r8, [lr, r8, LSL #2]\n\t" + "LDR r9, [lr, r9, LSL #2]\n\t" + "EOR r8, r8, r6, ROR #16\n\t" + "EOR r8, r8, r7, ROR #8\n\t" + "EOR r8, r8, r9, ROR #24\n\t" + "STR r8, [%[ks]], #4\n\t" + "UBFX r6, r5, #0, #8\n\t" + "UBFX r7, r5, #8, #8\n\t" + "UBFX r8, r5, #16, #8\n\t" + "LSR r9, r5, #24\n\t" + 
"LDRB r6, [r12, r6, LSL #2]\n\t" + "LDRB r7, [r12, r7, LSL #2]\n\t" + "LDRB r8, [r12, r8, LSL #2]\n\t" + "LDRB r9, [r12, r9, LSL #2]\n\t" + "LDR r6, [lr, r6, LSL #2]\n\t" + "LDR r7, [lr, r7, LSL #2]\n\t" + "LDR r8, [lr, r8, LSL #2]\n\t" + "LDR r9, [lr, r9, LSL #2]\n\t" + "EOR r8, r8, r6, ROR #16\n\t" + "EOR r8, r8, r7, ROR #8\n\t" + "EOR r8, r8, r9, ROR #24\n\t" + "STR r8, [%[ks]], #4\n\t" + "SUBS r11, r11, #0x1\n\t" + "BNE L_AES_invert_key_mix_loop_%=\n\t" + : [ks] "+r" (ks), [rounds] "+r" (rounds), [L_AES_Thumb2_te] "+r" (L_AES_Thumb2_te_c), [L_AES_Thumb2_td] "+r" (L_AES_Thumb2_td_c) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); +} + +#endif /* HAVE_AES_DECRYPT */ +static const uint32_t L_AES_Thumb2_rcon[] = { + 0x01000000, 0x02000000, 0x04000000, 0x08000000, + 0x10000000, 0x20000000, 0x40000000, 0x80000000, + 0x1b000000, 0x36000000, +}; + +void AES_set_encrypt_key(const unsigned char* key, word32 len, + unsigned char* ks); +void AES_set_encrypt_key(const unsigned char* key_p, word32 len_p, unsigned char* ks_p) +{ + register const unsigned char* key asm ("r0") = (const unsigned char*)key_p; + register word32 len asm ("r1") = (word32)len_p; + register unsigned char* ks asm ("r2") = (unsigned char*)ks_p; + register uint32_t* L_AES_Thumb2_te_c asm ("r3") = (uint32_t*)L_AES_Thumb2_te; + register uint32_t* L_AES_Thumb2_rcon_c asm ("r4") = (uint32_t*)&L_AES_Thumb2_rcon; + + __asm__ __volatile__ ( + "MOV r8, %[L_AES_Thumb2_te]\n\t" + "MOV lr, %[L_AES_Thumb2_rcon]\n\t" + "CMP %[len], #0x80\n\t" + "BEQ L_AES_set_encrypt_key_start_128_%=\n\t" + "CMP %[len], #0xc0\n\t" + "BEQ L_AES_set_encrypt_key_start_192_%=\n\t" + "LDRD r4, r5, [%[key]]\n\t" + "LDRD r6, r7, [%[key], #8]\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "STM %[ks]!, {r4, r5, r6, r7}\n\t" + "LDRD r4, r5, [%[key], #16]\n\t" + "LDRD r6, r7, [%[key], #24]\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "STM %[ks], {r4, r5, r6, r7}\n\t" + "SUB %[ks], %[ks], #0x10\n\t" + "MOV r12, #0x6\n\t" + "\n" + "L_AES_set_encrypt_key_loop_256_%=:\n\t" + "UBFX r4, r7, #0, #8\n\t" + "UBFX r5, r7, #8, #8\n\t" + "UBFX r6, r7, #16, #8\n\t" + "LSR r7, r7, #24\n\t" + "LDRB r4, [r8, r4, LSL #2]\n\t" + "LDRB r5, [r8, r5, LSL #2]\n\t" + "LDRB r6, [r8, r6, LSL #2]\n\t" + "LDRB r7, [r8, r7, LSL #2]\n\t" + "EOR r3, r7, r4, LSL #8\n\t" + "EOR r3, r3, r5, LSL #16\n\t" + "EOR r3, r3, r6, LSL #24\n\t" + "LDM %[ks]!, {r4, r5, r6, r7}\n\t" + "EOR r4, r4, r3\n\t" + "LDM lr!, {r3}\n\t" + "EOR r4, r4, r3\n\t" + "EOR r5, r5, r4\n\t" + "EOR r6, r6, r5\n\t" + "EOR r7, r7, r6\n\t" + "ADD %[ks], %[ks], #0x10\n\t" + "STM %[ks], {r4, r5, r6, r7}\n\t" + "SUB %[ks], %[ks], #0x10\n\t" + "MOV r3, r7\n\t" + "UBFX r4, r3, #8, #8\n\t" + "UBFX r5, r3, #16, #8\n\t" + "LSR r6, r3, #24\n\t" + "UBFX r3, r3, #0, #8\n\t" + "LDRB r4, [r8, r4, LSL #2]\n\t" + "LDRB r6, [r8, r6, LSL #2]\n\t" + "LDRB r5, [r8, r5, LSL #2]\n\t" + "LDRB r3, [r8, r3, LSL #2]\n\t" + "EOR r3, r3, r4, LSL #8\n\t" + "EOR r3, r3, r5, LSL #16\n\t" + "EOR r3, r3, r6, LSL #24\n\t" + "LDM %[ks]!, {r4, r5, r6, r7}\n\t" + "EOR r4, r4, r3\n\t" + "EOR r5, r5, r4\n\t" + "EOR r6, r6, r5\n\t" + "EOR r7, r7, r6\n\t" + "ADD %[ks], %[ks], #0x10\n\t" + "STM %[ks], {r4, r5, r6, r7}\n\t" + "SUB %[ks], %[ks], #0x10\n\t" + "SUBS r12, r12, #0x1\n\t" + "BNE L_AES_set_encrypt_key_loop_256_%=\n\t" + "UBFX r4, r7, #0, #8\n\t" + "UBFX r5, r7, #8, #8\n\t" + "UBFX r6, r7, #16, #8\n\t" + "LSR r7, r7, #24\n\t" + "LDRB r4, [r8, r4, LSL #2]\n\t" 
+ "LDRB r5, [r8, r5, LSL #2]\n\t" + "LDRB r6, [r8, r6, LSL #2]\n\t" + "LDRB r7, [r8, r7, LSL #2]\n\t" + "EOR r3, r7, r4, LSL #8\n\t" + "EOR r3, r3, r5, LSL #16\n\t" + "EOR r3, r3, r6, LSL #24\n\t" + "LDM %[ks]!, {r4, r5, r6, r7}\n\t" + "EOR r4, r4, r3\n\t" + "LDM lr!, {r3}\n\t" + "EOR r4, r4, r3\n\t" + "EOR r5, r5, r4\n\t" + "EOR r6, r6, r5\n\t" + "EOR r7, r7, r6\n\t" + "ADD %[ks], %[ks], #0x10\n\t" + "STM %[ks], {r4, r5, r6, r7}\n\t" + "SUB %[ks], %[ks], #0x10\n\t" + "B L_AES_set_encrypt_key_end_%=\n\t" + "\n" + "L_AES_set_encrypt_key_start_192_%=:\n\t" + "LDRD r4, r5, [%[key]]\n\t" + "LDRD r6, r7, [%[key], #8]\n\t" + "LDRD %[key], %[len], [%[key], #16]\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "REV %[key], %[key]\n\t" + "REV %[len], %[len]\n\t" + "STM %[ks], {r4, r5, r6, r7}\n\t" + "STRD %[key], %[len], [%[ks], #16]\n\t" + "MOV r7, %[len]\n\t" + "MOV r12, #0x7\n\t" + "\n" + "L_AES_set_encrypt_key_loop_192_%=:\n\t" + "UBFX r0, r7, #0, #8\n\t" + "UBFX r1, r7, #8, #8\n\t" + "UBFX r4, r7, #16, #8\n\t" + "LSR r7, r7, #24\n\t" + "LDRB r0, [r8, r0, LSL #2]\n\t" + "LDRB r1, [r8, r1, LSL #2]\n\t" + "LDRB r4, [r8, r4, LSL #2]\n\t" + "LDRB r7, [r8, r7, LSL #2]\n\t" + "EOR r3, r7, r0, LSL #8\n\t" + "EOR r3, r3, r1, LSL #16\n\t" + "EOR r3, r3, r4, LSL #24\n\t" + "LDM %[ks]!, {r0, r1, r4, r5, r6, r7}\n\t" + "EOR r0, r0, r3\n\t" + "LDM lr!, {r3}\n\t" + "EOR r0, r0, r3\n\t" + "EOR r1, r1, r0\n\t" + "EOR r4, r4, r1\n\t" + "EOR r5, r5, r4\n\t" + "EOR r6, r6, r5\n\t" + "EOR r7, r7, r6\n\t" + "STM %[ks], {r0, r1, r4, r5, r6, r7}\n\t" + "SUBS r12, r12, #0x1\n\t" + "BNE L_AES_set_encrypt_key_loop_192_%=\n\t" + "UBFX r0, r7, #0, #8\n\t" + "UBFX r1, r7, #8, #8\n\t" + "UBFX r4, r7, #16, #8\n\t" + "LSR r7, r7, #24\n\t" + "LDRB r0, [r8, r0, LSL #2]\n\t" + "LDRB r1, [r8, r1, LSL #2]\n\t" + "LDRB r4, [r8, r4, LSL #2]\n\t" + "LDRB r7, [r8, r7, LSL #2]\n\t" + "EOR r3, r7, r0, LSL #8\n\t" + "EOR r3, r3, r1, LSL #16\n\t" + "EOR r3, r3, r4, LSL #24\n\t" + "LDM %[ks]!, {r0, r1, r4, r5, r6, r7}\n\t" + "EOR r0, r0, r3\n\t" + "LDM lr!, {r3}\n\t" + "EOR r0, r0, r3\n\t" + "EOR r1, r1, r0\n\t" + "EOR r4, r4, r1\n\t" + "EOR r5, r5, r4\n\t" + "STM %[ks], {r0, r1, r4, r5}\n\t" + "B L_AES_set_encrypt_key_end_%=\n\t" + "\n" + "L_AES_set_encrypt_key_start_128_%=:\n\t" + "LDRD r4, r5, [%[key]]\n\t" + "LDRD r6, r7, [%[key], #8]\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "STM %[ks], {r4, r5, r6, r7}\n\t" + "MOV r12, #0xa\n\t" + "\n" + "L_AES_set_encrypt_key_loop_128_%=:\n\t" + "UBFX r4, r7, #0, #8\n\t" + "UBFX r5, r7, #8, #8\n\t" + "UBFX r6, r7, #16, #8\n\t" + "LSR r7, r7, #24\n\t" + "LDRB r4, [r8, r4, LSL #2]\n\t" + "LDRB r5, [r8, r5, LSL #2]\n\t" + "LDRB r6, [r8, r6, LSL #2]\n\t" + "LDRB r7, [r8, r7, LSL #2]\n\t" + "EOR r3, r7, r4, LSL #8\n\t" + "EOR r3, r3, r5, LSL #16\n\t" + "EOR r3, r3, r6, LSL #24\n\t" + "LDM %[ks]!, {r4, r5, r6, r7}\n\t" + "EOR r4, r4, r3\n\t" + "LDM lr!, {r3}\n\t" + "EOR r4, r4, r3\n\t" + "EOR r5, r5, r4\n\t" + "EOR r6, r6, r5\n\t" + "EOR r7, r7, r6\n\t" + "STM %[ks], {r4, r5, r6, r7}\n\t" + "SUBS r12, r12, #0x1\n\t" + "BNE L_AES_set_encrypt_key_loop_128_%=\n\t" + "\n" + "L_AES_set_encrypt_key_end_%=:\n\t" + : [key] "+r" (key), [len] "+r" (len), [ks] "+r" (ks), [L_AES_Thumb2_te] "+r" (L_AES_Thumb2_te_c), [L_AES_Thumb2_rcon] "+r" (L_AES_Thumb2_rcon_c) + : + : "memory", "r12", "lr", "r5", "r6", "r7", "r8" + ); +} + +void AES_encrypt_block(const uint32_t* te, int nr, int len, const uint32_t* ks); +void AES_encrypt_block(const uint32_t* 
te_p, int nr_p, int len_p, const uint32_t* ks_p) +{ + register const uint32_t* te asm ("r0") = (const uint32_t*)te_p; + register int nr asm ("r1") = (int)nr_p; + register int len asm ("r2") = (int)len_p; + register const uint32_t* ks asm ("r3") = (const uint32_t*)ks_p; + + __asm__ __volatile__ ( + "\n" + "L_AES_encrypt_block_nr_%=:\n\t" + "UBFX r8, r5, #16, #8\n\t" + "LSR r11, r4, #24\n\t" + "UBFX lr, r6, #8, #8\n\t" + "UBFX r2, r7, #0, #8\n\t" + "LDR r8, [%[te], r8, LSL #2]\n\t" + "LDR r11, [%[te], r11, LSL #2]\n\t" + "LDR lr, [%[te], lr, LSL #2]\n\t" + "LDR r2, [%[te], r2, LSL #2]\n\t" + "UBFX r9, r6, #16, #8\n\t" + "EOR r8, r8, r11, ROR #24\n\t" + "LSR r11, r5, #24\n\t" + "EOR r8, r8, lr, ROR #8\n\t" + "UBFX lr, r7, #8, #8\n\t" + "EOR r8, r8, r2, ROR #16\n\t" + "UBFX r2, r4, #0, #8\n\t" + "LDR r9, [%[te], r9, LSL #2]\n\t" + "LDR r11, [%[te], r11, LSL #2]\n\t" + "LDR lr, [%[te], lr, LSL #2]\n\t" + "LDR r2, [%[te], r2, LSL #2]\n\t" + "UBFX r10, r7, #16, #8\n\t" + "EOR r9, r9, r11, ROR #24\n\t" + "LSR r11, r6, #24\n\t" + "EOR r9, r9, lr, ROR #8\n\t" + "UBFX lr, r4, #8, #8\n\t" + "EOR r9, r9, r2, ROR #16\n\t" + "UBFX r2, r5, #0, #8\n\t" + "LDR r10, [%[te], r10, LSL #2]\n\t" + "LDR r11, [%[te], r11, LSL #2]\n\t" + "LDR lr, [%[te], lr, LSL #2]\n\t" + "LDR r2, [%[te], r2, LSL #2]\n\t" + "UBFX r6, r6, #0, #8\n\t" + "EOR r10, r10, r11, ROR #24\n\t" + "UBFX r11, r4, #16, #8\n\t" + "EOR r10, r10, lr, ROR #8\n\t" + "LSR lr, r7, #24\n\t" + "EOR r10, r10, r2, ROR #16\n\t" + "UBFX r2, r5, #8, #8\n\t" + "LDR r6, [%[te], r6, LSL #2]\n\t" + "LDR lr, [%[te], lr, LSL #2]\n\t" + "LDR r11, [%[te], r11, LSL #2]\n\t" + "LDR r2, [%[te], r2, LSL #2]\n\t" + "EOR lr, lr, r6, ROR #24\n\t" + "LDM %[ks]!, {r4, r5, r6, r7}\n\t" + "EOR r11, r11, lr, ROR #24\n\t" + "EOR r11, r11, r2, ROR #8\n\t" + /* XOR in Key Schedule */ + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "UBFX r4, r9, #16, #8\n\t" + "LSR r7, r8, #24\n\t" + "UBFX lr, r10, #8, #8\n\t" + "UBFX r2, r11, #0, #8\n\t" + "LDR r4, [%[te], r4, LSL #2]\n\t" + "LDR r7, [%[te], r7, LSL #2]\n\t" + "LDR lr, [%[te], lr, LSL #2]\n\t" + "LDR r2, [%[te], r2, LSL #2]\n\t" + "UBFX r5, r10, #16, #8\n\t" + "EOR r4, r4, r7, ROR #24\n\t" + "LSR r7, r9, #24\n\t" + "EOR r4, r4, lr, ROR #8\n\t" + "UBFX lr, r11, #8, #8\n\t" + "EOR r4, r4, r2, ROR #16\n\t" + "UBFX r2, r8, #0, #8\n\t" + "LDR r5, [%[te], r5, LSL #2]\n\t" + "LDR r7, [%[te], r7, LSL #2]\n\t" + "LDR lr, [%[te], lr, LSL #2]\n\t" + "LDR r2, [%[te], r2, LSL #2]\n\t" + "UBFX r6, r11, #16, #8\n\t" + "EOR r5, r5, r7, ROR #24\n\t" + "LSR r7, r10, #24\n\t" + "EOR r5, r5, lr, ROR #8\n\t" + "UBFX lr, r8, #8, #8\n\t" + "EOR r5, r5, r2, ROR #16\n\t" + "UBFX r2, r9, #0, #8\n\t" + "LDR r6, [%[te], r6, LSL #2]\n\t" + "LDR r7, [%[te], r7, LSL #2]\n\t" + "LDR lr, [%[te], lr, LSL #2]\n\t" + "LDR r2, [%[te], r2, LSL #2]\n\t" + "UBFX r10, r10, #0, #8\n\t" + "EOR r6, r6, r7, ROR #24\n\t" + "UBFX r7, r8, #16, #8\n\t" + "EOR r6, r6, lr, ROR #8\n\t" + "LSR lr, r11, #24\n\t" + "EOR r6, r6, r2, ROR #16\n\t" + "UBFX r2, r9, #8, #8\n\t" + "LDR r10, [%[te], r10, LSL #2]\n\t" + "LDR lr, [%[te], lr, LSL #2]\n\t" + "LDR r7, [%[te], r7, LSL #2]\n\t" + "LDR r2, [%[te], r2, LSL #2]\n\t" + "EOR lr, lr, r10, ROR #24\n\t" + "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "EOR r7, r7, lr, ROR #24\n\t" + "EOR r7, r7, r2, ROR #8\n\t" + /* XOR in Key Schedule */ + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "SUBS %[nr], %[nr], #0x1\n\t" + "BNE L_AES_encrypt_block_nr_%=\n\t" 
+ "UBFX r8, r5, #16, #8\n\t" + "LSR r11, r4, #24\n\t" + "UBFX lr, r6, #8, #8\n\t" + "UBFX r2, r7, #0, #8\n\t" + "LDR r8, [%[te], r8, LSL #2]\n\t" + "LDR r11, [%[te], r11, LSL #2]\n\t" + "LDR lr, [%[te], lr, LSL #2]\n\t" + "LDR r2, [%[te], r2, LSL #2]\n\t" + "UBFX r9, r6, #16, #8\n\t" + "EOR r8, r8, r11, ROR #24\n\t" + "LSR r11, r5, #24\n\t" + "EOR r8, r8, lr, ROR #8\n\t" + "UBFX lr, r7, #8, #8\n\t" + "EOR r8, r8, r2, ROR #16\n\t" + "UBFX r2, r4, #0, #8\n\t" + "LDR r9, [%[te], r9, LSL #2]\n\t" + "LDR r11, [%[te], r11, LSL #2]\n\t" + "LDR lr, [%[te], lr, LSL #2]\n\t" + "LDR r2, [%[te], r2, LSL #2]\n\t" + "UBFX r10, r7, #16, #8\n\t" + "EOR r9, r9, r11, ROR #24\n\t" + "LSR r11, r6, #24\n\t" + "EOR r9, r9, lr, ROR #8\n\t" + "UBFX lr, r4, #8, #8\n\t" + "EOR r9, r9, r2, ROR #16\n\t" + "UBFX r2, r5, #0, #8\n\t" + "LDR r10, [%[te], r10, LSL #2]\n\t" + "LDR r11, [%[te], r11, LSL #2]\n\t" + "LDR lr, [%[te], lr, LSL #2]\n\t" + "LDR r2, [%[te], r2, LSL #2]\n\t" + "UBFX r6, r6, #0, #8\n\t" + "EOR r10, r10, r11, ROR #24\n\t" + "UBFX r11, r4, #16, #8\n\t" + "EOR r10, r10, lr, ROR #8\n\t" + "LSR lr, r7, #24\n\t" + "EOR r10, r10, r2, ROR #16\n\t" + "UBFX r2, r5, #8, #8\n\t" + "LDR r6, [%[te], r6, LSL #2]\n\t" + "LDR lr, [%[te], lr, LSL #2]\n\t" + "LDR r11, [%[te], r11, LSL #2]\n\t" + "LDR r2, [%[te], r2, LSL #2]\n\t" + "EOR lr, lr, r6, ROR #24\n\t" + "LDM %[ks]!, {r4, r5, r6, r7}\n\t" + "EOR r11, r11, lr, ROR #24\n\t" + "EOR r11, r11, r2, ROR #8\n\t" + /* XOR in Key Schedule */ + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "UBFX r4, r11, #0, #8\n\t" + "UBFX r7, r10, #8, #8\n\t" + "UBFX lr, r9, #16, #8\n\t" + "LSR r2, r8, #24\n\t" + "LDRB r4, [%[te], r4, LSL #2]\n\t" + "LDRB r7, [%[te], r7, LSL #2]\n\t" + "LDRB lr, [%[te], lr, LSL #2]\n\t" + "LDRB r2, [%[te], r2, LSL #2]\n\t" + "UBFX r5, r8, #0, #8\n\t" + "EOR r4, r4, r7, LSL #8\n\t" + "UBFX r7, r11, #8, #8\n\t" + "EOR r4, r4, lr, LSL #16\n\t" + "UBFX lr, r10, #16, #8\n\t" + "EOR r4, r4, r2, LSL #24\n\t" + "LSR r2, r9, #24\n\t" + "LDRB r5, [%[te], r5, LSL #2]\n\t" + "LDRB r7, [%[te], r7, LSL #2]\n\t" + "LDRB lr, [%[te], lr, LSL #2]\n\t" + "LDRB r2, [%[te], r2, LSL #2]\n\t" + "UBFX r6, r9, #0, #8\n\t" + "EOR r5, r5, r7, LSL #8\n\t" + "UBFX r7, r8, #8, #8\n\t" + "EOR r5, r5, lr, LSL #16\n\t" + "UBFX lr, r11, #16, #8\n\t" + "EOR r5, r5, r2, LSL #24\n\t" + "LSR r2, r10, #24\n\t" + "LDRB r6, [%[te], r6, LSL #2]\n\t" + "LDRB r7, [%[te], r7, LSL #2]\n\t" + "LDRB lr, [%[te], lr, LSL #2]\n\t" + "LDRB r2, [%[te], r2, LSL #2]\n\t" + "LSR r11, r11, #24\n\t" + "EOR r6, r6, r7, LSL #8\n\t" + "UBFX r7, r10, #0, #8\n\t" + "EOR r6, r6, lr, LSL #16\n\t" + "UBFX lr, r9, #8, #8\n\t" + "EOR r6, r6, r2, LSL #24\n\t" + "UBFX r2, r8, #16, #8\n\t" + "LDRB r11, [%[te], r11, LSL #2]\n\t" + "LDRB r7, [%[te], r7, LSL #2]\n\t" + "LDRB lr, [%[te], lr, LSL #2]\n\t" + "LDRB r2, [%[te], r2, LSL #2]\n\t" + "EOR lr, lr, r11, LSL #16\n\t" + "LDM %[ks], {r8, r9, r10, r11}\n\t" + "EOR r7, r7, lr, LSL #8\n\t" + "EOR r7, r7, r2, LSL #16\n\t" + /* XOR in Key Schedule */ + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + : [te] "+r" (te), [nr] "+r" (nr), [len] "+r" (len), [ks] "+r" (ks) + : + : "memory", "lr" + ); +} + +#if defined(HAVE_AES_CBC) || defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) +static const uint32_t* L_AES_Thumb2_te_ecb = L_AES_Thumb2_te_data; +#endif /* HAVE_AES_CBC || HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || 
WOLFSSL_AES_COUNTER */ +#if defined(HAVE_AESCCM) || defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) +void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, + unsigned long len, const unsigned char* ks, int nr); +void AES_ECB_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p) +{ + register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; + register unsigned char* out asm ("r1") = (unsigned char*)out_p; + register unsigned long len asm ("r2") = (unsigned long)len_p; + register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; + register int nr asm ("r4") = (int)nr_p; + register uint32_t* L_AES_Thumb2_te_ecb_c asm ("r5") = (uint32_t*)L_AES_Thumb2_te_ecb; + + __asm__ __volatile__ ( + "MOV lr, %[in]\n\t" + "MOV r0, %[L_AES_Thumb2_te_ecb]\n\t" + "MOV r12, r4\n\t" + "PUSH {%[ks]}\n\t" + "CMP r12, #0xa\n\t" + "BEQ L_AES_ECB_encrypt_start_block_128_%=\n\t" + "CMP r12, #0xc\n\t" + "BEQ L_AES_ECB_encrypt_start_block_192_%=\n\t" + "\n" + "L_AES_ECB_encrypt_loop_block_256_%=:\n\t" + "LDR r4, [lr]\n\t" + "LDR r5, [lr, #4]\n\t" + "LDR r6, [lr, #8]\n\t" + "LDR r7, [lr, #12]\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "PUSH {r1, %[len], lr}\n\t" + "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + /* Round: 0 - XOR in key schedule */ + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "MOV r1, #0x6\n\t" + "BL AES_encrypt_block\n\t" + "POP {r1, %[len], lr}\n\t" + "LDR %[ks], [sp]\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "STR r4, [%[out]]\n\t" + "STR r5, [%[out], #4]\n\t" + "STR r6, [%[out], #8]\n\t" + "STR r7, [%[out], #12]\n\t" + "SUBS %[len], %[len], #0x10\n\t" + "ADD lr, lr, #0x10\n\t" + "ADD %[out], %[out], #0x10\n\t" + "BNE L_AES_ECB_encrypt_loop_block_256_%=\n\t" + "B L_AES_ECB_encrypt_end_%=\n\t" + "\n" + "L_AES_ECB_encrypt_start_block_192_%=:\n\t" + "\n" + "L_AES_ECB_encrypt_loop_block_192_%=:\n\t" + "LDR r4, [lr]\n\t" + "LDR r5, [lr, #4]\n\t" + "LDR r6, [lr, #8]\n\t" + "LDR r7, [lr, #12]\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "PUSH {r1, %[len], lr}\n\t" + "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + /* Round: 0 - XOR in key schedule */ + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "MOV r1, #0x5\n\t" + "BL AES_encrypt_block\n\t" + "POP {r1, %[len], lr}\n\t" + "LDR %[ks], [sp]\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "STR r4, [%[out]]\n\t" + "STR r5, [%[out], #4]\n\t" + "STR r6, [%[out], #8]\n\t" + "STR r7, [%[out], #12]\n\t" + "SUBS %[len], %[len], #0x10\n\t" + "ADD lr, lr, #0x10\n\t" + "ADD %[out], %[out], #0x10\n\t" + "BNE L_AES_ECB_encrypt_loop_block_192_%=\n\t" + "B L_AES_ECB_encrypt_end_%=\n\t" + "\n" + "L_AES_ECB_encrypt_start_block_128_%=:\n\t" + "\n" + "L_AES_ECB_encrypt_loop_block_128_%=:\n\t" + "LDR r4, [lr]\n\t" + "LDR r5, [lr, #4]\n\t" + "LDR r6, [lr, #8]\n\t" + "LDR r7, [lr, #12]\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "PUSH {r1, %[len], lr}\n\t" + "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + /* Round: 0 - XOR in key schedule */ + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "MOV r1, #0x4\n\t" + "BL AES_encrypt_block\n\t" + "POP {r1, %[len], lr}\n\t" + "LDR %[ks], [sp]\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, 
r6\n\t" + "REV r7, r7\n\t" + "STR r4, [%[out]]\n\t" + "STR r5, [%[out], #4]\n\t" + "STR r6, [%[out], #8]\n\t" + "STR r7, [%[out], #12]\n\t" + "SUBS %[len], %[len], #0x10\n\t" + "ADD lr, lr, #0x10\n\t" + "ADD %[out], %[out], #0x10\n\t" + "BNE L_AES_ECB_encrypt_loop_block_128_%=\n\t" + "\n" + "L_AES_ECB_encrypt_end_%=:\n\t" + "POP {%[ks]}\n\t" + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [L_AES_Thumb2_te_ecb] "+r" (L_AES_Thumb2_te_ecb_c) + : + : "memory", "r12", "lr", "r6", "r7", "r8", "r9", "r10", "r11" + ); + (void)nr; +} + +#endif /* HAVE_AESCCM || HAVE_AESGCM || WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#ifdef HAVE_AES_CBC +void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, + unsigned long len, const unsigned char* ks, int nr, unsigned char* iv); +void AES_CBC_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* iv_p) +{ + register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; + register unsigned char* out asm ("r1") = (unsigned char*)out_p; + register unsigned long len asm ("r2") = (unsigned long)len_p; + register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; + register int nr asm ("r4") = (int)nr_p; + register unsigned char* iv asm ("r5") = (unsigned char*)iv_p; + register uint32_t* L_AES_Thumb2_te_ecb_c asm ("r6") = (uint32_t*)L_AES_Thumb2_te_ecb; + + __asm__ __volatile__ ( + "MOV r8, r4\n\t" + "MOV r9, r5\n\t" + "MOV lr, %[in]\n\t" + "MOV r0, %[L_AES_Thumb2_te_ecb]\n\t" + "LDM r9, {r4, r5, r6, r7}\n\t" + "PUSH {%[ks], r9}\n\t" + "CMP r8, #0xa\n\t" + "BEQ L_AES_CBC_encrypt_start_block_128_%=\n\t" + "CMP r8, #0xc\n\t" + "BEQ L_AES_CBC_encrypt_start_block_192_%=\n\t" + "\n" + "L_AES_CBC_encrypt_loop_block_256_%=:\n\t" + "LDR r8, [lr]\n\t" + "LDR r9, [lr, #4]\n\t" + "LDR r10, [lr, #8]\n\t" + "LDR r11, [lr, #12]\n\t" + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "PUSH {r1, %[len], lr}\n\t" + "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + /* Round: 0 - XOR in key schedule */ + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "MOV r1, #0x6\n\t" + "BL AES_encrypt_block\n\t" + "POP {r1, %[len], lr}\n\t" + "LDR %[ks], [sp]\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "STR r4, [%[out]]\n\t" + "STR r5, [%[out], #4]\n\t" + "STR r6, [%[out], #8]\n\t" + "STR r7, [%[out], #12]\n\t" + "SUBS %[len], %[len], #0x10\n\t" + "ADD lr, lr, #0x10\n\t" + "ADD %[out], %[out], #0x10\n\t" + "BNE L_AES_CBC_encrypt_loop_block_256_%=\n\t" + "B L_AES_CBC_encrypt_end_%=\n\t" + "\n" + "L_AES_CBC_encrypt_start_block_192_%=:\n\t" + "\n" + "L_AES_CBC_encrypt_loop_block_192_%=:\n\t" + "LDR r8, [lr]\n\t" + "LDR r9, [lr, #4]\n\t" + "LDR r10, [lr, #8]\n\t" + "LDR r11, [lr, #12]\n\t" + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "PUSH {r1, %[len], lr}\n\t" + "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + /* Round: 0 - XOR in key schedule */ + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "MOV r1, #0x5\n\t" + "BL AES_encrypt_block\n\t" + "POP {r1, %[len], lr}\n\t" + "LDR %[ks], [sp]\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "STR r4, [%[out]]\n\t" + "STR 
r5, [%[out], #4]\n\t" + "STR r6, [%[out], #8]\n\t" + "STR r7, [%[out], #12]\n\t" + "SUBS %[len], %[len], #0x10\n\t" + "ADD lr, lr, #0x10\n\t" + "ADD %[out], %[out], #0x10\n\t" + "BNE L_AES_CBC_encrypt_loop_block_192_%=\n\t" + "B L_AES_CBC_encrypt_end_%=\n\t" + "\n" + "L_AES_CBC_encrypt_start_block_128_%=:\n\t" + "\n" + "L_AES_CBC_encrypt_loop_block_128_%=:\n\t" + "LDR r8, [lr]\n\t" + "LDR r9, [lr, #4]\n\t" + "LDR r10, [lr, #8]\n\t" + "LDR r11, [lr, #12]\n\t" + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "PUSH {r1, %[len], lr}\n\t" + "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + /* Round: 0 - XOR in key schedule */ + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "MOV r1, #0x4\n\t" + "BL AES_encrypt_block\n\t" + "POP {r1, %[len], lr}\n\t" + "LDR %[ks], [sp]\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "STR r4, [%[out]]\n\t" + "STR r5, [%[out], #4]\n\t" + "STR r6, [%[out], #8]\n\t" + "STR r7, [%[out], #12]\n\t" + "SUBS %[len], %[len], #0x10\n\t" + "ADD lr, lr, #0x10\n\t" + "ADD %[out], %[out], #0x10\n\t" + "BNE L_AES_CBC_encrypt_loop_block_128_%=\n\t" + "\n" + "L_AES_CBC_encrypt_end_%=:\n\t" + "POP {%[ks], r9}\n\t" + "STM r9, {r4, r5, r6, r7}\n\t" + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv), [L_AES_Thumb2_te_ecb] "+r" (L_AES_Thumb2_te_ecb_c) + : + : "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11" + ); + (void)nr; + (void)iv; +} + +#endif /* HAVE_AES_CBC */ +#ifdef WOLFSSL_AES_COUNTER +void AES_CTR_encrypt(const unsigned char* in, unsigned char* out, + unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr); +void AES_CTR_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* ctr_p) +{ + register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; + register unsigned char* out asm ("r1") = (unsigned char*)out_p; + register unsigned long len asm ("r2") = (unsigned long)len_p; + register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; + register int nr asm ("r4") = (int)nr_p; + register unsigned char* ctr asm ("r5") = (unsigned char*)ctr_p; + register uint32_t* L_AES_Thumb2_te_ecb_c asm ("r6") = (uint32_t*)L_AES_Thumb2_te_ecb; + + __asm__ __volatile__ ( + "MOV r12, r4\n\t" + "MOV r8, r5\n\t" + "MOV lr, %[in]\n\t" + "MOV r0, %[L_AES_Thumb2_te_ecb]\n\t" + "LDM r8, {r4, r5, r6, r7}\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "STM r8, {r4, r5, r6, r7}\n\t" + "PUSH {%[ks], r8}\n\t" + "CMP r12, #0xa\n\t" + "BEQ L_AES_CTR_encrypt_start_block_128_%=\n\t" + "CMP r12, #0xc\n\t" + "BEQ L_AES_CTR_encrypt_start_block_192_%=\n\t" + "\n" + "L_AES_CTR_encrypt_loop_block_256_%=:\n\t" + "PUSH {r1, %[len], lr}\n\t" + "LDR lr, [sp, #16]\n\t" + "ADDS r11, r7, #0x1\n\t" + "ADCS r10, r6, #0x0\n\t" + "ADCS r9, r5, #0x0\n\t" + "ADC r8, r4, #0x0\n\t" + "STM lr, {r8, r9, r10, r11}\n\t" + "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + /* Round: 0 - XOR in key schedule */ + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "MOV r1, #0x6\n\t" + "BL AES_encrypt_block\n\t" + "POP {r1, %[len], lr}\n\t" + "LDR %[ks], [sp]\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "LDR r8, [lr]\n\t" + "LDR r9, [lr, #4]\n\t" + "LDR r10, [lr, #8]\n\t" 
+ "LDR r11, [lr, #12]\n\t" + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "LDR r8, [sp, #4]\n\t" + "STR r4, [%[out]]\n\t" + "STR r5, [%[out], #4]\n\t" + "STR r6, [%[out], #8]\n\t" + "STR r7, [%[out], #12]\n\t" + "LDM r8, {r4, r5, r6, r7}\n\t" + "SUBS %[len], %[len], #0x10\n\t" + "ADD lr, lr, #0x10\n\t" + "ADD %[out], %[out], #0x10\n\t" + "BNE L_AES_CTR_encrypt_loop_block_256_%=\n\t" + "B L_AES_CTR_encrypt_end_%=\n\t" + "\n" + "L_AES_CTR_encrypt_start_block_192_%=:\n\t" + "\n" + "L_AES_CTR_encrypt_loop_block_192_%=:\n\t" + "PUSH {r1, %[len], lr}\n\t" + "LDR lr, [sp, #16]\n\t" + "ADDS r11, r7, #0x1\n\t" + "ADCS r10, r6, #0x0\n\t" + "ADCS r9, r5, #0x0\n\t" + "ADC r8, r4, #0x0\n\t" + "STM lr, {r8, r9, r10, r11}\n\t" + "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + /* Round: 0 - XOR in key schedule */ + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "MOV r1, #0x5\n\t" + "BL AES_encrypt_block\n\t" + "POP {r1, %[len], lr}\n\t" + "LDR %[ks], [sp]\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "LDR r8, [lr]\n\t" + "LDR r9, [lr, #4]\n\t" + "LDR r10, [lr, #8]\n\t" + "LDR r11, [lr, #12]\n\t" + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "LDR r8, [sp, #4]\n\t" + "STR r4, [%[out]]\n\t" + "STR r5, [%[out], #4]\n\t" + "STR r6, [%[out], #8]\n\t" + "STR r7, [%[out], #12]\n\t" + "LDM r8, {r4, r5, r6, r7}\n\t" + "SUBS %[len], %[len], #0x10\n\t" + "ADD lr, lr, #0x10\n\t" + "ADD %[out], %[out], #0x10\n\t" + "BNE L_AES_CTR_encrypt_loop_block_192_%=\n\t" + "B L_AES_CTR_encrypt_end_%=\n\t" + "\n" + "L_AES_CTR_encrypt_start_block_128_%=:\n\t" + "\n" + "L_AES_CTR_encrypt_loop_block_128_%=:\n\t" + "PUSH {r1, %[len], lr}\n\t" + "LDR lr, [sp, #16]\n\t" + "ADDS r11, r7, #0x1\n\t" + "ADCS r10, r6, #0x0\n\t" + "ADCS r9, r5, #0x0\n\t" + "ADC r8, r4, #0x0\n\t" + "STM lr, {r8, r9, r10, r11}\n\t" + "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + /* Round: 0 - XOR in key schedule */ + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "MOV r1, #0x4\n\t" + "BL AES_encrypt_block\n\t" + "POP {r1, %[len], lr}\n\t" + "LDR %[ks], [sp]\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "LDR r8, [lr]\n\t" + "LDR r9, [lr, #4]\n\t" + "LDR r10, [lr, #8]\n\t" + "LDR r11, [lr, #12]\n\t" + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "LDR r8, [sp, #4]\n\t" + "STR r4, [%[out]]\n\t" + "STR r5, [%[out], #4]\n\t" + "STR r6, [%[out], #8]\n\t" + "STR r7, [%[out], #12]\n\t" + "LDM r8, {r4, r5, r6, r7}\n\t" + "SUBS %[len], %[len], #0x10\n\t" + "ADD lr, lr, #0x10\n\t" + "ADD %[out], %[out], #0x10\n\t" + "BNE L_AES_CTR_encrypt_loop_block_128_%=\n\t" + "\n" + "L_AES_CTR_encrypt_end_%=:\n\t" + "POP {%[ks], r8}\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "STM r8, {r4, r5, r6, r7}\n\t" + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr), [L_AES_Thumb2_te_ecb] "+r" (L_AES_Thumb2_te_ecb_c) + : + : "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11" + ); + (void)nr; + (void)ctr; +} + +#endif /* WOLFSSL_AES_COUNTER */ +#ifdef HAVE_AES_DECRYPT +#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) || defined(HAVE_AES_CBC) +void AES_decrypt_block(const uint32_t* td, int nr); +void AES_decrypt_block(const uint32_t* td_p, int nr_p) +{ + register const uint32_t* td asm ("r0") = 
(const uint32_t*)td_p; + register int nr asm ("r1") = (int)nr_p; + + __asm__ __volatile__ ( + "\n" + "L_AES_decrypt_block_nr_%=:\n\t" + "UBFX r8, r7, #16, #8\n\t" + "LSR r11, r4, #24\n\t" + "UBFX lr, r6, #8, #8\n\t" + "UBFX r2, r5, #0, #8\n\t" + "LDR r8, [%[td], r8, LSL #2]\n\t" + "LDR r11, [%[td], r11, LSL #2]\n\t" + "LDR lr, [%[td], lr, LSL #2]\n\t" + "LDR r2, [%[td], r2, LSL #2]\n\t" + "UBFX r9, r4, #16, #8\n\t" + "EOR r8, r8, r11, ROR #24\n\t" + "LSR r11, r5, #24\n\t" + "EOR r8, r8, lr, ROR #8\n\t" + "UBFX lr, r7, #8, #8\n\t" + "EOR r8, r8, r2, ROR #16\n\t" + "UBFX r2, r6, #0, #8\n\t" + "LDR r9, [%[td], r9, LSL #2]\n\t" + "LDR r11, [%[td], r11, LSL #2]\n\t" + "LDR lr, [%[td], lr, LSL #2]\n\t" + "LDR r2, [%[td], r2, LSL #2]\n\t" + "UBFX r10, r5, #16, #8\n\t" + "EOR r9, r9, r11, ROR #24\n\t" + "LSR r11, r6, #24\n\t" + "EOR r9, r9, lr, ROR #8\n\t" + "UBFX lr, r4, #8, #8\n\t" + "EOR r9, r9, r2, ROR #16\n\t" + "UBFX r2, r7, #0, #8\n\t" + "LDR r10, [%[td], r10, LSL #2]\n\t" + "LDR r11, [%[td], r11, LSL #2]\n\t" + "LDR lr, [%[td], lr, LSL #2]\n\t" + "LDR r2, [%[td], r2, LSL #2]\n\t" + "UBFX r4, r4, #0, #8\n\t" + "EOR r10, r10, r11, ROR #24\n\t" + "UBFX r11, r6, #16, #8\n\t" + "EOR r10, r10, lr, ROR #8\n\t" + "LSR lr, r7, #24\n\t" + "EOR r10, r10, r2, ROR #16\n\t" + "UBFX r2, r5, #8, #8\n\t" + "LDR r4, [%[td], r4, LSL #2]\n\t" + "LDR lr, [%[td], lr, LSL #2]\n\t" + "LDR r11, [%[td], r11, LSL #2]\n\t" + "LDR r2, [%[td], r2, LSL #2]\n\t" + "EOR lr, lr, r4, ROR #24\n\t" + "LDM r3!, {r4, r5, r6, r7}\n\t" + "EOR r11, r11, r2, ROR #8\n\t" + "EOR r11, r11, lr, ROR #24\n\t" + /* XOR in Key Schedule */ + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "UBFX r4, r11, #16, #8\n\t" + "LSR r7, r8, #24\n\t" + "UBFX lr, r10, #8, #8\n\t" + "UBFX r2, r9, #0, #8\n\t" + "LDR r4, [%[td], r4, LSL #2]\n\t" + "LDR r7, [%[td], r7, LSL #2]\n\t" + "LDR lr, [%[td], lr, LSL #2]\n\t" + "LDR r2, [%[td], r2, LSL #2]\n\t" + "UBFX r5, r8, #16, #8\n\t" + "EOR r4, r4, r7, ROR #24\n\t" + "LSR r7, r9, #24\n\t" + "EOR r4, r4, lr, ROR #8\n\t" + "UBFX lr, r11, #8, #8\n\t" + "EOR r4, r4, r2, ROR #16\n\t" + "UBFX r2, r10, #0, #8\n\t" + "LDR r5, [%[td], r5, LSL #2]\n\t" + "LDR r7, [%[td], r7, LSL #2]\n\t" + "LDR lr, [%[td], lr, LSL #2]\n\t" + "LDR r2, [%[td], r2, LSL #2]\n\t" + "UBFX r6, r9, #16, #8\n\t" + "EOR r5, r5, r7, ROR #24\n\t" + "LSR r7, r10, #24\n\t" + "EOR r5, r5, lr, ROR #8\n\t" + "UBFX lr, r8, #8, #8\n\t" + "EOR r5, r5, r2, ROR #16\n\t" + "UBFX r2, r11, #0, #8\n\t" + "LDR r6, [%[td], r6, LSL #2]\n\t" + "LDR r7, [%[td], r7, LSL #2]\n\t" + "LDR lr, [%[td], lr, LSL #2]\n\t" + "LDR r2, [%[td], r2, LSL #2]\n\t" + "UBFX r8, r8, #0, #8\n\t" + "EOR r6, r6, r7, ROR #24\n\t" + "UBFX r7, r10, #16, #8\n\t" + "EOR r6, r6, lr, ROR #8\n\t" + "LSR lr, r11, #24\n\t" + "EOR r6, r6, r2, ROR #16\n\t" + "UBFX r2, r9, #8, #8\n\t" + "LDR r8, [%[td], r8, LSL #2]\n\t" + "LDR lr, [%[td], lr, LSL #2]\n\t" + "LDR r7, [%[td], r7, LSL #2]\n\t" + "LDR r2, [%[td], r2, LSL #2]\n\t" + "EOR lr, lr, r8, ROR #24\n\t" + "LDM r3!, {r8, r9, r10, r11}\n\t" + "EOR r7, r7, r2, ROR #8\n\t" + "EOR r7, r7, lr, ROR #24\n\t" + /* XOR in Key Schedule */ + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "SUBS %[nr], %[nr], #0x1\n\t" + "BNE L_AES_decrypt_block_nr_%=\n\t" + "UBFX r8, r7, #16, #8\n\t" + "LSR r11, r4, #24\n\t" + "UBFX lr, r6, #8, #8\n\t" + "UBFX r2, r5, #0, #8\n\t" + "LDR r8, [%[td], r8, LSL #2]\n\t" + "LDR r11, [%[td], r11, LSL #2]\n\t" + "LDR lr, [%[td], lr, LSL 
#2]\n\t" + "LDR r2, [%[td], r2, LSL #2]\n\t" + "UBFX r9, r4, #16, #8\n\t" + "EOR r8, r8, r11, ROR #24\n\t" + "LSR r11, r5, #24\n\t" + "EOR r8, r8, lr, ROR #8\n\t" + "UBFX lr, r7, #8, #8\n\t" + "EOR r8, r8, r2, ROR #16\n\t" + "UBFX r2, r6, #0, #8\n\t" + "LDR r9, [%[td], r9, LSL #2]\n\t" + "LDR r11, [%[td], r11, LSL #2]\n\t" + "LDR lr, [%[td], lr, LSL #2]\n\t" + "LDR r2, [%[td], r2, LSL #2]\n\t" + "UBFX r10, r5, #16, #8\n\t" + "EOR r9, r9, r11, ROR #24\n\t" + "LSR r11, r6, #24\n\t" + "EOR r9, r9, lr, ROR #8\n\t" + "UBFX lr, r4, #8, #8\n\t" + "EOR r9, r9, r2, ROR #16\n\t" + "UBFX r2, r7, #0, #8\n\t" + "LDR r10, [%[td], r10, LSL #2]\n\t" + "LDR r11, [%[td], r11, LSL #2]\n\t" + "LDR lr, [%[td], lr, LSL #2]\n\t" + "LDR r2, [%[td], r2, LSL #2]\n\t" + "UBFX r4, r4, #0, #8\n\t" + "EOR r10, r10, r11, ROR #24\n\t" + "UBFX r11, r6, #16, #8\n\t" + "EOR r10, r10, lr, ROR #8\n\t" + "LSR lr, r7, #24\n\t" + "EOR r10, r10, r2, ROR #16\n\t" + "UBFX r2, r5, #8, #8\n\t" + "LDR r4, [%[td], r4, LSL #2]\n\t" + "LDR lr, [%[td], lr, LSL #2]\n\t" + "LDR r11, [%[td], r11, LSL #2]\n\t" + "LDR r2, [%[td], r2, LSL #2]\n\t" + "EOR lr, lr, r4, ROR #24\n\t" + "LDM r3!, {r4, r5, r6, r7}\n\t" + "EOR r11, r11, r2, ROR #8\n\t" + "EOR r11, r11, lr, ROR #24\n\t" + /* XOR in Key Schedule */ + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "UBFX r4, r9, #0, #8\n\t" + "UBFX r7, r10, #8, #8\n\t" + "UBFX lr, r11, #16, #8\n\t" + "LSR r2, r8, #24\n\t" + "LDRB r4, [r12, r4]\n\t" + "LDRB r7, [r12, r7]\n\t" + "LDRB lr, [r12, lr]\n\t" + "LDRB r2, [r12, r2]\n\t" + "UBFX r5, r10, #0, #8\n\t" + "EOR r4, r4, r7, LSL #8\n\t" + "UBFX r7, r11, #8, #8\n\t" + "EOR r4, r4, lr, LSL #16\n\t" + "UBFX lr, r8, #16, #8\n\t" + "EOR r4, r4, r2, LSL #24\n\t" + "LSR r2, r9, #24\n\t" + "LDRB r7, [r12, r7]\n\t" + "LDRB r2, [r12, r2]\n\t" + "LDRB r5, [r12, r5]\n\t" + "LDRB lr, [r12, lr]\n\t" + "UBFX r6, r11, #0, #8\n\t" + "EOR r5, r5, r7, LSL #8\n\t" + "UBFX r7, r8, #8, #8\n\t" + "EOR r5, r5, lr, LSL #16\n\t" + "UBFX lr, r9, #16, #8\n\t" + "EOR r5, r5, r2, LSL #24\n\t" + "LSR r2, r10, #24\n\t" + "LDRB r7, [r12, r7]\n\t" + "LDRB r2, [r12, r2]\n\t" + "LDRB r6, [r12, r6]\n\t" + "LDRB lr, [r12, lr]\n\t" + "LSR r11, r11, #24\n\t" + "EOR r6, r6, r7, LSL #8\n\t" + "UBFX r7, r8, #0, #8\n\t" + "EOR r6, r6, lr, LSL #16\n\t" + "UBFX lr, r9, #8, #8\n\t" + "EOR r6, r6, r2, LSL #24\n\t" + "UBFX r2, r10, #16, #8\n\t" + "LDRB r11, [r12, r11]\n\t" + "LDRB lr, [r12, lr]\n\t" + "LDRB r7, [r12, r7]\n\t" + "LDRB r2, [r12, r2]\n\t" + "EOR lr, lr, r11, LSL #16\n\t" + "LDM r3, {r8, r9, r10, r11}\n\t" + "EOR r7, r7, lr, LSL #8\n\t" + "EOR r7, r7, r2, LSL #16\n\t" + /* XOR in Key Schedule */ + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + : [td] "+r" (td), [nr] "+r" (nr) + : + : "memory", "lr" + ); +} + +static const uint32_t* L_AES_Thumb2_td_ecb = L_AES_Thumb2_td_data; +static const unsigned char L_AES_Thumb2_td4[] = { + 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, + 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, + 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, + 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, + 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, + 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, + 0x5e, 0x15, 0x46, 0x57, 0xa7, 
0x8d, 0x9d, 0x84, + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, + 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, + 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, + 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, + 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, + 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, + 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, + 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, + 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, + 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, + 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d, +}; + +#if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) +void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, + unsigned long len, const unsigned char* ks, int nr); +void AES_ECB_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p) +{ + register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; + register unsigned char* out asm ("r1") = (unsigned char*)out_p; + register unsigned long len asm ("r2") = (unsigned long)len_p; + register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; + register int nr asm ("r4") = (int)nr_p; + register uint32_t* L_AES_Thumb2_td_ecb_c asm ("r5") = (uint32_t*)L_AES_Thumb2_td_ecb; + register unsigned char* L_AES_Thumb2_td4_c asm ("r6") = (unsigned char*)&L_AES_Thumb2_td4; + + __asm__ __volatile__ ( + "MOV r8, r4\n\t" + "MOV lr, %[in]\n\t" + "MOV r0, %[L_AES_Thumb2_td_ecb]\n\t" + "MOV r12, %[L_AES_Thumb2_td4]\n\t" + "CMP r8, #0xa\n\t" + "BEQ L_AES_ECB_decrypt_start_block_128_%=\n\t" + "CMP r8, #0xc\n\t" + "BEQ L_AES_ECB_decrypt_start_block_192_%=\n\t" + "\n" + "L_AES_ECB_decrypt_loop_block_256_%=:\n\t" + "LDR r4, [lr]\n\t" + "LDR r5, [lr, #4]\n\t" + "LDR r6, [lr, #8]\n\t" + "LDR r7, [lr, #12]\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "PUSH {r1, r2, %[ks], lr}\n\t" + "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + /* Round: 0 - XOR in key schedule */ + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "MOV r1, #0x6\n\t" + "BL AES_decrypt_block\n\t" + "POP {r1, r2, %[ks], lr}\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "STR r4, [%[out]]\n\t" + "STR r5, [%[out], #4]\n\t" + "STR r6, [%[out], #8]\n\t" + "STR r7, [%[out], #12]\n\t" + "SUBS %[len], %[len], #0x10\n\t" + "ADD lr, lr, #0x10\n\t" + "ADD %[out], %[out], #0x10\n\t" + "BNE L_AES_ECB_decrypt_loop_block_256_%=\n\t" + "B L_AES_ECB_decrypt_end_%=\n\t" + "\n" + "L_AES_ECB_decrypt_start_block_192_%=:\n\t" + "\n" + "L_AES_ECB_decrypt_loop_block_192_%=:\n\t" + "LDR r4, [lr]\n\t" + "LDR r5, [lr, #4]\n\t" + "LDR r6, [lr, #8]\n\t" + "LDR r7, [lr, #12]\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "PUSH {r1, r2, %[ks], lr}\n\t" + "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + /* Round: 0 - XOR in key schedule */ + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "MOV r1, #0x5\n\t" + "BL AES_decrypt_block\n\t" + "POP 
{r1, r2, %[ks], lr}\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "STR r4, [%[out]]\n\t" + "STR r5, [%[out], #4]\n\t" + "STR r6, [%[out], #8]\n\t" + "STR r7, [%[out], #12]\n\t" + "SUBS %[len], %[len], #0x10\n\t" + "ADD lr, lr, #0x10\n\t" + "ADD %[out], %[out], #0x10\n\t" + "BNE L_AES_ECB_decrypt_loop_block_192_%=\n\t" + "B L_AES_ECB_decrypt_end_%=\n\t" + "\n" + "L_AES_ECB_decrypt_start_block_128_%=:\n\t" + "\n" + "L_AES_ECB_decrypt_loop_block_128_%=:\n\t" + "LDR r4, [lr]\n\t" + "LDR r5, [lr, #4]\n\t" + "LDR r6, [lr, #8]\n\t" + "LDR r7, [lr, #12]\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "PUSH {r1, r2, %[ks], lr}\n\t" + "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + /* Round: 0 - XOR in key schedule */ + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "MOV r1, #0x4\n\t" + "BL AES_decrypt_block\n\t" + "POP {r1, r2, %[ks], lr}\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "STR r4, [%[out]]\n\t" + "STR r5, [%[out], #4]\n\t" + "STR r6, [%[out], #8]\n\t" + "STR r7, [%[out], #12]\n\t" + "SUBS %[len], %[len], #0x10\n\t" + "ADD lr, lr, #0x10\n\t" + "ADD %[out], %[out], #0x10\n\t" + "BNE L_AES_ECB_decrypt_loop_block_128_%=\n\t" + "\n" + "L_AES_ECB_decrypt_end_%=:\n\t" + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [L_AES_Thumb2_td_ecb] "+r" (L_AES_Thumb2_td_ecb_c), [L_AES_Thumb2_td4] "+r" (L_AES_Thumb2_td4_c) + : + : "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11" + ); + (void)nr; +} + +#endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ +#ifdef HAVE_AES_CBC +void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, + unsigned long len, const unsigned char* ks, int nr, unsigned char* iv); +void AES_CBC_decrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* iv_p) +{ + register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; + register unsigned char* out asm ("r1") = (unsigned char*)out_p; + register unsigned long len asm ("r2") = (unsigned long)len_p; + register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; + register int nr asm ("r4") = (int)nr_p; + register unsigned char* iv asm ("r5") = (unsigned char*)iv_p; + register uint32_t* L_AES_Thumb2_td_ecb_c asm ("r6") = (uint32_t*)L_AES_Thumb2_td_ecb; + register unsigned char* L_AES_Thumb2_td4_c asm ("r7") = (unsigned char*)&L_AES_Thumb2_td4; + + __asm__ __volatile__ ( + "MOV r8, r4\n\t" + "MOV r4, r5\n\t" + "MOV lr, %[in]\n\t" + "MOV r0, %[L_AES_Thumb2_td_ecb]\n\t" + "MOV r12, %[L_AES_Thumb2_td4]\n\t" + "PUSH {%[ks], r4}\n\t" + "CMP r8, #0xa\n\t" + "BEQ L_AES_CBC_decrypt_loop_block_128_%=\n\t" + "CMP r8, #0xc\n\t" + "BEQ L_AES_CBC_decrypt_loop_block_192_%=\n\t" + "\n" + "L_AES_CBC_decrypt_loop_block_256_%=:\n\t" + "PUSH {r1, r2, lr}\n\t" + "LDR r4, [lr]\n\t" + "LDR r5, [lr, #4]\n\t" + "LDR r6, [lr, #8]\n\t" + "LDR r7, [lr, #12]\n\t" + "LDR lr, [sp, #16]\n\t" + "STRD r4, r5, [lr, #16]\n\t" + "STRD r6, r7, [lr, #24]\n\t" + "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + /* Round: 0 - XOR in key schedule */ + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "MOV r1, #0x6\n\t" + "BL AES_decrypt_block\n\t" + "LDR lr, [sp, #16]\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "LDM lr, {r8, r9, r10, 
r11}\n\t" + "POP {r1, r2, lr}\n\t" + "LDR %[ks], [sp]\n\t" + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "STR r4, [%[out]]\n\t" + "STR r5, [%[out], #4]\n\t" + "STR r6, [%[out], #8]\n\t" + "STR r7, [%[out], #12]\n\t" + "SUBS %[len], %[len], #0x10\n\t" + "ADD lr, lr, #0x10\n\t" + "ADD %[out], %[out], #0x10\n\t" + "BEQ L_AES_CBC_decrypt_end_odd_%=\n\t" + "PUSH {r1, r2, lr}\n\t" + "LDR r4, [lr]\n\t" + "LDR r5, [lr, #4]\n\t" + "LDR r6, [lr, #8]\n\t" + "LDR r7, [lr, #12]\n\t" + "LDR lr, [sp, #16]\n\t" + "STRD r4, r5, [lr]\n\t" + "STRD r6, r7, [lr, #8]\n\t" + "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + /* Round: 0 - XOR in key schedule */ + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "MOV r1, #0x6\n\t" + "BL AES_decrypt_block\n\t" + "LDR lr, [sp, #16]\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "LDRD r8, r9, [lr, #16]\n\t" + "LDRD r10, r11, [lr, #24]\n\t" + "POP {r1, r2, lr}\n\t" + "LDR %[ks], [sp]\n\t" + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "STR r4, [%[out]]\n\t" + "STR r5, [%[out], #4]\n\t" + "STR r6, [%[out], #8]\n\t" + "STR r7, [%[out], #12]\n\t" + "SUBS %[len], %[len], #0x10\n\t" + "ADD lr, lr, #0x10\n\t" + "ADD %[out], %[out], #0x10\n\t" + "BNE L_AES_CBC_decrypt_loop_block_256_%=\n\t" + "B L_AES_CBC_decrypt_end_%=\n\t" + "\n" + "L_AES_CBC_decrypt_loop_block_192_%=:\n\t" + "PUSH {r1, r2, lr}\n\t" + "LDR r4, [lr]\n\t" + "LDR r5, [lr, #4]\n\t" + "LDR r6, [lr, #8]\n\t" + "LDR r7, [lr, #12]\n\t" + "LDR lr, [sp, #16]\n\t" + "STRD r4, r5, [lr, #16]\n\t" + "STRD r6, r7, [lr, #24]\n\t" + "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + /* Round: 0 - XOR in key schedule */ + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "MOV r1, #0x5\n\t" + "BL AES_decrypt_block\n\t" + "LDR lr, [sp, #16]\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "LDM lr, {r8, r9, r10, r11}\n\t" + "POP {r1, r2, lr}\n\t" + "LDR %[ks], [sp]\n\t" + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "STR r4, [%[out]]\n\t" + "STR r5, [%[out], #4]\n\t" + "STR r6, [%[out], #8]\n\t" + "STR r7, [%[out], #12]\n\t" + "SUBS %[len], %[len], #0x10\n\t" + "ADD lr, lr, #0x10\n\t" + "ADD %[out], %[out], #0x10\n\t" + "BEQ L_AES_CBC_decrypt_end_odd_%=\n\t" + "PUSH {r1, r2, lr}\n\t" + "LDR r4, [lr]\n\t" + "LDR r5, [lr, #4]\n\t" + "LDR r6, [lr, #8]\n\t" + "LDR r7, [lr, #12]\n\t" + "LDR lr, [sp, #16]\n\t" + "STRD r4, r5, [lr]\n\t" + "STRD r6, r7, [lr, #8]\n\t" + "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + /* Round: 0 - XOR in key schedule */ + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "MOV r1, #0x5\n\t" + "BL AES_decrypt_block\n\t" + "LDR lr, [sp, #16]\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "LDRD r8, r9, [lr, #16]\n\t" + "LDRD r10, r11, [lr, #24]\n\t" + "POP {r1, r2, lr}\n\t" + "LDR %[ks], [sp]\n\t" + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "STR r4, [%[out]]\n\t" + "STR r5, [%[out], #4]\n\t" + "STR r6, [%[out], #8]\n\t" + "STR r7, [%[out], #12]\n\t" + "SUBS %[len], %[len], #0x10\n\t" 
+ "ADD lr, lr, #0x10\n\t" + "ADD %[out], %[out], #0x10\n\t" + "BNE L_AES_CBC_decrypt_loop_block_192_%=\n\t" + "B L_AES_CBC_decrypt_end_%=\n\t" + "\n" + "L_AES_CBC_decrypt_loop_block_128_%=:\n\t" + "PUSH {r1, r2, lr}\n\t" + "LDR r4, [lr]\n\t" + "LDR r5, [lr, #4]\n\t" + "LDR r6, [lr, #8]\n\t" + "LDR r7, [lr, #12]\n\t" + "LDR lr, [sp, #16]\n\t" + "STRD r4, r5, [lr, #16]\n\t" + "STRD r6, r7, [lr, #24]\n\t" + "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + /* Round: 0 - XOR in key schedule */ + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "MOV r1, #0x4\n\t" + "BL AES_decrypt_block\n\t" + "LDR lr, [sp, #16]\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "LDM lr, {r8, r9, r10, r11}\n\t" + "POP {r1, r2, lr}\n\t" + "LDR %[ks], [sp]\n\t" + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "STR r4, [%[out]]\n\t" + "STR r5, [%[out], #4]\n\t" + "STR r6, [%[out], #8]\n\t" + "STR r7, [%[out], #12]\n\t" + "SUBS %[len], %[len], #0x10\n\t" + "ADD lr, lr, #0x10\n\t" + "ADD %[out], %[out], #0x10\n\t" + "BEQ L_AES_CBC_decrypt_end_odd_%=\n\t" + "PUSH {r1, r2, lr}\n\t" + "LDR r4, [lr]\n\t" + "LDR r5, [lr, #4]\n\t" + "LDR r6, [lr, #8]\n\t" + "LDR r7, [lr, #12]\n\t" + "LDR lr, [sp, #16]\n\t" + "STRD r4, r5, [lr]\n\t" + "STRD r6, r7, [lr, #8]\n\t" + "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + /* Round: 0 - XOR in key schedule */ + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "MOV r1, #0x4\n\t" + "BL AES_decrypt_block\n\t" + "LDR lr, [sp, #16]\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "LDRD r8, r9, [lr, #16]\n\t" + "LDRD r10, r11, [lr, #24]\n\t" + "POP {r1, r2, lr}\n\t" + "LDR %[ks], [sp]\n\t" + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "STR r4, [%[out]]\n\t" + "STR r5, [%[out], #4]\n\t" + "STR r6, [%[out], #8]\n\t" + "STR r7, [%[out], #12]\n\t" + "SUBS %[len], %[len], #0x10\n\t" + "ADD lr, lr, #0x10\n\t" + "ADD %[out], %[out], #0x10\n\t" + "BNE L_AES_CBC_decrypt_loop_block_128_%=\n\t" + "B L_AES_CBC_decrypt_end_%=\n\t" + "\n" + "L_AES_CBC_decrypt_end_odd_%=:\n\t" + "LDR r4, [sp, #4]\n\t" + "LDRD r8, r9, [r4, #16]\n\t" + "LDRD r10, r11, [r4, #24]\n\t" + "STRD r8, r9, [r4]\n\t" + "STRD r10, r11, [r4, #8]\n\t" + "\n" + "L_AES_CBC_decrypt_end_%=:\n\t" + "POP {%[ks], r4}\n\t" + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [iv] "+r" (iv), [L_AES_Thumb2_td_ecb] "+r" (L_AES_Thumb2_td_ecb_c), [L_AES_Thumb2_td4] "+r" (L_AES_Thumb2_td4_c) + : + : "memory", "r12", "lr", "r8", "r9", "r10", "r11" + ); + (void)nr; + (void)iv; +} + +#endif /* HAVE_AES_CBC */ +#endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER || HAVE_AES_CBC */ +#endif /* HAVE_AES_DECRYPT */ +#ifdef HAVE_AESGCM +static const uint32_t L_GCM_gmult_len_r[] = { + 0x00000000, 0x1c200000, 0x38400000, 0x24600000, + 0x70800000, 0x6ca00000, 0x48c00000, 0x54e00000, + 0xe1000000, 0xfd200000, 0xd9400000, 0xc5600000, + 0x91800000, 0x8da00000, 0xa9c00000, 0xb5e00000, +}; + +void GCM_gmult_len(unsigned char* x, const unsigned char** m, + const unsigned char* data, unsigned long len); +void GCM_gmult_len(unsigned char* x_p, const unsigned char** m_p, const unsigned char* data_p, unsigned long len_p) +{ + register unsigned char* x asm 
("r0") = (unsigned char*)x_p; + register const unsigned char** m asm ("r1") = (const unsigned char**)m_p; + register const unsigned char* data asm ("r2") = (const unsigned char*)data_p; + register unsigned long len asm ("r3") = (unsigned long)len_p; + register uint32_t* L_GCM_gmult_len_r_c asm ("r4") = (uint32_t*)&L_GCM_gmult_len_r; + + __asm__ __volatile__ ( + "MOV lr, %[L_GCM_gmult_len_r]\n\t" + "\n" + "L_GCM_gmult_len_start_block_%=:\n\t" + "PUSH {r3}\n\t" + "LDR r12, [r0, #12]\n\t" + "LDR %[len], [r2, #12]\n\t" + "EOR r12, r12, %[len]\n\t" + "LSR %[len], r12, #24\n\t" + "AND %[len], %[len], #0xf\n\t" + "ADD %[len], %[m], %[len], LSL #4\n\t" + "LDM %[len], {r8, r9, r10, r11}\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "LSR r4, r12, #28\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "ADD r4, %[m], r4, LSL #4\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "LDM r4, {r4, r5, r6, r7}\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "LSR r4, r12, #16\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "AND r4, r4, #0xf\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "ADD r4, %[m], r4, LSL #4\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "LDM r4, {r4, r5, r6, r7}\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "LSR r4, r12, #20\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "AND r4, r4, #0xf\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "ADD r4, %[m], r4, LSL #4\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "LDM r4, {r4, r5, r6, r7}\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "LSR r4, r12, #8\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "AND r4, r4, #0xf\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "ADD r4, %[m], r4, LSL #4\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "LDM r4, {r4, r5, r6, r7}\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "LSR r4, r12, #12\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "AND r4, r4, #0xf\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "ADD r4, %[m], r4, LSL #4\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "LDM r4, {r4, r5, r6, r7}\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "AND r4, r12, #0xf\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "ADD r4, %[m], r4, LSL #4\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "LDM r4, {r4, r5, r6, r7}\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, 
r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "LSR r4, r12, #4\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "AND r4, r4, #0xf\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "ADD r4, %[m], r4, LSL #4\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "LDM r4, {r4, r5, r6, r7}\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "LDR r12, [r0, #8]\n\t" + "LDR %[len], [r2, #8]\n\t" + "EOR r12, r12, %[len]\n\t" + "LSR %[len], r12, #24\n\t" + "AND %[len], %[len], #0xf\n\t" + "ADD %[len], %[m], %[len], LSL #4\n\t" + "LDM %[len], {r4, r5, r6, r7}\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "LSR r4, r12, #28\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "ADD r4, %[m], r4, LSL #4\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "LDM r4, {r4, r5, r6, r7}\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "LSR r4, r12, #16\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "AND r4, r4, #0xf\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "ADD r4, %[m], r4, LSL #4\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "LDM r4, {r4, r5, r6, r7}\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "LSR r4, r12, #20\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "AND r4, r4, #0xf\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "ADD r4, %[m], r4, LSL #4\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "LDM r4, {r4, r5, r6, r7}\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "LSR r4, r12, #8\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "AND r4, r4, #0xf\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "ADD r4, %[m], r4, LSL #4\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "LDM r4, {r4, r5, r6, r7}\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "LSR r4, r12, #12\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "AND r4, r4, #0xf\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "ADD r4, %[m], r4, LSL #4\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "LDM r4, {r4, r5, r6, r7}\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR 
r11, r11, #4\n\t" + "AND r4, r12, #0xf\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "ADD r4, %[m], r4, LSL #4\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "LDM r4, {r4, r5, r6, r7}\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "LSR r4, r12, #4\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "AND r4, r4, #0xf\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "ADD r4, %[m], r4, LSL #4\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "LDM r4, {r4, r5, r6, r7}\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "LDR r12, [r0, #4]\n\t" + "LDR %[len], [r2, #4]\n\t" + "EOR r12, r12, %[len]\n\t" + "LSR %[len], r12, #24\n\t" + "AND %[len], %[len], #0xf\n\t" + "ADD %[len], %[m], %[len], LSL #4\n\t" + "LDM %[len], {r4, r5, r6, r7}\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "LSR r4, r12, #28\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "ADD r4, %[m], r4, LSL #4\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "LDM r4, {r4, r5, r6, r7}\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "LSR r4, r12, #16\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "AND r4, r4, #0xf\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "ADD r4, %[m], r4, LSL #4\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "LDM r4, {r4, r5, r6, r7}\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "LSR r4, r12, #20\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "AND r4, r4, #0xf\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "ADD r4, %[m], r4, LSL #4\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "LDM r4, {r4, r5, r6, r7}\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "LSR r4, r12, #8\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "AND r4, r4, #0xf\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "ADD r4, %[m], r4, LSL #4\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "LDM r4, {r4, r5, r6, r7}\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "LSR r4, r12, #12\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "AND r4, r4, 
#0xf\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "ADD r4, %[m], r4, LSL #4\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "LDM r4, {r4, r5, r6, r7}\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "AND r4, r12, #0xf\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "ADD r4, %[m], r4, LSL #4\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "LDM r4, {r4, r5, r6, r7}\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "LSR r4, r12, #4\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "AND r4, r4, #0xf\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "ADD r4, %[m], r4, LSL #4\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "LDM r4, {r4, r5, r6, r7}\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "LDR r12, [r0]\n\t" + "LDR %[len], [r2]\n\t" + "EOR r12, r12, %[len]\n\t" + "LSR %[len], r12, #24\n\t" + "AND %[len], %[len], #0xf\n\t" + "ADD %[len], %[m], %[len], LSL #4\n\t" + "LDM %[len], {r4, r5, r6, r7}\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "LSR r4, r12, #28\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "ADD r4, %[m], r4, LSL #4\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "LDM r4, {r4, r5, r6, r7}\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "LSR r4, r12, #16\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "AND r4, r4, #0xf\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "ADD r4, %[m], r4, LSL #4\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "LDM r4, {r4, r5, r6, r7}\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "LSR r4, r12, #20\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "AND r4, r4, #0xf\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "ADD r4, %[m], r4, LSL #4\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "LDM r4, {r4, r5, r6, r7}\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "LSR r4, r12, #8\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "AND r4, r4, #0xf\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "ADD r4, %[m], r4, LSL #4\n\t" + "EOR r10, r6, r9, LSL 
#28\n\t" + "LSR r9, r9, #4\n\t" + "LDM r4, {r4, r5, r6, r7}\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "LSR r4, r12, #12\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "AND r4, r4, #0xf\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "ADD r4, %[m], r4, LSL #4\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "LDM r4, {r4, r5, r6, r7}\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "AND r4, r12, #0xf\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "ADD r4, %[m], r4, LSL #4\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "LDM r4, {r4, r5, r6, r7}\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "LSR r6, r10, #4\n\t" + "AND %[len], r11, #0xf\n\t" + "LSR r11, r11, #4\n\t" + "LSR r4, r12, #4\n\t" + "EOR r11, r11, r10, LSL #28\n\t" + "AND r4, r4, #0xf\n\t" + "LDR %[len], [lr, r3, LSL #2]\n\t" + "ADD r4, %[m], r4, LSL #4\n\t" + "EOR r10, r6, r9, LSL #28\n\t" + "LSR r9, r9, #4\n\t" + "LDM r4, {r4, r5, r6, r7}\n\t" + "EOR r9, r9, r8, LSL #28\n\t" + "EOR r8, %[len], r8, LSR #4\n\t" + "EOR r8, r8, r4\n\t" + "EOR r9, r9, r5\n\t" + "EOR r10, r10, r6\n\t" + "EOR r11, r11, r7\n\t" + "REV r8, r8\n\t" + "REV r9, r9\n\t" + "REV r10, r10\n\t" + "REV r11, r11\n\t" + "STM %[x], {r8, r9, r10, r11}\n\t" + "POP {r3}\n\t" + "SUBS %[len], %[len], #0x10\n\t" + "ADD %[data], %[data], #0x10\n\t" + "BNE L_GCM_gmult_len_start_block_%=\n\t" + : [x] "+r" (x), [m] "+r" (m), [data] "+r" (data), [len] "+r" (len), [L_GCM_gmult_len_r] "+r" (L_GCM_gmult_len_r_c) + : + : "memory", "r12", "lr", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); +} + +static const uint32_t* L_AES_Thumb2_te_gcm = L_AES_Thumb2_te_data; +void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, + unsigned long len, const unsigned char* ks, int nr, unsigned char* ctr); +void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned long len_p, const unsigned char* ks_p, int nr_p, unsigned char* ctr_p) +{ + register const unsigned char* in asm ("r0") = (const unsigned char*)in_p; + register unsigned char* out asm ("r1") = (unsigned char*)out_p; + register unsigned long len asm ("r2") = (unsigned long)len_p; + register const unsigned char* ks asm ("r3") = (const unsigned char*)ks_p; + register int nr asm ("r4") = (int)nr_p; + register unsigned char* ctr asm ("r5") = (unsigned char*)ctr_p; + register uint32_t* L_AES_Thumb2_te_gcm_c asm ("r6") = (uint32_t*)L_AES_Thumb2_te_gcm; + + __asm__ __volatile__ ( + "MOV r12, r4\n\t" + "MOV r8, r5\n\t" + "MOV lr, %[in]\n\t" + "MOV r0, %[L_AES_Thumb2_te_gcm]\n\t" + "LDM r8, {r4, r5, r6, r7}\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "STM r8, {r4, r5, r6, r7}\n\t" + "PUSH {%[ks], r8}\n\t" + "CMP r12, #0xa\n\t" + "BEQ L_AES_GCM_encrypt_start_block_128_%=\n\t" + "CMP r12, #0xc\n\t" + "BEQ L_AES_GCM_encrypt_start_block_192_%=\n\t" + "\n" + "L_AES_GCM_encrypt_loop_block_256_%=:\n\t" + "PUSH {r1, %[len], lr}\n\t" + "LDR lr, [sp, #16]\n\t" + "ADD r7, r7, #0x1\n\t" + "LDM %[ks]!, {r8, r9, r10, 
r11}\n\t" + "STR r7, [lr, #12]\n\t" + /* Round: 0 - XOR in key schedule */ + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "MOV r1, #0x6\n\t" + "BL AES_encrypt_block\n\t" + "POP {r1, %[len], lr}\n\t" + "LDR %[ks], [sp]\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "LDR r8, [lr]\n\t" + "LDR r9, [lr, #4]\n\t" + "LDR r10, [lr, #8]\n\t" + "LDR r11, [lr, #12]\n\t" + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "LDR r8, [sp, #4]\n\t" + "STR r4, [%[out]]\n\t" + "STR r5, [%[out], #4]\n\t" + "STR r6, [%[out], #8]\n\t" + "STR r7, [%[out], #12]\n\t" + "LDM r8, {r4, r5, r6, r7}\n\t" + "SUBS %[len], %[len], #0x10\n\t" + "ADD lr, lr, #0x10\n\t" + "ADD %[out], %[out], #0x10\n\t" + "BNE L_AES_GCM_encrypt_loop_block_256_%=\n\t" + "B L_AES_GCM_encrypt_end_%=\n\t" + "\n" + "L_AES_GCM_encrypt_start_block_192_%=:\n\t" + "\n" + "L_AES_GCM_encrypt_loop_block_192_%=:\n\t" + "PUSH {r1, %[len], lr}\n\t" + "LDR lr, [sp, #16]\n\t" + "ADD r7, r7, #0x1\n\t" + "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "STR r7, [lr, #12]\n\t" + /* Round: 0 - XOR in key schedule */ + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "MOV r1, #0x5\n\t" + "BL AES_encrypt_block\n\t" + "POP {r1, %[len], lr}\n\t" + "LDR %[ks], [sp]\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "LDR r8, [lr]\n\t" + "LDR r9, [lr, #4]\n\t" + "LDR r10, [lr, #8]\n\t" + "LDR r11, [lr, #12]\n\t" + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "LDR r8, [sp, #4]\n\t" + "STR r4, [%[out]]\n\t" + "STR r5, [%[out], #4]\n\t" + "STR r6, [%[out], #8]\n\t" + "STR r7, [%[out], #12]\n\t" + "LDM r8, {r4, r5, r6, r7}\n\t" + "SUBS %[len], %[len], #0x10\n\t" + "ADD lr, lr, #0x10\n\t" + "ADD %[out], %[out], #0x10\n\t" + "BNE L_AES_GCM_encrypt_loop_block_192_%=\n\t" + "B L_AES_GCM_encrypt_end_%=\n\t" + "\n" + "L_AES_GCM_encrypt_start_block_128_%=:\n\t" + "\n" + "L_AES_GCM_encrypt_loop_block_128_%=:\n\t" + "PUSH {r1, %[len], lr}\n\t" + "LDR lr, [sp, #16]\n\t" + "ADD r7, r7, #0x1\n\t" + "LDM %[ks]!, {r8, r9, r10, r11}\n\t" + "STR r7, [lr, #12]\n\t" + /* Round: 0 - XOR in key schedule */ + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "MOV r1, #0x4\n\t" + "BL AES_encrypt_block\n\t" + "POP {r1, %[len], lr}\n\t" + "LDR %[ks], [sp]\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "LDR r8, [lr]\n\t" + "LDR r9, [lr, #4]\n\t" + "LDR r10, [lr, #8]\n\t" + "LDR r11, [lr, #12]\n\t" + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r6, r6, r10\n\t" + "EOR r7, r7, r11\n\t" + "LDR r8, [sp, #4]\n\t" + "STR r4, [%[out]]\n\t" + "STR r5, [%[out], #4]\n\t" + "STR r6, [%[out], #8]\n\t" + "STR r7, [%[out], #12]\n\t" + "LDM r8, {r4, r5, r6, r7}\n\t" + "SUBS %[len], %[len], #0x10\n\t" + "ADD lr, lr, #0x10\n\t" + "ADD %[out], %[out], #0x10\n\t" + "BNE L_AES_GCM_encrypt_loop_block_128_%=\n\t" + "\n" + "L_AES_GCM_encrypt_end_%=:\n\t" + "POP {%[ks], r8}\n\t" + "REV r4, r4\n\t" + "REV r5, r5\n\t" + "REV r6, r6\n\t" + "REV r7, r7\n\t" + "STM r8, {r4, r5, r6, r7}\n\t" + : [in] "+r" (in), [out] "+r" (out), [len] "+r" (len), [ks] "+r" (ks), [nr] "+r" (nr), [ctr] "+r" (ctr), [L_AES_Thumb2_te_gcm] "+r" (L_AES_Thumb2_te_gcm_c) + : + : "memory", "r12", "lr", "r7", "r8", "r9", "r10", "r11" + ); + (void)nr; + (void)ctr; +} + +#endif /* HAVE_AESGCM */ +#endif /* !NO_AES */ +#endif /* 
!__aarch64__ && __thumb__ */ +#endif /* WOLFSSL_ARMASM */ +#endif /* WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/thumb2-curve25519.S b/wolfcrypt/src/port/arm/thumb2-curve25519.S index c6d1f9d2e..65911ca40 100644 --- a/wolfcrypt/src/port/arm/thumb2-curve25519.S +++ b/wolfcrypt/src/port/arm/thumb2-curve25519.S @@ -280,13 +280,14 @@ fe_1: # Set one MOV r2, #0x1 MOV r3, #0x0 - STRD r2, r3, [r0] + STM r0!, {r2, r3} MOV r2, #0x0 - STRD r2, r3, [r0, #8] - STRD r2, r3, [r0, #16] - STRD r2, r3, [r0, #24] + STM r0!, {r2, r3} + STM r0!, {r2, r3} + STM r0!, {r2, r3} + SUB r0, r0, #0x20 BX lr - # Cycle Count = 19 + # Cycle Count = 20 .size fe_1,.-fe_1 .text .align 4 @@ -296,12 +297,13 @@ fe_0: # Set zero MOV r2, #0x0 MOV r3, #0x0 - STRD r2, r3, [r0] - STRD r2, r3, [r0, #8] - STRD r2, r3, [r0, #16] - STRD r2, r3, [r0, #24] + STM r0!, {r2, r3} + STM r0!, {r2, r3} + STM r0!, {r2, r3} + STM r0!, {r2, r3} + SUB r0, r0, #0x20 BX lr - # Cycle Count = 18 + # Cycle Count = 19 .size fe_0,.-fe_0 .text .align 4 @@ -1751,6 +1753,7 @@ fe_sq: POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} # Cycle Count = 24 .size fe_sq,.-fe_sq +#ifdef HAVE_CURVE25519 .text .align 4 .globl fe_mul121666 @@ -1803,26 +1806,31 @@ curve25519: # Set one MOV r10, #0x1 MOV r11, #0x0 - STRD r10, r11, [r0] + STM r0!, {r10, r11} MOV r10, #0x0 - STRD r10, r11, [r0, #8] - STRD r10, r11, [r0, #16] - STRD r10, r11, [r0, #24] + STM r0!, {r10, r11} + STM r0!, {r10, r11} + STM r0!, {r10, r11} + SUB r0, r0, #0x20 + MOV r3, sp # Set zero MOV r10, #0x0 MOV r11, #0x0 - STRD r10, r11, [sp] - STRD r10, r11, [sp, #8] - STRD r10, r11, [sp, #16] - STRD r10, r11, [sp, #24] + STM r3!, {r10, r11} + STM r3!, {r10, r11} + STM r3!, {r10, r11} + STM r3!, {r10, r11} + SUB r3, r3, #0x20 + ADD r3, sp, #0x20 # Set one MOV r10, #0x1 MOV r11, #0x0 - STRD r10, r11, [sp, #32] + STM r3!, {r10, r11} MOV r10, #0x0 - STRD r10, r11, [sp, #40] - STRD r10, r11, [sp, #48] - STRD r10, r11, [sp, #56] + STM r3!, {r10, r11} + STM r3!, {r10, r11} + STM r3!, {r10, r11} + SUB r3, r3, #0x20 ADD r3, sp, #0x40 # Copy LDM r2, {r4, r5, r6, r7, r8, r9, r10, r11} @@ -1845,8 +1853,10 @@ L_curve25519_bits: LDR r0, [sp, #160] # Conditional Swap RSB r1, r1, #0x0 - LDRD r4, r5, [r0] - LDRD r6, r7, [sp, #64] + MOV r3, r0 + ADD r12, sp, #0x40 + LDM r3, {r4, r5} + LDM r12, {r6, r7} EOR r8, r4, r6 EOR r9, r5, r7 AND r8, r8, r1 @@ -1855,10 +1865,10 @@ L_curve25519_bits: EOR r5, r5, r9 EOR r6, r6, r8 EOR r7, r7, r9 - STRD r4, r5, [r0] - STRD r6, r7, [sp, #64] - LDRD r4, r5, [r0, #8] - LDRD r6, r7, [sp, #72] + STM r3!, {r4, r5} + STM r12!, {r6, r7} + LDM r3, {r4, r5} + LDM r12, {r6, r7} EOR r8, r4, r6 EOR r9, r5, r7 AND r8, r8, r1 @@ -1867,10 +1877,10 @@ L_curve25519_bits: EOR r5, r5, r9 EOR r6, r6, r8 EOR r7, r7, r9 - STRD r4, r5, [r0, #8] - STRD r6, r7, [sp, #72] - LDRD r4, r5, [r0, #16] - LDRD r6, r7, [sp, #80] + STM r3!, {r4, r5} + STM r12!, {r6, r7} + LDM r3, {r4, r5} + LDM r12, {r6, r7} EOR r8, r4, r6 EOR r9, r5, r7 AND r8, r8, r1 @@ -1879,10 +1889,10 @@ L_curve25519_bits: EOR r5, r5, r9 EOR r6, r6, r8 EOR r7, r7, r9 - STRD r4, r5, [r0, #16] - STRD r6, r7, [sp, #80] - LDRD r4, r5, [r0, #24] - LDRD r6, r7, [sp, #88] + STM r3!, {r4, r5} + STM r12!, {r6, r7} + LDM r3, {r4, r5} + LDM r12, {r6, r7} EOR r8, r4, r6 EOR r9, r5, r7 AND r8, r8, r1 @@ -1891,13 +1901,15 @@ L_curve25519_bits: EOR r5, r5, r9 EOR r6, r6, r8 EOR r7, r7, r9 - STRD r4, r5, [r0, #24] - STRD r6, r7, [sp, #88] + STM r3!, {r4, r5} + STM r12!, {r6, r7} LDR r1, [sp, #172] # Conditional Swap RSB r1, r1, #0x0 - LDRD r4, r5, [sp] - 
LDRD r6, r7, [sp, #32] + MOV r3, sp + ADD r12, sp, #0x20 + LDM r3, {r4, r5} + LDM r12, {r6, r7} EOR r8, r4, r6 EOR r9, r5, r7 AND r8, r8, r1 @@ -1906,10 +1918,10 @@ L_curve25519_bits: EOR r5, r5, r9 EOR r6, r6, r8 EOR r7, r7, r9 - STRD r4, r5, [sp] - STRD r6, r7, [sp, #32] - LDRD r4, r5, [sp, #8] - LDRD r6, r7, [sp, #40] + STM r3!, {r4, r5} + STM r12!, {r6, r7} + LDM r3, {r4, r5} + LDM r12, {r6, r7} EOR r8, r4, r6 EOR r9, r5, r7 AND r8, r8, r1 @@ -1918,10 +1930,10 @@ L_curve25519_bits: EOR r5, r5, r9 EOR r6, r6, r8 EOR r7, r7, r9 - STRD r4, r5, [sp, #8] - STRD r6, r7, [sp, #40] - LDRD r4, r5, [sp, #16] - LDRD r6, r7, [sp, #48] + STM r3!, {r4, r5} + STM r12!, {r6, r7} + LDM r3, {r4, r5} + LDM r12, {r6, r7} EOR r8, r4, r6 EOR r9, r5, r7 AND r8, r8, r1 @@ -1930,10 +1942,10 @@ L_curve25519_bits: EOR r5, r5, r9 EOR r6, r6, r8 EOR r7, r7, r9 - STRD r4, r5, [sp, #16] - STRD r6, r7, [sp, #48] - LDRD r4, r5, [sp, #24] - LDRD r6, r7, [sp, #56] + STM r3!, {r4, r5} + STM r12!, {r6, r7} + LDM r3, {r4, r5} + LDM r12, {r6, r7} EOR r8, r4, r6 EOR r9, r5, r7 AND r8, r8, r1 @@ -1942,8 +1954,8 @@ L_curve25519_bits: EOR r5, r5, r9 EOR r6, r6, r8 EOR r7, r7, r9 - STRD r4, r5, [sp, #24] - STRD r6, r7, [sp, #56] + STM r3!, {r4, r5} + STM r12!, {r6, r7} LDR r1, [sp, #184] STR r1, [sp, #172] MOV r3, sp @@ -2165,7 +2177,7 @@ L_curve25519_inv_8: MOV r0, #0x0 ADD sp, sp, #0xbc POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 684 + # Cycle Count = 693 .size curve25519,.-curve25519 #else .text @@ -2188,26 +2200,31 @@ curve25519: # Set one MOV r10, #0x1 MOV r11, #0x0 - STRD r10, r11, [r0] + STM r0!, {r10, r11} MOV r10, #0x0 - STRD r10, r11, [r0, #8] - STRD r10, r11, [r0, #16] - STRD r10, r11, [r0, #24] + STM r0!, {r10, r11} + STM r0!, {r10, r11} + STM r0!, {r10, r11} + SUB r0, r0, #0x20 + MOV r3, sp # Set zero MOV r10, #0x0 MOV r11, #0x0 - STRD r10, r11, [sp] - STRD r10, r11, [sp, #8] - STRD r10, r11, [sp, #16] - STRD r10, r11, [sp, #24] + STM r3!, {r10, r11} + STM r3!, {r10, r11} + STM r3!, {r10, r11} + STM r3!, {r10, r11} + SUB r3, r3, #0x20 + ADD r3, sp, #0x20 # Set one MOV r10, #0x1 MOV r11, #0x0 - STRD r10, r11, [sp, #32] + STM r3!, {r10, r11} MOV r10, #0x0 - STRD r10, r11, [sp, #40] - STRD r10, r11, [sp, #48] - STRD r10, r11, [sp, #56] + STM r3!, {r10, r11} + STM r3!, {r10, r11} + STM r3!, {r10, r11} + SUB r3, r3, #0x20 ADD r3, sp, #0x40 # Copy LDM r2, {r4, r5, r6, r7, r8, r9, r10, r11} @@ -2470,9 +2487,10 @@ L_curve25519_inv_8: MOV r0, #0x0 ADD sp, sp, #0xc0 POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} - # Cycle Count = 595 + # Cycle Count = 600 .size curve25519,.-curve25519 #endif /* WC_NO_CACHE_RESISTANT */ +#endif /* HAVE_CURVE25519 */ #ifdef HAVE_ED25519 .text .align 4 diff --git a/wolfcrypt/src/port/arm/thumb2-curve25519_c.c b/wolfcrypt/src/port/arm/thumb2-curve25519_c.c index e28885cb6..531137ba7 100644 --- a/wolfcrypt/src/port/arm/thumb2-curve25519_c.c +++ b/wolfcrypt/src/port/arm/thumb2-curve25519_c.c @@ -48,7 +48,7 @@ #if defined(HAVE_CURVE25519) || defined(HAVE_ED25519) #if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL) -void fe_init(void) +void fe_init() { __asm__ __volatile__ ( "\n\t" @@ -59,7 +59,7 @@ void fe_init(void) } void fe_add_sub_op(void); -void fe_add_sub_op(void) +void fe_add_sub_op() { __asm__ __volatile__ ( /* Add-Sub */ @@ -156,7 +156,7 @@ void fe_add_sub_op(void) } void fe_sub_op(void); -void fe_sub_op(void) +void fe_sub_op() { __asm__ __volatile__ ( /* Sub */ @@ -190,18 +190,22 @@ void fe_sub_op(void) ); } -void fe_sub(fe r, const fe a, const fe b) +void fe_sub(fe r_p, 
const fe a_p, const fe b_p) { + register sword32* r asm ("r0") = (sword32*)r_p; + register const sword32* a asm ("r1") = (const sword32*)a_p; + register const sword32* b asm ("r2") = (const sword32*)b_p; + __asm__ __volatile__ ( "BL fe_sub_op\n\t" - : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); } void fe_add_op(void); -void fe_add_op(void) +void fe_add_op() { __asm__ __volatile__ ( /* Add */ @@ -235,31 +239,41 @@ void fe_add_op(void) ); } -void fe_add(fe r, const fe a, const fe b) +void fe_add(fe r_p, const fe a_p, const fe b_p) { + register sword32* r asm ("r0") = (sword32*)r_p; + register const sword32* a asm ("r1") = (const sword32*)a_p; + register const sword32* b asm ("r2") = (const sword32*)b_p; + __asm__ __volatile__ ( "BL fe_add_op\n\t" - : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); } #ifdef HAVE_ED25519 -void fe_frombytes(fe out, const unsigned char* in) +void fe_frombytes(fe out_p, const unsigned char* in_p) { + register sword32* out asm ("r0") = (sword32*)out_p; + register const unsigned char* in asm ("r1") = (const unsigned char*)in_p; + __asm__ __volatile__ ( "LDM %[in], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" "BFC r9, #31, #1\n\t" "STM %[out], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" - : [out] "+l" (out), [in] "+l" (in) + : [out] "+r" (out), [in] "+r" (in) : : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } -void fe_tobytes(unsigned char* out, const fe n) +void fe_tobytes(unsigned char* out_p, const fe n_p) { + register unsigned char* out asm ("r0") = (unsigned char*)out_p; + register const sword32* n asm ("r1") = (const sword32*)n_p; + __asm__ __volatile__ ( "LDM %[n], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" "ADDS r10, r2, #0x13\n\t" @@ -282,47 +296,56 @@ void fe_tobytes(unsigned char* out, const fe n) "ADC r9, r9, #0x0\n\t" "BFC r9, #31, #1\n\t" "STM %[out], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" - : [out] "+l" (out), [n] "+l" (n) + : [out] "+r" (out), [n] "+r" (n) : : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); } -void fe_1(fe n) +void fe_1(fe n_p) { + register sword32* n asm ("r0") = (sword32*)n_p; + __asm__ __volatile__ ( /* Set one */ "MOV r2, #0x1\n\t" "MOV r3, #0x0\n\t" - "STRD r2, r3, [%[n]]\n\t" + "STM %[n]!, {r2, r3}\n\t" "MOV r2, #0x0\n\t" - "STRD r2, r3, [%[n], #8]\n\t" - "STRD r2, r3, [%[n], #16]\n\t" - "STRD r2, r3, [%[n], #24]\n\t" - : [n] "+l" (n) + "STM %[n]!, {r2, r3}\n\t" + "STM %[n]!, {r2, r3}\n\t" + "STM %[n]!, {r2, r3}\n\t" + "SUB %[n], %[n], #0x20\n\t" + : [n] "+r" (n) : : "memory", "r2", "r3" ); } -void fe_0(fe n) +void fe_0(fe n_p) { + register sword32* n asm ("r0") = (sword32*)n_p; + __asm__ __volatile__ ( /* Set zero */ "MOV r2, #0x0\n\t" "MOV r3, #0x0\n\t" - "STRD r2, r3, [%[n]]\n\t" - "STRD r2, r3, [%[n], #8]\n\t" - "STRD r2, r3, [%[n], #16]\n\t" - "STRD r2, r3, [%[n], #24]\n\t" - : [n] "+l" (n) + "STM %[n]!, {r2, r3}\n\t" + "STM %[n]!, {r2, r3}\n\t" + "STM %[n]!, {r2, r3}\n\t" + "STM %[n]!, {r2, r3}\n\t" + "SUB %[n], %[n], #0x20\n\t" + : [n] "+r" (n) : : "memory", "r2", "r3" ); } -void fe_copy(fe r, const fe a) +void fe_copy(fe r_p, const fe a_p) { + register sword32* r asm ("r0") = (sword32*)r_p; + register const sword32* a asm ("r1") = (const sword32*)a_p; + __asm__ __volatile__ ( /* Copy */ "LDRD r2, r3, [%[a]]\n\t" @@ -333,14 +356,17 @@ void fe_copy(fe r, const 
fe a) "LDRD r4, r5, [%[a], #24]\n\t" "STRD r2, r3, [%[r], #16]\n\t" "STRD r4, r5, [%[r], #24]\n\t" - : [r] "+l" (r), [a] "+l" (a) + : [r] "+r" (r), [a] "+r" (a) : : "memory", "r2", "r3", "r4", "r5" ); } -void fe_neg(fe r, const fe a) +void fe_neg(fe r_p, const fe a_p) { + register sword32* r asm ("r0") = (sword32*)r_p; + register const sword32* a asm ("r1") = (const sword32*)a_p; + __asm__ __volatile__ ( "MVN r7, #0x0\n\t" "MVN r6, #0x12\n\t" @@ -357,14 +383,16 @@ void fe_neg(fe r, const fe a) "SBCS r4, r7, r4\n\t" "SBC r5, r6, r5\n\t" "STM %[r]!, {r2, r3, r4, r5}\n\t" - : [r] "+l" (r), [a] "+l" (a) + : [r] "+r" (r), [a] "+r" (a) : : "memory", "r2", "r3", "r4", "r5", "r6", "r7" ); } -int fe_isnonzero(const fe a) +int fe_isnonzero(const fe a_p) { + register const sword32* a asm ("r0") = (const sword32*)a_p; + __asm__ __volatile__ ( "LDM %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" "ADDS r1, r2, #0x13\n\t" @@ -393,15 +421,17 @@ int fe_isnonzero(const fe a) "ORR r4, r4, r6\n\t" "ORR r2, r2, r8\n\t" "ORR %[a], r2, r4\n\t" - : [a] "+l" (a) + : [a] "+r" (a) : : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); return (uint32_t)(size_t)a; } -int fe_isnegative(const fe a) +int fe_isnegative(const fe a_p) { + register const sword32* a asm ("r0") = (const sword32*)a_p; + __asm__ __volatile__ ( "LDM %[a]!, {r2, r3, r4, r5}\n\t" "ADDS r1, r2, #0x13\n\t" @@ -417,7 +447,7 @@ int fe_isnegative(const fe a) "AND %[a], r2, #0x1\n\t" "LSR r1, r1, #31\n\t" "EOR %[a], %[a], r1\n\t" - : [a] "+l" (a) + : [a] "+r" (a) : : "memory", "r1", "r2", "r3", "r4", "r5" ); @@ -425,8 +455,12 @@ int fe_isnegative(const fe a) } #ifndef WC_NO_CACHE_RESISTANT -void fe_cmov_table(fe* r, fe* base, signed char b) +void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) { + register fe* r asm ("r0") = (fe*)r_p; + register fe* base asm ("r1") = (fe*)base_p; + register signed char b asm ("r2") = (signed char)b_p; + __asm__ __volatile__ ( "SXTB %[b], %[b]\n\t" "SBFX r3, %[b], #7, #1\n\t" @@ -1391,15 +1425,19 @@ void fe_cmov_table(fe* r, fe* base, signed char b) "STRD r4, r5, [%[r], #24]\n\t" "STRD r6, r7, [%[r], #56]\n\t" "STRD r8, r9, [%[r], #88]\n\t" - : [r] "+l" (r), [base] "+l" (base), [b] "+l" (b) + : [r] "+r" (r), [base] "+r" (base), [b] "+r" (b) : : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r3", "r10", "r11", "r12", "lr" ); } #else -void fe_cmov_table(fe* r, fe* base, signed char b) +void fe_cmov_table(fe* r_p, fe* base_p, signed char b_p) { + register fe* r asm ("r0") = (fe*)r_p; + register fe* base asm ("r1") = (fe*)base_p; + register signed char b asm ("r2") = (signed char)b_p; + __asm__ __volatile__ ( "SXTB %[b], %[b]\n\t" "SBFX r3, %[b], #7, #1\n\t" @@ -1493,7 +1531,7 @@ void fe_cmov_table(fe* r, fe* base, signed char b) "AND r7, r7, lr\n\t" "STM %[r]!, {r4, r5, r6, r7}\n\t" "SUB %[base], %[base], %[b]\n\t" - : [r] "+l" (r), [base] "+l" (base), [b] "+l" (b) + : [r] "+r" (r), [base] "+r" (base), [b] "+r" (b) : : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); @@ -1502,7 +1540,7 @@ void fe_cmov_table(fe* r, fe* base, signed char b) #endif /* WC_NO_CACHE_RESISTANT */ #endif /* HAVE_ED25519 */ void fe_mul_op(void); -void fe_mul_op(void) +void fe_mul_op() { __asm__ __volatile__ ( "SUB sp, sp, #0x2c\n\t" @@ -1634,18 +1672,22 @@ void fe_mul_op(void) ); } -void fe_mul(fe r, const fe a, const fe b) +void fe_mul(fe r_p, const fe a_p, const fe b_p) { + register sword32* r asm ("r0") = (sword32*)r_p; + register const sword32* a asm ("r1") = (const sword32*)a_p; + register 
const sword32* b asm ("r2") = (const sword32*)b_p; + __asm__ __volatile__ ( "BL fe_mul_op\n\t" - : [r] "+l" (r), [a] "+l" (a), [b] "+l" (b) + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); } void fe_sq_op(void); -void fe_sq_op(void) +void fe_sq_op() { __asm__ __volatile__ ( "SUB sp, sp, #0x20\n\t" @@ -1763,18 +1805,25 @@ void fe_sq_op(void) ); } -void fe_sq(fe r, const fe a) +void fe_sq(fe r_p, const fe a_p) { + register sword32* r asm ("r0") = (sword32*)r_p; + register const sword32* a asm ("r1") = (const sword32*)a_p; + __asm__ __volatile__ ( "BL fe_sq_op\n\t" - : [r] "+l" (r), [a] "+l" (a) + : [r] "+r" (r), [a] "+r" (a) : : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); } -void fe_mul121666(fe r, fe a) +#ifdef HAVE_CURVE25519 +void fe_mul121666(fe r_p, fe a_p) { + register sword32* r asm ("r0") = (sword32*)r_p; + register sword32* a asm ("r1") = (sword32*)a_p; + __asm__ __volatile__ ( /* Multiply by 121666 */ "LDM %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" @@ -1803,15 +1852,19 @@ void fe_mul121666(fe r, fe a) "ADCS r8, r8, #0x0\n\t" "ADC r9, r9, #0x0\n\t" "STM %[r], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" - : [r] "+l" (r), [a] "+l" (a) + : [r] "+r" (r), [a] "+r" (a) : : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); } #ifndef WC_NO_CACHE_RESISTANT -int curve25519(byte* r, const byte* n, const byte* a) +int curve25519(byte* r_p, const byte* n_p, const byte* a_p) { + register byte* r asm ("r0") = (byte*)r_p; + register const byte* n asm ("r1") = (const byte*)n_p; + register const byte* a asm ("r2") = (const byte*)a_p; + __asm__ __volatile__ ( "SUB sp, sp, #0xbc\n\t" "STR %[r], [sp, #160]\n\t" @@ -1822,26 +1875,31 @@ int curve25519(byte* r, const byte* n, const byte* a) /* Set one */ "MOV r10, #0x1\n\t" "MOV r11, #0x0\n\t" - "STRD r10, r11, [%[r]]\n\t" + "STM %[r]!, {r10, r11}\n\t" "MOV r10, #0x0\n\t" - "STRD r10, r11, [%[r], #8]\n\t" - "STRD r10, r11, [%[r], #16]\n\t" - "STRD r10, r11, [%[r], #24]\n\t" + "STM %[r]!, {r10, r11}\n\t" + "STM %[r]!, {r10, r11}\n\t" + "STM %[r]!, {r10, r11}\n\t" + "SUB %[r], %[r], #0x20\n\t" + "MOV r3, sp\n\t" /* Set zero */ "MOV r10, #0x0\n\t" "MOV r11, #0x0\n\t" - "STRD r10, r11, [sp]\n\t" - "STRD r10, r11, [sp, #8]\n\t" - "STRD r10, r11, [sp, #16]\n\t" - "STRD r10, r11, [sp, #24]\n\t" + "STM r3!, {r10, r11}\n\t" + "STM r3!, {r10, r11}\n\t" + "STM r3!, {r10, r11}\n\t" + "STM r3!, {r10, r11}\n\t" + "SUB r3, r3, #0x20\n\t" + "ADD r3, sp, #0x20\n\t" /* Set one */ "MOV r10, #0x1\n\t" "MOV r11, #0x0\n\t" - "STRD r10, r11, [sp, #32]\n\t" + "STM r3!, {r10, r11}\n\t" "MOV r10, #0x0\n\t" - "STRD r10, r11, [sp, #40]\n\t" - "STRD r10, r11, [sp, #48]\n\t" - "STRD r10, r11, [sp, #56]\n\t" + "STM r3!, {r10, r11}\n\t" + "STM r3!, {r10, r11}\n\t" + "STM r3!, {r10, r11}\n\t" + "SUB r3, r3, #0x20\n\t" "ADD r3, sp, #0x40\n\t" /* Copy */ "LDM r2, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" @@ -1866,8 +1924,10 @@ int curve25519(byte* r, const byte* n, const byte* a) "LDR %[r], [sp, #160]\n\t" /* Conditional Swap */ "RSB %[n], %[n], #0x0\n\t" - "LDRD r4, r5, [%[r]]\n\t" - "LDRD r6, r7, [sp, #64]\n\t" + "MOV r3, r0\n\t" + "ADD r12, sp, #0x40\n\t" + "LDM r3, {r4, r5}\n\t" + "LDM r12, {r6, r7}\n\t" "EOR r8, r4, r6\n\t" "EOR r9, r5, r7\n\t" "AND r8, r8, %[n]\n\t" @@ -1876,10 +1936,10 @@ int curve25519(byte* r, const byte* n, const byte* a) "EOR r5, r5, r9\n\t" "EOR r6, r6, r8\n\t" "EOR r7, r7, r9\n\t" - "STRD r4, r5, [%[r]]\n\t" - "STRD 
r6, r7, [sp, #64]\n\t" - "LDRD r4, r5, [%[r], #8]\n\t" - "LDRD r6, r7, [sp, #72]\n\t" + "STM r3!, {r4, r5}\n\t" + "STM r12!, {r6, r7}\n\t" + "LDM r3, {r4, r5}\n\t" + "LDM r12, {r6, r7}\n\t" "EOR r8, r4, r6\n\t" "EOR r9, r5, r7\n\t" "AND r8, r8, %[n]\n\t" @@ -1888,10 +1948,10 @@ int curve25519(byte* r, const byte* n, const byte* a) "EOR r5, r5, r9\n\t" "EOR r6, r6, r8\n\t" "EOR r7, r7, r9\n\t" - "STRD r4, r5, [%[r], #8]\n\t" - "STRD r6, r7, [sp, #72]\n\t" - "LDRD r4, r5, [%[r], #16]\n\t" - "LDRD r6, r7, [sp, #80]\n\t" + "STM r3!, {r4, r5}\n\t" + "STM r12!, {r6, r7}\n\t" + "LDM r3, {r4, r5}\n\t" + "LDM r12, {r6, r7}\n\t" "EOR r8, r4, r6\n\t" "EOR r9, r5, r7\n\t" "AND r8, r8, %[n]\n\t" @@ -1900,10 +1960,10 @@ int curve25519(byte* r, const byte* n, const byte* a) "EOR r5, r5, r9\n\t" "EOR r6, r6, r8\n\t" "EOR r7, r7, r9\n\t" - "STRD r4, r5, [%[r], #16]\n\t" - "STRD r6, r7, [sp, #80]\n\t" - "LDRD r4, r5, [%[r], #24]\n\t" - "LDRD r6, r7, [sp, #88]\n\t" + "STM r3!, {r4, r5}\n\t" + "STM r12!, {r6, r7}\n\t" + "LDM r3, {r4, r5}\n\t" + "LDM r12, {r6, r7}\n\t" "EOR r8, r4, r6\n\t" "EOR r9, r5, r7\n\t" "AND r8, r8, %[n]\n\t" @@ -1912,13 +1972,15 @@ int curve25519(byte* r, const byte* n, const byte* a) "EOR r5, r5, r9\n\t" "EOR r6, r6, r8\n\t" "EOR r7, r7, r9\n\t" - "STRD r4, r5, [%[r], #24]\n\t" - "STRD r6, r7, [sp, #88]\n\t" + "STM r3!, {r4, r5}\n\t" + "STM r12!, {r6, r7}\n\t" "LDR %[n], [sp, #172]\n\t" /* Conditional Swap */ "RSB %[n], %[n], #0x0\n\t" - "LDRD r4, r5, [sp]\n\t" - "LDRD r6, r7, [sp, #32]\n\t" + "MOV r3, sp\n\t" + "ADD r12, sp, #0x20\n\t" + "LDM r3, {r4, r5}\n\t" + "LDM r12, {r6, r7}\n\t" "EOR r8, r4, r6\n\t" "EOR r9, r5, r7\n\t" "AND r8, r8, %[n]\n\t" @@ -1927,10 +1989,10 @@ int curve25519(byte* r, const byte* n, const byte* a) "EOR r5, r5, r9\n\t" "EOR r6, r6, r8\n\t" "EOR r7, r7, r9\n\t" - "STRD r4, r5, [sp]\n\t" - "STRD r6, r7, [sp, #32]\n\t" - "LDRD r4, r5, [sp, #8]\n\t" - "LDRD r6, r7, [sp, #40]\n\t" + "STM r3!, {r4, r5}\n\t" + "STM r12!, {r6, r7}\n\t" + "LDM r3, {r4, r5}\n\t" + "LDM r12, {r6, r7}\n\t" "EOR r8, r4, r6\n\t" "EOR r9, r5, r7\n\t" "AND r8, r8, %[n]\n\t" @@ -1939,10 +2001,10 @@ int curve25519(byte* r, const byte* n, const byte* a) "EOR r5, r5, r9\n\t" "EOR r6, r6, r8\n\t" "EOR r7, r7, r9\n\t" - "STRD r4, r5, [sp, #8]\n\t" - "STRD r6, r7, [sp, #40]\n\t" - "LDRD r4, r5, [sp, #16]\n\t" - "LDRD r6, r7, [sp, #48]\n\t" + "STM r3!, {r4, r5}\n\t" + "STM r12!, {r6, r7}\n\t" + "LDM r3, {r4, r5}\n\t" + "LDM r12, {r6, r7}\n\t" "EOR r8, r4, r6\n\t" "EOR r9, r5, r7\n\t" "AND r8, r8, %[n]\n\t" @@ -1951,10 +2013,10 @@ int curve25519(byte* r, const byte* n, const byte* a) "EOR r5, r5, r9\n\t" "EOR r6, r6, r8\n\t" "EOR r7, r7, r9\n\t" - "STRD r4, r5, [sp, #16]\n\t" - "STRD r6, r7, [sp, #48]\n\t" - "LDRD r4, r5, [sp, #24]\n\t" - "LDRD r6, r7, [sp, #56]\n\t" + "STM r3!, {r4, r5}\n\t" + "STM r12!, {r6, r7}\n\t" + "LDM r3, {r4, r5}\n\t" + "LDM r12, {r6, r7}\n\t" "EOR r8, r4, r6\n\t" "EOR r9, r5, r7\n\t" "AND r8, r8, %[n]\n\t" @@ -1963,8 +2025,8 @@ int curve25519(byte* r, const byte* n, const byte* a) "EOR r5, r5, r9\n\t" "EOR r6, r6, r8\n\t" "EOR r7, r7, r9\n\t" - "STRD r4, r5, [sp, #24]\n\t" - "STRD r6, r7, [sp, #56]\n\t" + "STM r3!, {r4, r5}\n\t" + "STM r12!, {r6, r7}\n\t" "LDR %[n], [sp, #184]\n\t" "STR %[n], [sp, #172]\n\t" "MOV r3, sp\n\t" @@ -2193,7 +2255,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "BL fe_mul_op\n\t" "MOV r0, #0x0\n\t" "ADD sp, sp, #0xbc\n\t" - : [r] "+l" (r), [n] "+l" (n), [a] "+l" (a) + : [r] "+r" (r), [n] "+r" (n), [a] "+r" (a) : : "memory", "r4", 
"r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "lr" ); @@ -2201,8 +2263,12 @@ int curve25519(byte* r, const byte* n, const byte* a) } #else -int curve25519(byte* r, const byte* n, const byte* a) +int curve25519(byte* r_p, const byte* n_p, const byte* a_p) { + register byte* r asm ("r0") = (byte*)r_p; + register const byte* n asm ("r1") = (const byte*)n_p; + register const byte* a asm ("r2") = (const byte*)a_p; + __asm__ __volatile__ ( "SUB sp, sp, #0xc0\n\t" "STR %[r], [sp, #176]\n\t" @@ -2218,26 +2284,31 @@ int curve25519(byte* r, const byte* n, const byte* a) /* Set one */ "MOV r10, #0x1\n\t" "MOV r11, #0x0\n\t" - "STRD r10, r11, [%[r]]\n\t" + "STM %[r]!, {r10, r11}\n\t" "MOV r10, #0x0\n\t" - "STRD r10, r11, [%[r], #8]\n\t" - "STRD r10, r11, [%[r], #16]\n\t" - "STRD r10, r11, [%[r], #24]\n\t" + "STM %[r]!, {r10, r11}\n\t" + "STM %[r]!, {r10, r11}\n\t" + "STM %[r]!, {r10, r11}\n\t" + "SUB %[r], %[r], #0x20\n\t" + "MOV r3, sp\n\t" /* Set zero */ "MOV r10, #0x0\n\t" "MOV r11, #0x0\n\t" - "STRD r10, r11, [sp]\n\t" - "STRD r10, r11, [sp, #8]\n\t" - "STRD r10, r11, [sp, #16]\n\t" - "STRD r10, r11, [sp, #24]\n\t" + "STM r3!, {r10, r11}\n\t" + "STM r3!, {r10, r11}\n\t" + "STM r3!, {r10, r11}\n\t" + "STM r3!, {r10, r11}\n\t" + "SUB r3, r3, #0x20\n\t" + "ADD r3, sp, #0x20\n\t" /* Set one */ "MOV r10, #0x1\n\t" "MOV r11, #0x0\n\t" - "STRD r10, r11, [sp, #32]\n\t" + "STM r3!, {r10, r11}\n\t" "MOV r10, #0x0\n\t" - "STRD r10, r11, [sp, #40]\n\t" - "STRD r10, r11, [sp, #48]\n\t" - "STRD r10, r11, [sp, #56]\n\t" + "STM r3!, {r10, r11}\n\t" + "STM r3!, {r10, r11}\n\t" + "STM r3!, {r10, r11}\n\t" + "SUB r3, r3, #0x20\n\t" "ADD r3, sp, #0x40\n\t" /* Copy */ "LDM r2, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" @@ -2508,7 +2579,7 @@ int curve25519(byte* r, const byte* n, const byte* a) "STM %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "MOV r0, #0x0\n\t" "ADD sp, sp, #0xc0\n\t" - : [r] "+l" (r), [n] "+l" (n), [a] "+l" (a) + : [r] "+r" (r), [n] "+r" (n), [a] "+r" (a) : : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12", "lr" ); @@ -2516,9 +2587,13 @@ int curve25519(byte* r, const byte* n, const byte* a) } #endif /* WC_NO_CACHE_RESISTANT */ +#endif /* HAVE_CURVE25519 */ #ifdef HAVE_ED25519 -void fe_invert(fe r, const fe a) +void fe_invert(fe r_p, const fe a_p) { + register sword32* r asm ("r0") = (sword32*)r_p; + register const sword32* a asm ("r1") = (const sword32*)a_p; + __asm__ __volatile__ ( "SUB sp, sp, #0x88\n\t" /* Invert */ @@ -2678,14 +2753,17 @@ void fe_invert(fe r, const fe a) "LDR %[a], [sp, #132]\n\t" "LDR %[r], [sp, #128]\n\t" "ADD sp, sp, #0x88\n\t" - : [r] "+l" (r), [a] "+l" (a) + : [r] "+r" (r), [a] "+r" (a) : : "memory", "lr", "r12", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } -void fe_sq2(fe r, const fe a) +void fe_sq2(fe r_p, const fe a_p) { + register sword32* r asm ("r0") = (sword32*)r_p; + register const sword32* a asm ("r1") = (const sword32*)a_p; + __asm__ __volatile__ ( "SUB sp, sp, #0x24\n\t" "STRD r0, r1, [sp, #28]\n\t" @@ -2831,14 +2909,17 @@ void fe_sq2(fe r, const fe a) "STM r12, {r0, r1, r2, r3, r4, r5, r6, r7}\n\t" "MOV r0, r12\n\t" "MOV r1, lr\n\t" - : [r] "+l" (r), [a] "+l" (a) + : [r] "+r" (r), [a] "+r" (a) : : "memory", "lr" ); } -void fe_pow22523(fe r, const fe a) +void fe_pow22523(fe r_p, const fe a_p) { + register sword32* r asm ("r0") = (sword32*)r_p; + register const sword32* a asm ("r1") = (const sword32*)a_p; + __asm__ __volatile__ ( "SUB sp, sp, #0x68\n\t" /* pow22523 */ @@ -2998,14 +3079,17 @@ void fe_pow22523(fe 
r, const fe a) "LDR %[a], [sp, #100]\n\t" "LDR %[r], [sp, #96]\n\t" "ADD sp, sp, #0x68\n\t" - : [r] "+l" (r), [a] "+l" (a) + : [r] "+r" (r), [a] "+r" (a) : : "memory", "lr", "r12", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } -void ge_p1p1_to_p2(ge_p2 * r, const ge_p1p1 * p) +void ge_p1p1_to_p2(ge_p2 * r_p, const ge_p1p1 * p_p) { + register ge_p2 * r asm ("r0") = (ge_p2 *)r_p; + register const ge_p1p1 * p asm ("r1") = (const ge_p1p1 *)p_p; + __asm__ __volatile__ ( "SUB sp, sp, #0x8\n\t" "STR %[r], [sp]\n\t" @@ -3025,14 +3109,17 @@ void ge_p1p1_to_p2(ge_p2 * r, const ge_p1p1 * p) "ADD r0, r0, #0x40\n\t" "BL fe_mul_op\n\t" "ADD sp, sp, #0x8\n\t" - : [r] "+l" (r), [p] "+l" (p) + : [r] "+r" (r), [p] "+r" (p) : : "memory", "lr", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); } -void ge_p1p1_to_p3(ge_p3 * r, const ge_p1p1 * p) +void ge_p1p1_to_p3(ge_p3 * r_p, const ge_p1p1 * p_p) { + register ge_p3 * r asm ("r0") = (ge_p3 *)r_p; + register const ge_p1p1 * p asm ("r1") = (const ge_p1p1 *)p_p; + __asm__ __volatile__ ( "SUB sp, sp, #0x8\n\t" "STR %[r], [sp]\n\t" @@ -3057,14 +3144,17 @@ void ge_p1p1_to_p3(ge_p3 * r, const ge_p1p1 * p) "ADD r0, r0, #0x60\n\t" "BL fe_mul_op\n\t" "ADD sp, sp, #0x8\n\t" - : [r] "+l" (r), [p] "+l" (p) + : [r] "+r" (r), [p] "+r" (p) : : "memory", "lr", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); } -void ge_p2_dbl(ge_p1p1 * r, const ge_p2 * p) +void ge_p2_dbl(ge_p1p1 * r_p, const ge_p2 * p_p) { + register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; + register const ge_p2 * p asm ("r1") = (const ge_p2 *)p_p; + __asm__ __volatile__ ( "SUB sp, sp, #0x8\n\t" "STR %[r], [sp]\n\t" @@ -3101,14 +3191,18 @@ void ge_p2_dbl(ge_p1p1 * r, const ge_p2 * p) "MOV r1, r0\n\t" "BL fe_sub_op\n\t" "ADD sp, sp, #0x8\n\t" - : [r] "+l" (r), [p] "+l" (p) + : [r] "+r" (r), [p] "+r" (p) : : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); } -void ge_madd(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q) +void ge_madd(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p) { + register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; + register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p; + register const ge_precomp * q asm ("r2") = (const ge_precomp *)q_p; + __asm__ __volatile__ ( "SUB sp, sp, #0xc\n\t" "STR %[r], [sp]\n\t" @@ -3179,14 +3273,18 @@ void ge_madd(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q) "ADD r1, r0, #0x20\n\t" "BL fe_add_sub_op\n\t" "ADD sp, sp, #0xc\n\t" - : [r] "+l" (r), [p] "+l" (p), [q] "+l" (q) + : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q) : : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); } -void ge_msub(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q) +void ge_msub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_precomp * q_p) { + register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p; + register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p; + register const ge_precomp * q asm ("r2") = (const ge_precomp *)q_p; + __asm__ __volatile__ ( "SUB sp, sp, #0xc\n\t" "STR %[r], [sp]\n\t" @@ -3258,14 +3356,18 @@ void ge_msub(ge_p1p1 * r, const ge_p3 * p, const ge_precomp * q) "ADD r0, r0, #0x20\n\t" "BL fe_add_sub_op\n\t" "ADD sp, sp, #0xc\n\t" - : [r] "+l" (r), [p] "+l" (p), [q] "+l" (q) + : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q) : : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); } -void ge_add(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q) +void ge_add(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* 
q_p)
 {
+    register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
+    register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p;
+    register const ge_cached* q asm ("r2") = (const ge_cached*)q_p;
+
     __asm__ __volatile__ (
         "SUB sp, sp, #0x2c\n\t"
         "STR %[r], [sp]\n\t"
@@ -3337,14 +3439,18 @@ void ge_add(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q)
         "ADD r0, r0, #0x20\n\t"
         "BL fe_add_sub_op\n\t"
         "ADD sp, sp, #0x2c\n\t"
-        : [r] "+l" (r), [p] "+l" (p), [q] "+l" (q)
+        : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q)
         :
         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
     );
 }
 
-void ge_sub(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q)
+void ge_sub(ge_p1p1 * r_p, const ge_p3 * p_p, const ge_cached* q_p)
 {
+    register ge_p1p1 * r asm ("r0") = (ge_p1p1 *)r_p;
+    register const ge_p3 * p asm ("r1") = (const ge_p3 *)p_p;
+    register const ge_cached* q asm ("r2") = (const ge_cached*)q_p;
+
     __asm__ __volatile__ (
         "SUB sp, sp, #0x2c\n\t"
         "STR %[r], [sp]\n\t"
@@ -3416,14 +3522,16 @@ void ge_sub(ge_p1p1 * r, const ge_p3 * p, const ge_cached* q)
         "ADD r0, r0, #0x40\n\t"
         "BL fe_add_sub_op\n\t"
         "ADD sp, sp, #0x2c\n\t"
-        : [r] "+l" (r), [p] "+l" (p), [q] "+l" (q)
+        : [r] "+r" (r), [p] "+r" (p), [q] "+r" (q)
         :
         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
     );
 }
 
-void sc_reduce(byte* s)
+void sc_reduce(byte* s_p)
 {
+    register byte* s asm ("r0") = (byte*)s_p;
+
     __asm__ __volatile__ (
         "SUB sp, sp, #0x34\n\t"
         /* Load bits 252-511 */
@@ -3694,14 +3802,19 @@ void sc_reduce(byte* s)
         /* Store result */
         "STM %[s], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t"
         "ADD sp, sp, #0x34\n\t"
-        : [s] "+l" (s)
+        : [s] "+r" (s)
         :
         : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
     );
 }
 
-void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c)
+void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p)
 {
+    register byte* s asm ("r0") = (byte*)s_p;
+    register const byte* a asm ("r1") = (const byte*)a_p;
+    register const byte* b asm ("r2") = (const byte*)b_p;
+    register const byte* c asm ("r3") = (const byte*)c_p;
+
     __asm__ __volatile__ (
         "SUB sp, sp, #0x50\n\t"
         "ADD lr, sp, #0x44\n\t"
@@ -4096,7 +4209,7 @@ void sc_muladd(byte* s, const byte* a, const byte* b, const byte* c)
         /* Store result */
         "STM %[s], {%[b], %[c], r4, r5, r6, r7, r8, r9}\n\t"
         "ADD sp, sp, #0x50\n\t"
-        : [s] "+l" (s), [a] "+l" (a), [b] "+l" (b), [c] "+l" (c)
+        : [s] "+r" (s), [a] "+r" (a), [b] "+r" (b), [c] "+r" (c)
         :
         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr"
     );
diff --git a/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c b/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c
index 3eb6ec355..f7b396a82 100644
--- a/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c
+++ b/wolfcrypt/src/port/arm/thumb2-sha256-asm_c.c
@@ -42,75 +42,32 @@
 
 #ifdef WOLFSSL_ARMASM_NO_NEON
 static const uint32_t L_SHA256_transform_len_k[] = {
-    0x428a2f98,
-    0x71374491,
-    0xb5c0fbcf,
-    0xe9b5dba5,
-    0x3956c25b,
-    0x59f111f1,
-    0x923f82a4,
-    0xab1c5ed5,
-    0xd807aa98,
-    0x12835b01,
-    0x243185be,
-    0x550c7dc3,
-    0x72be5d74,
-    0x80deb1fe,
-    0x9bdc06a7,
-    0xc19bf174,
-    0xe49b69c1,
-    0xefbe4786,
-    0xfc19dc6,
-    0x240ca1cc,
-    0x2de92c6f,
-    0x4a7484aa,
-    0x5cb0a9dc,
-    0x76f988da,
-    0x983e5152,
-    0xa831c66d,
-    0xb00327c8,
-    0xbf597fc7,
-    0xc6e00bf3,
-    0xd5a79147,
-    0x6ca6351,
-    0x14292967,
-    0x27b70a85,
-    0x2e1b2138,
-    0x4d2c6dfc,
-    0x53380d13,
-    0x650a7354,
-    0x766a0abb,
-    0x81c2c92e,
-    0x92722c85,
-    0xa2bfe8a1,
-    0xa81a664b,
-    0xc24b8b70,
-    0xc76c51a3,
-    0xd192e819,
-    0xd6990624,
-    0xf40e3585,
-    0x106aa070,
-    0x19a4c116,
-    0x1e376c08,
-    0x2748774c,
-    0x34b0bcb5,
-    0x391c0cb3,
-    0x4ed8aa4a,
-    0x5b9cca4f,
-    0x682e6ff3,
-    0x748f82ee,
-    0x78a5636f,
-    0x84c87814,
-    0x8cc70208,
-    0x90befffa,
-    0xa4506ceb,
-    0xbef9a3f7,
-    0xc67178f2,
+    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
 };
 
 void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len);
-void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len)
+void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p)
 {
+    register wc_Sha256* sha256 asm ("r0") = (wc_Sha256*)sha256_p;
+    register const byte* data asm ("r1") = (const byte*)data_p;
+    register word32 len asm ("r2") = (word32)len_p;
+    register uint32_t* L_SHA256_transform_len_k_c asm ("r3") = (uint32_t*)&L_SHA256_transform_len_k;
+
     __asm__ __volatile__ (
         "SUB sp, sp, #0xc0\n\t"
         "MOV r3, %[L_SHA256_transform_len_k]\n\t"
@@ -1463,9 +1420,9 @@ void Transform_Sha256_Len(wc_Sha256* sha256, const byte* data, word32 len)
         "ADD %[data], %[data], #0x40\n\t"
         "BNE L_SHA256_transform_len_begin_%=\n\t"
         "ADD sp, sp, #0xc0\n\t"
-        : [sha256] "+l" (sha256), [data] "+l" (data), [len] "+l" (len)
-        : [L_SHA256_transform_len_k] "r" (L_SHA256_transform_len_k)
-        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
+        : [sha256] "+r" (sha256), [data] "+r" (data), [len] "+r" (len), [L_SHA256_transform_len_k] "+r" (L_SHA256_transform_len_k_c)
+        :
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
     );
 }
 
diff --git a/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c b/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c
index 9ec7e190d..9a0cd79c6 100644
--- a/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c
+++ b/wolfcrypt/src/port/arm/thumb2-sha512-asm_c.c
@@ -42,91 +42,56 @@
 
 #ifdef WOLFSSL_ARMASM_NO_NEON
 static const uint64_t L_SHA512_transform_len_k[] = {
-    0x428a2f98d728ae22UL,
-    0x7137449123ef65cdUL,
-    0xb5c0fbcfec4d3b2fUL,
-    0xe9b5dba58189dbbcUL,
-    0x3956c25bf348b538UL,
-    0x59f111f1b605d019UL,
-    0x923f82a4af194f9bUL,
-    0xab1c5ed5da6d8118UL,
-    0xd807aa98a3030242UL,
-    0x12835b0145706fbeUL,
-    0x243185be4ee4b28cUL,
-    0x550c7dc3d5ffb4e2UL,
-    0x72be5d74f27b896fUL,
-    0x80deb1fe3b1696b1UL,
-    0x9bdc06a725c71235UL,
-    0xc19bf174cf692694UL,
-    0xe49b69c19ef14ad2UL,
-    0xefbe4786384f25e3UL,
-    0xfc19dc68b8cd5b5UL,
-    0x240ca1cc77ac9c65UL,
-    0x2de92c6f592b0275UL,
-    0x4a7484aa6ea6e483UL,
-    0x5cb0a9dcbd41fbd4UL,
-    0x76f988da831153b5UL,
-    0x983e5152ee66dfabUL,
-    0xa831c66d2db43210UL,
-    0xb00327c898fb213fUL,
-    0xbf597fc7beef0ee4UL,
-    0xc6e00bf33da88fc2UL,
-    0xd5a79147930aa725UL,
-    0x6ca6351e003826fUL,
-    0x142929670a0e6e70UL,
-    0x27b70a8546d22ffcUL,
-    0x2e1b21385c26c926UL,
-    0x4d2c6dfc5ac42aedUL,
-    0x53380d139d95b3dfUL,
-    0x650a73548baf63deUL,
-    0x766a0abb3c77b2a8UL,
-    0x81c2c92e47edaee6UL,
-    0x92722c851482353bUL,
-    0xa2bfe8a14cf10364UL,
-    0xa81a664bbc423001UL,
-    0xc24b8b70d0f89791UL,
-    0xc76c51a30654be30UL,
-    0xd192e819d6ef5218UL,
-    0xd69906245565a910UL,
-    0xf40e35855771202aUL,
-    0x106aa07032bbd1b8UL,
-    0x19a4c116b8d2d0c8UL,
-    0x1e376c085141ab53UL,
-    0x2748774cdf8eeb99UL,
-    0x34b0bcb5e19b48a8UL,
-    0x391c0cb3c5c95a63UL,
-    0x4ed8aa4ae3418acbUL,
-    0x5b9cca4f7763e373UL,
-    0x682e6ff3d6b2b8a3UL,
-    0x748f82ee5defb2fcUL,
-    0x78a5636f43172f60UL,
-    0x84c87814a1f0ab72UL,
-    0x8cc702081a6439ecUL,
-    0x90befffa23631e28UL,
-    0xa4506cebde82bde9UL,
-    0xbef9a3f7b2c67915UL,
-    0xc67178f2e372532bUL,
-    0xca273eceea26619cUL,
-    0xd186b8c721c0c207UL,
-    0xeada7dd6cde0eb1eUL,
-    0xf57d4f7fee6ed178UL,
-    0x6f067aa72176fbaUL,
-    0xa637dc5a2c898a6UL,
-    0x113f9804bef90daeUL,
-    0x1b710b35131c471bUL,
-    0x28db77f523047d84UL,
-    0x32caab7b40c72493UL,
-    0x3c9ebe0a15c9bebcUL,
-    0x431d67c49c100d4cUL,
-    0x4cc5d4becb3e42b6UL,
-    0x597f299cfc657e2aUL,
-    0x5fcb6fab3ad6faecUL,
-    0x6c44198c4a475817UL,
+    0x428a2f98d728ae22UL, 0x7137449123ef65cdUL,
+    0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL,
+    0x3956c25bf348b538UL, 0x59f111f1b605d019UL,
+    0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL,
+    0xd807aa98a3030242UL, 0x12835b0145706fbeUL,
+    0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL,
+    0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL,
+    0x9bdc06a725c71235UL, 0xc19bf174cf692694UL,
+    0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL,
+    0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL,
+    0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL,
+    0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL,
+    0x983e5152ee66dfabUL, 0xa831c66d2db43210UL,
+    0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL,
+    0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL,
+    0x06ca6351e003826fUL, 0x142929670a0e6e70UL,
+    0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL,
+    0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL,
+    0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL,
+    0x81c2c92e47edaee6UL, 0x92722c851482353bUL,
+    0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL,
+    0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL,
+    0xd192e819d6ef5218UL, 0xd69906245565a910UL,
+    0xf40e35855771202aUL, 0x106aa07032bbd1b8UL,
+    0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL,
+    0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL,
+    0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL,
+    0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL,
+    0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL,
+    0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL,
+    0x90befffa23631e28UL, 0xa4506cebde82bde9UL,
+    0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL,
+    0xca273eceea26619cUL, 0xd186b8c721c0c207UL,
+    0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL,
+    0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL,
+    0x113f9804bef90daeUL, 0x1b710b35131c471bUL,
+    0x28db77f523047d84UL, 0x32caab7b40c72493UL,
+    0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL,
+    0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL,
+    0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL,
 };
 
 void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len);
-void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
+void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p)
 {
+    register wc_Sha512* sha512 asm ("r0") = (wc_Sha512*)sha512_p;
+    register const byte* data asm ("r1") = (const byte*)data_p;
+    register word32 len asm ("r2") = (word32)len_p;
+    register uint64_t* L_SHA512_transform_len_k_c asm ("r3") = (uint64_t*)&L_SHA512_transform_len_k;
+
     __asm__ __volatile__ (
         "SUB sp, sp, #0xc0\n\t"
         "MOV r3, %[L_SHA512_transform_len_k]\n\t"
@@ -3578,9 +3543,9 @@ void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
         "BNE L_SHA512_transform_len_begin_%=\n\t"
         "EOR r0, r0, r0\n\t"
         "ADD sp, sp, #0xc0\n\t"
-        : [sha512] "+l" (sha512), [data] "+l" (data), [len] "+l" (len)
-        : [L_SHA512_transform_len_k] "r" (L_SHA512_transform_len_k)
-        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
+        : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len), [L_SHA512_transform_len_k] "+r" (L_SHA512_transform_len_k_c)
+        :
+        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12"
     );
 }
 
diff --git a/wolfssl/wolfcrypt/aes.h b/wolfssl/wolfcrypt/aes.h
index fb868f237..60fbdb4c4 100644
--- a/wolfssl/wolfcrypt/aes.h
+++ b/wolfssl/wolfcrypt/aes.h
@@ -56,7 +56,7 @@ typedef struct Gcm {
 } Gcm;
 
 WOLFSSL_LOCAL void GenerateM0(Gcm* gcm);
-#if defined(WOLFSSL_ARMASM) && !defined(WOLFSSL_ARMASM_NO_NEON)
+#ifdef WOLFSSL_ARMASM
 WOLFSSL_LOCAL void GMULT(byte* X, byte* Y);
 #endif
 WOLFSSL_LOCAL void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c,
diff --git a/wolfssl/wolfcrypt/ge_operations.h b/wolfssl/wolfcrypt/ge_operations.h
index 1cf87e462..c8a8b8a94 100644
--- a/wolfssl/wolfcrypt/ge_operations.h
+++ b/wolfssl/wolfcrypt/ge_operations.h
@@ -115,7 +115,7 @@ typedef struct {
 void ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p);
 void ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p);
 void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p);
-#define ge_p3_dbl(r, p) ge_p2_dbl((ge_p1p1 *)r, (ge_p2 *)p)
+#define ge_p3_dbl(r, p) ge_p2_dbl((ge_p1p1 *)(r), (ge_p2 *)(p))
 void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q);
 void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q);
 void ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q);
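
For reference, the change applied to every Thumb-2 inline-assembly wrapper above follows one pattern: each argument is copied into a local pinned to a fixed core register with `register ... asm("rN")`, the operands are then passed with "+r" constraints instead of "+l" ("l" limits the compiler to the Thumb low registers r0-r7, which it may be unable to satisfy once r4-r12 and lr are clobbered), and the constant-table pointer becomes another pinned local rather than a separate input operand. The sketch below is illustrative only (the function and the XOR operation are not part of the patch); it shows the same pattern on a trivial operation, assuming an ARM/Thumb-2 GCC or Clang toolchain:

#include <stdint.h>

/* Hypothetical example: XOR one word from `in` into `out` using the same
 * register-pinning pattern as the patched wrappers. */
void xor_word(uint32_t* out_p, const uint32_t* in_p)
{
    /* Pin the arguments to r0/r1 so the hand-written assembly below can rely
     * on their location, mirroring Transform_Sha256_Len() and ge_add(). */
    register uint32_t* out asm ("r0") = (uint32_t*)out_p;
    register const uint32_t* in asm ("r1") = (const uint32_t*)in_p;

    __asm__ __volatile__ (
        "LDR    r2, [%[in]]\n\t"
        "LDR    r3, [%[out]]\n\t"
        "EOR    r3, r3, r2\n\t"
        "STR    r3, [%[out]]\n\t"
        /* "+r" allows any core register; "+l" would restrict the operands
         * to r0-r7. */
        : [out] "+r" (out), [in] "+r" (in)
        :
        : "memory", "r2", "r3"
    );
}

Moving the table pointer out of the input-operand list and binding it to r3 directly (as done for L_SHA256_transform_len_k_c and L_SHA512_transform_len_k_c) also lets "r3" be dropped from the clobber lists, since a register that carries an operand must not be listed as clobbered.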