From ec59acbae0ef1451d88a32503b28c035d47f9274 Mon Sep 17 00:00:00 2001 From: Sean Parkinson Date: Thu, 24 Sep 2020 11:03:50 +1000 Subject: [PATCH] SP Cortex-M4 assembly: Fix for GCC compile b.n not allowed with GCC. Fix sizeof to use tmp_arr not tmp which is now a pointer. --- wolfcrypt/src/sp_cortexm.c | 538 +++++++++++++++++++++++++++++++++++-- 1 file changed, 521 insertions(+), 17 deletions(-) diff --git a/wolfcrypt/src/sp_cortexm.c b/wolfcrypt/src/sp_cortexm.c index 794fc7de0..163e22c40 100644 --- a/wolfcrypt/src/sp_cortexm.c +++ b/wolfcrypt/src/sp_cortexm.c @@ -2099,7 +2099,11 @@ SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, "add %[b], %[b], #4\n\t" "add %[r], %[r], #4\n\t" "cmp %[a], r6\n\t" +#ifdef __GNUC__ + "bne 1b\n\t" +#else "bne.n 1b\n\t" +#endif /* __GNUC__ */ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5", "r6", "r8" @@ -2137,7 +2141,11 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a, "add %[a], %[a], #8\n\t" "add %[b], %[b], #8\n\t" "cmp %[a], r8\n\t" +#ifdef __GNUC__ + "bne 1b\n\t" +#else "bne.n 1b\n\t" +#endif /* __GNUC__ */ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : : "memory", "r3", "r4", "r5", "r6", "r8" @@ -2195,11 +2203,19 @@ SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, "add %[a], %[a], #4\n\t" "sub %[b], %[b], #4\n\t" "cmp %[a], r14\n\t" +#ifdef __GNUC__ + "beq 3f\n\t" +#else "beq.n 3f\n\t" +#endif /* __GNUC__ */ "mov r6, r9\n\t" "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" +#ifdef __GNUC__ + "ble 2b\n\t" +#else "ble.n 2b\n\t" +#endif /* __GNUC__ */ "\n3:\n\t" "mov %[r], r12\n\t" "mov r8, r9\n\t" @@ -2212,7 +2228,11 @@ SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, "lsl r6, r6, #8\n\t" "add r6, r6, #248\n\t" "cmp r8, r6\n\t" +#ifdef __GNUC__ + "ble 1b\n\t" +#else "ble.n 1b\n\t" +#endif /* __GNUC__ */ "str r3, [%[r], r8]\n\t" "mov %[a], r10\n\t" "mov %[b], r11\n\t" @@ -2221,7 +2241,7 @@ SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" ); - XMEMCPY(r, tmp, sizeof(tmp)); + XMEMCPY(r, tmp_arr, sizeof(tmp_arr)); } /* Square a and put result in r. (r = a * a) @@ -2257,7 +2277,11 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) "add r2, r2, r10\n\t" "\n2:\n\t" "cmp r2, %[a]\n\t" +#ifdef __GNUC__ + "beq 4f\n\t" +#else "beq.n 4f\n\t" +#endif /* __GNUC__ */ /* Multiply * 2: Start */ "ldr r6, [%[a]]\n\t" "ldr r8, [r2]\n\t" @@ -2269,7 +2293,11 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Multiply * 2: Done */ +#ifdef __GNUC__ + "bal 5f\n\t" +#else "bal.n 5f\n\t" +#endif /* __GNUC__ */ "\n4:\n\t" /* Square: Start */ "ldr r6, [%[a]]\n\t" @@ -2285,13 +2313,25 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) "lsl r6, r6, #8\n\t" "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" +#ifdef __GNUC__ + "beq 3f\n\t" +#else "beq.n 3f\n\t" +#endif /* __GNUC__ */ "cmp %[a], r2\n\t" +#ifdef __GNUC__ + "bgt 3f\n\t" +#else "bgt.n 3f\n\t" +#endif /* __GNUC__ */ "mov r8, r9\n\t" "add r8, r8, r10\n\t" "cmp %[a], r8\n\t" +#ifdef __GNUC__ + "ble 2b\n\t" +#else "ble.n 2b\n\t" +#endif /* __GNUC__ */ "\n3:\n\t" "mov %[r], r11\n\t" "mov r8, r9\n\t" @@ -2305,7 +2345,11 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) "lsl r6, r6, #8\n\t" "add r6, r6, #248\n\t" "cmp r8, r6\n\t" +#ifdef __GNUC__ + "ble 1b\n\t" +#else "ble.n 1b\n\t" +#endif /* __GNUC__ */ "mov %[a], r10\n\t" "str r3, [%[r], r8]\n\t" "mov %[r], r12\n\t" @@ -2317,7 +2361,11 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" "subs r3, r3, #4\n\t" +#ifdef __GNUC__ + "bge 4b\n\t" +#else "bge.n 4b\n\t" +#endif /* __GNUC__ */ "mov r6, #2\n\t" "lsl r6, r6, #8\n\t" "add sp, sp, r6\n\t" @@ -2375,7 +2423,11 @@ SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, "add %[b], %[b], #4\n\t" "add %[r], %[r], #4\n\t" "cmp %[a], r6\n\t" +#ifdef __GNUC__ + "bne 1b\n\t" +#else "bne.n 1b\n\t" +#endif /* __GNUC__ */ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5", "r6", "r8" @@ -2413,7 +2465,11 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a, "add %[a], %[a], #8\n\t" "add %[b], %[b], #8\n\t" "cmp %[a], r8\n\t" +#ifdef __GNUC__ + "bne 1b\n\t" +#else "bne.n 1b\n\t" +#endif /* __GNUC__ */ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : : "memory", "r3", "r4", "r5", "r6", "r8" @@ -2470,11 +2526,19 @@ SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, "add %[a], %[a], #4\n\t" "sub %[b], %[b], #4\n\t" "cmp %[a], r14\n\t" +#ifdef __GNUC__ + "beq 3f\n\t" +#else "beq.n 3f\n\t" +#endif /* __GNUC__ */ "mov r6, r9\n\t" "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" +#ifdef __GNUC__ + "ble 2b\n\t" +#else "ble.n 2b\n\t" +#endif /* __GNUC__ */ "\n3:\n\t" "mov %[r], r12\n\t" "mov r8, r9\n\t" @@ -2485,7 +2549,11 @@ SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, "mov r9, r8\n\t" "mov r6, #248\n\t" "cmp r8, r6\n\t" +#ifdef __GNUC__ + "ble 1b\n\t" +#else "ble.n 1b\n\t" +#endif /* __GNUC__ */ "str r3, [%[r], r8]\n\t" "mov %[a], r10\n\t" "mov %[b], r11\n\t" @@ -2494,7 +2562,7 @@ SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" ); - XMEMCPY(r, tmp, sizeof(tmp)); + XMEMCPY(r, tmp_arr, sizeof(tmp_arr)); } /* Square a and put result in r. (r = a * a) @@ -2530,7 +2598,11 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) "add r2, r2, r10\n\t" "\n2:\n\t" "cmp r2, %[a]\n\t" +#ifdef __GNUC__ + "beq 4f\n\t" +#else "beq.n 4f\n\t" +#endif /* __GNUC__ */ /* Multiply * 2: Start */ "ldr r6, [%[a]]\n\t" "ldr r8, [r2]\n\t" @@ -2542,7 +2614,11 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Multiply * 2: Done */ +#ifdef __GNUC__ + "bal 5f\n\t" +#else "bal.n 5f\n\t" +#endif /* __GNUC__ */ "\n4:\n\t" /* Square: Start */ "ldr r6, [%[a]]\n\t" @@ -2557,13 +2633,25 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) "mov r6, #128\n\t" "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" +#ifdef __GNUC__ + "beq 3f\n\t" +#else "beq.n 3f\n\t" +#endif /* __GNUC__ */ "cmp %[a], r2\n\t" +#ifdef __GNUC__ + "bgt 3f\n\t" +#else "bgt.n 3f\n\t" +#endif /* __GNUC__ */ "mov r8, r9\n\t" "add r8, r8, r10\n\t" "cmp %[a], r8\n\t" +#ifdef __GNUC__ + "ble 2b\n\t" +#else "ble.n 2b\n\t" +#endif /* __GNUC__ */ "\n3:\n\t" "mov %[r], r11\n\t" "mov r8, r9\n\t" @@ -2575,7 +2663,11 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) "mov r9, r8\n\t" "mov r6, #248\n\t" "cmp r8, r6\n\t" +#ifdef __GNUC__ + "ble 1b\n\t" +#else "ble.n 1b\n\t" +#endif /* __GNUC__ */ "mov %[a], r10\n\t" "str r3, [%[r], r8]\n\t" "mov %[r], r12\n\t" @@ -2585,7 +2677,11 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" "subs r3, r3, #4\n\t" +#ifdef __GNUC__ + "bge 4b\n\t" +#else "bge.n 4b\n\t" +#endif /* __GNUC__ */ "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" "add sp, sp, r6\n\t" @@ -2647,7 +2743,11 @@ SP_NOINLINE static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, "mov r3, r4\n\t" "mov r4, r5\n\t" "cmp %[a], r9\n\t" +#ifdef __GNUC__ + "blt 1b\n\t" +#else "blt.n 1b\n\t" +#endif /* __GNUC__ */ "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a) : [b] "r" (b) @@ -2698,7 +2798,11 @@ SP_NOINLINE static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, "str r5, [%[r], r8]\n\t" "add r8, r8, #4\n\t" "cmp r8, r9\n\t" +#ifdef __GNUC__ + "blt 1b\n\t" +#else "blt.n 1b\n\t" +#endif /* __GNUC__ */ : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) : "memory", "r5", "r6", "r8", "r9" @@ -2757,7 +2861,11 @@ SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, "adc r4, r4, #0\n\t" "str r5, [r10], #4\n\t" "cmp r10, r14\n\t" +#ifdef __GNUC__ + "blt 2b\n\t" +#else "blt.n 2b\n\t" +#endif /* __GNUC__ */ /* a[i+30] += m[30] * mu */ "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" @@ -2790,7 +2898,11 @@ SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, /* Next word in a */ "sub r10, r10, #120\n\t" "cmp r10, r11\n\t" +#ifdef __GNUC__ + "blt 1b\n\t" +#else "blt.n 1b\n\t" +#endif /* __GNUC__ */ "mov %[a], r10\n\t" "mov %[m], r12\n\t" : [ca] "+r" (ca), [a] "+r" (a) @@ -2861,7 +2973,11 @@ SP_NOINLINE static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, "mov r3, r4\n\t" "mov r4, r5\n\t" "cmp %[a], r9\n\t" +#ifdef __GNUC__ + "blt 1b\n\t" +#else "blt.n 1b\n\t" +#endif /* __GNUC__ */ "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a) : [b] "r" (b) @@ -2955,7 +3071,11 @@ SP_NOINLINE static int32_t sp_2048_cmp_32(const sp_digit* a, const sp_digit* b) "and r3, r3, r8\n\t" "sub r6, r6, #4\n\t" "cmp r6, #0\n\t" +#ifdef __GNUC__ + "bge 1b\n\t" +#else "bge.n 1b\n\t" +#endif /* __GNUC__ */ : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) : "r3", "r4", "r5", "r6", "r8" @@ -2967,7 +3087,7 @@ SP_NOINLINE static int32_t sp_2048_cmp_32(const sp_digit* a, const sp_digit* b) /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * - * a Nmber to be divided. + * a Number to be divided. * d Number to divide with. * m Multiplier result. * r Remainder from the division. @@ -3370,7 +3490,11 @@ SP_NOINLINE static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, "str r5, [%[r], r8]\n\t" "add r8, r8, #4\n\t" "cmp r8, r9\n\t" +#ifdef __GNUC__ + "blt 1b\n\t" +#else "blt.n 1b\n\t" +#endif /* __GNUC__ */ : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) : "memory", "r5", "r6", "r8", "r9" @@ -3429,7 +3553,11 @@ SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, "adc r4, r4, #0\n\t" "str r5, [r10], #4\n\t" "cmp r10, r14\n\t" +#ifdef __GNUC__ + "blt 2b\n\t" +#else "blt.n 2b\n\t" +#endif /* __GNUC__ */ /* a[i+62] += m[62] * mu */ "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" @@ -3462,7 +3590,11 @@ SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, /* Next word in a */ "sub r10, r10, #248\n\t" "cmp r10, r11\n\t" +#ifdef __GNUC__ + "blt 1b\n\t" +#else "blt.n 1b\n\t" +#endif /* __GNUC__ */ "mov %[a], r10\n\t" "mov %[m], r12\n\t" : [ca] "+r" (ca), [a] "+r" (a) @@ -3619,7 +3751,11 @@ SP_NOINLINE static int32_t sp_2048_cmp_64(const sp_digit* a, const sp_digit* b) "and r3, r3, r8\n\t" "sub r6, r6, #4\n\t" "cmp r6, #0\n\t" +#ifdef __GNUC__ + "bge 1b\n\t" +#else "bge.n 1b\n\t" +#endif /* __GNUC__ */ : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) : "r3", "r4", "r5", "r6", "r8" @@ -3631,7 +3767,7 @@ SP_NOINLINE static int32_t sp_2048_cmp_64(const sp_digit* a, const sp_digit* b) /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * - * a Nmber to be divided. + * a Number to be divided. * d Number to divide with. * m Multiplier result. * r Remainder from the division. @@ -3681,7 +3817,7 @@ static WC_INLINE int sp_2048_mod_64(sp_digit* r, const sp_digit* a, const sp_dig /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * - * a Nmber to be divided. + * a Number to be divided. * d Number to divide with. * m Multiplier result. * r Remainder from the division. @@ -4201,7 +4337,11 @@ SP_NOINLINE static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, "str r5, [%[r], r8]\n\t" "add r8, r8, #4\n\t" "cmp r8, r9\n\t" +#ifdef __GNUC__ + "blt 1b\n\t" +#else "blt.n 1b\n\t" +#endif /* __GNUC__ */ : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) : "memory", "r5", "r6", "r8", "r9" @@ -5351,11 +5491,19 @@ SP_NOINLINE static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, "add %[a], %[a], #4\n\t" "sub %[b], %[b], #4\n\t" "cmp %[a], r14\n\t" +#ifdef __GNUC__ + "beq 3f\n\t" +#else "beq.n 3f\n\t" +#endif /* __GNUC__ */ "mov r6, r9\n\t" "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" +#ifdef __GNUC__ + "ble 2b\n\t" +#else "ble.n 2b\n\t" +#endif /* __GNUC__ */ "\n3:\n\t" "mov %[r], r12\n\t" "mov r8, r9\n\t" @@ -5366,7 +5514,11 @@ SP_NOINLINE static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, "mov r9, r8\n\t" "mov r6, #88\n\t" "cmp r8, r6\n\t" +#ifdef __GNUC__ + "ble 1b\n\t" +#else "ble.n 1b\n\t" +#endif /* __GNUC__ */ "str r3, [%[r], r8]\n\t" "mov %[a], r10\n\t" "mov %[b], r11\n\t" @@ -5375,7 +5527,7 @@ SP_NOINLINE static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" ); - XMEMCPY(r, tmp, sizeof(tmp)); + XMEMCPY(r, tmp_arr, sizeof(tmp_arr)); } /* Square a and put result in r. (r = a * a) @@ -5410,7 +5562,11 @@ SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) "add r2, r2, r10\n\t" "\n2:\n\t" "cmp r2, %[a]\n\t" +#ifdef __GNUC__ + "beq 4f\n\t" +#else "beq.n 4f\n\t" +#endif /* __GNUC__ */ /* Multiply * 2: Start */ "ldr r6, [%[a]]\n\t" "ldr r8, [r2]\n\t" @@ -5422,7 +5578,11 @@ SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Multiply * 2: Done */ +#ifdef __GNUC__ + "bal 5f\n\t" +#else "bal.n 5f\n\t" +#endif /* __GNUC__ */ "\n4:\n\t" /* Square: Start */ "ldr r6, [%[a]]\n\t" @@ -5437,13 +5597,25 @@ SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) "mov r6, #48\n\t" "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" +#ifdef __GNUC__ + "beq 3f\n\t" +#else "beq.n 3f\n\t" +#endif /* __GNUC__ */ "cmp %[a], r2\n\t" +#ifdef __GNUC__ + "bgt 3f\n\t" +#else "bgt.n 3f\n\t" +#endif /* __GNUC__ */ "mov r8, r9\n\t" "add r8, r8, r10\n\t" "cmp %[a], r8\n\t" +#ifdef __GNUC__ + "ble 2b\n\t" +#else "ble.n 2b\n\t" +#endif /* __GNUC__ */ "\n3:\n\t" "mov %[r], r11\n\t" "mov r8, r9\n\t" @@ -5455,7 +5627,11 @@ SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) "mov r9, r8\n\t" "mov r6, #88\n\t" "cmp r8, r6\n\t" +#ifdef __GNUC__ + "ble 1b\n\t" +#else "ble.n 1b\n\t" +#endif /* __GNUC__ */ "mov %[a], r10\n\t" "str r3, [%[r], r8]\n\t" "mov %[r], r12\n\t" @@ -5465,7 +5641,11 @@ SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" "subs r3, r3, #4\n\t" +#ifdef __GNUC__ + "bge 4b\n\t" +#else "bge.n 4b\n\t" +#endif /* __GNUC__ */ "mov r6, #96\n\t" "add sp, sp, r6\n\t" : @@ -6795,7 +6975,11 @@ SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, "add %[b], %[b], #4\n\t" "add %[r], %[r], #4\n\t" "cmp %[a], r6\n\t" +#ifdef __GNUC__ + "bne 1b\n\t" +#else "bne.n 1b\n\t" +#endif /* __GNUC__ */ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5", "r6", "r8" @@ -6833,7 +7017,11 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a, "add %[a], %[a], #8\n\t" "add %[b], %[b], #8\n\t" "cmp %[a], r8\n\t" +#ifdef __GNUC__ + "bne 1b\n\t" +#else "bne.n 1b\n\t" +#endif /* __GNUC__ */ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : : "memory", "r3", "r4", "r5", "r6", "r8" @@ -6894,11 +7082,19 @@ SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, "add %[a], %[a], #4\n\t" "sub %[b], %[b], #4\n\t" "cmp %[a], r14\n\t" +#ifdef __GNUC__ + "beq 3f\n\t" +#else "beq.n 3f\n\t" +#endif /* __GNUC__ */ "mov r6, r9\n\t" "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" +#ifdef __GNUC__ + "ble 2b\n\t" +#else "ble.n 2b\n\t" +#endif /* __GNUC__ */ "\n3:\n\t" "mov %[r], r12\n\t" "mov r8, r9\n\t" @@ -6911,7 +7107,11 @@ SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, "lsl r6, r6, #8\n\t" "add r6, r6, #248\n\t" "cmp r8, r6\n\t" +#ifdef __GNUC__ + "ble 1b\n\t" +#else "ble.n 1b\n\t" +#endif /* __GNUC__ */ "str r3, [%[r], r8]\n\t" "mov %[a], r10\n\t" "mov %[b], r11\n\t" @@ -6920,7 +7120,7 @@ SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" ); - XMEMCPY(r, tmp, sizeof(tmp)); + XMEMCPY(r, tmp_arr, sizeof(tmp_arr)); } /* Square a and put result in r. (r = a * a) @@ -6958,7 +7158,11 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) "add r2, r2, r10\n\t" "\n2:\n\t" "cmp r2, %[a]\n\t" +#ifdef __GNUC__ + "beq 4f\n\t" +#else "beq.n 4f\n\t" +#endif /* __GNUC__ */ /* Multiply * 2: Start */ "ldr r6, [%[a]]\n\t" "ldr r8, [r2]\n\t" @@ -6970,7 +7174,11 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Multiply * 2: Done */ +#ifdef __GNUC__ + "bal 5f\n\t" +#else "bal.n 5f\n\t" +#endif /* __GNUC__ */ "\n4:\n\t" /* Square: Start */ "ldr r6, [%[a]]\n\t" @@ -6987,13 +7195,25 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) "add r6, r6, #128\n\t" "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" +#ifdef __GNUC__ + "beq 3f\n\t" +#else "beq.n 3f\n\t" +#endif /* __GNUC__ */ "cmp %[a], r2\n\t" +#ifdef __GNUC__ + "bgt 3f\n\t" +#else "bgt.n 3f\n\t" +#endif /* __GNUC__ */ "mov r8, r9\n\t" "add r8, r8, r10\n\t" "cmp %[a], r8\n\t" +#ifdef __GNUC__ + "ble 2b\n\t" +#else "ble.n 2b\n\t" +#endif /* __GNUC__ */ "\n3:\n\t" "mov %[r], r11\n\t" "mov r8, r9\n\t" @@ -7007,7 +7227,11 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) "lsl r6, r6, #8\n\t" "add r6, r6, #248\n\t" "cmp r8, r6\n\t" +#ifdef __GNUC__ + "ble 1b\n\t" +#else "ble.n 1b\n\t" +#endif /* __GNUC__ */ "mov %[a], r10\n\t" "str r3, [%[r], r8]\n\t" "mov %[r], r12\n\t" @@ -7019,7 +7243,11 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" "subs r3, r3, #4\n\t" +#ifdef __GNUC__ + "bge 4b\n\t" +#else "bge.n 4b\n\t" +#endif /* __GNUC__ */ "mov r6, #3\n\t" "lsl r6, r6, #8\n\t" "add sp, sp, r6\n\t" @@ -7077,7 +7305,11 @@ SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, "add %[b], %[b], #4\n\t" "add %[r], %[r], #4\n\t" "cmp %[a], r6\n\t" +#ifdef __GNUC__ + "bne 1b\n\t" +#else "bne.n 1b\n\t" +#endif /* __GNUC__ */ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5", "r6", "r8" @@ -7115,7 +7347,11 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a, "add %[a], %[a], #8\n\t" "add %[b], %[b], #8\n\t" "cmp %[a], r8\n\t" +#ifdef __GNUC__ + "bne 1b\n\t" +#else "bne.n 1b\n\t" +#endif /* __GNUC__ */ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : : "memory", "r3", "r4", "r5", "r6", "r8" @@ -7172,11 +7408,19 @@ SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, "add %[a], %[a], #4\n\t" "sub %[b], %[b], #4\n\t" "cmp %[a], r14\n\t" +#ifdef __GNUC__ + "beq 3f\n\t" +#else "beq.n 3f\n\t" +#endif /* __GNUC__ */ "mov r6, r9\n\t" "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" +#ifdef __GNUC__ + "ble 2b\n\t" +#else "ble.n 2b\n\t" +#endif /* __GNUC__ */ "\n3:\n\t" "mov %[r], r12\n\t" "mov r8, r9\n\t" @@ -7189,7 +7433,11 @@ SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, "lsl r6, r6, #8\n\t" "add r6, r6, #120\n\t" "cmp r8, r6\n\t" +#ifdef __GNUC__ + "ble 1b\n\t" +#else "ble.n 1b\n\t" +#endif /* __GNUC__ */ "str r3, [%[r], r8]\n\t" "mov %[a], r10\n\t" "mov %[b], r11\n\t" @@ -7198,7 +7446,7 @@ SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" ); - XMEMCPY(r, tmp, sizeof(tmp)); + XMEMCPY(r, tmp_arr, sizeof(tmp_arr)); } /* Square a and put result in r. (r = a * a) @@ -7235,7 +7483,11 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) "add r2, r2, r10\n\t" "\n2:\n\t" "cmp r2, %[a]\n\t" +#ifdef __GNUC__ + "beq 4f\n\t" +#else "beq.n 4f\n\t" +#endif /* __GNUC__ */ /* Multiply * 2: Start */ "ldr r6, [%[a]]\n\t" "ldr r8, [r2]\n\t" @@ -7247,7 +7499,11 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Multiply * 2: Done */ +#ifdef __GNUC__ + "bal 5f\n\t" +#else "bal.n 5f\n\t" +#endif /* __GNUC__ */ "\n4:\n\t" /* Square: Start */ "ldr r6, [%[a]]\n\t" @@ -7262,13 +7518,25 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) "mov r6, #192\n\t" "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" +#ifdef __GNUC__ + "beq 3f\n\t" +#else "beq.n 3f\n\t" +#endif /* __GNUC__ */ "cmp %[a], r2\n\t" +#ifdef __GNUC__ + "bgt 3f\n\t" +#else "bgt.n 3f\n\t" +#endif /* __GNUC__ */ "mov r8, r9\n\t" "add r8, r8, r10\n\t" "cmp %[a], r8\n\t" +#ifdef __GNUC__ + "ble 2b\n\t" +#else "ble.n 2b\n\t" +#endif /* __GNUC__ */ "\n3:\n\t" "mov %[r], r11\n\t" "mov r8, r9\n\t" @@ -7282,7 +7550,11 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) "lsl r6, r6, #8\n\t" "add r6, r6, #120\n\t" "cmp r8, r6\n\t" +#ifdef __GNUC__ + "ble 1b\n\t" +#else "ble.n 1b\n\t" +#endif /* __GNUC__ */ "mov %[a], r10\n\t" "str r3, [%[r], r8]\n\t" "mov %[r], r12\n\t" @@ -7294,7 +7566,11 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" "subs r3, r3, #4\n\t" +#ifdef __GNUC__ + "bge 4b\n\t" +#else "bge.n 4b\n\t" +#endif /* __GNUC__ */ "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" "add r6, r6, #128\n\t" @@ -7357,7 +7633,11 @@ SP_NOINLINE static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, "mov r3, r4\n\t" "mov r4, r5\n\t" "cmp %[a], r9\n\t" +#ifdef __GNUC__ + "blt 1b\n\t" +#else "blt.n 1b\n\t" +#endif /* __GNUC__ */ "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a) : [b] "r" (b) @@ -7408,7 +7688,11 @@ SP_NOINLINE static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, "str r5, [%[r], r8]\n\t" "add r8, r8, #4\n\t" "cmp r8, r9\n\t" +#ifdef __GNUC__ + "blt 1b\n\t" +#else "blt.n 1b\n\t" +#endif /* __GNUC__ */ : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) : "memory", "r5", "r6", "r8", "r9" @@ -7467,7 +7751,11 @@ SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, "adc r4, r4, #0\n\t" "str r5, [r10], #4\n\t" "cmp r10, r14\n\t" +#ifdef __GNUC__ + "blt 2b\n\t" +#else "blt.n 2b\n\t" +#endif /* __GNUC__ */ /* a[i+46] += m[46] * mu */ "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" @@ -7500,7 +7788,11 @@ SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, /* Next word in a */ "sub r10, r10, #184\n\t" "cmp r10, r11\n\t" +#ifdef __GNUC__ + "blt 1b\n\t" +#else "blt.n 1b\n\t" +#endif /* __GNUC__ */ "mov %[a], r10\n\t" "mov %[m], r12\n\t" : [ca] "+r" (ca), [a] "+r" (a) @@ -7571,7 +7863,11 @@ SP_NOINLINE static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, "mov r3, r4\n\t" "mov r4, r5\n\t" "cmp %[a], r9\n\t" +#ifdef __GNUC__ + "blt 1b\n\t" +#else "blt.n 1b\n\t" +#endif /* __GNUC__ */ "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a) : [b] "r" (b) @@ -7665,7 +7961,11 @@ SP_NOINLINE static int32_t sp_3072_cmp_48(const sp_digit* a, const sp_digit* b) "and r3, r3, r8\n\t" "sub r6, r6, #4\n\t" "cmp r6, #0\n\t" +#ifdef __GNUC__ + "bge 1b\n\t" +#else "bge.n 1b\n\t" +#endif /* __GNUC__ */ : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) : "r3", "r4", "r5", "r6", "r8" @@ -7677,7 +7977,7 @@ SP_NOINLINE static int32_t sp_3072_cmp_48(const sp_digit* a, const sp_digit* b) /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * - * a Nmber to be divided. + * a Number to be divided. * d Number to divide with. * m Multiplier result. * r Remainder from the division. @@ -8081,7 +8381,11 @@ SP_NOINLINE static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, "str r5, [%[r], r8]\n\t" "add r8, r8, #4\n\t" "cmp r8, r9\n\t" +#ifdef __GNUC__ + "blt 1b\n\t" +#else "blt.n 1b\n\t" +#endif /* __GNUC__ */ : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) : "memory", "r5", "r6", "r8", "r9" @@ -8140,7 +8444,11 @@ SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, "adc r4, r4, #0\n\t" "str r5, [r10], #4\n\t" "cmp r10, r14\n\t" +#ifdef __GNUC__ + "blt 2b\n\t" +#else "blt.n 2b\n\t" +#endif /* __GNUC__ */ /* a[i+94] += m[94] * mu */ "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" @@ -8173,7 +8481,11 @@ SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, /* Next word in a */ "sub r10, r10, #376\n\t" "cmp r10, r11\n\t" +#ifdef __GNUC__ + "blt 1b\n\t" +#else "blt.n 1b\n\t" +#endif /* __GNUC__ */ "mov %[a], r10\n\t" "mov %[m], r12\n\t" : [ca] "+r" (ca), [a] "+r" (a) @@ -8332,7 +8644,11 @@ SP_NOINLINE static int32_t sp_3072_cmp_96(const sp_digit* a, const sp_digit* b) "and r3, r3, r8\n\t" "sub r6, r6, #4\n\t" "cmp r6, #0\n\t" +#ifdef __GNUC__ + "bge 1b\n\t" +#else "bge.n 1b\n\t" +#endif /* __GNUC__ */ : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) : "r3", "r4", "r5", "r6", "r8" @@ -8344,7 +8660,7 @@ SP_NOINLINE static int32_t sp_3072_cmp_96(const sp_digit* a, const sp_digit* b) /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * - * a Nmber to be divided. + * a Number to be divided. * d Number to divide with. * m Multiplier result. * r Remainder from the division. @@ -8394,7 +8710,7 @@ static WC_INLINE int sp_3072_mod_96(sp_digit* r, const sp_digit* a, const sp_dig /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * - * a Nmber to be divided. + * a Number to be divided. * d Number to divide with. * m Multiplier result. * r Remainder from the division. @@ -8914,7 +9230,11 @@ SP_NOINLINE static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, "str r5, [%[r], r8]\n\t" "add r8, r8, #4\n\t" "cmp r8, r9\n\t" +#ifdef __GNUC__ + "blt 1b\n\t" +#else "blt.n 1b\n\t" +#endif /* __GNUC__ */ : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) : "memory", "r5", "r6", "r8", "r9" @@ -10987,7 +11307,11 @@ SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, "add %[b], %[b], #4\n\t" "add %[r], %[r], #4\n\t" "cmp %[a], r6\n\t" +#ifdef __GNUC__ + "bne 1b\n\t" +#else "bne.n 1b\n\t" +#endif /* __GNUC__ */ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5", "r6", "r8" @@ -11025,7 +11349,11 @@ SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a, "add %[a], %[a], #8\n\t" "add %[b], %[b], #8\n\t" "cmp %[a], r8\n\t" +#ifdef __GNUC__ + "bne 1b\n\t" +#else "bne.n 1b\n\t" +#endif /* __GNUC__ */ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : : "memory", "r3", "r4", "r5", "r6", "r8" @@ -11085,11 +11413,19 @@ SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, "add %[a], %[a], #4\n\t" "sub %[b], %[b], #4\n\t" "cmp %[a], r14\n\t" +#ifdef __GNUC__ + "beq 3f\n\t" +#else "beq.n 3f\n\t" +#endif /* __GNUC__ */ "mov r6, r9\n\t" "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" +#ifdef __GNUC__ + "ble 2b\n\t" +#else "ble.n 2b\n\t" +#endif /* __GNUC__ */ "\n3:\n\t" "mov %[r], r12\n\t" "mov r8, r9\n\t" @@ -11102,7 +11438,11 @@ SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, "lsl r6, r6, #8\n\t" "add r6, r6, #248\n\t" "cmp r8, r6\n\t" +#ifdef __GNUC__ + "ble 1b\n\t" +#else "ble.n 1b\n\t" +#endif /* __GNUC__ */ "str r3, [%[r], r8]\n\t" "mov %[a], r10\n\t" "mov %[b], r11\n\t" @@ -11111,7 +11451,7 @@ SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" ); - XMEMCPY(r, tmp, sizeof(tmp)); + XMEMCPY(r, tmp_arr, sizeof(tmp_arr)); } /* Square a and put result in r. (r = a * a) @@ -11149,7 +11489,11 @@ SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) "add r2, r2, r10\n\t" "\n2:\n\t" "cmp r2, %[a]\n\t" +#ifdef __GNUC__ + "beq 4f\n\t" +#else "beq.n 4f\n\t" +#endif /* __GNUC__ */ /* Multiply * 2: Start */ "ldr r6, [%[a]]\n\t" "ldr r8, [r2]\n\t" @@ -11161,7 +11505,11 @@ SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Multiply * 2: Done */ +#ifdef __GNUC__ + "bal 5f\n\t" +#else "bal.n 5f\n\t" +#endif /* __GNUC__ */ "\n4:\n\t" /* Square: Start */ "ldr r6, [%[a]]\n\t" @@ -11177,13 +11525,25 @@ SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) "lsl r6, r6, #8\n\t" "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" +#ifdef __GNUC__ + "beq 3f\n\t" +#else "beq.n 3f\n\t" +#endif /* __GNUC__ */ "cmp %[a], r2\n\t" +#ifdef __GNUC__ + "bgt 3f\n\t" +#else "bgt.n 3f\n\t" +#endif /* __GNUC__ */ "mov r8, r9\n\t" "add r8, r8, r10\n\t" "cmp %[a], r8\n\t" +#ifdef __GNUC__ + "ble 2b\n\t" +#else "ble.n 2b\n\t" +#endif /* __GNUC__ */ "\n3:\n\t" "mov %[r], r11\n\t" "mov r8, r9\n\t" @@ -11197,7 +11557,11 @@ SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) "lsl r6, r6, #8\n\t" "add r6, r6, #248\n\t" "cmp r8, r6\n\t" +#ifdef __GNUC__ + "ble 1b\n\t" +#else "ble.n 1b\n\t" +#endif /* __GNUC__ */ "mov %[a], r10\n\t" "str r3, [%[r], r8]\n\t" "mov %[r], r12\n\t" @@ -11209,7 +11573,11 @@ SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" "subs r3, r3, #4\n\t" +#ifdef __GNUC__ + "bge 4b\n\t" +#else "bge.n 4b\n\t" +#endif /* __GNUC__ */ "mov r6, #4\n\t" "lsl r6, r6, #8\n\t" "add sp, sp, r6\n\t" @@ -11269,7 +11637,11 @@ SP_NOINLINE static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, "mov r3, r4\n\t" "mov r4, r5\n\t" "cmp %[a], r9\n\t" +#ifdef __GNUC__ + "blt 1b\n\t" +#else "blt.n 1b\n\t" +#endif /* __GNUC__ */ "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a) : [b] "r" (b) @@ -11322,7 +11694,11 @@ SP_NOINLINE static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, "str r5, [%[r], r8]\n\t" "add r8, r8, #4\n\t" "cmp r8, r9\n\t" +#ifdef __GNUC__ + "blt 1b\n\t" +#else "blt.n 1b\n\t" +#endif /* __GNUC__ */ : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) : "memory", "r5", "r6", "r8", "r9" @@ -11381,7 +11757,11 @@ SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, "adc r4, r4, #0\n\t" "str r5, [r10], #4\n\t" "cmp r10, r14\n\t" +#ifdef __GNUC__ + "blt 2b\n\t" +#else "blt.n 2b\n\t" +#endif /* __GNUC__ */ /* a[i+126] += m[126] * mu */ "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" @@ -11414,7 +11794,11 @@ SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, /* Next word in a */ "sub r10, r10, #504\n\t" "cmp r10, r11\n\t" +#ifdef __GNUC__ + "blt 1b\n\t" +#else "blt.n 1b\n\t" +#endif /* __GNUC__ */ "mov %[a], r10\n\t" "mov %[m], r12\n\t" : [ca] "+r" (ca), [a] "+r" (a) @@ -11573,7 +11957,11 @@ SP_NOINLINE static int32_t sp_4096_cmp_128(const sp_digit* a, const sp_digit* b) "and r3, r3, r8\n\t" "sub r6, r6, #4\n\t" "cmp r6, #0\n\t" +#ifdef __GNUC__ + "bge 1b\n\t" +#else "bge.n 1b\n\t" +#endif /* __GNUC__ */ : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) : "r3", "r4", "r5", "r6", "r8" @@ -11585,7 +11973,7 @@ SP_NOINLINE static int32_t sp_4096_cmp_128(const sp_digit* a, const sp_digit* b) /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * - * a Nmber to be divided. + * a Number to be divided. * d Number to divide with. * m Multiplier result. * r Remainder from the division. @@ -11635,7 +12023,7 @@ static WC_INLINE int sp_4096_mod_128(sp_digit* r, const sp_digit* a, const sp_di /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * - * a Nmber to be divided. + * a Number to be divided. * d Number to divide with. * m Multiplier result. * r Remainder from the division. @@ -12156,7 +12544,11 @@ SP_NOINLINE static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, "str r5, [%[r], r8]\n\t" "add r8, r8, #4\n\t" "cmp r8, r9\n\t" +#ifdef __GNUC__ + "blt 1b\n\t" +#else "blt.n 1b\n\t" +#endif /* __GNUC__ */ : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) : "memory", "r5", "r6", "r8", "r9" @@ -15291,7 +15683,11 @@ SP_NOINLINE static int32_t sp_256_cmp_8(const sp_digit* a, const sp_digit* b) "and r3, r3, r8\n\t" "sub r6, r6, #4\n\t" "cmp r6, #0\n\t" +#ifdef __GNUC__ + "bge 1b\n\t" +#else "bge.n 1b\n\t" +#endif /* __GNUC__ */ : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) : "r3", "r4", "r5", "r6", "r8" @@ -15334,7 +15730,11 @@ SP_NOINLINE static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, "str r5, [%[r], r8]\n\t" "add r8, r8, #4\n\t" "cmp r8, r9\n\t" +#ifdef __GNUC__ + "blt 1b\n\t" +#else "blt.n 1b\n\t" +#endif /* __GNUC__ */ : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) : "memory", "r5", "r6", "r8", "r9" @@ -15424,7 +15824,11 @@ SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, "add %[a], %[a], #4\n\t" "mov r6, #8\n\t" "cmp r9, r6\n\t" +#ifdef __GNUC__ + "blt 1b\n\t" +#else "blt.n 1b\n\t" +#endif /* __GNUC__ */ "sub %[a], %[a], #32\n\t" "mov r3, r1\n\t" "sub r1, r1, #1\n\t" @@ -15513,7 +15917,11 @@ SP_NOINLINE static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* "adc r4, r4, #0\n\t" "str r5, [r10], #4\n\t" "cmp r10, r14\n\t" +#ifdef __GNUC__ + "blt 2b\n\t" +#else "blt.n 2b\n\t" +#endif /* __GNUC__ */ /* a[i+6] += m[6] * mu */ "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" @@ -15546,7 +15954,11 @@ SP_NOINLINE static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* /* Next word in a */ "sub r10, r10, #24\n\t" "cmp r10, r11\n\t" +#ifdef __GNUC__ + "blt 1b\n\t" +#else "blt.n 1b\n\t" +#endif /* __GNUC__ */ "mov %[a], r10\n\t" "mov %[m], r12\n\t" : [ca] "+r" (ca), [a] "+r" (a) @@ -15628,7 +16040,11 @@ SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, "add %[b], %[b], #4\n\t" "add %[r], %[r], #4\n\t" "cmp %[a], r6\n\t" +#ifdef __GNUC__ + "bne 1b\n\t" +#else "bne.n 1b\n\t" +#endif /* __GNUC__ */ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5", "r6", "r8" @@ -16269,7 +16685,11 @@ SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, "add %[b], %[b], #4\n\t" "add %[r], %[r], #4\n\t" "cmp %[a], r6\n\t" +#ifdef __GNUC__ + "bne 1b\n\t" +#else "bne.n 1b\n\t" +#endif /* __GNUC__ */ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5", "r6" @@ -20165,7 +20585,11 @@ SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a, "add %[a], %[a], #8\n\t" "add %[b], %[b], #8\n\t" "cmp %[a], r8\n\t" +#ifdef __GNUC__ + "bne 1b\n\t" +#else "bne.n 1b\n\t" +#endif /* __GNUC__ */ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : : "memory", "r3", "r4", "r5", "r6", "r8" @@ -20247,7 +20671,11 @@ SP_NOINLINE static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, "mov r3, r4\n\t" "mov r4, r5\n\t" "cmp %[a], r9\n\t" +#ifdef __GNUC__ + "blt 1b\n\t" +#else "blt.n 1b\n\t" +#endif /* __GNUC__ */ "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a) : [b] "r" (b) @@ -20336,7 +20764,7 @@ static void sp_256_mask_8(sp_digit* r, const sp_digit* a, sp_digit m) /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * - * a Nmber to be divided. + * a Number to be divided. * d Number to divide with. * m Multiplier result. * r Remainder from the division. @@ -22597,11 +23025,19 @@ SP_NOINLINE static void sp_384_mul_12(sp_digit* r, const sp_digit* a, "add %[a], %[a], #4\n\t" "sub %[b], %[b], #4\n\t" "cmp %[a], r14\n\t" +#ifdef __GNUC__ + "beq 3f\n\t" +#else "beq.n 3f\n\t" +#endif /* __GNUC__ */ "mov r6, r9\n\t" "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" +#ifdef __GNUC__ + "ble 2b\n\t" +#else "ble.n 2b\n\t" +#endif /* __GNUC__ */ "\n3:\n\t" "mov %[r], r12\n\t" "mov r8, r9\n\t" @@ -22612,7 +23048,11 @@ SP_NOINLINE static void sp_384_mul_12(sp_digit* r, const sp_digit* a, "mov r9, r8\n\t" "mov r6, #88\n\t" "cmp r8, r6\n\t" +#ifdef __GNUC__ + "ble 1b\n\t" +#else "ble.n 1b\n\t" +#endif /* __GNUC__ */ "str r3, [%[r], r8]\n\t" "mov %[a], r10\n\t" "mov %[b], r11\n\t" @@ -22621,7 +23061,7 @@ SP_NOINLINE static void sp_384_mul_12(sp_digit* r, const sp_digit* a, : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" ); - XMEMCPY(r, tmp, sizeof(tmp)); + XMEMCPY(r, tmp_arr, sizeof(tmp_arr)); } /* Conditionally subtract b from a using the mask m. @@ -22652,7 +23092,11 @@ SP_NOINLINE static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, "str r5, [%[r], r8]\n\t" "add r8, r8, #4\n\t" "cmp r8, r9\n\t" +#ifdef __GNUC__ + "blt 1b\n\t" +#else "blt.n 1b\n\t" +#endif /* __GNUC__ */ : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) : "memory", "r5", "r6", "r8", "r9" @@ -22713,7 +23157,11 @@ SP_NOINLINE static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, "adc r4, r4, #0\n\t" "str r5, [r10], #4\n\t" "cmp r10, r14\n\t" +#ifdef __GNUC__ + "blt 2b\n\t" +#else "blt.n 2b\n\t" +#endif /* __GNUC__ */ /* a[i+10] += m[10] * mu */ "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" @@ -22746,7 +23194,11 @@ SP_NOINLINE static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, /* Next word in a */ "sub r10, r10, #40\n\t" "cmp r10, r11\n\t" +#ifdef __GNUC__ + "blt 1b\n\t" +#else "blt.n 1b\n\t" +#endif /* __GNUC__ */ "mov %[a], r10\n\t" "mov %[m], r12\n\t" : [ca] "+r" (ca), [a] "+r" (a) @@ -22805,7 +23257,11 @@ SP_NOINLINE static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "add r2, r2, r10\n\t" "\n2:\n\t" "cmp r2, %[a]\n\t" +#ifdef __GNUC__ + "beq 4f\n\t" +#else "beq.n 4f\n\t" +#endif /* __GNUC__ */ /* Multiply * 2: Start */ "ldr r6, [%[a]]\n\t" "ldr r8, [r2]\n\t" @@ -22817,7 +23273,11 @@ SP_NOINLINE static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Multiply * 2: Done */ +#ifdef __GNUC__ + "bal 5f\n\t" +#else "bal.n 5f\n\t" +#endif /* __GNUC__ */ "\n4:\n\t" /* Square: Start */ "ldr r6, [%[a]]\n\t" @@ -22832,13 +23292,25 @@ SP_NOINLINE static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "mov r6, #48\n\t" "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" +#ifdef __GNUC__ + "beq 3f\n\t" +#else "beq.n 3f\n\t" +#endif /* __GNUC__ */ "cmp %[a], r2\n\t" +#ifdef __GNUC__ + "bgt 3f\n\t" +#else "bgt.n 3f\n\t" +#endif /* __GNUC__ */ "mov r8, r9\n\t" "add r8, r8, r10\n\t" "cmp %[a], r8\n\t" +#ifdef __GNUC__ + "ble 2b\n\t" +#else "ble.n 2b\n\t" +#endif /* __GNUC__ */ "\n3:\n\t" "mov %[r], r11\n\t" "mov r8, r9\n\t" @@ -22850,7 +23322,11 @@ SP_NOINLINE static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "mov r9, r8\n\t" "mov r6, #88\n\t" "cmp r8, r6\n\t" +#ifdef __GNUC__ + "ble 1b\n\t" +#else "ble.n 1b\n\t" +#endif /* __GNUC__ */ "mov %[a], r10\n\t" "str r3, [%[r], r8]\n\t" "mov %[r], r12\n\t" @@ -22860,7 +23336,11 @@ SP_NOINLINE static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" "subs r3, r3, #4\n\t" +#ifdef __GNUC__ + "bge 4b\n\t" +#else "bge.n 4b\n\t" +#endif /* __GNUC__ */ "mov r6, #96\n\t" "add sp, sp, r6\n\t" : @@ -23029,7 +23509,11 @@ SP_NOINLINE static int32_t sp_384_cmp_12(const sp_digit* a, const sp_digit* b) "and r3, r3, r8\n\t" "sub r6, r6, #4\n\t" "cmp r6, #0\n\t" +#ifdef __GNUC__ + "bge 1b\n\t" +#else "bge.n 1b\n\t" +#endif /* __GNUC__ */ : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) : "r3", "r4", "r5", "r6", "r8" @@ -23115,7 +23599,11 @@ SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, "add %[b], %[b], #4\n\t" "add %[r], %[r], #4\n\t" "cmp %[a], r6\n\t" +#ifdef __GNUC__ + "bne 1b\n\t" +#else "bne.n 1b\n\t" +#endif /* __GNUC__ */ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5", "r6", "r8" @@ -23251,7 +23739,11 @@ SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, "add %[b], %[b], #4\n\t" "add %[r], %[r], #4\n\t" "cmp %[a], r6\n\t" +#ifdef __GNUC__ + "bne 1b\n\t" +#else "bne.n 1b\n\t" +#endif /* __GNUC__ */ : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5", "r6" @@ -23359,7 +23851,11 @@ SP_NOINLINE static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, c "str r5, [%[r], r8]\n\t" "add r8, r8, #4\n\t" "cmp r8, r9\n\t" +#ifdef __GNUC__ + "blt 1b\n\t" +#else "blt.n 1b\n\t" +#endif /* __GNUC__ */ : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) : "memory", "r5", "r6", "r8", "r9" @@ -27054,7 +27550,11 @@ SP_NOINLINE static sp_digit sp_384_sub_in_place_12(sp_digit* a, "add %[a], %[a], #8\n\t" "add %[b], %[b], #8\n\t" "cmp %[a], r8\n\t" +#ifdef __GNUC__ + "bne 1b\n\t" +#else "bne.n 1b\n\t" +#endif /* __GNUC__ */ : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : : "memory", "r3", "r4", "r5", "r6", "r8" @@ -27146,7 +27646,11 @@ SP_NOINLINE static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a, "mov r3, r4\n\t" "mov r4, r5\n\t" "cmp %[a], r9\n\t" +#ifdef __GNUC__ + "blt 1b\n\t" +#else "blt.n 1b\n\t" +#endif /* __GNUC__ */ "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a) : [b] "r" (b) @@ -27239,7 +27743,7 @@ static void sp_384_mask_12(sp_digit* r, const sp_digit* a, sp_digit m) /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * - * a Nmber to be divided. + * a Number to be divided. * d Number to divide with. * m Multiplier result. * r Remainder from the division.