diff --git a/wolfcrypt/src/sp_armthumb.c b/wolfcrypt/src/sp_armthumb.c index 752e99546..42909b897 100644 --- a/wolfcrypt/src/sp_armthumb.c +++ b/wolfcrypt/src/sp_armthumb.c @@ -219,95 +219,243 @@ static void sp_2048_to_bin(sp_digit* r, byte* a) SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit tmp[8 * 2]; + sp_digit t[8 * 2]; + sp_digit* tmp = t; __asm__ __volatile__ ( - "mov r3, #0\n\t" - "mov r4, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" "mov r8, r3\n\t" - "mov r11, %[r]\n\t" + "mov r11, %[tmp]\n\t" "mov r9, %[a]\n\t" "mov r10, %[b]\n\t" - "mov r6, #32\n\t" - "add r6, r9\n\t" + "movs r6, #32\n\t" + "add r6, r6, r9\n\t" "mov r12, r6\n\t" - "\n1:\n\t" - "mov %[r], #0\n\t" - "mov r5, #0\n\t" - "mov r6, #28\n\t" + "\n" + "L_sp_2048_mul_8_words_%=: \n\t" + "movs %[tmp], #0\n\t" + "movs r5, #0\n\t" + "movs r6, #28\n\t" "mov %[a], r8\n\t" - "sub %[a], r6\n\t" +#ifdef __clang__ + "subs %[a], %[a], r6\n\t" +#else + "sub %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" +#endif +#ifdef __clang__ + "mvns r6, r6\n\t" +#else "mvn r6, r6\n\t" +#endif +#ifdef __clang__ + "ands %[a], r6\n\t" +#else "and %[a], r6\n\t" +#endif "mov %[b], r8\n\t" - "sub %[b], %[a]\n\t" - "add %[a], r9\n\t" - "add %[b], r10\n\t" - "\n2:\n\t" +#ifdef __clang__ + "subs %[b], %[b], %[a]\n\t" +#else + "sub %[b], %[b], %[a]\n\t" +#endif + "add %[a], %[a], r9\n\t" + "add %[b], %[b], r10\n\t" + "\n" + "L_sp_2048_mul_8_mul_%=: \n\t" "# Multiply Start\n\t" "ldr r6, [%[a]]\n\t" "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" - "adc r4, %[r]\n\t" 
- "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[tmp]\n\t" +#else + "adc r4, %[tmp]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r6, [%[a]]\n\t" "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc 
r5, %[tmp]\n\t" +#endif "# Multiply Done\n\t" - "add %[a], #4\n\t" - "sub %[b], #4\n\t" +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "subs %[b], %[b], #4\n\t" +#else + "sub %[b], %[b], #4\n\t" +#endif "cmp %[a], r12\n\t" - "beq 3f\n\t" + "beq L_sp_2048_mul_8_done_mul_%=\n\t" "mov r6, r8\n\t" - "add r6, r9\n\t" + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" - "ble 2b\n\t" - "\n3:\n\t" - "mov %[r], r11\n\t" + "ble L_sp_2048_mul_8_mul_%=\n\t" + "\n" + "L_sp_2048_mul_8_done_mul_%=: \n\t" + "mov %[tmp], r11\n\t" "mov r7, r8\n\t" - "str r3, [%[r], r7]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "add r7, #4\n\t" + "str r3, [%[tmp], r7]\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "mov r8, r7\n\t" - "mov r6, #56\n\t" + "movs r6, #56\n\t" "cmp r7, r6\n\t" - "ble 1b\n\t" - "str r3, [%[r], r7]\n\t" + "ble L_sp_2048_mul_8_words_%=\n\t" + "str r3, [%[tmp], r7]\n\t" "mov %[a], r9\n\t" "mov %[b], r10\n\t" + : [a] "+r" (a), [b] "+r" (b), [tmp] "+r" (tmp) : - : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); - XMEMCPY(r, tmp, sizeof(tmp)); + XMEMCPY(r, t, sizeof(t)); } /* Square a and put result in r. 
(r = a * a) @@ -318,142 +466,420 @@ SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( - "mov r3, #0\n\t" - "mov r4, #0\n\t" - "mov r5, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" + "movs r5, #0\n\t" "mov r8, r3\n\t" "mov r11, %[r]\n\t" - "mov r6, #64\n\t" + "movs r6, #0x40\n\t" +#ifdef __clang__ + "negs r6, r6\n\t" +#else "neg r6, r6\n\t" - "add sp, r6\n\t" +#endif + "add sp, sp, r6\n\t" "mov r10, sp\n\t" "mov r9, %[a]\n\t" - "\n1:\n\t" - "mov %[r], #0\n\t" - "mov r6, #28\n\t" + "\n" + "L_sp_2048_sqr_8_words_%=: \n\t" + "movs %[r], #0\n\t" + "movs r6, #28\n\t" "mov %[a], r8\n\t" - "sub %[a], r6\n\t" +#ifdef __clang__ + "subs %[a], %[a], r6\n\t" +#else + "sub %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" +#endif +#ifdef __clang__ + "mvns r6, r6\n\t" +#else "mvn r6, r6\n\t" +#endif +#ifdef __clang__ + "ands %[a], r6\n\t" +#else "and %[a], r6\n\t" +#endif "mov r2, r8\n\t" - "sub r2, %[a]\n\t" - "add %[a], r9\n\t" - "add r2, r9\n\t" - "\n2:\n\t" +#ifdef __clang__ + "subs r2, r2, %[a]\n\t" +#else + "sub r2, r2, %[a]\n\t" +#endif + "add %[a], %[a], r9\n\t" + "add r2, r2, r9\n\t" + "\n" + "L_sp_2048_sqr_8_mul_%=: \n\t" "cmp r2, %[a]\n\t" - "beq 4f\n\t" + "beq L_sp_2048_sqr_8_sqr_%=\n\t" "# Multiply * 2: Start\n\t" "ldr r6, [%[a]]\n\t" "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif 
+#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" 
+#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# Multiply * 2: Done\n\t" - "bal 5f\n\t" - "\n4:\n\t" + "bal L_sp_2048_sqr_8_done_sqr_%=\n\t" + "\n" + "L_sp_2048_sqr_8_sqr_%=: \n\t" "# Square: Start\n\t" "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r6\n\t" +#else "mul r6, r6\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "muls r7, r7\n\t" +#else "mul r7, r7\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, 
#15\n\t" +#else "lsr r7, r6, #15\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #17\n\t" +#else "lsl r6, r6, #17\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# Square: Done\n\t" - "\n5:\n\t" - "add %[a], #4\n\t" - "sub r2, #4\n\t" - "mov r6, #32\n\t" - "add r6, r9\n\t" + "\n" + "L_sp_2048_sqr_8_done_sqr_%=: \n\t" +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "subs r2, r2, #4\n\t" +#else + "sub r2, r2, #4\n\t" +#endif + "movs r6, #32\n\t" + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" - "beq 3f\n\t" + "beq L_sp_2048_sqr_8_done_mul_%=\n\t" "cmp %[a], r2\n\t" - "bgt 3f\n\t" + "bgt L_sp_2048_sqr_8_done_mul_%=\n\t" "mov r7, r8\n\t" - "add r7, r9\n\t" + "add r7, r7, r9\n\t" "cmp %[a], r7\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble L_sp_2048_sqr_8_mul_%=\n\t" + "\n" + "L_sp_2048_sqr_8_done_mul_%=: \n\t" "mov %[r], r10\n\t" "mov r7, r8\n\t" "str r3, [%[r], r7]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "mov r5, #0\n\t" - "add r7, #4\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" + "movs r5, #0\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "mov r8, r7\n\t" - "mov r6, #56\n\t" + "movs r6, #56\n\t" "cmp r7, r6\n\t" - "ble 1b\n\t" + "ble L_sp_2048_sqr_8_words_%=\n\t" "mov %[a], r9\n\t" "str r3, [%[r], r7]\n\t" "mov %[r], r11\n\t" "mov %[a], r10\n\t" - "mov r3, #60\n\t" - "\n4:\n\t" + "movs r3, #60\n\t" + "\n" + "L_sp_2048_sqr_8_store_%=: \n\t" "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" - "sub r3, #4\n\t" - "bge 4b\n\t" - "mov r6, #64\n\t" - "add sp, r6\n\t" +#ifdef __clang__ + "subs r3, r3, #4\n\t" +#else + "sub r3, r3, #4\n\t" +#endif + "bge L_sp_2048_sqr_8_store_%=\n\t" + "movs r6, #0x40\n\t" + "add sp, sp, r6\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] 
"r" (a) : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } @@ -467,134 +893,234 @@ SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) SP_NOINLINE static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "add r4, r5\n\t" - "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "adds r4, r4, r5\n\t" +#else + "add r4, r4, r5\n\t" +#endif + "str r4, [%[r]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #28]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif + "movs %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5" + : "memory", "r3", "r4", "r5" ); - - return c; + return (uint32_t)(size_t)r; } -/* Sub b from a into r. 
(r = a - b) +/* Sub b from a into a. (a -= b) * - * r A single precision integer. * a A single precision integer. * b A single precision integer. */ SP_NOINLINE static sp_digit sp_2048_sub_in_place_16(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldr r3, [%[a], #0]\n\t" + "movs r2, #0\n\t" + "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" + "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" - "sub r3, r5\n\t" +#ifdef __clang__ + "subs r3, r3, r5\n\t" +#else + "sub r3, r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" - "str r3, [%[a], #0]\n\t" +#endif + "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #8]\n\t" "ldr r6, [%[b], #12]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #8]\n\t" "str r4, [%[a], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #16]\n\t" "ldr r6, [%[b], #20]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #16]\n\t" "str r4, [%[a], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #24]\n\t" "ldr r6, [%[b], #28]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #24]\n\t" "str r4, [%[a], #28]\n\t" "ldr r3, [%[a], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #32]\n\t" "ldr r6, [%[b], #36]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #32]\n\t" "str r4, [%[a], #36]\n\t" "ldr r3, [%[a], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #40]\n\t" "ldr r6, [%[b], #44]\n\t" +#ifdef __clang__ + 
"sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #40]\n\t" "str r4, [%[a], #44]\n\t" "ldr r3, [%[a], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #48]\n\t" "ldr r6, [%[b], #52]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #48]\n\t" "str r4, [%[a], #52]\n\t" "ldr r3, [%[a], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #56]\n\t" "ldr r6, [%[b], #60]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #56]\n\t" "str r4, [%[a], #60]\n\t" - "sbc %[c], %[c]\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) +#ifdef __clang__ + "sbcs r2, r2\n\t" +#else + "sbc r2, r2\n\t" +#endif + "movs %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6" + : "memory", "r2", "r3", "r4", "r5", "r6" ); - - return c; + return (uint32_t)(size_t)a; } /* Add b to a into r. 
(r = a + b) @@ -606,81 +1132,147 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_16(sp_digit* a, SP_NOINLINE static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "add r4, r5\n\t" - "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "adds r4, r4, r5\n\t" +#else + "add r4, r4, r5\n\t" +#endif + "str r4, [%[r]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[b], #32]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], #40]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, 
[%[b], #44]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[b], #48]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #52]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[b], #56]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #60]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #60]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif + "movs %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5" + : "memory", "r3", "r4", "r5" ); - - return c; + return (uint32_t)(size_t)r; } /* AND m into each word of a and store in r. @@ -771,153 +1363,283 @@ SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a) (void)sp_2048_add_16(r + 16, r + 16, z2); } -/* Sub b from a into r. (r = a - b) +/* Sub b from a into a. (a -= b) * - * r A single precision integer. * a A single precision integer. * b A single precision integer. 
*/ SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldr r3, [%[a], #0]\n\t" + "movs r2, #0\n\t" + "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" + "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" - "sub r3, r5\n\t" +#ifdef __clang__ + "subs r3, r3, r5\n\t" +#else + "sub r3, r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" - "str r3, [%[a], #0]\n\t" +#endif + "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #8]\n\t" "ldr r6, [%[b], #12]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #8]\n\t" "str r4, [%[a], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #16]\n\t" "ldr r6, [%[b], #20]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #16]\n\t" "str r4, [%[a], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #24]\n\t" "ldr r6, [%[b], #28]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #24]\n\t" "str r4, [%[a], #28]\n\t" "ldr r3, [%[a], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #32]\n\t" "ldr r6, [%[b], #36]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #32]\n\t" "str r4, [%[a], #36]\n\t" "ldr r3, [%[a], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #40]\n\t" "ldr r6, [%[b], #44]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #40]\n\t" 
"str r4, [%[a], #44]\n\t" "ldr r3, [%[a], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #48]\n\t" "ldr r6, [%[b], #52]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #48]\n\t" "str r4, [%[a], #52]\n\t" "ldr r3, [%[a], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #56]\n\t" "ldr r6, [%[b], #60]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #56]\n\t" "str r4, [%[a], #60]\n\t" "ldr r3, [%[a], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #64]\n\t" "ldr r6, [%[b], #68]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #64]\n\t" "str r4, [%[a], #68]\n\t" "ldr r3, [%[a], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #72]\n\t" "ldr r6, [%[b], #76]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #72]\n\t" "str r4, [%[a], #76]\n\t" "ldr r3, [%[a], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #80]\n\t" "ldr r6, [%[b], #84]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #80]\n\t" "str r4, [%[a], #84]\n\t" "ldr r3, [%[a], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #88]\n\t" "ldr r6, [%[b], #92]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #88]\n\t" "str r4, [%[a], #92]\n\t" "ldr r3, [%[a], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #96]\n\t" "ldr r6, [%[b], #100]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + 
"sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #96]\n\t" "str r4, [%[a], #100]\n\t" "ldr r3, [%[a], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #104]\n\t" "ldr r6, [%[b], #108]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #104]\n\t" "str r4, [%[a], #108]\n\t" "ldr r3, [%[a], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #112]\n\t" "ldr r6, [%[b], #116]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #112]\n\t" "str r4, [%[a], #116]\n\t" "ldr r3, [%[a], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #120]\n\t" "ldr r6, [%[b], #124]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #120]\n\t" "str r4, [%[a], #124]\n\t" - "sbc %[c], %[c]\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) +#ifdef __clang__ + "sbcs r2, r2\n\t" +#else + "sbc r2, r2\n\t" +#endif + "movs %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6" + : "memory", "r2", "r3", "r4", "r5", "r6" ); - - return c; + return (uint32_t)(size_t)a; } /* Add b to a into r. 
(r = a + b) @@ -929,145 +1651,275 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a, SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "add r4, r5\n\t" - "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "adds r4, r4, r5\n\t" +#else + "add r4, r4, r5\n\t" +#endif + "str r4, [%[r]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[b], #32]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], #40]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, 
[%[b], #44]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[b], #48]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #52]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[b], #56]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #60]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #60]\n\t" "ldr r4, [%[a], #64]\n\t" "ldr r5, [%[b], #64]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #68]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #68]\n\t" "ldr r4, [%[a], #72]\n\t" "ldr r5, [%[b], #72]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #76]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #76]\n\t" "ldr r4, [%[a], #80]\n\t" "ldr r5, [%[b], #80]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #84]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #84]\n\t" "ldr r4, [%[a], #88]\n\t" "ldr r5, [%[b], #88]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #92]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #92]\n\t" "ldr r4, [%[a], #96]\n\t" "ldr r5, [%[b], #96]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, 
r5\n\t" +#endif "str r4, [%[r], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #100]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #100]\n\t" "ldr r4, [%[a], #104]\n\t" "ldr r5, [%[b], #104]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #108]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #108]\n\t" "ldr r4, [%[a], #112]\n\t" "ldr r5, [%[b], #112]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #116]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #116]\n\t" "ldr r4, [%[a], #120]\n\t" "ldr r5, [%[b], #120]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #124]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #124]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif + "movs %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5" + : "memory", "r3", "r4", "r5" ); - - return c; + return (uint32_t)(size_t)r; } /* AND m into each word of a and store in r. @@ -1162,286 +2014,560 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) (void)sp_2048_add_32(r + 32, r + 32, z2); } -/* Sub b from a into r. (r = a - b) +/* Sub b from a into a. (a -= b) * - * r A single precision integer. * a A single precision integer. * b A single precision integer. 
*/ SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldr r3, [%[a], #0]\n\t" + "movs r2, #0\n\t" + "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" + "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" - "sub r3, r5\n\t" +#ifdef __clang__ + "subs r3, r3, r5\n\t" +#else + "sub r3, r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" - "str r3, [%[a], #0]\n\t" +#endif + "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #8]\n\t" "ldr r6, [%[b], #12]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #8]\n\t" "str r4, [%[a], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #16]\n\t" "ldr r6, [%[b], #20]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #16]\n\t" "str r4, [%[a], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #24]\n\t" "ldr r6, [%[b], #28]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #24]\n\t" "str r4, [%[a], #28]\n\t" "ldr r3, [%[a], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #32]\n\t" "ldr r6, [%[b], #36]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #32]\n\t" "str r4, [%[a], #36]\n\t" "ldr r3, [%[a], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #40]\n\t" "ldr r6, [%[b], #44]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #40]\n\t" 
"str r4, [%[a], #44]\n\t" "ldr r3, [%[a], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #48]\n\t" "ldr r6, [%[b], #52]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #48]\n\t" "str r4, [%[a], #52]\n\t" "ldr r3, [%[a], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #56]\n\t" "ldr r6, [%[b], #60]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #56]\n\t" "str r4, [%[a], #60]\n\t" "ldr r3, [%[a], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #64]\n\t" "ldr r6, [%[b], #68]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #64]\n\t" "str r4, [%[a], #68]\n\t" "ldr r3, [%[a], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #72]\n\t" "ldr r6, [%[b], #76]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #72]\n\t" "str r4, [%[a], #76]\n\t" "ldr r3, [%[a], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #80]\n\t" "ldr r6, [%[b], #84]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #80]\n\t" "str r4, [%[a], #84]\n\t" "ldr r3, [%[a], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #88]\n\t" "ldr r6, [%[b], #92]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #88]\n\t" "str r4, [%[a], #92]\n\t" "ldr r3, [%[a], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #96]\n\t" "ldr r6, [%[b], #100]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + 
"sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #96]\n\t" "str r4, [%[a], #100]\n\t" "ldr r3, [%[a], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #104]\n\t" "ldr r6, [%[b], #108]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #104]\n\t" "str r4, [%[a], #108]\n\t" "ldr r3, [%[a], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #112]\n\t" "ldr r6, [%[b], #116]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #112]\n\t" "str r4, [%[a], #116]\n\t" "ldr r3, [%[a], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #120]\n\t" "ldr r6, [%[b], #124]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #120]\n\t" "str r4, [%[a], #124]\n\t" - "sbc %[c], %[c]\n\t" - "add %[a], #0x80\n\t" - "add %[b], #0x80\n\t" - "mov r5, #0\n\t" - "sub r5, %[c]\n\t" - "ldr r3, [%[a], #0]\n\t" +#ifdef __clang__ + "sbcs r2, r2\n\t" +#else + "sbc r2, r2\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #0x80\n\t" +#else + "add %[a], %[a], #0x80\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #0x80\n\t" +#else + "add %[b], %[b], #0x80\n\t" +#endif + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, r2\n\t" +#else + "sub r5, r5, r2\n\t" +#endif + "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" + "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" - "str r3, [%[a], #0]\n\t" +#endif + "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #8]\n\t" "ldr r6, [%[b], #12]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc 
r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #8]\n\t" "str r4, [%[a], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #16]\n\t" "ldr r6, [%[b], #20]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #16]\n\t" "str r4, [%[a], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #24]\n\t" "ldr r6, [%[b], #28]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #24]\n\t" "str r4, [%[a], #28]\n\t" "ldr r3, [%[a], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #32]\n\t" "ldr r6, [%[b], #36]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #32]\n\t" "str r4, [%[a], #36]\n\t" "ldr r3, [%[a], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #40]\n\t" "ldr r6, [%[b], #44]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #40]\n\t" "str r4, [%[a], #44]\n\t" "ldr r3, [%[a], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #48]\n\t" "ldr r6, [%[b], #52]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #48]\n\t" "str r4, [%[a], #52]\n\t" "ldr r3, [%[a], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #56]\n\t" "ldr r6, [%[b], #60]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #56]\n\t" "str r4, [%[a], #60]\n\t" "ldr r3, [%[a], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #64]\n\t" "ldr 
r6, [%[b], #68]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #64]\n\t" "str r4, [%[a], #68]\n\t" "ldr r3, [%[a], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #72]\n\t" "ldr r6, [%[b], #76]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #72]\n\t" "str r4, [%[a], #76]\n\t" "ldr r3, [%[a], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #80]\n\t" "ldr r6, [%[b], #84]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #80]\n\t" "str r4, [%[a], #84]\n\t" "ldr r3, [%[a], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #88]\n\t" "ldr r6, [%[b], #92]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #88]\n\t" "str r4, [%[a], #92]\n\t" "ldr r3, [%[a], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #96]\n\t" "ldr r6, [%[b], #100]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #96]\n\t" "str r4, [%[a], #100]\n\t" "ldr r3, [%[a], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #104]\n\t" "ldr r6, [%[b], #108]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #104]\n\t" "str r4, [%[a], #108]\n\t" "ldr r3, [%[a], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #112]\n\t" "ldr r6, [%[b], #116]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #112]\n\t" "str r4, [%[a], 
#116]\n\t" "ldr r3, [%[a], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #120]\n\t" "ldr r6, [%[b], #124]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #120]\n\t" "str r4, [%[a], #124]\n\t" - "sbc %[c], %[c]\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) +#ifdef __clang__ + "sbcs r2, r2\n\t" +#else + "sbc r2, r2\n\t" +#endif + "movs %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6" + : "memory", "r2", "r3", "r4", "r5", "r6" ); - - return c; + return (uint32_t)(size_t)a; } /* Add b to a into r. (r = a + b) @@ -1453,281 +2579,563 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a, SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mvn r7, r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "add r4, r5\n\t" - "str r4, [%[r], #0]\n\t" + "movs r6, #0\n\t" +#ifdef __clang__ + "mvns r6, r6\n\t" +#else + "mvn r6, r6\n\t" +#endif + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "adds r4, r4, r5\n\t" +#else + "add r4, r4, r5\n\t" +#endif + "str r4, [%[r]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, 
[%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[b], #32]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], #40]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #44]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[b], #48]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #52]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[b], #56]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #60]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #60]\n\t" "ldr r4, [%[a], #64]\n\t" "ldr r5, [%[b], #64]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #68]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #68]\n\t" "ldr r4, [%[a], #72]\n\t" "ldr r5, [%[b], #72]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #76]\n\t" 
+#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #76]\n\t" "ldr r4, [%[a], #80]\n\t" "ldr r5, [%[b], #80]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #84]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #84]\n\t" "ldr r4, [%[a], #88]\n\t" "ldr r5, [%[b], #88]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #92]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #92]\n\t" "ldr r4, [%[a], #96]\n\t" "ldr r5, [%[b], #96]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #100]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #100]\n\t" "ldr r4, [%[a], #104]\n\t" "ldr r5, [%[b], #104]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #108]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #108]\n\t" "ldr r4, [%[a], #112]\n\t" "ldr r5, [%[b], #112]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #116]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #116]\n\t" "ldr r4, [%[a], #120]\n\t" "ldr r5, [%[b], #120]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #124]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #124]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - "add %[a], #0x80\n\t" - "add %[b], #0x80\n\t" - "add %[r], 
#0x80\n\t" - "add %[c], r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #0x80\n\t" +#else + "add %[a], %[a], #0x80\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #0x80\n\t" +#else + "add %[b], %[b], #0x80\n\t" +#endif +#ifdef __clang__ + "adds %[r], %[r], #0x80\n\t" +#else + "add %[r], %[r], #0x80\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" - "str r4, [%[r], #0]\n\t" +#endif + "str r4, [%[r]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[b], #32]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" 
+#endif "str r4, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], #40]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #44]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[b], #48]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #52]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[b], #56]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #60]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #60]\n\t" "ldr r4, [%[a], #64]\n\t" "ldr r5, [%[b], #64]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #68]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #68]\n\t" "ldr r4, [%[a], #72]\n\t" "ldr r5, [%[b], #72]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #76]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #76]\n\t" "ldr r4, [%[a], #80]\n\t" "ldr r5, [%[b], #80]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #84]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #84]\n\t" "ldr r4, [%[a], #88]\n\t" "ldr r5, [%[b], #88]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], 
#92]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #92]\n\t" "ldr r4, [%[a], #96]\n\t" "ldr r5, [%[b], #96]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #100]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #100]\n\t" "ldr r4, [%[a], #104]\n\t" "ldr r5, [%[b], #104]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #108]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #108]\n\t" "ldr r4, [%[a], #112]\n\t" "ldr r5, [%[b], #112]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #116]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #116]\n\t" "ldr r4, [%[a], #120]\n\t" "ldr r5, [%[b], #120]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #124]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #124]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif + "movs %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r7" + : "memory", "r3", "r4", "r5", "r6" ); - - return c; + return (uint32_t)(size_t)r; } /* AND m into each word of a and store in r. 
@@ -1833,34 +3241,70 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r6, %[a]\n\t" - "mov r7, #0\n\t" - "mov r4, #1\n\t" - "lsl r4, #8\n\t" - "sub r7, #1\n\t" - "add r6, r4\n\t" - "\n1:\n\t" - "add %[c], r7\n\t" + "movs r6, %[a]\n\t" + "movs r7, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0xff\n\t" +#ifdef __clang__ + "adds r4, r4, #1\n\t" +#else + "add r4, r4, #1\n\t" +#endif +#ifdef __clang__ + "subs r7, r7, #1\n\t" +#else + "sub r7, r7, #1\n\t" +#endif +#ifdef __clang__ + "adds r6, r6, r4\n\t" +#else + "add r6, r6, r4\n\t" +#endif + "\n" + "L_sp_2048_add_64_word_%=: \n\t" +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif "ldr r4, [%[a]]\n\t" "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r]]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - "add %[a], #4\n\t" - "add %[b], #4\n\t" - "add %[r], #4\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #4\n\t" +#else + "add %[b], %[b], #4\n\t" +#endif +#ifdef __clang__ + "adds %[r], %[r], #4\n\t" +#else + "add %[r], %[r], #4\n\t" +#endif "cmp %[a], r6\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "bne L_sp_2048_add_64_word_%=\n\t" + "movs %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r3", "r4", "r5", "r6", "r7" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ @@ -1873,34 +3317,67 @@ SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; __asm__ 
__volatile__ ( - "mov r7, %[a]\n\t" - "mov r5, #1\n\t" - "lsl r5, #8\n\t" - "add r7, r5\n\t" - "\n1:\n\t" - "mov r5, #0\n\t" - "sub r5, %[c]\n\t" + "movs r7, %[a]\n\t" + "movs r2, #0\n\t" + "movs r5, #0xff\n\t" +#ifdef __clang__ + "adds r5, r5, #1\n\t" +#else + "add r5, r5, #1\n\t" +#endif +#ifdef __clang__ + "adds r7, r7, r5\n\t" +#else + "add r7, r7, r5\n\t" +#endif + "\n" + "L_sp_2048_sub_in_place_64_words_%=: \n\t" + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, r2\n\t" +#else + "sub r5, r5, r2\n\t" +#endif "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" - "sbc %[c], %[c]\n\t" - "add %[a], #8\n\t" - "add %[b], #8\n\t" +#ifdef __clang__ + "sbcs r2, r2\n\t" +#else + "sbc r2, r2\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #8\n\t" +#else + "add %[a], %[a], #8\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #8\n\t" +#else + "add %[b], %[b], #8\n\t" +#endif "cmp %[a], r7\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "bne L_sp_2048_sub_in_place_64_words_%=\n\t" + "movs %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7" ); - - return c; + return (uint32_t)(size_t)a; } #endif /* WOLFSSL_SP_SMALL */ @@ -1914,98 +3391,253 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a, SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit tmp[64 * 2]; + sp_digit t[64 * 2]; + sp_digit* tmp = t; __asm__ __volatile__ ( - "mov r3, #0\n\t" - "mov r4, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" "mov r8, r3\n\t" - "mov r11, %[r]\n\t" + "mov r11, %[tmp]\n\t" "mov r9, %[a]\n\t" "mov r10, %[b]\n\t" - "mov r6, #1\n\t" - "lsl r6, r6, #8\n\t" - "add r6, r9\n\t" + "movs r6, 
#0xff\n\t" +#ifdef __clang__ + "adds r6, r6, #1\n\t" +#else + "add r6, r6, #1\n\t" +#endif + "add r6, r6, r9\n\t" "mov r12, r6\n\t" - "\n1:\n\t" - "mov %[r], #0\n\t" - "mov r5, #0\n\t" - "mov r6, #252\n\t" + "\n" + "L_sp_2048_mul_64_words_%=: \n\t" + "movs %[tmp], #0\n\t" + "movs r5, #0\n\t" + "movs r6, #0xfc\n\t" "mov %[a], r8\n\t" - "sub %[a], r6\n\t" +#ifdef __clang__ + "subs %[a], %[a], r6\n\t" +#else + "sub %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" +#endif +#ifdef __clang__ + "mvns r6, r6\n\t" +#else "mvn r6, r6\n\t" +#endif +#ifdef __clang__ + "ands %[a], r6\n\t" +#else "and %[a], r6\n\t" +#endif "mov %[b], r8\n\t" - "sub %[b], %[a]\n\t" - "add %[a], r9\n\t" - "add %[b], r10\n\t" - "\n2:\n\t" +#ifdef __clang__ + "subs %[b], %[b], %[a]\n\t" +#else + "sub %[b], %[b], %[a]\n\t" +#endif + "add %[a], %[a], r9\n\t" + "add %[b], %[b], r10\n\t" + "\n" + "L_sp_2048_mul_64_mul_%=: \n\t" "# Multiply Start\n\t" "ldr r6, [%[a]]\n\t" "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" - "adc r4, %[r]\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[tmp]\n\t" +#else + "adc r4, %[tmp]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" 
+#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r6, [%[a]]\n\t" "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "# Multiply Done\n\t" - "add %[a], #4\n\t" - "sub %[b], #4\n\t" +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "subs %[b], %[b], #4\n\t" +#else + "sub %[b], %[b], #4\n\t" +#endif "cmp %[a], r12\n\t" - "beq 3f\n\t" + "beq L_sp_2048_mul_64_done_mul_%=\n\t" "mov r6, r8\n\t" - "add r6, r9\n\t" + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" - "ble 2b\n\t" - "\n3:\n\t" - "mov %[r], r11\n\t" + "ble L_sp_2048_mul_64_mul_%=\n\t" + "\n" + "L_sp_2048_mul_64_done_mul_%=: \n\t" + "mov 
%[tmp], r11\n\t" "mov r7, r8\n\t" - "str r3, [%[r], r7]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "add r7, #4\n\t" + "str r3, [%[tmp], r7]\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "mov r8, r7\n\t" - "mov r6, #1\n\t" - "lsl r6, r6, #8\n\t" - "add r6, #248\n\t" + "movs r6, #0xff\n\t" +#ifdef __clang__ + "adds r6, r6, #0xf9\n\t" +#else + "add r6, r6, #0xf9\n\t" +#endif "cmp r7, r6\n\t" - "ble 1b\n\t" - "str r3, [%[r], r7]\n\t" + "ble L_sp_2048_mul_64_words_%=\n\t" + "str r3, [%[tmp], r7]\n\t" "mov %[a], r9\n\t" "mov %[b], r10\n\t" + : [a] "+r" (a), [b] "+r" (b), [tmp] "+r" (tmp) : - : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); - XMEMCPY(r, tmp, sizeof(tmp)); + XMEMCPY(r, t, sizeof(t)); } /* Square a and put result in r. (r = a * a) @@ -2016,149 +3648,445 @@ SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( - "mov r3, #0\n\t" - "mov r4, #0\n\t" - "mov r5, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" + "movs r5, #0\n\t" "mov r8, r3\n\t" "mov r11, %[r]\n\t" - "mov r6, #2\n\t" + "movs r6, #2\n\t" +#ifdef __clang__ + "lsls r6, r6, #8\n\t" +#else "lsl r6, r6, #8\n\t" +#endif +#ifdef __clang__ + "negs r6, r6\n\t" +#else "neg r6, r6\n\t" - "add sp, r6\n\t" +#endif + "add sp, sp, r6\n\t" "mov r10, sp\n\t" "mov r9, %[a]\n\t" - "\n1:\n\t" - "mov %[r], #0\n\t" - "mov r6, #252\n\t" + "\n" + "L_sp_2048_sqr_64_words_%=: \n\t" + "movs %[r], #0\n\t" + "movs r6, #0xfc\n\t" "mov %[a], r8\n\t" - "sub %[a], r6\n\t" +#ifdef __clang__ + "subs %[a], %[a], r6\n\t" +#else + "sub %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" +#endif +#ifdef __clang__ + "mvns r6, r6\n\t" +#else "mvn r6, r6\n\t" +#endif +#ifdef __clang__ + "ands %[a], r6\n\t" +#else "and %[a], r6\n\t" +#endif 
"mov r2, r8\n\t" - "sub r2, %[a]\n\t" - "add %[a], r9\n\t" - "add r2, r9\n\t" - "\n2:\n\t" +#ifdef __clang__ + "subs r2, r2, %[a]\n\t" +#else + "sub r2, r2, %[a]\n\t" +#endif + "add %[a], %[a], r9\n\t" + "add r2, r2, r9\n\t" + "\n" + "L_sp_2048_sqr_64_mul_%=: \n\t" "cmp r2, %[a]\n\t" - "beq 4f\n\t" + "beq L_sp_2048_sqr_64_sqr_%=\n\t" "# Multiply * 2: Start\n\t" "ldr r6, [%[a]]\n\t" "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" 
+#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# Multiply * 2: Done\n\t" - "bal 5f\n\t" - "\n4:\n\t" + "bal L_sp_2048_sqr_64_done_sqr_%=\n\t" + "\n" + "L_sp_2048_sqr_64_sqr_%=: \n\t" "# Square: Start\n\t" "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" 
+#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r6\n\t" +#else "mul r6, r6\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "muls r7, r7\n\t" +#else "mul r7, r7\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #15\n\t" +#else "lsr r7, r6, #15\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #17\n\t" +#else "lsl r6, r6, #17\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# Square: Done\n\t" - "\n5:\n\t" - "add %[a], #4\n\t" - "sub r2, #4\n\t" - "mov r6, #1\n\t" - "lsl r6, r6, #8\n\t" - "add r6, r9\n\t" + "\n" + "L_sp_2048_sqr_64_done_sqr_%=: \n\t" +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "subs r2, r2, #4\n\t" +#else + "sub r2, r2, #4\n\t" +#endif + "movs r6, #0xff\n\t" +#ifdef __clang__ + "adds r6, r6, #1\n\t" +#else + "add r6, r6, #1\n\t" +#endif + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" - "beq 3f\n\t" + "beq L_sp_2048_sqr_64_done_mul_%=\n\t" "cmp %[a], r2\n\t" - "bgt 3f\n\t" + "bgt L_sp_2048_sqr_64_done_mul_%=\n\t" "mov r7, r8\n\t" - "add r7, r9\n\t" + "add 
r7, r7, r9\n\t" "cmp %[a], r7\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble L_sp_2048_sqr_64_mul_%=\n\t" + "\n" + "L_sp_2048_sqr_64_done_mul_%=: \n\t" "mov %[r], r10\n\t" "mov r7, r8\n\t" "str r3, [%[r], r7]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "mov r5, #0\n\t" - "add r7, #4\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" + "movs r5, #0\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "mov r8, r7\n\t" - "mov r6, #1\n\t" - "lsl r6, r6, #8\n\t" - "add r6, #248\n\t" + "movs r6, #0xff\n\t" +#ifdef __clang__ + "adds r6, r6, #0xf9\n\t" +#else + "add r6, r6, #0xf9\n\t" +#endif "cmp r7, r6\n\t" - "ble 1b\n\t" + "ble L_sp_2048_sqr_64_words_%=\n\t" "mov %[a], r9\n\t" "str r3, [%[r], r7]\n\t" "mov %[r], r11\n\t" "mov %[a], r10\n\t" - "mov r3, #1\n\t" - "lsl r3, r3, #8\n\t" - "add r3, #252\n\t" - "\n4:\n\t" + "movs r3, #0xff\n\t" +#ifdef __clang__ + "adds r3, r3, #0xfd\n\t" +#else + "add r3, r3, #0xfd\n\t" +#endif + "\n" + "L_sp_2048_sqr_64_store_%=: \n\t" "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" - "sub r3, #4\n\t" - "bge 4b\n\t" - "mov r6, #2\n\t" +#ifdef __clang__ + "subs r3, r3, #4\n\t" +#else + "sub r3, r3, #4\n\t" +#endif + "bge L_sp_2048_sqr_64_store_%=\n\t" + "movs r6, #2\n\t" +#ifdef __clang__ + "lsls r6, r6, #8\n\t" +#else "lsl r6, r6, #8\n\t" - "add sp, r6\n\t" +#endif + "add sp, sp, r6\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } @@ -2192,32 +4120,64 @@ static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m) SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r6, %[a]\n\t" - "mov r7, #0\n\t" - "add r6, #128\n\t" - "sub r7, #1\n\t" - "\n1:\n\t" - "add %[c], r7\n\t" + "movs r6, %[a]\n\t" + "movs r7, #0\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "adds r6, r6, #0x80\n\t" +#else + "add r6, r6, #0x80\n\t" +#endif +#ifdef 
__clang__ + "subs r7, r7, #1\n\t" +#else + "sub r7, r7, #1\n\t" +#endif + "\n" + "L_sp_2048_add_32_word_%=: \n\t" +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif "ldr r4, [%[a]]\n\t" "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r]]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - "add %[a], #4\n\t" - "add %[b], #4\n\t" - "add %[r], #4\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #4\n\t" +#else + "add %[b], %[b], #4\n\t" +#endif +#ifdef __clang__ + "adds %[r], %[r], #4\n\t" +#else + "add %[r], %[r], #4\n\t" +#endif "cmp %[a], r6\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "bne L_sp_2048_add_32_word_%=\n\t" + "movs %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r3", "r4", "r5", "r6", "r7" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ @@ -2230,32 +4190,61 @@ SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; __asm__ __volatile__ ( - "mov r7, %[a]\n\t" - "add r7, #128\n\t" - "\n1:\n\t" - "mov r5, #0\n\t" - "sub r5, %[c]\n\t" + "movs r7, %[a]\n\t" + "movs r2, #0\n\t" +#ifdef __clang__ + "adds r7, r7, #0x80\n\t" +#else + "add r7, r7, #0x80\n\t" +#endif + "\n" + "L_sp_2048_sub_in_place_32_words_%=: \n\t" + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, r2\n\t" +#else + "sub r5, r5, r2\n\t" +#endif "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, 
[%[a]]\n\t" "str r4, [%[a], #4]\n\t" - "sbc %[c], %[c]\n\t" - "add %[a], #8\n\t" - "add %[b], #8\n\t" +#ifdef __clang__ + "sbcs r2, r2\n\t" +#else + "sbc r2, r2\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #8\n\t" +#else + "add %[a], %[a], #8\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #8\n\t" +#else + "add %[b], %[b], #8\n\t" +#endif "cmp %[a], r7\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "bne L_sp_2048_sub_in_place_32_words_%=\n\t" + "movs %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7" ); - - return c; + return (uint32_t)(size_t)a; } #endif /* WOLFSSL_SP_SMALL */ @@ -2269,95 +4258,243 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a, SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit tmp[32 * 2]; + sp_digit t[32 * 2]; + sp_digit* tmp = t; __asm__ __volatile__ ( - "mov r3, #0\n\t" - "mov r4, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" "mov r8, r3\n\t" - "mov r11, %[r]\n\t" + "mov r11, %[tmp]\n\t" "mov r9, %[a]\n\t" "mov r10, %[b]\n\t" - "mov r6, #128\n\t" - "add r6, r9\n\t" + "movs r6, #0x80\n\t" + "add r6, r6, r9\n\t" "mov r12, r6\n\t" - "\n1:\n\t" - "mov %[r], #0\n\t" - "mov r5, #0\n\t" - "mov r6, #124\n\t" + "\n" + "L_sp_2048_mul_32_words_%=: \n\t" + "movs %[tmp], #0\n\t" + "movs r5, #0\n\t" + "movs r6, #0x7c\n\t" "mov %[a], r8\n\t" - "sub %[a], r6\n\t" +#ifdef __clang__ + "subs %[a], %[a], r6\n\t" +#else + "sub %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" +#endif +#ifdef __clang__ + "mvns r6, r6\n\t" +#else "mvn r6, r6\n\t" +#endif +#ifdef __clang__ + "ands %[a], r6\n\t" +#else "and %[a], r6\n\t" +#endif "mov %[b], r8\n\t" - "sub %[b], %[a]\n\t" - "add %[a], r9\n\t" - "add %[b], r10\n\t" - "\n2:\n\t" +#ifdef __clang__ + "subs %[b], %[b], %[a]\n\t" +#else + "sub %[b], %[b], %[a]\n\t" +#endif + "add %[a], %[a], 
r9\n\t" + "add %[b], %[b], r10\n\t" + "\n" + "L_sp_2048_mul_32_mul_%=: \n\t" "# Multiply Start\n\t" "ldr r6, [%[a]]\n\t" "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" - "adc r4, %[r]\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[tmp]\n\t" +#else + "adc r4, %[tmp]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r6, [%[a]]\n\t" "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" 
+#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "# Multiply Done\n\t" - "add %[a], #4\n\t" - "sub %[b], #4\n\t" +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "subs %[b], %[b], #4\n\t" +#else + "sub %[b], %[b], #4\n\t" +#endif "cmp %[a], r12\n\t" - "beq 3f\n\t" + "beq L_sp_2048_mul_32_done_mul_%=\n\t" "mov r6, r8\n\t" - "add r6, r9\n\t" + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" - "ble 2b\n\t" - "\n3:\n\t" - "mov %[r], r11\n\t" + "ble L_sp_2048_mul_32_mul_%=\n\t" + "\n" + "L_sp_2048_mul_32_done_mul_%=: \n\t" + "mov %[tmp], r11\n\t" "mov r7, r8\n\t" - "str r3, [%[r], r7]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "add r7, #4\n\t" + "str r3, [%[tmp], r7]\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "mov r8, r7\n\t" - "mov r6, #248\n\t" + "movs r6, #0xf8\n\t" "cmp r7, r6\n\t" - "ble 1b\n\t" - "str r3, [%[r], r7]\n\t" + "ble L_sp_2048_mul_32_words_%=\n\t" + "str r3, [%[tmp], r7]\n\t" "mov %[a], r9\n\t" "mov %[b], r10\n\t" + : [a] "+r" (a), [b] "+r" (b), [tmp] "+r" (tmp) : - : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); - XMEMCPY(r, tmp, sizeof(tmp)); + XMEMCPY(r, t, sizeof(t)); } /* Square a and put result in r. 
(r = a * a) @@ -2368,144 +4505,430 @@ SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( - "mov r3, #0\n\t" - "mov r4, #0\n\t" - "mov r5, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" + "movs r5, #0\n\t" "mov r8, r3\n\t" "mov r11, %[r]\n\t" - "mov r6, #1\n\t" - "lsl r6, r6, #8\n\t" + "movs r6, #0xff\n\t" +#ifdef __clang__ + "adds r6, r6, #1\n\t" +#else + "add r6, r6, #1\n\t" +#endif +#ifdef __clang__ + "negs r6, r6\n\t" +#else "neg r6, r6\n\t" - "add sp, r6\n\t" +#endif + "add sp, sp, r6\n\t" "mov r10, sp\n\t" "mov r9, %[a]\n\t" - "\n1:\n\t" - "mov %[r], #0\n\t" - "mov r6, #124\n\t" + "\n" + "L_sp_2048_sqr_32_words_%=: \n\t" + "movs %[r], #0\n\t" + "movs r6, #0x7c\n\t" "mov %[a], r8\n\t" - "sub %[a], r6\n\t" +#ifdef __clang__ + "subs %[a], %[a], r6\n\t" +#else + "sub %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" +#endif +#ifdef __clang__ + "mvns r6, r6\n\t" +#else "mvn r6, r6\n\t" +#endif +#ifdef __clang__ + "ands %[a], r6\n\t" +#else "and %[a], r6\n\t" +#endif "mov r2, r8\n\t" - "sub r2, %[a]\n\t" - "add %[a], r9\n\t" - "add r2, r9\n\t" - "\n2:\n\t" +#ifdef __clang__ + "subs r2, r2, %[a]\n\t" +#else + "sub r2, r2, %[a]\n\t" +#endif + "add %[a], %[a], r9\n\t" + "add r2, r2, r9\n\t" + "\n" + "L_sp_2048_sqr_32_mul_%=: \n\t" "cmp r2, %[a]\n\t" - "beq 4f\n\t" + "beq L_sp_2048_sqr_32_sqr_%=\n\t" "# Multiply * 2: Start\n\t" "ldr r6, [%[a]]\n\t" "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else 
+ "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" 
+#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# Multiply * 2: Done\n\t" - "bal 5f\n\t" - "\n4:\n\t" + "bal L_sp_2048_sqr_32_done_sqr_%=\n\t" + "\n" + "L_sp_2048_sqr_32_sqr_%=: \n\t" "# Square: Start\n\t" "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r6\n\t" +#else "mul r6, r6\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "muls r7, r7\n\t" +#else "mul r7, r7\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif 
+#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #15\n\t" +#else "lsr r7, r6, #15\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #17\n\t" +#else "lsl r6, r6, #17\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# Square: Done\n\t" - "\n5:\n\t" - "add %[a], #4\n\t" - "sub r2, #4\n\t" - "mov r6, #128\n\t" - "add r6, r9\n\t" + "\n" + "L_sp_2048_sqr_32_done_sqr_%=: \n\t" +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "subs r2, r2, #4\n\t" +#else + "sub r2, r2, #4\n\t" +#endif + "movs r6, #0x80\n\t" + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" - "beq 3f\n\t" + "beq L_sp_2048_sqr_32_done_mul_%=\n\t" "cmp %[a], r2\n\t" - "bgt 3f\n\t" + "bgt L_sp_2048_sqr_32_done_mul_%=\n\t" "mov r7, r8\n\t" - "add r7, r9\n\t" + "add r7, r7, r9\n\t" "cmp %[a], r7\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble L_sp_2048_sqr_32_mul_%=\n\t" + "\n" + "L_sp_2048_sqr_32_done_mul_%=: \n\t" "mov %[r], r10\n\t" "mov r7, r8\n\t" "str r3, [%[r], r7]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "mov r5, #0\n\t" - "add r7, #4\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" + "movs r5, #0\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "mov r8, r7\n\t" - "mov r6, #248\n\t" + "movs r6, #0xf8\n\t" "cmp r7, r6\n\t" - "ble 1b\n\t" + "ble L_sp_2048_sqr_32_words_%=\n\t" "mov %[a], r9\n\t" "str r3, [%[r], r7]\n\t" "mov %[r], r11\n\t" "mov %[a], r10\n\t" - "mov r3, #252\n\t" - "\n4:\n\t" + "movs r3, #0xfc\n\t" + "\n" + "L_sp_2048_sqr_32_store_%=: \n\t" "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" - "sub r3, #4\n\t" - "bge 4b\n\t" - "mov r6, #1\n\t" - "lsl r6, r6, #8\n\t" - "add sp, r6\n\t" +#ifdef __clang__ + "subs r3, r3, #4\n\t" +#else + "sub r3, r3, #4\n\t" 
+#endif + "bge L_sp_2048_sqr_32_store_%=\n\t" + "movs r6, #0xff\n\t" +#ifdef __clang__ + "adds r6, r6, #1\n\t" +#else + "add r6, r6, #1\n\t" +#endif + "add sp, sp, r6\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } @@ -2543,60 +4966,189 @@ SP_NOINLINE static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, sp_digit b) { __asm__ __volatile__ ( - "mov r6, #1\n\t" - "lsl r6, r6, #8\n\t" - "add r6, %[a]\n\t" + "movs r6, #0xff\n\t" +#ifdef __clang__ + "adds r6, r6, #1\n\t" +#else + "add r6, r6, #1\n\t" +#endif +#ifdef __clang__ + "adds r6, r6, %[a]\n\t" +#else + "add r6, r6, %[a]\n\t" +#endif "mov r8, %[r]\n\t" "mov r9, r6\n\t" - "mov r3, #0\n\t" - "mov r4, #0\n\t" - "1:\n\t" - "mov %[r], #0\n\t" - "mov r5, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" + "\n" + "L_sp_2048_mul_d_64_%=: \n\t" + "movs %[r], #0\n\t" + "movs r5, #0\n\t" "# A[] * B\n\t" "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, %[b], #16\n\t" +#else "lsl r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "lsrs r7, %[b], #16\n\t" +#else "lsr r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" 
+#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, %[b], #16\n\t" +#else "lsr r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "lsls r7, %[b], #16\n\t" +#else "lsl r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# A[] * B - Done\n\t" "mov %[r], r8\n\t" "str r3, [%[r]]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "add %[r], #4\n\t" - "add %[a], #4\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" +#ifdef __clang__ + "adds %[r], %[r], #4\n\t" +#else + "add %[r], %[r], #4\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif "mov r8, %[r]\n\t" "cmp %[a], r9\n\t" - "blt 1b\n\t" + "blt L_sp_2048_mul_d_64_%=\n\t" "str r3, [%[r]]\n\t" - : [r] "+r" (r), [a] "+r" (a) - : [b] "r" (b) + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -2627,30 +5179,50 @@ static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m) SP_NOINLINE static sp_digit 
sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r5, #128\n\t" + "movs r4, #0\n\t" + "movs r5, #0x80\n\t" "mov r8, r5\n\t" - "mov r7, #0\n\t" - "1:\n\t" + "movs r7, #0\n\t" + "\n" + "L_sp_2048_cond_sub_32_words_%=: \n\t" "ldr r6, [%[b], r7]\n\t" +#ifdef __clang__ + "ands r6, %[m]\n\t" +#else "and r6, %[m]\n\t" - "mov r5, #0\n\t" - "sub r5, %[c]\n\t" +#endif + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, r4\n\t" +#else + "sub r5, r5, r4\n\t" +#endif "ldr r5, [%[a], r7]\n\t" +#ifdef __clang__ + "sbcs r5, r6\n\t" +#else "sbc r5, r6\n\t" - "sbc %[c], %[c]\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r4\n\t" +#else + "sbc r4, r4\n\t" +#endif "str r5, [%[r], r7]\n\t" - "add r7, #4\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "cmp r7, r8\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r5", "r6", "r7", "r8" + "blt L_sp_2048_cond_sub_32_words_%=\n\t" + "movs %[r], r4\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r4", "r5", "r6", "r7", "r8" ); - - return c; + return (uint32_t)(size_t)r; } /* Reduce the number back to 2048 bits using Montgomery reduction. 
@@ -2663,136 +5235,372 @@ SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) { sp_digit ca = 0; - __asm__ __volatile__ ( "mov r8, %[mp]\n\t" "mov r12, %[ca]\n\t" - "mov r14, %[m]\n\t" + "mov lr, %[m]\n\t" "mov r9, %[a]\n\t" - "mov r4, #0\n\t" + "movs r4, #0\n\t" "# i = 0\n\t" "mov r11, r4\n\t" - "\n1:\n\t" - "mov r5, #0\n\t" - "mov %[ca], #0\n\t" + "\n" + "L_sp_2048_mont_reduce_32_mod_%=: \n\t" + "movs r5, #0\n\t" + "movs %[ca], #0\n\t" "# mu = a[i] * mp\n\t" "mov %[mp], r8\n\t" "ldr %[a], [%[a]]\n\t" +#ifdef __clang__ + "muls %[mp], %[a]\n\t" +#else "mul %[mp], %[a]\n\t" - "mov %[m], r14\n\t" +#endif + "mov %[m], lr\n\t" "mov r10, r9\n\t" - "\n2:\n\t" + "\n" + "L_sp_2048_mont_reduce_32_word_%=: \n\t" "# a[i+j] += m[j] * mu\n\t" "mov %[a], r10\n\t" "ldr %[a], [%[a]]\n\t" - "mov %[ca], #0\n\t" - "mov r4, r5\n\t" - "mov r5, #0\n\t" + "movs %[ca], #0\n\t" + "movs r4, r5\n\t" + "movs r5, #0\n\t" "# Multiply m[j] and mu - Start\n\t" "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r6, %[mp], #16\n\t" +#else "lsl r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add %[a], r7\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], r7\n\t" +#else + "add %[a], %[a], r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[ca]\n\t" +#else "adc r5, %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add %[a], r6\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], r6\n\t" +#else + "add 
%[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r6, %[mp], #16\n\t" +#else "lsr r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r5, r7\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r7\n\t" +#else + "add r5, r5, r7\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add %[a], r6\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], r6\n\t" +#else + "add %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" +#endif "# Multiply m[j] and mu - Done\n\t" - "add r4, %[a]\n\t" +#ifdef __clang__ + "adds r4, r4, %[a]\n\t" +#else + "add r4, r4, %[a]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[ca]\n\t" +#else "adc r5, %[ca]\n\t" +#endif "mov %[a], r10\n\t" "str r4, [%[a]]\n\t" - "mov r6, #4\n\t" - "add %[m], #4\n\t" - "add r10, r6\n\t" - "mov r4, #124\n\t" - "add r4, r9\n\t" + "movs r6, #4\n\t" +#ifdef __clang__ + "adds %[m], %[m], #4\n\t" +#else + "add %[m], %[m], #4\n\t" +#endif + "add r10, r10, r6\n\t" + "movs r4, #0x7c\n\t" + "add r4, r4, r9\n\t" "cmp r10, r4\n\t" - "blt 2b\n\t" + "blt L_sp_2048_mont_reduce_32_word_%=\n\t" "# a[i+31] += m[31] * mu\n\t" - "mov %[ca], #0\n\t" + "movs %[ca], #0\n\t" "mov r4, r12\n\t" - "mov %[a], #0\n\t" + "movs %[a], #0\n\t" "# Multiply m[31] and mu - Start\n\t" "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r6, %[mp], #16\n\t" +#else "lsl r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, 
r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r5, r7\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r7\n\t" +#else + "add r5, r5, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[ca]\n\t" +#else "adc r4, %[ca]\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r6, %[mp], #16\n\t" +#else "lsr r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" 
+#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "# Multiply m[31] and mu - Done\n\t" - "mov %[ca], %[a]\n\t" + "movs %[ca], %[a]\n\t" "mov %[a], r10\n\t" "ldr r7, [%[a], #4]\n\t" "ldr %[a], [%[a]]\n\t" - "mov r6, #0\n\t" - "add r5, %[a]\n\t" + "movs r6, #0\n\t" +#ifdef __clang__ + "adds r5, r5, %[a]\n\t" +#else + "add r5, r5, %[a]\n\t" +#endif +#ifdef __clang__ + "adcs r7, r4\n\t" +#else "adc r7, r4\n\t" +#endif +#ifdef __clang__ + "adcs %[ca], r6\n\t" +#else "adc %[ca], r6\n\t" +#endif "mov %[a], r10\n\t" "str r5, [%[a]]\n\t" "str r7, [%[a], #4]\n\t" "# i += 1\n\t" - "mov r6, #4\n\t" - "add r9, r6\n\t" - "add r11, r6\n\t" + "movs r6, #4\n\t" + "add r9, r9, r6\n\t" + "add r11, r11, r6\n\t" "mov r12, %[ca]\n\t" "mov %[a], r9\n\t" - "mov r4, #128\n\t" + "movs r4, #0x80\n\t" "cmp r11, r4\n\t" - "blt 1b\n\t" - "mov %[m], r14\n\t" - : [ca] "+r" (ca), [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + "blt L_sp_2048_mont_reduce_32_mod_%=\n\t" + "mov %[m], lr\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp), [ca] "+r" (ca) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); - sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - ca); } @@ -2836,59 +5644,184 @@ SP_NOINLINE static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) { __asm__ __volatile__ ( - "mov r6, #128\n\t" - "add r6, %[a]\n\t" + "movs r6, #0x80\n\t" +#ifdef __clang__ + "adds r6, r6, %[a]\n\t" +#else + "add r6, r6, %[a]\n\t" +#endif "mov r8, %[r]\n\t" "mov r9, r6\n\t" - "mov r3, #0\n\t" - "mov r4, #0\n\t" - "1:\n\t" - "mov %[r], #0\n\t" - "mov r5, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" + "\n" + "L_sp_2048_mul_d_32_%=: \n\t" + "movs %[r], #0\n\t" + "movs r5, #0\n\t" "# A[] * B\n\t" "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" 
+#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, %[b], #16\n\t" +#else "lsl r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "lsrs r7, %[b], #16\n\t" +#else "lsr r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, %[b], #16\n\t" +#else "lsr r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "lsls r7, %[b], #16\n\t" +#else "lsl r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif 
+#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# A[] * B - Done\n\t" "mov %[r], r8\n\t" "str r3, [%[r]]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "add %[r], #4\n\t" - "add %[a], #4\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" +#ifdef __clang__ + "adds %[r], %[r], #4\n\t" +#else + "add %[r], %[r], #4\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif "mov r8, %[r]\n\t" "cmp %[a], r9\n\t" - "blt 1b\n\t" + "blt L_sp_2048_mul_d_32_%=\n\t" "str r3, [%[r]]\n\t" - : [r] "+r" (r), [a] "+r" (a) - : [b] "r" (b) + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -2905,135 +5838,512 @@ SP_NOINLINE static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, SP_NOINLINE static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) { - sp_digit r = 0; - __asm__ __volatile__ ( + "movs r3, #0\n\t" +#ifdef __clang__ + "lsrs r5, %[div], #1\n\t" +#else "lsr r5, %[div], #1\n\t" - "add r5, #1\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, #1\n\t" +#else + "add r5, r5, #1\n\t" +#endif "mov r8, %[d0]\n\t" "mov r9, %[d1]\n\t" "# Do top 32\n\t" - "mov r6, r5\n\t" - "sub r6, %[d1]\n\t" + "movs r6, r5\n\t" +#ifdef __clang__ + "subs r6, r6, %[d1]\n\t" +#else + "sub r6, r6, %[d1]\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" - "add %[r], %[r]\n\t" - "sub %[r], r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r3\n\t" +#else + "add r3, r3, r3\n\t" +#endif +#ifdef __clang__ + "subs r3, r3, r6\n\t" +#else + "sub r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "ands r6, r5\n\t" +#else "and r6, r5\n\t" - "sub %[d1], r6\n\t" - "# Next 30 bits\n\t" - "mov r4, #29\n\t" - "1:\n\t" +#endif +#ifdef __clang__ + "subs %[d1], %[d1], r6\n\t" +#else + "sub %[d1], %[d1], r6\n\t" +#endif + 
"\n\t" + "movs r4, #29\n\t" + "\n" + "L_div_2048_word_32_loop_%=: \n\t" +#ifdef __clang__ + "lsls %[d0], %[d0], #1\n\t" +#else "lsl %[d0], %[d0], #1\n\t" +#endif +#ifdef __clang__ + "adcs %[d1], %[d1]\n\t" +#else "adc %[d1], %[d1]\n\t" - "mov r6, r5\n\t" - "sub r6, %[d1]\n\t" +#endif + "movs r6, r5\n\t" +#ifdef __clang__ + "subs r6, r6, %[d1]\n\t" +#else + "sub r6, r6, %[d1]\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" - "add %[r], %[r]\n\t" - "sub %[r], r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r3\n\t" +#else + "add r3, r3, r3\n\t" +#endif +#ifdef __clang__ + "subs r3, r3, r6\n\t" +#else + "sub r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "ands r6, r5\n\t" +#else "and r6, r5\n\t" - "sub %[d1], r6\n\t" - "sub r4, #1\n\t" - "bpl 1b\n\t" - "mov r7, #0\n\t" - "add %[r], %[r]\n\t" - "add %[r], #1\n\t" +#endif +#ifdef __clang__ + "subs %[d1], %[d1], r6\n\t" +#else + "sub %[d1], %[d1], r6\n\t" +#endif +#ifdef __clang__ + "subs r4, r4, #1\n\t" +#else + "sub r4, r4, #1\n\t" +#endif + "bpl L_div_2048_word_32_loop_%=\n\t" + "movs r7, #0\n\t" +#ifdef __clang__ + "adds r3, r3, r3\n\t" +#else + "add r3, r3, r3\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, #1\n\t" +#else + "add r3, r3, #1\n\t" +#endif "# r * div - Start\n\t" - "lsl %[d1], %[r], #16\n\t" +#ifdef __clang__ + "lsls %[d1], r3, #16\n\t" +#else + "lsl %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "lsls r4, %[div], #16\n\t" +#else "lsl r4, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], %[d1], #16\n\t" +#else "lsr %[d1], %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #16\n\t" +#else "lsr r4, r4, #16\n\t" +#endif +#ifdef __clang__ + "muls r4, %[d1]\n\t" +#else "mul r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[div], #16\n\t" +#else "lsr r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r5, %[d1], #16\n\t" +#else "lsr r5, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls 
%[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" - "lsr %[d1], %[r], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], r3, #16\n\t" +#else + "lsr %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, %[d1]\n\t" +#else "mul r6, %[d1]\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "lsls r6, %[div], #16\n\t" +#else "lsl r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[d1], #16\n\t" +#else "lsr r6, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" +#endif "# r * div - Done\n\t" "mov %[d1], r8\n\t" - "sub %[d1], r4\n\t" - "mov r4, %[d1]\n\t" +#ifdef __clang__ + "subs %[d1], %[d1], r4\n\t" +#else + "sub %[d1], %[d1], r4\n\t" +#endif + "movs r4, %[d1]\n\t" "mov %[d1], r9\n\t" +#ifdef __clang__ + "sbcs %[d1], r5\n\t" +#else "sbc %[d1], r5\n\t" - "mov r5, %[d1]\n\t" - "add %[r], r5\n\t" +#endif + "movs r5, %[d1]\n\t" +#ifdef __clang__ + "adds r3, r3, r5\n\t" +#else + "add r3, r3, r5\n\t" +#endif "# r * div - Start\n\t" - "lsl %[d1], %[r], #16\n\t" +#ifdef __clang__ + "lsls %[d1], r3, #16\n\t" +#else + "lsl %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "lsls r4, %[div], #16\n\t" +#else "lsl r4, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], %[d1], #16\n\t" +#else "lsr %[d1], %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #16\n\t" +#else "lsr r4, r4, #16\n\t" +#endif +#ifdef __clang__ 
+ "muls r4, %[d1]\n\t" +#else "mul r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[div], #16\n\t" +#else "lsr r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r5, %[d1], #16\n\t" +#else "lsr r5, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" - "lsr %[d1], %[r], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], r3, #16\n\t" +#else + "lsr %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, %[d1]\n\t" +#else "mul r6, %[d1]\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "lsls r6, %[div], #16\n\t" +#else "lsl r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[d1], #16\n\t" +#else "lsr r6, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" +#endif "# r * div - Done\n\t" "mov %[d1], r8\n\t" "mov r6, r9\n\t" +#ifdef __clang__ + "subs r4, %[d1], r4\n\t" +#else "sub r4, %[d1], r4\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r5\n\t" +#else "sbc r6, r5\n\t" - "mov r5, r6\n\t" - "add %[r], r5\n\t" +#endif + "movs r5, r6\n\t" +#ifdef __clang__ + "adds r3, r3, r5\n\t" +#else + "add r3, r3, r5\n\t" +#endif "# r * div - Start\n\t" - "lsl %[d1], %[r], #16\n\t" +#ifdef __clang__ + "lsls %[d1], r3, #16\n\t" +#else + "lsl %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "lsls r4, 
%[div], #16\n\t" +#else "lsl r4, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], %[d1], #16\n\t" +#else "lsr %[d1], %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #16\n\t" +#else "lsr r4, r4, #16\n\t" +#endif +#ifdef __clang__ + "muls r4, %[d1]\n\t" +#else "mul r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[div], #16\n\t" +#else "lsr r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r5, %[d1], #16\n\t" +#else "lsr r5, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" - "lsr %[d1], %[r], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], r3, #16\n\t" +#else + "lsr %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, %[d1]\n\t" +#else "mul r6, %[d1]\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "lsls r6, %[div], #16\n\t" +#else "lsl r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[d1], #16\n\t" +#else "lsr r6, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" +#endif "# r * div - Done\n\t" "mov %[d1], r8\n\t" "mov r6, r9\n\t" +#ifdef __clang__ + "subs r4, %[d1], r4\n\t" +#else "sub r4, %[d1], r4\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r5\n\t" +#else "sbc r6, r5\n\t" - "mov r5, r6\n\t" - "add %[r], r5\n\t" - "mov r6, %[div]\n\t" - "sub r6, 
r4\n\t" +#endif + "movs r5, r6\n\t" +#ifdef __clang__ + "adds r3, r3, r5\n\t" +#else + "add r3, r3, r5\n\t" +#endif + "movs r6, %[div]\n\t" +#ifdef __clang__ + "subs r6, r6, r4\n\t" +#else + "sub r6, r6, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" - "sub %[r], r6\n\t" - : [r] "+r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r7", "r6", "r8", "r9" +#endif +#ifdef __clang__ + "subs r3, r3, r6\n\t" +#else + "sub r3, r3, r6\n\t" +#endif + "movs %[d1], r3\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); - return r; + return (uint32_t)(size_t)d1; } /* Compare a with b in constant time. @@ -3045,38 +6355,93 @@ SP_NOINLINE static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, */ SP_NOINLINE static int32_t sp_2048_cmp_32(const sp_digit* a, const sp_digit* b) { - sp_digit r = 0; - - __asm__ __volatile__ ( - "mov r3, #0\n\t" + "movs r2, #0\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "mvns r3, r3\n\t" +#else "mvn r3, r3\n\t" - "mov r6, #124\n\t" - "1:\n\t" +#endif + "movs r6, #0x7c\n\t" + "\n" + "L_sp_2048_cmp_32_words_%=: \n\t" "ldr r7, [%[a], r6]\n\t" "ldr r5, [%[b], r6]\n\t" +#ifdef __clang__ + "ands r7, r3\n\t" +#else "and r7, r3\n\t" +#endif +#ifdef __clang__ + "ands r5, r3\n\t" +#else "and r5, r3\n\t" - "mov r4, r7\n\t" - "sub r7, r5\n\t" +#endif + "movs r4, r7\n\t" +#ifdef __clang__ + "subs r7, r7, r5\n\t" +#else + "sub r7, r7, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r7\n\t" +#else "sbc r7, r7\n\t" - "add %[r], r7\n\t" +#endif +#ifdef __clang__ + "adds r2, r2, r7\n\t" +#else + "add r2, r2, r7\n\t" +#endif +#ifdef __clang__ + "mvns r7, r7\n\t" +#else "mvn r7, r7\n\t" +#endif +#ifdef __clang__ + "ands r3, r7\n\t" +#else "and r3, r7\n\t" - "sub r5, r4\n\t" +#endif +#ifdef __clang__ + "subs r5, r5, r4\n\t" +#else + "sub r5, r5, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r7\n\t" +#else "sbc r7, r7\n\t" - "sub %[r], r7\n\t" +#endif 
+#ifdef __clang__ + "subs r2, r2, r7\n\t" +#else + "sub r2, r2, r7\n\t" +#endif +#ifdef __clang__ + "mvns r7, r7\n\t" +#else "mvn r7, r7\n\t" +#endif +#ifdef __clang__ + "ands r3, r7\n\t" +#else "and r3, r7\n\t" - "sub r6, #4\n\t" +#endif +#ifdef __clang__ + "subs r6, r6, #4\n\t" +#else + "sub r6, r6, #4\n\t" +#endif "cmp r6, #0\n\t" - "bge 1b\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b) - : "r3", "r4", "r5", "r6", "r7" + "bge L_sp_2048_cmp_32_words_%=\n\t" + "movs %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7" ); - - return r; + return (uint32_t)(size_t)a; } /* Divide d in a and put remainder into r (m*d + r = a) @@ -3458,31 +6823,55 @@ static void sp_2048_mont_norm_64(sp_digit* r, const sp_digit* m) SP_NOINLINE static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r5, #1\n\t" - "lsl r5, r5, #8\n\t" + "movs r4, #0\n\t" + "movs r5, #0xff\n\t" +#ifdef __clang__ + "adds r5, r5, #1\n\t" +#else + "add r5, r5, #1\n\t" +#endif "mov r8, r5\n\t" - "mov r7, #0\n\t" - "1:\n\t" + "movs r7, #0\n\t" + "\n" + "L_sp_2048_cond_sub_64_words_%=: \n\t" "ldr r6, [%[b], r7]\n\t" +#ifdef __clang__ + "ands r6, %[m]\n\t" +#else "and r6, %[m]\n\t" - "mov r5, #0\n\t" - "sub r5, %[c]\n\t" +#endif + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, r4\n\t" +#else + "sub r5, r5, r4\n\t" +#endif "ldr r5, [%[a], r7]\n\t" +#ifdef __clang__ + "sbcs r5, r6\n\t" +#else "sbc r5, r6\n\t" - "sbc %[c], %[c]\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r4\n\t" +#else + "sbc r4, r4\n\t" +#endif "str r5, [%[r], r7]\n\t" - "add r7, #4\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "cmp r7, r8\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r5", "r6", "r7", "r8" + "blt L_sp_2048_cond_sub_64_words_%=\n\t" + "movs %[r], r4\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] 
"+r" (b), [m] "+r" (m) + : + : "memory", "r4", "r5", "r6", "r7", "r8" ); - - return c; + return (uint32_t)(size_t)r; } /* Reduce the number back to 2048 bits using Montgomery reduction. @@ -3495,137 +6884,377 @@ SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp) { sp_digit ca = 0; - __asm__ __volatile__ ( "mov r8, %[mp]\n\t" "mov r12, %[ca]\n\t" - "mov r14, %[m]\n\t" + "mov lr, %[m]\n\t" "mov r9, %[a]\n\t" - "mov r4, #0\n\t" + "movs r4, #0\n\t" "# i = 0\n\t" "mov r11, r4\n\t" - "\n1:\n\t" - "mov r5, #0\n\t" - "mov %[ca], #0\n\t" + "\n" + "L_sp_2048_mont_reduce_64_mod_%=: \n\t" + "movs r5, #0\n\t" + "movs %[ca], #0\n\t" "# mu = a[i] * mp\n\t" "mov %[mp], r8\n\t" "ldr %[a], [%[a]]\n\t" +#ifdef __clang__ + "muls %[mp], %[a]\n\t" +#else "mul %[mp], %[a]\n\t" - "mov %[m], r14\n\t" +#endif + "mov %[m], lr\n\t" "mov r10, r9\n\t" - "\n2:\n\t" + "\n" + "L_sp_2048_mont_reduce_64_word_%=: \n\t" "# a[i+j] += m[j] * mu\n\t" "mov %[a], r10\n\t" "ldr %[a], [%[a]]\n\t" - "mov %[ca], #0\n\t" - "mov r4, r5\n\t" - "mov r5, #0\n\t" + "movs %[ca], #0\n\t" + "movs r4, r5\n\t" + "movs r5, #0\n\t" "# Multiply m[j] and mu - Start\n\t" "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r6, %[mp], #16\n\t" +#else "lsl r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add %[a], r7\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], r7\n\t" +#else + "add %[a], %[a], r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[ca]\n\t" +#else "adc r5, %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr 
r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add %[a], r6\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], r6\n\t" +#else + "add %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r6, %[mp], #16\n\t" +#else "lsr r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r5, r7\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r7\n\t" +#else + "add r5, r5, r7\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add %[a], r6\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], r6\n\t" +#else + "add %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" +#endif "# Multiply m[j] and mu - Done\n\t" - "add r4, %[a]\n\t" +#ifdef __clang__ + "adds r4, r4, %[a]\n\t" +#else + "add r4, r4, %[a]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[ca]\n\t" +#else "adc r5, %[ca]\n\t" +#endif "mov %[a], r10\n\t" "str r4, [%[a]]\n\t" - "mov r6, #4\n\t" - "add %[m], #4\n\t" - "add r10, r6\n\t" - "mov r4, #252\n\t" - "add r4, r9\n\t" + "movs r6, #4\n\t" +#ifdef __clang__ + "adds %[m], %[m], #4\n\t" +#else + "add %[m], %[m], #4\n\t" +#endif + "add r10, r10, r6\n\t" + "movs r4, #0xfc\n\t" + "add r4, r4, r9\n\t" "cmp r10, r4\n\t" - "blt 2b\n\t" + "blt L_sp_2048_mont_reduce_64_word_%=\n\t" "# a[i+63] += m[63] * mu\n\t" - "mov %[ca], #0\n\t" + "movs %[ca], #0\n\t" "mov r4, r12\n\t" - "mov %[a], #0\n\t" + "movs %[a], #0\n\t" "# Multiply m[63] 
and mu - Start\n\t" "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r6, %[mp], #16\n\t" +#else "lsl r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r5, r7\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r7\n\t" +#else + "add r5, r5, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[ca]\n\t" +#else "adc r4, %[ca]\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r6, %[mp], #16\n\t" +#else "lsr r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, 
#16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "# Multiply m[63] and mu - Done\n\t" - "mov %[ca], %[a]\n\t" + "movs %[ca], %[a]\n\t" "mov %[a], r10\n\t" "ldr r7, [%[a], #4]\n\t" "ldr %[a], [%[a]]\n\t" - "mov r6, #0\n\t" - "add r5, %[a]\n\t" + "movs r6, #0\n\t" +#ifdef __clang__ + "adds r5, r5, %[a]\n\t" +#else + "add r5, r5, %[a]\n\t" +#endif +#ifdef __clang__ + "adcs r7, r4\n\t" +#else "adc r7, r4\n\t" +#endif +#ifdef __clang__ + "adcs %[ca], r6\n\t" +#else "adc %[ca], r6\n\t" +#endif "mov %[a], r10\n\t" "str r5, [%[a]]\n\t" "str r7, [%[a], #4]\n\t" "# i += 1\n\t" - "mov r6, #4\n\t" - "add r9, r6\n\t" - "add r11, r6\n\t" + "movs r6, #4\n\t" + "add r9, r9, r6\n\t" + "add r11, r11, r6\n\t" "mov r12, %[ca]\n\t" "mov %[a], r9\n\t" - "mov r4, #1\n\t" - "lsl r4, r4, #8\n\t" + "movs r4, #0xff\n\t" +#ifdef __clang__ + "adds r4, r4, #1\n\t" +#else + "add r4, r4, #1\n\t" +#endif "cmp r11, r4\n\t" - "blt 1b\n\t" - "mov %[m], r14\n\t" - : [ca] "+r" (ca), [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + "blt L_sp_2048_mont_reduce_64_mod_%=\n\t" + "mov %[m], lr\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp), [ca] "+r" (ca) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); - sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - ca); } @@ -3671,135 +7300,512 @@ static void sp_2048_mont_sqr_64(sp_digit* r, const sp_digit* a, SP_NOINLINE static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div) { - sp_digit r = 0; - __asm__ __volatile__ ( + "movs r3, #0\n\t" +#ifdef __clang__ + "lsrs r5, %[div], #1\n\t" +#else "lsr r5, %[div], #1\n\t" - "add 
r5, #1\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, #1\n\t" +#else + "add r5, r5, #1\n\t" +#endif "mov r8, %[d0]\n\t" "mov r9, %[d1]\n\t" "# Do top 32\n\t" - "mov r6, r5\n\t" - "sub r6, %[d1]\n\t" + "movs r6, r5\n\t" +#ifdef __clang__ + "subs r6, r6, %[d1]\n\t" +#else + "sub r6, r6, %[d1]\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" - "add %[r], %[r]\n\t" - "sub %[r], r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r3\n\t" +#else + "add r3, r3, r3\n\t" +#endif +#ifdef __clang__ + "subs r3, r3, r6\n\t" +#else + "sub r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "ands r6, r5\n\t" +#else "and r6, r5\n\t" - "sub %[d1], r6\n\t" - "# Next 30 bits\n\t" - "mov r4, #29\n\t" - "1:\n\t" +#endif +#ifdef __clang__ + "subs %[d1], %[d1], r6\n\t" +#else + "sub %[d1], %[d1], r6\n\t" +#endif + "\n\t" + "movs r4, #29\n\t" + "\n" + "L_div_2048_word_64_loop_%=: \n\t" +#ifdef __clang__ + "lsls %[d0], %[d0], #1\n\t" +#else "lsl %[d0], %[d0], #1\n\t" +#endif +#ifdef __clang__ + "adcs %[d1], %[d1]\n\t" +#else "adc %[d1], %[d1]\n\t" - "mov r6, r5\n\t" - "sub r6, %[d1]\n\t" +#endif + "movs r6, r5\n\t" +#ifdef __clang__ + "subs r6, r6, %[d1]\n\t" +#else + "sub r6, r6, %[d1]\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" - "add %[r], %[r]\n\t" - "sub %[r], r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r3\n\t" +#else + "add r3, r3, r3\n\t" +#endif +#ifdef __clang__ + "subs r3, r3, r6\n\t" +#else + "sub r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "ands r6, r5\n\t" +#else "and r6, r5\n\t" - "sub %[d1], r6\n\t" - "sub r4, #1\n\t" - "bpl 1b\n\t" - "mov r7, #0\n\t" - "add %[r], %[r]\n\t" - "add %[r], #1\n\t" +#endif +#ifdef __clang__ + "subs %[d1], %[d1], r6\n\t" +#else + "sub %[d1], %[d1], r6\n\t" +#endif +#ifdef __clang__ + "subs r4, r4, #1\n\t" +#else + "sub r4, r4, #1\n\t" +#endif + "bpl L_div_2048_word_64_loop_%=\n\t" + "movs r7, #0\n\t" +#ifdef __clang__ + "adds r3, r3, r3\n\t" +#else + "add r3, r3, r3\n\t" +#endif +#ifdef 
__clang__ + "adds r3, r3, #1\n\t" +#else + "add r3, r3, #1\n\t" +#endif "# r * div - Start\n\t" - "lsl %[d1], %[r], #16\n\t" +#ifdef __clang__ + "lsls %[d1], r3, #16\n\t" +#else + "lsl %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "lsls r4, %[div], #16\n\t" +#else "lsl r4, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], %[d1], #16\n\t" +#else "lsr %[d1], %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #16\n\t" +#else "lsr r4, r4, #16\n\t" +#endif +#ifdef __clang__ + "muls r4, %[d1]\n\t" +#else "mul r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[div], #16\n\t" +#else "lsr r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r5, %[d1], #16\n\t" +#else "lsr r5, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" - "lsr %[d1], %[r], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], r3, #16\n\t" +#else + "lsr %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, %[d1]\n\t" +#else "mul r6, %[d1]\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "lsls r6, %[div], #16\n\t" +#else "lsl r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[d1], #16\n\t" +#else "lsr r6, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" +#endif "# r * div - Done\n\t" "mov %[d1], 
r8\n\t" - "sub %[d1], r4\n\t" - "mov r4, %[d1]\n\t" +#ifdef __clang__ + "subs %[d1], %[d1], r4\n\t" +#else + "sub %[d1], %[d1], r4\n\t" +#endif + "movs r4, %[d1]\n\t" "mov %[d1], r9\n\t" +#ifdef __clang__ + "sbcs %[d1], r5\n\t" +#else "sbc %[d1], r5\n\t" - "mov r5, %[d1]\n\t" - "add %[r], r5\n\t" +#endif + "movs r5, %[d1]\n\t" +#ifdef __clang__ + "adds r3, r3, r5\n\t" +#else + "add r3, r3, r5\n\t" +#endif "# r * div - Start\n\t" - "lsl %[d1], %[r], #16\n\t" +#ifdef __clang__ + "lsls %[d1], r3, #16\n\t" +#else + "lsl %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "lsls r4, %[div], #16\n\t" +#else "lsl r4, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], %[d1], #16\n\t" +#else "lsr %[d1], %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #16\n\t" +#else "lsr r4, r4, #16\n\t" +#endif +#ifdef __clang__ + "muls r4, %[d1]\n\t" +#else "mul r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[div], #16\n\t" +#else "lsr r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r5, %[d1], #16\n\t" +#else "lsr r5, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" - "lsr %[d1], %[r], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], r3, #16\n\t" +#else + "lsr %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, %[d1]\n\t" +#else "mul r6, %[d1]\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "lsls r6, %[div], #16\n\t" +#else "lsl r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[d1], #16\n\t" +#else "lsr 
r6, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" +#endif "# r * div - Done\n\t" "mov %[d1], r8\n\t" "mov r6, r9\n\t" +#ifdef __clang__ + "subs r4, %[d1], r4\n\t" +#else "sub r4, %[d1], r4\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r5\n\t" +#else "sbc r6, r5\n\t" - "mov r5, r6\n\t" - "add %[r], r5\n\t" +#endif + "movs r5, r6\n\t" +#ifdef __clang__ + "adds r3, r3, r5\n\t" +#else + "add r3, r3, r5\n\t" +#endif "# r * div - Start\n\t" - "lsl %[d1], %[r], #16\n\t" +#ifdef __clang__ + "lsls %[d1], r3, #16\n\t" +#else + "lsl %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "lsls r4, %[div], #16\n\t" +#else "lsl r4, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], %[d1], #16\n\t" +#else "lsr %[d1], %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #16\n\t" +#else "lsr r4, r4, #16\n\t" +#endif +#ifdef __clang__ + "muls r4, %[d1]\n\t" +#else "mul r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[div], #16\n\t" +#else "lsr r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r5, %[d1], #16\n\t" +#else "lsr r5, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" - "lsr %[d1], %[r], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], r3, #16\n\t" +#else + "lsr %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, %[d1]\n\t" +#else "mul r6, %[d1]\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "lsls r6, %[div], #16\n\t" +#else "lsl 
r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[d1], #16\n\t" +#else "lsr r6, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" +#endif "# r * div - Done\n\t" "mov %[d1], r8\n\t" "mov r6, r9\n\t" +#ifdef __clang__ + "subs r4, %[d1], r4\n\t" +#else "sub r4, %[d1], r4\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r5\n\t" +#else "sbc r6, r5\n\t" - "mov r5, r6\n\t" - "add %[r], r5\n\t" - "mov r6, %[div]\n\t" - "sub r6, r4\n\t" +#endif + "movs r5, r6\n\t" +#ifdef __clang__ + "adds r3, r3, r5\n\t" +#else + "add r3, r3, r5\n\t" +#endif + "movs r6, %[div]\n\t" +#ifdef __clang__ + "subs r6, r6, r4\n\t" +#else + "sub r6, r6, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" - "sub %[r], r6\n\t" - : [r] "+r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r7", "r6", "r8", "r9" +#endif +#ifdef __clang__ + "subs r3, r3, r6\n\t" +#else + "sub r3, r3, r6\n\t" +#endif + "movs %[d1], r3\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); - return r; + return (uint32_t)(size_t)d1; } /* AND m into each word of a and store in r. 
@@ -3841,38 +7847,93 @@ static void sp_2048_mask_64(sp_digit* r, const sp_digit* a, sp_digit m) */ SP_NOINLINE static int32_t sp_2048_cmp_64(const sp_digit* a, const sp_digit* b) { - sp_digit r = 0; - - __asm__ __volatile__ ( - "mov r3, #0\n\t" + "movs r2, #0\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "mvns r3, r3\n\t" +#else "mvn r3, r3\n\t" - "mov r6, #252\n\t" - "1:\n\t" +#endif + "movs r6, #0xfc\n\t" + "\n" + "L_sp_2048_cmp_64_words_%=: \n\t" "ldr r7, [%[a], r6]\n\t" "ldr r5, [%[b], r6]\n\t" +#ifdef __clang__ + "ands r7, r3\n\t" +#else "and r7, r3\n\t" +#endif +#ifdef __clang__ + "ands r5, r3\n\t" +#else "and r5, r3\n\t" - "mov r4, r7\n\t" - "sub r7, r5\n\t" +#endif + "movs r4, r7\n\t" +#ifdef __clang__ + "subs r7, r7, r5\n\t" +#else + "sub r7, r7, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r7\n\t" +#else "sbc r7, r7\n\t" - "add %[r], r7\n\t" +#endif +#ifdef __clang__ + "adds r2, r2, r7\n\t" +#else + "add r2, r2, r7\n\t" +#endif +#ifdef __clang__ + "mvns r7, r7\n\t" +#else "mvn r7, r7\n\t" +#endif +#ifdef __clang__ + "ands r3, r7\n\t" +#else "and r3, r7\n\t" - "sub r5, r4\n\t" +#endif +#ifdef __clang__ + "subs r5, r5, r4\n\t" +#else + "sub r5, r5, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r7\n\t" +#else "sbc r7, r7\n\t" - "sub %[r], r7\n\t" +#endif +#ifdef __clang__ + "subs r2, r2, r7\n\t" +#else + "sub r2, r2, r7\n\t" +#endif +#ifdef __clang__ + "mvns r7, r7\n\t" +#else "mvn r7, r7\n\t" +#endif +#ifdef __clang__ + "ands r3, r7\n\t" +#else "and r3, r7\n\t" - "sub r6, #4\n\t" +#endif +#ifdef __clang__ + "subs r6, r6, #4\n\t" +#else + "sub r6, r6, #4\n\t" +#endif "cmp r6, #0\n\t" - "bge 1b\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b) - : "r3", "r4", "r5", "r6", "r7" + "bge L_sp_2048_cmp_64_words_%=\n\t" + "movs %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7" ); - - return r; + return (uint32_t)(size_t)a; } /* Divide d in a and put remainder into r (m*d + r = a) @@ -4419,35 +8480,59 @@ int 
sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, * b A single precision number to add. * m Mask value to apply. */ -SP_NOINLINE static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +SP_NOINLINE static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, + const sp_digit* b, sp_digit m) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r5, #128\n\t" + "movs r4, #0\n\t" + "movs r5, #0x80\n\t" "mov r8, r5\n\t" - "mov r7, #0\n\t" - "1:\n\t" + "movs r7, #0\n\t" + "\n" + "L_sp_2048_cond_add_32_words_%=: \n\t" "ldr r6, [%[b], r7]\n\t" +#ifdef __clang__ + "ands r6, %[m]\n\t" +#else "and r6, %[m]\n\t" - "mov r5, #0\n\t" - "sub r5, #1\n\t" - "add r5, %[c]\n\t" +#endif + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, #1\n\t" +#else + "sub r5, r5, #1\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r4\n\t" +#else + "add r5, r5, r4\n\t" +#endif "ldr r5, [%[a], r7]\n\t" +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" +#endif + "movs r4, #0\n\t" +#ifdef __clang__ + "adcs r4, r4\n\t" +#else + "adc r4, r4\n\t" +#endif "str r5, [%[r], r7]\n\t" - "add r7, #4\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "cmp r7, r8\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r5", "r6", "r7", "r8" + "blt L_sp_2048_cond_add_32_words_%=\n\t" + "movs %[r], r4\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r4", "r5", "r6", "r7", "r8" ); - - return c; + return (uint32_t)(size_t)r; } /* RSA private key operation. @@ -4758,406 +8843,1468 @@ int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod, #ifdef WOLFSSL_HAVE_SP_DH #ifdef HAVE_FFDHE_2048 -static void sp_2048_lshift_64(sp_digit* r, sp_digit* a, byte n) +/* Left shift a by n bits into r. (r = a << n) + * + * r A single precision integer.
+ * a A single precision integer. + * n Integer representing number of bits to shift. + */ +static void sp_2048_lshift_64(sp_digit* r, const sp_digit* a, byte n) { __asm__ __volatile__ ( - "mov r6, #31\n\t" - "sub r6, r6, %[n]\n\t" - "add %[a], %[a], #192\n\t" - "add %[r], %[r], #192\n\t" - "ldr r3, [%[a], #60]\n\t" - "lsr r4, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r4, r4, r6\n\t" - "ldr r2, [%[a], #56]\n\t" - "str r4, [%[r], #64]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #52]\n\t" - "str r3, [%[r], #60]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #48]\n\t" - "str r2, [%[r], #56]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #44]\n\t" - "str r4, [%[r], #52]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #40]\n\t" - "str r3, [%[r], #48]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #36]\n\t" - "str r2, [%[r], #44]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #32]\n\t" - "str r4, [%[r], #40]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #28]\n\t" - "str r3, [%[r], #36]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #24]\n\t" - "str r2, [%[r], #32]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #20]\n\t" - "str r4, [%[r], #28]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #16]\n\t" - "str r3, [%[r], #24]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr 
r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #12]\n\t" - "str r2, [%[r], #20]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #8]\n\t" - "str r4, [%[r], #16]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #4]\n\t" - "str r3, [%[r], #12]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #0]\n\t" - "str r2, [%[r], #8]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "sub %[a], %[a], #64\n\t" - "sub %[r], %[r], #64\n\t" - "ldr r2, [%[a], #60]\n\t" - "str r4, [%[r], #68]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #56]\n\t" - "str r3, [%[r], #64]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #52]\n\t" - "str r2, [%[r], #60]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #48]\n\t" - "str r4, [%[r], #56]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #44]\n\t" - "str r3, [%[r], #52]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #40]\n\t" - "str r2, [%[r], #48]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #36]\n\t" - "str r4, [%[r], #44]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #32]\n\t" - "str r3, [%[r], #40]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #28]\n\t" - "str r2, [%[r], #36]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - 
"lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #24]\n\t" - "str r4, [%[r], #32]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #20]\n\t" - "str r3, [%[r], #28]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #16]\n\t" - "str r2, [%[r], #24]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #12]\n\t" - "str r4, [%[r], #20]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #8]\n\t" - "str r3, [%[r], #16]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #4]\n\t" - "str r2, [%[r], #12]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #0]\n\t" - "str r4, [%[r], #8]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "sub %[a], %[a], #64\n\t" - "sub %[r], %[r], #64\n\t" + "movs r7, #31\n\t" +#ifdef __clang__ + "subs r7, r7, %[n]\n\t" +#else + "sub r7, r7, %[n]\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #0xc0\n\t" +#else + "add %[a], %[a], #0xc0\n\t" +#endif +#ifdef __clang__ + "adds %[r], %[r], #0xc0\n\t" +#else + "add %[r], %[r], #0xc0\n\t" +#endif "ldr r4, [%[a], #60]\n\t" - "str r3, [%[r], #68]\n\t" +#ifdef __clang__ + "lsrs r5, r4, #1\n\t" +#else "lsr r5, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r5, r5, r7\n\t" +#else + "lsr r5, r5, r7\n\t" +#endif "ldr r3, [%[a], #56]\n\t" - "str r2, [%[r], #64]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #64]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, 
%[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #52]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #52]\n\t" "str r4, [%[r], #60]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #48]\n\t" "str r3, [%[r], #56]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #44]\n\t" - "str r2, [%[r], #52]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #52]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #40]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #40]\n\t" "str r4, [%[r], #48]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + 
"lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #36]\n\t" "str r3, [%[r], #44]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #32]\n\t" - "str r2, [%[r], #40]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #40]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #28]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #28]\n\t" "str r4, [%[r], #36]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #24]\n\t" "str r3, [%[r], #32]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #20]\n\t" - "str r2, 
[%[r], #28]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #28]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #16]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #16]\n\t" "str r4, [%[r], #24]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #12]\n\t" "str r3, [%[r], #20]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #8]\n\t" - "str r2, [%[r], #16]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #16]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #4]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #4]\n\t" "str r4, [%[r], #12]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr 
r4, [%[a], #0]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a]]\n\t" "str r3, [%[r], #8]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "sub %[a], %[a], #64\n\t" - "sub %[r], %[r], #64\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif +#ifdef __clang__ + "subs %[a], %[a], #0x40\n\t" +#else + "sub %[a], %[a], #0x40\n\t" +#endif +#ifdef __clang__ + "subs %[r], %[r], #0x40\n\t" +#else + "sub %[r], %[r], #0x40\n\t" +#endif "ldr r3, [%[a], #60]\n\t" - "str r2, [%[r], #68]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #68]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #56]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #56]\n\t" "str r4, [%[r], #64]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #52]\n\t" "str 
r3, [%[r], #60]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #48]\n\t" - "str r2, [%[r], #56]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #56]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #44]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #44]\n\t" "str r4, [%[r], #52]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #40]\n\t" "str r3, [%[r], #48]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #36]\n\t" - "str r2, [%[r], #44]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #44]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ 
+ "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #32]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #32]\n\t" "str r4, [%[r], #40]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #28]\n\t" "str r3, [%[r], #36]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #24]\n\t" - "str r2, [%[r], #32]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #32]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #20]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #20]\n\t" "str r4, [%[r], #28]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif 
+#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #16]\n\t" "str r3, [%[r], #24]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #12]\n\t" - "str r2, [%[r], #20]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #20]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #8]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #8]\n\t" "str r4, [%[r], #16]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #4]\n\t" "str r3, [%[r], #12]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #0]\n\t" - "str r2, [%[r], #8]\n\t" - "lsr r5, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ 
+ "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a]]\n\t" + "str r5, [%[r], #8]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "str r3, [%[r]]\n\t" - "str r4, [%[r], #4]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif +#ifdef __clang__ + "subs %[a], %[a], #0x40\n\t" +#else + "sub %[a], %[a], #0x40\n\t" +#endif +#ifdef __clang__ + "subs %[r], %[r], #0x40\n\t" +#else + "sub %[r], %[r], #0x40\n\t" +#endif + "ldr r5, [%[a], #60]\n\t" + "str r4, [%[r], #68]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a], #56]\n\t" + "str r3, [%[r], #64]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a], #52]\n\t" + "str r5, [%[r], #60]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #48]\n\t" + "str r4, [%[r], #56]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" 
+#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a], #44]\n\t" + "str r3, [%[r], #52]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a], #40]\n\t" + "str r5, [%[r], #48]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #36]\n\t" + "str r4, [%[r], #44]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a], #32]\n\t" + "str r3, [%[r], #40]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a], #28]\n\t" + "str r5, [%[r], #36]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + 
"orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #24]\n\t" + "str r4, [%[r], #32]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a], #20]\n\t" + "str r3, [%[r], #28]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a], #16]\n\t" + "str r5, [%[r], #24]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #12]\n\t" + "str r4, [%[r], #20]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a], #8]\n\t" + "str r3, [%[r], #16]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a], #4]\n\t" + "str r5, [%[r], #12]\n\t" +#ifdef __clang__ + "lsrs r6, r3, 
#1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a]]\n\t" + "str r4, [%[r], #8]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif +#ifdef __clang__ + "subs %[a], %[a], #0x40\n\t" +#else + "sub %[a], %[a], #0x40\n\t" +#endif +#ifdef __clang__ + "subs %[r], %[r], #0x40\n\t" +#else + "sub %[r], %[r], #0x40\n\t" +#endif + "ldr r4, [%[a], #60]\n\t" + "str r3, [%[r], #68]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a], #56]\n\t" + "str r5, [%[r], #64]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #52]\n\t" + "str r4, [%[r], #60]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a], #48]\n\t" + "str r3, [%[r], 
#56]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a], #44]\n\t" + "str r5, [%[r], #52]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #40]\n\t" + "str r4, [%[r], #48]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a], #36]\n\t" + "str r3, [%[r], #44]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a], #32]\n\t" + "str r5, [%[r], #40]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #28]\n\t" + "str r4, [%[r], #36]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" 
+#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a], #24]\n\t" + "str r3, [%[r], #32]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a], #20]\n\t" + "str r5, [%[r], #28]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #16]\n\t" + "str r4, [%[r], #24]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a], #12]\n\t" + "str r3, [%[r], #20]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a], #8]\n\t" + "str r5, [%[r], #16]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + 
"orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #4]\n\t" + "str r4, [%[r], #12]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a]]\n\t" + "str r3, [%[r], #8]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "str r4, [%[r]]\n\t" + "str r5, [%[r], #4]\n\t" + : [r] "+r" (r), [a] "+r" (a), [n] "+r" (n) : - : [r] "r" (r), [a] "r" (a), [n] "r" (n) - : "memory", "r2", "r3", "r4", "r5", "r6" + : "memory", "r3", "r4", "r5", "r6", "r7" ); } @@ -5569,95 +10716,243 @@ static void sp_3072_to_bin(sp_digit* r, byte* a) SP_NOINLINE static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit tmp[12 * 2]; + sp_digit t[12 * 2]; + sp_digit* tmp = t; __asm__ __volatile__ ( - "mov r3, #0\n\t" - "mov r4, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" "mov r8, r3\n\t" - "mov r11, %[r]\n\t" + "mov r11, %[tmp]\n\t" "mov r9, %[a]\n\t" "mov r10, %[b]\n\t" - "mov r6, #48\n\t" - "add r6, r9\n\t" + "movs r6, #48\n\t" + "add r6, r6, r9\n\t" "mov r12, r6\n\t" - "\n1:\n\t" - "mov %[r], #0\n\t" - "mov r5, #0\n\t" - "mov r6, #44\n\t" + "\n" + "L_sp_3072_mul_12_words_%=: \n\t" + "movs %[tmp], #0\n\t" + "movs r5, #0\n\t" + "movs r6, #44\n\t" "mov %[a], r8\n\t" - "sub %[a], r6\n\t" +#ifdef __clang__ + "subs %[a], %[a], r6\n\t" +#else + "sub %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" +#endif +#ifdef __clang__ + "mvns r6, r6\n\t" +#else "mvn r6, r6\n\t" 
+#endif +#ifdef __clang__ + "ands %[a], r6\n\t" +#else "and %[a], r6\n\t" +#endif "mov %[b], r8\n\t" - "sub %[b], %[a]\n\t" - "add %[a], r9\n\t" - "add %[b], r10\n\t" - "\n2:\n\t" +#ifdef __clang__ + "subs %[b], %[b], %[a]\n\t" +#else + "sub %[b], %[b], %[a]\n\t" +#endif + "add %[a], %[a], r9\n\t" + "add %[b], %[b], r10\n\t" + "\n" + "L_sp_3072_mul_12_mul_%=: \n\t" "# Multiply Start\n\t" "ldr r6, [%[a]]\n\t" "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" - "adc r4, %[r]\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[tmp]\n\t" +#else + "adc r4, %[tmp]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r6, [%[a]]\n\t" "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, 
r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "# Multiply Done\n\t" - "add %[a], #4\n\t" - "sub %[b], #4\n\t" +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "subs %[b], %[b], #4\n\t" +#else + "sub %[b], %[b], #4\n\t" +#endif "cmp %[a], r12\n\t" - "beq 3f\n\t" + "beq L_sp_3072_mul_12_done_mul_%=\n\t" "mov r6, r8\n\t" - "add r6, r9\n\t" + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" - "ble 2b\n\t" - "\n3:\n\t" - "mov %[r], r11\n\t" + "ble L_sp_3072_mul_12_mul_%=\n\t" + "\n" + "L_sp_3072_mul_12_done_mul_%=: \n\t" + "mov %[tmp], r11\n\t" "mov r7, r8\n\t" - "str r3, [%[r], r7]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "add r7, #4\n\t" + "str r3, [%[tmp], r7]\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "mov r8, r7\n\t" - "mov r6, #88\n\t" + "movs r6, #0x58\n\t" "cmp r7, r6\n\t" - "ble 1b\n\t" - "str r3, [%[r], r7]\n\t" + "ble L_sp_3072_mul_12_words_%=\n\t" + "str r3, [%[tmp], r7]\n\t" "mov %[a], r9\n\t" "mov %[b], r10\n\t" + : [a] "+r" (a), [b] "+r" (b), [tmp] "+r" (tmp) : - : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) : 
"memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); - XMEMCPY(r, tmp, sizeof(tmp)); + XMEMCPY(r, t, sizeof(t)); } /* Square a and put result in r. (r = a * a) @@ -5668,142 +10963,420 @@ SP_NOINLINE static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( - "mov r3, #0\n\t" - "mov r4, #0\n\t" - "mov r5, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" + "movs r5, #0\n\t" "mov r8, r3\n\t" "mov r11, %[r]\n\t" - "mov r6, #96\n\t" + "movs r6, #0x60\n\t" +#ifdef __clang__ + "negs r6, r6\n\t" +#else "neg r6, r6\n\t" - "add sp, r6\n\t" +#endif + "add sp, sp, r6\n\t" "mov r10, sp\n\t" "mov r9, %[a]\n\t" - "\n1:\n\t" - "mov %[r], #0\n\t" - "mov r6, #44\n\t" + "\n" + "L_sp_3072_sqr_12_words_%=: \n\t" + "movs %[r], #0\n\t" + "movs r6, #44\n\t" "mov %[a], r8\n\t" - "sub %[a], r6\n\t" +#ifdef __clang__ + "subs %[a], %[a], r6\n\t" +#else + "sub %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" +#endif +#ifdef __clang__ + "mvns r6, r6\n\t" +#else "mvn r6, r6\n\t" +#endif +#ifdef __clang__ + "ands %[a], r6\n\t" +#else "and %[a], r6\n\t" +#endif "mov r2, r8\n\t" - "sub r2, %[a]\n\t" - "add %[a], r9\n\t" - "add r2, r9\n\t" - "\n2:\n\t" +#ifdef __clang__ + "subs r2, r2, %[a]\n\t" +#else + "sub r2, r2, %[a]\n\t" +#endif + "add %[a], %[a], r9\n\t" + "add r2, r2, r9\n\t" + "\n" + "L_sp_3072_sqr_12_mul_%=: \n\t" "cmp r2, %[a]\n\t" - "beq 4f\n\t" + "beq L_sp_3072_sqr_12_sqr_%=\n\t" "# Multiply * 2: Start\n\t" "ldr r6, [%[a]]\n\t" "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add 
r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl 
r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# Multiply * 2: Done\n\t" - "bal 5f\n\t" - "\n4:\n\t" + "bal L_sp_3072_sqr_12_done_sqr_%=\n\t" + "\n" + "L_sp_3072_sqr_12_sqr_%=: \n\t" "# Square: Start\n\t" "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r6\n\t" +#else "mul r6, r6\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "muls r7, r7\n\t" +#else "mul r7, r7\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ 
+ "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #15\n\t" +#else "lsr r7, r6, #15\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #17\n\t" +#else "lsl r6, r6, #17\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# Square: Done\n\t" - "\n5:\n\t" - "add %[a], #4\n\t" - "sub r2, #4\n\t" - "mov r6, #48\n\t" - "add r6, r9\n\t" + "\n" + "L_sp_3072_sqr_12_done_sqr_%=: \n\t" +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "subs r2, r2, #4\n\t" +#else + "sub r2, r2, #4\n\t" +#endif + "movs r6, #48\n\t" + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" - "beq 3f\n\t" + "beq L_sp_3072_sqr_12_done_mul_%=\n\t" "cmp %[a], r2\n\t" - "bgt 3f\n\t" + "bgt L_sp_3072_sqr_12_done_mul_%=\n\t" "mov r7, r8\n\t" - "add r7, r9\n\t" + "add r7, r7, r9\n\t" "cmp %[a], r7\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble L_sp_3072_sqr_12_mul_%=\n\t" + "\n" + "L_sp_3072_sqr_12_done_mul_%=: \n\t" "mov %[r], r10\n\t" "mov r7, r8\n\t" "str r3, [%[r], r7]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "mov r5, #0\n\t" - "add r7, #4\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" + "movs r5, #0\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "mov r8, r7\n\t" - "mov r6, #88\n\t" + "movs r6, #0x58\n\t" "cmp r7, r6\n\t" - "ble 1b\n\t" + "ble L_sp_3072_sqr_12_words_%=\n\t" "mov %[a], r9\n\t" "str r3, [%[r], r7]\n\t" "mov %[r], r11\n\t" "mov %[a], r10\n\t" - "mov r3, #92\n\t" - "\n4:\n\t" + "movs r3, #0x5c\n\t" + "\n" + "L_sp_3072_sqr_12_store_%=: \n\t" "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" - "sub r3, #4\n\t" - "bge 4b\n\t" - "mov r6, #96\n\t" - "add sp, r6\n\t" +#ifdef __clang__ + "subs r3, r3, #4\n\t" 
+#else + "sub r3, r3, #4\n\t" +#endif + "bge L_sp_3072_sqr_12_store_%=\n\t" + "movs r6, #0x60\n\t" + "add sp, sp, r6\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } @@ -5817,182 +11390,330 @@ SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) SP_NOINLINE static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "add r4, r5\n\t" - "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "adds r4, r4, r5\n\t" +#else + "add r4, r4, r5\n\t" +#endif + "str r4, [%[r]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[b], #32]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" +#ifdef __clang__ + 
"adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], #40]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #44]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #44]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif + "movs %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5" + : "memory", "r3", "r4", "r5" ); - - return c; + return (uint32_t)(size_t)r; } -/* Sub b from a into r. (r = a - b) +/* Sub b from a into a. (a -= b) * - * r A single precision integer. * a A single precision integer. * b A single precision integer. */ SP_NOINLINE static sp_digit sp_3072_sub_in_place_24(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldr r3, [%[a], #0]\n\t" + "movs r2, #0\n\t" + "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" + "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" - "sub r3, r5\n\t" +#ifdef __clang__ + "subs r3, r3, r5\n\t" +#else + "sub r3, r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" - "str r3, [%[a], #0]\n\t" +#endif + "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #8]\n\t" "ldr r6, [%[b], #12]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #8]\n\t" "str r4, [%[a], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #16]\n\t" "ldr r6, [%[b], #20]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str 
r3, [%[a], #16]\n\t" "str r4, [%[a], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #24]\n\t" "ldr r6, [%[b], #28]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #24]\n\t" "str r4, [%[a], #28]\n\t" "ldr r3, [%[a], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #32]\n\t" "ldr r6, [%[b], #36]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #32]\n\t" "str r4, [%[a], #36]\n\t" "ldr r3, [%[a], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #40]\n\t" "ldr r6, [%[b], #44]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #40]\n\t" "str r4, [%[a], #44]\n\t" "ldr r3, [%[a], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #48]\n\t" "ldr r6, [%[b], #52]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #48]\n\t" "str r4, [%[a], #52]\n\t" "ldr r3, [%[a], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #56]\n\t" "ldr r6, [%[b], #60]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #56]\n\t" "str r4, [%[a], #60]\n\t" "ldr r3, [%[a], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #64]\n\t" "ldr r6, [%[b], #68]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #64]\n\t" "str r4, [%[a], #68]\n\t" "ldr r3, [%[a], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #72]\n\t" "ldr r6, [%[b], #76]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif 
+#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #72]\n\t" "str r4, [%[a], #76]\n\t" "ldr r3, [%[a], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #80]\n\t" "ldr r6, [%[b], #84]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #80]\n\t" "str r4, [%[a], #84]\n\t" "ldr r3, [%[a], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #88]\n\t" "ldr r6, [%[b], #92]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #88]\n\t" "str r4, [%[a], #92]\n\t" - "sbc %[c], %[c]\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) +#ifdef __clang__ + "sbcs r2, r2\n\t" +#else + "sbc r2, r2\n\t" +#endif + "movs %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6" + : "memory", "r2", "r3", "r4", "r5", "r6" ); - - return c; + return (uint32_t)(size_t)a; } /* Add b to a into r. 
(r = a + b) @@ -6004,113 +11725,211 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_24(sp_digit* a, SP_NOINLINE static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "add r4, r5\n\t" - "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "adds r4, r4, r5\n\t" +#else + "add r4, r4, r5\n\t" +#endif + "str r4, [%[r]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[b], #32]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], #40]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr 
r5, [%[b], #44]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[b], #48]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #52]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[b], #56]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #60]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #60]\n\t" "ldr r4, [%[a], #64]\n\t" "ldr r5, [%[b], #64]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #68]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #68]\n\t" "ldr r4, [%[a], #72]\n\t" "ldr r5, [%[b], #72]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #76]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #76]\n\t" "ldr r4, [%[a], #80]\n\t" "ldr r5, [%[b], #80]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #84]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #84]\n\t" "ldr r4, [%[a], #88]\n\t" "ldr r5, [%[b], #88]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #92]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #92]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + 
"movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif + "movs %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5" + : "memory", "r3", "r4", "r5" ); - - return c; + return (uint32_t)(size_t)r; } /* AND m into each word of a and store in r. @@ -6205,222 +12024,432 @@ SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a) (void)sp_3072_add_24(r + 24, r + 24, z2); } -/* Sub b from a into r. (r = a - b) +/* Sub b from a into a. (a -= b) * - * r A single precision integer. * a A single precision integer. * b A single precision integer. */ SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldr r3, [%[a], #0]\n\t" + "movs r2, #0\n\t" + "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" + "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" - "sub r3, r5\n\t" +#ifdef __clang__ + "subs r3, r3, r5\n\t" +#else + "sub r3, r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" - "str r3, [%[a], #0]\n\t" +#endif + "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #8]\n\t" "ldr r6, [%[b], #12]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #8]\n\t" "str r4, [%[a], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #16]\n\t" "ldr r6, [%[b], #20]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #16]\n\t" "str r4, [%[a], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #24]\n\t" "ldr r6, [%[b], #28]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" 
+#endif "str r3, [%[a], #24]\n\t" "str r4, [%[a], #28]\n\t" "ldr r3, [%[a], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #32]\n\t" "ldr r6, [%[b], #36]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #32]\n\t" "str r4, [%[a], #36]\n\t" "ldr r3, [%[a], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #40]\n\t" "ldr r6, [%[b], #44]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #40]\n\t" "str r4, [%[a], #44]\n\t" "ldr r3, [%[a], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #48]\n\t" "ldr r6, [%[b], #52]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #48]\n\t" "str r4, [%[a], #52]\n\t" "ldr r3, [%[a], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #56]\n\t" "ldr r6, [%[b], #60]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #56]\n\t" "str r4, [%[a], #60]\n\t" "ldr r3, [%[a], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #64]\n\t" "ldr r6, [%[b], #68]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #64]\n\t" "str r4, [%[a], #68]\n\t" "ldr r3, [%[a], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #72]\n\t" "ldr r6, [%[b], #76]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #72]\n\t" "str r4, [%[a], #76]\n\t" "ldr r3, [%[a], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #80]\n\t" "ldr r6, [%[b], #84]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, 
r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #80]\n\t" "str r4, [%[a], #84]\n\t" "ldr r3, [%[a], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #88]\n\t" "ldr r6, [%[b], #92]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #88]\n\t" "str r4, [%[a], #92]\n\t" "ldr r3, [%[a], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #96]\n\t" "ldr r6, [%[b], #100]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #96]\n\t" "str r4, [%[a], #100]\n\t" "ldr r3, [%[a], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #104]\n\t" "ldr r6, [%[b], #108]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #104]\n\t" "str r4, [%[a], #108]\n\t" "ldr r3, [%[a], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #112]\n\t" "ldr r6, [%[b], #116]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #112]\n\t" "str r4, [%[a], #116]\n\t" "ldr r3, [%[a], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #120]\n\t" "ldr r6, [%[b], #124]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #120]\n\t" "str r4, [%[a], #124]\n\t" - "sbc %[c], %[c]\n\t" - "add %[a], #0x80\n\t" - "add %[b], #0x80\n\t" - "mov r5, #0\n\t" - "sub r5, %[c]\n\t" - "ldr r3, [%[a], #0]\n\t" +#ifdef __clang__ + "sbcs r2, r2\n\t" +#else + "sbc r2, r2\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #0x80\n\t" +#else + "add %[a], %[a], #0x80\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #0x80\n\t" +#else 
+ "add %[b], %[b], #0x80\n\t" +#endif + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, r2\n\t" +#else + "sub r5, r5, r2\n\t" +#endif + "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" + "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" - "str r3, [%[a], #0]\n\t" +#endif + "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #8]\n\t" "ldr r6, [%[b], #12]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #8]\n\t" "str r4, [%[a], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #16]\n\t" "ldr r6, [%[b], #20]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #16]\n\t" "str r4, [%[a], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #24]\n\t" "ldr r6, [%[b], #28]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #24]\n\t" "str r4, [%[a], #28]\n\t" "ldr r3, [%[a], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #32]\n\t" "ldr r6, [%[b], #36]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #32]\n\t" "str r4, [%[a], #36]\n\t" "ldr r3, [%[a], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #40]\n\t" "ldr r6, [%[b], #44]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #40]\n\t" "str r4, [%[a], #44]\n\t" "ldr r3, [%[a], #48]\n\t" "ldr r4, [%[a], 
#52]\n\t" "ldr r5, [%[b], #48]\n\t" "ldr r6, [%[b], #52]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #48]\n\t" "str r4, [%[a], #52]\n\t" "ldr r3, [%[a], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #56]\n\t" "ldr r6, [%[b], #60]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #56]\n\t" "str r4, [%[a], #60]\n\t" - "sbc %[c], %[c]\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) +#ifdef __clang__ + "sbcs r2, r2\n\t" +#else + "sbc r2, r2\n\t" +#endif + "movs %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6" + : "memory", "r2", "r3", "r4", "r5", "r6" ); - - return c; + return (uint32_t)(size_t)a; } /* Add b to a into r. (r = a + b) @@ -6432,217 +12461,435 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a, SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mvn r7, r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "add r4, r5\n\t" - "str r4, [%[r], #0]\n\t" + "movs r6, #0\n\t" +#ifdef __clang__ + "mvns r6, r6\n\t" +#else + "mvn r6, r6\n\t" +#endif + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "adds r4, r4, r5\n\t" +#else + "add r4, r4, r5\n\t" +#endif + "str r4, [%[r]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], 
#16]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[b], #32]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], #40]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #44]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[b], #48]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #52]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[b], #56]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #60]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #60]\n\t" "ldr r4, [%[a], #64]\n\t" "ldr r5, [%[b], #64]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #68]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" 
+#endif "str r4, [%[r], #68]\n\t" "ldr r4, [%[a], #72]\n\t" "ldr r5, [%[b], #72]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #76]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #76]\n\t" "ldr r4, [%[a], #80]\n\t" "ldr r5, [%[b], #80]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #84]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #84]\n\t" "ldr r4, [%[a], #88]\n\t" "ldr r5, [%[b], #88]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #92]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #92]\n\t" "ldr r4, [%[a], #96]\n\t" "ldr r5, [%[b], #96]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #100]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #100]\n\t" "ldr r4, [%[a], #104]\n\t" "ldr r5, [%[b], #104]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #108]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #108]\n\t" "ldr r4, [%[a], #112]\n\t" "ldr r5, [%[b], #112]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #116]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #116]\n\t" "ldr r4, [%[a], #120]\n\t" "ldr r5, [%[b], #120]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #120]\n\t" "ldr r4, [%[a], 
#124]\n\t" "ldr r5, [%[b], #124]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #124]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - "add %[a], #0x80\n\t" - "add %[b], #0x80\n\t" - "add %[r], #0x80\n\t" - "add %[c], r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #0x80\n\t" +#else + "add %[a], %[a], #0x80\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #0x80\n\t" +#else + "add %[b], %[b], #0x80\n\t" +#endif +#ifdef __clang__ + "adds %[r], %[r], #0x80\n\t" +#else + "add %[r], %[r], #0x80\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" - "str r4, [%[r], #0]\n\t" +#endif + "str r4, [%[r]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], 
#32]\n\t" "ldr r5, [%[b], #32]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], #40]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #44]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[b], #48]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #52]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[b], #56]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #60]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #60]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif + "movs %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r7" + : "memory", "r3", "r4", "r5", "r6" ); - - return c; + return (uint32_t)(size_t)r; } /* AND m into each word of a and store in r. @@ -6737,419 +12984,837 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) (void)sp_3072_add_48(r + 48, r + 48, z2); } -/* Sub b from a into r. (r = a - b) +/* Sub b from a into a. (a -= b) * - * r A single precision integer. * a A single precision integer. * b A single precision integer. 
*/ SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldr r3, [%[a], #0]\n\t" + "movs r2, #0\n\t" + "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" + "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" - "sub r3, r5\n\t" +#ifdef __clang__ + "subs r3, r3, r5\n\t" +#else + "sub r3, r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" - "str r3, [%[a], #0]\n\t" +#endif + "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #8]\n\t" "ldr r6, [%[b], #12]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #8]\n\t" "str r4, [%[a], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #16]\n\t" "ldr r6, [%[b], #20]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #16]\n\t" "str r4, [%[a], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #24]\n\t" "ldr r6, [%[b], #28]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #24]\n\t" "str r4, [%[a], #28]\n\t" "ldr r3, [%[a], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #32]\n\t" "ldr r6, [%[b], #36]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #32]\n\t" "str r4, [%[a], #36]\n\t" "ldr r3, [%[a], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #40]\n\t" "ldr r6, [%[b], #44]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #40]\n\t" 
"str r4, [%[a], #44]\n\t" "ldr r3, [%[a], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #48]\n\t" "ldr r6, [%[b], #52]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #48]\n\t" "str r4, [%[a], #52]\n\t" "ldr r3, [%[a], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #56]\n\t" "ldr r6, [%[b], #60]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #56]\n\t" "str r4, [%[a], #60]\n\t" "ldr r3, [%[a], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #64]\n\t" "ldr r6, [%[b], #68]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #64]\n\t" "str r4, [%[a], #68]\n\t" "ldr r3, [%[a], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #72]\n\t" "ldr r6, [%[b], #76]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #72]\n\t" "str r4, [%[a], #76]\n\t" "ldr r3, [%[a], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #80]\n\t" "ldr r6, [%[b], #84]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #80]\n\t" "str r4, [%[a], #84]\n\t" "ldr r3, [%[a], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #88]\n\t" "ldr r6, [%[b], #92]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #88]\n\t" "str r4, [%[a], #92]\n\t" "ldr r3, [%[a], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #96]\n\t" "ldr r6, [%[b], #100]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + 
"sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #96]\n\t" "str r4, [%[a], #100]\n\t" "ldr r3, [%[a], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #104]\n\t" "ldr r6, [%[b], #108]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #104]\n\t" "str r4, [%[a], #108]\n\t" "ldr r3, [%[a], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #112]\n\t" "ldr r6, [%[b], #116]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #112]\n\t" "str r4, [%[a], #116]\n\t" "ldr r3, [%[a], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #120]\n\t" "ldr r6, [%[b], #124]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #120]\n\t" "str r4, [%[a], #124]\n\t" - "sbc %[c], %[c]\n\t" - "add %[a], #0x80\n\t" - "add %[b], #0x80\n\t" - "mov r5, #0\n\t" - "sub r5, %[c]\n\t" - "ldr r3, [%[a], #0]\n\t" +#ifdef __clang__ + "sbcs r2, r2\n\t" +#else + "sbc r2, r2\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #0x80\n\t" +#else + "add %[a], %[a], #0x80\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #0x80\n\t" +#else + "add %[b], %[b], #0x80\n\t" +#endif + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, r2\n\t" +#else + "sub r5, r5, r2\n\t" +#endif + "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" + "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" - "str r3, [%[a], #0]\n\t" +#endif + "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #8]\n\t" "ldr r6, [%[b], #12]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc 
r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #8]\n\t" "str r4, [%[a], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #16]\n\t" "ldr r6, [%[b], #20]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #16]\n\t" "str r4, [%[a], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #24]\n\t" "ldr r6, [%[b], #28]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #24]\n\t" "str r4, [%[a], #28]\n\t" "ldr r3, [%[a], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #32]\n\t" "ldr r6, [%[b], #36]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #32]\n\t" "str r4, [%[a], #36]\n\t" "ldr r3, [%[a], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #40]\n\t" "ldr r6, [%[b], #44]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #40]\n\t" "str r4, [%[a], #44]\n\t" "ldr r3, [%[a], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #48]\n\t" "ldr r6, [%[b], #52]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #48]\n\t" "str r4, [%[a], #52]\n\t" "ldr r3, [%[a], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #56]\n\t" "ldr r6, [%[b], #60]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #56]\n\t" "str r4, [%[a], #60]\n\t" "ldr r3, [%[a], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #64]\n\t" "ldr 
r6, [%[b], #68]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #64]\n\t" "str r4, [%[a], #68]\n\t" "ldr r3, [%[a], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #72]\n\t" "ldr r6, [%[b], #76]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #72]\n\t" "str r4, [%[a], #76]\n\t" "ldr r3, [%[a], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #80]\n\t" "ldr r6, [%[b], #84]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #80]\n\t" "str r4, [%[a], #84]\n\t" "ldr r3, [%[a], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #88]\n\t" "ldr r6, [%[b], #92]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #88]\n\t" "str r4, [%[a], #92]\n\t" "ldr r3, [%[a], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #96]\n\t" "ldr r6, [%[b], #100]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #96]\n\t" "str r4, [%[a], #100]\n\t" "ldr r3, [%[a], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #104]\n\t" "ldr r6, [%[b], #108]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #104]\n\t" "str r4, [%[a], #108]\n\t" "ldr r3, [%[a], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #112]\n\t" "ldr r6, [%[b], #116]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #112]\n\t" "str r4, [%[a], 
#116]\n\t" "ldr r3, [%[a], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #120]\n\t" "ldr r6, [%[b], #124]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #120]\n\t" "str r4, [%[a], #124]\n\t" - "sbc %[c], %[c]\n\t" - "add %[a], #0x80\n\t" - "add %[b], #0x80\n\t" - "mov r5, #0\n\t" - "sub r5, %[c]\n\t" - "ldr r3, [%[a], #0]\n\t" +#ifdef __clang__ + "sbcs r2, r2\n\t" +#else + "sbc r2, r2\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #0x80\n\t" +#else + "add %[a], %[a], #0x80\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #0x80\n\t" +#else + "add %[b], %[b], #0x80\n\t" +#endif + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, r2\n\t" +#else + "sub r5, r5, r2\n\t" +#endif + "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" + "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" - "str r3, [%[a], #0]\n\t" +#endif + "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #8]\n\t" "ldr r6, [%[b], #12]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #8]\n\t" "str r4, [%[a], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #16]\n\t" "ldr r6, [%[b], #20]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #16]\n\t" "str r4, [%[a], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #24]\n\t" "ldr r6, [%[b], #28]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], 
#24]\n\t" "str r4, [%[a], #28]\n\t" "ldr r3, [%[a], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #32]\n\t" "ldr r6, [%[b], #36]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #32]\n\t" "str r4, [%[a], #36]\n\t" "ldr r3, [%[a], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #40]\n\t" "ldr r6, [%[b], #44]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #40]\n\t" "str r4, [%[a], #44]\n\t" "ldr r3, [%[a], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #48]\n\t" "ldr r6, [%[b], #52]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #48]\n\t" "str r4, [%[a], #52]\n\t" "ldr r3, [%[a], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #56]\n\t" "ldr r6, [%[b], #60]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #56]\n\t" "str r4, [%[a], #60]\n\t" "ldr r3, [%[a], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #64]\n\t" "ldr r6, [%[b], #68]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #64]\n\t" "str r4, [%[a], #68]\n\t" "ldr r3, [%[a], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #72]\n\t" "ldr r6, [%[b], #76]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #72]\n\t" "str r4, [%[a], #76]\n\t" "ldr r3, [%[a], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #80]\n\t" "ldr r6, [%[b], #84]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef 
__clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #80]\n\t" "str r4, [%[a], #84]\n\t" "ldr r3, [%[a], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #88]\n\t" "ldr r6, [%[b], #92]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #88]\n\t" "str r4, [%[a], #92]\n\t" "ldr r3, [%[a], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #96]\n\t" "ldr r6, [%[b], #100]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #96]\n\t" "str r4, [%[a], #100]\n\t" "ldr r3, [%[a], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #104]\n\t" "ldr r6, [%[b], #108]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #104]\n\t" "str r4, [%[a], #108]\n\t" "ldr r3, [%[a], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #112]\n\t" "ldr r6, [%[b], #116]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #112]\n\t" "str r4, [%[a], #116]\n\t" "ldr r3, [%[a], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #120]\n\t" "ldr r6, [%[b], #124]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #120]\n\t" "str r4, [%[a], #124]\n\t" - "sbc %[c], %[c]\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) +#ifdef __clang__ + "sbcs r2, r2\n\t" +#else + "sbc r2, r2\n\t" +#endif + "movs %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6" + : "memory", "r2", "r3", "r4", "r5", "r6" ); - - return c; + return (uint32_t)(size_t)a; } /* Add b to a into r. 
(r = a + b) @@ -7161,415 +13826,845 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a, SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mvn r7, r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "add r4, r5\n\t" - "str r4, [%[r], #0]\n\t" + "movs r6, #0\n\t" +#ifdef __clang__ + "mvns r6, r6\n\t" +#else + "mvn r6, r6\n\t" +#endif + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "adds r4, r4, r5\n\t" +#else + "add r4, r4, r5\n\t" +#endif + "str r4, [%[r]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[b], #32]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], #40]\n\t" 
+#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #44]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[b], #48]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #52]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[b], #56]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #60]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #60]\n\t" "ldr r4, [%[a], #64]\n\t" "ldr r5, [%[b], #64]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #68]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #68]\n\t" "ldr r4, [%[a], #72]\n\t" "ldr r5, [%[b], #72]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #76]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #76]\n\t" "ldr r4, [%[a], #80]\n\t" "ldr r5, [%[b], #80]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #84]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #84]\n\t" "ldr r4, [%[a], #88]\n\t" "ldr r5, [%[b], #88]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #92]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str 
r4, [%[r], #92]\n\t" "ldr r4, [%[a], #96]\n\t" "ldr r5, [%[b], #96]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #100]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #100]\n\t" "ldr r4, [%[a], #104]\n\t" "ldr r5, [%[b], #104]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #108]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #108]\n\t" "ldr r4, [%[a], #112]\n\t" "ldr r5, [%[b], #112]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #116]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #116]\n\t" "ldr r4, [%[a], #120]\n\t" "ldr r5, [%[b], #120]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #124]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #124]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - "add %[a], #0x80\n\t" - "add %[b], #0x80\n\t" - "add %[r], #0x80\n\t" - "add %[c], r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #0x80\n\t" +#else + "add %[a], %[a], #0x80\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #0x80\n\t" +#else + "add %[b], %[b], #0x80\n\t" +#endif +#ifdef __clang__ + "adds %[r], %[r], #0x80\n\t" +#else + "add %[r], %[r], #0x80\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" - "str r4, [%[r], 
#0]\n\t" +#endif + "str r4, [%[r]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[b], #32]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], #40]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #44]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[b], #48]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #52]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[b], 
#56]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #60]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #60]\n\t" "ldr r4, [%[a], #64]\n\t" "ldr r5, [%[b], #64]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #68]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #68]\n\t" "ldr r4, [%[a], #72]\n\t" "ldr r5, [%[b], #72]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #76]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #76]\n\t" "ldr r4, [%[a], #80]\n\t" "ldr r5, [%[b], #80]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #84]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #84]\n\t" "ldr r4, [%[a], #88]\n\t" "ldr r5, [%[b], #88]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #92]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #92]\n\t" "ldr r4, [%[a], #96]\n\t" "ldr r5, [%[b], #96]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #100]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #100]\n\t" "ldr r4, [%[a], #104]\n\t" "ldr r5, [%[b], #104]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #108]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, 
r5\n\t" +#endif "str r4, [%[r], #108]\n\t" "ldr r4, [%[a], #112]\n\t" "ldr r5, [%[b], #112]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #116]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #116]\n\t" "ldr r4, [%[a], #120]\n\t" "ldr r5, [%[b], #120]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #124]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #124]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - "add %[a], #0x80\n\t" - "add %[b], #0x80\n\t" - "add %[r], #0x80\n\t" - "add %[c], r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #0x80\n\t" +#else + "add %[a], %[a], #0x80\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #0x80\n\t" +#else + "add %[b], %[b], #0x80\n\t" +#endif +#ifdef __clang__ + "adds %[r], %[r], #0x80\n\t" +#else + "add %[r], %[r], #0x80\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" - "str r4, [%[r], #0]\n\t" +#endif + "str r4, [%[r]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc 
r4, r5\n\t" +#endif "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[b], #32]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], #40]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #44]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[b], #48]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #52]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[b], #56]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #60]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #60]\n\t" "ldr r4, [%[a], #64]\n\t" "ldr r5, [%[b], #64]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #68]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #68]\n\t" "ldr r4, [%[a], #72]\n\t" 
"ldr r5, [%[b], #72]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #76]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #76]\n\t" "ldr r4, [%[a], #80]\n\t" "ldr r5, [%[b], #80]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #84]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #84]\n\t" "ldr r4, [%[a], #88]\n\t" "ldr r5, [%[b], #88]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #92]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #92]\n\t" "ldr r4, [%[a], #96]\n\t" "ldr r5, [%[b], #96]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #100]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #100]\n\t" "ldr r4, [%[a], #104]\n\t" "ldr r5, [%[b], #104]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #108]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #108]\n\t" "ldr r4, [%[a], #112]\n\t" "ldr r5, [%[b], #112]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #116]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #116]\n\t" "ldr r4, [%[a], #120]\n\t" "ldr r5, [%[b], #120]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #124]\n\t" +#ifdef __clang__ + "adcs r4, 
r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #124]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif + "movs %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r7" + : "memory", "r3", "r4", "r5", "r6" ); - - return c; + return (uint32_t)(size_t)r; } /* AND m into each word of a and store in r. @@ -7675,35 +14770,70 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r6, %[a]\n\t" - "mov r7, #0\n\t" - "mov r4, #1\n\t" - "lsl r4, #8\n\t" - "add r4, #128\n\t" - "sub r7, #1\n\t" - "add r6, r4\n\t" - "\n1:\n\t" - "add %[c], r7\n\t" + "movs r6, %[a]\n\t" + "movs r7, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0xff\n\t" +#ifdef __clang__ + "adds r4, r4, #0x81\n\t" +#else + "add r4, r4, #0x81\n\t" +#endif +#ifdef __clang__ + "subs r7, r7, #1\n\t" +#else + "sub r7, r7, #1\n\t" +#endif +#ifdef __clang__ + "adds r6, r6, r4\n\t" +#else + "add r6, r6, r4\n\t" +#endif + "\n" + "L_sp_3072_add_96_word_%=: \n\t" +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif "ldr r4, [%[a]]\n\t" "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r]]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - "add %[a], #4\n\t" - "add %[b], #4\n\t" - "add %[r], #4\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #4\n\t" +#else + "add %[b], %[b], #4\n\t" +#endif +#ifdef __clang__ + "adds %[r], %[r], #4\n\t" +#else + "add %[r], %[r], #4\n\t" +#endif "cmp %[a], r6\n\t" - "bne 1b\n\t" - : [c] "+r" (c), 
[r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "bne L_sp_3072_add_96_word_%=\n\t" + "movs %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r3", "r4", "r5", "r6", "r7" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ @@ -7716,35 +14846,67 @@ SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; __asm__ __volatile__ ( - "mov r7, %[a]\n\t" - "mov r5, #1\n\t" - "lsl r5, #8\n\t" - "add r5, #128\n\t" - "add r7, r5\n\t" - "\n1:\n\t" - "mov r5, #0\n\t" - "sub r5, %[c]\n\t" + "movs r7, %[a]\n\t" + "movs r2, #0\n\t" + "movs r5, #0xff\n\t" +#ifdef __clang__ + "adds r5, r5, #0x81\n\t" +#else + "add r5, r5, #0x81\n\t" +#endif +#ifdef __clang__ + "adds r7, r7, r5\n\t" +#else + "add r7, r7, r5\n\t" +#endif + "\n" + "L_sp_3072_sub_in_place_96_words_%=: \n\t" + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, r2\n\t" +#else + "sub r5, r5, r2\n\t" +#endif "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" - "sbc %[c], %[c]\n\t" - "add %[a], #8\n\t" - "add %[b], #8\n\t" +#ifdef __clang__ + "sbcs r2, r2\n\t" +#else + "sbc r2, r2\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #8\n\t" +#else + "add %[a], %[a], #8\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #8\n\t" +#else + "add %[b], %[b], #8\n\t" +#endif "cmp %[a], r7\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "bne L_sp_3072_sub_in_place_96_words_%=\n\t" + "movs %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7" ); - - return c; + return (uint32_t)(size_t)a; } #endif /* 
WOLFSSL_SP_SMALL */ @@ -7758,101 +14920,263 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a, SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit tmp[96 * 2]; + sp_digit t[96 * 2]; + sp_digit* tmp = t; __asm__ __volatile__ ( - "mov r3, #0\n\t" - "mov r4, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" "mov r8, r3\n\t" - "mov r11, %[r]\n\t" + "mov r11, %[tmp]\n\t" "mov r9, %[a]\n\t" "mov r10, %[b]\n\t" - "mov r6, #1\n\t" - "lsl r6, r6, #8\n\t" - "add r6, #128\n\t" - "add r6, r9\n\t" + "movs r6, #0xff\n\t" +#ifdef __clang__ + "adds r6, r6, #0x81\n\t" +#else + "add r6, r6, #0x81\n\t" +#endif + "add r6, r6, r9\n\t" "mov r12, r6\n\t" - "\n1:\n\t" - "mov %[r], #0\n\t" - "mov r5, #0\n\t" - "mov r6, #1\n\t" - "lsl r6, r6, #8\n\t" - "add r6, #124\n\t" + "\n" + "L_sp_3072_mul_96_words_%=: \n\t" + "movs %[tmp], #0\n\t" + "movs r5, #0\n\t" + "movs r6, #0xff\n\t" +#ifdef __clang__ + "adds r6, r6, #0x7d\n\t" +#else + "add r6, r6, #0x7d\n\t" +#endif "mov %[a], r8\n\t" - "sub %[a], r6\n\t" +#ifdef __clang__ + "subs %[a], %[a], r6\n\t" +#else + "sub %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" +#endif +#ifdef __clang__ + "mvns r6, r6\n\t" +#else "mvn r6, r6\n\t" +#endif +#ifdef __clang__ + "ands %[a], r6\n\t" +#else "and %[a], r6\n\t" +#endif "mov %[b], r8\n\t" - "sub %[b], %[a]\n\t" - "add %[a], r9\n\t" - "add %[b], r10\n\t" - "\n2:\n\t" +#ifdef __clang__ + "subs %[b], %[b], %[a]\n\t" +#else + "sub %[b], %[b], %[a]\n\t" +#endif + "add %[a], %[a], r9\n\t" + "add %[b], %[b], r10\n\t" + "\n" + "L_sp_3072_mul_96_mul_%=: \n\t" "# Multiply Start\n\t" "ldr r6, [%[a]]\n\t" "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" 
+#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" - "adc r4, %[r]\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[tmp]\n\t" +#else + "adc r4, %[tmp]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r6, [%[a]]\n\t" "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef 
__clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "# Multiply Done\n\t" - "add %[a], #4\n\t" - "sub %[b], #4\n\t" +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "subs %[b], %[b], #4\n\t" +#else + "sub %[b], %[b], #4\n\t" +#endif "cmp %[a], r12\n\t" - "beq 3f\n\t" + "beq L_sp_3072_mul_96_done_mul_%=\n\t" "mov r6, r8\n\t" - "add r6, r9\n\t" + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" - "ble 2b\n\t" - "\n3:\n\t" - "mov %[r], r11\n\t" + "ble L_sp_3072_mul_96_mul_%=\n\t" + "\n" + "L_sp_3072_mul_96_done_mul_%=: \n\t" + "mov %[tmp], r11\n\t" "mov r7, r8\n\t" - "str r3, [%[r], r7]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "add r7, #4\n\t" + "str r3, [%[tmp], r7]\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "mov r8, r7\n\t" - "mov r6, #2\n\t" + "movs r6, #2\n\t" +#ifdef __clang__ + "lsls r6, r6, #8\n\t" +#else "lsl r6, r6, #8\n\t" - "add r6, #248\n\t" +#endif +#ifdef __clang__ + "adds r6, r6, #0xf8\n\t" +#else + "add r6, r6, #0xf8\n\t" +#endif "cmp r7, r6\n\t" - "ble 1b\n\t" - "str r3, [%[r], r7]\n\t" + "ble L_sp_3072_mul_96_words_%=\n\t" + "str r3, [%[tmp], r7]\n\t" "mov %[a], r9\n\t" "mov %[b], r10\n\t" + : [a] "+r" (a), [b] "+r" (b), [tmp] "+r" (tmp) : - : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); - XMEMCPY(r, tmp, sizeof(tmp)); + XMEMCPY(r, t, sizeof(t)); } /* Square a and put result in r. 
(r = a * a) @@ -7863,152 +15187,460 @@ SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( - "mov r3, #0\n\t" - "mov r4, #0\n\t" - "mov r5, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" + "movs r5, #0\n\t" "mov r8, r3\n\t" "mov r11, %[r]\n\t" - "mov r6, #3\n\t" + "movs r6, #3\n\t" +#ifdef __clang__ + "lsls r6, r6, #8\n\t" +#else "lsl r6, r6, #8\n\t" +#endif +#ifdef __clang__ + "negs r6, r6\n\t" +#else "neg r6, r6\n\t" - "add sp, r6\n\t" +#endif + "add sp, sp, r6\n\t" "mov r10, sp\n\t" "mov r9, %[a]\n\t" - "\n1:\n\t" - "mov %[r], #0\n\t" - "mov r6, #1\n\t" - "lsl r6, r6, #8\n\t" - "add r6, #124\n\t" + "\n" + "L_sp_3072_sqr_96_words_%=: \n\t" + "movs %[r], #0\n\t" + "movs r6, #0xff\n\t" +#ifdef __clang__ + "adds r6, r6, #0x7d\n\t" +#else + "add r6, r6, #0x7d\n\t" +#endif "mov %[a], r8\n\t" - "sub %[a], r6\n\t" +#ifdef __clang__ + "subs %[a], %[a], r6\n\t" +#else + "sub %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" +#endif +#ifdef __clang__ + "mvns r6, r6\n\t" +#else "mvn r6, r6\n\t" +#endif +#ifdef __clang__ + "ands %[a], r6\n\t" +#else "and %[a], r6\n\t" +#endif "mov r2, r8\n\t" - "sub r2, %[a]\n\t" - "add %[a], r9\n\t" - "add r2, r9\n\t" - "\n2:\n\t" +#ifdef __clang__ + "subs r2, r2, %[a]\n\t" +#else + "sub r2, r2, %[a]\n\t" +#endif + "add %[a], %[a], r9\n\t" + "add r2, r2, r9\n\t" + "\n" + "L_sp_3072_sqr_96_mul_%=: \n\t" "cmp r2, %[a]\n\t" - "beq 4f\n\t" + "beq L_sp_3072_sqr_96_sqr_%=\n\t" "# Multiply * 2: Start\n\t" "ldr r6, [%[a]]\n\t" "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, 
r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r7, [r2]\n\t" +#ifdef 
__clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# Multiply * 2: Done\n\t" - "bal 5f\n\t" - "\n4:\n\t" + "bal L_sp_3072_sqr_96_done_sqr_%=\n\t" + "\n" + "L_sp_3072_sqr_96_sqr_%=: \n\t" "# Square: Start\n\t" "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r6\n\t" +#else "mul r6, r6\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "muls r7, r7\n\t" +#else "mul r7, r7\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else 
"lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #15\n\t" +#else "lsr r7, r6, #15\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #17\n\t" +#else "lsl r6, r6, #17\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# Square: Done\n\t" - "\n5:\n\t" - "add %[a], #4\n\t" - "sub r2, #4\n\t" - "mov r6, #1\n\t" - "lsl r6, r6, #8\n\t" - "add r6, #128\n\t" - "add r6, r9\n\t" + "\n" + "L_sp_3072_sqr_96_done_sqr_%=: \n\t" +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "subs r2, r2, #4\n\t" +#else + "sub r2, r2, #4\n\t" +#endif + "movs r6, #0xff\n\t" +#ifdef __clang__ + "adds r6, r6, #0x81\n\t" +#else + "add r6, r6, #0x81\n\t" +#endif + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" - "beq 3f\n\t" + "beq L_sp_3072_sqr_96_done_mul_%=\n\t" "cmp %[a], r2\n\t" - "bgt 3f\n\t" + "bgt L_sp_3072_sqr_96_done_mul_%=\n\t" "mov r7, r8\n\t" - "add r7, r9\n\t" + "add r7, r7, r9\n\t" "cmp %[a], r7\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble L_sp_3072_sqr_96_mul_%=\n\t" + "\n" + "L_sp_3072_sqr_96_done_mul_%=: \n\t" "mov %[r], r10\n\t" "mov r7, r8\n\t" "str r3, [%[r], r7]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "mov r5, #0\n\t" - "add r7, #4\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" + "movs r5, #0\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "mov r8, r7\n\t" - "mov r6, #2\n\t" + "movs r6, #2\n\t" +#ifdef __clang__ + "lsls r6, r6, #8\n\t" +#else "lsl r6, r6, #8\n\t" - "add r6, #248\n\t" +#endif +#ifdef __clang__ + "adds r6, r6, #0xf8\n\t" +#else + "add r6, r6, #0xf8\n\t" +#endif "cmp r7, r6\n\t" - "ble 1b\n\t" + "ble 
L_sp_3072_sqr_96_words_%=\n\t" "mov %[a], r9\n\t" "str r3, [%[r], r7]\n\t" "mov %[r], r11\n\t" "mov %[a], r10\n\t" - "mov r3, #2\n\t" + "movs r3, #2\n\t" +#ifdef __clang__ + "lsls r3, r3, #8\n\t" +#else "lsl r3, r3, #8\n\t" - "add r3, #252\n\t" - "\n4:\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, #0xfc\n\t" +#else + "add r3, r3, #0xfc\n\t" +#endif + "\n" + "L_sp_3072_sqr_96_store_%=: \n\t" "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" - "sub r3, #4\n\t" - "bge 4b\n\t" - "mov r6, #3\n\t" +#ifdef __clang__ + "subs r3, r3, #4\n\t" +#else + "sub r3, r3, #4\n\t" +#endif + "bge L_sp_3072_sqr_96_store_%=\n\t" + "movs r6, #3\n\t" +#ifdef __clang__ + "lsls r6, r6, #8\n\t" +#else "lsl r6, r6, #8\n\t" - "add sp, r6\n\t" +#endif + "add sp, sp, r6\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } @@ -8042,32 +15674,64 @@ static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m) SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r6, %[a]\n\t" - "mov r7, #0\n\t" - "add r6, #192\n\t" - "sub r7, #1\n\t" - "\n1:\n\t" - "add %[c], r7\n\t" + "movs r6, %[a]\n\t" + "movs r7, #0\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "adds r6, r6, #0xc0\n\t" +#else + "add r6, r6, #0xc0\n\t" +#endif +#ifdef __clang__ + "subs r7, r7, #1\n\t" +#else + "sub r7, r7, #1\n\t" +#endif + "\n" + "L_sp_3072_add_48_word_%=: \n\t" +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif "ldr r4, [%[a]]\n\t" "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r]]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - "add %[a], #4\n\t" - "add %[b], #4\n\t" - "add %[r], #4\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], 
#4\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #4\n\t" +#else + "add %[b], %[b], #4\n\t" +#endif +#ifdef __clang__ + "adds %[r], %[r], #4\n\t" +#else + "add %[r], %[r], #4\n\t" +#endif "cmp %[a], r6\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "bne L_sp_3072_add_48_word_%=\n\t" + "movs %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r3", "r4", "r5", "r6", "r7" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ @@ -8080,32 +15744,61 @@ SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; __asm__ __volatile__ ( - "mov r7, %[a]\n\t" - "add r7, #192\n\t" - "\n1:\n\t" - "mov r5, #0\n\t" - "sub r5, %[c]\n\t" + "movs r7, %[a]\n\t" + "movs r2, #0\n\t" +#ifdef __clang__ + "adds r7, r7, #0xc0\n\t" +#else + "add r7, r7, #0xc0\n\t" +#endif + "\n" + "L_sp_3072_sub_in_place_48_words_%=: \n\t" + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, r2\n\t" +#else + "sub r5, r5, r2\n\t" +#endif "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" - "sbc %[c], %[c]\n\t" - "add %[a], #8\n\t" - "add %[b], #8\n\t" +#ifdef __clang__ + "sbcs r2, r2\n\t" +#else + "sbc r2, r2\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #8\n\t" +#else + "add %[a], %[a], #8\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #8\n\t" +#else + "add %[b], %[b], #8\n\t" +#endif "cmp %[a], r7\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "bne L_sp_3072_sub_in_place_48_words_%=\n\t" + "movs %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7" + : "memory", "r2", "r3", "r4", 
"r5", "r6", "r7" ); - - return c; + return (uint32_t)(size_t)a; } #endif /* WOLFSSL_SP_SMALL */ @@ -8119,97 +15812,248 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a, SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit tmp[48 * 2]; + sp_digit t[48 * 2]; + sp_digit* tmp = t; __asm__ __volatile__ ( - "mov r3, #0\n\t" - "mov r4, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" "mov r8, r3\n\t" - "mov r11, %[r]\n\t" + "mov r11, %[tmp]\n\t" "mov r9, %[a]\n\t" "mov r10, %[b]\n\t" - "mov r6, #192\n\t" - "add r6, r9\n\t" + "movs r6, #0xc0\n\t" + "add r6, r6, r9\n\t" "mov r12, r6\n\t" - "\n1:\n\t" - "mov %[r], #0\n\t" - "mov r5, #0\n\t" - "mov r6, #188\n\t" + "\n" + "L_sp_3072_mul_48_words_%=: \n\t" + "movs %[tmp], #0\n\t" + "movs r5, #0\n\t" + "movs r6, #0xbc\n\t" "mov %[a], r8\n\t" - "sub %[a], r6\n\t" +#ifdef __clang__ + "subs %[a], %[a], r6\n\t" +#else + "sub %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" +#endif +#ifdef __clang__ + "mvns r6, r6\n\t" +#else "mvn r6, r6\n\t" +#endif +#ifdef __clang__ + "ands %[a], r6\n\t" +#else "and %[a], r6\n\t" +#endif "mov %[b], r8\n\t" - "sub %[b], %[a]\n\t" - "add %[a], r9\n\t" - "add %[b], r10\n\t" - "\n2:\n\t" +#ifdef __clang__ + "subs %[b], %[b], %[a]\n\t" +#else + "sub %[b], %[b], %[a]\n\t" +#endif + "add %[a], %[a], r9\n\t" + "add %[b], %[b], r10\n\t" + "\n" + "L_sp_3072_mul_48_mul_%=: \n\t" "# Multiply Start\n\t" "ldr r6, [%[a]]\n\t" "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" - "adc r4, %[r]\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ 
+ "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[tmp]\n\t" +#else + "adc r4, %[tmp]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r6, [%[a]]\n\t" "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "# Multiply Done\n\t" - 
"add %[a], #4\n\t" - "sub %[b], #4\n\t" +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "subs %[b], %[b], #4\n\t" +#else + "sub %[b], %[b], #4\n\t" +#endif "cmp %[a], r12\n\t" - "beq 3f\n\t" + "beq L_sp_3072_mul_48_done_mul_%=\n\t" "mov r6, r8\n\t" - "add r6, r9\n\t" + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" - "ble 2b\n\t" - "\n3:\n\t" - "mov %[r], r11\n\t" + "ble L_sp_3072_mul_48_mul_%=\n\t" + "\n" + "L_sp_3072_mul_48_done_mul_%=: \n\t" + "mov %[tmp], r11\n\t" "mov r7, r8\n\t" - "str r3, [%[r], r7]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "add r7, #4\n\t" + "str r3, [%[tmp], r7]\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "mov r8, r7\n\t" - "mov r6, #1\n\t" - "lsl r6, r6, #8\n\t" - "add r6, #120\n\t" + "movs r6, #0xff\n\t" +#ifdef __clang__ + "adds r6, r6, #0x79\n\t" +#else + "add r6, r6, #0x79\n\t" +#endif "cmp r7, r6\n\t" - "ble 1b\n\t" - "str r3, [%[r], r7]\n\t" + "ble L_sp_3072_mul_48_words_%=\n\t" + "str r3, [%[tmp], r7]\n\t" "mov %[a], r9\n\t" "mov %[b], r10\n\t" + : [a] "+r" (a), [b] "+r" (b), [tmp] "+r" (tmp) : - : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); - XMEMCPY(r, tmp, sizeof(tmp)); + XMEMCPY(r, t, sizeof(t)); } /* Square a and put result in r. 
(r = a * a) @@ -8220,150 +16064,440 @@ SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( - "mov r3, #0\n\t" - "mov r4, #0\n\t" - "mov r5, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" + "movs r5, #0\n\t" "mov r8, r3\n\t" "mov r11, %[r]\n\t" - "mov r6, #1\n\t" - "lsl r6, r6, #8\n\t" - "add r6, #128\n\t" + "movs r6, #0xff\n\t" +#ifdef __clang__ + "adds r6, r6, #0x81\n\t" +#else + "add r6, r6, #0x81\n\t" +#endif +#ifdef __clang__ + "negs r6, r6\n\t" +#else "neg r6, r6\n\t" - "add sp, r6\n\t" +#endif + "add sp, sp, r6\n\t" "mov r10, sp\n\t" "mov r9, %[a]\n\t" - "\n1:\n\t" - "mov %[r], #0\n\t" - "mov r6, #188\n\t" + "\n" + "L_sp_3072_sqr_48_words_%=: \n\t" + "movs %[r], #0\n\t" + "movs r6, #0xbc\n\t" "mov %[a], r8\n\t" - "sub %[a], r6\n\t" +#ifdef __clang__ + "subs %[a], %[a], r6\n\t" +#else + "sub %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" +#endif +#ifdef __clang__ + "mvns r6, r6\n\t" +#else "mvn r6, r6\n\t" +#endif +#ifdef __clang__ + "ands %[a], r6\n\t" +#else "and %[a], r6\n\t" +#endif "mov r2, r8\n\t" - "sub r2, %[a]\n\t" - "add %[a], r9\n\t" - "add r2, r9\n\t" - "\n2:\n\t" +#ifdef __clang__ + "subs r2, r2, %[a]\n\t" +#else + "sub r2, r2, %[a]\n\t" +#endif + "add %[a], %[a], r9\n\t" + "add r2, r2, r9\n\t" + "\n" + "L_sp_3072_sqr_48_mul_%=: \n\t" "cmp r2, %[a]\n\t" - "beq 4f\n\t" + "beq L_sp_3072_sqr_48_sqr_%=\n\t" "# Multiply * 2: Start\n\t" "ldr r6, [%[a]]\n\t" "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + 
"adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef 
__clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# Multiply * 2: Done\n\t" - "bal 5f\n\t" - "\n4:\n\t" + "bal L_sp_3072_sqr_48_done_sqr_%=\n\t" + "\n" + "L_sp_3072_sqr_48_sqr_%=: \n\t" "# Square: Start\n\t" "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r6\n\t" +#else "mul r6, r6\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "muls r7, r7\n\t" +#else "mul r7, r7\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else 
"lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #15\n\t" +#else "lsr r7, r6, #15\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #17\n\t" +#else "lsl r6, r6, #17\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# Square: Done\n\t" - "\n5:\n\t" - "add %[a], #4\n\t" - "sub r2, #4\n\t" - "mov r6, #192\n\t" - "add r6, r9\n\t" + "\n" + "L_sp_3072_sqr_48_done_sqr_%=: \n\t" +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "subs r2, r2, #4\n\t" +#else + "sub r2, r2, #4\n\t" +#endif + "movs r6, #0xc0\n\t" + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" - "beq 3f\n\t" + "beq L_sp_3072_sqr_48_done_mul_%=\n\t" "cmp %[a], r2\n\t" - "bgt 3f\n\t" + "bgt L_sp_3072_sqr_48_done_mul_%=\n\t" "mov r7, r8\n\t" - "add r7, r9\n\t" + "add r7, r7, r9\n\t" "cmp %[a], r7\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble L_sp_3072_sqr_48_mul_%=\n\t" + "\n" + "L_sp_3072_sqr_48_done_mul_%=: \n\t" "mov %[r], r10\n\t" "mov r7, r8\n\t" "str r3, [%[r], r7]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "mov r5, #0\n\t" - "add r7, #4\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" + "movs r5, #0\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "mov r8, r7\n\t" - "mov r6, #1\n\t" - "lsl r6, r6, #8\n\t" - "add r6, #120\n\t" + "movs r6, #0xff\n\t" +#ifdef __clang__ + "adds r6, r6, #0x79\n\t" +#else + "add r6, r6, #0x79\n\t" +#endif "cmp r7, r6\n\t" - "ble 1b\n\t" + "ble L_sp_3072_sqr_48_words_%=\n\t" "mov %[a], r9\n\t" "str r3, [%[r], r7]\n\t" "mov %[r], r11\n\t" "mov %[a], r10\n\t" - "mov r3, #1\n\t" - "lsl r3, r3, #8\n\t" - "add r3, #124\n\t" - "\n4:\n\t" + "movs r3, #0xff\n\t" +#ifdef __clang__ + "adds r3, r3, #0x7d\n\t" +#else + "add r3, 
r3, #0x7d\n\t" +#endif + "\n" + "L_sp_3072_sqr_48_store_%=: \n\t" "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" - "sub r3, #4\n\t" - "bge 4b\n\t" - "mov r6, #1\n\t" - "lsl r6, r6, #8\n\t" - "add r6, #128\n\t" - "add sp, r6\n\t" +#ifdef __clang__ + "subs r3, r3, #4\n\t" +#else + "sub r3, r3, #4\n\t" +#endif + "bge L_sp_3072_sqr_48_store_%=\n\t" + "movs r6, #0xff\n\t" +#ifdef __clang__ + "adds r6, r6, #0x81\n\t" +#else + "add r6, r6, #0x81\n\t" +#endif + "add sp, sp, r6\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } @@ -8401,61 +16535,189 @@ SP_NOINLINE static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, sp_digit b) { __asm__ __volatile__ ( - "mov r6, #1\n\t" - "lsl r6, r6, #8\n\t" - "add r6, #128\n\t" - "add r6, %[a]\n\t" + "movs r6, #0xff\n\t" +#ifdef __clang__ + "adds r6, r6, #0x81\n\t" +#else + "add r6, r6, #0x81\n\t" +#endif +#ifdef __clang__ + "adds r6, r6, %[a]\n\t" +#else + "add r6, r6, %[a]\n\t" +#endif "mov r8, %[r]\n\t" "mov r9, r6\n\t" - "mov r3, #0\n\t" - "mov r4, #0\n\t" - "1:\n\t" - "mov %[r], #0\n\t" - "mov r5, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" + "\n" + "L_sp_3072_mul_d_96_%=: \n\t" + "movs %[r], #0\n\t" + "movs r5, #0\n\t" "# A[] * B\n\t" "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, %[b], #16\n\t" +#else "lsl r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "lsrs r7, %[b], 
#16\n\t" +#else "lsr r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, %[b], #16\n\t" +#else "lsr r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "lsls r7, %[b], #16\n\t" +#else "lsl r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# A[] * B - Done\n\t" "mov %[r], r8\n\t" "str r3, [%[r]]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "add %[r], #4\n\t" - "add %[a], #4\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" +#ifdef __clang__ + "adds %[r], %[r], #4\n\t" +#else + "add %[r], %[r], #4\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif "mov r8, %[r]\n\t" "cmp %[a], r9\n\t" - "blt 1b\n\t" + 
"blt L_sp_3072_mul_d_96_%=\n\t" "str r3, [%[r]]\n\t" - : [r] "+r" (r), [a] "+r" (a) - : [b] "r" (b) + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -8486,30 +16748,50 @@ static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m) SP_NOINLINE static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r5, #192\n\t" + "movs r4, #0\n\t" + "movs r5, #0xc0\n\t" "mov r8, r5\n\t" - "mov r7, #0\n\t" - "1:\n\t" + "movs r7, #0\n\t" + "\n" + "L_sp_3072_cond_sub_48_words_%=: \n\t" "ldr r6, [%[b], r7]\n\t" +#ifdef __clang__ + "ands r6, %[m]\n\t" +#else "and r6, %[m]\n\t" - "mov r5, #0\n\t" - "sub r5, %[c]\n\t" +#endif + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, r4\n\t" +#else + "sub r5, r5, r4\n\t" +#endif "ldr r5, [%[a], r7]\n\t" +#ifdef __clang__ + "sbcs r5, r6\n\t" +#else "sbc r5, r6\n\t" - "sbc %[c], %[c]\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r4\n\t" +#else + "sbc r4, r4\n\t" +#endif "str r5, [%[r], r7]\n\t" - "add r7, #4\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "cmp r7, r8\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r5", "r6", "r7", "r8" + "blt L_sp_3072_cond_sub_48_words_%=\n\t" + "movs %[r], r4\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r4", "r5", "r6", "r7", "r8" ); - - return c; + return (uint32_t)(size_t)r; } /* Reduce the number back to 3072 bits using Montgomery reduction. 
@@ -8522,136 +16804,372 @@ SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp) { sp_digit ca = 0; - __asm__ __volatile__ ( "mov r8, %[mp]\n\t" "mov r12, %[ca]\n\t" - "mov r14, %[m]\n\t" + "mov lr, %[m]\n\t" "mov r9, %[a]\n\t" - "mov r4, #0\n\t" + "movs r4, #0\n\t" "# i = 0\n\t" "mov r11, r4\n\t" - "\n1:\n\t" - "mov r5, #0\n\t" - "mov %[ca], #0\n\t" + "\n" + "L_sp_3072_mont_reduce_48_mod_%=: \n\t" + "movs r5, #0\n\t" + "movs %[ca], #0\n\t" "# mu = a[i] * mp\n\t" "mov %[mp], r8\n\t" "ldr %[a], [%[a]]\n\t" +#ifdef __clang__ + "muls %[mp], %[a]\n\t" +#else "mul %[mp], %[a]\n\t" - "mov %[m], r14\n\t" +#endif + "mov %[m], lr\n\t" "mov r10, r9\n\t" - "\n2:\n\t" + "\n" + "L_sp_3072_mont_reduce_48_word_%=: \n\t" "# a[i+j] += m[j] * mu\n\t" "mov %[a], r10\n\t" "ldr %[a], [%[a]]\n\t" - "mov %[ca], #0\n\t" - "mov r4, r5\n\t" - "mov r5, #0\n\t" + "movs %[ca], #0\n\t" + "movs r4, r5\n\t" + "movs r5, #0\n\t" "# Multiply m[j] and mu - Start\n\t" "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r6, %[mp], #16\n\t" +#else "lsl r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add %[a], r7\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], r7\n\t" +#else + "add %[a], %[a], r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[ca]\n\t" +#else "adc r5, %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add %[a], r6\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], r6\n\t" +#else + "add 
%[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r6, %[mp], #16\n\t" +#else "lsr r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r5, r7\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r7\n\t" +#else + "add r5, r5, r7\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add %[a], r6\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], r6\n\t" +#else + "add %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" +#endif "# Multiply m[j] and mu - Done\n\t" - "add r4, %[a]\n\t" +#ifdef __clang__ + "adds r4, r4, %[a]\n\t" +#else + "add r4, r4, %[a]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[ca]\n\t" +#else "adc r5, %[ca]\n\t" +#endif "mov %[a], r10\n\t" "str r4, [%[a]]\n\t" - "mov r6, #4\n\t" - "add %[m], #4\n\t" - "add r10, r6\n\t" - "mov r4, #188\n\t" - "add r4, r9\n\t" + "movs r6, #4\n\t" +#ifdef __clang__ + "adds %[m], %[m], #4\n\t" +#else + "add %[m], %[m], #4\n\t" +#endif + "add r10, r10, r6\n\t" + "movs r4, #0xbc\n\t" + "add r4, r4, r9\n\t" "cmp r10, r4\n\t" - "blt 2b\n\t" + "blt L_sp_3072_mont_reduce_48_word_%=\n\t" "# a[i+47] += m[47] * mu\n\t" - "mov %[ca], #0\n\t" + "movs %[ca], #0\n\t" "mov r4, r12\n\t" - "mov %[a], #0\n\t" + "movs %[a], #0\n\t" "# Multiply m[47] and mu - Start\n\t" "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r6, %[mp], #16\n\t" +#else "lsl r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, 
r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r5, r7\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r7\n\t" +#else + "add r5, r5, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[ca]\n\t" +#else "adc r4, %[ca]\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r6, %[mp], #16\n\t" +#else "lsr r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" 
+#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "# Multiply m[47] and mu - Done\n\t" - "mov %[ca], %[a]\n\t" + "movs %[ca], %[a]\n\t" "mov %[a], r10\n\t" "ldr r7, [%[a], #4]\n\t" "ldr %[a], [%[a]]\n\t" - "mov r6, #0\n\t" - "add r5, %[a]\n\t" + "movs r6, #0\n\t" +#ifdef __clang__ + "adds r5, r5, %[a]\n\t" +#else + "add r5, r5, %[a]\n\t" +#endif +#ifdef __clang__ + "adcs r7, r4\n\t" +#else "adc r7, r4\n\t" +#endif +#ifdef __clang__ + "adcs %[ca], r6\n\t" +#else "adc %[ca], r6\n\t" +#endif "mov %[a], r10\n\t" "str r5, [%[a]]\n\t" "str r7, [%[a], #4]\n\t" "# i += 1\n\t" - "mov r6, #4\n\t" - "add r9, r6\n\t" - "add r11, r6\n\t" + "movs r6, #4\n\t" + "add r9, r9, r6\n\t" + "add r11, r11, r6\n\t" "mov r12, %[ca]\n\t" "mov %[a], r9\n\t" - "mov r4, #192\n\t" + "movs r4, #0xc0\n\t" "cmp r11, r4\n\t" - "blt 1b\n\t" - "mov %[m], r14\n\t" - : [ca] "+r" (ca), [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + "blt L_sp_3072_mont_reduce_48_mod_%=\n\t" + "mov %[m], lr\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp), [ca] "+r" (ca) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); - sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - ca); } @@ -8695,59 +17213,184 @@ SP_NOINLINE static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, sp_digit b) { __asm__ __volatile__ ( - "mov r6, #192\n\t" - "add r6, %[a]\n\t" + "movs r6, #0xc0\n\t" +#ifdef __clang__ + "adds r6, r6, %[a]\n\t" +#else + "add r6, r6, %[a]\n\t" +#endif "mov r8, %[r]\n\t" "mov r9, r6\n\t" - "mov r3, #0\n\t" - "mov r4, #0\n\t" - "1:\n\t" - "mov %[r], #0\n\t" - "mov r5, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" + "\n" + "L_sp_3072_mul_d_48_%=: \n\t" + "movs %[r], #0\n\t" + "movs r5, #0\n\t" "# A[] * B\n\t" "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" 
+#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, %[b], #16\n\t" +#else "lsl r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "lsrs r7, %[b], #16\n\t" +#else "lsr r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, %[b], #16\n\t" +#else "lsr r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "lsls r7, %[b], #16\n\t" +#else "lsl r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif 
+#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# A[] * B - Done\n\t" "mov %[r], r8\n\t" "str r3, [%[r]]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "add %[r], #4\n\t" - "add %[a], #4\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" +#ifdef __clang__ + "adds %[r], %[r], #4\n\t" +#else + "add %[r], %[r], #4\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif "mov r8, %[r]\n\t" "cmp %[a], r9\n\t" - "blt 1b\n\t" + "blt L_sp_3072_mul_d_48_%=\n\t" "str r3, [%[r]]\n\t" - : [r] "+r" (r), [a] "+r" (a) - : [b] "r" (b) + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -8764,135 +17407,512 @@ SP_NOINLINE static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, SP_NOINLINE static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) { - sp_digit r = 0; - __asm__ __volatile__ ( + "movs r3, #0\n\t" +#ifdef __clang__ + "lsrs r5, %[div], #1\n\t" +#else "lsr r5, %[div], #1\n\t" - "add r5, #1\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, #1\n\t" +#else + "add r5, r5, #1\n\t" +#endif "mov r8, %[d0]\n\t" "mov r9, %[d1]\n\t" "# Do top 32\n\t" - "mov r6, r5\n\t" - "sub r6, %[d1]\n\t" + "movs r6, r5\n\t" +#ifdef __clang__ + "subs r6, r6, %[d1]\n\t" +#else + "sub r6, r6, %[d1]\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" - "add %[r], %[r]\n\t" - "sub %[r], r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r3\n\t" +#else + "add r3, r3, r3\n\t" +#endif +#ifdef __clang__ + "subs r3, r3, r6\n\t" +#else + "sub r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "ands r6, r5\n\t" +#else "and r6, r5\n\t" - "sub %[d1], r6\n\t" - "# Next 30 bits\n\t" - "mov r4, #29\n\t" - "1:\n\t" +#endif +#ifdef __clang__ + "subs %[d1], %[d1], r6\n\t" +#else + "sub %[d1], %[d1], r6\n\t" +#endif + 
"\n\t" + "movs r4, #29\n\t" + "\n" + "L_div_3072_word_48_loop_%=: \n\t" +#ifdef __clang__ + "lsls %[d0], %[d0], #1\n\t" +#else "lsl %[d0], %[d0], #1\n\t" +#endif +#ifdef __clang__ + "adcs %[d1], %[d1]\n\t" +#else "adc %[d1], %[d1]\n\t" - "mov r6, r5\n\t" - "sub r6, %[d1]\n\t" +#endif + "movs r6, r5\n\t" +#ifdef __clang__ + "subs r6, r6, %[d1]\n\t" +#else + "sub r6, r6, %[d1]\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" - "add %[r], %[r]\n\t" - "sub %[r], r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r3\n\t" +#else + "add r3, r3, r3\n\t" +#endif +#ifdef __clang__ + "subs r3, r3, r6\n\t" +#else + "sub r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "ands r6, r5\n\t" +#else "and r6, r5\n\t" - "sub %[d1], r6\n\t" - "sub r4, #1\n\t" - "bpl 1b\n\t" - "mov r7, #0\n\t" - "add %[r], %[r]\n\t" - "add %[r], #1\n\t" +#endif +#ifdef __clang__ + "subs %[d1], %[d1], r6\n\t" +#else + "sub %[d1], %[d1], r6\n\t" +#endif +#ifdef __clang__ + "subs r4, r4, #1\n\t" +#else + "sub r4, r4, #1\n\t" +#endif + "bpl L_div_3072_word_48_loop_%=\n\t" + "movs r7, #0\n\t" +#ifdef __clang__ + "adds r3, r3, r3\n\t" +#else + "add r3, r3, r3\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, #1\n\t" +#else + "add r3, r3, #1\n\t" +#endif "# r * div - Start\n\t" - "lsl %[d1], %[r], #16\n\t" +#ifdef __clang__ + "lsls %[d1], r3, #16\n\t" +#else + "lsl %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "lsls r4, %[div], #16\n\t" +#else "lsl r4, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], %[d1], #16\n\t" +#else "lsr %[d1], %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #16\n\t" +#else "lsr r4, r4, #16\n\t" +#endif +#ifdef __clang__ + "muls r4, %[d1]\n\t" +#else "mul r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[div], #16\n\t" +#else "lsr r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r5, %[d1], #16\n\t" +#else "lsr r5, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls 
%[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" - "lsr %[d1], %[r], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], r3, #16\n\t" +#else + "lsr %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, %[d1]\n\t" +#else "mul r6, %[d1]\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "lsls r6, %[div], #16\n\t" +#else "lsl r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[d1], #16\n\t" +#else "lsr r6, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" +#endif "# r * div - Done\n\t" "mov %[d1], r8\n\t" - "sub %[d1], r4\n\t" - "mov r4, %[d1]\n\t" +#ifdef __clang__ + "subs %[d1], %[d1], r4\n\t" +#else + "sub %[d1], %[d1], r4\n\t" +#endif + "movs r4, %[d1]\n\t" "mov %[d1], r9\n\t" +#ifdef __clang__ + "sbcs %[d1], r5\n\t" +#else "sbc %[d1], r5\n\t" - "mov r5, %[d1]\n\t" - "add %[r], r5\n\t" +#endif + "movs r5, %[d1]\n\t" +#ifdef __clang__ + "adds r3, r3, r5\n\t" +#else + "add r3, r3, r5\n\t" +#endif "# r * div - Start\n\t" - "lsl %[d1], %[r], #16\n\t" +#ifdef __clang__ + "lsls %[d1], r3, #16\n\t" +#else + "lsl %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "lsls r4, %[div], #16\n\t" +#else "lsl r4, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], %[d1], #16\n\t" +#else "lsr %[d1], %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #16\n\t" +#else "lsr r4, r4, #16\n\t" +#endif +#ifdef __clang__ 
+ "muls r4, %[d1]\n\t" +#else "mul r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[div], #16\n\t" +#else "lsr r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r5, %[d1], #16\n\t" +#else "lsr r5, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" - "lsr %[d1], %[r], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], r3, #16\n\t" +#else + "lsr %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, %[d1]\n\t" +#else "mul r6, %[d1]\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "lsls r6, %[div], #16\n\t" +#else "lsl r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[d1], #16\n\t" +#else "lsr r6, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" +#endif "# r * div - Done\n\t" "mov %[d1], r8\n\t" "mov r6, r9\n\t" +#ifdef __clang__ + "subs r4, %[d1], r4\n\t" +#else "sub r4, %[d1], r4\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r5\n\t" +#else "sbc r6, r5\n\t" - "mov r5, r6\n\t" - "add %[r], r5\n\t" +#endif + "movs r5, r6\n\t" +#ifdef __clang__ + "adds r3, r3, r5\n\t" +#else + "add r3, r3, r5\n\t" +#endif "# r * div - Start\n\t" - "lsl %[d1], %[r], #16\n\t" +#ifdef __clang__ + "lsls %[d1], r3, #16\n\t" +#else + "lsl %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "lsls r4, 
%[div], #16\n\t" +#else "lsl r4, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], %[d1], #16\n\t" +#else "lsr %[d1], %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #16\n\t" +#else "lsr r4, r4, #16\n\t" +#endif +#ifdef __clang__ + "muls r4, %[d1]\n\t" +#else "mul r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[div], #16\n\t" +#else "lsr r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r5, %[d1], #16\n\t" +#else "lsr r5, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" - "lsr %[d1], %[r], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], r3, #16\n\t" +#else + "lsr %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, %[d1]\n\t" +#else "mul r6, %[d1]\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "lsls r6, %[div], #16\n\t" +#else "lsl r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[d1], #16\n\t" +#else "lsr r6, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" +#endif "# r * div - Done\n\t" "mov %[d1], r8\n\t" "mov r6, r9\n\t" +#ifdef __clang__ + "subs r4, %[d1], r4\n\t" +#else "sub r4, %[d1], r4\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r5\n\t" +#else "sbc r6, r5\n\t" - "mov r5, r6\n\t" - "add %[r], r5\n\t" - "mov r6, %[div]\n\t" - "sub r6, 
r4\n\t" +#endif + "movs r5, r6\n\t" +#ifdef __clang__ + "adds r3, r3, r5\n\t" +#else + "add r3, r3, r5\n\t" +#endif + "movs r6, %[div]\n\t" +#ifdef __clang__ + "subs r6, r6, r4\n\t" +#else + "sub r6, r6, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" - "sub %[r], r6\n\t" - : [r] "+r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r7", "r6", "r8", "r9" +#endif +#ifdef __clang__ + "subs r3, r3, r6\n\t" +#else + "sub r3, r3, r6\n\t" +#endif + "movs %[d1], r3\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); - return r; + return (uint32_t)(size_t)d1; } /* Compare a with b in constant time. @@ -8904,38 +17924,93 @@ SP_NOINLINE static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, */ SP_NOINLINE static int32_t sp_3072_cmp_48(const sp_digit* a, const sp_digit* b) { - sp_digit r = 0; - - __asm__ __volatile__ ( - "mov r3, #0\n\t" + "movs r2, #0\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "mvns r3, r3\n\t" +#else "mvn r3, r3\n\t" - "mov r6, #188\n\t" - "1:\n\t" +#endif + "movs r6, #0xbc\n\t" + "\n" + "L_sp_3072_cmp_48_words_%=: \n\t" "ldr r7, [%[a], r6]\n\t" "ldr r5, [%[b], r6]\n\t" +#ifdef __clang__ + "ands r7, r3\n\t" +#else "and r7, r3\n\t" +#endif +#ifdef __clang__ + "ands r5, r3\n\t" +#else "and r5, r3\n\t" - "mov r4, r7\n\t" - "sub r7, r5\n\t" +#endif + "movs r4, r7\n\t" +#ifdef __clang__ + "subs r7, r7, r5\n\t" +#else + "sub r7, r7, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r7\n\t" +#else "sbc r7, r7\n\t" - "add %[r], r7\n\t" +#endif +#ifdef __clang__ + "adds r2, r2, r7\n\t" +#else + "add r2, r2, r7\n\t" +#endif +#ifdef __clang__ + "mvns r7, r7\n\t" +#else "mvn r7, r7\n\t" +#endif +#ifdef __clang__ + "ands r3, r7\n\t" +#else "and r3, r7\n\t" - "sub r5, r4\n\t" +#endif +#ifdef __clang__ + "subs r5, r5, r4\n\t" +#else + "sub r5, r5, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r7\n\t" +#else "sbc r7, r7\n\t" - "sub %[r], r7\n\t" +#endif 
+#ifdef __clang__ + "subs r2, r2, r7\n\t" +#else + "sub r2, r2, r7\n\t" +#endif +#ifdef __clang__ + "mvns r7, r7\n\t" +#else "mvn r7, r7\n\t" +#endif +#ifdef __clang__ + "ands r3, r7\n\t" +#else "and r3, r7\n\t" - "sub r6, #4\n\t" +#endif +#ifdef __clang__ + "subs r6, r6, #4\n\t" +#else + "sub r6, r6, #4\n\t" +#endif "cmp r6, #0\n\t" - "bge 1b\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b) - : "r3", "r4", "r5", "r6", "r7" + "bge L_sp_3072_cmp_48_words_%=\n\t" + "movs %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7" ); - - return r; + return (uint32_t)(size_t)a; } /* Divide d in a and put remainder into r (m*d + r = a) @@ -9317,32 +18392,55 @@ static void sp_3072_mont_norm_96(sp_digit* r, const sp_digit* m) SP_NOINLINE static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r5, #1\n\t" - "lsl r5, r5, #8\n\t" - "add r5, #128\n\t" + "movs r4, #0\n\t" + "movs r5, #0xff\n\t" +#ifdef __clang__ + "adds r5, r5, #0x81\n\t" +#else + "add r5, r5, #0x81\n\t" +#endif "mov r8, r5\n\t" - "mov r7, #0\n\t" - "1:\n\t" + "movs r7, #0\n\t" + "\n" + "L_sp_3072_cond_sub_96_words_%=: \n\t" "ldr r6, [%[b], r7]\n\t" +#ifdef __clang__ + "ands r6, %[m]\n\t" +#else "and r6, %[m]\n\t" - "mov r5, #0\n\t" - "sub r5, %[c]\n\t" +#endif + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, r4\n\t" +#else + "sub r5, r5, r4\n\t" +#endif "ldr r5, [%[a], r7]\n\t" +#ifdef __clang__ + "sbcs r5, r6\n\t" +#else "sbc r5, r6\n\t" - "sbc %[c], %[c]\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r4\n\t" +#else + "sbc r4, r4\n\t" +#endif "str r5, [%[r], r7]\n\t" - "add r7, #4\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "cmp r7, r8\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r5", "r6", "r7", "r8" + "blt L_sp_3072_cond_sub_96_words_%=\n\t" + "movs %[r], r4\n\t" + : [r] 
"+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r4", "r5", "r6", "r7", "r8" ); - - return c; + return (uint32_t)(size_t)r; } /* Reduce the number back to 3072 bits using Montgomery reduction. @@ -9355,140 +18453,382 @@ SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, sp_digit mp) { sp_digit ca = 0; - __asm__ __volatile__ ( "mov r8, %[mp]\n\t" "mov r12, %[ca]\n\t" - "mov r14, %[m]\n\t" + "mov lr, %[m]\n\t" "mov r9, %[a]\n\t" - "mov r4, #0\n\t" + "movs r4, #0\n\t" "# i = 0\n\t" "mov r11, r4\n\t" - "\n1:\n\t" - "mov r5, #0\n\t" - "mov %[ca], #0\n\t" + "\n" + "L_sp_3072_mont_reduce_96_mod_%=: \n\t" + "movs r5, #0\n\t" + "movs %[ca], #0\n\t" "# mu = a[i] * mp\n\t" "mov %[mp], r8\n\t" "ldr %[a], [%[a]]\n\t" +#ifdef __clang__ + "muls %[mp], %[a]\n\t" +#else "mul %[mp], %[a]\n\t" - "mov %[m], r14\n\t" +#endif + "mov %[m], lr\n\t" "mov r10, r9\n\t" - "\n2:\n\t" + "\n" + "L_sp_3072_mont_reduce_96_word_%=: \n\t" "# a[i+j] += m[j] * mu\n\t" "mov %[a], r10\n\t" "ldr %[a], [%[a]]\n\t" - "mov %[ca], #0\n\t" - "mov r4, r5\n\t" - "mov r5, #0\n\t" + "movs %[ca], #0\n\t" + "movs r4, r5\n\t" + "movs r5, #0\n\t" "# Multiply m[j] and mu - Start\n\t" "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r6, %[mp], #16\n\t" +#else "lsl r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add %[a], r7\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], r7\n\t" +#else + "add %[a], %[a], r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[ca]\n\t" +#else "adc r5, %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs 
r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add %[a], r6\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], r6\n\t" +#else + "add %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r6, %[mp], #16\n\t" +#else "lsr r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r5, r7\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r7\n\t" +#else + "add r5, r5, r7\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add %[a], r6\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], r6\n\t" +#else + "add %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" +#endif "# Multiply m[j] and mu - Done\n\t" - "add r4, %[a]\n\t" +#ifdef __clang__ + "adds r4, r4, %[a]\n\t" +#else + "add r4, r4, %[a]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[ca]\n\t" +#else "adc r5, %[ca]\n\t" +#endif "mov %[a], r10\n\t" "str r4, [%[a]]\n\t" - "mov r6, #4\n\t" - "add %[m], #4\n\t" - "add r10, r6\n\t" - "mov r4, #1\n\t" - "lsl r4, r4, #8\n\t" - "add r4, #124\n\t" - "add r4, r9\n\t" + "movs r6, #4\n\t" +#ifdef __clang__ + "adds %[m], %[m], #4\n\t" +#else + "add %[m], %[m], #4\n\t" +#endif + "add r10, r10, r6\n\t" + "movs r4, #0xff\n\t" +#ifdef __clang__ + "adds r4, r4, #0x7d\n\t" +#else + "add r4, r4, #0x7d\n\t" +#endif + "add r4, r4, r9\n\t" "cmp r10, r4\n\t" - "blt 2b\n\t" + "blt 
L_sp_3072_mont_reduce_96_word_%=\n\t" "# a[i+95] += m[95] * mu\n\t" - "mov %[ca], #0\n\t" + "movs %[ca], #0\n\t" "mov r4, r12\n\t" - "mov %[a], #0\n\t" + "movs %[a], #0\n\t" "# Multiply m[95] and mu - Start\n\t" "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r6, %[mp], #16\n\t" +#else "lsl r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r5, r7\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r7\n\t" +#else + "add r5, r5, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[ca]\n\t" +#else "adc r4, %[ca]\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r6, %[mp], #16\n\t" +#else "lsr r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" 
+#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "# Multiply m[95] and mu - Done\n\t" - "mov %[ca], %[a]\n\t" + "movs %[ca], %[a]\n\t" "mov %[a], r10\n\t" "ldr r7, [%[a], #4]\n\t" "ldr %[a], [%[a]]\n\t" - "mov r6, #0\n\t" - "add r5, %[a]\n\t" + "movs r6, #0\n\t" +#ifdef __clang__ + "adds r5, r5, %[a]\n\t" +#else + "add r5, r5, %[a]\n\t" +#endif +#ifdef __clang__ + "adcs r7, r4\n\t" +#else "adc r7, r4\n\t" +#endif +#ifdef __clang__ + "adcs %[ca], r6\n\t" +#else "adc %[ca], r6\n\t" +#endif "mov %[a], r10\n\t" "str r5, [%[a]]\n\t" "str r7, [%[a], #4]\n\t" "# i += 1\n\t" - "mov r6, #4\n\t" - "add r9, r6\n\t" - "add r11, r6\n\t" + "movs r6, #4\n\t" + "add r9, r9, r6\n\t" + "add r11, r11, r6\n\t" "mov r12, %[ca]\n\t" "mov %[a], r9\n\t" - "mov r4, #1\n\t" - "lsl r4, r4, #8\n\t" - "add r4, #128\n\t" + "movs r4, #0xff\n\t" +#ifdef __clang__ + "adds r4, r4, #0x81\n\t" +#else + "add r4, r4, #0x81\n\t" +#endif "cmp r11, r4\n\t" - "blt 1b\n\t" - "mov %[m], r14\n\t" - : [ca] "+r" (ca), [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + "blt L_sp_3072_mont_reduce_96_mod_%=\n\t" + "mov %[m], lr\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp), [ca] "+r" (ca) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); - sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - ca); } @@ -9534,135 +18874,512 @@ static void sp_3072_mont_sqr_96(sp_digit* r, const sp_digit* a, SP_NOINLINE 
static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div) { - sp_digit r = 0; - __asm__ __volatile__ ( + "movs r3, #0\n\t" +#ifdef __clang__ + "lsrs r5, %[div], #1\n\t" +#else "lsr r5, %[div], #1\n\t" - "add r5, #1\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, #1\n\t" +#else + "add r5, r5, #1\n\t" +#endif "mov r8, %[d0]\n\t" "mov r9, %[d1]\n\t" "# Do top 32\n\t" - "mov r6, r5\n\t" - "sub r6, %[d1]\n\t" + "movs r6, r5\n\t" +#ifdef __clang__ + "subs r6, r6, %[d1]\n\t" +#else + "sub r6, r6, %[d1]\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" - "add %[r], %[r]\n\t" - "sub %[r], r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r3\n\t" +#else + "add r3, r3, r3\n\t" +#endif +#ifdef __clang__ + "subs r3, r3, r6\n\t" +#else + "sub r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "ands r6, r5\n\t" +#else "and r6, r5\n\t" - "sub %[d1], r6\n\t" - "# Next 30 bits\n\t" - "mov r4, #29\n\t" - "1:\n\t" +#endif +#ifdef __clang__ + "subs %[d1], %[d1], r6\n\t" +#else + "sub %[d1], %[d1], r6\n\t" +#endif + "\n\t" + "movs r4, #29\n\t" + "\n" + "L_div_3072_word_96_loop_%=: \n\t" +#ifdef __clang__ + "lsls %[d0], %[d0], #1\n\t" +#else "lsl %[d0], %[d0], #1\n\t" +#endif +#ifdef __clang__ + "adcs %[d1], %[d1]\n\t" +#else "adc %[d1], %[d1]\n\t" - "mov r6, r5\n\t" - "sub r6, %[d1]\n\t" +#endif + "movs r6, r5\n\t" +#ifdef __clang__ + "subs r6, r6, %[d1]\n\t" +#else + "sub r6, r6, %[d1]\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" - "add %[r], %[r]\n\t" - "sub %[r], r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r3\n\t" +#else + "add r3, r3, r3\n\t" +#endif +#ifdef __clang__ + "subs r3, r3, r6\n\t" +#else + "sub r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "ands r6, r5\n\t" +#else "and r6, r5\n\t" - "sub %[d1], r6\n\t" - "sub r4, #1\n\t" - "bpl 1b\n\t" - "mov r7, #0\n\t" - "add %[r], %[r]\n\t" - "add %[r], #1\n\t" +#endif +#ifdef __clang__ + "subs %[d1], %[d1], r6\n\t" +#else + "sub %[d1], %[d1], r6\n\t" +#endif +#ifdef 
__clang__ + "subs r4, r4, #1\n\t" +#else + "sub r4, r4, #1\n\t" +#endif + "bpl L_div_3072_word_96_loop_%=\n\t" + "movs r7, #0\n\t" +#ifdef __clang__ + "adds r3, r3, r3\n\t" +#else + "add r3, r3, r3\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, #1\n\t" +#else + "add r3, r3, #1\n\t" +#endif "# r * div - Start\n\t" - "lsl %[d1], %[r], #16\n\t" +#ifdef __clang__ + "lsls %[d1], r3, #16\n\t" +#else + "lsl %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "lsls r4, %[div], #16\n\t" +#else "lsl r4, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], %[d1], #16\n\t" +#else "lsr %[d1], %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #16\n\t" +#else "lsr r4, r4, #16\n\t" +#endif +#ifdef __clang__ + "muls r4, %[d1]\n\t" +#else "mul r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[div], #16\n\t" +#else "lsr r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r5, %[d1], #16\n\t" +#else "lsr r5, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" - "lsr %[d1], %[r], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], r3, #16\n\t" +#else + "lsr %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, %[d1]\n\t" +#else "mul r6, %[d1]\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "lsls r6, %[div], #16\n\t" +#else "lsl r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[d1], #16\n\t" +#else "lsr r6, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - 
"add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" +#endif "# r * div - Done\n\t" "mov %[d1], r8\n\t" - "sub %[d1], r4\n\t" - "mov r4, %[d1]\n\t" +#ifdef __clang__ + "subs %[d1], %[d1], r4\n\t" +#else + "sub %[d1], %[d1], r4\n\t" +#endif + "movs r4, %[d1]\n\t" "mov %[d1], r9\n\t" +#ifdef __clang__ + "sbcs %[d1], r5\n\t" +#else "sbc %[d1], r5\n\t" - "mov r5, %[d1]\n\t" - "add %[r], r5\n\t" +#endif + "movs r5, %[d1]\n\t" +#ifdef __clang__ + "adds r3, r3, r5\n\t" +#else + "add r3, r3, r5\n\t" +#endif "# r * div - Start\n\t" - "lsl %[d1], %[r], #16\n\t" +#ifdef __clang__ + "lsls %[d1], r3, #16\n\t" +#else + "lsl %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "lsls r4, %[div], #16\n\t" +#else "lsl r4, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], %[d1], #16\n\t" +#else "lsr %[d1], %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #16\n\t" +#else "lsr r4, r4, #16\n\t" +#endif +#ifdef __clang__ + "muls r4, %[d1]\n\t" +#else "mul r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[div], #16\n\t" +#else "lsr r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r5, %[d1], #16\n\t" +#else "lsr r5, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" - "lsr %[d1], %[r], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], r3, #16\n\t" +#else + "lsr %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, %[d1]\n\t" +#else "mul r6, %[d1]\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "lsls r6, %[div], #16\n\t" +#else "lsl r6, %[div], #16\n\t" 
+#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[d1], #16\n\t" +#else "lsr r6, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" +#endif "# r * div - Done\n\t" "mov %[d1], r8\n\t" "mov r6, r9\n\t" +#ifdef __clang__ + "subs r4, %[d1], r4\n\t" +#else "sub r4, %[d1], r4\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r5\n\t" +#else "sbc r6, r5\n\t" - "mov r5, r6\n\t" - "add %[r], r5\n\t" +#endif + "movs r5, r6\n\t" +#ifdef __clang__ + "adds r3, r3, r5\n\t" +#else + "add r3, r3, r5\n\t" +#endif "# r * div - Start\n\t" - "lsl %[d1], %[r], #16\n\t" +#ifdef __clang__ + "lsls %[d1], r3, #16\n\t" +#else + "lsl %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "lsls r4, %[div], #16\n\t" +#else "lsl r4, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], %[d1], #16\n\t" +#else "lsr %[d1], %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #16\n\t" +#else "lsr r4, r4, #16\n\t" +#endif +#ifdef __clang__ + "muls r4, %[d1]\n\t" +#else "mul r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[div], #16\n\t" +#else "lsr r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r5, %[d1], #16\n\t" +#else "lsr r5, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" - "lsr %[d1], %[r], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], r3, #16\n\t" +#else + "lsr %[d1], r3, #16\n\t" +#endif +#ifdef 
__clang__ + "muls r6, %[d1]\n\t" +#else "mul r6, %[d1]\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "lsls r6, %[div], #16\n\t" +#else "lsl r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[d1], #16\n\t" +#else "lsr r6, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" +#endif "# r * div - Done\n\t" "mov %[d1], r8\n\t" "mov r6, r9\n\t" +#ifdef __clang__ + "subs r4, %[d1], r4\n\t" +#else "sub r4, %[d1], r4\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r5\n\t" +#else "sbc r6, r5\n\t" - "mov r5, r6\n\t" - "add %[r], r5\n\t" - "mov r6, %[div]\n\t" - "sub r6, r4\n\t" +#endif + "movs r5, r6\n\t" +#ifdef __clang__ + "adds r3, r3, r5\n\t" +#else + "add r3, r3, r5\n\t" +#endif + "movs r6, %[div]\n\t" +#ifdef __clang__ + "subs r6, r6, r4\n\t" +#else + "sub r6, r6, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" - "sub %[r], r6\n\t" - : [r] "+r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r7", "r6", "r8", "r9" +#endif +#ifdef __clang__ + "subs r3, r3, r6\n\t" +#else + "sub r3, r3, r6\n\t" +#endif + "movs %[d1], r3\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); - return r; + return (uint32_t)(size_t)d1; } /* AND m into each word of a and store in r. 
@@ -9704,40 +19421,98 @@ static void sp_3072_mask_96(sp_digit* r, const sp_digit* a, sp_digit m) */ SP_NOINLINE static int32_t sp_3072_cmp_96(const sp_digit* a, const sp_digit* b) { - sp_digit r = 0; - - __asm__ __volatile__ ( - "mov r3, #0\n\t" + "movs r2, #0\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "mvns r3, r3\n\t" +#else "mvn r3, r3\n\t" - "mov r6, #1\n\t" - "lsl r6, r6, #8\n\t" - "add r6, #124\n\t" - "1:\n\t" +#endif + "movs r6, #0xff\n\t" +#ifdef __clang__ + "adds r6, r6, #0x7d\n\t" +#else + "add r6, r6, #0x7d\n\t" +#endif + "\n" + "L_sp_3072_cmp_96_words_%=: \n\t" "ldr r7, [%[a], r6]\n\t" "ldr r5, [%[b], r6]\n\t" +#ifdef __clang__ + "ands r7, r3\n\t" +#else "and r7, r3\n\t" +#endif +#ifdef __clang__ + "ands r5, r3\n\t" +#else "and r5, r3\n\t" - "mov r4, r7\n\t" - "sub r7, r5\n\t" +#endif + "movs r4, r7\n\t" +#ifdef __clang__ + "subs r7, r7, r5\n\t" +#else + "sub r7, r7, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r7\n\t" +#else "sbc r7, r7\n\t" - "add %[r], r7\n\t" +#endif +#ifdef __clang__ + "adds r2, r2, r7\n\t" +#else + "add r2, r2, r7\n\t" +#endif +#ifdef __clang__ + "mvns r7, r7\n\t" +#else "mvn r7, r7\n\t" +#endif +#ifdef __clang__ + "ands r3, r7\n\t" +#else "and r3, r7\n\t" - "sub r5, r4\n\t" +#endif +#ifdef __clang__ + "subs r5, r5, r4\n\t" +#else + "sub r5, r5, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r7\n\t" +#else "sbc r7, r7\n\t" - "sub %[r], r7\n\t" +#endif +#ifdef __clang__ + "subs r2, r2, r7\n\t" +#else + "sub r2, r2, r7\n\t" +#endif +#ifdef __clang__ + "mvns r7, r7\n\t" +#else "mvn r7, r7\n\t" +#endif +#ifdef __clang__ + "ands r3, r7\n\t" +#else "and r3, r7\n\t" - "sub r6, #4\n\t" +#endif +#ifdef __clang__ + "subs r6, r6, #4\n\t" +#else + "sub r6, r6, #4\n\t" +#endif "cmp r6, #0\n\t" - "bge 1b\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b) - : "r3", "r4", "r5", "r6", "r7" + "bge L_sp_3072_cmp_96_words_%=\n\t" + "movs %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7" ); - - return 
r; + return (uint32_t)(size_t)a; } /* Divide d in a and put remainder into r (m*d + r = a) @@ -10284,35 +20059,59 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, * b A single precision number to add. * m Mask value to apply. */ -SP_NOINLINE static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +SP_NOINLINE static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, + const sp_digit* b, sp_digit m) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r5, #192\n\t" + "movs r4, #0\n\t" + "movs r5, #0xc0\n\t" "mov r8, r5\n\t" - "mov r7, #0\n\t" - "1:\n\t" + "movs r7, #0\n\t" + "\n" + "L_sp_3072_cond_add_48_words_%=: \n\t" "ldr r6, [%[b], r7]\n\t" +#ifdef __clang__ + "ands r6, %[m]\n\t" +#else "and r6, %[m]\n\t" - "mov r5, #0\n\t" - "sub r5, #1\n\t" - "add r5, %[c]\n\t" +#endif + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, #1\n\t" +#else + "sub r5, r5, #1\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r4\n\t" +#else + "add r5, r5, r4\n\t" +#endif "ldr r5, [%[a], r7]\n\t" +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" +#endif + "movs r4, #0\n\t" +#ifdef __clang__ + "adcs r4, r4\n\t" +#else + "adc r4, r4\n\t" +#endif "str r5, [%[r], r7]\n\t" - "add r7, #4\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "cmp r7, r8\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r5", "r6", "r7", "r8" + "blt L_sp_3072_cond_add_48_words_%=\n\t" + "movs %[r], r4\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r4", "r5", "r6", "r7", "r8" ); - - return c; + return (uint32_t)(size_t)r; } /* RSA private key operation. 
@@ -10623,604 +20422,2202 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, #ifdef WOLFSSL_HAVE_SP_DH #ifdef HAVE_FFDHE_3072 -static void sp_3072_lshift_96(sp_digit* r, sp_digit* a, byte n) +/* Left shift a by n bits into r. (r = a << n) + * + * r A single precision integer. + * a A single precision integer. + * n Integer representing number of bits to shift. + */ +static void sp_3072_lshift_96(sp_digit* r, const sp_digit* a, byte n) { __asm__ __volatile__ ( - "mov r6, #31\n\t" - "sub r6, r6, %[n]\n\t" - "add %[a], %[a], #255\n\t" - "add %[r], %[r], #255\n\t" - "add %[a], %[a], #65\n\t" - "add %[r], %[r], #65\n\t" - "ldr r3, [%[a], #60]\n\t" - "lsr r4, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r4, r4, r6\n\t" - "ldr r2, [%[a], #56]\n\t" - "str r4, [%[r], #64]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #52]\n\t" - "str r3, [%[r], #60]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #48]\n\t" - "str r2, [%[r], #56]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #44]\n\t" - "str r4, [%[r], #52]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #40]\n\t" - "str r3, [%[r], #48]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #36]\n\t" - "str r2, [%[r], #44]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #32]\n\t" - "str r4, [%[r], #40]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #28]\n\t" - "str r3, [%[r], #36]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #24]\n\t" - "str r2, [%[r], 
#32]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #20]\n\t" - "str r4, [%[r], #28]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #16]\n\t" - "str r3, [%[r], #24]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #12]\n\t" - "str r2, [%[r], #20]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #8]\n\t" - "str r4, [%[r], #16]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #4]\n\t" - "str r3, [%[r], #12]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #0]\n\t" - "str r2, [%[r], #8]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "sub %[a], %[a], #64\n\t" - "sub %[r], %[r], #64\n\t" - "ldr r2, [%[a], #60]\n\t" - "str r4, [%[r], #68]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #56]\n\t" - "str r3, [%[r], #64]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #52]\n\t" - "str r2, [%[r], #60]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #48]\n\t" - "str r4, [%[r], #56]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #44]\n\t" - "str r3, [%[r], #52]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #40]\n\t" - "str r2, [%[r], #48]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #36]\n\t" - "str r4, 
[%[r], #44]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #32]\n\t" - "str r3, [%[r], #40]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #28]\n\t" - "str r2, [%[r], #36]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #24]\n\t" - "str r4, [%[r], #32]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #20]\n\t" - "str r3, [%[r], #28]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #16]\n\t" - "str r2, [%[r], #24]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #12]\n\t" - "str r4, [%[r], #20]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #8]\n\t" - "str r3, [%[r], #16]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #4]\n\t" - "str r2, [%[r], #12]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #0]\n\t" - "str r4, [%[r], #8]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "sub %[a], %[a], #64\n\t" - "sub %[r], %[r], #64\n\t" + "movs r7, #31\n\t" +#ifdef __clang__ + "subs r7, r7, %[n]\n\t" +#else + "sub r7, r7, %[n]\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #0xff\n\t" +#else + "add %[a], %[a], #0xff\n\t" +#endif +#ifdef __clang__ + "adds %[r], %[r], #0xff\n\t" +#else + "add %[r], %[r], #0xff\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #0x41\n\t" +#else + "add %[a], %[a], #0x41\n\t" +#endif +#ifdef __clang__ + "adds %[r], %[r], #0x41\n\t" +#else + "add %[r], %[r], #0x41\n\t" +#endif "ldr 
r4, [%[a], #60]\n\t" - "str r3, [%[r], #68]\n\t" +#ifdef __clang__ + "lsrs r5, r4, #1\n\t" +#else "lsr r5, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r5, r5, r7\n\t" +#else + "lsr r5, r5, r7\n\t" +#endif "ldr r3, [%[a], #56]\n\t" - "str r2, [%[r], #64]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #64]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #52]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #52]\n\t" "str r4, [%[r], #60]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #48]\n\t" "str r3, [%[r], #56]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #44]\n\t" - "str r2, [%[r], #52]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #52]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, 
r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #40]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #40]\n\t" "str r4, [%[r], #48]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #36]\n\t" "str r3, [%[r], #44]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #32]\n\t" - "str r2, [%[r], #40]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #40]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #28]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #28]\n\t" "str r4, [%[r], #36]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" 
+#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #24]\n\t" "str r3, [%[r], #32]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #20]\n\t" - "str r2, [%[r], #28]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #28]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #16]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #16]\n\t" "str r4, [%[r], #24]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #12]\n\t" "str r3, [%[r], #20]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #8]\n\t" - "str r2, [%[r], #16]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, 
[%[r], #16]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #4]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #4]\n\t" "str r4, [%[r], #12]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #0]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a]]\n\t" "str r3, [%[r], #8]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "sub %[a], %[a], #64\n\t" - "sub %[r], %[r], #64\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif +#ifdef __clang__ + "subs %[a], %[a], #0x40\n\t" +#else + "sub %[a], %[a], #0x40\n\t" +#endif +#ifdef __clang__ + "subs %[r], %[r], #0x40\n\t" +#else + "sub %[r], %[r], #0x40\n\t" +#endif "ldr r3, [%[a], #60]\n\t" - "str r2, [%[r], #68]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #68]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #56]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef 
__clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #56]\n\t" "str r4, [%[r], #64]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #52]\n\t" "str r3, [%[r], #60]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #48]\n\t" - "str r2, [%[r], #56]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #56]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #44]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #44]\n\t" "str r4, [%[r], #52]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #40]\n\t" "str r3, [%[r], #48]\n\t" - "lsr r5, r4, #1\n\t" 
+#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #36]\n\t" - "str r2, [%[r], #44]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #44]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #32]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #32]\n\t" "str r4, [%[r], #40]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #28]\n\t" "str r3, [%[r], #36]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #24]\n\t" - "str r2, [%[r], #32]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #32]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, 
r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #20]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #20]\n\t" "str r4, [%[r], #28]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #16]\n\t" "str r3, [%[r], #24]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #12]\n\t" - "str r2, [%[r], #20]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #20]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #8]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #8]\n\t" "str r4, [%[r], #16]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else 
+ "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #4]\n\t" "str r3, [%[r], #12]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #0]\n\t" - "str r2, [%[r], #8]\n\t" - "lsr r5, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a]]\n\t" + "str r5, [%[r], #8]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "sub %[a], %[a], #64\n\t" - "sub %[r], %[r], #64\n\t" - "ldr r2, [%[a], #60]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif +#ifdef __clang__ + "subs %[a], %[a], #0x40\n\t" +#else + "sub %[a], %[a], #0x40\n\t" +#endif +#ifdef __clang__ + "subs %[r], %[r], #0x40\n\t" +#else + "sub %[r], %[r], #0x40\n\t" +#endif + "ldr r5, [%[a], #60]\n\t" "str r4, [%[r], #68]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #56]\n\t" "str r3, [%[r], #64]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, 
r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #52]\n\t" - "str r2, [%[r], #60]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #60]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #48]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #48]\n\t" "str r4, [%[r], #56]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #44]\n\t" "str r3, [%[r], #52]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #40]\n\t" - "str r2, [%[r], #48]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #48]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #36]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" 
+#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #36]\n\t" "str r4, [%[r], #44]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #32]\n\t" "str r3, [%[r], #40]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #28]\n\t" - "str r2, [%[r], #36]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #36]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #24]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #24]\n\t" "str r4, [%[r], #32]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #20]\n\t" "str r3, [%[r], #28]\n\t" - "lsr r5, 
r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #16]\n\t" - "str r2, [%[r], #24]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #24]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #12]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #12]\n\t" "str r4, [%[r], #20]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #8]\n\t" "str r3, [%[r], #16]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #4]\n\t" - "str r2, [%[r], #12]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #12]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else 
"lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #0]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a]]\n\t" "str r4, [%[r], #8]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "sub %[a], %[a], #64\n\t" - "sub %[r], %[r], #64\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif +#ifdef __clang__ + "subs %[a], %[a], #0x40\n\t" +#else + "sub %[a], %[a], #0x40\n\t" +#endif +#ifdef __clang__ + "subs %[r], %[r], #0x40\n\t" +#else + "sub %[r], %[r], #0x40\n\t" +#endif "ldr r4, [%[a], #60]\n\t" "str r3, [%[r], #68]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #56]\n\t" - "str r2, [%[r], #64]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #64]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #52]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #52]\n\t" "str r4, [%[r], #60]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, 
r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #48]\n\t" "str r3, [%[r], #56]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #44]\n\t" - "str r2, [%[r], #52]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #52]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #40]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #40]\n\t" "str r4, [%[r], #48]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #36]\n\t" "str r3, [%[r], #44]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - 
"orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #32]\n\t" - "str r2, [%[r], #40]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #40]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #28]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #28]\n\t" "str r4, [%[r], #36]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #24]\n\t" "str r3, [%[r], #32]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #20]\n\t" - "str r2, [%[r], #28]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #28]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #16]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif 
+#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #16]\n\t" "str r4, [%[r], #24]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #12]\n\t" "str r3, [%[r], #20]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #8]\n\t" - "str r2, [%[r], #16]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #16]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #4]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #4]\n\t" "str r4, [%[r], #12]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #0]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a]]\n\t" "str r3, [%[r], #8]\n\t" 
- "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "str r4, [%[r]]\n\t" - "str r2, [%[r], #4]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif +#ifdef __clang__ + "subs %[a], %[a], #0x40\n\t" +#else + "sub %[a], %[a], #0x40\n\t" +#endif +#ifdef __clang__ + "subs %[r], %[r], #0x40\n\t" +#else + "sub %[r], %[r], #0x40\n\t" +#endif + "ldr r3, [%[a], #60]\n\t" + "str r5, [%[r], #68]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #56]\n\t" + "str r4, [%[r], #64]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a], #52]\n\t" + "str r3, [%[r], #60]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a], #48]\n\t" + "str r5, [%[r], #56]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" 
+#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #44]\n\t" + "str r4, [%[r], #52]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a], #40]\n\t" + "str r3, [%[r], #48]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a], #36]\n\t" + "str r5, [%[r], #44]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #32]\n\t" + "str r4, [%[r], #40]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a], #28]\n\t" + "str r3, [%[r], #36]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a], #24]\n\t" + 
"str r5, [%[r], #32]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #20]\n\t" + "str r4, [%[r], #28]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a], #16]\n\t" + "str r3, [%[r], #24]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a], #12]\n\t" + "str r5, [%[r], #20]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #8]\n\t" + "str r4, [%[r], #16]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a], #4]\n\t" + "str r3, [%[r], #12]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, 
%[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a]]\n\t" + "str r5, [%[r], #8]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif +#ifdef __clang__ + "subs %[a], %[a], #0x40\n\t" +#else + "sub %[a], %[a], #0x40\n\t" +#endif +#ifdef __clang__ + "subs %[r], %[r], #0x40\n\t" +#else + "sub %[r], %[r], #0x40\n\t" +#endif + "ldr r5, [%[a], #60]\n\t" + "str r4, [%[r], #68]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a], #56]\n\t" + "str r3, [%[r], #64]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a], #52]\n\t" + "str r5, [%[r], #60]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #48]\n\t" + "str r4, [%[r], #56]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" 
+#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a], #44]\n\t" + "str r3, [%[r], #52]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a], #40]\n\t" + "str r5, [%[r], #48]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #36]\n\t" + "str r4, [%[r], #44]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a], #32]\n\t" + "str r3, [%[r], #40]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a], #28]\n\t" + "str r5, [%[r], #36]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" 
+#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #24]\n\t" + "str r4, [%[r], #32]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a], #20]\n\t" + "str r3, [%[r], #28]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a], #16]\n\t" + "str r5, [%[r], #24]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #12]\n\t" + "str r4, [%[r], #20]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a], #8]\n\t" + "str r3, [%[r], #16]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a], #4]\n\t" + 
"str r5, [%[r], #12]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a]]\n\t" + "str r4, [%[r], #8]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "str r5, [%[r]]\n\t" + "str r3, [%[r], #4]\n\t" + : [r] "+r" (r), [a] "+r" (a), [n] "+r" (n) : - : [r] "r" (r), [a] "r" (a), [n] "r" (n) - : "memory", "r2", "r3", "r4", "r5", "r6" + : "memory", "r3", "r4", "r5", "r6", "r7" ); } @@ -11623,552 +23020,1114 @@ static void sp_4096_to_bin(sp_digit* r, byte* a) } #ifndef WOLFSSL_SP_SMALL -/* Sub b from a into r. (r = a - b) +/* Sub b from a into a. (a -= b) * - * r A single precision integer. * a A single precision integer. * b A single precision integer. 
*/ SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldr r3, [%[a], #0]\n\t" + "movs r2, #0\n\t" + "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" + "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" - "sub r3, r5\n\t" +#ifdef __clang__ + "subs r3, r3, r5\n\t" +#else + "sub r3, r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" - "str r3, [%[a], #0]\n\t" +#endif + "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #8]\n\t" "ldr r6, [%[b], #12]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #8]\n\t" "str r4, [%[a], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #16]\n\t" "ldr r6, [%[b], #20]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #16]\n\t" "str r4, [%[a], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #24]\n\t" "ldr r6, [%[b], #28]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #24]\n\t" "str r4, [%[a], #28]\n\t" "ldr r3, [%[a], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #32]\n\t" "ldr r6, [%[b], #36]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #32]\n\t" "str r4, [%[a], #36]\n\t" "ldr r3, [%[a], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #40]\n\t" "ldr r6, [%[b], #44]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #40]\n\t" 
"str r4, [%[a], #44]\n\t" "ldr r3, [%[a], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #48]\n\t" "ldr r6, [%[b], #52]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #48]\n\t" "str r4, [%[a], #52]\n\t" "ldr r3, [%[a], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #56]\n\t" "ldr r6, [%[b], #60]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #56]\n\t" "str r4, [%[a], #60]\n\t" "ldr r3, [%[a], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #64]\n\t" "ldr r6, [%[b], #68]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #64]\n\t" "str r4, [%[a], #68]\n\t" "ldr r3, [%[a], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #72]\n\t" "ldr r6, [%[b], #76]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #72]\n\t" "str r4, [%[a], #76]\n\t" "ldr r3, [%[a], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #80]\n\t" "ldr r6, [%[b], #84]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #80]\n\t" "str r4, [%[a], #84]\n\t" "ldr r3, [%[a], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #88]\n\t" "ldr r6, [%[b], #92]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #88]\n\t" "str r4, [%[a], #92]\n\t" "ldr r3, [%[a], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #96]\n\t" "ldr r6, [%[b], #100]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + 
"sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #96]\n\t" "str r4, [%[a], #100]\n\t" "ldr r3, [%[a], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #104]\n\t" "ldr r6, [%[b], #108]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #104]\n\t" "str r4, [%[a], #108]\n\t" "ldr r3, [%[a], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #112]\n\t" "ldr r6, [%[b], #116]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #112]\n\t" "str r4, [%[a], #116]\n\t" "ldr r3, [%[a], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #120]\n\t" "ldr r6, [%[b], #124]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #120]\n\t" "str r4, [%[a], #124]\n\t" - "sbc %[c], %[c]\n\t" - "add %[a], #0x80\n\t" - "add %[b], #0x80\n\t" - "mov r5, #0\n\t" - "sub r5, %[c]\n\t" - "ldr r3, [%[a], #0]\n\t" +#ifdef __clang__ + "sbcs r2, r2\n\t" +#else + "sbc r2, r2\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #0x80\n\t" +#else + "add %[a], %[a], #0x80\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #0x80\n\t" +#else + "add %[b], %[b], #0x80\n\t" +#endif + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, r2\n\t" +#else + "sub r5, r5, r2\n\t" +#endif + "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" + "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" - "str r3, [%[a], #0]\n\t" +#endif + "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #8]\n\t" "ldr r6, [%[b], #12]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc 
r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #8]\n\t" "str r4, [%[a], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #16]\n\t" "ldr r6, [%[b], #20]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #16]\n\t" "str r4, [%[a], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #24]\n\t" "ldr r6, [%[b], #28]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #24]\n\t" "str r4, [%[a], #28]\n\t" "ldr r3, [%[a], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #32]\n\t" "ldr r6, [%[b], #36]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #32]\n\t" "str r4, [%[a], #36]\n\t" "ldr r3, [%[a], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #40]\n\t" "ldr r6, [%[b], #44]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #40]\n\t" "str r4, [%[a], #44]\n\t" "ldr r3, [%[a], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #48]\n\t" "ldr r6, [%[b], #52]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #48]\n\t" "str r4, [%[a], #52]\n\t" "ldr r3, [%[a], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #56]\n\t" "ldr r6, [%[b], #60]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #56]\n\t" "str r4, [%[a], #60]\n\t" "ldr r3, [%[a], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #64]\n\t" "ldr 
r6, [%[b], #68]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #64]\n\t" "str r4, [%[a], #68]\n\t" "ldr r3, [%[a], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #72]\n\t" "ldr r6, [%[b], #76]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #72]\n\t" "str r4, [%[a], #76]\n\t" "ldr r3, [%[a], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #80]\n\t" "ldr r6, [%[b], #84]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #80]\n\t" "str r4, [%[a], #84]\n\t" "ldr r3, [%[a], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #88]\n\t" "ldr r6, [%[b], #92]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #88]\n\t" "str r4, [%[a], #92]\n\t" "ldr r3, [%[a], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #96]\n\t" "ldr r6, [%[b], #100]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #96]\n\t" "str r4, [%[a], #100]\n\t" "ldr r3, [%[a], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #104]\n\t" "ldr r6, [%[b], #108]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #104]\n\t" "str r4, [%[a], #108]\n\t" "ldr r3, [%[a], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #112]\n\t" "ldr r6, [%[b], #116]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #112]\n\t" "str r4, [%[a], 
#116]\n\t" "ldr r3, [%[a], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #120]\n\t" "ldr r6, [%[b], #124]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #120]\n\t" "str r4, [%[a], #124]\n\t" - "sbc %[c], %[c]\n\t" - "add %[a], #0x80\n\t" - "add %[b], #0x80\n\t" - "mov r5, #0\n\t" - "sub r5, %[c]\n\t" - "ldr r3, [%[a], #0]\n\t" +#ifdef __clang__ + "sbcs r2, r2\n\t" +#else + "sbc r2, r2\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #0x80\n\t" +#else + "add %[a], %[a], #0x80\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #0x80\n\t" +#else + "add %[b], %[b], #0x80\n\t" +#endif + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, r2\n\t" +#else + "sub r5, r5, r2\n\t" +#endif + "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" + "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" - "str r3, [%[a], #0]\n\t" +#endif + "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #8]\n\t" "ldr r6, [%[b], #12]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #8]\n\t" "str r4, [%[a], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #16]\n\t" "ldr r6, [%[b], #20]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #16]\n\t" "str r4, [%[a], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #24]\n\t" "ldr r6, [%[b], #28]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], 
#24]\n\t" "str r4, [%[a], #28]\n\t" "ldr r3, [%[a], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #32]\n\t" "ldr r6, [%[b], #36]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #32]\n\t" "str r4, [%[a], #36]\n\t" "ldr r3, [%[a], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #40]\n\t" "ldr r6, [%[b], #44]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #40]\n\t" "str r4, [%[a], #44]\n\t" "ldr r3, [%[a], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #48]\n\t" "ldr r6, [%[b], #52]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #48]\n\t" "str r4, [%[a], #52]\n\t" "ldr r3, [%[a], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #56]\n\t" "ldr r6, [%[b], #60]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #56]\n\t" "str r4, [%[a], #60]\n\t" "ldr r3, [%[a], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #64]\n\t" "ldr r6, [%[b], #68]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #64]\n\t" "str r4, [%[a], #68]\n\t" "ldr r3, [%[a], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #72]\n\t" "ldr r6, [%[b], #76]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #72]\n\t" "str r4, [%[a], #76]\n\t" "ldr r3, [%[a], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #80]\n\t" "ldr r6, [%[b], #84]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef 
__clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #80]\n\t" "str r4, [%[a], #84]\n\t" "ldr r3, [%[a], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #88]\n\t" "ldr r6, [%[b], #92]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #88]\n\t" "str r4, [%[a], #92]\n\t" "ldr r3, [%[a], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #96]\n\t" "ldr r6, [%[b], #100]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #96]\n\t" "str r4, [%[a], #100]\n\t" "ldr r3, [%[a], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #104]\n\t" "ldr r6, [%[b], #108]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #104]\n\t" "str r4, [%[a], #108]\n\t" "ldr r3, [%[a], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #112]\n\t" "ldr r6, [%[b], #116]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #112]\n\t" "str r4, [%[a], #116]\n\t" "ldr r3, [%[a], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #120]\n\t" "ldr r6, [%[b], #124]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #120]\n\t" "str r4, [%[a], #124]\n\t" - "sbc %[c], %[c]\n\t" - "add %[a], #0x80\n\t" - "add %[b], #0x80\n\t" - "mov r5, #0\n\t" - "sub r5, %[c]\n\t" - "ldr r3, [%[a], #0]\n\t" +#ifdef __clang__ + "sbcs r2, r2\n\t" +#else + "sbc r2, r2\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #0x80\n\t" +#else + "add %[a], %[a], #0x80\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #0x80\n\t" +#else + "add %[b], %[b], 
#0x80\n\t" +#endif + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, r2\n\t" +#else + "sub r5, r5, r2\n\t" +#endif + "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" + "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" - "str r3, [%[a], #0]\n\t" +#endif + "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #8]\n\t" "ldr r6, [%[b], #12]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #8]\n\t" "str r4, [%[a], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #16]\n\t" "ldr r6, [%[b], #20]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #16]\n\t" "str r4, [%[a], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #24]\n\t" "ldr r6, [%[b], #28]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #24]\n\t" "str r4, [%[a], #28]\n\t" "ldr r3, [%[a], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #32]\n\t" "ldr r6, [%[b], #36]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #32]\n\t" "str r4, [%[a], #36]\n\t" "ldr r3, [%[a], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #40]\n\t" "ldr r6, [%[b], #44]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #40]\n\t" "str r4, [%[a], #44]\n\t" "ldr r3, [%[a], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, 
[%[b], #48]\n\t" "ldr r6, [%[b], #52]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #48]\n\t" "str r4, [%[a], #52]\n\t" "ldr r3, [%[a], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #56]\n\t" "ldr r6, [%[b], #60]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #56]\n\t" "str r4, [%[a], #60]\n\t" "ldr r3, [%[a], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #64]\n\t" "ldr r6, [%[b], #68]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #64]\n\t" "str r4, [%[a], #68]\n\t" "ldr r3, [%[a], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #72]\n\t" "ldr r6, [%[b], #76]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #72]\n\t" "str r4, [%[a], #76]\n\t" "ldr r3, [%[a], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #80]\n\t" "ldr r6, [%[b], #84]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #80]\n\t" "str r4, [%[a], #84]\n\t" "ldr r3, [%[a], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #88]\n\t" "ldr r6, [%[b], #92]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #88]\n\t" "str r4, [%[a], #92]\n\t" "ldr r3, [%[a], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #96]\n\t" "ldr r6, [%[b], #100]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #96]\n\t" "str r4, 
[%[a], #100]\n\t" "ldr r3, [%[a], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #104]\n\t" "ldr r6, [%[b], #108]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #104]\n\t" "str r4, [%[a], #108]\n\t" "ldr r3, [%[a], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #112]\n\t" "ldr r6, [%[b], #116]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #112]\n\t" "str r4, [%[a], #116]\n\t" "ldr r3, [%[a], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #120]\n\t" "ldr r6, [%[b], #124]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #120]\n\t" "str r4, [%[a], #124]\n\t" - "sbc %[c], %[c]\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) +#ifdef __clang__ + "sbcs r2, r2\n\t" +#else + "sbc r2, r2\n\t" +#endif + "movs %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6" + : "memory", "r2", "r3", "r4", "r5", "r6" ); - - return c; + return (uint32_t)(size_t)a; } /* Add b to a into r. 
(r = a + b) @@ -12180,549 +24139,1127 @@ SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a, SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mvn r7, r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "add r4, r5\n\t" - "str r4, [%[r], #0]\n\t" + "movs r6, #0\n\t" +#ifdef __clang__ + "mvns r6, r6\n\t" +#else + "mvn r6, r6\n\t" +#endif + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "adds r4, r4, r5\n\t" +#else + "add r4, r4, r5\n\t" +#endif + "str r4, [%[r]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[b], #32]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], 
#40]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #44]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[b], #48]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #52]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[b], #56]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #60]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #60]\n\t" "ldr r4, [%[a], #64]\n\t" "ldr r5, [%[b], #64]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #68]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #68]\n\t" "ldr r4, [%[a], #72]\n\t" "ldr r5, [%[b], #72]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #76]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #76]\n\t" "ldr r4, [%[a], #80]\n\t" "ldr r5, [%[b], #80]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #84]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #84]\n\t" "ldr r4, [%[a], #88]\n\t" "ldr r5, [%[b], #88]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #92]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" 
+#endif "str r4, [%[r], #92]\n\t" "ldr r4, [%[a], #96]\n\t" "ldr r5, [%[b], #96]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #100]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #100]\n\t" "ldr r4, [%[a], #104]\n\t" "ldr r5, [%[b], #104]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #108]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #108]\n\t" "ldr r4, [%[a], #112]\n\t" "ldr r5, [%[b], #112]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #116]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #116]\n\t" "ldr r4, [%[a], #120]\n\t" "ldr r5, [%[b], #120]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #124]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #124]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - "add %[a], #0x80\n\t" - "add %[b], #0x80\n\t" - "add %[r], #0x80\n\t" - "add %[c], r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #0x80\n\t" +#else + "add %[a], %[a], #0x80\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #0x80\n\t" +#else + "add %[b], %[b], #0x80\n\t" +#endif +#ifdef __clang__ + "adds %[r], %[r], #0x80\n\t" +#else + "add %[r], %[r], #0x80\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" - "str 
r4, [%[r], #0]\n\t" +#endif + "str r4, [%[r]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[b], #32]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], #40]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #44]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[b], #48]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #52]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr 
r5, [%[b], #56]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #60]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #60]\n\t" "ldr r4, [%[a], #64]\n\t" "ldr r5, [%[b], #64]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #68]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #68]\n\t" "ldr r4, [%[a], #72]\n\t" "ldr r5, [%[b], #72]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #76]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #76]\n\t" "ldr r4, [%[a], #80]\n\t" "ldr r5, [%[b], #80]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #84]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #84]\n\t" "ldr r4, [%[a], #88]\n\t" "ldr r5, [%[b], #88]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #92]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #92]\n\t" "ldr r4, [%[a], #96]\n\t" "ldr r5, [%[b], #96]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #100]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #100]\n\t" "ldr r4, [%[a], #104]\n\t" "ldr r5, [%[b], #104]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #108]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else 
"adc r4, r5\n\t" +#endif "str r4, [%[r], #108]\n\t" "ldr r4, [%[a], #112]\n\t" "ldr r5, [%[b], #112]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #116]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #116]\n\t" "ldr r4, [%[a], #120]\n\t" "ldr r5, [%[b], #120]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #124]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #124]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - "add %[a], #0x80\n\t" - "add %[b], #0x80\n\t" - "add %[r], #0x80\n\t" - "add %[c], r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #0x80\n\t" +#else + "add %[a], %[a], #0x80\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #0x80\n\t" +#else + "add %[b], %[b], #0x80\n\t" +#endif +#ifdef __clang__ + "adds %[r], %[r], #0x80\n\t" +#else + "add %[r], %[r], #0x80\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" - "str r4, [%[r], #0]\n\t" +#endif + "str r4, [%[r]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" 
+#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[b], #32]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], #40]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #44]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[b], #48]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #52]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[b], #56]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #60]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #60]\n\t" "ldr r4, [%[a], #64]\n\t" "ldr r5, [%[b], #64]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #68]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #68]\n\t" "ldr r4, [%[a], 
#72]\n\t" "ldr r5, [%[b], #72]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #76]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #76]\n\t" "ldr r4, [%[a], #80]\n\t" "ldr r5, [%[b], #80]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #84]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #84]\n\t" "ldr r4, [%[a], #88]\n\t" "ldr r5, [%[b], #88]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #92]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #92]\n\t" "ldr r4, [%[a], #96]\n\t" "ldr r5, [%[b], #96]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #100]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #100]\n\t" "ldr r4, [%[a], #104]\n\t" "ldr r5, [%[b], #104]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #108]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #108]\n\t" "ldr r4, [%[a], #112]\n\t" "ldr r5, [%[b], #112]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #116]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #116]\n\t" "ldr r4, [%[a], #120]\n\t" "ldr r5, [%[b], #120]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #124]\n\t" +#ifdef __clang__ + 
"adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #124]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - "add %[a], #0x80\n\t" - "add %[b], #0x80\n\t" - "add %[r], #0x80\n\t" - "add %[c], r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #0x80\n\t" +#else + "add %[a], %[a], #0x80\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #0x80\n\t" +#else + "add %[b], %[b], #0x80\n\t" +#endif +#ifdef __clang__ + "adds %[r], %[r], #0x80\n\t" +#else + "add %[r], %[r], #0x80\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" - "str r4, [%[r], #0]\n\t" +#endif + "str r4, [%[r]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[b], #32]\n\t" +#ifdef __clang__ + "adcs r4, 
r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], #40]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #44]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[b], #48]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #52]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[b], #56]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #60]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #60]\n\t" "ldr r4, [%[a], #64]\n\t" "ldr r5, [%[b], #64]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #68]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #68]\n\t" "ldr r4, [%[a], #72]\n\t" "ldr r5, [%[b], #72]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #76]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #76]\n\t" "ldr r4, [%[a], #80]\n\t" "ldr r5, [%[b], #80]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #84]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #84]\n\t" "ldr r4, 
[%[a], #88]\n\t" "ldr r5, [%[b], #88]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #92]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #92]\n\t" "ldr r4, [%[a], #96]\n\t" "ldr r5, [%[b], #96]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #100]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #100]\n\t" "ldr r4, [%[a], #104]\n\t" "ldr r5, [%[b], #104]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #108]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #108]\n\t" "ldr r4, [%[a], #112]\n\t" "ldr r5, [%[b], #112]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #116]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #116]\n\t" "ldr r4, [%[a], #120]\n\t" "ldr r5, [%[b], #120]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #124]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #124]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif + "movs %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r7" + : "memory", "r3", "r4", "r5", "r6" ); - - return c; + return (uint32_t)(size_t)r; } /* Multiply a and b into r. 
(r = a * b) @@ -12798,34 +25335,70 @@ SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r6, %[a]\n\t" - "mov r7, #0\n\t" - "mov r4, #2\n\t" - "lsl r4, #8\n\t" - "sub r7, #1\n\t" - "add r6, r4\n\t" - "\n1:\n\t" - "add %[c], r7\n\t" + "movs r6, %[a]\n\t" + "movs r7, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #2\n\t" +#ifdef __clang__ + "lsls r4, r4, #8\n\t" +#else + "lsl r4, r4, #8\n\t" +#endif +#ifdef __clang__ + "subs r7, r7, #1\n\t" +#else + "sub r7, r7, #1\n\t" +#endif +#ifdef __clang__ + "adds r6, r6, r4\n\t" +#else + "add r6, r6, r4\n\t" +#endif + "\n" + "L_sp_4096_add_128_word_%=: \n\t" +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif "ldr r4, [%[a]]\n\t" "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r]]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - "add %[a], #4\n\t" - "add %[b], #4\n\t" - "add %[r], #4\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #4\n\t" +#else + "add %[b], %[b], #4\n\t" +#endif +#ifdef __clang__ + "adds %[r], %[r], #4\n\t" +#else + "add %[r], %[r], #4\n\t" +#endif "cmp %[a], r6\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "bne L_sp_4096_add_128_word_%=\n\t" + "movs %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r3", "r4", "r5", "r6", "r7" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ @@ -12838,34 +25411,67 @@ SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b) { - sp_digit 
c = 0; __asm__ __volatile__ ( - "mov r7, %[a]\n\t" - "mov r5, #2\n\t" - "lsl r5, #8\n\t" - "add r7, r5\n\t" - "\n1:\n\t" - "mov r5, #0\n\t" - "sub r5, %[c]\n\t" + "movs r7, %[a]\n\t" + "movs r2, #0\n\t" + "movs r5, #2\n\t" +#ifdef __clang__ + "lsls r5, r5, #8\n\t" +#else + "lsl r5, r5, #8\n\t" +#endif +#ifdef __clang__ + "adds r7, r7, r5\n\t" +#else + "add r7, r7, r5\n\t" +#endif + "\n" + "L_sp_4096_sub_in_place_128_words_%=: \n\t" + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, r2\n\t" +#else + "sub r5, r5, r2\n\t" +#endif "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" - "sbc %[c], %[c]\n\t" - "add %[a], #8\n\t" - "add %[b], #8\n\t" +#ifdef __clang__ + "sbcs r2, r2\n\t" +#else + "sbc r2, r2\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #8\n\t" +#else + "add %[a], %[a], #8\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #8\n\t" +#else + "add %[b], %[b], #8\n\t" +#endif "cmp %[a], r7\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "bne L_sp_4096_sub_in_place_128_words_%=\n\t" + "movs %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7" ); - - return c; + return (uint32_t)(size_t)a; } #endif /* WOLFSSL_SP_SMALL */ @@ -12879,100 +25485,263 @@ SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a, SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit tmp[128 * 2]; + sp_digit t[128 * 2]; + sp_digit* tmp = t; __asm__ __volatile__ ( - "mov r3, #0\n\t" - "mov r4, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" "mov r8, r3\n\t" - "mov r11, %[r]\n\t" + "mov r11, %[tmp]\n\t" "mov r9, %[a]\n\t" "mov r10, %[b]\n\t" - "mov r6, #2\n\t" + "movs r6, #2\n\t" +#ifdef __clang__ + 
"lsls r6, r6, #8\n\t" +#else "lsl r6, r6, #8\n\t" - "add r6, r9\n\t" +#endif + "add r6, r6, r9\n\t" "mov r12, r6\n\t" - "\n1:\n\t" - "mov %[r], #0\n\t" - "mov r5, #0\n\t" - "mov r6, #1\n\t" - "lsl r6, r6, #8\n\t" - "add r6, #252\n\t" + "\n" + "L_sp_4096_mul_128_words_%=: \n\t" + "movs %[tmp], #0\n\t" + "movs r5, #0\n\t" + "movs r6, #0xff\n\t" +#ifdef __clang__ + "adds r6, r6, #0xfd\n\t" +#else + "add r6, r6, #0xfd\n\t" +#endif "mov %[a], r8\n\t" - "sub %[a], r6\n\t" +#ifdef __clang__ + "subs %[a], %[a], r6\n\t" +#else + "sub %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" +#endif +#ifdef __clang__ + "mvns r6, r6\n\t" +#else "mvn r6, r6\n\t" +#endif +#ifdef __clang__ + "ands %[a], r6\n\t" +#else "and %[a], r6\n\t" +#endif "mov %[b], r8\n\t" - "sub %[b], %[a]\n\t" - "add %[a], r9\n\t" - "add %[b], r10\n\t" - "\n2:\n\t" +#ifdef __clang__ + "subs %[b], %[b], %[a]\n\t" +#else + "sub %[b], %[b], %[a]\n\t" +#endif + "add %[a], %[a], r9\n\t" + "add %[b], %[b], r10\n\t" + "\n" + "L_sp_4096_mul_128_mul_%=: \n\t" "# Multiply Start\n\t" "ldr r6, [%[a]]\n\t" "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" - "adc r4, %[r]\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[tmp]\n\t" +#else + "adc r4, %[tmp]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef 
__clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r6, [%[a]]\n\t" "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "# Multiply Done\n\t" - "add %[a], #4\n\t" - "sub %[b], #4\n\t" +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "subs %[b], %[b], #4\n\t" +#else + "sub %[b], %[b], #4\n\t" +#endif "cmp %[a], r12\n\t" - "beq 3f\n\t" + "beq L_sp_4096_mul_128_done_mul_%=\n\t" "mov r6, r8\n\t" - "add r6, r9\n\t" + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" - "ble 2b\n\t" - "\n3:\n\t" - 
"mov %[r], r11\n\t" + "ble L_sp_4096_mul_128_mul_%=\n\t" + "\n" + "L_sp_4096_mul_128_done_mul_%=: \n\t" + "mov %[tmp], r11\n\t" "mov r7, r8\n\t" - "str r3, [%[r], r7]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "add r7, #4\n\t" + "str r3, [%[tmp], r7]\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "mov r8, r7\n\t" - "mov r6, #3\n\t" + "movs r6, #3\n\t" +#ifdef __clang__ + "lsls r6, r6, #8\n\t" +#else "lsl r6, r6, #8\n\t" - "add r6, #248\n\t" +#endif +#ifdef __clang__ + "adds r6, r6, #0xf8\n\t" +#else + "add r6, r6, #0xf8\n\t" +#endif "cmp r7, r6\n\t" - "ble 1b\n\t" - "str r3, [%[r], r7]\n\t" + "ble L_sp_4096_mul_128_words_%=\n\t" + "str r3, [%[tmp], r7]\n\t" "mov %[a], r9\n\t" "mov %[b], r10\n\t" + : [a] "+r" (a), [b] "+r" (b), [tmp] "+r" (tmp) : - : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); - XMEMCPY(r, tmp, sizeof(tmp)); + XMEMCPY(r, t, sizeof(t)); } /* Square a and put result in r. 
(r = a * a) @@ -12983,151 +25752,460 @@ SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( - "mov r3, #0\n\t" - "mov r4, #0\n\t" - "mov r5, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" + "movs r5, #0\n\t" "mov r8, r3\n\t" "mov r11, %[r]\n\t" - "mov r6, #4\n\t" + "movs r6, #4\n\t" +#ifdef __clang__ + "lsls r6, r6, #8\n\t" +#else "lsl r6, r6, #8\n\t" +#endif +#ifdef __clang__ + "negs r6, r6\n\t" +#else "neg r6, r6\n\t" - "add sp, r6\n\t" +#endif + "add sp, sp, r6\n\t" "mov r10, sp\n\t" "mov r9, %[a]\n\t" - "\n1:\n\t" - "mov %[r], #0\n\t" - "mov r6, #1\n\t" - "lsl r6, r6, #8\n\t" - "add r6, #252\n\t" + "\n" + "L_sp_4096_sqr_128_words_%=: \n\t" + "movs %[r], #0\n\t" + "movs r6, #0xff\n\t" +#ifdef __clang__ + "adds r6, r6, #0xfd\n\t" +#else + "add r6, r6, #0xfd\n\t" +#endif "mov %[a], r8\n\t" - "sub %[a], r6\n\t" +#ifdef __clang__ + "subs %[a], %[a], r6\n\t" +#else + "sub %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" +#endif +#ifdef __clang__ + "mvns r6, r6\n\t" +#else "mvn r6, r6\n\t" +#endif +#ifdef __clang__ + "ands %[a], r6\n\t" +#else "and %[a], r6\n\t" +#endif "mov r2, r8\n\t" - "sub r2, %[a]\n\t" - "add %[a], r9\n\t" - "add r2, r9\n\t" - "\n2:\n\t" +#ifdef __clang__ + "subs r2, r2, %[a]\n\t" +#else + "sub r2, r2, %[a]\n\t" +#endif + "add %[a], %[a], r9\n\t" + "add r2, r2, r9\n\t" + "\n" + "L_sp_4096_sqr_128_mul_%=: \n\t" "cmp r2, %[a]\n\t" - "beq 4f\n\t" + "beq L_sp_4096_sqr_128_sqr_%=\n\t" "# Multiply * 2: Start\n\t" "ldr r6, [%[a]]\n\t" "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, 
r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r7, [r2]\n\t" +#ifdef 
__clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# Multiply * 2: Done\n\t" - "bal 5f\n\t" - "\n4:\n\t" + "bal L_sp_4096_sqr_128_done_sqr_%=\n\t" + "\n" + "L_sp_4096_sqr_128_sqr_%=: \n\t" "# Square: Start\n\t" "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r6\n\t" +#else "mul r6, r6\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "muls r7, r7\n\t" +#else "mul r7, r7\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" 
+#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #15\n\t" +#else "lsr r7, r6, #15\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #17\n\t" +#else "lsl r6, r6, #17\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# Square: Done\n\t" - "\n5:\n\t" - "add %[a], #4\n\t" - "sub r2, #4\n\t" - "mov r6, #2\n\t" + "\n" + "L_sp_4096_sqr_128_done_sqr_%=: \n\t" +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "subs r2, r2, #4\n\t" +#else + "sub r2, r2, #4\n\t" +#endif + "movs r6, #2\n\t" +#ifdef __clang__ + "lsls r6, r6, #8\n\t" +#else "lsl r6, r6, #8\n\t" - "add r6, r9\n\t" +#endif + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" - "beq 3f\n\t" + "beq L_sp_4096_sqr_128_done_mul_%=\n\t" "cmp %[a], r2\n\t" - "bgt 3f\n\t" + "bgt L_sp_4096_sqr_128_done_mul_%=\n\t" "mov r7, r8\n\t" - "add r7, r9\n\t" + "add r7, r7, r9\n\t" "cmp %[a], r7\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble L_sp_4096_sqr_128_mul_%=\n\t" + "\n" + "L_sp_4096_sqr_128_done_mul_%=: \n\t" "mov %[r], r10\n\t" "mov r7, r8\n\t" "str r3, [%[r], r7]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "mov r5, #0\n\t" - "add r7, #4\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" + "movs r5, #0\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "mov r8, r7\n\t" - "mov r6, #3\n\t" + "movs r6, #3\n\t" +#ifdef __clang__ + "lsls r6, r6, #8\n\t" +#else "lsl r6, r6, #8\n\t" - "add r6, #248\n\t" +#endif +#ifdef __clang__ + "adds r6, r6, #0xf8\n\t" +#else + "add r6, r6, #0xf8\n\t" +#endif "cmp r7, r6\n\t" - "ble 1b\n\t" + "ble L_sp_4096_sqr_128_words_%=\n\t" "mov %[a], r9\n\t" "str r3, 
[%[r], r7]\n\t" "mov %[r], r11\n\t" "mov %[a], r10\n\t" - "mov r3, #3\n\t" + "movs r3, #3\n\t" +#ifdef __clang__ + "lsls r3, r3, #8\n\t" +#else "lsl r3, r3, #8\n\t" - "add r3, #252\n\t" - "\n4:\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, #0xfc\n\t" +#else + "add r3, r3, #0xfc\n\t" +#endif + "\n" + "L_sp_4096_sqr_128_store_%=: \n\t" "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" - "sub r3, #4\n\t" - "bge 4b\n\t" - "mov r6, #4\n\t" +#ifdef __clang__ + "subs r3, r3, #4\n\t" +#else + "sub r3, r3, #4\n\t" +#endif + "bge L_sp_4096_sqr_128_store_%=\n\t" + "movs r6, #4\n\t" +#ifdef __clang__ + "lsls r6, r6, #8\n\t" +#else "lsl r6, r6, #8\n\t" - "add sp, r6\n\t" +#endif + "add sp, sp, r6\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } @@ -13163,60 +26241,189 @@ SP_NOINLINE static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, sp_digit b) { __asm__ __volatile__ ( - "mov r6, #2\n\t" + "movs r6, #2\n\t" +#ifdef __clang__ + "lsls r6, r6, #8\n\t" +#else "lsl r6, r6, #8\n\t" - "add r6, %[a]\n\t" +#endif +#ifdef __clang__ + "adds r6, r6, %[a]\n\t" +#else + "add r6, r6, %[a]\n\t" +#endif "mov r8, %[r]\n\t" "mov r9, r6\n\t" - "mov r3, #0\n\t" - "mov r4, #0\n\t" - "1:\n\t" - "mov %[r], #0\n\t" - "mov r5, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" + "\n" + "L_sp_4096_mul_d_128_%=: \n\t" + "movs %[r], #0\n\t" + "movs r5, #0\n\t" "# A[] * B\n\t" "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, %[b], #16\n\t" +#else "lsl r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + 
"adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "lsrs r7, %[b], #16\n\t" +#else "lsr r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, %[b], #16\n\t" +#else "lsr r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "lsls r7, %[b], #16\n\t" +#else "lsl r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# A[] * B - Done\n\t" "mov %[r], r8\n\t" "str r3, [%[r]]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "add %[r], #4\n\t" - "add %[a], #4\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" +#ifdef __clang__ + "adds %[r], %[r], #4\n\t" +#else + "add %[r], 
%[r], #4\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif "mov r8, %[r]\n\t" "cmp %[a], r9\n\t" - "blt 1b\n\t" + "blt L_sp_4096_mul_d_128_%=\n\t" "str r3, [%[r]]\n\t" - : [r] "+r" (r), [a] "+r" (a) - : [b] "r" (b) + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -13248,31 +26455,55 @@ static void sp_4096_mont_norm_128(sp_digit* r, const sp_digit* m) SP_NOINLINE static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r5, #2\n\t" + "movs r4, #0\n\t" + "movs r5, #2\n\t" +#ifdef __clang__ + "lsls r5, r5, #8\n\t" +#else "lsl r5, r5, #8\n\t" +#endif "mov r8, r5\n\t" - "mov r7, #0\n\t" - "1:\n\t" + "movs r7, #0\n\t" + "\n" + "L_sp_4096_cond_sub_128_words_%=: \n\t" "ldr r6, [%[b], r7]\n\t" +#ifdef __clang__ + "ands r6, %[m]\n\t" +#else "and r6, %[m]\n\t" - "mov r5, #0\n\t" - "sub r5, %[c]\n\t" +#endif + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, r4\n\t" +#else + "sub r5, r5, r4\n\t" +#endif "ldr r5, [%[a], r7]\n\t" +#ifdef __clang__ + "sbcs r5, r6\n\t" +#else "sbc r5, r6\n\t" - "sbc %[c], %[c]\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r4\n\t" +#else + "sbc r4, r4\n\t" +#endif "str r5, [%[r], r7]\n\t" - "add r7, #4\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "cmp r7, r8\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r5", "r6", "r7", "r8" + "blt L_sp_4096_cond_sub_128_words_%=\n\t" + "movs %[r], r4\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r4", "r5", "r6", "r7", "r8" ); - - return c; + return (uint32_t)(size_t)r; } /* Reduce the number back to 4096 bits using Montgomery reduction. 
@@ -13285,139 +26516,382 @@ SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, sp_digit mp) { sp_digit ca = 0; - __asm__ __volatile__ ( "mov r8, %[mp]\n\t" "mov r12, %[ca]\n\t" - "mov r14, %[m]\n\t" + "mov lr, %[m]\n\t" "mov r9, %[a]\n\t" - "mov r4, #0\n\t" + "movs r4, #0\n\t" "# i = 0\n\t" "mov r11, r4\n\t" - "\n1:\n\t" - "mov r5, #0\n\t" - "mov %[ca], #0\n\t" + "\n" + "L_sp_4096_mont_reduce_128_mod_%=: \n\t" + "movs r5, #0\n\t" + "movs %[ca], #0\n\t" "# mu = a[i] * mp\n\t" "mov %[mp], r8\n\t" "ldr %[a], [%[a]]\n\t" +#ifdef __clang__ + "muls %[mp], %[a]\n\t" +#else "mul %[mp], %[a]\n\t" - "mov %[m], r14\n\t" +#endif + "mov %[m], lr\n\t" "mov r10, r9\n\t" - "\n2:\n\t" + "\n" + "L_sp_4096_mont_reduce_128_word_%=: \n\t" "# a[i+j] += m[j] * mu\n\t" "mov %[a], r10\n\t" "ldr %[a], [%[a]]\n\t" - "mov %[ca], #0\n\t" - "mov r4, r5\n\t" - "mov r5, #0\n\t" + "movs %[ca], #0\n\t" + "movs r4, r5\n\t" + "movs r5, #0\n\t" "# Multiply m[j] and mu - Start\n\t" "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r6, %[mp], #16\n\t" +#else "lsl r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add %[a], r7\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], r7\n\t" +#else + "add %[a], %[a], r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[ca]\n\t" +#else "adc r5, %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add %[a], r6\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], r6\n\t" +#else + 
"add %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r6, %[mp], #16\n\t" +#else "lsr r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r5, r7\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r7\n\t" +#else + "add r5, r5, r7\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add %[a], r6\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], r6\n\t" +#else + "add %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" +#endif "# Multiply m[j] and mu - Done\n\t" - "add r4, %[a]\n\t" +#ifdef __clang__ + "adds r4, r4, %[a]\n\t" +#else + "add r4, r4, %[a]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[ca]\n\t" +#else "adc r5, %[ca]\n\t" +#endif "mov %[a], r10\n\t" "str r4, [%[a]]\n\t" - "mov r6, #4\n\t" - "add %[m], #4\n\t" - "add r10, r6\n\t" - "mov r4, #1\n\t" - "lsl r4, r4, #8\n\t" - "add r4, #252\n\t" - "add r4, r9\n\t" + "movs r6, #4\n\t" +#ifdef __clang__ + "adds %[m], %[m], #4\n\t" +#else + "add %[m], %[m], #4\n\t" +#endif + "add r10, r10, r6\n\t" + "movs r4, #0xff\n\t" +#ifdef __clang__ + "adds r4, r4, #0xfd\n\t" +#else + "add r4, r4, #0xfd\n\t" +#endif + "add r4, r4, r9\n\t" "cmp r10, r4\n\t" - "blt 2b\n\t" + "blt L_sp_4096_mont_reduce_128_word_%=\n\t" "# a[i+127] += m[127] * mu\n\t" - "mov %[ca], #0\n\t" + "movs %[ca], #0\n\t" "mov r4, r12\n\t" - "mov %[a], #0\n\t" + "movs %[a], #0\n\t" "# Multiply m[127] and mu - Start\n\t" "ldr r7, [%[m]]\n\t" +#ifdef 
__clang__ + "lsls r6, %[mp], #16\n\t" +#else "lsl r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r5, r7\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r7\n\t" +#else + "add r5, r5, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[ca]\n\t" +#else "adc r4, %[ca]\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r6, %[mp], #16\n\t" +#else "lsr r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef 
__clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "# Multiply m[127] and mu - Done\n\t" - "mov %[ca], %[a]\n\t" + "movs %[ca], %[a]\n\t" "mov %[a], r10\n\t" "ldr r7, [%[a], #4]\n\t" "ldr %[a], [%[a]]\n\t" - "mov r6, #0\n\t" - "add r5, %[a]\n\t" + "movs r6, #0\n\t" +#ifdef __clang__ + "adds r5, r5, %[a]\n\t" +#else + "add r5, r5, %[a]\n\t" +#endif +#ifdef __clang__ + "adcs r7, r4\n\t" +#else "adc r7, r4\n\t" +#endif +#ifdef __clang__ + "adcs %[ca], r6\n\t" +#else "adc %[ca], r6\n\t" +#endif "mov %[a], r10\n\t" "str r5, [%[a]]\n\t" "str r7, [%[a], #4]\n\t" "# i += 1\n\t" - "mov r6, #4\n\t" - "add r9, r6\n\t" - "add r11, r6\n\t" + "movs r6, #4\n\t" + "add r9, r9, r6\n\t" + "add r11, r11, r6\n\t" "mov r12, %[ca]\n\t" "mov %[a], r9\n\t" - "mov r4, #2\n\t" + "movs r4, #2\n\t" +#ifdef __clang__ + "lsls r4, r4, #8\n\t" +#else "lsl r4, r4, #8\n\t" +#endif "cmp r11, r4\n\t" - "blt 1b\n\t" - "mov %[m], r14\n\t" - : [ca] "+r" (ca), [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + "blt L_sp_4096_mont_reduce_128_mod_%=\n\t" + "mov %[m], lr\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp), [ca] "+r" (ca) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); - sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - ca); } @@ -13463,135 +26937,512 @@ static void sp_4096_mont_sqr_128(sp_digit* r, const sp_digit* a, SP_NOINLINE static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div) { - sp_digit r = 0; - __asm__ __volatile__ ( + "movs r3, #0\n\t" +#ifdef __clang__ + "lsrs r5, %[div], #1\n\t" +#else "lsr r5, %[div], #1\n\t" - "add r5, #1\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, #1\n\t" +#else + 
"add r5, r5, #1\n\t" +#endif "mov r8, %[d0]\n\t" "mov r9, %[d1]\n\t" "# Do top 32\n\t" - "mov r6, r5\n\t" - "sub r6, %[d1]\n\t" + "movs r6, r5\n\t" +#ifdef __clang__ + "subs r6, r6, %[d1]\n\t" +#else + "sub r6, r6, %[d1]\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" - "add %[r], %[r]\n\t" - "sub %[r], r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r3\n\t" +#else + "add r3, r3, r3\n\t" +#endif +#ifdef __clang__ + "subs r3, r3, r6\n\t" +#else + "sub r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "ands r6, r5\n\t" +#else "and r6, r5\n\t" - "sub %[d1], r6\n\t" - "# Next 30 bits\n\t" - "mov r4, #29\n\t" - "1:\n\t" +#endif +#ifdef __clang__ + "subs %[d1], %[d1], r6\n\t" +#else + "sub %[d1], %[d1], r6\n\t" +#endif + "\n\t" + "movs r4, #29\n\t" + "\n" + "L_div_4096_word_128_loop_%=: \n\t" +#ifdef __clang__ + "lsls %[d0], %[d0], #1\n\t" +#else "lsl %[d0], %[d0], #1\n\t" +#endif +#ifdef __clang__ + "adcs %[d1], %[d1]\n\t" +#else "adc %[d1], %[d1]\n\t" - "mov r6, r5\n\t" - "sub r6, %[d1]\n\t" +#endif + "movs r6, r5\n\t" +#ifdef __clang__ + "subs r6, r6, %[d1]\n\t" +#else + "sub r6, r6, %[d1]\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" - "add %[r], %[r]\n\t" - "sub %[r], r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r3\n\t" +#else + "add r3, r3, r3\n\t" +#endif +#ifdef __clang__ + "subs r3, r3, r6\n\t" +#else + "sub r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "ands r6, r5\n\t" +#else "and r6, r5\n\t" - "sub %[d1], r6\n\t" - "sub r4, #1\n\t" - "bpl 1b\n\t" - "mov r7, #0\n\t" - "add %[r], %[r]\n\t" - "add %[r], #1\n\t" +#endif +#ifdef __clang__ + "subs %[d1], %[d1], r6\n\t" +#else + "sub %[d1], %[d1], r6\n\t" +#endif +#ifdef __clang__ + "subs r4, r4, #1\n\t" +#else + "sub r4, r4, #1\n\t" +#endif + "bpl L_div_4096_word_128_loop_%=\n\t" + "movs r7, #0\n\t" +#ifdef __clang__ + "adds r3, r3, r3\n\t" +#else + "add r3, r3, r3\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, #1\n\t" +#else + "add r3, r3, #1\n\t" +#endif "# r 
* div - Start\n\t" - "lsl %[d1], %[r], #16\n\t" +#ifdef __clang__ + "lsls %[d1], r3, #16\n\t" +#else + "lsl %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "lsls r4, %[div], #16\n\t" +#else "lsl r4, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], %[d1], #16\n\t" +#else "lsr %[d1], %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #16\n\t" +#else "lsr r4, r4, #16\n\t" +#endif +#ifdef __clang__ + "muls r4, %[d1]\n\t" +#else "mul r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[div], #16\n\t" +#else "lsr r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r5, %[d1], #16\n\t" +#else "lsr r5, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" - "lsr %[d1], %[r], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], r3, #16\n\t" +#else + "lsr %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, %[d1]\n\t" +#else "mul r6, %[d1]\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "lsls r6, %[div], #16\n\t" +#else "lsl r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[d1], #16\n\t" +#else "lsr r6, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" +#endif "# r * div - Done\n\t" "mov %[d1], r8\n\t" - "sub %[d1], r4\n\t" - "mov r4, %[d1]\n\t" +#ifdef __clang__ + "subs 
%[d1], %[d1], r4\n\t" +#else + "sub %[d1], %[d1], r4\n\t" +#endif + "movs r4, %[d1]\n\t" "mov %[d1], r9\n\t" +#ifdef __clang__ + "sbcs %[d1], r5\n\t" +#else "sbc %[d1], r5\n\t" - "mov r5, %[d1]\n\t" - "add %[r], r5\n\t" +#endif + "movs r5, %[d1]\n\t" +#ifdef __clang__ + "adds r3, r3, r5\n\t" +#else + "add r3, r3, r5\n\t" +#endif "# r * div - Start\n\t" - "lsl %[d1], %[r], #16\n\t" +#ifdef __clang__ + "lsls %[d1], r3, #16\n\t" +#else + "lsl %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "lsls r4, %[div], #16\n\t" +#else "lsl r4, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], %[d1], #16\n\t" +#else "lsr %[d1], %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #16\n\t" +#else "lsr r4, r4, #16\n\t" +#endif +#ifdef __clang__ + "muls r4, %[d1]\n\t" +#else "mul r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[div], #16\n\t" +#else "lsr r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r5, %[d1], #16\n\t" +#else "lsr r5, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" - "lsr %[d1], %[r], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], r3, #16\n\t" +#else + "lsr %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, %[d1]\n\t" +#else "mul r6, %[d1]\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "lsls r6, %[div], #16\n\t" +#else "lsl r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[d1], #16\n\t" +#else "lsr r6, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" 
+#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" +#endif "# r * div - Done\n\t" "mov %[d1], r8\n\t" "mov r6, r9\n\t" +#ifdef __clang__ + "subs r4, %[d1], r4\n\t" +#else "sub r4, %[d1], r4\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r5\n\t" +#else "sbc r6, r5\n\t" - "mov r5, r6\n\t" - "add %[r], r5\n\t" +#endif + "movs r5, r6\n\t" +#ifdef __clang__ + "adds r3, r3, r5\n\t" +#else + "add r3, r3, r5\n\t" +#endif "# r * div - Start\n\t" - "lsl %[d1], %[r], #16\n\t" +#ifdef __clang__ + "lsls %[d1], r3, #16\n\t" +#else + "lsl %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "lsls r4, %[div], #16\n\t" +#else "lsl r4, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], %[d1], #16\n\t" +#else "lsr %[d1], %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #16\n\t" +#else "lsr r4, r4, #16\n\t" +#endif +#ifdef __clang__ + "muls r4, %[d1]\n\t" +#else "mul r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[div], #16\n\t" +#else "lsr r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r5, %[d1], #16\n\t" +#else "lsr r5, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" - "lsr %[d1], %[r], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], r3, #16\n\t" +#else + "lsr %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, %[d1]\n\t" +#else "mul r6, %[d1]\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "lsls r6, %[div], #16\n\t" +#else "lsl r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" 
+#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[d1], #16\n\t" +#else "lsr r6, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" +#endif "# r * div - Done\n\t" "mov %[d1], r8\n\t" "mov r6, r9\n\t" +#ifdef __clang__ + "subs r4, %[d1], r4\n\t" +#else "sub r4, %[d1], r4\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r5\n\t" +#else "sbc r6, r5\n\t" - "mov r5, r6\n\t" - "add %[r], r5\n\t" - "mov r6, %[div]\n\t" - "sub r6, r4\n\t" +#endif + "movs r5, r6\n\t" +#ifdef __clang__ + "adds r3, r3, r5\n\t" +#else + "add r3, r3, r5\n\t" +#endif + "movs r6, %[div]\n\t" +#ifdef __clang__ + "subs r6, r6, r4\n\t" +#else + "sub r6, r6, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" - "sub %[r], r6\n\t" - : [r] "+r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r7", "r6", "r8", "r9" +#endif +#ifdef __clang__ + "subs r3, r3, r6\n\t" +#else + "sub r3, r3, r6\n\t" +#endif + "movs %[d1], r3\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); - return r; + return (uint32_t)(size_t)d1; } /* AND m into each word of a and store in r. 
@@ -13633,40 +27484,98 @@ static void sp_4096_mask_128(sp_digit* r, const sp_digit* a, sp_digit m) */ SP_NOINLINE static int32_t sp_4096_cmp_128(const sp_digit* a, const sp_digit* b) { - sp_digit r = 0; - - __asm__ __volatile__ ( - "mov r3, #0\n\t" + "movs r2, #0\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "mvns r3, r3\n\t" +#else "mvn r3, r3\n\t" - "mov r6, #1\n\t" - "lsl r6, r6, #8\n\t" - "add r6, #252\n\t" - "1:\n\t" +#endif + "movs r6, #0xff\n\t" +#ifdef __clang__ + "adds r6, r6, #0xfd\n\t" +#else + "add r6, r6, #0xfd\n\t" +#endif + "\n" + "L_sp_4096_cmp_128_words_%=: \n\t" "ldr r7, [%[a], r6]\n\t" "ldr r5, [%[b], r6]\n\t" +#ifdef __clang__ + "ands r7, r3\n\t" +#else "and r7, r3\n\t" +#endif +#ifdef __clang__ + "ands r5, r3\n\t" +#else "and r5, r3\n\t" - "mov r4, r7\n\t" - "sub r7, r5\n\t" +#endif + "movs r4, r7\n\t" +#ifdef __clang__ + "subs r7, r7, r5\n\t" +#else + "sub r7, r7, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r7\n\t" +#else "sbc r7, r7\n\t" - "add %[r], r7\n\t" +#endif +#ifdef __clang__ + "adds r2, r2, r7\n\t" +#else + "add r2, r2, r7\n\t" +#endif +#ifdef __clang__ + "mvns r7, r7\n\t" +#else "mvn r7, r7\n\t" +#endif +#ifdef __clang__ + "ands r3, r7\n\t" +#else "and r3, r7\n\t" - "sub r5, r4\n\t" +#endif +#ifdef __clang__ + "subs r5, r5, r4\n\t" +#else + "sub r5, r5, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r7\n\t" +#else "sbc r7, r7\n\t" - "sub %[r], r7\n\t" +#endif +#ifdef __clang__ + "subs r2, r2, r7\n\t" +#else + "sub r2, r2, r7\n\t" +#endif +#ifdef __clang__ + "mvns r7, r7\n\t" +#else "mvn r7, r7\n\t" +#endif +#ifdef __clang__ + "ands r3, r7\n\t" +#else "and r3, r7\n\t" - "sub r6, #4\n\t" +#endif +#ifdef __clang__ + "subs r6, r6, #4\n\t" +#else + "sub r6, r6, #4\n\t" +#endif "cmp r6, #0\n\t" - "bge 1b\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b) - : "r3", "r4", "r5", "r6", "r7" + "bge L_sp_4096_cmp_128_words_%=\n\t" + "movs %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7" ); - - 
return r; + return (uint32_t)(size_t)a; } /* Divide d in a and put remainder into r (m*d + r = a) @@ -14213,36 +28122,64 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, * b A single precision number to add. * m Mask value to apply. */ -SP_NOINLINE static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +SP_NOINLINE static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, + const sp_digit* b, sp_digit m) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r5, #1\n\t" - "lsl r5, r5, #8\n\t" + "movs r4, #0\n\t" + "movs r5, #0xff\n\t" +#ifdef __clang__ + "adds r5, r5, #1\n\t" +#else + "add r5, r5, #1\n\t" +#endif "mov r8, r5\n\t" - "mov r7, #0\n\t" - "1:\n\t" + "movs r7, #0\n\t" + "\n" + "L_sp_4096_cond_add_64_words_%=: \n\t" "ldr r6, [%[b], r7]\n\t" +#ifdef __clang__ + "ands r6, %[m]\n\t" +#else "and r6, %[m]\n\t" - "mov r5, #0\n\t" - "sub r5, #1\n\t" - "add r5, %[c]\n\t" +#endif + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, #1\n\t" +#else + "sub r5, r5, #1\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r4\n\t" +#else + "add r5, r5, r4\n\t" +#endif "ldr r5, [%[a], r7]\n\t" +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" +#endif + "movs r4, #0\n\t" +#ifdef __clang__ + "adcs r4, r4\n\t" +#else + "adc r4, r4\n\t" +#endif "str r5, [%[r], r7]\n\t" - "add r7, #4\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "cmp r7, r8\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r5", "r6", "r7", "r8" + "blt L_sp_4096_cond_add_64_words_%=\n\t" + "movs %[r], r4\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r4", "r5", "r6", "r7", "r8" ); - - return c; + return (uint32_t)(size_t)r; } /* RSA private key operation. 
@@ -14553,800 +28490,2926 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, #ifdef WOLFSSL_HAVE_SP_DH #ifdef HAVE_FFDHE_4096 -static void sp_4096_lshift_128(sp_digit* r, sp_digit* a, byte n) +/* Left shift a by n bits into r. (r = a << n) + * + * r A single precision integer. + * a A single precision integer. + * n Integer representing number of bits to shift. + */ +static void sp_4096_lshift_128(sp_digit* r, const sp_digit* a, byte n) { __asm__ __volatile__ ( - "mov r6, #31\n\t" - "sub r6, r6, %[n]\n\t" - "add %[a], %[a], #255\n\t" - "add %[r], %[r], #255\n\t" - "add %[a], %[a], #193\n\t" - "add %[r], %[r], #193\n\t" - "ldr r3, [%[a], #60]\n\t" - "lsr r4, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r4, r4, r6\n\t" - "ldr r2, [%[a], #56]\n\t" - "str r4, [%[r], #64]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #52]\n\t" - "str r3, [%[r], #60]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #48]\n\t" - "str r2, [%[r], #56]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #44]\n\t" - "str r4, [%[r], #52]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #40]\n\t" - "str r3, [%[r], #48]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #36]\n\t" - "str r2, [%[r], #44]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #32]\n\t" - "str r4, [%[r], #40]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #28]\n\t" - "str r3, [%[r], #36]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #24]\n\t" - "str r2, 
[%[r], #32]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #20]\n\t" - "str r4, [%[r], #28]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #16]\n\t" - "str r3, [%[r], #24]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #12]\n\t" - "str r2, [%[r], #20]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #8]\n\t" - "str r4, [%[r], #16]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #4]\n\t" - "str r3, [%[r], #12]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #0]\n\t" - "str r2, [%[r], #8]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "sub %[a], %[a], #64\n\t" - "sub %[r], %[r], #64\n\t" - "ldr r2, [%[a], #60]\n\t" - "str r4, [%[r], #68]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #56]\n\t" - "str r3, [%[r], #64]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #52]\n\t" - "str r2, [%[r], #60]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #48]\n\t" - "str r4, [%[r], #56]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #44]\n\t" - "str r3, [%[r], #52]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #40]\n\t" - "str r2, [%[r], #48]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #36]\n\t" - "str 
r4, [%[r], #44]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #32]\n\t" - "str r3, [%[r], #40]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #28]\n\t" - "str r2, [%[r], #36]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #24]\n\t" - "str r4, [%[r], #32]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #20]\n\t" - "str r3, [%[r], #28]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #16]\n\t" - "str r2, [%[r], #24]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #12]\n\t" - "str r4, [%[r], #20]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #8]\n\t" - "str r3, [%[r], #16]\n\t" - "lsr r5, r4, #1\n\t" - "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #4]\n\t" - "str r2, [%[r], #12]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #0]\n\t" - "str r4, [%[r], #8]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "sub %[a], %[a], #64\n\t" - "sub %[r], %[r], #64\n\t" + "movs r7, #31\n\t" +#ifdef __clang__ + "subs r7, r7, %[n]\n\t" +#else + "sub r7, r7, %[n]\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #0xff\n\t" +#else + "add %[a], %[a], #0xff\n\t" +#endif +#ifdef __clang__ + "adds %[r], %[r], #0xff\n\t" +#else + "add %[r], %[r], #0xff\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #0xc1\n\t" +#else + "add %[a], %[a], #0xc1\n\t" +#endif +#ifdef __clang__ + "adds %[r], %[r], #0xc1\n\t" +#else + "add %[r], %[r], #0xc1\n\t" +#endif 
"ldr r4, [%[a], #60]\n\t" - "str r3, [%[r], #68]\n\t" +#ifdef __clang__ + "lsrs r5, r4, #1\n\t" +#else "lsr r5, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r5, r5, r7\n\t" +#else + "lsr r5, r5, r7\n\t" +#endif "ldr r3, [%[a], #56]\n\t" - "str r2, [%[r], #64]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #64]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #52]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #52]\n\t" "str r4, [%[r], #60]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #48]\n\t" "str r3, [%[r], #56]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #44]\n\t" - "str r2, [%[r], #52]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #52]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr 
r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #40]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #40]\n\t" "str r4, [%[r], #48]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #36]\n\t" "str r3, [%[r], #44]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #32]\n\t" - "str r2, [%[r], #40]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #40]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #28]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #28]\n\t" "str r4, [%[r], #36]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, 
r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #24]\n\t" "str r3, [%[r], #32]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #20]\n\t" - "str r2, [%[r], #28]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #28]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #16]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #16]\n\t" "str r4, [%[r], #24]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #12]\n\t" "str r3, [%[r], #20]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #8]\n\t" - "str r2, [%[r], #16]\n\t" - "lsr r5, r3, #1\n\t" + "str 
r5, [%[r], #16]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #4]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #4]\n\t" "str r4, [%[r], #12]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #0]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a]]\n\t" "str r3, [%[r], #8]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "sub %[a], %[a], #64\n\t" - "sub %[r], %[r], #64\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif +#ifdef __clang__ + "subs %[a], %[a], #0x40\n\t" +#else + "sub %[a], %[a], #0x40\n\t" +#endif +#ifdef __clang__ + "subs %[r], %[r], #0x40\n\t" +#else + "sub %[r], %[r], #0x40\n\t" +#endif "ldr r3, [%[a], #60]\n\t" - "str r2, [%[r], #68]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #68]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #56]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif 
+#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #56]\n\t" "str r4, [%[r], #64]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #52]\n\t" "str r3, [%[r], #60]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #48]\n\t" - "str r2, [%[r], #56]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #56]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #44]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #44]\n\t" "str r4, [%[r], #52]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #40]\n\t" "str r3, [%[r], #48]\n\t" - "lsr r5, r4, 
#1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #36]\n\t" - "str r2, [%[r], #44]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #44]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #32]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #32]\n\t" "str r4, [%[r], #40]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #28]\n\t" "str r3, [%[r], #36]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #24]\n\t" - "str r2, [%[r], #32]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #32]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else 
"lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #20]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #20]\n\t" "str r4, [%[r], #28]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #16]\n\t" "str r3, [%[r], #24]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #12]\n\t" - "str r2, [%[r], #20]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #20]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #8]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #8]\n\t" "str r4, [%[r], #16]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, 
r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #4]\n\t" "str r3, [%[r], #12]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #0]\n\t" - "str r2, [%[r], #8]\n\t" - "lsr r5, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a]]\n\t" + "str r5, [%[r], #8]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "sub %[a], %[a], #64\n\t" - "sub %[r], %[r], #64\n\t" - "ldr r2, [%[a], #60]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif +#ifdef __clang__ + "subs %[a], %[a], #0x40\n\t" +#else + "sub %[a], %[a], #0x40\n\t" +#endif +#ifdef __clang__ + "subs %[r], %[r], #0x40\n\t" +#else + "sub %[r], %[r], #0x40\n\t" +#endif + "ldr r5, [%[a], #60]\n\t" "str r4, [%[r], #68]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #56]\n\t" "str r3, [%[r], #64]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, 
%[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #52]\n\t" - "str r2, [%[r], #60]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #60]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #48]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #48]\n\t" "str r4, [%[r], #56]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #44]\n\t" "str r3, [%[r], #52]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #40]\n\t" - "str r2, [%[r], #48]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #48]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #36]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + 
"lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #36]\n\t" "str r4, [%[r], #44]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #32]\n\t" "str r3, [%[r], #40]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #28]\n\t" - "str r2, [%[r], #36]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #36]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #24]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #24]\n\t" "str r4, [%[r], #32]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #20]\n\t" "str r3, [%[r], 
#28]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #16]\n\t" - "str r2, [%[r], #24]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #24]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #12]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #12]\n\t" "str r4, [%[r], #20]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #8]\n\t" "str r3, [%[r], #16]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #4]\n\t" - "str r2, [%[r], #12]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #12]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, 
r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #0]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a]]\n\t" "str r4, [%[r], #8]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "sub %[a], %[a], #64\n\t" - "sub %[r], %[r], #64\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif +#ifdef __clang__ + "subs %[a], %[a], #0x40\n\t" +#else + "sub %[a], %[a], #0x40\n\t" +#endif +#ifdef __clang__ + "subs %[r], %[r], #0x40\n\t" +#else + "sub %[r], %[r], #0x40\n\t" +#endif "ldr r4, [%[a], #60]\n\t" "str r3, [%[r], #68]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #56]\n\t" - "str r2, [%[r], #64]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #64]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #52]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #52]\n\t" "str r4, [%[r], #60]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, 
%[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #48]\n\t" "str r3, [%[r], #56]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #44]\n\t" - "str r2, [%[r], #52]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #52]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #40]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #40]\n\t" "str r4, [%[r], #48]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #36]\n\t" "str r3, [%[r], #44]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr 
r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #32]\n\t" - "str r2, [%[r], #40]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #40]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #28]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #28]\n\t" "str r4, [%[r], #36]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #24]\n\t" "str r3, [%[r], #32]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #20]\n\t" - "str r2, [%[r], #28]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #28]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #16]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, 
r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #16]\n\t" "str r4, [%[r], #24]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #12]\n\t" "str r3, [%[r], #20]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #8]\n\t" - "str r2, [%[r], #16]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #16]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #4]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #4]\n\t" "str r4, [%[r], #12]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #0]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a]]\n\t" "str r3, 
[%[r], #8]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "sub %[a], %[a], #64\n\t" - "sub %[r], %[r], #64\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif +#ifdef __clang__ + "subs %[a], %[a], #0x40\n\t" +#else + "sub %[a], %[a], #0x40\n\t" +#endif +#ifdef __clang__ + "subs %[r], %[r], #0x40\n\t" +#else + "sub %[r], %[r], #0x40\n\t" +#endif "ldr r3, [%[a], #60]\n\t" - "str r2, [%[r], #68]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #68]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #56]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #56]\n\t" "str r4, [%[r], #64]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #52]\n\t" "str r3, [%[r], #60]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, 
r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #48]\n\t" - "str r2, [%[r], #56]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #56]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #44]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #44]\n\t" "str r4, [%[r], #52]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #40]\n\t" "str r3, [%[r], #48]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #36]\n\t" - "str r2, [%[r], #44]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #44]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #32]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #32]\n\t" "str r4, [%[r], #40]\n\t" - "lsr r5, r2, 
#1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #28]\n\t" "str r3, [%[r], #36]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #24]\n\t" - "str r2, [%[r], #32]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #32]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #20]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #20]\n\t" "str r4, [%[r], #28]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #16]\n\t" "str r3, [%[r], #24]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl 
r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #12]\n\t" - "str r2, [%[r], #20]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #20]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #8]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #8]\n\t" "str r4, [%[r], #16]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #4]\n\t" "str r3, [%[r], #12]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #0]\n\t" - "str r2, [%[r], #8]\n\t" - "lsr r5, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a]]\n\t" + "str r5, [%[r], #8]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "sub %[a], %[a], #64\n\t" - "sub %[r], %[r], #64\n\t" - 
"ldr r2, [%[a], #60]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif +#ifdef __clang__ + "subs %[a], %[a], #0x40\n\t" +#else + "sub %[a], %[a], #0x40\n\t" +#endif +#ifdef __clang__ + "subs %[r], %[r], #0x40\n\t" +#else + "sub %[r], %[r], #0x40\n\t" +#endif + "ldr r5, [%[a], #60]\n\t" "str r4, [%[r], #68]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #56]\n\t" "str r3, [%[r], #64]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #52]\n\t" - "str r2, [%[r], #60]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #60]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #48]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #48]\n\t" "str r4, [%[r], #56]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif 
+#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #44]\n\t" "str r3, [%[r], #52]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #40]\n\t" - "str r2, [%[r], #48]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #48]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #36]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #36]\n\t" "str r4, [%[r], #44]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #32]\n\t" "str r3, [%[r], #40]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef 
__clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #28]\n\t" - "str r2, [%[r], #36]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #36]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #24]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #24]\n\t" "str r4, [%[r], #32]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #20]\n\t" "str r3, [%[r], #28]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #16]\n\t" - "str r2, [%[r], #24]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #24]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #12]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #12]\n\t" "str r4, [%[r], 
#20]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif "ldr r4, [%[a], #8]\n\t" "str r3, [%[r], #16]\n\t" - "lsr r5, r4, #1\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif "ldr r3, [%[a], #4]\n\t" - "str r2, [%[r], #12]\n\t" - "lsr r5, r3, #1\n\t" + "str r5, [%[r], #12]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #0]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a]]\n\t" "str r4, [%[r], #8]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "str r2, [%[r]]\n\t" - "str r3, [%[r], #4]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif +#ifdef __clang__ + "subs %[a], %[a], #0x40\n\t" +#else + "sub %[a], %[a], #0x40\n\t" +#endif +#ifdef __clang__ + "subs %[r], %[r], #0x40\n\t" 
+#else + "sub %[r], %[r], #0x40\n\t" +#endif + "ldr r4, [%[a], #60]\n\t" + "str r3, [%[r], #68]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a], #56]\n\t" + "str r5, [%[r], #64]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #52]\n\t" + "str r4, [%[r], #60]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a], #48]\n\t" + "str r3, [%[r], #56]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a], #44]\n\t" + "str r5, [%[r], #52]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #40]\n\t" + "str r4, [%[r], #48]\n\t" +#ifdef __clang__ + "lsrs r6, r5, 
#1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a], #36]\n\t" + "str r3, [%[r], #44]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a], #32]\n\t" + "str r5, [%[r], #40]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #28]\n\t" + "str r4, [%[r], #36]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a], #24]\n\t" + "str r3, [%[r], #32]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a], #20]\n\t" + "str r5, [%[r], #28]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef 
__clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #16]\n\t" + "str r4, [%[r], #24]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a], #12]\n\t" + "str r3, [%[r], #20]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a], #8]\n\t" + "str r5, [%[r], #16]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #4]\n\t" + "str r4, [%[r], #12]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a]]\n\t" + "str r3, [%[r], #8]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif 
+#ifdef __clang__ + "subs %[a], %[a], #0x40\n\t" +#else + "sub %[a], %[a], #0x40\n\t" +#endif +#ifdef __clang__ + "subs %[r], %[r], #0x40\n\t" +#else + "sub %[r], %[r], #0x40\n\t" +#endif + "ldr r3, [%[a], #60]\n\t" + "str r5, [%[r], #68]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #56]\n\t" + "str r4, [%[r], #64]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a], #52]\n\t" + "str r3, [%[r], #60]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a], #48]\n\t" + "str r5, [%[r], #56]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #44]\n\t" + "str r4, [%[r], #52]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + 
"orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a], #40]\n\t" + "str r3, [%[r], #48]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a], #36]\n\t" + "str r5, [%[r], #44]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #32]\n\t" + "str r4, [%[r], #40]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a], #28]\n\t" + "str r3, [%[r], #36]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a], #24]\n\t" + "str r5, [%[r], #32]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #20]\n\t" + "str r4, [%[r], #28]\n\t" +#ifdef __clang__ + "lsrs r6, 
r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a], #16]\n\t" + "str r3, [%[r], #24]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a], #12]\n\t" + "str r5, [%[r], #20]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r5, [%[a], #8]\n\t" + "str r4, [%[r], #16]\n\t" +#ifdef __clang__ + "lsrs r6, r5, #1\n\t" +#else + "lsr r6, r5, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r5, %[n]\n\t" +#else + "lsl r5, r5, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r3, r6\n\t" +#else + "orr r3, r6\n\t" +#endif + "ldr r4, [%[a], #4]\n\t" + "str r3, [%[r], #12]\n\t" +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, %[n]\n\t" +#else + "lsl r4, r4, %[n]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r3, [%[a]]\n\t" + "str r5, [%[r], #8]\n\t" +#ifdef __clang__ + "lsrs r6, r3, #1\n\t" +#else + "lsr r6, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, %[n]\n\t" +#else + "lsl r3, r3, %[n]\n\t" +#endif +#ifdef 
__clang__ + "lsrs r6, r6, r7\n\t" +#else + "lsr r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "str r3, [%[r]]\n\t" + "str r4, [%[r], #4]\n\t" + : [r] "+r" (r), [a] "+r" (a), [n] "+r" (n) : - : [r] "r" (r), [a] "r" (a), [n] "r" (n) - : "memory", "r2", "r3", "r4", "r5", "r6" + : "memory", "r3", "r4", "r5", "r6", "r7" ); } @@ -15629,95 +31692,243 @@ static const sp_digit p256_b[8] = { SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit tmp[8 * 2]; + sp_digit t[8 * 2]; + sp_digit* tmp = t; __asm__ __volatile__ ( - "mov r3, #0\n\t" - "mov r4, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" "mov r8, r3\n\t" - "mov r11, %[r]\n\t" + "mov r11, %[tmp]\n\t" "mov r9, %[a]\n\t" "mov r10, %[b]\n\t" - "mov r6, #32\n\t" - "add r6, r9\n\t" + "movs r6, #32\n\t" + "add r6, r6, r9\n\t" "mov r12, r6\n\t" - "\n1:\n\t" - "mov %[r], #0\n\t" - "mov r5, #0\n\t" - "mov r6, #28\n\t" + "\n" + "L_sp_256_mul_8_words_%=: \n\t" + "movs %[tmp], #0\n\t" + "movs r5, #0\n\t" + "movs r6, #28\n\t" "mov %[a], r8\n\t" - "sub %[a], r6\n\t" +#ifdef __clang__ + "subs %[a], %[a], r6\n\t" +#else + "sub %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" +#endif +#ifdef __clang__ + "mvns r6, r6\n\t" +#else "mvn r6, r6\n\t" +#endif +#ifdef __clang__ + "ands %[a], r6\n\t" +#else "and %[a], r6\n\t" +#endif "mov %[b], r8\n\t" - "sub %[b], %[a]\n\t" - "add %[a], r9\n\t" - "add %[b], r10\n\t" - "\n2:\n\t" +#ifdef __clang__ + "subs %[b], %[b], %[a]\n\t" +#else + "sub %[b], %[b], %[a]\n\t" +#endif + "add %[a], %[a], r9\n\t" + "add %[b], %[b], r10\n\t" + "\n" + "L_sp_256_mul_8_mul_%=: \n\t" "# Multiply Start\n\t" "ldr r6, [%[a]]\n\t" "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" 
+#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" - "adc r4, %[r]\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[tmp]\n\t" +#else + "adc r4, %[tmp]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r6, [%[a]]\n\t" "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, 
r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "# Multiply Done\n\t" - "add %[a], #4\n\t" - "sub %[b], #4\n\t" +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "subs %[b], %[b], #4\n\t" +#else + "sub %[b], %[b], #4\n\t" +#endif "cmp %[a], r12\n\t" - "beq 3f\n\t" + "beq L_sp_256_mul_8_done_mul_%=\n\t" "mov r6, r8\n\t" - "add r6, r9\n\t" + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" - "ble 2b\n\t" - "\n3:\n\t" - "mov %[r], r11\n\t" + "ble L_sp_256_mul_8_mul_%=\n\t" + "\n" + "L_sp_256_mul_8_done_mul_%=: \n\t" + "mov %[tmp], r11\n\t" "mov r7, r8\n\t" - "str r3, [%[r], r7]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "add r7, #4\n\t" + "str r3, [%[tmp], r7]\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "mov r8, r7\n\t" - "mov r6, #56\n\t" + "movs r6, #56\n\t" "cmp r7, r6\n\t" - "ble 1b\n\t" - "str r3, [%[r], r7]\n\t" + "ble L_sp_256_mul_8_words_%=\n\t" + "str r3, [%[tmp], r7]\n\t" "mov %[a], r9\n\t" "mov %[b], r10\n\t" + : [a] "+r" (a), [b] "+r" (b), [tmp] "+r" (tmp) : - : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); - XMEMCPY(r, tmp, sizeof(tmp)); + XMEMCPY(r, t, sizeof(t)); } /* Square a and put result in r. 
(r = a * a) @@ -15728,142 +31939,420 @@ SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a, SP_NOINLINE static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( - "mov r3, #0\n\t" - "mov r4, #0\n\t" - "mov r5, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" + "movs r5, #0\n\t" "mov r8, r3\n\t" "mov r11, %[r]\n\t" - "mov r6, #64\n\t" + "movs r6, #0x40\n\t" +#ifdef __clang__ + "negs r6, r6\n\t" +#else "neg r6, r6\n\t" - "add sp, r6\n\t" +#endif + "add sp, sp, r6\n\t" "mov r10, sp\n\t" "mov r9, %[a]\n\t" - "\n1:\n\t" - "mov %[r], #0\n\t" - "mov r6, #28\n\t" + "\n" + "L_sp_256_sqr_8_words_%=: \n\t" + "movs %[r], #0\n\t" + "movs r6, #28\n\t" "mov %[a], r8\n\t" - "sub %[a], r6\n\t" +#ifdef __clang__ + "subs %[a], %[a], r6\n\t" +#else + "sub %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" +#endif +#ifdef __clang__ + "mvns r6, r6\n\t" +#else "mvn r6, r6\n\t" +#endif +#ifdef __clang__ + "ands %[a], r6\n\t" +#else "and %[a], r6\n\t" +#endif "mov r2, r8\n\t" - "sub r2, %[a]\n\t" - "add %[a], r9\n\t" - "add r2, r9\n\t" - "\n2:\n\t" +#ifdef __clang__ + "subs r2, r2, %[a]\n\t" +#else + "sub r2, r2, %[a]\n\t" +#endif + "add %[a], %[a], r9\n\t" + "add r2, r2, r9\n\t" + "\n" + "L_sp_256_sqr_8_mul_%=: \n\t" "cmp r2, %[a]\n\t" - "beq 4f\n\t" + "beq L_sp_256_sqr_8_sqr_%=\n\t" "# Multiply * 2: Start\n\t" "ldr r6, [%[a]]\n\t" "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef 
__clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif 
+#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# Multiply * 2: Done\n\t" - "bal 5f\n\t" - "\n4:\n\t" + "bal L_sp_256_sqr_8_done_sqr_%=\n\t" + "\n" + "L_sp_256_sqr_8_sqr_%=: \n\t" "# Square: Start\n\t" "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r6\n\t" +#else "mul r6, r6\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "muls r7, r7\n\t" +#else "mul r7, r7\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #15\n\t" 
+#else "lsr r7, r6, #15\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #17\n\t" +#else "lsl r6, r6, #17\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# Square: Done\n\t" - "\n5:\n\t" - "add %[a], #4\n\t" - "sub r2, #4\n\t" - "mov r6, #32\n\t" - "add r6, r9\n\t" + "\n" + "L_sp_256_sqr_8_done_sqr_%=: \n\t" +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "subs r2, r2, #4\n\t" +#else + "sub r2, r2, #4\n\t" +#endif + "movs r6, #32\n\t" + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" - "beq 3f\n\t" + "beq L_sp_256_sqr_8_done_mul_%=\n\t" "cmp %[a], r2\n\t" - "bgt 3f\n\t" + "bgt L_sp_256_sqr_8_done_mul_%=\n\t" "mov r7, r8\n\t" - "add r7, r9\n\t" + "add r7, r7, r9\n\t" "cmp %[a], r7\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble L_sp_256_sqr_8_mul_%=\n\t" + "\n" + "L_sp_256_sqr_8_done_mul_%=: \n\t" "mov %[r], r10\n\t" "mov r7, r8\n\t" "str r3, [%[r], r7]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "mov r5, #0\n\t" - "add r7, #4\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" + "movs r5, #0\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "mov r8, r7\n\t" - "mov r6, #56\n\t" + "movs r6, #56\n\t" "cmp r7, r6\n\t" - "ble 1b\n\t" + "ble L_sp_256_sqr_8_words_%=\n\t" "mov %[a], r9\n\t" "str r3, [%[r], r7]\n\t" "mov %[r], r11\n\t" "mov %[a], r10\n\t" - "mov r3, #60\n\t" - "\n4:\n\t" + "movs r3, #60\n\t" + "\n" + "L_sp_256_sqr_8_store_%=: \n\t" "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" - "sub r3, #4\n\t" - "bge 4b\n\t" - "mov r6, #64\n\t" - "add sp, r6\n\t" +#ifdef __clang__ + "subs r3, r3, #4\n\t" +#else + "sub r3, r3, #4\n\t" +#endif + "bge L_sp_256_sqr_8_store_%=\n\t" + "movs r6, #0x40\n\t" + "add sp, sp, r6\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) : 
"memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } @@ -15878,32 +32367,64 @@ SP_NOINLINE static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r6, %[a]\n\t" - "mov r7, #0\n\t" - "add r6, #32\n\t" - "sub r7, #1\n\t" - "\n1:\n\t" - "add %[c], r7\n\t" + "movs r6, %[a]\n\t" + "movs r7, #0\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "adds r6, r6, #32\n\t" +#else + "add r6, r6, #32\n\t" +#endif +#ifdef __clang__ + "subs r7, r7, #1\n\t" +#else + "sub r7, r7, #1\n\t" +#endif + "\n" + "L_sp_256_add_8_word_%=: \n\t" +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif "ldr r4, [%[a]]\n\t" "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r]]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - "add %[a], #4\n\t" - "add %[b], #4\n\t" - "add %[r], #4\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #4\n\t" +#else + "add %[b], %[b], #4\n\t" +#endif +#ifdef __clang__ + "adds %[r], %[r], #4\n\t" +#else + "add %[r], %[r], #4\n\t" +#endif "cmp %[a], r6\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "bne L_sp_256_add_8_word_%=\n\t" + "movs %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r3", "r4", "r5", "r6", "r7" ); - - return c; + return (uint32_t)(size_t)r; } #else @@ -15916,49 +32437,83 @@ SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "add r4, 
r5\n\t" - "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "adds r4, r4, r5\n\t" +#else + "add r4, r4, r5\n\t" +#endif + "str r4, [%[r]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #28]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif + "movs %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5" + : "memory", "r3", "r4", "r5" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ @@ -15972,30 +32527,58 @@ SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r6, %[a]\n\t" - "add r6, #32\n\t" - "\n1:\n\t" - "mov r5, #0\n\t" - "sub r5, %[c]\n\t" + "movs r6, %[a]\n\t" + "movs r3, #0\n\t" +#ifdef 
__clang__ + "adds r6, r6, #32\n\t" +#else + "add r6, r6, #32\n\t" +#endif + "\n" + "L_sp_256_sub_8_word_%=: \n\t" + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, r3\n\t" +#else + "sub r5, r5, r3\n\t" +#endif "ldr r4, [%[a]]\n\t" "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "sbcs r4, r5\n\t" +#else "sbc r4, r5\n\t" +#endif "str r4, [%[r]]\n\t" - "sbc %[c], %[c]\n\t" - "add %[a], #4\n\t" - "add %[b], #4\n\t" - "add %[r], #4\n\t" +#ifdef __clang__ + "sbcs r3, r3\n\t" +#else + "sbc r3, r3\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #4\n\t" +#else + "add %[b], %[b], #4\n\t" +#endif +#ifdef __clang__ + "adds %[r], %[r], #4\n\t" +#else + "add %[r], %[r], #4\n\t" +#endif "cmp %[a], r6\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "bne L_sp_256_sub_8_word_%=\n\t" + "movs %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6" + : "memory", "r3", "r4", "r5", "r6" ); - - return c; + return (uint32_t)(size_t)r; } #else @@ -16008,48 +32591,83 @@ SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldr r4, [%[a], #0]\n\t" + "movs r3, #0\n\t" + "ldr r4, [%[a]]\n\t" "ldr r5, [%[a], #4]\n\t" - "ldr r6, [%[b], #0]\n\t" + "ldr r6, [%[b]]\n\t" "ldr r7, [%[b], #4]\n\t" - "sub r4, r6\n\t" +#ifdef __clang__ + "subs r4, r4, r6\n\t" +#else + "sub r4, r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" - "str r4, [%[r], #0]\n\t" +#endif + "str r4, [%[r]]\n\t" "str r5, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[a], #12]\n\t" "ldr r6, [%[b], #8]\n\t" "ldr r7, [%[b], #12]\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" +#endif "str r4, [%[r], 
#8]\n\t" "str r5, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[a], #20]\n\t" "ldr r6, [%[b], #16]\n\t" "ldr r7, [%[b], #20]\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" +#endif "str r4, [%[r], #16]\n\t" "str r5, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[a], #28]\n\t" "ldr r6, [%[b], #24]\n\t" "ldr r7, [%[b], #28]\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" +#endif "str r4, [%[r], #24]\n\t" "str r5, [%[r], #28]\n\t" - "sbc %[c], %[c]\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) +#ifdef __clang__ + "sbcs r3, r3\n\t" +#else + "sbc r3, r3\n\t" +#endif + "movs %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r3", "r4", "r5", "r6", "r7" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ @@ -16112,14 +32730,14 @@ static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit* t[5] += t[4] >> 32; t[4] &= 0xffffffff; t[6] += t[5] >> 32; t[5] &= 0xffffffff; t[7] += t[6] >> 32; t[6] &= 0xffffffff; - r[0] = t[0]; - r[1] = t[1]; - r[2] = t[2]; - r[3] = t[3]; - r[4] = t[4]; - r[5] = t[5]; - r[6] = t[6]; - r[7] = t[7]; + r[0] = (sp_digit)t[0]; + r[1] = (sp_digit)t[1]; + r[2] = (sp_digit)t[2]; + r[3] = (sp_digit)t[3]; + r[4] = (sp_digit)t[4]; + r[5] = (sp_digit)t[5]; + r[6] = (sp_digit)t[6]; + r[7] = (sp_digit)t[7]; return MP_OKAY; } @@ -16327,30 +32945,50 @@ static int sp_256_point_to_ecc_point_8(const sp_point_256* p, ecc_point* pm) SP_NOINLINE static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r5, #32\n\t" + "movs r4, #0\n\t" + "movs r5, #32\n\t" "mov r8, r5\n\t" - "mov r7, #0\n\t" - "1:\n\t" + "movs r7, #0\n\t" + "\n" + "L_sp_256_cond_sub_8_words_%=: 
\n\t" "ldr r6, [%[b], r7]\n\t" +#ifdef __clang__ + "ands r6, %[m]\n\t" +#else "and r6, %[m]\n\t" - "mov r5, #0\n\t" - "sub r5, %[c]\n\t" +#endif + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, r4\n\t" +#else + "sub r5, r5, r4\n\t" +#endif "ldr r5, [%[a], r7]\n\t" +#ifdef __clang__ + "sbcs r5, r6\n\t" +#else "sbc r5, r6\n\t" - "sbc %[c], %[c]\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r4\n\t" +#else + "sbc r4, r4\n\t" +#endif "str r5, [%[r], r7]\n\t" - "add r7, #4\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "cmp r7, r8\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r5", "r6", "r7", "r8" + "blt L_sp_256_cond_sub_8_words_%=\n\t" + "movs %[r], r4\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r4", "r5", "r6", "r7", "r8" ); - - return c; + return (uint32_t)(size_t)r; } /* Reduce the number back to 256 bits using Montgomery reduction. @@ -16366,116 +33004,265 @@ SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, (void)m; __asm__ __volatile__ ( - "mov r2, #0\n\t" - "mov r1, #0\n\t" + "movs r2, #0\n\t" + "movs r1, #0\n\t" "# i = 0\n\t" "mov r8, r2\n\t" - "\n1:\n\t" - "mov r4, #0\n\t" + "\n" + "L_sp_256_mont_reduce_8_mod_%=: \n\t" + "movs r4, #0\n\t" "# mu = a[i] * 1 (mp) = a[i]\n\t" "ldr r3, [%[a]]\n\t" "# a[i+0] += -1 * mu\n\t" - "mov r5, r3\n\t" - "str r4, [%[a], #0]\n\t" + "movs r5, r3\n\t" + "str r4, [%[a]]\n\t" "# a[i+1] += -1 * mu\n\t" "ldr r6, [%[a], #4]\n\t" - "mov r4, r3\n\t" - "sub r5, r3\n\t" + "movs r4, r3\n\t" +#ifdef __clang__ + "subs r5, r5, r3\n\t" +#else + "sub r5, r5, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r2\n\t" +#else "sbc r4, r2\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r2\n\t" +#else "adc r4, r2\n\t" +#endif "str r5, [%[a], #4]\n\t" "# a[i+2] += -1 * mu\n\t" "ldr r6, [%[a], 
#8]\n\t" - "mov r5, r3\n\t" - "sub r4, r3\n\t" + "movs r5, r3\n\t" +#ifdef __clang__ + "subs r4, r4, r3\n\t" +#else + "sub r4, r4, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r2\n\t" +#else "sbc r5, r2\n\t" - "add r4, r6\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r6\n\t" +#else + "add r4, r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r2\n\t" +#else "adc r5, r2\n\t" +#endif "str r4, [%[a], #8]\n\t" "# a[i+3] += 0 * mu\n\t" "ldr r6, [%[a], #12]\n\t" - "mov r4, #0\n\t" - "add r5, r6\n\t" + "movs r4, #0\n\t" +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r2\n\t" +#else "adc r4, r2\n\t" +#endif "str r5, [%[a], #12]\n\t" "# a[i+4] += 0 * mu\n\t" "ldr r6, [%[a], #16]\n\t" - "mov r5, #0\n\t" - "add r4, r6\n\t" + "movs r5, #0\n\t" +#ifdef __clang__ + "adds r4, r4, r6\n\t" +#else + "add r4, r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r2\n\t" +#else "adc r5, r2\n\t" +#endif "str r4, [%[a], #16]\n\t" "# a[i+5] += 0 * mu\n\t" "ldr r6, [%[a], #20]\n\t" - "mov r4, #0\n\t" - "add r5, r6\n\t" + "movs r4, #0\n\t" +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r2\n\t" +#else "adc r4, r2\n\t" +#endif "str r5, [%[a], #20]\n\t" "# a[i+6] += 1 * mu\n\t" "ldr r6, [%[a], #24]\n\t" - "mov r5, #0\n\t" - "add r4, r3\n\t" + "movs r5, #0\n\t" +#ifdef __clang__ + "adds r4, r4, r3\n\t" +#else + "add r4, r4, r3\n\t" +#endif +#ifdef __clang__ + "adcs r5, r2\n\t" +#else "adc r5, r2\n\t" - "add r4, r6\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r6\n\t" +#else + "add r4, r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r2\n\t" +#else "adc r5, r2\n\t" +#endif "str r4, [%[a], #24]\n\t" "# a[i+7] += -1 * mu\n\t" "ldr r6, [%[a], #28]\n\t" "ldr r7, [%[a], #32]\n\t" +#ifdef __clang__ + "adds r4, r1, r3\n\t" +#else "add r4, r1, r3\n\t" - "mov r1, #0\n\t" +#endif + "movs r1, #0\n\t" +#ifdef __clang__ + "adcs r1, r2\n\t" +#else "adc r1, r2\n\t" - "sub r5, r3\n\t" 
+#endif +#ifdef __clang__ + "subs r5, r5, r3\n\t" +#else + "sub r5, r5, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r2\n\t" +#else "sbc r4, r2\n\t" +#endif +#ifdef __clang__ + "sbcs r1, r2\n\t" +#else "sbc r1, r2\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r1, r2\n\t" +#else "adc r1, r2\n\t" - "str r5, [%[a], #28]\n\t" +#endif + "str r5, [%[a], #28]\n\t" "str r4, [%[a], #32]\n\t" "# i += 1\n\t" - "mov r6, #4\n\t" - "add r8, r6\n\t" - "add %[a], #4\n\t" - "mov r6, #32\n\t" + "movs r6, #4\n\t" + "add r8, r8, r6\n\t" +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif + "movs r6, #32\n\t" "cmp r8, r6\n\t" - "blt 1b\n\t" - "sub %[a], #32\n\t" - "mov r3, r1\n\t" - "sub r1, #1\n\t" + "blt L_sp_256_mont_reduce_8_mod_%=\n\t" +#ifdef __clang__ + "subs %[a], %[a], #32\n\t" +#else + "sub %[a], %[a], #32\n\t" +#endif + "movs r3, r1\n\t" +#ifdef __clang__ + "subs r1, r1, #1\n\t" +#else + "sub r1, r1, #1\n\t" +#endif +#ifdef __clang__ + "mvns r1, r1\n\t" +#else "mvn r1, r1\n\t" - "ldr r5, [%[a],#32]\n\t" - "ldr r4, [%[a],#36]\n\t" - "ldr r6, [%[a],#40]\n\t" - "ldr r7, [%[a],#44]\n\t" - "sub r5, r1\n\t" +#endif + "ldr r5, [%[a], #32]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r6, [%[a], #40]\n\t" + "ldr r7, [%[a], #44]\n\t" +#ifdef __clang__ + "subs r5, r5, r1\n\t" +#else + "sub r5, r5, r1\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r1\n\t" +#else "sbc r4, r1\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r1\n\t" +#else "sbc r6, r1\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r2\n\t" +#else "sbc r7, r2\n\t" - "str r5, [%[a],#0]\n\t" - "str r4, [%[a],#4]\n\t" - "str r6, [%[a],#8]\n\t" - "str r7, [%[a],#12]\n\t" - "ldr r5, [%[a],#48]\n\t" - "ldr r4, [%[a],#52]\n\t" - "ldr r6, [%[a],#56]\n\t" - "ldr r7, [%[a],#60]\n\t" +#endif + "str r5, [%[a]]\n\t" + "str r4, [%[a], #4]\n\t" + "str r6, [%[a], 
#8]\n\t" + "str r7, [%[a], #12]\n\t" + "ldr r5, [%[a], #48]\n\t" + "ldr r4, [%[a], #52]\n\t" + "ldr r6, [%[a], #56]\n\t" + "ldr r7, [%[a], #60]\n\t" +#ifdef __clang__ + "sbcs r5, r2\n\t" +#else "sbc r5, r2\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r2\n\t" +#else "sbc r4, r2\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r3\n\t" +#else "sbc r6, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r1\n\t" +#else "sbc r7, r1\n\t" - "str r5, [%[a],#16]\n\t" - "str r4, [%[a],#20]\n\t" - "str r6, [%[a],#24]\n\t" - "str r7, [%[a],#28]\n\t" +#endif + "str r5, [%[a], #16]\n\t" + "str r4, [%[a], #20]\n\t" + "str r6, [%[a], #24]\n\t" + "str r7, [%[a], #28]\n\t" : [a] "+r" (a) : : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8" ); - - - (void)m; - (void)mp; } /* Reduce the number back to 256 bits using Montgomery reduction. @@ -16484,140 +33271,376 @@ SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ -SP_NOINLINE static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* m, - sp_digit mp) +SP_NOINLINE static void sp_256_mont_reduce_order_8(sp_digit* a, + const sp_digit* m, sp_digit mp) { sp_digit ca = 0; - __asm__ __volatile__ ( "mov r8, %[mp]\n\t" "mov r12, %[ca]\n\t" - "mov r14, %[m]\n\t" + "mov lr, %[m]\n\t" "mov r9, %[a]\n\t" - "mov r4, #0\n\t" + "movs r4, #0\n\t" "# i = 0\n\t" "mov r11, r4\n\t" - "\n1:\n\t" - "mov r5, #0\n\t" - "mov %[ca], #0\n\t" + "\n" + "L_sp_256_mont_reduce_order_8_mod_%=: \n\t" + "movs r5, #0\n\t" + "movs %[ca], #0\n\t" "# mu = a[i] * mp\n\t" "mov %[mp], r8\n\t" "ldr %[a], [%[a]]\n\t" +#ifdef __clang__ + "muls %[mp], %[a]\n\t" +#else "mul %[mp], %[a]\n\t" - "mov %[m], r14\n\t" +#endif + "mov %[m], lr\n\t" "mov r10, r9\n\t" - "\n2:\n\t" + "\n" + "L_sp_256_mont_reduce_order_8_word_%=: \n\t" "# a[i+j] += m[j] * mu\n\t" "mov %[a], r10\n\t" "ldr %[a], [%[a]]\n\t" - "mov %[ca], #0\n\t" - "mov r4, r5\n\t" - "mov r5, #0\n\t" + "movs %[ca], #0\n\t" + "movs r4, r5\n\t" + "movs r5, #0\n\t" "# Multiply m[j] and mu - Start\n\t" "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r6, %[mp], #16\n\t" +#else "lsl r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add %[a], r7\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], r7\n\t" +#else + "add %[a], %[a], r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[ca]\n\t" +#else "adc r5, %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" 
- "add %[a], r6\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], r6\n\t" +#else + "add %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r6, %[mp], #16\n\t" +#else "lsr r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r5, r7\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r7\n\t" +#else + "add r5, r5, r7\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add %[a], r6\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], r6\n\t" +#else + "add %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" +#endif "# Multiply m[j] and mu - Done\n\t" - "add r4, %[a]\n\t" +#ifdef __clang__ + "adds r4, r4, %[a]\n\t" +#else + "add r4, r4, %[a]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[ca]\n\t" +#else "adc r5, %[ca]\n\t" +#endif "mov %[a], r10\n\t" "str r4, [%[a]]\n\t" - "mov r6, #4\n\t" - "add %[m], #4\n\t" - "add r10, r6\n\t" - "mov r4, #28\n\t" - "add r4, r9\n\t" + "movs r6, #4\n\t" +#ifdef __clang__ + "adds %[m], %[m], #4\n\t" +#else + "add %[m], %[m], #4\n\t" +#endif + "add r10, r10, r6\n\t" + "movs r4, #28\n\t" + "add r4, r4, r9\n\t" "cmp r10, r4\n\t" - "blt 2b\n\t" + "blt L_sp_256_mont_reduce_order_8_word_%=\n\t" "# a[i+7] += m[7] * mu\n\t" - "mov %[ca], #0\n\t" + "movs %[ca], #0\n\t" "mov r4, r12\n\t" - "mov %[a], #0\n\t" + "movs %[a], #0\n\t" "# Multiply m[7] and mu - Start\n\t" "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r6, %[mp], #16\n\t" +#else "lsl 
r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r5, r7\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r7\n\t" +#else + "add r5, r5, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[ca]\n\t" +#else "adc r4, %[ca]\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r6, %[mp], #16\n\t" +#else "lsr r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, 
r6, #16\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "# Multiply m[7] and mu - Done\n\t" - "mov %[ca], %[a]\n\t" + "movs %[ca], %[a]\n\t" "mov %[a], r10\n\t" "ldr r7, [%[a], #4]\n\t" "ldr %[a], [%[a]]\n\t" - "mov r6, #0\n\t" - "add r5, %[a]\n\t" + "movs r6, #0\n\t" +#ifdef __clang__ + "adds r5, r5, %[a]\n\t" +#else + "add r5, r5, %[a]\n\t" +#endif +#ifdef __clang__ + "adcs r7, r4\n\t" +#else "adc r7, r4\n\t" +#endif +#ifdef __clang__ + "adcs %[ca], r6\n\t" +#else "adc %[ca], r6\n\t" +#endif "mov %[a], r10\n\t" "str r5, [%[a]]\n\t" "str r7, [%[a], #4]\n\t" "# i += 1\n\t" - "mov r6, #4\n\t" - "add r9, r6\n\t" - "add r11, r6\n\t" + "movs r6, #4\n\t" + "add r9, r9, r6\n\t" + "add r11, r11, r6\n\t" "mov r12, %[ca]\n\t" "mov %[a], r9\n\t" - "mov r4, #32\n\t" + "movs r4, #32\n\t" "cmp r11, r4\n\t" - "blt 1b\n\t" - "mov %[m], r14\n\t" - : [ca] "+r" (ca), [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + "blt L_sp_256_mont_reduce_order_8_mod_%=\n\t" + "mov %[m], lr\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp), [ca] "+r" (ca) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); - sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - ca); } @@ -16756,38 +33779,93 @@ static void sp_256_mont_inv_8(sp_digit* r, const sp_digit* a, sp_digit* td) */ SP_NOINLINE static int32_t sp_256_cmp_8(const sp_digit* a, const sp_digit* b) { - sp_digit r = 0; - - __asm__ __volatile__ ( - "mov r3, #0\n\t" + "movs r2, #0\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "mvns r3, r3\n\t" +#else "mvn r3, r3\n\t" - "mov r6, #28\n\t" - "1:\n\t" +#endif + "movs r6, #28\n\t" + "\n" + "L_sp_256_cmp_8_words_%=: \n\t" "ldr r7, [%[a], r6]\n\t" "ldr r5, [%[b], r6]\n\t" +#ifdef __clang__ + "ands r7, 
r3\n\t" +#else "and r7, r3\n\t" +#endif +#ifdef __clang__ + "ands r5, r3\n\t" +#else "and r5, r3\n\t" - "mov r4, r7\n\t" - "sub r7, r5\n\t" +#endif + "movs r4, r7\n\t" +#ifdef __clang__ + "subs r7, r7, r5\n\t" +#else + "sub r7, r7, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r7\n\t" +#else "sbc r7, r7\n\t" - "add %[r], r7\n\t" +#endif +#ifdef __clang__ + "adds r2, r2, r7\n\t" +#else + "add r2, r2, r7\n\t" +#endif +#ifdef __clang__ + "mvns r7, r7\n\t" +#else "mvn r7, r7\n\t" +#endif +#ifdef __clang__ + "ands r3, r7\n\t" +#else "and r3, r7\n\t" - "sub r5, r4\n\t" +#endif +#ifdef __clang__ + "subs r5, r5, r4\n\t" +#else + "sub r5, r5, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r7\n\t" +#else "sbc r7, r7\n\t" - "sub %[r], r7\n\t" +#endif +#ifdef __clang__ + "subs r2, r2, r7\n\t" +#else + "sub r2, r2, r7\n\t" +#endif +#ifdef __clang__ + "mvns r7, r7\n\t" +#else "mvn r7, r7\n\t" +#endif +#ifdef __clang__ + "ands r3, r7\n\t" +#else "and r3, r7\n\t" - "sub r6, #4\n\t" +#endif +#ifdef __clang__ + "subs r6, r6, #4\n\t" +#else + "sub r6, r6, #4\n\t" +#endif "cmp r6, #0\n\t" - "bge 1b\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b) - : "r3", "r4", "r5", "r6", "r7" + "bge L_sp_256_cmp_8_words_%=\n\t" + "movs %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7" ); - - return r; + return (uint32_t)(size_t)a; } /* Normalize the values in each word to 32. @@ -16846,76 +33924,151 @@ static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, * b Second number to add in Montogmery form. * m Modulus (prime). 
*/ -SP_NOINLINE static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +SP_NOINLINE static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit* m) { (void)m; - __asm__ __volatile__ ( - "mov r3, #0\n\t" - "ldr r4, [%[a],#0]\n\t" - "ldr r5, [%[a],#4]\n\t" - "ldr r6, [%[b],#0]\n\t" - "ldr r7, [%[b],#4]\n\t" - "add r4, r6\n\t" + "movs r3, #0\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[b]]\n\t" + "ldr r7, [%[b], #4]\n\t" +#ifdef __clang__ + "adds r4, r4, r6\n\t" +#else + "add r4, r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" - "str r4, [%[r],#0]\n\t" - "str r5, [%[r],#4]\n\t" - "ldr r4, [%[a],#8]\n\t" - "ldr r5, [%[a],#12]\n\t" - "ldr r6, [%[b],#8]\n\t" - "ldr r7, [%[b],#12]\n\t" +#endif + "str r4, [%[r]]\n\t" + "str r5, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" + "ldr r6, [%[b], #8]\n\t" + "ldr r7, [%[b], #12]\n\t" +#ifdef __clang__ + "adcs r4, r6\n\t" +#else "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" - "str r4, [%[r],#8]\n\t" - "str r5, [%[r],#12]\n\t" - "ldr r4, [%[a],#16]\n\t" - "ldr r5, [%[a],#20]\n\t" - "ldr r6, [%[b],#16]\n\t" - "ldr r7, [%[b],#20]\n\t" +#endif + "str r4, [%[r], #8]\n\t" + "str r5, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "ldr r6, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" +#ifdef __clang__ + "adcs r4, r6\n\t" +#else "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" +#endif "mov r8, r4\n\t" "mov r9, r5\n\t" - "ldr r4, [%[a],#24]\n\t" - "ldr r5, [%[a],#28]\n\t" - "ldr r6, [%[b],#24]\n\t" - "ldr r7, [%[b],#28]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" + "ldr r6, [%[b], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" +#ifdef __clang__ + "adcs r4, r6\n\t" +#else "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" 
+#endif "mov r10, r4\n\t" "mov r11, r5\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else "adc r3, r3\n\t" - "mov r6, r3\n\t" - "sub r3, #1\n\t" +#endif + "movs r6, r3\n\t" +#ifdef __clang__ + "subs r3, r3, #1\n\t" +#else + "sub r3, r3, #1\n\t" +#endif +#ifdef __clang__ + "mvns r3, r3\n\t" +#else "mvn r3, r3\n\t" - "mov r7, #0\n\t" - "ldr r4, [%[r],#0]\n\t" - "ldr r5, [%[r],#4]\n\t" - "sub r4, r3\n\t" +#endif + "movs r7, #0\n\t" + "ldr r4, [%[r]]\n\t" + "ldr r5, [%[r], #4]\n\t" +#ifdef __clang__ + "subs r4, r4, r3\n\t" +#else + "sub r4, r4, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r3\n\t" +#else "sbc r5, r3\n\t" - "str r4, [%[r],#0]\n\t" - "str r5, [%[r],#4]\n\t" - "ldr r4, [%[r],#8]\n\t" - "ldr r5, [%[r],#12]\n\t" +#endif + "str r4, [%[r]]\n\t" + "str r5, [%[r], #4]\n\t" + "ldr r4, [%[r], #8]\n\t" + "ldr r5, [%[r], #12]\n\t" +#ifdef __clang__ + "sbcs r4, r3\n\t" +#else "sbc r4, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" - "str r4, [%[r],#8]\n\t" - "str r5, [%[r],#12]\n\t" +#endif + "str r4, [%[r], #8]\n\t" + "str r5, [%[r], #12]\n\t" "mov r4, r8\n\t" "mov r5, r9\n\t" +#ifdef __clang__ + "sbcs r4, r7\n\t" +#else "sbc r4, r7\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" - "str r4, [%[r],#16]\n\t" - "str r5, [%[r],#20]\n\t" +#endif + "str r4, [%[r], #16]\n\t" + "str r5, [%[r], #20]\n\t" "mov r4, r10\n\t" "mov r5, r11\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r3\n\t" +#else "sbc r5, r3\n\t" - "str r4, [%[r],#24]\n\t" - "str r5, [%[r],#28]\n\t" +#endif + "str r4, [%[r], #24]\n\t" + "str r5, [%[r], #28]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } @@ -16926,68 +34079,144 @@ SP_NOINLINE static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const * a Number to double in Montogmery form. * m Modulus (prime). 
*/ -SP_NOINLINE static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, + const sp_digit* m) { (void)m; - __asm__ __volatile__ ( - "ldr r4, [%[a],#0]\n\t" - "ldr r5, [%[a],#4]\n\t" - "ldr r6, [%[a],#8]\n\t" - "ldr r7, [%[a],#12]\n\t" - "add r4, r4\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" +#ifdef __clang__ + "adds r4, r4, r4\n\t" +#else + "add r4, r4, r4\n\t" +#endif +#ifdef __clang__ + "adcs r5, r5\n\t" +#else "adc r5, r5\n\t" +#endif +#ifdef __clang__ + "adcs r6, r6\n\t" +#else "adc r6, r6\n\t" +#endif +#ifdef __clang__ + "adcs r7, r7\n\t" +#else "adc r7, r7\n\t" - "str r4, [%[r],#0]\n\t" - "str r5, [%[r],#4]\n\t" - "str r6, [%[r],#8]\n\t" - "str r7, [%[r],#12]\n\t" - "ldr r4, [%[a],#16]\n\t" - "ldr r5, [%[a],#20]\n\t" - "ldr r6, [%[a],#24]\n\t" - "ldr r7, [%[a],#28]\n\t" +#endif + "str r4, [%[r]]\n\t" + "str r5, [%[r], #4]\n\t" + "str r6, [%[r], #8]\n\t" + "str r7, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[a], #28]\n\t" +#ifdef __clang__ + "adcs r4, r4\n\t" +#else "adc r4, r4\n\t" +#endif +#ifdef __clang__ + "adcs r5, r5\n\t" +#else "adc r5, r5\n\t" +#endif +#ifdef __clang__ + "adcs r6, r6\n\t" +#else "adc r6, r6\n\t" +#endif +#ifdef __clang__ + "adcs r7, r7\n\t" +#else "adc r7, r7\n\t" +#endif "mov r8, r4\n\t" "mov r9, r5\n\t" "mov r10, r6\n\t" "mov r11, r7\n\t" - "mov r3, #0\n\t" - "mov r7, #0\n\t" + "movs r3, #0\n\t" + "movs r7, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else "adc r3, r3\n\t" - "mov r2, r3\n\t" - "sub r3, #1\n\t" +#endif + "movs r2, r3\n\t" +#ifdef __clang__ + "subs r3, r3, #1\n\t" +#else + "sub r3, r3, #1\n\t" +#endif +#ifdef __clang__ + "mvns r3, r3\n\t" +#else "mvn r3, r3\n\t" - "ldr r4, [%[r],#0]\n\t" - "ldr r5, [%[r],#4]\n\t" - "ldr r6, [%[r],#8]\n\t" - "sub r4, r3\n\t" +#endif + "ldr r4, 
[%[r]]\n\t" + "ldr r5, [%[r], #4]\n\t" + "ldr r6, [%[r], #8]\n\t" +#ifdef __clang__ + "subs r4, r4, r3\n\t" +#else + "sub r4, r4, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r3\n\t" +#else "sbc r5, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r3\n\t" +#else "sbc r6, r3\n\t" - "str r4, [%[r],#0]\n\t" - "str r5, [%[r],#4]\n\t" - "str r6, [%[r],#8]\n\t" - "ldr r4, [%[r],#12]\n\t" +#endif + "str r4, [%[r]]\n\t" + "str r5, [%[r], #4]\n\t" + "str r6, [%[r], #8]\n\t" + "ldr r4, [%[r], #12]\n\t" "mov r5, r8\n\t" "mov r6, r9\n\t" +#ifdef __clang__ + "sbcs r4, r7\n\t" +#else "sbc r4, r7\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r7\n\t" +#else "sbc r6, r7\n\t" - "str r4, [%[r],#12]\n\t" - "str r5, [%[r],#16]\n\t" - "str r6, [%[r],#20]\n\t" +#endif + "str r4, [%[r], #12]\n\t" + "str r5, [%[r], #16]\n\t" + "str r6, [%[r], #20]\n\t" "mov r4, r10\n\t" "mov r5, r11\n\t" +#ifdef __clang__ + "sbcs r4, r2\n\t" +#else "sbc r4, r2\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r3\n\t" +#else "sbc r5, r3\n\t" - "str r4, [%[r],#24]\n\t" - "str r5, [%[r],#28]\n\t" +#endif + "str r4, [%[r], #24]\n\t" + "str r5, [%[r], #28]\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) - : "memory", "r3", "r2", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } @@ -16997,118 +34226,270 @@ SP_NOINLINE static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const * a Number to triple in Montogmery form. * m Modulus (prime). 
*/ -SP_NOINLINE static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, + const sp_digit* m) { (void)m; - __asm__ __volatile__ ( - "ldr r6, [%[a],#0]\n\t" - "ldr r7, [%[a],#4]\n\t" - "ldr r4, [%[a],#8]\n\t" - "ldr r5, [%[a],#12]\n\t" - "add r6, r6\n\t" - "adc r7, r7\n\t" - "adc r4, r4\n\t" - "adc r5, r5\n\t" - "mov r8, r4\n\t" - "mov r9, r5\n\t" - "ldr r2, [%[a],#16]\n\t" - "ldr r3, [%[a],#20]\n\t" - "ldr r4, [%[a],#24]\n\t" - "ldr r5, [%[a],#28]\n\t" - "adc r2, r2\n\t" - "adc r3, r3\n\t" - "adc r4, r4\n\t" - "adc r5, r5\n\t" - "mov r10, r2\n\t" - "mov r11, r3\n\t" - "mov r12, r4\n\t" - "mov r14, r5\n\t" - "mov r3, #0\n\t" - "mov r5, #0\n\t" - "adc r3, r3\n\t" - "mov r4, r3\n\t" - "sub r3, #1\n\t" - "mvn r3, r3\n\t" - "sub r6, r3\n\t" - "sbc r7, r3\n\t" - "mov r2, r8\n\t" - "sbc r2, r3\n\t" - "mov r8, r2\n\t" - "mov r2, r9\n\t" - "sbc r2, r5\n\t" - "mov r9, r2\n\t" - "mov r2, r10\n\t" - "sbc r2, r5\n\t" - "mov r10, r2\n\t" - "mov r2, r11\n\t" - "sbc r2, r5\n\t" - "mov r11, r2\n\t" - "mov r2, r12\n\t" - "sbc r2, r4\n\t" - "mov r12, r2\n\t" - "mov r2, r14\n\t" - "sbc r2, r3\n\t" - "mov r14, r2\n\t" - "ldr r2, [%[a],#0]\n\t" - "ldr r3, [%[a],#4]\n\t" - "add r6, r2\n\t" - "adc r7, r3\n\t" - "ldr r2, [%[a],#8]\n\t" - "ldr r3, [%[a],#12]\n\t" - "mov r4, r8\n\t" - "mov r5, r9\n\t" - "adc r2, r4\n\t" - "adc r3, r5\n\t" - "mov r8, r2\n\t" - "mov r9, r3\n\t" - "ldr r2, [%[a],#16]\n\t" - "ldr r3, [%[a],#20]\n\t" - "mov r4, r10\n\t" - "mov r5, r11\n\t" - "adc r2, r4\n\t" - "adc r3, r5\n\t" + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[a], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" +#ifdef __clang__ + "adds r6, r6, r6\n\t" +#else + "add r6, r6, r6\n\t" +#endif +#ifdef __clang__ + "adcs r7, r7\n\t" +#else + "adc r7, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, r4\n\t" +#else + "adc r4, r4\n\t" +#endif +#ifdef __clang__ + "adcs r5, r5\n\t" +#else + "adc r5, r5\n\t" 
+#endif + "mov r8, r4\n\t" + "mov r9, r5\n\t" + "ldr r2, [%[a], #16]\n\t" + "ldr r3, [%[a], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" +#ifdef __clang__ + "adcs r2, r2\n\t" +#else + "adc r2, r2\n\t" +#endif +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif +#ifdef __clang__ + "adcs r4, r4\n\t" +#else + "adc r4, r4\n\t" +#endif +#ifdef __clang__ + "adcs r5, r5\n\t" +#else + "adc r5, r5\n\t" +#endif "mov r10, r2\n\t" "mov r11, r3\n\t" - "ldr r2, [%[a],#24]\n\t" - "ldr r3, [%[a],#28]\n\t" - "mov r4, r12\n\t" - "mov r5, r14\n\t" - "adc r2, r4\n\t" - "adc r3, r5\n\t" - "mov r12, r2\n\t" - "mov r14, r3\n\t" - "mov r3, #0\n\t" - "mov r5, #0\n\t" + "mov r12, r4\n\t" + "mov lr, r5\n\t" + "movs r3, #0\n\t" + "movs r5, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else "adc r3, r3\n\t" - "mov r4, r3\n\t" - "sub r3, #1\n\t" +#endif + "movs r4, r3\n\t" +#ifdef __clang__ + "subs r3, r3, #1\n\t" +#else + "sub r3, r3, #1\n\t" +#endif +#ifdef __clang__ + "mvns r3, r3\n\t" +#else "mvn r3, r3\n\t" - "sub r6, r3\n\t" - "str r6, [%[r],#0]\n\t" +#endif +#ifdef __clang__ + "subs r6, r6, r3\n\t" +#else + "sub r6, r6, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r3\n\t" +#else "sbc r7, r3\n\t" - "str r7, [%[r],#4]\n\t" - "mov r2, r8\n\t" - "sbc r2, r3\n\t" - "str r2, [%[r],#8]\n\t" - "mov r2, r9\n\t" - "sbc r2, r5\n\t" - "str r2, [%[r],#12]\n\t" - "mov r2, r10\n\t" - "sbc r2, r5\n\t" - "str r2, [%[r],#16]\n\t" - "mov r2, r11\n\t" - "sbc r2, r5\n\t" - "str r2, [%[r],#20]\n\t" - "mov r2, r12\n\t" - "sbc r2, r4\n\t" - "str r2, [%[r],#24]\n\t" - "mov r2, r14\n\t" - "sbc r2, r3\n\t" - "str r2, [%[r],#28]\n\t" +#endif + "mov r2, r8\n\t" +#ifdef __clang__ + "sbcs r2, r3\n\t" +#else + "sbc r2, r3\n\t" +#endif + "mov r8, r2\n\t" + "mov r2, r9\n\t" +#ifdef __clang__ + "sbcs r2, r5\n\t" +#else + "sbc r2, r5\n\t" +#endif + "mov r9, r2\n\t" + "mov r2, r10\n\t" +#ifdef __clang__ + "sbcs r2, r5\n\t" +#else + "sbc r2, r5\n\t" +#endif + "mov r10, r2\n\t" + 
"mov r2, r11\n\t" +#ifdef __clang__ + "sbcs r2, r5\n\t" +#else + "sbc r2, r5\n\t" +#endif + "mov r11, r2\n\t" + "mov r2, r12\n\t" +#ifdef __clang__ + "sbcs r2, r4\n\t" +#else + "sbc r2, r4\n\t" +#endif + "mov r12, r2\n\t" + "mov r2, lr\n\t" +#ifdef __clang__ + "sbcs r2, r3\n\t" +#else + "sbc r2, r3\n\t" +#endif + "mov lr, r2\n\t" + "ldr r2, [%[a]]\n\t" + "ldr r3, [%[a], #4]\n\t" +#ifdef __clang__ + "adds r6, r6, r2\n\t" +#else + "add r6, r6, r2\n\t" +#endif +#ifdef __clang__ + "adcs r7, r3\n\t" +#else + "adc r7, r3\n\t" +#endif + "ldr r2, [%[a], #8]\n\t" + "ldr r3, [%[a], #12]\n\t" + "mov r4, r8\n\t" + "mov r5, r9\n\t" +#ifdef __clang__ + "adcs r2, r4\n\t" +#else + "adc r2, r4\n\t" +#endif +#ifdef __clang__ + "adcs r3, r5\n\t" +#else + "adc r3, r5\n\t" +#endif + "mov r8, r2\n\t" + "mov r9, r3\n\t" + "ldr r2, [%[a], #16]\n\t" + "ldr r3, [%[a], #20]\n\t" + "mov r4, r10\n\t" + "mov r5, r11\n\t" +#ifdef __clang__ + "adcs r2, r4\n\t" +#else + "adc r2, r4\n\t" +#endif +#ifdef __clang__ + "adcs r3, r5\n\t" +#else + "adc r3, r5\n\t" +#endif + "mov r10, r2\n\t" + "mov r11, r3\n\t" + "ldr r2, [%[a], #24]\n\t" + "ldr r3, [%[a], #28]\n\t" + "mov r4, r12\n\t" + "mov r5, lr\n\t" +#ifdef __clang__ + "adcs r2, r4\n\t" +#else + "adc r2, r4\n\t" +#endif +#ifdef __clang__ + "adcs r3, r5\n\t" +#else + "adc r3, r5\n\t" +#endif + "mov r12, r2\n\t" + "mov lr, r3\n\t" + "movs r3, #0\n\t" + "movs r5, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif + "movs r4, r3\n\t" +#ifdef __clang__ + "subs r3, r3, #1\n\t" +#else + "sub r3, r3, #1\n\t" +#endif +#ifdef __clang__ + "mvns r3, r3\n\t" +#else + "mvn r3, r3\n\t" +#endif +#ifdef __clang__ + "subs r6, r6, r3\n\t" +#else + "sub r6, r6, r3\n\t" +#endif + "str r6, [%[r]]\n\t" +#ifdef __clang__ + "sbcs r7, r3\n\t" +#else + "sbc r7, r3\n\t" +#endif + "str r7, [%[r], #4]\n\t" + "mov r2, r8\n\t" +#ifdef __clang__ + "sbcs r2, r3\n\t" +#else + "sbc r2, r3\n\t" +#endif + "str r2, [%[r], #8]\n\t" + "mov r2, r9\n\t" +#ifdef 
__clang__ + "sbcs r2, r5\n\t" +#else + "sbc r2, r5\n\t" +#endif + "str r2, [%[r], #12]\n\t" + "mov r2, r10\n\t" +#ifdef __clang__ + "sbcs r2, r5\n\t" +#else + "sbc r2, r5\n\t" +#endif + "str r2, [%[r], #16]\n\t" + "mov r2, r11\n\t" +#ifdef __clang__ + "sbcs r2, r5\n\t" +#else + "sbc r2, r5\n\t" +#endif + "str r2, [%[r], #20]\n\t" + "mov r2, r12\n\t" +#ifdef __clang__ + "sbcs r2, r4\n\t" +#else + "sbc r2, r4\n\t" +#endif + "str r2, [%[r], #24]\n\t" + "mov r2, lr\n\t" +#ifdef __clang__ + "sbcs r2, r3\n\t" +#else + "sbc r2, r3\n\t" +#endif + "str r2, [%[r], #28]\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); } @@ -17119,73 +34500,144 @@ SP_NOINLINE static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const * b Number to subtract with in Montogmery form. * m Modulus (prime). */ -SP_NOINLINE static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +SP_NOINLINE static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit* m) { (void)m; - __asm__ __volatile__ ( - "ldr r4, [%[a],#0]\n\t" - "ldr r5, [%[a],#4]\n\t" - "ldr r6, [%[b],#0]\n\t" - "ldr r7, [%[b],#4]\n\t" - "sub r4, r6\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[b]]\n\t" + "ldr r7, [%[b], #4]\n\t" +#ifdef __clang__ + "subs r4, r4, r6\n\t" +#else + "sub r4, r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" - "str r4, [%[r],#0]\n\t" - "str r5, [%[r],#4]\n\t" - "ldr r4, [%[a],#8]\n\t" - "ldr r5, [%[a],#12]\n\t" - "ldr r6, [%[b],#8]\n\t" - "ldr r7, [%[b],#12]\n\t" +#endif + "str r4, [%[r]]\n\t" + "str r5, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" + "ldr r6, [%[b], #8]\n\t" + "ldr r7, [%[b], #12]\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else 
"sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" - "str r4, [%[r],#8]\n\t" - "str r5, [%[r],#12]\n\t" - "ldr r4, [%[a],#16]\n\t" - "ldr r5, [%[a],#20]\n\t" - "ldr r6, [%[b],#16]\n\t" - "ldr r7, [%[b],#20]\n\t" +#endif + "str r4, [%[r], #8]\n\t" + "str r5, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "ldr r6, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" +#endif "mov r8, r4\n\t" "mov r9, r5\n\t" - "ldr r4, [%[a],#24]\n\t" - "ldr r5, [%[a],#28]\n\t" - "ldr r6, [%[b],#24]\n\t" - "ldr r7, [%[b],#28]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" + "ldr r6, [%[b], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" +#endif "mov r10, r4\n\t" "mov r11, r5\n\t" - "sbc r3, r3\n\t" - "lsr r7, r3, #31\n\t" - "mov r6, #0\n\t" - "ldr r4, [%[r],#0]\n\t" - "ldr r5, [%[r],#4]\n\t" - "add r4, r3\n\t" +#ifdef __clang__ + "sbcs r3, r3\n\t" +#else + "sbc r3, r3\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r3, #31\n\t" +#else + "lsr r7, r3, #31\n\t" +#endif + "movs r6, #0\n\t" + "ldr r4, [%[r]]\n\t" + "ldr r5, [%[r], #4]\n\t" +#ifdef __clang__ + "adds r4, r4, r3\n\t" +#else + "add r4, r4, r3\n\t" +#endif +#ifdef __clang__ + "adcs r5, r3\n\t" +#else "adc r5, r3\n\t" - "str r4, [%[r],#0]\n\t" - "str r5, [%[r],#4]\n\t" - "ldr r4, [%[r],#8]\n\t" - "ldr r5, [%[r],#12]\n\t" +#endif + "str r4, [%[r]]\n\t" + "str r5, [%[r], #4]\n\t" + "ldr r4, [%[r], #8]\n\t" + "ldr r5, [%[r], #12]\n\t" +#ifdef __clang__ + "adcs r4, r3\n\t" +#else "adc r4, r3\n\t" +#endif +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" - "str r4, [%[r],#8]\n\t" - "str r5, [%[r],#12]\n\t" +#endif + "str r4, [%[r], #8]\n\t" + "str r5, [%[r], #12]\n\t" "mov r4, r8\n\t" "mov r5, r9\n\t" +#ifdef 
__clang__ + "adcs r4, r6\n\t" +#else "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" - "str r4, [%[r],#16]\n\t" - "str r5, [%[r],#20]\n\t" +#endif + "str r4, [%[r], #16]\n\t" + "str r5, [%[r], #20]\n\t" "mov r4, r10\n\t" "mov r5, r11\n\t" +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, r3\n\t" +#else "adc r5, r3\n\t" - "str r4, [%[r],#24]\n\t" - "str r5, [%[r],#28]\n\t" +#endif + "str r4, [%[r], #24]\n\t" + "str r5, [%[r], #28]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } @@ -17196,84 +34648,238 @@ SP_NOINLINE static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const * a Number to divide. * m Modulus (prime). */ -SP_NOINLINE static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_256_div2_8(sp_digit* r, const sp_digit* a, + const sp_digit* m) { + (void)m; __asm__ __volatile__ ( - "ldr r7, [%[a], #0]\n\t" - "lsl r7, r7, #31\n\t" - "lsr r7, r7, #31\n\t" - "mov r5, #0\n\t" - "sub r5, r7\n\t" - "mov r7, #0\n\t" - "lsl r6, r5, #31\n\t" + "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsls r6, r6, #31\n\t" +#else + "lsl r6, r6, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #31\n\t" +#else "lsr r6, r6, #31\n\t" - "ldr r3, [%[a], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "add r3, r5\n\t" - "adc r4, r5\n\t" - "str r3, [%[r], #0]\n\t" - "str r4, [%[r], #4]\n\t" - "ldr r3, [%[a], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "adc r3, r5\n\t" - "adc r4, r7\n\t" - "str r3, [%[r], #8]\n\t" - "str r4, [%[r], #12]\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "adc r3, r7\n\t" - "adc r4, r7\n\t" - "str r3, [%[r], #16]\n\t" - "str r4, [%[r], #20]\n\t" - "ldr r3, [%[a], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" +#endif + "movs r4, #0\n\t" +#ifdef __clang__ + "subs r4, r4, r6\n\t" +#else + "sub r4, r4, r6\n\t" 
+#endif + "movs r6, #0\n\t" +#ifdef __clang__ + "lsls r5, r4, #31\n\t" +#else + "lsl r5, r4, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r5, r5, #31\n\t" +#else + "lsr r5, r5, #31\n\t" +#endif + "ldr r2, [%[a]]\n\t" + "ldr r3, [%[a], #4]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif +#ifdef __clang__ + "adcs r3, r4\n\t" +#else + "adc r3, r4\n\t" +#endif + "str r2, [%[r]]\n\t" + "str r3, [%[r], #4]\n\t" + "ldr r2, [%[a], #8]\n\t" + "ldr r3, [%[a], #12]\n\t" +#ifdef __clang__ + "adcs r2, r4\n\t" +#else + "adc r2, r4\n\t" +#endif +#ifdef __clang__ + "adcs r3, r6\n\t" +#else "adc r3, r6\n\t" - "adc r4, r5\n\t" - "adc r7, r7\n\t" - "lsl r7, r7, #31\n\t" +#endif + "str r2, [%[r], #8]\n\t" + "str r3, [%[r], #12]\n\t" + "ldr r2, [%[a], #16]\n\t" + "ldr r3, [%[a], #20]\n\t" +#ifdef __clang__ + "adcs r2, r6\n\t" +#else + "adc r2, r6\n\t" +#endif +#ifdef __clang__ + "adcs r3, r6\n\t" +#else + "adc r3, r6\n\t" +#endif + "str r2, [%[r], #16]\n\t" + "str r3, [%[r], #20]\n\t" + "ldr r2, [%[a], #24]\n\t" + "ldr r3, [%[a], #28]\n\t" +#ifdef __clang__ + "adcs r2, r5\n\t" +#else + "adc r2, r5\n\t" +#endif +#ifdef __clang__ + "adcs r3, r4\n\t" +#else + "adc r3, r4\n\t" +#endif +#ifdef __clang__ + "adcs r6, r6\n\t" +#else + "adc r6, r6\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #31\n\t" +#else + "lsl r6, r6, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r2, #1\n\t" +#else + "lsr r4, r2, #1\n\t" +#endif +#ifdef __clang__ + "lsls r2, r2, #31\n\t" +#else + "lsl r2, r2, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r5, r3, #1\n\t" +#else "lsr r5, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, #31\n\t" +#else "lsl r3, r3, #31\n\t" - "lsr r6, r4, #1\n\t" - "lsl r4, r4, #31\n\t" - "orr r5, r4\n\t" - "orr r6, r7\n\t" - "mov r7, r3\n\t" - "str r5, [%[r], #24]\n\t" - "str r6, [%[r], #28]\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" +#endif +#ifdef __clang__ + "orrs r4, r3\n\t" +#else + "orr r4, r3\n\t" +#endif +#ifdef __clang__ + 
"orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "movs r6, r2\n\t" + "str r4, [%[r], #24]\n\t" + "str r5, [%[r], #28]\n\t" + "ldr r2, [%[r], #16]\n\t" + "ldr r3, [%[r], #20]\n\t" +#ifdef __clang__ + "lsrs r4, r2, #1\n\t" +#else + "lsr r4, r2, #1\n\t" +#endif +#ifdef __clang__ + "lsls r2, r2, #31\n\t" +#else + "lsl r2, r2, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r5, r3, #1\n\t" +#else "lsr r5, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, #31\n\t" +#else "lsl r3, r3, #31\n\t" - "lsr r6, r4, #1\n\t" - "lsl r4, r4, #31\n\t" - "orr r5, r4\n\t" - "orr r6, r7\n\t" - "mov r7, r3\n\t" - "str r5, [%[r], #16]\n\t" - "str r6, [%[r], #20]\n\t" - "ldr r3, [%[a], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" +#endif +#ifdef __clang__ + "orrs r4, r3\n\t" +#else + "orr r4, r3\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "movs r6, r2\n\t" + "str r4, [%[r], #16]\n\t" + "str r5, [%[r], #20]\n\t" + "ldr r2, [%[r], #8]\n\t" + "ldr r3, [%[r], #12]\n\t" +#ifdef __clang__ + "lsrs r4, r2, #1\n\t" +#else + "lsr r4, r2, #1\n\t" +#endif +#ifdef __clang__ + "lsls r2, r2, #31\n\t" +#else + "lsl r2, r2, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r5, r3, #1\n\t" +#else "lsr r5, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, #31\n\t" +#else "lsl r3, r3, #31\n\t" - "lsr r6, r4, #1\n\t" - "lsl r4, r4, #31\n\t" - "orr r5, r4\n\t" - "orr r6, r7\n\t" - "mov r7, r3\n\t" - "str r5, [%[r], #8]\n\t" - "str r6, [%[r], #12]\n\t" - "ldr r3, [%[r], #0]\n\t" - "ldr r4, [%[r], #4]\n\t" +#endif +#ifdef __clang__ + "orrs r4, r3\n\t" +#else + "orr r4, r3\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "movs r6, r2\n\t" + "str r4, [%[r], #8]\n\t" + "str r5, [%[r], #12]\n\t" + "ldr r2, [%[r]]\n\t" + "ldr r3, [%[r], #4]\n\t" +#ifdef __clang__ + "lsrs r4, r2, #1\n\t" +#else + "lsr r4, r2, #1\n\t" +#endif +#ifdef __clang__ + "lsrs r5, r3, #1\n\t" +#else "lsr r5, r3, #1\n\t" - "lsr r6, r4, #1\n\t" - "lsl r4, r4, #31\n\t" - "orr 
r5, r4\n\t" - "orr r6, r7\n\t" - "str r5, [%[r], #0]\n\t" - "str r6, [%[r], #4]\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, #31\n\t" +#else + "lsl r3, r3, #31\n\t" +#endif +#ifdef __clang__ + "orrs r4, r3\n\t" +#else + "orr r4, r3\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "str r4, [%[r]]\n\t" + "str r5, [%[r], #4]\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a), [m] "r" (m) - : "memory", "r3", "r4", "r5", "r6", "r7" + : "memory", "r2", "r3", "r4", "r5", "r6" ); } @@ -20754,34 +38360,66 @@ static int sp_256_iszero_8(const sp_digit* a) SP_NOINLINE static void sp_256_add_one_8(sp_digit* a) { __asm__ __volatile__ ( - "mov r2, #1\n\t" - "ldr r1, [%[a], #0]\n\t" - "add r1, r2\n\t" - "mov r2, #0\n\t" - "str r1, [%[a], #0]\n\t" + "movs r2, #1\n\t" + "ldr r1, [%[a]]\n\t" +#ifdef __clang__ + "adds r1, r1, r2\n\t" +#else + "add r1, r1, r2\n\t" +#endif + "movs r2, #0\n\t" + "str r1, [%[a]]\n\t" "ldr r1, [%[a], #4]\n\t" +#ifdef __clang__ + "adcs r1, r2\n\t" +#else "adc r1, r2\n\t" +#endif "str r1, [%[a], #4]\n\t" "ldr r1, [%[a], #8]\n\t" +#ifdef __clang__ + "adcs r1, r2\n\t" +#else "adc r1, r2\n\t" +#endif "str r1, [%[a], #8]\n\t" "ldr r1, [%[a], #12]\n\t" +#ifdef __clang__ + "adcs r1, r2\n\t" +#else "adc r1, r2\n\t" +#endif "str r1, [%[a], #12]\n\t" "ldr r1, [%[a], #16]\n\t" +#ifdef __clang__ + "adcs r1, r2\n\t" +#else "adc r1, r2\n\t" +#endif "str r1, [%[a], #16]\n\t" "ldr r1, [%[a], #20]\n\t" +#ifdef __clang__ + "adcs r1, r2\n\t" +#else "adc r1, r2\n\t" +#endif "str r1, [%[a], #20]\n\t" "ldr r1, [%[a], #24]\n\t" +#ifdef __clang__ + "adcs r1, r2\n\t" +#else "adc r1, r2\n\t" +#endif "str r1, [%[a], #24]\n\t" "ldr r1, [%[a], #28]\n\t" +#ifdef __clang__ + "adcs r1, r2\n\t" +#else "adc r1, r2\n\t" +#endif "str r1, [%[a], #28]\n\t" + : [a] "+r" (a) : - : [a] "r" (a) : "memory", "r1", "r2" ); } @@ -21052,86 +38690,149 @@ int sp_ecc_secret_gen_256(const mp_int* priv, const ecc_point* pub, byte* out, SP_NOINLINE 
static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; __asm__ __volatile__ ( - "mov r7, %[a]\n\t" - "add r7, #32\n\t" - "\n1:\n\t" - "mov r5, #0\n\t" - "sub r5, %[c]\n\t" + "movs r7, %[a]\n\t" + "movs r2, #0\n\t" +#ifdef __clang__ + "adds r7, r7, #32\n\t" +#else + "add r7, r7, #32\n\t" +#endif + "\n" + "L_sp_256_sub_in_place_8_words_%=: \n\t" + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, r2\n\t" +#else + "sub r5, r5, r2\n\t" +#endif "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" - "sbc %[c], %[c]\n\t" - "add %[a], #8\n\t" - "add %[b], #8\n\t" +#ifdef __clang__ + "sbcs r2, r2\n\t" +#else + "sbc r2, r2\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #8\n\t" +#else + "add %[a], %[a], #8\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #8\n\t" +#else + "add %[b], %[b], #8\n\t" +#endif "cmp %[a], r7\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "bne L_sp_256_sub_in_place_8_words_%=\n\t" + "movs %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7" ); - - return c; + return (uint32_t)(size_t)a; } #else -/* Sub b from a into r. (r = a - b) +/* Sub b from a into a. (a -= b) * - * r A single precision integer. * a A single precision integer. * b A single precision integer. 
*/ SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldr r3, [%[a], #0]\n\t" + "movs r2, #0\n\t" + "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" + "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" - "sub r3, r5\n\t" +#ifdef __clang__ + "subs r3, r3, r5\n\t" +#else + "sub r3, r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" - "str r3, [%[a], #0]\n\t" +#endif + "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #8]\n\t" "ldr r6, [%[b], #12]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #8]\n\t" "str r4, [%[a], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #16]\n\t" "ldr r6, [%[b], #20]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #16]\n\t" "str r4, [%[a], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #24]\n\t" "ldr r6, [%[b], #28]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #24]\n\t" "str r4, [%[a], #28]\n\t" - "sbc %[c], %[c]\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) +#ifdef __clang__ + "sbcs r2, r2\n\t" +#else + "sbc r2, r2\n\t" +#endif + "movs %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6" + : "memory", "r2", "r3", "r4", "r5", "r6" ); - - return c; + return (uint32_t)(size_t)a; } #endif /* WOLFSSL_SP_SMALL */ @@ -21145,59 +38846,184 @@ SP_NOINLINE static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, sp_digit b) { __asm__ __volatile__ ( - "mov r6, #32\n\t" - "add r6, %[a]\n\t" + "movs r6, #32\n\t" +#ifdef __clang__ + 
"adds r6, r6, %[a]\n\t" +#else + "add r6, r6, %[a]\n\t" +#endif "mov r8, %[r]\n\t" "mov r9, r6\n\t" - "mov r3, #0\n\t" - "mov r4, #0\n\t" - "1:\n\t" - "mov %[r], #0\n\t" - "mov r5, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" + "\n" + "L_sp_256_mul_d_8_%=: \n\t" + "movs %[r], #0\n\t" + "movs r5, #0\n\t" "# A[] * B\n\t" "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, %[b], #16\n\t" +#else "lsl r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "lsrs r7, %[b], #16\n\t" +#else "lsr r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, %[b], #16\n\t" +#else "lsr r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "lsls 
r7, %[b], #16\n\t" +#else "lsl r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# A[] * B - Done\n\t" "mov %[r], r8\n\t" "str r3, [%[r]]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "add %[r], #4\n\t" - "add %[a], #4\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" +#ifdef __clang__ + "adds %[r], %[r], #4\n\t" +#else + "add %[r], %[r], #4\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif "mov r8, %[r]\n\t" "cmp %[a], r9\n\t" - "blt 1b\n\t" + "blt L_sp_256_mul_d_8_%=\n\t" "str r3, [%[r]]\n\t" - : [r] "+r" (r), [a] "+r" (a) - : [b] "r" (b) + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -21214,135 +39040,512 @@ SP_NOINLINE static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, SP_NOINLINE static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, sp_digit div) { - sp_digit r = 0; - __asm__ __volatile__ ( + "movs r3, #0\n\t" +#ifdef __clang__ + "lsrs r5, %[div], #1\n\t" +#else "lsr r5, %[div], #1\n\t" - "add r5, #1\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, #1\n\t" +#else + "add r5, r5, #1\n\t" +#endif "mov r8, %[d0]\n\t" "mov r9, %[d1]\n\t" "# Do top 32\n\t" - "mov r6, r5\n\t" - "sub r6, %[d1]\n\t" + "movs r6, r5\n\t" +#ifdef __clang__ + "subs r6, r6, %[d1]\n\t" +#else + "sub r6, r6, %[d1]\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" - "add %[r], %[r]\n\t" - "sub %[r], r6\n\t" +#endif +#ifdef __clang__ 
+ "adds r3, r3, r3\n\t" +#else + "add r3, r3, r3\n\t" +#endif +#ifdef __clang__ + "subs r3, r3, r6\n\t" +#else + "sub r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "ands r6, r5\n\t" +#else "and r6, r5\n\t" - "sub %[d1], r6\n\t" - "# Next 30 bits\n\t" - "mov r4, #29\n\t" - "1:\n\t" +#endif +#ifdef __clang__ + "subs %[d1], %[d1], r6\n\t" +#else + "sub %[d1], %[d1], r6\n\t" +#endif + "\n\t" + "movs r4, #29\n\t" + "\n" + "L_div_256_word_8_loop_%=: \n\t" +#ifdef __clang__ + "lsls %[d0], %[d0], #1\n\t" +#else "lsl %[d0], %[d0], #1\n\t" +#endif +#ifdef __clang__ + "adcs %[d1], %[d1]\n\t" +#else "adc %[d1], %[d1]\n\t" - "mov r6, r5\n\t" - "sub r6, %[d1]\n\t" +#endif + "movs r6, r5\n\t" +#ifdef __clang__ + "subs r6, r6, %[d1]\n\t" +#else + "sub r6, r6, %[d1]\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" - "add %[r], %[r]\n\t" - "sub %[r], r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r3\n\t" +#else + "add r3, r3, r3\n\t" +#endif +#ifdef __clang__ + "subs r3, r3, r6\n\t" +#else + "sub r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "ands r6, r5\n\t" +#else "and r6, r5\n\t" - "sub %[d1], r6\n\t" - "sub r4, #1\n\t" - "bpl 1b\n\t" - "mov r7, #0\n\t" - "add %[r], %[r]\n\t" - "add %[r], #1\n\t" +#endif +#ifdef __clang__ + "subs %[d1], %[d1], r6\n\t" +#else + "sub %[d1], %[d1], r6\n\t" +#endif +#ifdef __clang__ + "subs r4, r4, #1\n\t" +#else + "sub r4, r4, #1\n\t" +#endif + "bpl L_div_256_word_8_loop_%=\n\t" + "movs r7, #0\n\t" +#ifdef __clang__ + "adds r3, r3, r3\n\t" +#else + "add r3, r3, r3\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, #1\n\t" +#else + "add r3, r3, #1\n\t" +#endif "# r * div - Start\n\t" - "lsl %[d1], %[r], #16\n\t" +#ifdef __clang__ + "lsls %[d1], r3, #16\n\t" +#else + "lsl %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "lsls r4, %[div], #16\n\t" +#else "lsl r4, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], %[d1], #16\n\t" +#else "lsr %[d1], %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #16\n\t" +#else "lsr 
r4, r4, #16\n\t" +#endif +#ifdef __clang__ + "muls r4, %[d1]\n\t" +#else "mul r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[div], #16\n\t" +#else "lsr r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r5, %[d1], #16\n\t" +#else "lsr r5, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" - "lsr %[d1], %[r], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], r3, #16\n\t" +#else + "lsr %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, %[d1]\n\t" +#else "mul r6, %[d1]\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "lsls r6, %[div], #16\n\t" +#else "lsl r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[d1], #16\n\t" +#else "lsr r6, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" +#endif "# r * div - Done\n\t" "mov %[d1], r8\n\t" - "sub %[d1], r4\n\t" - "mov r4, %[d1]\n\t" +#ifdef __clang__ + "subs %[d1], %[d1], r4\n\t" +#else + "sub %[d1], %[d1], r4\n\t" +#endif + "movs r4, %[d1]\n\t" "mov %[d1], r9\n\t" +#ifdef __clang__ + "sbcs %[d1], r5\n\t" +#else "sbc %[d1], r5\n\t" - "mov r5, %[d1]\n\t" - "add %[r], r5\n\t" +#endif + "movs r5, %[d1]\n\t" +#ifdef __clang__ + "adds r3, r3, r5\n\t" +#else + "add r3, r3, r5\n\t" +#endif "# r * div - Start\n\t" - "lsl %[d1], 
%[r], #16\n\t" +#ifdef __clang__ + "lsls %[d1], r3, #16\n\t" +#else + "lsl %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "lsls r4, %[div], #16\n\t" +#else "lsl r4, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], %[d1], #16\n\t" +#else "lsr %[d1], %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #16\n\t" +#else "lsr r4, r4, #16\n\t" +#endif +#ifdef __clang__ + "muls r4, %[d1]\n\t" +#else "mul r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[div], #16\n\t" +#else "lsr r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r5, %[d1], #16\n\t" +#else "lsr r5, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" - "lsr %[d1], %[r], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], r3, #16\n\t" +#else + "lsr %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, %[d1]\n\t" +#else "mul r6, %[d1]\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "lsls r6, %[div], #16\n\t" +#else "lsl r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[d1], #16\n\t" +#else "lsr r6, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" +#endif "# r * div - Done\n\t" "mov %[d1], r8\n\t" "mov r6, r9\n\t" +#ifdef __clang__ + "subs r4, %[d1], r4\n\t" +#else "sub r4, %[d1], r4\n\t" +#endif 
+#ifdef __clang__ + "sbcs r6, r5\n\t" +#else "sbc r6, r5\n\t" - "mov r5, r6\n\t" - "add %[r], r5\n\t" +#endif + "movs r5, r6\n\t" +#ifdef __clang__ + "adds r3, r3, r5\n\t" +#else + "add r3, r3, r5\n\t" +#endif "# r * div - Start\n\t" - "lsl %[d1], %[r], #16\n\t" +#ifdef __clang__ + "lsls %[d1], r3, #16\n\t" +#else + "lsl %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "lsls r4, %[div], #16\n\t" +#else "lsl r4, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], %[d1], #16\n\t" +#else "lsr %[d1], %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #16\n\t" +#else "lsr r4, r4, #16\n\t" +#endif +#ifdef __clang__ + "muls r4, %[d1]\n\t" +#else "mul r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[div], #16\n\t" +#else "lsr r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r5, %[d1], #16\n\t" +#else "lsr r5, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" - "lsr %[d1], %[r], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], r3, #16\n\t" +#else + "lsr %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, %[d1]\n\t" +#else "mul r6, %[d1]\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "lsls r6, %[div], #16\n\t" +#else "lsl r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[d1], #16\n\t" +#else "lsr r6, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + 
"add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" +#endif "# r * div - Done\n\t" "mov %[d1], r8\n\t" "mov r6, r9\n\t" +#ifdef __clang__ + "subs r4, %[d1], r4\n\t" +#else "sub r4, %[d1], r4\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r5\n\t" +#else "sbc r6, r5\n\t" - "mov r5, r6\n\t" - "add %[r], r5\n\t" - "mov r6, %[div]\n\t" - "sub r6, r4\n\t" +#endif + "movs r5, r6\n\t" +#ifdef __clang__ + "adds r3, r3, r5\n\t" +#else + "add r3, r3, r5\n\t" +#endif + "movs r6, %[div]\n\t" +#ifdef __clang__ + "subs r6, r6, r4\n\t" +#else + "sub r6, r6, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" - "sub %[r], r6\n\t" - : [r] "+r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r7", "r6", "r8", "r9" +#endif +#ifdef __clang__ + "subs r3, r3, r6\n\t" +#else + "sub r3, r3, r6\n\t" +#endif + "movs %[d1], r3\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); - return r; + return (uint32_t)(size_t)d1; } /* AND m into each word of a and store in r. @@ -21978,49 +40181,142 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, #endif /* HAVE_ECC_SIGN */ #ifndef WOLFSSL_SP_SMALL -static void sp_256_rshift1_8(sp_digit* r, sp_digit* a) +/* Right shift a by 1 bit into r. (r = a >> 1) + * + * r A single precision integer. + * a A single precision integer. 
+ */ +static void sp_256_rshift1_8(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( "ldr r2, [%[a]]\n\t" "ldr r3, [%[a], #4]\n\t" +#ifdef __clang__ + "lsrs r2, r2, #1\n\t" +#else "lsr r2, r2, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r3, #31\n\t" +#else "lsl r5, r3, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r3, r3, #1\n\t" +#else "lsr r3, r3, #1\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "orrs r2, r5\n\t" +#else + "orr r2, r5\n\t" +#endif "ldr r4, [%[a], #8]\n\t" - "str r2, [%[r], #0]\n\t" + "str r2, [%[r]]\n\t" +#ifdef __clang__ + "lsls r5, r4, #31\n\t" +#else "lsl r5, r4, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #1\n\t" +#else "lsr r4, r4, #1\n\t" - "orr r3, r3, r5\n\t" +#endif +#ifdef __clang__ + "orrs r3, r5\n\t" +#else + "orr r3, r5\n\t" +#endif "ldr r2, [%[a], #12]\n\t" "str r3, [%[r], #4]\n\t" +#ifdef __clang__ + "lsls r5, r2, #31\n\t" +#else "lsl r5, r2, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r2, r2, #1\n\t" +#else "lsr r2, r2, #1\n\t" - "orr r4, r4, r5\n\t" +#endif +#ifdef __clang__ + "orrs r4, r5\n\t" +#else + "orr r4, r5\n\t" +#endif "ldr r3, [%[a], #16]\n\t" "str r4, [%[r], #8]\n\t" +#ifdef __clang__ + "lsls r5, r3, #31\n\t" +#else "lsl r5, r3, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r3, r3, #1\n\t" +#else "lsr r3, r3, #1\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "orrs r2, r5\n\t" +#else + "orr r2, r5\n\t" +#endif "ldr r4, [%[a], #20]\n\t" "str r2, [%[r], #12]\n\t" +#ifdef __clang__ + "lsls r5, r4, #31\n\t" +#else "lsl r5, r4, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #1\n\t" +#else "lsr r4, r4, #1\n\t" - "orr r3, r3, r5\n\t" +#endif +#ifdef __clang__ + "orrs r3, r5\n\t" +#else + "orr r3, r5\n\t" +#endif "ldr r2, [%[a], #24]\n\t" "str r3, [%[r], #16]\n\t" +#ifdef __clang__ + "lsls r5, r2, #31\n\t" +#else "lsl r5, r2, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r2, r2, #1\n\t" +#else "lsr r2, r2, #1\n\t" - "orr r4, r4, r5\n\t" +#endif +#ifdef __clang__ + "orrs r4, r5\n\t" +#else + "orr r4, 
r5\n\t" +#endif "ldr r3, [%[a], #28]\n\t" "str r4, [%[r], #20]\n\t" +#ifdef __clang__ + "lsls r5, r3, #31\n\t" +#else "lsl r5, r3, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r3, r3, #1\n\t" +#else "lsr r3, r3, #1\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "orrs r2, r5\n\t" +#else + "orr r2, r5\n\t" +#endif "str r2, [%[r], #24]\n\t" "str r3, [%[r], #28]\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) : "memory", "r2", "r3", "r4", "r5" ); } @@ -22031,102 +40327,242 @@ static void sp_256_rshift1_8(sp_digit* r, sp_digit* a) * a Number to divide. * m Modulus. */ -static void sp_256_div2_mod_8(sp_digit* r, const sp_digit* a, - const sp_digit* m) +static void sp_256_div2_mod_8(sp_digit* r, const sp_digit* a, const sp_digit* m) { __asm__ __volatile__ ( - "ldr r7, [%[a], #0]\n\t" - "lsl r7, r7, #31\n\t" - "beq 1f\n\t" - "lsr r7, r7, #31\n\t" - "ldr r5, [%[m], #0]\n\t" - "ldr r6, [%[m], #4]\n\t" - "ldr r3, [%[a], #0]\n\t" - "ldr r4, [%[a], #4]\n\t" - "add r3, r5\n\t" - "adc r4, r6\n\t" - "str r3, [%[r], #0]\n\t" - "str r4, [%[r], #4]\n\t" - "ldr r5, [%[m], #8]\n\t" - "ldr r6, [%[m], #12]\n\t" - "ldr r3, [%[a], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "adc r3, r5\n\t" - "adc r4, r6\n\t" - "str r3, [%[r], #8]\n\t" - "str r4, [%[r], #12]\n\t" - "ldr r5, [%[m], #16]\n\t" - "ldr r6, [%[m], #20]\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "adc r3, r5\n\t" - "adc r4, r6\n\t" - "str r3, [%[r], #16]\n\t" - "str r4, [%[r], #20]\n\t" - "ldr r5, [%[m], #24]\n\t" - "ldr r6, [%[m], #28]\n\t" - "ldr r3, [%[a], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "adc r3, r5\n\t" - "adc r4, r6\n\t" - "mov r7, #0\n\t" - "adc r7, r7\n\t" - "lsl r7, r7, #31\n\t" - "b 2f\n\t" - "\n1:\n\t" - "ldr r3, [%[a], #24]\n\t" - "ldr r4, [%[a], #28]\n\t" - "\n2:\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, #31\n\t" - "lsr r6, r4, #1\n\t" - "lsl r4, r4, #31\n\t" - "orr r5, r4\n\t" - "orr r6, r7\n\t" - "mov r7, r3\n\t" - "str r5, [%[r], #24]\n\t" - "str r6, [%[r], #28]\n\t" 
- "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, #31\n\t" - "lsr r6, r4, #1\n\t" - "lsl r4, r4, #31\n\t" - "orr r5, r4\n\t" - "orr r6, r7\n\t" - "mov r7, r3\n\t" - "str r5, [%[r], #16]\n\t" - "str r6, [%[r], #20]\n\t" - "ldr r3, [%[a], #8]\n\t" - "ldr r4, [%[a], #12]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, #31\n\t" - "lsr r6, r4, #1\n\t" - "lsl r4, r4, #31\n\t" - "orr r5, r4\n\t" - "orr r6, r7\n\t" - "mov r7, r3\n\t" - "str r5, [%[r], #8]\n\t" - "str r6, [%[r], #12]\n\t" - "ldr r3, [%[r], #0]\n\t" - "ldr r4, [%[r], #4]\n\t" - "lsr r5, r3, #1\n\t" - "lsr r6, r4, #1\n\t" - "lsl r4, r4, #31\n\t" - "orr r5, r4\n\t" - "orr r6, r7\n\t" - "str r5, [%[r], #0]\n\t" - "str r6, [%[r], #4]\n\t" + "ldr r7, [%[a]]\n\t" +#ifdef __clang__ + "lsls r7, r7, #31\n\t" +#else + "lsl r7, r7, #31\n\t" +#endif + "beq L_sp_256_div2_mod_8_no_add_%=\n\t" +#ifdef __clang__ + "lsrs r7, r7, #31\n\t" +#else + "lsr r7, r7, #31\n\t" +#endif + "ldr r5, [%[m]]\n\t" + "ldr r6, [%[m], #4]\n\t" + "ldr r3, [%[a]]\n\t" + "ldr r4, [%[a], #4]\n\t" +#ifdef __clang__ + "adds r3, r3, r5\n\t" +#else + "add r3, r3, r5\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif + "str r3, [%[r]]\n\t" + "str r4, [%[r], #4]\n\t" + "ldr r5, [%[m], #8]\n\t" + "ldr r6, [%[m], #12]\n\t" + "ldr r3, [%[a], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" +#ifdef __clang__ + "adcs r3, r5\n\t" +#else + "adc r3, r5\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif + "str r3, [%[r], #8]\n\t" + "str r4, [%[r], #12]\n\t" + "ldr r5, [%[m], #16]\n\t" + "ldr r6, [%[m], #20]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" +#ifdef __clang__ + "adcs r3, r5\n\t" +#else + "adc r3, r5\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif + "str r3, [%[r], #16]\n\t" + "str r4, [%[r], #20]\n\t" + "ldr r5, [%[m], #24]\n\t" + "ldr r6, [%[m], #28]\n\t" + "ldr r3, [%[a], #24]\n\t" + "ldr r4, [%[a], 
#28]\n\t" +#ifdef __clang__ + "adcs r3, r5\n\t" +#else + "adc r3, r5\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif + "movs r7, #0\n\t" +#ifdef __clang__ + "adcs r7, r7\n\t" +#else + "adc r7, r7\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #31\n\t" +#else + "lsl r7, r7, #31\n\t" +#endif + "b L_sp_256_div2_mod_8_div2_%=\n\t" + "\n" + "L_sp_256_div2_mod_8_no_add_%=: \n\t" + "ldr r3, [%[a], #24]\n\t" + "ldr r4, [%[a], #28]\n\t" + "\n" + "L_sp_256_div2_mod_8_div2_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #1\n\t" +#else + "lsr r5, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, #31\n\t" +#else + "lsl r3, r3, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, #31\n\t" +#else + "lsl r4, r4, #31\n\t" +#endif +#ifdef __clang__ + "orrs r5, r4\n\t" +#else + "orr r5, r4\n\t" +#endif +#ifdef __clang__ + "orrs r6, r7\n\t" +#else + "orr r6, r7\n\t" +#endif + "movs r7, r3\n\t" + "str r5, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + "ldr r3, [%[a], #16]\n\t" + "ldr r4, [%[a], #20]\n\t" +#ifdef __clang__ + "lsrs r5, r3, #1\n\t" +#else + "lsr r5, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, #31\n\t" +#else + "lsl r3, r3, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, #31\n\t" +#else + "lsl r4, r4, #31\n\t" +#endif +#ifdef __clang__ + "orrs r5, r4\n\t" +#else + "orr r5, r4\n\t" +#endif +#ifdef __clang__ + "orrs r6, r7\n\t" +#else + "orr r6, r7\n\t" +#endif + "movs r7, r3\n\t" + "str r5, [%[r], #16]\n\t" + "str r6, [%[r], #20]\n\t" + "ldr r3, [%[a], #8]\n\t" + "ldr r4, [%[a], #12]\n\t" +#ifdef __clang__ + "lsrs r5, r3, #1\n\t" +#else + "lsr r5, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, #31\n\t" +#else + "lsl r3, r3, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, #31\n\t" 
+#else + "lsl r4, r4, #31\n\t" +#endif +#ifdef __clang__ + "orrs r5, r4\n\t" +#else + "orr r5, r4\n\t" +#endif +#ifdef __clang__ + "orrs r6, r7\n\t" +#else + "orr r6, r7\n\t" +#endif + "movs r7, r3\n\t" + "str r5, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r3, [%[r]]\n\t" + "ldr r4, [%[r], #4]\n\t" +#ifdef __clang__ + "lsrs r5, r3, #1\n\t" +#else + "lsr r5, r3, #1\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r4, #1\n\t" +#else + "lsr r6, r4, #1\n\t" +#endif +#ifdef __clang__ + "lsls r4, r4, #31\n\t" +#else + "lsl r4, r4, #31\n\t" +#endif +#ifdef __clang__ + "orrs r5, r4\n\t" +#else + "orr r5, r4\n\t" +#endif +#ifdef __clang__ + "orrs r6, r7\n\t" +#else + "orr r6, r7\n\t" +#endif + "str r5, [%[r]]\n\t" + "str r6, [%[r], #4]\n\t" + : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : - : [r] "r" (r), [a] "r" (a), [m] "r" (m) - : "memory", "r3", "r4", "r5", "r6", "r7" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8" ); } static int sp_256_num_bits_8(sp_digit* a) { - int r = 0; - static const byte table[256] = { + static const byte sp_num_bits_table[256] = { 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, @@ -22144,332 +40580,692 @@ static int sp_256_num_bits_8(sp_digit* a) 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, }; - + const byte* table = sp_num_bits_table; __asm__ __volatile__ ( - "mov r6, #0xff\n\t" - "ldr r3, [%[a], #28]\n\t" - "cmp r3, #0\n\t" - "beq 7f\n\t" - "lsr r5, r3, #24\n\t" - "cmp r5, #0\n\t" - "beq 93f\n\t" - "mov %[r], #248\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n93:\n\t" - "lsr r5, r3, #16\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 92f\n\t" - "mov %[r], #240\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n92:\n\t" - "lsr r5, r3, #8\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 91f\n\t" - "mov %[r], #232\n\t" - "ldrb r4, 
[%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n91:\n\t" - "lsr r5, r3, #0\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 90f\n\t" - "mov %[r], #224\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n90:\n\t" - "b 9f\n\t" - "\n7:\n\t" - "ldr r3, [%[a], #24]\n\t" - "cmp r3, #0\n\t" - "beq 6f\n\t" - "lsr r5, r3, #24\n\t" - "cmp r5, #0\n\t" - "beq 83f\n\t" - "mov %[r], #216\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n83:\n\t" - "lsr r5, r3, #16\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 82f\n\t" - "mov %[r], #208\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n82:\n\t" - "lsr r5, r3, #8\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 81f\n\t" - "mov %[r], #200\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n81:\n\t" - "lsr r5, r3, #0\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 80f\n\t" - "mov %[r], #192\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n80:\n\t" - "b 9f\n\t" - "\n6:\n\t" - "ldr r3, [%[a], #20]\n\t" - "cmp r3, #0\n\t" - "beq 5f\n\t" - "lsr r5, r3, #24\n\t" - "cmp r5, #0\n\t" - "beq 73f\n\t" - "mov %[r], #184\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n73:\n\t" - "lsr r5, r3, #16\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 72f\n\t" - "mov %[r], #176\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n72:\n\t" - "lsr r5, r3, #8\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 71f\n\t" - "mov %[r], #168\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n71:\n\t" - "lsr r5, r3, #0\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 70f\n\t" - "mov %[r], #160\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n70:\n\t" - "b 9f\n\t" - "\n5:\n\t" - "ldr r3, [%[a], #16]\n\t" - "cmp r3, #0\n\t" - "beq 4f\n\t" - "lsr r5, r3, 
#24\n\t" - "cmp r5, #0\n\t" - "beq 63f\n\t" - "mov %[r], #152\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n63:\n\t" - "lsr r5, r3, #16\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 62f\n\t" - "mov %[r], #144\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n62:\n\t" - "lsr r5, r3, #8\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 61f\n\t" - "mov %[r], #136\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n61:\n\t" - "lsr r5, r3, #0\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 60f\n\t" - "mov %[r], #128\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n60:\n\t" - "b 9f\n\t" - "\n4:\n\t" - "ldr r3, [%[a], #12]\n\t" - "cmp r3, #0\n\t" - "beq 3f\n\t" - "lsr r5, r3, #24\n\t" - "cmp r5, #0\n\t" - "beq 53f\n\t" - "mov %[r], #120\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n53:\n\t" - "lsr r5, r3, #16\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 52f\n\t" - "mov %[r], #112\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n52:\n\t" - "lsr r5, r3, #8\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 51f\n\t" - "mov %[r], #104\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n51:\n\t" - "lsr r5, r3, #0\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 50f\n\t" - "mov %[r], #96\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n50:\n\t" - "b 9f\n\t" - "\n3:\n\t" - "ldr r3, [%[a], #8]\n\t" - "cmp r3, #0\n\t" - "beq 2f\n\t" - "lsr r5, r3, #24\n\t" - "cmp r5, #0\n\t" - "beq 43f\n\t" - "mov %[r], #88\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n43:\n\t" - "lsr r5, r3, #16\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 42f\n\t" - "mov %[r], #80\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n42:\n\t" - "lsr r5, r3, #8\n\t" - "and r5, 
r6\n\t" - "cmp r5, #0\n\t" - "beq 41f\n\t" - "mov %[r], #72\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n41:\n\t" - "lsr r5, r3, #0\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 40f\n\t" - "mov %[r], #64\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n40:\n\t" - "b 9f\n\t" - "\n2:\n\t" - "ldr r3, [%[a], #4]\n\t" - "cmp r3, #0\n\t" - "beq 1f\n\t" - "lsr r5, r3, #24\n\t" - "cmp r5, #0\n\t" - "beq 33f\n\t" - "mov %[r], #56\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n33:\n\t" - "lsr r5, r3, #16\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 32f\n\t" - "mov %[r], #48\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n32:\n\t" - "lsr r5, r3, #8\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 31f\n\t" - "mov %[r], #40\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n31:\n\t" - "lsr r5, r3, #0\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 30f\n\t" - "mov %[r], #32\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n30:\n\t" - "b 9f\n\t" - "\n1:\n\t" - "ldr r3, [%[a], #0]\n\t" - "lsr r5, r3, #24\n\t" - "cmp r5, #0\n\t" - "beq 23f\n\t" - "mov %[r], #24\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n23:\n\t" - "lsr r5, r3, #16\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 22f\n\t" - "mov %[r], #16\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n22:\n\t" - "lsr r5, r3, #8\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 21f\n\t" - "mov %[r], #8\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n21:\n\t" - "lsr r5, r3, #0\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 20f\n\t" - "mov %[r], #0\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 9f\n\t" - "\n20:\n\t" - "\n9:\n\t" - : [r] "+r" (r) - : [a] "r" (a), [table] "r" (table) - : "r3", 
"r4", "r5", "r6" + "movs r6, #0xff\n\t" + "ldr r3, [%[a], #28]\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_7_%=\n\t" +#ifdef __clang__ + "lsrs r5, r3, #24\n\t" +#else + "lsr r5, r3, #24\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_93_%=\n\t" + "movs r2, #0xf8\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_93_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #16\n\t" +#else + "lsr r5, r3, #16\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_92_%=\n\t" + "movs r2, #0xf0\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_92_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #8\n\t" +#else + "lsr r5, r3, #8\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_91_%=\n\t" + "movs r2, #0xe8\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_91_%=: \n\t" + "movs r5, r3\n\t" +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_90_%=\n\t" + "movs r2, #0xe0\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_90_%=: \n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_7_%=: \n\t" + "ldr r3, [%[a], #24]\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_6_%=\n\t" +#ifdef __clang__ + "lsrs r5, r3, #24\n\t" +#else + "lsr r5, r3, #24\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_83_%=\n\t" + "movs r2, 
#0xd8\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_83_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #16\n\t" +#else + "lsr r5, r3, #16\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_82_%=\n\t" + "movs r2, #0xd0\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_82_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #8\n\t" +#else + "lsr r5, r3, #8\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_81_%=\n\t" + "movs r2, #0xc8\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_81_%=: \n\t" + "movs r5, r3\n\t" +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_80_%=\n\t" + "movs r2, #0xc0\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_80_%=: \n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_6_%=: \n\t" + "ldr r3, [%[a], #20]\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_5_%=\n\t" +#ifdef __clang__ + "lsrs r5, r3, #24\n\t" +#else + "lsr r5, r3, #24\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_73_%=\n\t" + "movs r2, #0xb8\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_73_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #16\n\t" +#else + "lsr r5, r3, #16\n\t" 
+#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_72_%=\n\t" + "movs r2, #0xb0\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_72_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #8\n\t" +#else + "lsr r5, r3, #8\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_71_%=\n\t" + "movs r2, #0xa8\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_71_%=: \n\t" + "movs r5, r3\n\t" +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_70_%=\n\t" + "movs r2, #0xa0\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_70_%=: \n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_5_%=: \n\t" + "ldr r3, [%[a], #16]\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_4_%=\n\t" +#ifdef __clang__ + "lsrs r5, r3, #24\n\t" +#else + "lsr r5, r3, #24\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_63_%=\n\t" + "movs r2, #0x98\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_63_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #16\n\t" +#else + "lsr r5, r3, #16\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_62_%=\n\t" + "movs r2, #0x90\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b 
L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_62_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #8\n\t" +#else + "lsr r5, r3, #8\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_61_%=\n\t" + "movs r2, #0x88\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_61_%=: \n\t" + "movs r5, r3\n\t" +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_60_%=\n\t" + "movs r2, #0x80\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_60_%=: \n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_4_%=: \n\t" + "ldr r3, [%[a], #12]\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_3_%=\n\t" +#ifdef __clang__ + "lsrs r5, r3, #24\n\t" +#else + "lsr r5, r3, #24\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_53_%=\n\t" + "movs r2, #0x78\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_53_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #16\n\t" +#else + "lsr r5, r3, #16\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_52_%=\n\t" + "movs r2, #0x70\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_52_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #8\n\t" +#else + "lsr r5, r3, #8\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq 
L_sp_256_num_bits_8_51_%=\n\t" + "movs r2, #0x68\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_51_%=: \n\t" + "movs r5, r3\n\t" +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_50_%=\n\t" + "movs r2, #0x60\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_50_%=: \n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_3_%=: \n\t" + "ldr r3, [%[a], #8]\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_2_%=\n\t" +#ifdef __clang__ + "lsrs r5, r3, #24\n\t" +#else + "lsr r5, r3, #24\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_43_%=\n\t" + "movs r2, #0x58\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_43_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #16\n\t" +#else + "lsr r5, r3, #16\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_42_%=\n\t" + "movs r2, #0x50\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_42_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #8\n\t" +#else + "lsr r5, r3, #8\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_41_%=\n\t" + "movs r2, #0x48\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_41_%=: \n\t" + "movs r5, r3\n\t" +#ifdef 
__clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_40_%=\n\t" + "movs r2, #0x40\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_40_%=: \n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_2_%=: \n\t" + "ldr r3, [%[a], #4]\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_1_%=\n\t" +#ifdef __clang__ + "lsrs r5, r3, #24\n\t" +#else + "lsr r5, r3, #24\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_33_%=\n\t" + "movs r2, #56\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_33_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #16\n\t" +#else + "lsr r5, r3, #16\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_32_%=\n\t" + "movs r2, #48\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_32_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #8\n\t" +#else + "lsr r5, r3, #8\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_31_%=\n\t" + "movs r2, #40\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_31_%=: \n\t" + "movs r5, r3\n\t" +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_30_%=\n\t" + "movs r2, #32\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b 
L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_30_%=: \n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_1_%=: \n\t" + "ldr r3, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r5, r3, #24\n\t" +#else + "lsr r5, r3, #24\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_23_%=\n\t" + "movs r2, #24\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_23_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #16\n\t" +#else + "lsr r5, r3, #16\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_22_%=\n\t" + "movs r2, #16\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_22_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #8\n\t" +#else + "lsr r5, r3, #8\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_21_%=\n\t" + "movs r2, #8\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_21_%=: \n\t" + "movs r5, r3\n\t" +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_256_num_bits_8_20_%=\n\t" + "movs r2, #0\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_20_%=: \n\t" + "\n" + "L_sp_256_num_bits_8_9_%=: \n\t" + "movs %[a], r2\n\t" + : [a] "+r" (a), [table] "+r" (table) + : + : "memory", "r2", "r3", "r4", "r5", "r6" ); - - return r; + return (uint32_t)(size_t)a; } /* Non-constant time modular inversion. 
@@ -23554,95 +42350,243 @@ static const sp_digit p384_b[12] = { SP_NOINLINE static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit tmp[12 * 2]; + sp_digit t[12 * 2]; + sp_digit* tmp = t; __asm__ __volatile__ ( - "mov r3, #0\n\t" - "mov r4, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" "mov r8, r3\n\t" - "mov r11, %[r]\n\t" + "mov r11, %[tmp]\n\t" "mov r9, %[a]\n\t" "mov r10, %[b]\n\t" - "mov r6, #48\n\t" - "add r6, r9\n\t" + "movs r6, #48\n\t" + "add r6, r6, r9\n\t" "mov r12, r6\n\t" - "\n1:\n\t" - "mov %[r], #0\n\t" - "mov r5, #0\n\t" - "mov r6, #44\n\t" + "\n" + "L_sp_384_mul_12_words_%=: \n\t" + "movs %[tmp], #0\n\t" + "movs r5, #0\n\t" + "movs r6, #44\n\t" "mov %[a], r8\n\t" - "sub %[a], r6\n\t" +#ifdef __clang__ + "subs %[a], %[a], r6\n\t" +#else + "sub %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" +#endif +#ifdef __clang__ + "mvns r6, r6\n\t" +#else "mvn r6, r6\n\t" +#endif +#ifdef __clang__ + "ands %[a], r6\n\t" +#else "and %[a], r6\n\t" +#endif "mov %[b], r8\n\t" - "sub %[b], %[a]\n\t" - "add %[a], r9\n\t" - "add %[b], r10\n\t" - "\n2:\n\t" +#ifdef __clang__ + "subs %[b], %[b], %[a]\n\t" +#else + "sub %[b], %[b], %[a]\n\t" +#endif + "add %[a], %[a], r9\n\t" + "add %[b], %[b], r10\n\t" + "\n" + "L_sp_384_mul_12_mul_%=: \n\t" "# Multiply Start\n\t" "ldr r6, [%[a]]\n\t" "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" - "adc r4, %[r]\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[tmp]\n\t" +#else + "adc r4, 
%[tmp]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r6, [%[a]]\n\t" "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "# Multiply Done\n\t" - "add %[a], #4\n\t" - "sub %[b], #4\n\t" +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" 
+#endif +#ifdef __clang__ + "subs %[b], %[b], #4\n\t" +#else + "sub %[b], %[b], #4\n\t" +#endif "cmp %[a], r12\n\t" - "beq 3f\n\t" + "beq L_sp_384_mul_12_done_mul_%=\n\t" "mov r6, r8\n\t" - "add r6, r9\n\t" + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" - "ble 2b\n\t" - "\n3:\n\t" - "mov %[r], r11\n\t" + "ble L_sp_384_mul_12_mul_%=\n\t" + "\n" + "L_sp_384_mul_12_done_mul_%=: \n\t" + "mov %[tmp], r11\n\t" "mov r7, r8\n\t" - "str r3, [%[r], r7]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "add r7, #4\n\t" + "str r3, [%[tmp], r7]\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "mov r8, r7\n\t" - "mov r6, #88\n\t" + "movs r6, #0x58\n\t" "cmp r7, r6\n\t" - "ble 1b\n\t" - "str r3, [%[r], r7]\n\t" + "ble L_sp_384_mul_12_words_%=\n\t" + "str r3, [%[tmp], r7]\n\t" "mov %[a], r9\n\t" "mov %[b], r10\n\t" + : [a] "+r" (a), [b] "+r" (b), [tmp] "+r" (tmp) : - : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); - XMEMCPY(r, tmp, sizeof(tmp)); + XMEMCPY(r, t, sizeof(t)); } /* Square a and put result in r. 
(r = a * a) @@ -23653,142 +42597,420 @@ SP_NOINLINE static void sp_384_mul_12(sp_digit* r, const sp_digit* a, SP_NOINLINE static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( - "mov r3, #0\n\t" - "mov r4, #0\n\t" - "mov r5, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" + "movs r5, #0\n\t" "mov r8, r3\n\t" "mov r11, %[r]\n\t" - "mov r6, #96\n\t" + "movs r6, #0x60\n\t" +#ifdef __clang__ + "negs r6, r6\n\t" +#else "neg r6, r6\n\t" - "add sp, r6\n\t" +#endif + "add sp, sp, r6\n\t" "mov r10, sp\n\t" "mov r9, %[a]\n\t" - "\n1:\n\t" - "mov %[r], #0\n\t" - "mov r6, #44\n\t" + "\n" + "L_sp_384_sqr_12_words_%=: \n\t" + "movs %[r], #0\n\t" + "movs r6, #44\n\t" "mov %[a], r8\n\t" - "sub %[a], r6\n\t" +#ifdef __clang__ + "subs %[a], %[a], r6\n\t" +#else + "sub %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" +#endif +#ifdef __clang__ + "mvns r6, r6\n\t" +#else "mvn r6, r6\n\t" +#endif +#ifdef __clang__ + "ands %[a], r6\n\t" +#else "and %[a], r6\n\t" +#endif "mov r2, r8\n\t" - "sub r2, %[a]\n\t" - "add %[a], r9\n\t" - "add r2, r9\n\t" - "\n2:\n\t" +#ifdef __clang__ + "subs r2, r2, %[a]\n\t" +#else + "sub r2, r2, %[a]\n\t" +#endif + "add %[a], %[a], r9\n\t" + "add r2, r2, r9\n\t" + "\n" + "L_sp_384_sqr_12_mul_%=: \n\t" "cmp r2, %[a]\n\t" - "beq 4f\n\t" + "beq L_sp_384_sqr_12_sqr_%=\n\t" "# Multiply * 2: Start\n\t" "ldr r6, [%[a]]\n\t" "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif 
+#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" 
+#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# Multiply * 2: Done\n\t" - "bal 5f\n\t" - "\n4:\n\t" + "bal L_sp_384_sqr_12_done_sqr_%=\n\t" + "\n" + "L_sp_384_sqr_12_sqr_%=: \n\t" "# Square: Start\n\t" "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r6\n\t" +#else "mul r6, r6\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "muls r7, r7\n\t" +#else "mul r7, r7\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, 
#15\n\t" +#else "lsr r7, r6, #15\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #17\n\t" +#else "lsl r6, r6, #17\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# Square: Done\n\t" - "\n5:\n\t" - "add %[a], #4\n\t" - "sub r2, #4\n\t" - "mov r6, #48\n\t" - "add r6, r9\n\t" + "\n" + "L_sp_384_sqr_12_done_sqr_%=: \n\t" +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "subs r2, r2, #4\n\t" +#else + "sub r2, r2, #4\n\t" +#endif + "movs r6, #48\n\t" + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" - "beq 3f\n\t" + "beq L_sp_384_sqr_12_done_mul_%=\n\t" "cmp %[a], r2\n\t" - "bgt 3f\n\t" + "bgt L_sp_384_sqr_12_done_mul_%=\n\t" "mov r7, r8\n\t" - "add r7, r9\n\t" + "add r7, r7, r9\n\t" "cmp %[a], r7\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble L_sp_384_sqr_12_mul_%=\n\t" + "\n" + "L_sp_384_sqr_12_done_mul_%=: \n\t" "mov %[r], r10\n\t" "mov r7, r8\n\t" "str r3, [%[r], r7]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "mov r5, #0\n\t" - "add r7, #4\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" + "movs r5, #0\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "mov r8, r7\n\t" - "mov r6, #88\n\t" + "movs r6, #0x58\n\t" "cmp r7, r6\n\t" - "ble 1b\n\t" + "ble L_sp_384_sqr_12_words_%=\n\t" "mov %[a], r9\n\t" "str r3, [%[r], r7]\n\t" "mov %[r], r11\n\t" "mov %[a], r10\n\t" - "mov r3, #92\n\t" - "\n4:\n\t" + "movs r3, #0x5c\n\t" + "\n" + "L_sp_384_sqr_12_store_%=: \n\t" "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" - "sub r3, #4\n\t" - "bge 4b\n\t" - "mov r6, #96\n\t" - "add sp, r6\n\t" +#ifdef __clang__ + "subs r3, r3, #4\n\t" +#else + "sub r3, r3, #4\n\t" +#endif + "bge L_sp_384_sqr_12_store_%=\n\t" + "movs r6, #0x60\n\t" + "add sp, sp, r6\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), 
[a] "r" (a) : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } @@ -23803,32 +43025,64 @@ SP_NOINLINE static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r6, %[a]\n\t" - "mov r7, #0\n\t" - "add r6, #48\n\t" - "sub r7, #1\n\t" - "\n1:\n\t" - "add %[c], r7\n\t" + "movs r6, %[a]\n\t" + "movs r7, #0\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "adds r6, r6, #48\n\t" +#else + "add r6, r6, #48\n\t" +#endif +#ifdef __clang__ + "subs r7, r7, #1\n\t" +#else + "sub r7, r7, #1\n\t" +#endif + "\n" + "L_sp_384_add_12_word_%=: \n\t" +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif "ldr r4, [%[a]]\n\t" "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r]]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - "add %[a], #4\n\t" - "add %[b], #4\n\t" - "add %[r], #4\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #4\n\t" +#else + "add %[b], %[b], #4\n\t" +#endif +#ifdef __clang__ + "adds %[r], %[r], #4\n\t" +#else + "add %[r], %[r], #4\n\t" +#endif "cmp %[a], r6\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "bne L_sp_384_add_12_word_%=\n\t" + "movs %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r3", "r4", "r5", "r6", "r7" ); - - return c; + return (uint32_t)(size_t)r; } #else @@ -23841,65 +43095,115 @@ SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], 
#0]\n\t" - "add r4, r5\n\t" - "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "adds r4, r4, r5\n\t" +#else + "add r4, r4, r5\n\t" +#endif + "str r4, [%[r]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[b], #32]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], #40]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #44]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #44]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + 
"adc r3, r3\n\t" +#endif + "movs %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5" + : "memory", "r3", "r4", "r5" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ @@ -23913,30 +43217,58 @@ SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r6, %[a]\n\t" - "add r6, #48\n\t" - "\n1:\n\t" - "mov r5, #0\n\t" - "sub r5, %[c]\n\t" + "movs r6, %[a]\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "adds r6, r6, #48\n\t" +#else + "add r6, r6, #48\n\t" +#endif + "\n" + "L_sp_384_sub_12_word_%=: \n\t" + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, r3\n\t" +#else + "sub r5, r5, r3\n\t" +#endif "ldr r4, [%[a]]\n\t" "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "sbcs r4, r5\n\t" +#else "sbc r4, r5\n\t" +#endif "str r4, [%[r]]\n\t" - "sbc %[c], %[c]\n\t" - "add %[a], #4\n\t" - "add %[b], #4\n\t" - "add %[r], #4\n\t" +#ifdef __clang__ + "sbcs r3, r3\n\t" +#else + "sbc r3, r3\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #4\n\t" +#else + "add %[b], %[b], #4\n\t" +#endif +#ifdef __clang__ + "adds %[r], %[r], #4\n\t" +#else + "add %[r], %[r], #4\n\t" +#endif "cmp %[a], r6\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "bne L_sp_384_sub_12_word_%=\n\t" + "movs %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6" + : "memory", "r3", "r4", "r5", "r6" ); - - return c; + return (uint32_t)(size_t)r; } #else @@ -23949,64 +43281,115 @@ SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldr r4, [%[a], #0]\n\t" + "movs r3, #0\n\t" + "ldr r4, 
[%[a]]\n\t" "ldr r5, [%[a], #4]\n\t" - "ldr r6, [%[b], #0]\n\t" + "ldr r6, [%[b]]\n\t" "ldr r7, [%[b], #4]\n\t" - "sub r4, r6\n\t" +#ifdef __clang__ + "subs r4, r4, r6\n\t" +#else + "sub r4, r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" - "str r4, [%[r], #0]\n\t" +#endif + "str r4, [%[r]]\n\t" "str r5, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[a], #12]\n\t" "ldr r6, [%[b], #8]\n\t" "ldr r7, [%[b], #12]\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" +#endif "str r4, [%[r], #8]\n\t" "str r5, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[a], #20]\n\t" "ldr r6, [%[b], #16]\n\t" "ldr r7, [%[b], #20]\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" +#endif "str r4, [%[r], #16]\n\t" "str r5, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[a], #28]\n\t" "ldr r6, [%[b], #24]\n\t" "ldr r7, [%[b], #28]\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" +#endif "str r4, [%[r], #24]\n\t" "str r5, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[a], #36]\n\t" "ldr r6, [%[b], #32]\n\t" "ldr r7, [%[b], #36]\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" +#endif "str r4, [%[r], #32]\n\t" "str r5, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[a], #44]\n\t" "ldr r6, [%[b], #40]\n\t" "ldr r7, [%[b], #44]\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" +#endif "str r4, [%[r], #40]\n\t" "str r5, [%[r], #44]\n\t" - "sbc %[c], %[c]\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) +#ifdef __clang__ + "sbcs r3, r3\n\t" +#else + "sbc r3, r3\n\t" +#endif + "movs 
%[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r3", "r4", "r5", "r6", "r7" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ @@ -24315,30 +43698,50 @@ static int sp_384_point_to_ecc_point_12(const sp_point_384* p, ecc_point* pm) SP_NOINLINE static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r5, #48\n\t" + "movs r4, #0\n\t" + "movs r5, #48\n\t" "mov r8, r5\n\t" - "mov r7, #0\n\t" - "1:\n\t" + "movs r7, #0\n\t" + "\n" + "L_sp_384_cond_sub_12_words_%=: \n\t" "ldr r6, [%[b], r7]\n\t" +#ifdef __clang__ + "ands r6, %[m]\n\t" +#else "and r6, %[m]\n\t" - "mov r5, #0\n\t" - "sub r5, %[c]\n\t" +#endif + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, r4\n\t" +#else + "sub r5, r5, r4\n\t" +#endif "ldr r5, [%[a], r7]\n\t" +#ifdef __clang__ + "sbcs r5, r6\n\t" +#else "sbc r5, r6\n\t" - "sbc %[c], %[c]\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r4\n\t" +#else + "sbc r4, r4\n\t" +#endif "str r5, [%[r], r7]\n\t" - "add r7, #4\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "cmp r7, r8\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r5", "r6", "r7", "r8" + "blt L_sp_384_cond_sub_12_words_%=\n\t" + "movs %[r], r4\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r4", "r5", "r6", "r7", "r8" ); - - return c; + return (uint32_t)(size_t)r; } #define sp_384_mont_reduce_order_12 sp_384_mont_reduce_12 @@ -24353,136 +43756,372 @@ SP_NOINLINE static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, sp_digit mp) { sp_digit ca = 0; - __asm__ __volatile__ ( "mov r8, %[mp]\n\t" "mov r12, %[ca]\n\t" - "mov r14, %[m]\n\t" + "mov lr, %[m]\n\t" "mov r9, %[a]\n\t" - "mov r4, #0\n\t" + "movs r4, #0\n\t" "# i = 0\n\t" "mov r11, r4\n\t" - "\n1:\n\t" - "mov r5, #0\n\t" - 
"mov %[ca], #0\n\t" + "\n" + "L_sp_384_mont_reduce_12_mod_%=: \n\t" + "movs r5, #0\n\t" + "movs %[ca], #0\n\t" "# mu = a[i] * mp\n\t" "mov %[mp], r8\n\t" "ldr %[a], [%[a]]\n\t" +#ifdef __clang__ + "muls %[mp], %[a]\n\t" +#else "mul %[mp], %[a]\n\t" - "mov %[m], r14\n\t" +#endif + "mov %[m], lr\n\t" "mov r10, r9\n\t" - "\n2:\n\t" + "\n" + "L_sp_384_mont_reduce_12_word_%=: \n\t" "# a[i+j] += m[j] * mu\n\t" "mov %[a], r10\n\t" "ldr %[a], [%[a]]\n\t" - "mov %[ca], #0\n\t" - "mov r4, r5\n\t" - "mov r5, #0\n\t" + "movs %[ca], #0\n\t" + "movs r4, r5\n\t" + "movs r5, #0\n\t" "# Multiply m[j] and mu - Start\n\t" "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r6, %[mp], #16\n\t" +#else "lsl r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add %[a], r7\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], r7\n\t" +#else + "add %[a], %[a], r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[ca]\n\t" +#else "adc r5, %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add %[a], r6\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], r6\n\t" +#else + "add %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r6, %[mp], #16\n\t" +#else "lsr r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r5, r7\n\t" +#endif 
+#ifdef __clang__ + "adds r5, r5, r7\n\t" +#else + "add r5, r5, r7\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add %[a], r6\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], r6\n\t" +#else + "add %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" +#endif "# Multiply m[j] and mu - Done\n\t" - "add r4, %[a]\n\t" +#ifdef __clang__ + "adds r4, r4, %[a]\n\t" +#else + "add r4, r4, %[a]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[ca]\n\t" +#else "adc r5, %[ca]\n\t" +#endif "mov %[a], r10\n\t" "str r4, [%[a]]\n\t" - "mov r6, #4\n\t" - "add %[m], #4\n\t" - "add r10, r6\n\t" - "mov r4, #44\n\t" - "add r4, r9\n\t" + "movs r6, #4\n\t" +#ifdef __clang__ + "adds %[m], %[m], #4\n\t" +#else + "add %[m], %[m], #4\n\t" +#endif + "add r10, r10, r6\n\t" + "movs r4, #44\n\t" + "add r4, r4, r9\n\t" "cmp r10, r4\n\t" - "blt 2b\n\t" + "blt L_sp_384_mont_reduce_12_word_%=\n\t" "# a[i+11] += m[11] * mu\n\t" - "mov %[ca], #0\n\t" + "movs %[ca], #0\n\t" "mov r4, r12\n\t" - "mov %[a], #0\n\t" + "movs %[a], #0\n\t" "# Multiply m[11] and mu - Start\n\t" "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r6, %[mp], #16\n\t" +#else "lsl r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r5, r7\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r7\n\t" +#else + "add r5, r5, r7\n\t" +#endif +#ifdef __clang__ + "adcs 
r4, %[ca]\n\t" +#else "adc r4, %[ca]\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r6, %[mp], #16\n\t" +#else "lsr r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "# Multiply m[11] and mu - Done\n\t" - "mov %[ca], %[a]\n\t" + "movs %[ca], %[a]\n\t" "mov %[a], r10\n\t" "ldr r7, [%[a], #4]\n\t" "ldr %[a], [%[a]]\n\t" - "mov r6, #0\n\t" - "add r5, %[a]\n\t" 
+ "movs r6, #0\n\t" +#ifdef __clang__ + "adds r5, r5, %[a]\n\t" +#else + "add r5, r5, %[a]\n\t" +#endif +#ifdef __clang__ + "adcs r7, r4\n\t" +#else "adc r7, r4\n\t" +#endif +#ifdef __clang__ + "adcs %[ca], r6\n\t" +#else "adc %[ca], r6\n\t" +#endif "mov %[a], r10\n\t" "str r5, [%[a]]\n\t" "str r7, [%[a], #4]\n\t" "# i += 1\n\t" - "mov r6, #4\n\t" - "add r9, r6\n\t" - "add r11, r6\n\t" + "movs r6, #4\n\t" + "add r9, r9, r6\n\t" + "add r11, r11, r6\n\t" "mov r12, %[ca]\n\t" "mov %[a], r9\n\t" - "mov r4, #48\n\t" + "movs r4, #48\n\t" "cmp r11, r4\n\t" - "blt 1b\n\t" - "mov %[m], r14\n\t" - : [ca] "+r" (ca), [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + "blt L_sp_384_mont_reduce_12_mod_%=\n\t" + "mov %[m], lr\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp), [ca] "+r" (ca) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); - sp_384_cond_sub_12(a - 12, a, m, (sp_digit)0 - ca); } @@ -24637,38 +44276,93 @@ static void sp_384_mont_inv_12(sp_digit* r, const sp_digit* a, sp_digit* td) */ SP_NOINLINE static int32_t sp_384_cmp_12(const sp_digit* a, const sp_digit* b) { - sp_digit r = 0; - - __asm__ __volatile__ ( - "mov r3, #0\n\t" + "movs r2, #0\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "mvns r3, r3\n\t" +#else "mvn r3, r3\n\t" - "mov r6, #44\n\t" - "1:\n\t" +#endif + "movs r6, #44\n\t" + "\n" + "L_sp_384_cmp_12_words_%=: \n\t" "ldr r7, [%[a], r6]\n\t" "ldr r5, [%[b], r6]\n\t" +#ifdef __clang__ + "ands r7, r3\n\t" +#else "and r7, r3\n\t" +#endif +#ifdef __clang__ + "ands r5, r3\n\t" +#else "and r5, r3\n\t" - "mov r4, r7\n\t" - "sub r7, r5\n\t" +#endif + "movs r4, r7\n\t" +#ifdef __clang__ + "subs r7, r7, r5\n\t" +#else + "sub r7, r7, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r7\n\t" +#else "sbc r7, r7\n\t" - "add %[r], r7\n\t" +#endif +#ifdef __clang__ + "adds r2, r2, r7\n\t" +#else + "add r2, r2, r7\n\t" +#endif +#ifdef __clang__ + "mvns r7, r7\n\t" +#else 
"mvn r7, r7\n\t" +#endif +#ifdef __clang__ + "ands r3, r7\n\t" +#else "and r3, r7\n\t" - "sub r5, r4\n\t" +#endif +#ifdef __clang__ + "subs r5, r5, r4\n\t" +#else + "sub r5, r5, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r7\n\t" +#else "sbc r7, r7\n\t" - "sub %[r], r7\n\t" +#endif +#ifdef __clang__ + "subs r2, r2, r7\n\t" +#else + "sub r2, r2, r7\n\t" +#endif +#ifdef __clang__ + "mvns r7, r7\n\t" +#else "mvn r7, r7\n\t" +#endif +#ifdef __clang__ + "ands r3, r7\n\t" +#else "and r3, r7\n\t" - "sub r6, #4\n\t" +#endif +#ifdef __clang__ + "subs r6, r6, #4\n\t" +#else + "sub r6, r6, #4\n\t" +#endif "cmp r6, #0\n\t" - "bge 1b\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b) - : "r3", "r4", "r5", "r6", "r7" + "bge L_sp_384_cmp_12_words_%=\n\t" + "movs %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7" ); - - return r; + return (uint32_t)(size_t)a; } /* Normalize the values in each word to 32. @@ -24727,8 +44421,8 @@ static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, * b Second number to add in Montogmery form. * m Modulus (prime). */ -SP_NOINLINE static void sp_384_mont_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +SP_NOINLINE static void sp_384_mont_add_12(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit* m) { sp_digit o; @@ -24742,7 +44436,8 @@ SP_NOINLINE static void sp_384_mont_add_12(sp_digit* r, const sp_digit* a, const * a Number to double in Montogmery form. * m Modulus (prime). */ -SP_NOINLINE static void sp_384_mont_dbl_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_384_mont_dbl_12(sp_digit* r, const sp_digit* a, + const sp_digit* m) { sp_digit o; @@ -24756,7 +44451,8 @@ SP_NOINLINE static void sp_384_mont_dbl_12(sp_digit* r, const sp_digit* a, const * a Number to triple in Montogmery form. * m Modulus (prime). 
*/ -SP_NOINLINE static void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, + const sp_digit* m) { sp_digit o; @@ -24774,35 +44470,59 @@ SP_NOINLINE static void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, const * b A single precision number to add. * m Mask value to apply. */ -SP_NOINLINE static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +SP_NOINLINE static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, + const sp_digit* b, sp_digit m) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r5, #48\n\t" + "movs r4, #0\n\t" + "movs r5, #48\n\t" "mov r8, r5\n\t" - "mov r7, #0\n\t" - "1:\n\t" + "movs r7, #0\n\t" + "\n" + "L_sp_384_cond_add_12_words_%=: \n\t" "ldr r6, [%[b], r7]\n\t" +#ifdef __clang__ + "ands r6, %[m]\n\t" +#else "and r6, %[m]\n\t" - "mov r5, #0\n\t" - "sub r5, #1\n\t" - "add r5, %[c]\n\t" +#endif + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, #1\n\t" +#else + "sub r5, r5, #1\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r4\n\t" +#else + "add r5, r5, r4\n\t" +#endif "ldr r5, [%[a], r7]\n\t" +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" +#endif + "movs r4, #0\n\t" +#ifdef __clang__ + "adcs r4, r4\n\t" +#else + "adc r4, r4\n\t" +#endif "str r5, [%[r], r7]\n\t" - "add r7, #4\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "cmp r7, r8\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r5", "r6", "r7", "r8" + "blt L_sp_384_cond_add_12_words_%=\n\t" + "movs %[r], r4\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r4", "r5", "r6", "r7", "r8" ); - - return c; + return (uint32_t)(size_t)r; } /* Subtract two Montgomery form numbers (r = a - b % m). 
@@ -24812,8 +44532,8 @@ SP_NOINLINE static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, c * b Number to subtract with in Montogmery form. * m Modulus (prime). */ -SP_NOINLINE static void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +SP_NOINLINE static void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit* m) { sp_digit o; @@ -24821,69 +44541,210 @@ SP_NOINLINE static void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, const sp_384_cond_add_12(r, r, m, o); } -static void sp_384_rshift1_12(sp_digit* r, sp_digit* a) +/* Right shift a by 1 bit into r. (r = a >> 1) + * + * r A single precision integer. + * a A single precision integer. + */ +static void sp_384_rshift1_12(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( "ldr r2, [%[a]]\n\t" "ldr r3, [%[a], #4]\n\t" +#ifdef __clang__ + "lsrs r2, r2, #1\n\t" +#else "lsr r2, r2, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r3, #31\n\t" +#else "lsl r5, r3, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r3, r3, #1\n\t" +#else "lsr r3, r3, #1\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "orrs r2, r5\n\t" +#else + "orr r2, r5\n\t" +#endif "ldr r4, [%[a], #8]\n\t" - "str r2, [%[r], #0]\n\t" + "str r2, [%[r]]\n\t" +#ifdef __clang__ + "lsls r5, r4, #31\n\t" +#else "lsl r5, r4, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #1\n\t" +#else "lsr r4, r4, #1\n\t" - "orr r3, r3, r5\n\t" +#endif +#ifdef __clang__ + "orrs r3, r5\n\t" +#else + "orr r3, r5\n\t" +#endif "ldr r2, [%[a], #12]\n\t" "str r3, [%[r], #4]\n\t" +#ifdef __clang__ + "lsls r5, r2, #31\n\t" +#else "lsl r5, r2, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r2, r2, #1\n\t" +#else "lsr r2, r2, #1\n\t" - "orr r4, r4, r5\n\t" +#endif +#ifdef __clang__ + "orrs r4, r5\n\t" +#else + "orr r4, r5\n\t" +#endif "ldr r3, [%[a], #16]\n\t" "str r4, [%[r], #8]\n\t" +#ifdef __clang__ + "lsls r5, r3, #31\n\t" +#else "lsl r5, r3, #31\n\t" +#endif +#ifdef __clang__ + 
"lsrs r3, r3, #1\n\t" +#else "lsr r3, r3, #1\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "orrs r2, r5\n\t" +#else + "orr r2, r5\n\t" +#endif "ldr r4, [%[a], #20]\n\t" "str r2, [%[r], #12]\n\t" +#ifdef __clang__ + "lsls r5, r4, #31\n\t" +#else "lsl r5, r4, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #1\n\t" +#else "lsr r4, r4, #1\n\t" - "orr r3, r3, r5\n\t" +#endif +#ifdef __clang__ + "orrs r3, r5\n\t" +#else + "orr r3, r5\n\t" +#endif "ldr r2, [%[a], #24]\n\t" "str r3, [%[r], #16]\n\t" +#ifdef __clang__ + "lsls r5, r2, #31\n\t" +#else "lsl r5, r2, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r2, r2, #1\n\t" +#else "lsr r2, r2, #1\n\t" - "orr r4, r4, r5\n\t" +#endif +#ifdef __clang__ + "orrs r4, r5\n\t" +#else + "orr r4, r5\n\t" +#endif "ldr r3, [%[a], #28]\n\t" "str r4, [%[r], #20]\n\t" +#ifdef __clang__ + "lsls r5, r3, #31\n\t" +#else "lsl r5, r3, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r3, r3, #1\n\t" +#else "lsr r3, r3, #1\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "orrs r2, r5\n\t" +#else + "orr r2, r5\n\t" +#endif "ldr r4, [%[a], #32]\n\t" "str r2, [%[r], #24]\n\t" +#ifdef __clang__ + "lsls r5, r4, #31\n\t" +#else "lsl r5, r4, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #1\n\t" +#else "lsr r4, r4, #1\n\t" - "orr r3, r3, r5\n\t" +#endif +#ifdef __clang__ + "orrs r3, r5\n\t" +#else + "orr r3, r5\n\t" +#endif "ldr r2, [%[a], #36]\n\t" "str r3, [%[r], #28]\n\t" +#ifdef __clang__ + "lsls r5, r2, #31\n\t" +#else "lsl r5, r2, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r2, r2, #1\n\t" +#else "lsr r2, r2, #1\n\t" - "orr r4, r4, r5\n\t" +#endif +#ifdef __clang__ + "orrs r4, r5\n\t" +#else + "orr r4, r5\n\t" +#endif "ldr r3, [%[a], #40]\n\t" "str r4, [%[r], #32]\n\t" +#ifdef __clang__ + "lsls r5, r3, #31\n\t" +#else "lsl r5, r3, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r3, r3, #1\n\t" +#else "lsr r3, r3, #1\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "orrs r2, r5\n\t" +#else + "orr r2, r5\n\t" +#endif "ldr r4, [%[a], 
#44]\n\t" "str r2, [%[r], #36]\n\t" +#ifdef __clang__ + "lsls r5, r4, #31\n\t" +#else "lsl r5, r4, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #1\n\t" +#else "lsr r4, r4, #1\n\t" - "orr r3, r3, r5\n\t" +#endif +#ifdef __clang__ + "orrs r3, r5\n\t" +#else + "orr r3, r5\n\t" +#endif "str r3, [%[r], #40]\n\t" "str r4, [%[r], #44]\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) : "memory", "r2", "r3", "r4", "r5" ); } @@ -24894,7 +44755,8 @@ static void sp_384_rshift1_12(sp_digit* r, sp_digit* a) * a Number to divide. * m Modulus (prime). */ -SP_NOINLINE static void sp_384_div2_12(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_384_div2_12(sp_digit* r, const sp_digit* a, + const sp_digit* m) { sp_digit o; @@ -28438,46 +48300,94 @@ static int sp_384_iszero_12(const sp_digit* a) SP_NOINLINE static void sp_384_add_one_12(sp_digit* a) { __asm__ __volatile__ ( - "mov r2, #1\n\t" - "ldr r1, [%[a], #0]\n\t" - "add r1, r2\n\t" - "mov r2, #0\n\t" - "str r1, [%[a], #0]\n\t" + "movs r2, #1\n\t" + "ldr r1, [%[a]]\n\t" +#ifdef __clang__ + "adds r1, r1, r2\n\t" +#else + "add r1, r1, r2\n\t" +#endif + "movs r2, #0\n\t" + "str r1, [%[a]]\n\t" "ldr r1, [%[a], #4]\n\t" +#ifdef __clang__ + "adcs r1, r2\n\t" +#else "adc r1, r2\n\t" +#endif "str r1, [%[a], #4]\n\t" "ldr r1, [%[a], #8]\n\t" +#ifdef __clang__ + "adcs r1, r2\n\t" +#else "adc r1, r2\n\t" +#endif "str r1, [%[a], #8]\n\t" "ldr r1, [%[a], #12]\n\t" +#ifdef __clang__ + "adcs r1, r2\n\t" +#else "adc r1, r2\n\t" +#endif "str r1, [%[a], #12]\n\t" "ldr r1, [%[a], #16]\n\t" +#ifdef __clang__ + "adcs r1, r2\n\t" +#else "adc r1, r2\n\t" +#endif "str r1, [%[a], #16]\n\t" "ldr r1, [%[a], #20]\n\t" +#ifdef __clang__ + "adcs r1, r2\n\t" +#else "adc r1, r2\n\t" +#endif "str r1, [%[a], #20]\n\t" "ldr r1, [%[a], #24]\n\t" +#ifdef __clang__ + "adcs r1, r2\n\t" +#else "adc r1, r2\n\t" +#endif "str r1, [%[a], #24]\n\t" "ldr r1, [%[a], #28]\n\t" +#ifdef __clang__ + "adcs r1, r2\n\t" +#else 
"adc r1, r2\n\t" +#endif "str r1, [%[a], #28]\n\t" "ldr r1, [%[a], #32]\n\t" +#ifdef __clang__ + "adcs r1, r2\n\t" +#else "adc r1, r2\n\t" +#endif "str r1, [%[a], #32]\n\t" "ldr r1, [%[a], #36]\n\t" +#ifdef __clang__ + "adcs r1, r2\n\t" +#else "adc r1, r2\n\t" +#endif "str r1, [%[a], #36]\n\t" "ldr r1, [%[a], #40]\n\t" +#ifdef __clang__ + "adcs r1, r2\n\t" +#else "adc r1, r2\n\t" +#endif "str r1, [%[a], #40]\n\t" "ldr r1, [%[a], #44]\n\t" +#ifdef __clang__ + "adcs r1, r2\n\t" +#else "adc r1, r2\n\t" +#endif "str r1, [%[a], #44]\n\t" + : [a] "+r" (a) : - : [a] "r" (a) : "memory", "r1", "r2" ); } @@ -28748,102 +48658,181 @@ int sp_ecc_secret_gen_384(const mp_int* priv, const ecc_point* pub, byte* out, SP_NOINLINE static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; __asm__ __volatile__ ( - "mov r7, %[a]\n\t" - "add r7, #48\n\t" - "\n1:\n\t" - "mov r5, #0\n\t" - "sub r5, %[c]\n\t" + "movs r7, %[a]\n\t" + "movs r2, #0\n\t" +#ifdef __clang__ + "adds r7, r7, #48\n\t" +#else + "add r7, r7, #48\n\t" +#endif + "\n" + "L_sp_384_sub_in_place_12_words_%=: \n\t" + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, r2\n\t" +#else + "sub r5, r5, r2\n\t" +#endif "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" - "sbc %[c], %[c]\n\t" - "add %[a], #8\n\t" - "add %[b], #8\n\t" +#ifdef __clang__ + "sbcs r2, r2\n\t" +#else + "sbc r2, r2\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #8\n\t" +#else + "add %[a], %[a], #8\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #8\n\t" +#else + "add %[b], %[b], #8\n\t" +#endif "cmp %[a], r7\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "bne L_sp_384_sub_in_place_12_words_%=\n\t" + "movs %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", 
"r4", "r5", "r6", "r7" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7" ); - - return c; + return (uint32_t)(size_t)a; } #else -/* Sub b from a into r. (r = a - b) +/* Sub b from a into a. (a -= b) * - * r A single precision integer. * a A single precision integer. * b A single precision integer. */ SP_NOINLINE static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldr r3, [%[a], #0]\n\t" + "movs r2, #0\n\t" + "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" + "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" - "sub r3, r5\n\t" +#ifdef __clang__ + "subs r3, r3, r5\n\t" +#else + "sub r3, r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" - "str r3, [%[a], #0]\n\t" +#endif + "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #8]\n\t" "ldr r6, [%[b], #12]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #8]\n\t" "str r4, [%[a], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #16]\n\t" "ldr r6, [%[b], #20]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #16]\n\t" "str r4, [%[a], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #24]\n\t" "ldr r6, [%[b], #28]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #24]\n\t" "str r4, [%[a], #28]\n\t" "ldr r3, [%[a], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #32]\n\t" "ldr r6, [%[b], #36]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #32]\n\t" 
"str r4, [%[a], #36]\n\t" "ldr r3, [%[a], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #40]\n\t" "ldr r6, [%[b], #44]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #40]\n\t" "str r4, [%[a], #44]\n\t" - "sbc %[c], %[c]\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) +#ifdef __clang__ + "sbcs r2, r2\n\t" +#else + "sbc r2, r2\n\t" +#endif + "movs %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6" + : "memory", "r2", "r3", "r4", "r5", "r6" ); - - return c; + return (uint32_t)(size_t)a; } #endif /* WOLFSSL_SP_SMALL */ @@ -28857,59 +48846,184 @@ SP_NOINLINE static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a, sp_digit b) { __asm__ __volatile__ ( - "mov r6, #48\n\t" - "add r6, %[a]\n\t" + "movs r6, #48\n\t" +#ifdef __clang__ + "adds r6, r6, %[a]\n\t" +#else + "add r6, r6, %[a]\n\t" +#endif "mov r8, %[r]\n\t" "mov r9, r6\n\t" - "mov r3, #0\n\t" - "mov r4, #0\n\t" - "1:\n\t" - "mov %[r], #0\n\t" - "mov r5, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" + "\n" + "L_sp_384_mul_d_12_%=: \n\t" + "movs %[r], #0\n\t" + "movs r5, #0\n\t" "# A[] * B\n\t" "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, %[b], #16\n\t" +#else "lsl r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "lsrs r7, %[b], #16\n\t" +#else "lsr r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "muls r6, 
r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, %[b], #16\n\t" +#else "lsr r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "lsls r7, %[b], #16\n\t" +#else "lsl r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# A[] * B - Done\n\t" "mov %[r], r8\n\t" "str r3, [%[r]]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "add %[r], #4\n\t" - "add %[a], #4\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" +#ifdef __clang__ + "adds %[r], %[r], #4\n\t" +#else + "add %[r], %[r], #4\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif "mov r8, %[r]\n\t" "cmp %[a], r9\n\t" - "blt 1b\n\t" + "blt L_sp_384_mul_d_12_%=\n\t" "str r3, [%[r]]\n\t" - : [r] "+r" (r), [a] 
"+r" (a) - : [b] "r" (b) + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -28926,135 +49040,512 @@ SP_NOINLINE static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a, SP_NOINLINE static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, sp_digit div) { - sp_digit r = 0; - __asm__ __volatile__ ( + "movs r3, #0\n\t" +#ifdef __clang__ + "lsrs r5, %[div], #1\n\t" +#else "lsr r5, %[div], #1\n\t" - "add r5, #1\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, #1\n\t" +#else + "add r5, r5, #1\n\t" +#endif "mov r8, %[d0]\n\t" "mov r9, %[d1]\n\t" "# Do top 32\n\t" - "mov r6, r5\n\t" - "sub r6, %[d1]\n\t" + "movs r6, r5\n\t" +#ifdef __clang__ + "subs r6, r6, %[d1]\n\t" +#else + "sub r6, r6, %[d1]\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" - "add %[r], %[r]\n\t" - "sub %[r], r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r3\n\t" +#else + "add r3, r3, r3\n\t" +#endif +#ifdef __clang__ + "subs r3, r3, r6\n\t" +#else + "sub r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "ands r6, r5\n\t" +#else "and r6, r5\n\t" - "sub %[d1], r6\n\t" - "# Next 30 bits\n\t" - "mov r4, #29\n\t" - "1:\n\t" +#endif +#ifdef __clang__ + "subs %[d1], %[d1], r6\n\t" +#else + "sub %[d1], %[d1], r6\n\t" +#endif + "\n\t" + "movs r4, #29\n\t" + "\n" + "L_div_384_word_12_loop_%=: \n\t" +#ifdef __clang__ + "lsls %[d0], %[d0], #1\n\t" +#else "lsl %[d0], %[d0], #1\n\t" +#endif +#ifdef __clang__ + "adcs %[d1], %[d1]\n\t" +#else "adc %[d1], %[d1]\n\t" - "mov r6, r5\n\t" - "sub r6, %[d1]\n\t" +#endif + "movs r6, r5\n\t" +#ifdef __clang__ + "subs r6, r6, %[d1]\n\t" +#else + "sub r6, r6, %[d1]\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" - "add %[r], %[r]\n\t" - "sub %[r], r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r3\n\t" +#else + "add r3, r3, r3\n\t" +#endif +#ifdef __clang__ + "subs r3, r3, r6\n\t" +#else + "sub r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "ands r6, r5\n\t" +#else "and 
r6, r5\n\t" - "sub %[d1], r6\n\t" - "sub r4, #1\n\t" - "bpl 1b\n\t" - "mov r7, #0\n\t" - "add %[r], %[r]\n\t" - "add %[r], #1\n\t" +#endif +#ifdef __clang__ + "subs %[d1], %[d1], r6\n\t" +#else + "sub %[d1], %[d1], r6\n\t" +#endif +#ifdef __clang__ + "subs r4, r4, #1\n\t" +#else + "sub r4, r4, #1\n\t" +#endif + "bpl L_div_384_word_12_loop_%=\n\t" + "movs r7, #0\n\t" +#ifdef __clang__ + "adds r3, r3, r3\n\t" +#else + "add r3, r3, r3\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, #1\n\t" +#else + "add r3, r3, #1\n\t" +#endif "# r * div - Start\n\t" - "lsl %[d1], %[r], #16\n\t" +#ifdef __clang__ + "lsls %[d1], r3, #16\n\t" +#else + "lsl %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "lsls r4, %[div], #16\n\t" +#else "lsl r4, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], %[d1], #16\n\t" +#else "lsr %[d1], %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #16\n\t" +#else "lsr r4, r4, #16\n\t" +#endif +#ifdef __clang__ + "muls r4, %[d1]\n\t" +#else "mul r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[div], #16\n\t" +#else "lsr r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r5, %[d1], #16\n\t" +#else "lsr r5, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" - "lsr %[d1], %[r], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], r3, #16\n\t" +#else + "lsr %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, %[d1]\n\t" +#else "mul r6, %[d1]\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "lsls r6, %[div], #16\n\t" +#else "lsl r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef 
__clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[d1], #16\n\t" +#else "lsr r6, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" +#endif "# r * div - Done\n\t" "mov %[d1], r8\n\t" - "sub %[d1], r4\n\t" - "mov r4, %[d1]\n\t" +#ifdef __clang__ + "subs %[d1], %[d1], r4\n\t" +#else + "sub %[d1], %[d1], r4\n\t" +#endif + "movs r4, %[d1]\n\t" "mov %[d1], r9\n\t" +#ifdef __clang__ + "sbcs %[d1], r5\n\t" +#else "sbc %[d1], r5\n\t" - "mov r5, %[d1]\n\t" - "add %[r], r5\n\t" +#endif + "movs r5, %[d1]\n\t" +#ifdef __clang__ + "adds r3, r3, r5\n\t" +#else + "add r3, r3, r5\n\t" +#endif "# r * div - Start\n\t" - "lsl %[d1], %[r], #16\n\t" +#ifdef __clang__ + "lsls %[d1], r3, #16\n\t" +#else + "lsl %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "lsls r4, %[div], #16\n\t" +#else "lsl r4, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], %[d1], #16\n\t" +#else "lsr %[d1], %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #16\n\t" +#else "lsr r4, r4, #16\n\t" +#endif +#ifdef __clang__ + "muls r4, %[d1]\n\t" +#else "mul r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[div], #16\n\t" +#else "lsr r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r5, %[d1], #16\n\t" +#else "lsr r5, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" - "lsr %[d1], %[r], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], r3, #16\n\t" +#else + "lsr %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ 
+ "muls r6, %[d1]\n\t" +#else "mul r6, %[d1]\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "lsls r6, %[div], #16\n\t" +#else "lsl r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[d1], #16\n\t" +#else "lsr r6, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" +#endif "# r * div - Done\n\t" "mov %[d1], r8\n\t" "mov r6, r9\n\t" +#ifdef __clang__ + "subs r4, %[d1], r4\n\t" +#else "sub r4, %[d1], r4\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r5\n\t" +#else "sbc r6, r5\n\t" - "mov r5, r6\n\t" - "add %[r], r5\n\t" +#endif + "movs r5, r6\n\t" +#ifdef __clang__ + "adds r3, r3, r5\n\t" +#else + "add r3, r3, r5\n\t" +#endif "# r * div - Start\n\t" - "lsl %[d1], %[r], #16\n\t" +#ifdef __clang__ + "lsls %[d1], r3, #16\n\t" +#else + "lsl %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "lsls r4, %[div], #16\n\t" +#else "lsl r4, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], %[d1], #16\n\t" +#else "lsr %[d1], %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #16\n\t" +#else "lsr r4, r4, #16\n\t" +#endif +#ifdef __clang__ + "muls r4, %[d1]\n\t" +#else "mul r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[div], #16\n\t" +#else "lsr r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r5, %[d1], #16\n\t" +#else "lsr r5, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" 
+#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" - "lsr %[d1], %[r], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], r3, #16\n\t" +#else + "lsr %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, %[d1]\n\t" +#else "mul r6, %[d1]\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "lsls r6, %[div], #16\n\t" +#else "lsl r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[d1], #16\n\t" +#else "lsr r6, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" +#endif "# r * div - Done\n\t" "mov %[d1], r8\n\t" "mov r6, r9\n\t" +#ifdef __clang__ + "subs r4, %[d1], r4\n\t" +#else "sub r4, %[d1], r4\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r5\n\t" +#else "sbc r6, r5\n\t" - "mov r5, r6\n\t" - "add %[r], r5\n\t" - "mov r6, %[div]\n\t" - "sub r6, r4\n\t" +#endif + "movs r5, r6\n\t" +#ifdef __clang__ + "adds r3, r3, r5\n\t" +#else + "add r3, r3, r5\n\t" +#endif + "movs r6, %[div]\n\t" +#ifdef __clang__ + "subs r6, r6, r4\n\t" +#else + "sub r6, r6, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" - "sub %[r], r6\n\t" - : [r] "+r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r7", "r6", "r8", "r9" +#endif +#ifdef __clang__ + "subs r3, r3, r6\n\t" +#else + "sub r3, r3, r6\n\t" +#endif + "movs %[d1], r3\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); - return r; + return (uint32_t)(size_t)d1; } /* AND m into each word of a and 
store in r. @@ -29672,179 +50163,380 @@ int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng, * m Modulus. */ static void sp_384_div2_mod_12(sp_digit* r, const sp_digit* a, - const sp_digit* m) + const sp_digit* m) { __asm__ __volatile__ ( - "ldr r3, [%[a]]\n\t" - "lsl r3, #31\n\t" - "beq 1f\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[a], #4]\n\t" - "ldr r6, [%[m], #0]\n\t" - "ldr r7, [%[m], #4]\n\t" - "add r4, r6\n\t" - "adc r5, r7\n\t" - "str r4, [%[r], #0]\n\t" - "str r5, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[a], #12]\n\t" - "ldr r6, [%[m], #8]\n\t" - "ldr r7, [%[m], #12]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" - "str r4, [%[r], #8]\n\t" - "str r5, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[a], #20]\n\t" - "ldr r6, [%[m], #16]\n\t" - "ldr r7, [%[m], #20]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" - "str r4, [%[r], #16]\n\t" - "str r5, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[a], #28]\n\t" - "ldr r6, [%[m], #24]\n\t" - "ldr r7, [%[m], #28]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" - "str r4, [%[r], #24]\n\t" - "str r5, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[a], #36]\n\t" - "ldr r6, [%[m], #32]\n\t" - "ldr r7, [%[m], #36]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" - "str r4, [%[r], #32]\n\t" - "str r5, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[a], #44]\n\t" - "ldr r6, [%[m], #40]\n\t" - "ldr r7, [%[m], #44]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" - "str r4, [%[r], #40]\n\t" - "str r5, [%[r], #44]\n\t" - "mov r3, #0\n\t" - "adc r3, r3\n\t" - "lsl r3, r3, #31\n\t" - "b 2f\n\t" - "\n1:\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[a], #4]\n\t" - "str r4, [%[r], #0]\n\t" - "str r5, [%[r], #4]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[a], #8]\n\t" - "str r4, [%[r], #4]\n\t" - "str r5, [%[r], #8]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[a], #12]\n\t" - "str r4, [%[r], #8]\n\t" - "str r5, [%[r], #12]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[a], 
#16]\n\t" - "str r4, [%[r], #12]\n\t" - "str r5, [%[r], #16]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[a], #20]\n\t" - "str r4, [%[r], #16]\n\t" - "str r5, [%[r], #20]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[a], #24]\n\t" - "str r4, [%[r], #20]\n\t" - "str r5, [%[r], #24]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[a], #28]\n\t" - "str r4, [%[r], #24]\n\t" - "str r5, [%[r], #28]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[a], #32]\n\t" - "str r4, [%[r], #28]\n\t" - "str r5, [%[r], #32]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[a], #36]\n\t" - "str r4, [%[r], #32]\n\t" - "str r5, [%[r], #36]\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[a], #40]\n\t" - "str r4, [%[r], #36]\n\t" - "str r5, [%[r], #40]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[a], #44]\n\t" - "str r4, [%[r], #40]\n\t" - "str r5, [%[r], #44]\n\t" - "\n2:\n\t" - "ldr r4, [%[r]]\n\t" - "ldr r5, [%[r], #4]\n\t" - "lsr r4, r4, #1\n\t" - "lsl r6, r5, #31\n\t" - "lsr r5, r5, #1\n\t" - "orr r4, r4, r6\n\t" - "ldr r7, [%[r], #8]\n\t" - "str r4, [%[r], #0]\n\t" - "lsl r6, r7, #31\n\t" - "lsr r7, r7, #1\n\t" - "orr r5, r5, r6\n\t" - "ldr r4, [%[r], #12]\n\t" - "str r5, [%[r], #4]\n\t" - "lsl r6, r4, #31\n\t" - "lsr r4, r4, #1\n\t" - "orr r7, r7, r6\n\t" - "ldr r5, [%[r], #16]\n\t" - "str r7, [%[r], #8]\n\t" - "lsl r6, r5, #31\n\t" - "lsr r5, r5, #1\n\t" - "orr r4, r4, r6\n\t" - "ldr r7, [%[r], #20]\n\t" - "str r4, [%[r], #12]\n\t" - "lsl r6, r7, #31\n\t" - "lsr r7, r7, #1\n\t" - "orr r5, r5, r6\n\t" - "ldr r4, [%[r], #24]\n\t" - "str r5, [%[r], #16]\n\t" - "lsl r6, r4, #31\n\t" - "lsr r4, r4, #1\n\t" - "orr r7, r7, r6\n\t" - "ldr r5, [%[r], #28]\n\t" - "str r7, [%[r], #20]\n\t" - "lsl r6, r5, #31\n\t" - "lsr r5, r5, #1\n\t" - "orr r4, r4, r6\n\t" - "ldr r7, [%[r], #32]\n\t" - "str r4, [%[r], #24]\n\t" - "lsl r6, r7, #31\n\t" - "lsr r7, r7, #1\n\t" - "orr r5, r5, r6\n\t" - "ldr r4, [%[r], #36]\n\t" - "str r5, [%[r], #28]\n\t" - "lsl r6, r4, #31\n\t" - "lsr r4, r4, #1\n\t" - "orr r7, 
r7, r6\n\t" - "ldr r5, [%[r], #40]\n\t" - "str r7, [%[r], #32]\n\t" - "lsl r6, r5, #31\n\t" - "lsr r5, r5, #1\n\t" - "orr r4, r4, r6\n\t" - "ldr r7, [%[r], #44]\n\t" - "str r4, [%[r], #36]\n\t" - "lsl r6, r7, #31\n\t" - "lsr r7, r7, #1\n\t" - "orr r5, r5, r6\n\t" - "orr r7, r7, r3\n\t" - "str r5, [%[r], #40]\n\t" - "str r7, [%[r], #44]\n\t" + "ldr r3, [%[a]]\n\t" +#ifdef __clang__ + "lsls r3, r3, #31\n\t" +#else + "lsl r3, r3, #31\n\t" +#endif + "beq L_sp_384_div2_mod_12_no_add_%=\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[m]]\n\t" + "ldr r7, [%[m], #4]\n\t" +#ifdef __clang__ + "adds r4, r4, r6\n\t" +#else + "add r4, r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif + "str r4, [%[r]]\n\t" + "str r5, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" + "ldr r6, [%[m], #8]\n\t" + "ldr r7, [%[m], #12]\n\t" +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif + "str r4, [%[r], #8]\n\t" + "str r5, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "ldr r6, [%[m], #16]\n\t" + "ldr r7, [%[m], #20]\n\t" +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif + "str r4, [%[r], #16]\n\t" + "str r5, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" + "ldr r6, [%[m], #24]\n\t" + "ldr r7, [%[m], #28]\n\t" +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif + "str r4, [%[r], #24]\n\t" + "str r5, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[a], #36]\n\t" + "ldr r6, [%[m], #32]\n\t" + "ldr r7, [%[m], #36]\n\t" +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif + "str r4, [%[r], 
#32]\n\t" + "str r5, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[a], #44]\n\t" + "ldr r6, [%[m], #40]\n\t" + "ldr r7, [%[m], #44]\n\t" +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif + "str r4, [%[r], #40]\n\t" + "str r5, [%[r], #44]\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif +#ifdef __clang__ + "lsls r3, r3, #31\n\t" +#else + "lsl r3, r3, #31\n\t" +#endif + "b L_sp_384_div2_mod_12_div2_%=\n\t" + "\n" + "L_sp_384_div2_mod_12_no_add_%=: \n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[a], #4]\n\t" + "str r4, [%[r]]\n\t" + "str r5, [%[r], #4]\n\t" + "ldr r4, [%[a], #4]\n\t" + "ldr r5, [%[a], #8]\n\t" + "str r4, [%[r], #4]\n\t" + "str r5, [%[r], #8]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" + "str r4, [%[r], #8]\n\t" + "str r5, [%[r], #12]\n\t" + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[a], #16]\n\t" + "str r4, [%[r], #12]\n\t" + "str r5, [%[r], #16]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "str r4, [%[r], #16]\n\t" + "str r5, [%[r], #20]\n\t" + "ldr r4, [%[a], #20]\n\t" + "ldr r5, [%[a], #24]\n\t" + "str r4, [%[r], #20]\n\t" + "str r5, [%[r], #24]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" + "str r4, [%[r], #24]\n\t" + "str r5, [%[r], #28]\n\t" + "ldr r4, [%[a], #28]\n\t" + "ldr r5, [%[a], #32]\n\t" + "str r4, [%[r], #28]\n\t" + "str r5, [%[r], #32]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[a], #36]\n\t" + "str r4, [%[r], #32]\n\t" + "str r5, [%[r], #36]\n\t" + "ldr r4, [%[a], #36]\n\t" + "ldr r5, [%[a], #40]\n\t" + "str r4, [%[r], #36]\n\t" + "str r5, [%[r], #40]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[a], #44]\n\t" + "str r4, [%[r], #40]\n\t" + "str r5, [%[r], #44]\n\t" + "\n" + "L_sp_384_div2_mod_12_div2_%=: \n\t" + "ldr r4, [%[r]]\n\t" + "ldr r5, [%[r], #4]\n\t" +#ifdef __clang__ + "lsrs r4, r4, #1\n\t" +#else + "lsr r4, r4, #1\n\t" 
+#endif +#ifdef __clang__ + "lsls r6, r5, #31\n\t" +#else + "lsl r6, r5, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r5, r5, #1\n\t" +#else + "lsr r5, r5, #1\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r7, [%[r], #8]\n\t" + "str r4, [%[r]]\n\t" +#ifdef __clang__ + "lsls r6, r7, #31\n\t" +#else + "lsl r6, r7, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #1\n\t" +#else + "lsr r7, r7, #1\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r4, [%[r], #12]\n\t" + "str r5, [%[r], #4]\n\t" +#ifdef __clang__ + "lsls r6, r4, #31\n\t" +#else + "lsl r6, r4, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #1\n\t" +#else + "lsr r4, r4, #1\n\t" +#endif +#ifdef __clang__ + "orrs r7, r6\n\t" +#else + "orr r7, r6\n\t" +#endif + "ldr r5, [%[r], #16]\n\t" + "str r7, [%[r], #8]\n\t" +#ifdef __clang__ + "lsls r6, r5, #31\n\t" +#else + "lsl r6, r5, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r5, r5, #1\n\t" +#else + "lsr r5, r5, #1\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r7, [%[r], #20]\n\t" + "str r4, [%[r], #12]\n\t" +#ifdef __clang__ + "lsls r6, r7, #31\n\t" +#else + "lsl r6, r7, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #1\n\t" +#else + "lsr r7, r7, #1\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r4, [%[r], #24]\n\t" + "str r5, [%[r], #16]\n\t" +#ifdef __clang__ + "lsls r6, r4, #31\n\t" +#else + "lsl r6, r4, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #1\n\t" +#else + "lsr r4, r4, #1\n\t" +#endif +#ifdef __clang__ + "orrs r7, r6\n\t" +#else + "orr r7, r6\n\t" +#endif + "ldr r5, [%[r], #28]\n\t" + "str r7, [%[r], #20]\n\t" +#ifdef __clang__ + "lsls r6, r5, #31\n\t" +#else + "lsl r6, r5, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r5, r5, #1\n\t" +#else + "lsr r5, r5, #1\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r7, [%[r], 
#32]\n\t" + "str r4, [%[r], #24]\n\t" +#ifdef __clang__ + "lsls r6, r7, #31\n\t" +#else + "lsl r6, r7, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #1\n\t" +#else + "lsr r7, r7, #1\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif + "ldr r4, [%[r], #36]\n\t" + "str r5, [%[r], #28]\n\t" +#ifdef __clang__ + "lsls r6, r4, #31\n\t" +#else + "lsl r6, r4, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #1\n\t" +#else + "lsr r4, r4, #1\n\t" +#endif +#ifdef __clang__ + "orrs r7, r6\n\t" +#else + "orr r7, r6\n\t" +#endif + "ldr r5, [%[r], #40]\n\t" + "str r7, [%[r], #32]\n\t" +#ifdef __clang__ + "lsls r6, r5, #31\n\t" +#else + "lsl r6, r5, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r5, r5, #1\n\t" +#else + "lsr r5, r5, #1\n\t" +#endif +#ifdef __clang__ + "orrs r4, r6\n\t" +#else + "orr r4, r6\n\t" +#endif + "ldr r7, [%[r], #44]\n\t" + "str r4, [%[r], #36]\n\t" +#ifdef __clang__ + "lsls r6, r7, #31\n\t" +#else + "lsl r6, r7, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #1\n\t" +#else + "lsr r7, r7, #1\n\t" +#endif +#ifdef __clang__ + "orrs r5, r6\n\t" +#else + "orr r5, r6\n\t" +#endif +#ifdef __clang__ + "orrs r7, r3\n\t" +#else + "orr r7, r3\n\t" +#endif + "str r5, [%[r], #40]\n\t" + "str r7, [%[r], #44]\n\t" + : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : - : [r] "r" (r), [a] "r" (a), [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7", "r3" + : "memory", "r3", "r4", "r5", "r6", "r7" ); } static int sp_384_num_bits_12(sp_digit* a) { - int r = 0; - static const byte table[256] = { + static const byte sp_num_bits_table[256] = { 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, @@ -29862,508 +50554,1112 @@ static int sp_384_num_bits_12(sp_digit* a) 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, }; - + const byte* table = sp_num_bits_table; __asm__ __volatile__ ( - "mov r6, #0xff\n\t" - "ldr 
r3, [%[a], #44]\n\t" - "cmp r3, #0\n\t" - "beq 11f\n\t" - "lsr r5, r3, #24\n\t" - "cmp r5, #0\n\t" - "beq 133f\n\t" - "mov %[r], #255\n\t" - "add %[r], %[r], #121\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n133:\n\t" - "lsr r5, r3, #16\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 132f\n\t" - "mov %[r], #255\n\t" - "add %[r], %[r], #113\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n132:\n\t" - "lsr r5, r3, #8\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 131f\n\t" - "mov %[r], #255\n\t" - "add %[r], %[r], #105\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n131:\n\t" - "lsr r5, r3, #0\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 130f\n\t" - "mov %[r], #255\n\t" - "add %[r], %[r], #97\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n130:\n\t" - "b 13f\n\t" - "\n11:\n\t" - "ldr r3, [%[a], #40]\n\t" - "cmp r3, #0\n\t" - "beq 10f\n\t" - "lsr r5, r3, #24\n\t" - "cmp r5, #0\n\t" - "beq 123f\n\t" - "mov %[r], #255\n\t" - "add %[r], %[r], #89\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n123:\n\t" - "lsr r5, r3, #16\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 122f\n\t" - "mov %[r], #255\n\t" - "add %[r], %[r], #81\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n122:\n\t" - "lsr r5, r3, #8\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 121f\n\t" - "mov %[r], #255\n\t" - "add %[r], %[r], #73\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n121:\n\t" - "lsr r5, r3, #0\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 120f\n\t" - "mov %[r], #255\n\t" - "add %[r], %[r], #65\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n120:\n\t" - "b 13f\n\t" - "\n10:\n\t" - "ldr r3, [%[a], #36]\n\t" - "cmp r3, #0\n\t" - "beq 9f\n\t" - "lsr r5, r3, #24\n\t" - "cmp r5, #0\n\t" - "beq 
113f\n\t" - "mov %[r], #255\n\t" - "add %[r], %[r], #57\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n113:\n\t" - "lsr r5, r3, #16\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 112f\n\t" - "mov %[r], #255\n\t" - "add %[r], %[r], #49\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n112:\n\t" - "lsr r5, r3, #8\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 111f\n\t" - "mov %[r], #255\n\t" - "add %[r], %[r], #41\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n111:\n\t" - "lsr r5, r3, #0\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 110f\n\t" - "mov %[r], #255\n\t" - "add %[r], %[r], #33\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n110:\n\t" - "b 13f\n\t" - "\n9:\n\t" - "ldr r3, [%[a], #32]\n\t" - "cmp r3, #0\n\t" - "beq 8f\n\t" - "lsr r5, r3, #24\n\t" - "cmp r5, #0\n\t" - "beq 103f\n\t" - "mov %[r], #255\n\t" - "add %[r], %[r], #25\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n103:\n\t" - "lsr r5, r3, #16\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 102f\n\t" - "mov %[r], #255\n\t" - "add %[r], %[r], #17\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n102:\n\t" - "lsr r5, r3, #8\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 101f\n\t" - "mov %[r], #255\n\t" - "add %[r], %[r], #9\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n101:\n\t" - "lsr r5, r3, #0\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 100f\n\t" - "mov %[r], #255\n\t" - "add %[r], %[r], #1\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n100:\n\t" - "b 13f\n\t" - "\n8:\n\t" - "ldr r3, [%[a], #28]\n\t" - "cmp r3, #0\n\t" - "beq 7f\n\t" - "lsr r5, r3, #24\n\t" - "cmp r5, #0\n\t" - "beq 93f\n\t" - "mov %[r], #248\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n93:\n\t" - 
"lsr r5, r3, #16\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 92f\n\t" - "mov %[r], #240\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n92:\n\t" - "lsr r5, r3, #8\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 91f\n\t" - "mov %[r], #232\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n91:\n\t" - "lsr r5, r3, #0\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 90f\n\t" - "mov %[r], #224\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n90:\n\t" - "b 13f\n\t" - "\n7:\n\t" - "ldr r3, [%[a], #24]\n\t" - "cmp r3, #0\n\t" - "beq 6f\n\t" - "lsr r5, r3, #24\n\t" - "cmp r5, #0\n\t" - "beq 83f\n\t" - "mov %[r], #216\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n83:\n\t" - "lsr r5, r3, #16\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 82f\n\t" - "mov %[r], #208\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n82:\n\t" - "lsr r5, r3, #8\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 81f\n\t" - "mov %[r], #200\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n81:\n\t" - "lsr r5, r3, #0\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 80f\n\t" - "mov %[r], #192\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n80:\n\t" - "b 13f\n\t" - "\n6:\n\t" - "ldr r3, [%[a], #20]\n\t" - "cmp r3, #0\n\t" - "beq 5f\n\t" - "lsr r5, r3, #24\n\t" - "cmp r5, #0\n\t" - "beq 73f\n\t" - "mov %[r], #184\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n73:\n\t" - "lsr r5, r3, #16\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 72f\n\t" - "mov %[r], #176\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n72:\n\t" - "lsr r5, r3, #8\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 71f\n\t" - "mov %[r], #168\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" 
- "\n71:\n\t" - "lsr r5, r3, #0\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 70f\n\t" - "mov %[r], #160\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n70:\n\t" - "b 13f\n\t" - "\n5:\n\t" - "ldr r3, [%[a], #16]\n\t" - "cmp r3, #0\n\t" - "beq 4f\n\t" - "lsr r5, r3, #24\n\t" - "cmp r5, #0\n\t" - "beq 63f\n\t" - "mov %[r], #152\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n63:\n\t" - "lsr r5, r3, #16\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 62f\n\t" - "mov %[r], #144\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n62:\n\t" - "lsr r5, r3, #8\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 61f\n\t" - "mov %[r], #136\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n61:\n\t" - "lsr r5, r3, #0\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 60f\n\t" - "mov %[r], #128\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n60:\n\t" - "b 13f\n\t" - "\n4:\n\t" - "ldr r3, [%[a], #12]\n\t" - "cmp r3, #0\n\t" - "beq 3f\n\t" - "lsr r5, r3, #24\n\t" - "cmp r5, #0\n\t" - "beq 53f\n\t" - "mov %[r], #120\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n53:\n\t" - "lsr r5, r3, #16\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 52f\n\t" - "mov %[r], #112\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n52:\n\t" - "lsr r5, r3, #8\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 51f\n\t" - "mov %[r], #104\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n51:\n\t" - "lsr r5, r3, #0\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 50f\n\t" - "mov %[r], #96\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n50:\n\t" - "b 13f\n\t" - "\n3:\n\t" - "ldr r3, [%[a], #8]\n\t" - "cmp r3, #0\n\t" - "beq 2f\n\t" - "lsr r5, r3, #24\n\t" - "cmp r5, #0\n\t" - "beq 43f\n\t" - "mov %[r], 
#88\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n43:\n\t" - "lsr r5, r3, #16\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 42f\n\t" - "mov %[r], #80\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n42:\n\t" - "lsr r5, r3, #8\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 41f\n\t" - "mov %[r], #72\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n41:\n\t" - "lsr r5, r3, #0\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 40f\n\t" - "mov %[r], #64\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n40:\n\t" - "b 13f\n\t" - "\n2:\n\t" - "ldr r3, [%[a], #4]\n\t" - "cmp r3, #0\n\t" - "beq 1f\n\t" - "lsr r5, r3, #24\n\t" - "cmp r5, #0\n\t" - "beq 33f\n\t" - "mov %[r], #56\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n33:\n\t" - "lsr r5, r3, #16\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 32f\n\t" - "mov %[r], #48\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n32:\n\t" - "lsr r5, r3, #8\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 31f\n\t" - "mov %[r], #40\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n31:\n\t" - "lsr r5, r3, #0\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 30f\n\t" - "mov %[r], #32\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n30:\n\t" - "b 13f\n\t" - "\n1:\n\t" - "ldr r3, [%[a], #0]\n\t" - "lsr r5, r3, #24\n\t" - "cmp r5, #0\n\t" - "beq 23f\n\t" - "mov %[r], #24\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n23:\n\t" - "lsr r5, r3, #16\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 22f\n\t" - "mov %[r], #16\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n22:\n\t" - "lsr r5, r3, #8\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 21f\n\t" - "mov %[r], #8\n\t" - "ldrb r4, [%[table], 
r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n21:\n\t" - "lsr r5, r3, #0\n\t" - "and r5, r6\n\t" - "cmp r5, #0\n\t" - "beq 20f\n\t" - "mov %[r], #0\n\t" - "ldrb r4, [%[table], r5]\n\t" - "add %[r], %[r], r4\n\t" - "b 13f\n\t" - "\n20:\n\t" - "\n13:\n\t" - : [r] "+r" (r) - : [a] "r" (a), [table] "r" (table) - : "r3", "r4", "r5", "r6" + "movs r6, #0xff\n\t" + "ldr r3, [%[a], #44]\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_11_%=\n\t" +#ifdef __clang__ + "lsrs r5, r3, #24\n\t" +#else + "lsr r5, r3, #24\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_133_%=\n\t" + "movs r2, #0xff\n\t" +#ifdef __clang__ + "adds r2, r2, #0x79\n\t" +#else + "add r2, r2, #0x79\n\t" +#endif + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_133_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #16\n\t" +#else + "lsr r5, r3, #16\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_132_%=\n\t" + "movs r2, #0xff\n\t" +#ifdef __clang__ + "adds r2, r2, #0x71\n\t" +#else + "add r2, r2, #0x71\n\t" +#endif + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_132_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #8\n\t" +#else + "lsr r5, r3, #8\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_131_%=\n\t" + "movs r2, #0xff\n\t" +#ifdef __clang__ + "adds r2, r2, #0x69\n\t" +#else + "add r2, r2, #0x69\n\t" +#endif + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_131_%=: \n\t" + "movs r5, r3\n\t" +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and 
r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_130_%=\n\t" + "movs r2, #0xff\n\t" +#ifdef __clang__ + "adds r2, r2, #0x61\n\t" +#else + "add r2, r2, #0x61\n\t" +#endif + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_130_%=: \n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_11_%=: \n\t" + "ldr r3, [%[a], #40]\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_10_%=\n\t" +#ifdef __clang__ + "lsrs r5, r3, #24\n\t" +#else + "lsr r5, r3, #24\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_123_%=\n\t" + "movs r2, #0xff\n\t" +#ifdef __clang__ + "adds r2, r2, #0x59\n\t" +#else + "add r2, r2, #0x59\n\t" +#endif + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_123_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #16\n\t" +#else + "lsr r5, r3, #16\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_122_%=\n\t" + "movs r2, #0xff\n\t" +#ifdef __clang__ + "adds r2, r2, #0x51\n\t" +#else + "add r2, r2, #0x51\n\t" +#endif + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_122_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #8\n\t" +#else + "lsr r5, r3, #8\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_121_%=\n\t" + "movs r2, #0xff\n\t" +#ifdef __clang__ + "adds r2, r2, #0x49\n\t" +#else + "add r2, r2, #0x49\n\t" +#endif + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + 
"L_sp_384_num_bits_12_121_%=: \n\t" + "movs r5, r3\n\t" +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_120_%=\n\t" + "movs r2, #0xff\n\t" +#ifdef __clang__ + "adds r2, r2, #0x41\n\t" +#else + "add r2, r2, #0x41\n\t" +#endif + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_120_%=: \n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_10_%=: \n\t" + "ldr r3, [%[a], #36]\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_9_%=\n\t" +#ifdef __clang__ + "lsrs r5, r3, #24\n\t" +#else + "lsr r5, r3, #24\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_113_%=\n\t" + "movs r2, #0xff\n\t" +#ifdef __clang__ + "adds r2, r2, #57\n\t" +#else + "add r2, r2, #57\n\t" +#endif + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_113_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #16\n\t" +#else + "lsr r5, r3, #16\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_112_%=\n\t" + "movs r2, #0xff\n\t" +#ifdef __clang__ + "adds r2, r2, #49\n\t" +#else + "add r2, r2, #49\n\t" +#endif + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_112_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #8\n\t" +#else + "lsr r5, r3, #8\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_111_%=\n\t" + "movs r2, #0xff\n\t" +#ifdef __clang__ + "adds r2, r2, #41\n\t" +#else + "add r2, r2, #41\n\t" +#endif + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" 
+#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_111_%=: \n\t" + "movs r5, r3\n\t" +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_110_%=\n\t" + "movs r2, #0xff\n\t" +#ifdef __clang__ + "adds r2, r2, #33\n\t" +#else + "add r2, r2, #33\n\t" +#endif + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_110_%=: \n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_9_%=: \n\t" + "ldr r3, [%[a], #32]\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_8_%=\n\t" +#ifdef __clang__ + "lsrs r5, r3, #24\n\t" +#else + "lsr r5, r3, #24\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_103_%=\n\t" + "movs r2, #0xff\n\t" +#ifdef __clang__ + "adds r2, r2, #25\n\t" +#else + "add r2, r2, #25\n\t" +#endif + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_103_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #16\n\t" +#else + "lsr r5, r3, #16\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_102_%=\n\t" + "movs r2, #0xff\n\t" +#ifdef __clang__ + "adds r2, r2, #17\n\t" +#else + "add r2, r2, #17\n\t" +#endif + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_102_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #8\n\t" +#else + "lsr r5, r3, #8\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_101_%=\n\t" + "movs r2, #0xff\n\t" +#ifdef __clang__ + "adds r2, r2, #9\n\t" +#else + "add r2, r2, #9\n\t" 
+#endif + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_101_%=: \n\t" + "movs r5, r3\n\t" +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_100_%=\n\t" + "movs r2, #0xff\n\t" +#ifdef __clang__ + "adds r2, r2, #1\n\t" +#else + "add r2, r2, #1\n\t" +#endif + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_100_%=: \n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_8_%=: \n\t" + "ldr r3, [%[a], #28]\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_7_%=\n\t" +#ifdef __clang__ + "lsrs r5, r3, #24\n\t" +#else + "lsr r5, r3, #24\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_93_%=\n\t" + "movs r2, #0xf8\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_93_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #16\n\t" +#else + "lsr r5, r3, #16\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_92_%=\n\t" + "movs r2, #0xf0\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_92_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #8\n\t" +#else + "lsr r5, r3, #8\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_91_%=\n\t" + "movs r2, #0xe8\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + 
"L_sp_384_num_bits_12_91_%=: \n\t" + "movs r5, r3\n\t" +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_90_%=\n\t" + "movs r2, #0xe0\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_90_%=: \n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_7_%=: \n\t" + "ldr r3, [%[a], #24]\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_6_%=\n\t" +#ifdef __clang__ + "lsrs r5, r3, #24\n\t" +#else + "lsr r5, r3, #24\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_83_%=\n\t" + "movs r2, #0xd8\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_83_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #16\n\t" +#else + "lsr r5, r3, #16\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_82_%=\n\t" + "movs r2, #0xd0\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_82_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #8\n\t" +#else + "lsr r5, r3, #8\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_81_%=\n\t" + "movs r2, #0xc8\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_81_%=: \n\t" + "movs r5, r3\n\t" +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_80_%=\n\t" + "movs r2, #0xc0\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ 
+ "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_80_%=: \n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_6_%=: \n\t" + "ldr r3, [%[a], #20]\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_5_%=\n\t" +#ifdef __clang__ + "lsrs r5, r3, #24\n\t" +#else + "lsr r5, r3, #24\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_73_%=\n\t" + "movs r2, #0xb8\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_73_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #16\n\t" +#else + "lsr r5, r3, #16\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_72_%=\n\t" + "movs r2, #0xb0\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_72_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #8\n\t" +#else + "lsr r5, r3, #8\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_71_%=\n\t" + "movs r2, #0xa8\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_71_%=: \n\t" + "movs r5, r3\n\t" +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_70_%=\n\t" + "movs r2, #0xa0\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_70_%=: \n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_5_%=: \n\t" + "ldr r3, [%[a], #16]\n\t" + "cmp 
r3, #0\n\t" + "beq L_sp_384_num_bits_12_4_%=\n\t" +#ifdef __clang__ + "lsrs r5, r3, #24\n\t" +#else + "lsr r5, r3, #24\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_63_%=\n\t" + "movs r2, #0x98\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_63_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #16\n\t" +#else + "lsr r5, r3, #16\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_62_%=\n\t" + "movs r2, #0x90\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_62_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #8\n\t" +#else + "lsr r5, r3, #8\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_61_%=\n\t" + "movs r2, #0x88\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_61_%=: \n\t" + "movs r5, r3\n\t" +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_60_%=\n\t" + "movs r2, #0x80\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_60_%=: \n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_4_%=: \n\t" + "ldr r3, [%[a], #12]\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_3_%=\n\t" +#ifdef __clang__ + "lsrs r5, r3, #24\n\t" +#else + "lsr r5, r3, #24\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_53_%=\n\t" + "movs r2, #0x78\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef 
__clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_53_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #16\n\t" +#else + "lsr r5, r3, #16\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_52_%=\n\t" + "movs r2, #0x70\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_52_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #8\n\t" +#else + "lsr r5, r3, #8\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_51_%=\n\t" + "movs r2, #0x68\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_51_%=: \n\t" + "movs r5, r3\n\t" +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_50_%=\n\t" + "movs r2, #0x60\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_50_%=: \n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_3_%=: \n\t" + "ldr r3, [%[a], #8]\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_2_%=\n\t" +#ifdef __clang__ + "lsrs r5, r3, #24\n\t" +#else + "lsr r5, r3, #24\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_43_%=\n\t" + "movs r2, #0x58\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_43_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #16\n\t" +#else + "lsr r5, r3, #16\n\t" +#endif +#ifdef __clang__ + 
"ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_42_%=\n\t" + "movs r2, #0x50\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_42_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #8\n\t" +#else + "lsr r5, r3, #8\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_41_%=\n\t" + "movs r2, #0x48\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_41_%=: \n\t" + "movs r5, r3\n\t" +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_40_%=\n\t" + "movs r2, #0x40\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_40_%=: \n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_2_%=: \n\t" + "ldr r3, [%[a], #4]\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_1_%=\n\t" +#ifdef __clang__ + "lsrs r5, r3, #24\n\t" +#else + "lsr r5, r3, #24\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_33_%=\n\t" + "movs r2, #56\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_33_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #16\n\t" +#else + "lsr r5, r3, #16\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_32_%=\n\t" + "movs r2, #48\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b 
L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_32_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #8\n\t" +#else + "lsr r5, r3, #8\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_31_%=\n\t" + "movs r2, #40\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_31_%=: \n\t" + "movs r5, r3\n\t" +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_30_%=\n\t" + "movs r2, #32\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_30_%=: \n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_1_%=: \n\t" + "ldr r3, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r5, r3, #24\n\t" +#else + "lsr r5, r3, #24\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_23_%=\n\t" + "movs r2, #24\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_23_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #16\n\t" +#else + "lsr r5, r3, #16\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_22_%=\n\t" + "movs r2, #16\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_22_%=: \n\t" +#ifdef __clang__ + "lsrs r5, r3, #8\n\t" +#else + "lsr r5, r3, #8\n\t" +#endif +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_21_%=\n\t" + "movs r2, #8\n\t" + "ldrb r4, [%[table], 
r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_21_%=: \n\t" + "movs r5, r3\n\t" +#ifdef __clang__ + "ands r5, r6\n\t" +#else + "and r5, r6\n\t" +#endif + "cmp r5, #0\n\t" + "beq L_sp_384_num_bits_12_20_%=\n\t" + "movs r2, #0\n\t" + "ldrb r4, [%[table], r5]\n\t" +#ifdef __clang__ + "adds r2, r2, r4\n\t" +#else + "add r2, r2, r4\n\t" +#endif + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_20_%=: \n\t" + "\n" + "L_sp_384_num_bits_12_13_%=: \n\t" + "movs %[a], r2\n\t" + : [a] "+r" (a), [table] "+r" (table) + : + : "memory", "r2", "r3", "r4", "r5", "r6" ); - - return r; + return (uint32_t)(size_t)a; } /* Non-constant time modular inversion. @@ -31411,95 +52707,243 @@ typedef struct sp_point_1024 { SP_NOINLINE static void sp_1024_mul_16(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit tmp[16 * 2]; + sp_digit t[16 * 2]; + sp_digit* tmp = t; __asm__ __volatile__ ( - "mov r3, #0\n\t" - "mov r4, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" "mov r8, r3\n\t" - "mov r11, %[r]\n\t" + "mov r11, %[tmp]\n\t" "mov r9, %[a]\n\t" "mov r10, %[b]\n\t" - "mov r6, #64\n\t" - "add r6, r9\n\t" + "movs r6, #0x40\n\t" + "add r6, r6, r9\n\t" "mov r12, r6\n\t" - "\n1:\n\t" - "mov %[r], #0\n\t" - "mov r5, #0\n\t" - "mov r6, #60\n\t" + "\n" + "L_sp_1024_mul_16_words_%=: \n\t" + "movs %[tmp], #0\n\t" + "movs r5, #0\n\t" + "movs r6, #60\n\t" "mov %[a], r8\n\t" - "sub %[a], r6\n\t" +#ifdef __clang__ + "subs %[a], %[a], r6\n\t" +#else + "sub %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" +#endif +#ifdef __clang__ + "mvns r6, r6\n\t" +#else "mvn r6, r6\n\t" +#endif +#ifdef __clang__ + "ands %[a], r6\n\t" +#else "and %[a], r6\n\t" +#endif "mov %[b], r8\n\t" - "sub %[b], %[a]\n\t" - "add %[a], r9\n\t" - "add %[b], r10\n\t" - "\n2:\n\t" +#ifdef __clang__ + "subs %[b], %[b], %[a]\n\t" +#else + "sub %[b], %[b], 
%[a]\n\t" +#endif + "add %[a], %[a], r9\n\t" + "add %[b], %[b], r10\n\t" + "\n" + "L_sp_1024_mul_16_mul_%=: \n\t" "# Multiply Start\n\t" "ldr r6, [%[a]]\n\t" "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" - "adc r4, %[r]\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[tmp]\n\t" +#else + "adc r4, %[tmp]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r6, [%[a]]\n\t" "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsls r7, r7, 
#16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "# Multiply Done\n\t" - "add %[a], #4\n\t" - "sub %[b], #4\n\t" +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "subs %[b], %[b], #4\n\t" +#else + "sub %[b], %[b], #4\n\t" +#endif "cmp %[a], r12\n\t" - "beq 3f\n\t" + "beq L_sp_1024_mul_16_done_mul_%=\n\t" "mov r6, r8\n\t" - "add r6, r9\n\t" + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" - "ble 2b\n\t" - "\n3:\n\t" - "mov %[r], r11\n\t" + "ble L_sp_1024_mul_16_mul_%=\n\t" + "\n" + "L_sp_1024_mul_16_done_mul_%=: \n\t" + "mov %[tmp], r11\n\t" "mov r7, r8\n\t" - "str r3, [%[r], r7]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "add r7, #4\n\t" + "str r3, [%[tmp], r7]\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "mov r8, r7\n\t" - "mov r6, #120\n\t" + "movs r6, #0x78\n\t" "cmp r7, r6\n\t" - "ble 1b\n\t" - "str r3, [%[r], r7]\n\t" + "ble L_sp_1024_mul_16_words_%=\n\t" + "str r3, [%[tmp], r7]\n\t" "mov %[a], r9\n\t" "mov %[b], r10\n\t" + : [a] "+r" (a), [b] "+r" (b), [tmp] "+r" (tmp) : - : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); - XMEMCPY(r, tmp, sizeof(tmp)); + XMEMCPY(r, t, sizeof(t)); } /* Square a and put result in r. 
(r = a * a) @@ -31510,142 +52954,420 @@ SP_NOINLINE static void sp_1024_mul_16(sp_digit* r, const sp_digit* a, SP_NOINLINE static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( - "mov r3, #0\n\t" - "mov r4, #0\n\t" - "mov r5, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" + "movs r5, #0\n\t" "mov r8, r3\n\t" "mov r11, %[r]\n\t" - "mov r6, #128\n\t" + "movs r6, #0x80\n\t" +#ifdef __clang__ + "negs r6, r6\n\t" +#else "neg r6, r6\n\t" - "add sp, r6\n\t" +#endif + "add sp, sp, r6\n\t" "mov r10, sp\n\t" "mov r9, %[a]\n\t" - "\n1:\n\t" - "mov %[r], #0\n\t" - "mov r6, #60\n\t" + "\n" + "L_sp_1024_sqr_16_words_%=: \n\t" + "movs %[r], #0\n\t" + "movs r6, #60\n\t" "mov %[a], r8\n\t" - "sub %[a], r6\n\t" +#ifdef __clang__ + "subs %[a], %[a], r6\n\t" +#else + "sub %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" +#endif +#ifdef __clang__ + "mvns r6, r6\n\t" +#else "mvn r6, r6\n\t" +#endif +#ifdef __clang__ + "ands %[a], r6\n\t" +#else "and %[a], r6\n\t" +#endif "mov r2, r8\n\t" - "sub r2, %[a]\n\t" - "add %[a], r9\n\t" - "add r2, r9\n\t" - "\n2:\n\t" +#ifdef __clang__ + "subs r2, r2, %[a]\n\t" +#else + "sub r2, r2, %[a]\n\t" +#endif + "add %[a], %[a], r9\n\t" + "add r2, r2, r9\n\t" + "\n" + "L_sp_1024_sqr_16_mul_%=: \n\t" "cmp r2, %[a]\n\t" - "beq 4f\n\t" + "beq L_sp_1024_sqr_16_sqr_%=\n\t" "# Multiply * 2: Start\n\t" "ldr r6, [%[a]]\n\t" "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" 
+#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, 
r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# Multiply * 2: Done\n\t" - "bal 5f\n\t" - "\n4:\n\t" + "bal L_sp_1024_sqr_16_done_sqr_%=\n\t" + "\n" + "L_sp_1024_sqr_16_sqr_%=: \n\t" "# Square: Start\n\t" "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r6\n\t" +#else "mul r6, r6\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "muls r7, r7\n\t" +#else "mul r7, r7\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs 
r7, r6, #15\n\t" +#else "lsr r7, r6, #15\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #17\n\t" +#else "lsl r6, r6, #17\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# Square: Done\n\t" - "\n5:\n\t" - "add %[a], #4\n\t" - "sub r2, #4\n\t" - "mov r6, #64\n\t" - "add r6, r9\n\t" + "\n" + "L_sp_1024_sqr_16_done_sqr_%=: \n\t" +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "subs r2, r2, #4\n\t" +#else + "sub r2, r2, #4\n\t" +#endif + "movs r6, #0x40\n\t" + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" - "beq 3f\n\t" + "beq L_sp_1024_sqr_16_done_mul_%=\n\t" "cmp %[a], r2\n\t" - "bgt 3f\n\t" + "bgt L_sp_1024_sqr_16_done_mul_%=\n\t" "mov r7, r8\n\t" - "add r7, r9\n\t" + "add r7, r7, r9\n\t" "cmp %[a], r7\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble L_sp_1024_sqr_16_mul_%=\n\t" + "\n" + "L_sp_1024_sqr_16_done_mul_%=: \n\t" "mov %[r], r10\n\t" "mov r7, r8\n\t" "str r3, [%[r], r7]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "mov r5, #0\n\t" - "add r7, #4\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" + "movs r5, #0\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "mov r8, r7\n\t" - "mov r6, #120\n\t" + "movs r6, #0x78\n\t" "cmp r7, r6\n\t" - "ble 1b\n\t" + "ble L_sp_1024_sqr_16_words_%=\n\t" "mov %[a], r9\n\t" "str r3, [%[r], r7]\n\t" "mov %[r], r11\n\t" "mov %[a], r10\n\t" - "mov r3, #124\n\t" - "\n4:\n\t" + "movs r3, #0x7c\n\t" + "\n" + "L_sp_1024_sqr_16_store_%=: \n\t" "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" - "sub r3, #4\n\t" - "bge 4b\n\t" - "mov r6, #128\n\t" - "add sp, r6\n\t" +#ifdef __clang__ + "subs r3, r3, #4\n\t" +#else + "sub r3, r3, #4\n\t" +#endif + "bge L_sp_1024_sqr_16_store_%=\n\t" + "movs r6, #0x80\n\t" + "add sp, sp, r6\n\t" + : [r] "+r" (r), [a] "+r" (a) 
: - : [r] "r" (r), [a] "r" (a) : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } @@ -31659,230 +53381,426 @@ SP_NOINLINE static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) SP_NOINLINE static sp_digit sp_1024_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "add r4, r5\n\t" - "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "adds r4, r4, r5\n\t" +#else + "add r4, r4, r5\n\t" +#endif + "str r4, [%[r]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[b], #32]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], #40]\n\t" +#ifdef __clang__ + "adcs 
r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #44]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[b], #48]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #52]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[b], #56]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #60]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #60]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif + "movs %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5" + : "memory", "r3", "r4", "r5" ); - - return c; + return (uint32_t)(size_t)r; } -/* Sub b from a into r. (r = a - b) +/* Sub b from a into a. (a -= b) * - * r A single precision integer. * a A single precision integer. * b A single precision integer. 
*/ SP_NOINLINE static sp_digit sp_1024_sub_in_place_32(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldr r3, [%[a], #0]\n\t" + "movs r2, #0\n\t" + "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" + "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" - "sub r3, r5\n\t" +#ifdef __clang__ + "subs r3, r3, r5\n\t" +#else + "sub r3, r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" - "str r3, [%[a], #0]\n\t" +#endif + "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #8]\n\t" "ldr r6, [%[b], #12]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #8]\n\t" "str r4, [%[a], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #16]\n\t" "ldr r6, [%[b], #20]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #16]\n\t" "str r4, [%[a], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #24]\n\t" "ldr r6, [%[b], #28]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #24]\n\t" "str r4, [%[a], #28]\n\t" "ldr r3, [%[a], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #32]\n\t" "ldr r6, [%[b], #36]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #32]\n\t" "str r4, [%[a], #36]\n\t" "ldr r3, [%[a], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #40]\n\t" "ldr r6, [%[b], #44]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #40]\n\t" 
"str r4, [%[a], #44]\n\t" "ldr r3, [%[a], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #48]\n\t" "ldr r6, [%[b], #52]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #48]\n\t" "str r4, [%[a], #52]\n\t" "ldr r3, [%[a], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #56]\n\t" "ldr r6, [%[b], #60]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #56]\n\t" "str r4, [%[a], #60]\n\t" "ldr r3, [%[a], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #64]\n\t" "ldr r6, [%[b], #68]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #64]\n\t" "str r4, [%[a], #68]\n\t" "ldr r3, [%[a], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #72]\n\t" "ldr r6, [%[b], #76]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #72]\n\t" "str r4, [%[a], #76]\n\t" "ldr r3, [%[a], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #80]\n\t" "ldr r6, [%[b], #84]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #80]\n\t" "str r4, [%[a], #84]\n\t" "ldr r3, [%[a], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #88]\n\t" "ldr r6, [%[b], #92]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #88]\n\t" "str r4, [%[a], #92]\n\t" "ldr r3, [%[a], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #96]\n\t" "ldr r6, [%[b], #100]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + 
"sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #96]\n\t" "str r4, [%[a], #100]\n\t" "ldr r3, [%[a], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #104]\n\t" "ldr r6, [%[b], #108]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #104]\n\t" "str r4, [%[a], #108]\n\t" "ldr r3, [%[a], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #112]\n\t" "ldr r6, [%[b], #116]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #112]\n\t" "str r4, [%[a], #116]\n\t" "ldr r3, [%[a], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #120]\n\t" "ldr r6, [%[b], #124]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a], #120]\n\t" "str r4, [%[a], #124]\n\t" - "sbc %[c], %[c]\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) +#ifdef __clang__ + "sbcs r2, r2\n\t" +#else + "sbc r2, r2\n\t" +#endif + "movs %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6" + : "memory", "r2", "r3", "r4", "r5", "r6" ); - - return c; + return (uint32_t)(size_t)a; } /* Add b to a into r. 
(r = a + b) @@ -31894,145 +53812,275 @@ SP_NOINLINE static sp_digit sp_1024_sub_in_place_32(sp_digit* a, SP_NOINLINE static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "add r4, r5\n\t" - "str r4, [%[r], #0]\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "adds r4, r4, r5\n\t" +#else + "add r4, r4, r5\n\t" +#endif + "str r4, [%[r]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[b], #32]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], #40]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr 
r5, [%[b], #44]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[b], #48]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #52]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[b], #56]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #60]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #60]\n\t" "ldr r4, [%[a], #64]\n\t" "ldr r5, [%[b], #64]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #68]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #68]\n\t" "ldr r4, [%[a], #72]\n\t" "ldr r5, [%[b], #72]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #76]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #76]\n\t" "ldr r4, [%[a], #80]\n\t" "ldr r5, [%[b], #80]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #84]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #84]\n\t" "ldr r4, [%[a], #88]\n\t" "ldr r5, [%[b], #88]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #92]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #92]\n\t" "ldr r4, [%[a], #96]\n\t" "ldr r5, [%[b], #96]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, 
r5\n\t" +#endif "str r4, [%[r], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #100]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #100]\n\t" "ldr r4, [%[a], #104]\n\t" "ldr r5, [%[b], #104]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #108]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #108]\n\t" "ldr r4, [%[a], #112]\n\t" "ldr r5, [%[b], #112]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #116]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #116]\n\t" "ldr r4, [%[a], #120]\n\t" "ldr r5, [%[b], #120]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #124]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" +#else "adc r4, r5\n\t" +#endif "str r4, [%[r], #124]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif + "movs %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5" + : "memory", "r3", "r4", "r5" ); - - return c; + return (uint32_t)(size_t)r; } /* AND m into each word of a and store in r. 
@@ -32137,95 +54185,243 @@ SP_NOINLINE static void sp_1024_sqr_32(sp_digit* r, const sp_digit* a) SP_NOINLINE static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit tmp[32 * 2]; + sp_digit t[32 * 2]; + sp_digit* tmp = t; __asm__ __volatile__ ( - "mov r3, #0\n\t" - "mov r4, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" "mov r8, r3\n\t" - "mov r11, %[r]\n\t" + "mov r11, %[tmp]\n\t" "mov r9, %[a]\n\t" "mov r10, %[b]\n\t" - "mov r6, #128\n\t" - "add r6, r9\n\t" + "movs r6, #0x80\n\t" + "add r6, r6, r9\n\t" "mov r12, r6\n\t" - "\n1:\n\t" - "mov %[r], #0\n\t" - "mov r5, #0\n\t" - "mov r6, #124\n\t" + "\n" + "L_sp_1024_mul_32_words_%=: \n\t" + "movs %[tmp], #0\n\t" + "movs r5, #0\n\t" + "movs r6, #0x7c\n\t" "mov %[a], r8\n\t" - "sub %[a], r6\n\t" +#ifdef __clang__ + "subs %[a], %[a], r6\n\t" +#else + "sub %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" +#endif +#ifdef __clang__ + "mvns r6, r6\n\t" +#else "mvn r6, r6\n\t" +#endif +#ifdef __clang__ + "ands %[a], r6\n\t" +#else "and %[a], r6\n\t" +#endif "mov %[b], r8\n\t" - "sub %[b], %[a]\n\t" - "add %[a], r9\n\t" - "add %[b], r10\n\t" - "\n2:\n\t" +#ifdef __clang__ + "subs %[b], %[b], %[a]\n\t" +#else + "sub %[b], %[b], %[a]\n\t" +#endif + "add %[a], %[a], r9\n\t" + "add %[b], %[b], r10\n\t" + "\n" + "L_sp_1024_mul_32_mul_%=: \n\t" "# Multiply Start\n\t" "ldr r6, [%[a]]\n\t" "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" - "adc r4, %[r]\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs 
r4, %[tmp]\n\t" +#else + "adc r4, %[tmp]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r6, [%[a]]\n\t" "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "ldr r7, [%[b]]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" - "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[tmp]\n\t" +#else + "adc r5, %[tmp]\n\t" +#endif "# Multiply Done\n\t" - "add %[a], #4\n\t" - "sub %[b], #4\n\t" +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" 
+#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "subs %[b], %[b], #4\n\t" +#else + "sub %[b], %[b], #4\n\t" +#endif "cmp %[a], r12\n\t" - "beq 3f\n\t" + "beq L_sp_1024_mul_32_done_mul_%=\n\t" "mov r6, r8\n\t" - "add r6, r9\n\t" + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" - "ble 2b\n\t" - "\n3:\n\t" - "mov %[r], r11\n\t" + "ble L_sp_1024_mul_32_mul_%=\n\t" + "\n" + "L_sp_1024_mul_32_done_mul_%=: \n\t" + "mov %[tmp], r11\n\t" "mov r7, r8\n\t" - "str r3, [%[r], r7]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "add r7, #4\n\t" + "str r3, [%[tmp], r7]\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "mov r8, r7\n\t" - "mov r6, #248\n\t" + "movs r6, #0xf8\n\t" "cmp r7, r6\n\t" - "ble 1b\n\t" - "str r3, [%[r], r7]\n\t" + "ble L_sp_1024_mul_32_words_%=\n\t" + "str r3, [%[tmp], r7]\n\t" "mov %[a], r9\n\t" "mov %[b], r10\n\t" + : [a] "+r" (a), [b] "+r" (b), [tmp] "+r" (tmp) : - : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); - XMEMCPY(r, tmp, sizeof(tmp)); + XMEMCPY(r, t, sizeof(t)); } /* Square a and put result in r. 
(r = a * a) @@ -32236,144 +54432,430 @@ SP_NOINLINE static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, SP_NOINLINE static void sp_1024_sqr_32(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( - "mov r3, #0\n\t" - "mov r4, #0\n\t" - "mov r5, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" + "movs r5, #0\n\t" "mov r8, r3\n\t" "mov r11, %[r]\n\t" - "mov r6, #1\n\t" - "lsl r6, r6, #8\n\t" + "movs r6, #0xff\n\t" +#ifdef __clang__ + "adds r6, r6, #1\n\t" +#else + "add r6, r6, #1\n\t" +#endif +#ifdef __clang__ + "negs r6, r6\n\t" +#else "neg r6, r6\n\t" - "add sp, r6\n\t" +#endif + "add sp, sp, r6\n\t" "mov r10, sp\n\t" "mov r9, %[a]\n\t" - "\n1:\n\t" - "mov %[r], #0\n\t" - "mov r6, #124\n\t" + "\n" + "L_sp_1024_sqr_32_words_%=: \n\t" + "movs %[r], #0\n\t" + "movs r6, #0x7c\n\t" "mov %[a], r8\n\t" - "sub %[a], r6\n\t" +#ifdef __clang__ + "subs %[a], %[a], r6\n\t" +#else + "sub %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" +#endif +#ifdef __clang__ + "mvns r6, r6\n\t" +#else "mvn r6, r6\n\t" +#endif +#ifdef __clang__ + "ands %[a], r6\n\t" +#else "and %[a], r6\n\t" +#endif "mov r2, r8\n\t" - "sub r2, %[a]\n\t" - "add %[a], r9\n\t" - "add r2, r9\n\t" - "\n2:\n\t" +#ifdef __clang__ + "subs r2, r2, %[a]\n\t" +#else + "sub r2, r2, %[a]\n\t" +#endif + "add %[a], %[a], r9\n\t" + "add r2, r2, r9\n\t" + "\n" + "L_sp_1024_sqr_32_mul_%=: \n\t" "cmp r2, %[a]\n\t" - "beq 4f\n\t" + "beq L_sp_1024_sqr_32_sqr_%=\n\t" "# Multiply * 2: Start\n\t" "ldr r6, [%[a]]\n\t" "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" 
+#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r7, [r2]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, 
#16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# Multiply * 2: Done\n\t" - "bal 5f\n\t" - "\n4:\n\t" + "bal L_sp_1024_sqr_32_done_sqr_%=\n\t" + "\n" + "L_sp_1024_sqr_32_sqr_%=: \n\t" "# Square: Start\n\t" "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r6\n\t" +#else "mul r6, r6\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, %[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "muls r7, r7\n\t" +#else "mul r7, r7\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" 
+#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #15\n\t" +#else "lsr r7, r6, #15\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #17\n\t" +#else "lsl r6, r6, #17\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# Square: Done\n\t" - "\n5:\n\t" - "add %[a], #4\n\t" - "sub r2, #4\n\t" - "mov r6, #128\n\t" - "add r6, r9\n\t" + "\n" + "L_sp_1024_sqr_32_done_sqr_%=: \n\t" +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "subs r2, r2, #4\n\t" +#else + "sub r2, r2, #4\n\t" +#endif + "movs r6, #0x80\n\t" + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" - "beq 3f\n\t" + "beq L_sp_1024_sqr_32_done_mul_%=\n\t" "cmp %[a], r2\n\t" - "bgt 3f\n\t" + "bgt L_sp_1024_sqr_32_done_mul_%=\n\t" "mov r7, r8\n\t" - "add r7, r9\n\t" + "add r7, r7, r9\n\t" "cmp %[a], r7\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble L_sp_1024_sqr_32_mul_%=\n\t" + "\n" + "L_sp_1024_sqr_32_done_mul_%=: \n\t" "mov %[r], r10\n\t" "mov r7, r8\n\t" "str r3, [%[r], r7]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "mov r5, #0\n\t" - "add r7, #4\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" + "movs r5, #0\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "mov r8, r7\n\t" - "mov r6, #248\n\t" + "movs r6, #0xf8\n\t" "cmp r7, r6\n\t" - "ble 1b\n\t" + "ble L_sp_1024_sqr_32_words_%=\n\t" "mov %[a], r9\n\t" "str r3, [%[r], r7]\n\t" "mov %[r], r11\n\t" "mov %[a], r10\n\t" - "mov r3, #252\n\t" - "\n4:\n\t" + "movs r3, #0xfc\n\t" + "\n" + "L_sp_1024_sqr_32_store_%=: \n\t" "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" - "sub r3, #4\n\t" - "bge 4b\n\t" - "mov r6, #1\n\t" - "lsl r6, r6, #8\n\t" - "add sp, r6\n\t" +#ifdef __clang__ + "subs r3, r3, #4\n\t" +#else + "sub r3, r3, 
#4\n\t" +#endif + "bge L_sp_1024_sqr_32_store_%=\n\t" + "movs r6, #0xff\n\t" +#ifdef __clang__ + "adds r6, r6, #1\n\t" +#else + "add r6, r6, #1\n\t" +#endif + "add sp, sp, r6\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); } @@ -32473,32 +54955,61 @@ static const sp_point_1024 p1024_base = { SP_NOINLINE static sp_digit sp_1024_sub_in_place_32(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; __asm__ __volatile__ ( - "mov r7, %[a]\n\t" - "add r7, #128\n\t" - "\n1:\n\t" - "mov r5, #0\n\t" - "sub r5, %[c]\n\t" + "movs r7, %[a]\n\t" + "movs r2, #0\n\t" +#ifdef __clang__ + "adds r7, r7, #0x80\n\t" +#else + "add r7, r7, #0x80\n\t" +#endif + "\n" + "L_sp_1024_sub_in_place_32_words_%=: \n\t" + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, r2\n\t" +#else + "sub r5, r5, r2\n\t" +#endif "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" +#ifdef __clang__ + "sbcs r3, r5\n\t" +#else "sbc r3, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" - "sbc %[c], %[c]\n\t" - "add %[a], #8\n\t" - "add %[b], #8\n\t" +#ifdef __clang__ + "sbcs r2, r2\n\t" +#else + "sbc r2, r2\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #8\n\t" +#else + "add %[a], %[a], #8\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #8\n\t" +#else + "add %[b], %[b], #8\n\t" +#endif "cmp %[a], r7\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "bne L_sp_1024_sub_in_place_32_words_%=\n\t" + "movs %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7" ); - - return c; + return (uint32_t)(size_t)a; } #endif /* WOLFSSL_SP_SMALL */ @@ -32513,30 +55024,50 @@ SP_NOINLINE static sp_digit sp_1024_sub_in_place_32(sp_digit* a, SP_NOINLINE static sp_digit sp_1024_cond_sub_32(sp_digit* r, const 
sp_digit* a, const sp_digit* b, sp_digit m) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r5, #128\n\t" + "movs r4, #0\n\t" + "movs r5, #0x80\n\t" "mov r8, r5\n\t" - "mov r7, #0\n\t" - "1:\n\t" + "movs r7, #0\n\t" + "\n" + "L_sp_1024_cond_sub_32_words_%=: \n\t" "ldr r6, [%[b], r7]\n\t" +#ifdef __clang__ + "ands r6, %[m]\n\t" +#else "and r6, %[m]\n\t" - "mov r5, #0\n\t" - "sub r5, %[c]\n\t" +#endif + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, r4\n\t" +#else + "sub r5, r5, r4\n\t" +#endif "ldr r5, [%[a], r7]\n\t" +#ifdef __clang__ + "sbcs r5, r6\n\t" +#else "sbc r5, r6\n\t" - "sbc %[c], %[c]\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r4\n\t" +#else + "sbc r4, r4\n\t" +#endif "str r5, [%[r], r7]\n\t" - "add r7, #4\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "cmp r7, r8\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r5", "r6", "r7", "r8" + "blt L_sp_1024_cond_sub_32_words_%=\n\t" + "movs %[r], r4\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r4", "r5", "r6", "r7", "r8" ); - - return c; + return (uint32_t)(size_t)r; } #ifdef WOLFSSL_SP_SMALL @@ -32549,32 +55080,64 @@ SP_NOINLINE static sp_digit sp_1024_cond_sub_32(sp_digit* r, const sp_digit* a, SP_NOINLINE static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r6, %[a]\n\t" - "mov r7, #0\n\t" - "add r6, #128\n\t" - "sub r7, #1\n\t" - "\n1:\n\t" - "add %[c], r7\n\t" + "movs r6, %[a]\n\t" + "movs r7, #0\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "adds r6, r6, #0x80\n\t" +#else + "add r6, r6, #0x80\n\t" +#endif +#ifdef __clang__ + "subs r7, r7, #1\n\t" +#else + "sub r7, r7, #1\n\t" +#endif + "\n" + "L_sp_1024_add_32_word_%=: \n\t" +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif "ldr r4, [%[a]]\n\t" "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "adcs r4, r5\n\t" 
+#else "adc r4, r5\n\t" +#endif "str r4, [%[r]]\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - "add %[a], #4\n\t" - "add %[b], #4\n\t" - "add %[r], #4\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #4\n\t" +#else + "add %[b], %[b], #4\n\t" +#endif +#ifdef __clang__ + "adds %[r], %[r], #4\n\t" +#else + "add %[r], %[r], #4\n\t" +#endif "cmp %[a], r6\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "bne L_sp_1024_add_32_word_%=\n\t" + "movs %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r3", "r4", "r5", "r6", "r7" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ @@ -32588,59 +55151,184 @@ SP_NOINLINE static void sp_1024_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) { __asm__ __volatile__ ( - "mov r6, #128\n\t" - "add r6, %[a]\n\t" + "movs r6, #0x80\n\t" +#ifdef __clang__ + "adds r6, r6, %[a]\n\t" +#else + "add r6, r6, %[a]\n\t" +#endif "mov r8, %[r]\n\t" "mov r9, r6\n\t" - "mov r3, #0\n\t" - "mov r4, #0\n\t" - "1:\n\t" - "mov %[r], #0\n\t" - "mov r5, #0\n\t" + "movs r3, #0\n\t" + "movs r4, #0\n\t" + "\n" + "L_sp_1024_mul_d_32_%=: \n\t" + "movs %[r], #0\n\t" + "movs r5, #0\n\t" "# A[] * B\n\t" "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, %[b], #16\n\t" +#else "lsl r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r3, r7\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r7\n\t" +#else + "add r3, r3, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[r]\n\t" +#else "adc r4, 
%[r]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "lsrs r7, %[b], #16\n\t" +#else "lsr r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "ldr r6, [%[a]]\n\t" +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, %[b], #16\n\t" +#else "lsr r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif +#ifdef __clang__ + "lsls r7, %[b], #16\n\t" +#else "lsl r7, %[b], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r3, r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r6\n\t" +#else + "add r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[r]\n\t" +#else "adc r5, %[r]\n\t" +#endif "# A[] * B - Done\n\t" "mov %[r], r8\n\t" "str r3, [%[r]]\n\t" - "mov r3, r4\n\t" - "mov r4, r5\n\t" - "add %[r], #4\n\t" - "add %[a], #4\n\t" + "movs r3, r4\n\t" + "movs r4, r5\n\t" +#ifdef __clang__ + "adds %[r], %[r], #4\n\t" +#else + "add %[r], %[r], #4\n\t" +#endif +#ifdef 
__clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif "mov r8, %[r]\n\t" "cmp %[a], r9\n\t" - "blt 1b\n\t" + "blt L_sp_1024_mul_d_32_%=\n\t" "str r3, [%[r]]\n\t" - : [r] "+r" (r), [a] "+r" (a) - : [b] "r" (b) + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); } @@ -32657,135 +55345,512 @@ SP_NOINLINE static void sp_1024_mul_d_32(sp_digit* r, const sp_digit* a, SP_NOINLINE static sp_digit div_1024_word_32(sp_digit d1, sp_digit d0, sp_digit div) { - sp_digit r = 0; - __asm__ __volatile__ ( + "movs r3, #0\n\t" +#ifdef __clang__ + "lsrs r5, %[div], #1\n\t" +#else "lsr r5, %[div], #1\n\t" - "add r5, #1\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, #1\n\t" +#else + "add r5, r5, #1\n\t" +#endif "mov r8, %[d0]\n\t" "mov r9, %[d1]\n\t" "# Do top 32\n\t" - "mov r6, r5\n\t" - "sub r6, %[d1]\n\t" + "movs r6, r5\n\t" +#ifdef __clang__ + "subs r6, r6, %[d1]\n\t" +#else + "sub r6, r6, %[d1]\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" - "add %[r], %[r]\n\t" - "sub %[r], r6\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, r3\n\t" +#else + "add r3, r3, r3\n\t" +#endif +#ifdef __clang__ + "subs r3, r3, r6\n\t" +#else + "sub r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "ands r6, r5\n\t" +#else "and r6, r5\n\t" - "sub %[d1], r6\n\t" - "# Next 30 bits\n\t" - "mov r4, #29\n\t" - "1:\n\t" +#endif +#ifdef __clang__ + "subs %[d1], %[d1], r6\n\t" +#else + "sub %[d1], %[d1], r6\n\t" +#endif + "\n\t" + "movs r4, #29\n\t" + "\n" + "L_div_1024_word_32_loop_%=: \n\t" +#ifdef __clang__ + "lsls %[d0], %[d0], #1\n\t" +#else "lsl %[d0], %[d0], #1\n\t" +#endif +#ifdef __clang__ + "adcs %[d1], %[d1]\n\t" +#else "adc %[d1], %[d1]\n\t" - "mov r6, r5\n\t" - "sub r6, %[d1]\n\t" +#endif + "movs r6, r5\n\t" +#ifdef __clang__ + "subs r6, r6, %[d1]\n\t" +#else + "sub r6, r6, %[d1]\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" - "add %[r], %[r]\n\t" - "sub %[r], r6\n\t" 
+#endif +#ifdef __clang__ + "adds r3, r3, r3\n\t" +#else + "add r3, r3, r3\n\t" +#endif +#ifdef __clang__ + "subs r3, r3, r6\n\t" +#else + "sub r3, r3, r6\n\t" +#endif +#ifdef __clang__ + "ands r6, r5\n\t" +#else "and r6, r5\n\t" - "sub %[d1], r6\n\t" - "sub r4, #1\n\t" - "bpl 1b\n\t" - "mov r7, #0\n\t" - "add %[r], %[r]\n\t" - "add %[r], #1\n\t" +#endif +#ifdef __clang__ + "subs %[d1], %[d1], r6\n\t" +#else + "sub %[d1], %[d1], r6\n\t" +#endif +#ifdef __clang__ + "subs r4, r4, #1\n\t" +#else + "sub r4, r4, #1\n\t" +#endif + "bpl L_div_1024_word_32_loop_%=\n\t" + "movs r7, #0\n\t" +#ifdef __clang__ + "adds r3, r3, r3\n\t" +#else + "add r3, r3, r3\n\t" +#endif +#ifdef __clang__ + "adds r3, r3, #1\n\t" +#else + "add r3, r3, #1\n\t" +#endif "# r * div - Start\n\t" - "lsl %[d1], %[r], #16\n\t" +#ifdef __clang__ + "lsls %[d1], r3, #16\n\t" +#else + "lsl %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "lsls r4, %[div], #16\n\t" +#else "lsl r4, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], %[d1], #16\n\t" +#else "lsr %[d1], %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #16\n\t" +#else "lsr r4, r4, #16\n\t" +#endif +#ifdef __clang__ + "muls r4, %[d1]\n\t" +#else "mul r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[div], #16\n\t" +#else "lsr r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r5, %[d1], #16\n\t" +#else "lsr r5, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" - "lsr %[d1], %[r], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], r3, #16\n\t" +#else + "lsr %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, %[d1]\n\t" +#else "mul r6, %[d1]\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" 
+#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "lsls r6, %[div], #16\n\t" +#else "lsl r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[d1], #16\n\t" +#else "lsr r6, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" +#endif "# r * div - Done\n\t" "mov %[d1], r8\n\t" - "sub %[d1], r4\n\t" - "mov r4, %[d1]\n\t" +#ifdef __clang__ + "subs %[d1], %[d1], r4\n\t" +#else + "sub %[d1], %[d1], r4\n\t" +#endif + "movs r4, %[d1]\n\t" "mov %[d1], r9\n\t" +#ifdef __clang__ + "sbcs %[d1], r5\n\t" +#else "sbc %[d1], r5\n\t" - "mov r5, %[d1]\n\t" - "add %[r], r5\n\t" +#endif + "movs r5, %[d1]\n\t" +#ifdef __clang__ + "adds r3, r3, r5\n\t" +#else + "add r3, r3, r5\n\t" +#endif "# r * div - Start\n\t" - "lsl %[d1], %[r], #16\n\t" +#ifdef __clang__ + "lsls %[d1], r3, #16\n\t" +#else + "lsl %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "lsls r4, %[div], #16\n\t" +#else "lsl r4, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], %[d1], #16\n\t" +#else "lsr %[d1], %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #16\n\t" +#else "lsr r4, r4, #16\n\t" +#endif +#ifdef __clang__ + "muls r4, %[d1]\n\t" +#else "mul r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[div], #16\n\t" +#else "lsr r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r5, %[d1], #16\n\t" +#else "lsr r5, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, 
%[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" - "lsr %[d1], %[r], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], r3, #16\n\t" +#else + "lsr %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, %[d1]\n\t" +#else "mul r6, %[d1]\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "lsls r6, %[div], #16\n\t" +#else "lsl r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[d1], #16\n\t" +#else "lsr r6, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" +#endif "# r * div - Done\n\t" "mov %[d1], r8\n\t" "mov r6, r9\n\t" +#ifdef __clang__ + "subs r4, %[d1], r4\n\t" +#else "sub r4, %[d1], r4\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r5\n\t" +#else "sbc r6, r5\n\t" - "mov r5, r6\n\t" - "add %[r], r5\n\t" +#endif + "movs r5, r6\n\t" +#ifdef __clang__ + "adds r3, r3, r5\n\t" +#else + "add r3, r3, r5\n\t" +#endif "# r * div - Start\n\t" - "lsl %[d1], %[r], #16\n\t" +#ifdef __clang__ + "lsls %[d1], r3, #16\n\t" +#else + "lsl %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "lsls r4, %[div], #16\n\t" +#else "lsl r4, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], %[d1], #16\n\t" +#else "lsr %[d1], %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #16\n\t" +#else "lsr r4, r4, #16\n\t" +#endif +#ifdef __clang__ + "muls r4, %[d1]\n\t" +#else "mul r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[div], #16\n\t" +#else "lsr r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + 
"lsrs r5, %[d1], #16\n\t" +#else "lsr r5, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" - "lsr %[d1], %[r], #16\n\t" +#endif +#ifdef __clang__ + "lsrs %[d1], r3, #16\n\t" +#else + "lsr %[d1], r3, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, %[d1]\n\t" +#else "mul r6, %[d1]\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "lsls r6, %[div], #16\n\t" +#else "lsl r6, %[div], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "muls %[d1], r6\n\t" +#else "mul %[d1], r6\n\t" +#endif +#ifdef __clang__ + "lsrs r6, %[d1], #16\n\t" +#else "lsr r6, %[d1], #16\n\t" +#endif +#ifdef __clang__ + "lsls %[d1], %[d1], #16\n\t" +#else "lsl %[d1], %[d1], #16\n\t" - "add r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, %[d1]\n\t" +#else + "add r4, r4, %[d1]\n\t" +#endif +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" +#endif "# r * div - Done\n\t" "mov %[d1], r8\n\t" "mov r6, r9\n\t" +#ifdef __clang__ + "subs r4, %[d1], r4\n\t" +#else "sub r4, %[d1], r4\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r5\n\t" +#else "sbc r6, r5\n\t" - "mov r5, r6\n\t" - "add %[r], r5\n\t" - "mov r6, %[div]\n\t" - "sub r6, r4\n\t" +#endif + "movs r5, r6\n\t" +#ifdef __clang__ + "adds r3, r3, r5\n\t" +#else + "add r3, r3, r5\n\t" +#endif + "movs r6, %[div]\n\t" +#ifdef __clang__ + "subs r6, r6, r4\n\t" +#else + "sub r6, r6, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else "sbc r6, r6\n\t" - "sub %[r], r6\n\t" - : [r] "+r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r7", "r6", "r8", "r9" +#endif +#ifdef __clang__ + "subs r3, r3, r6\n\t" +#else + "sub r3, r3, r6\n\t" 
+#endif + "movs %[d1], r3\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); - return r; + return (uint32_t)(size_t)d1; } /* AND m into each word of a and store in r. @@ -32827,38 +55892,93 @@ static void sp_1024_mask_32(sp_digit* r, const sp_digit* a, sp_digit m) */ SP_NOINLINE static int32_t sp_1024_cmp_32(const sp_digit* a, const sp_digit* b) { - sp_digit r = 0; - - __asm__ __volatile__ ( - "mov r3, #0\n\t" + "movs r2, #0\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "mvns r3, r3\n\t" +#else "mvn r3, r3\n\t" - "mov r6, #124\n\t" - "1:\n\t" +#endif + "movs r6, #0x7c\n\t" + "\n" + "L_sp_1024_cmp_32_words_%=: \n\t" "ldr r7, [%[a], r6]\n\t" "ldr r5, [%[b], r6]\n\t" +#ifdef __clang__ + "ands r7, r3\n\t" +#else "and r7, r3\n\t" +#endif +#ifdef __clang__ + "ands r5, r3\n\t" +#else "and r5, r3\n\t" - "mov r4, r7\n\t" - "sub r7, r5\n\t" +#endif + "movs r4, r7\n\t" +#ifdef __clang__ + "subs r7, r7, r5\n\t" +#else + "sub r7, r7, r5\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r7\n\t" +#else "sbc r7, r7\n\t" - "add %[r], r7\n\t" +#endif +#ifdef __clang__ + "adds r2, r2, r7\n\t" +#else + "add r2, r2, r7\n\t" +#endif +#ifdef __clang__ + "mvns r7, r7\n\t" +#else "mvn r7, r7\n\t" +#endif +#ifdef __clang__ + "ands r3, r7\n\t" +#else "and r3, r7\n\t" - "sub r5, r4\n\t" +#endif +#ifdef __clang__ + "subs r5, r5, r4\n\t" +#else + "sub r5, r5, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r7\n\t" +#else "sbc r7, r7\n\t" - "sub %[r], r7\n\t" +#endif +#ifdef __clang__ + "subs r2, r2, r7\n\t" +#else + "sub r2, r2, r7\n\t" +#endif +#ifdef __clang__ + "mvns r7, r7\n\t" +#else "mvn r7, r7\n\t" +#endif +#ifdef __clang__ + "ands r3, r7\n\t" +#else "and r3, r7\n\t" - "sub r6, #4\n\t" +#endif +#ifdef __clang__ + "subs r6, r6, #4\n\t" +#else + "sub r6, r6, #4\n\t" +#endif "cmp r6, #0\n\t" - "bge 1b\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b) - : "r3", "r4", "r5", "r6", "r7" + "bge L_sp_1024_cmp_32_words_%=\n\t" + "movs 
%[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7" ); - - return r; + return (uint32_t)(size_t)a; } /* Divide d in a and put remainder into r (m*d + r = a) @@ -33189,141 +56309,393 @@ SP_NOINLINE static void sp_1024_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) { sp_digit ca = 0; - __asm__ __volatile__ ( "mov r8, %[mp]\n\t" "mov r12, %[ca]\n\t" - "mov r14, %[m]\n\t" + "mov lr, %[m]\n\t" "mov r9, %[a]\n\t" - "mov r4, #0\n\t" + "movs r4, #0\n\t" "# i = 0\n\t" "mov r11, r4\n\t" - "\n1:\n\t" - "mov r5, #0\n\t" - "mov %[ca], #0\n\t" + "\n" + "L_sp_1024_mont_reduce_32_mod_%=: \n\t" + "movs r5, #0\n\t" + "movs %[ca], #0\n\t" "# mu = a[i] * mp\n\t" "mov %[mp], r8\n\t" "ldr %[a], [%[a]]\n\t" +#ifdef __clang__ + "muls %[mp], %[a]\n\t" +#else "mul %[mp], %[a]\n\t" - "mov %[m], r14\n\t" +#endif + "mov %[m], lr\n\t" "mov r10, r9\n\t" - "\n2:\n\t" + "\n" + "L_sp_1024_mont_reduce_32_word_%=: \n\t" "# a[i+j] += m[j] * mu\n\t" "mov %[a], r10\n\t" "ldr %[a], [%[a]]\n\t" - "mov %[ca], #0\n\t" - "mov r4, r5\n\t" - "mov r5, #0\n\t" + "movs %[ca], #0\n\t" + "movs r4, r5\n\t" + "movs r5, #0\n\t" "# Multiply m[j] and mu - Start\n\t" "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r6, %[mp], #16\n\t" +#else "lsl r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add %[a], r7\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], r7\n\t" +#else + "add %[a], %[a], r7\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[ca]\n\t" +#else "adc r5, %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, 
#16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add %[a], r6\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], r6\n\t" +#else + "add %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r6, %[mp], #16\n\t" +#else "lsr r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r5, r7\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r7\n\t" +#else + "add r5, r5, r7\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add %[a], r6\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], r6\n\t" +#else + "add %[a], %[a], r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else "adc r5, r7\n\t" +#endif "# Multiply m[j] and mu - Done\n\t" - "add r4, %[a]\n\t" +#ifdef __clang__ + "adds r4, r4, %[a]\n\t" +#else + "add r4, r4, %[a]\n\t" +#endif +#ifdef __clang__ + "adcs r5, %[ca]\n\t" +#else "adc r5, %[ca]\n\t" +#endif "mov %[a], r10\n\t" "str r4, [%[a]]\n\t" - "mov r6, #4\n\t" - "add %[m], #4\n\t" - "add r10, r6\n\t" - "mov r4, #124\n\t" - "add r4, r9\n\t" + "movs r6, #4\n\t" +#ifdef __clang__ + "adds %[m], %[m], #4\n\t" +#else + "add %[m], %[m], #4\n\t" +#endif + "add r10, r10, r6\n\t" + "movs r4, #0x7c\n\t" + "add r4, r4, r9\n\t" "cmp r10, r4\n\t" - "blt 2b\n\t" + "blt L_sp_1024_mont_reduce_32_word_%=\n\t" "# a[i+31] += m[31] * mu\n\t" - "mov %[ca], #0\n\t" + "movs %[ca], #0\n\t" "mov r4, r12\n\t" - "mov %[a], #0\n\t" + "movs %[a], 
#0\n\t" "# Multiply m[31] and mu - Start\n\t" "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r6, %[mp], #16\n\t" +#else "lsl r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r6, r6, #16\n\t" +#else "lsr r6, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r5, r7\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r7\n\t" +#else + "add r5, r5, r7\n\t" +#endif +#ifdef __clang__ + "adcs r4, %[ca]\n\t" +#else "adc r4, %[ca]\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsrs r6, %[mp], #16\n\t" +#else "lsr r6, %[mp], #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r7, r6\n\t" +#else "mul r7, r6\n\t" - "add r4, r7\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r7\n\t" +#else + "add r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "ldr r7, [%[m]]\n\t" +#ifdef __clang__ + "lsls r7, r7, #16\n\t" +#else "lsl r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "lsrs r7, r7, #16\n\t" +#else "lsr r7, r7, #16\n\t" +#endif +#ifdef __clang__ + "muls r6, r7\n\t" +#else "mul r6, r7\n\t" +#endif +#ifdef 
__clang__ + "lsrs r7, r6, #16\n\t" +#else "lsr r7, r6, #16\n\t" +#endif +#ifdef __clang__ + "lsls r6, r6, #16\n\t" +#else "lsl r6, r6, #16\n\t" - "add r5, r6\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r6\n\t" +#else + "add r5, r5, r6\n\t" +#endif +#ifdef __clang__ + "adcs r4, r7\n\t" +#else "adc r4, r7\n\t" +#endif +#ifdef __clang__ + "adcs %[a], %[ca]\n\t" +#else "adc %[a], %[ca]\n\t" +#endif "# Multiply m[31] and mu - Done\n\t" - "mov %[ca], %[a]\n\t" + "movs %[ca], %[a]\n\t" "mov %[a], r10\n\t" "ldr r7, [%[a], #4]\n\t" "ldr %[a], [%[a]]\n\t" - "mov r6, #0\n\t" - "add r5, %[a]\n\t" + "movs r6, #0\n\t" +#ifdef __clang__ + "adds r5, r5, %[a]\n\t" +#else + "add r5, r5, %[a]\n\t" +#endif +#ifdef __clang__ + "adcs r7, r4\n\t" +#else "adc r7, r4\n\t" +#endif +#ifdef __clang__ + "adcs %[ca], r6\n\t" +#else "adc %[ca], r6\n\t" +#endif "mov %[a], r10\n\t" "str r5, [%[a]]\n\t" "str r7, [%[a], #4]\n\t" "# i += 1\n\t" - "mov r6, #4\n\t" - "add r9, r6\n\t" - "add r11, r6\n\t" + "movs r6, #4\n\t" + "add r9, r9, r6\n\t" + "add r11, r11, r6\n\t" "mov r12, %[ca]\n\t" "mov %[a], r9\n\t" - "mov r4, #128\n\t" + "movs r4, #0x80\n\t" "cmp r11, r4\n\t" - "blt 1b\n\t" - "ldr r6, [%[m]]\n\t" - "neg %[ca], %[ca]\n\t" - "sub r6, r7\n\t" - "sbc r6, r6\n\t" - "orr %[ca], r6\n\t" - "mov %[m], r14\n\t" - : [ca] "+r" (ca), [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + "blt L_sp_1024_mont_reduce_32_mod_%=\n\t" + "ldr r6, [%[m]]\n\t" +#ifdef __clang__ + "negs %[ca], %[ca]\n\t" +#else + "neg %[ca], %[ca]\n\t" +#endif +#ifdef __clang__ + "subs r6, r6, r7\n\t" +#else + "sub r6, r6, r7\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r6\n\t" +#else + "sbc r6, r6\n\t" +#endif +#ifdef __clang__ + "orrs %[ca], r6\n\t" +#else + "orr %[ca], r6\n\t" +#endif + "mov %[m], lr\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp), [ca] "+r" (ca) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); - 
sp_1024_cond_sub_32(a - 32, a, m, ca); } @@ -33471,303 +56843,707 @@ static void sp_1024_map_32(sp_point_1024* r, const sp_point_1024* p, * b Second number to add in Montogmery form. * m Modulus (prime). */ -SP_NOINLINE static void sp_1024_mont_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +SP_NOINLINE static void sp_1024_mont_add_32(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit* m) { __asm__ __volatile__ ( - "ldr r4, [%[a], #0]\n\t" + "ldr r4, [%[a]]\n\t" "ldr r5, [%[a], #4]\n\t" - "ldr r6, [%[b], #0]\n\t" + "ldr r6, [%[b]]\n\t" "ldr r7, [%[b], #4]\n\t" - "add r4, r6\n\t" - "adc r5, r7\n\t" - "str r4, [%[r], #0]\n\t" +#ifdef __clang__ + "adds r4, r4, r6\n\t" +#else + "add r4, r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif + "str r4, [%[r]]\n\t" "str r5, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[a], #12]\n\t" "ldr r6, [%[b], #8]\n\t" "ldr r7, [%[b], #12]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #8]\n\t" "str r5, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[a], #20]\n\t" "ldr r6, [%[b], #16]\n\t" "ldr r7, [%[b], #20]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #16]\n\t" "str r5, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[a], #28]\n\t" "ldr r6, [%[b], #24]\n\t" "ldr r7, [%[b], #28]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #24]\n\t" "str r5, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[a], #36]\n\t" "ldr r6, [%[b], #32]\n\t" "ldr r7, [%[b], #36]\n\t" - 
"adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #32]\n\t" "str r5, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[a], #44]\n\t" "ldr r6, [%[b], #40]\n\t" "ldr r7, [%[b], #44]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #40]\n\t" "str r5, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[a], #52]\n\t" "ldr r6, [%[b], #48]\n\t" "ldr r7, [%[b], #52]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #48]\n\t" "str r5, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[a], #60]\n\t" "ldr r6, [%[b], #56]\n\t" "ldr r7, [%[b], #60]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #56]\n\t" "str r5, [%[r], #60]\n\t" "ldr r4, [%[a], #64]\n\t" "ldr r5, [%[a], #68]\n\t" "ldr r6, [%[b], #64]\n\t" "ldr r7, [%[b], #68]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #64]\n\t" "str r5, [%[r], #68]\n\t" "ldr r4, [%[a], #72]\n\t" "ldr r5, [%[a], #76]\n\t" "ldr r6, [%[b], #72]\n\t" "ldr r7, [%[b], #76]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #72]\n\t" "str r5, [%[r], #76]\n\t" "ldr r4, [%[a], #80]\n\t" "ldr r5, [%[a], #84]\n\t" "ldr r6, [%[b], #80]\n\t" 
"ldr r7, [%[b], #84]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #80]\n\t" "str r5, [%[r], #84]\n\t" "ldr r4, [%[a], #88]\n\t" "ldr r5, [%[a], #92]\n\t" "ldr r6, [%[b], #88]\n\t" "ldr r7, [%[b], #92]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #88]\n\t" "str r5, [%[r], #92]\n\t" "ldr r4, [%[a], #96]\n\t" "ldr r5, [%[a], #100]\n\t" "ldr r6, [%[b], #96]\n\t" "ldr r7, [%[b], #100]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #96]\n\t" "str r5, [%[r], #100]\n\t" "ldr r4, [%[a], #104]\n\t" "ldr r5, [%[a], #108]\n\t" "ldr r6, [%[b], #104]\n\t" "ldr r7, [%[b], #108]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #104]\n\t" "str r5, [%[r], #108]\n\t" "ldr r4, [%[a], #112]\n\t" "ldr r5, [%[a], #116]\n\t" "ldr r6, [%[b], #112]\n\t" "ldr r7, [%[b], #116]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #112]\n\t" "str r5, [%[r], #116]\n\t" "ldr r4, [%[a], #120]\n\t" "ldr r5, [%[a], #124]\n\t" "ldr r6, [%[b], #120]\n\t" "ldr r7, [%[b], #124]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #120]\n\t" "str r5, [%[r], #124]\n\t" - "mov %[b], #0\n\t" - "ldr 
r7, [%[m], #124]\n\t" - "adc %[b], %[b]\n\t" - "sub r7, r5\n\t" - "neg %[b], %[b]\n\t" - "sbc r7, r7\n\t" - "orr %[b], r7\n\t" - "ldr r4, [%[r], #0]\n\t" + "movs %[b], #0\n\t" + "ldr r7, [%[m], #124]\n\t" +#ifdef __clang__ + "adcs %[b], %[b]\n\t" +#else + "adc %[b], %[b]\n\t" +#endif +#ifdef __clang__ + "subs r7, r7, r5\n\t" +#else + "sub r7, r7, r5\n\t" +#endif +#ifdef __clang__ + "negs %[b], %[b]\n\t" +#else + "neg %[b], %[b]\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r7\n\t" +#else + "sbc r7, r7\n\t" +#endif +#ifdef __clang__ + "orrs %[b], r7\n\t" +#else + "orr %[b], r7\n\t" +#endif + "ldr r4, [%[r]]\n\t" "ldr r5, [%[r], #4]\n\t" - "ldr r6, [%[m], #0]\n\t" + "ldr r6, [%[m]]\n\t" "ldr r7, [%[m], #4]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "sub r4, r6\n\t" - "sbc r5, r7\n\t" - "str r4, [%[r], #0]\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "subs r4, r4, r6\n\t" +#else + "sub r4, r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif + "str r4, [%[r]]\n\t" "str r5, [%[r], #4]\n\t" "ldr r4, [%[r], #8]\n\t" "ldr r5, [%[r], #12]\n\t" "ldr r6, [%[m], #8]\n\t" "ldr r7, [%[m], #12]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #8]\n\t" "str r5, [%[r], #12]\n\t" "ldr r4, [%[r], #16]\n\t" "ldr r5, [%[r], #20]\n\t" "ldr r6, [%[m], #16]\n\t" "ldr r7, [%[m], #20]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, 
%[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #16]\n\t" "str r5, [%[r], #20]\n\t" "ldr r4, [%[r], #24]\n\t" "ldr r5, [%[r], #28]\n\t" "ldr r6, [%[m], #24]\n\t" "ldr r7, [%[m], #28]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #24]\n\t" "str r5, [%[r], #28]\n\t" "ldr r4, [%[r], #32]\n\t" "ldr r5, [%[r], #36]\n\t" "ldr r6, [%[m], #32]\n\t" "ldr r7, [%[m], #36]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #32]\n\t" "str r5, [%[r], #36]\n\t" "ldr r4, [%[r], #40]\n\t" "ldr r5, [%[r], #44]\n\t" "ldr r6, [%[m], #40]\n\t" "ldr r7, [%[m], #44]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #40]\n\t" "str r5, [%[r], #44]\n\t" "ldr r4, [%[r], #48]\n\t" "ldr r5, [%[r], #52]\n\t" "ldr r6, [%[m], #48]\n\t" "ldr r7, [%[m], #52]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "sbc r4, 
r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #48]\n\t" "str r5, [%[r], #52]\n\t" "ldr r4, [%[r], #56]\n\t" "ldr r5, [%[r], #60]\n\t" "ldr r6, [%[m], #56]\n\t" "ldr r7, [%[m], #60]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #56]\n\t" "str r5, [%[r], #60]\n\t" "ldr r4, [%[r], #64]\n\t" "ldr r5, [%[r], #68]\n\t" "ldr r6, [%[m], #64]\n\t" "ldr r7, [%[m], #68]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #64]\n\t" "str r5, [%[r], #68]\n\t" "ldr r4, [%[r], #72]\n\t" "ldr r5, [%[r], #76]\n\t" "ldr r6, [%[m], #72]\n\t" "ldr r7, [%[m], #76]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #72]\n\t" "str r5, [%[r], #76]\n\t" "ldr r4, [%[r], #80]\n\t" 
"ldr r5, [%[r], #84]\n\t" "ldr r6, [%[m], #80]\n\t" "ldr r7, [%[m], #84]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #80]\n\t" "str r5, [%[r], #84]\n\t" "ldr r4, [%[r], #88]\n\t" "ldr r5, [%[r], #92]\n\t" "ldr r6, [%[m], #88]\n\t" "ldr r7, [%[m], #92]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #88]\n\t" "str r5, [%[r], #92]\n\t" "ldr r4, [%[r], #96]\n\t" "ldr r5, [%[r], #100]\n\t" "ldr r6, [%[m], #96]\n\t" "ldr r7, [%[m], #100]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #96]\n\t" "str r5, [%[r], #100]\n\t" "ldr r4, [%[r], #104]\n\t" "ldr r5, [%[r], #108]\n\t" "ldr r6, [%[m], #104]\n\t" "ldr r7, [%[m], #108]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef 
__clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #104]\n\t" "str r5, [%[r], #108]\n\t" "ldr r4, [%[r], #112]\n\t" "ldr r5, [%[r], #116]\n\t" "ldr r6, [%[m], #112]\n\t" "ldr r7, [%[m], #116]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #112]\n\t" "str r5, [%[r], #116]\n\t" "ldr r4, [%[r], #120]\n\t" "ldr r5, [%[r], #124]\n\t" "ldr r6, [%[m], #120]\n\t" "ldr r7, [%[m], #124]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #120]\n\t" "str r5, [%[r], #124]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) @@ -33782,18 +57558,35 @@ SP_NOINLINE static void sp_1024_mont_add_32(sp_digit* r, const sp_digit* a, cons * a Number to double in Montogmery form. * m Modulus (prime). 
*/ -SP_NOINLINE static void sp_1024_mont_dbl_32(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_1024_mont_dbl_32(sp_digit* r, const sp_digit* a, + const sp_digit* m) { __asm__ __volatile__ ( - "ldr r4, [%[a], #0]\n\t" + "ldr r4, [%[a]]\n\t" "ldr r5, [%[a], #4]\n\t" "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[a], #12]\n\t" - "add r4, r4\n\t" - "adc r5, r5\n\t" - "adc r6, r6\n\t" - "adc r7, r7\n\t" - "str r4, [%[r], #0]\n\t" +#ifdef __clang__ + "adds r4, r4, r4\n\t" +#else + "add r4, r4, r4\n\t" +#endif +#ifdef __clang__ + "adcs r5, r5\n\t" +#else + "adc r5, r5\n\t" +#endif +#ifdef __clang__ + "adcs r6, r6\n\t" +#else + "adc r6, r6\n\t" +#endif +#ifdef __clang__ + "adcs r7, r7\n\t" +#else + "adc r7, r7\n\t" +#endif + "str r4, [%[r]]\n\t" "str r5, [%[r], #4]\n\t" "str r6, [%[r], #8]\n\t" "str r7, [%[r], #12]\n\t" @@ -33801,10 +57594,26 @@ SP_NOINLINE static void sp_1024_mont_dbl_32(sp_digit* r, const sp_digit* a, cons "ldr r5, [%[a], #20]\n\t" "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[a], #28]\n\t" - "adc r4, r4\n\t" - "adc r5, r5\n\t" - "adc r6, r6\n\t" - "adc r7, r7\n\t" +#ifdef __clang__ + "adcs r4, r4\n\t" +#else + "adc r4, r4\n\t" +#endif +#ifdef __clang__ + "adcs r5, r5\n\t" +#else + "adc r5, r5\n\t" +#endif +#ifdef __clang__ + "adcs r6, r6\n\t" +#else + "adc r6, r6\n\t" +#endif +#ifdef __clang__ + "adcs r7, r7\n\t" +#else + "adc r7, r7\n\t" +#endif "str r4, [%[r], #16]\n\t" "str r5, [%[r], #20]\n\t" "str r6, [%[r], #24]\n\t" @@ -33813,10 +57622,26 @@ SP_NOINLINE static void sp_1024_mont_dbl_32(sp_digit* r, const sp_digit* a, cons "ldr r5, [%[a], #36]\n\t" "ldr r6, [%[a], #40]\n\t" "ldr r7, [%[a], #44]\n\t" - "adc r4, r4\n\t" - "adc r5, r5\n\t" - "adc r6, r6\n\t" - "adc r7, r7\n\t" +#ifdef __clang__ + "adcs r4, r4\n\t" +#else + "adc r4, r4\n\t" +#endif +#ifdef __clang__ + "adcs r5, r5\n\t" +#else + "adc r5, r5\n\t" +#endif +#ifdef __clang__ + "adcs r6, r6\n\t" +#else + "adc r6, r6\n\t" +#endif +#ifdef __clang__ + "adcs r7, r7\n\t" +#else + 
"adc r7, r7\n\t" +#endif "str r4, [%[r], #32]\n\t" "str r5, [%[r], #36]\n\t" "str r6, [%[r], #40]\n\t" @@ -33825,10 +57650,26 @@ SP_NOINLINE static void sp_1024_mont_dbl_32(sp_digit* r, const sp_digit* a, cons "ldr r5, [%[a], #52]\n\t" "ldr r6, [%[a], #56]\n\t" "ldr r7, [%[a], #60]\n\t" - "adc r4, r4\n\t" - "adc r5, r5\n\t" - "adc r6, r6\n\t" - "adc r7, r7\n\t" +#ifdef __clang__ + "adcs r4, r4\n\t" +#else + "adc r4, r4\n\t" +#endif +#ifdef __clang__ + "adcs r5, r5\n\t" +#else + "adc r5, r5\n\t" +#endif +#ifdef __clang__ + "adcs r6, r6\n\t" +#else + "adc r6, r6\n\t" +#endif +#ifdef __clang__ + "adcs r7, r7\n\t" +#else + "adc r7, r7\n\t" +#endif "str r4, [%[r], #48]\n\t" "str r5, [%[r], #52]\n\t" "str r6, [%[r], #56]\n\t" @@ -33837,10 +57678,26 @@ SP_NOINLINE static void sp_1024_mont_dbl_32(sp_digit* r, const sp_digit* a, cons "ldr r5, [%[a], #68]\n\t" "ldr r6, [%[a], #72]\n\t" "ldr r7, [%[a], #76]\n\t" - "adc r4, r4\n\t" - "adc r5, r5\n\t" - "adc r6, r6\n\t" - "adc r7, r7\n\t" +#ifdef __clang__ + "adcs r4, r4\n\t" +#else + "adc r4, r4\n\t" +#endif +#ifdef __clang__ + "adcs r5, r5\n\t" +#else + "adc r5, r5\n\t" +#endif +#ifdef __clang__ + "adcs r6, r6\n\t" +#else + "adc r6, r6\n\t" +#endif +#ifdef __clang__ + "adcs r7, r7\n\t" +#else + "adc r7, r7\n\t" +#endif "str r4, [%[r], #64]\n\t" "str r5, [%[r], #68]\n\t" "str r6, [%[r], #72]\n\t" @@ -33849,10 +57706,26 @@ SP_NOINLINE static void sp_1024_mont_dbl_32(sp_digit* r, const sp_digit* a, cons "ldr r5, [%[a], #84]\n\t" "ldr r6, [%[a], #88]\n\t" "ldr r7, [%[a], #92]\n\t" - "adc r4, r4\n\t" - "adc r5, r5\n\t" - "adc r6, r6\n\t" - "adc r7, r7\n\t" +#ifdef __clang__ + "adcs r4, r4\n\t" +#else + "adc r4, r4\n\t" +#endif +#ifdef __clang__ + "adcs r5, r5\n\t" +#else + "adc r5, r5\n\t" +#endif +#ifdef __clang__ + "adcs r6, r6\n\t" +#else + "adc r6, r6\n\t" +#endif +#ifdef __clang__ + "adcs r7, r7\n\t" +#else + "adc r7, r7\n\t" +#endif "str r4, [%[r], #80]\n\t" "str r5, [%[r], #84]\n\t" "str r6, [%[r], #88]\n\t" @@ -33861,10 
+57734,26 @@ SP_NOINLINE static void sp_1024_mont_dbl_32(sp_digit* r, const sp_digit* a, cons "ldr r5, [%[a], #100]\n\t" "ldr r6, [%[a], #104]\n\t" "ldr r7, [%[a], #108]\n\t" - "adc r4, r4\n\t" - "adc r5, r5\n\t" - "adc r6, r6\n\t" - "adc r7, r7\n\t" +#ifdef __clang__ + "adcs r4, r4\n\t" +#else + "adc r4, r4\n\t" +#endif +#ifdef __clang__ + "adcs r5, r5\n\t" +#else + "adc r5, r5\n\t" +#endif +#ifdef __clang__ + "adcs r6, r6\n\t" +#else + "adc r6, r6\n\t" +#endif +#ifdef __clang__ + "adcs r7, r7\n\t" +#else + "adc r7, r7\n\t" +#endif "str r4, [%[r], #96]\n\t" "str r5, [%[r], #100]\n\t" "str r6, [%[r], #104]\n\t" @@ -33873,184 +57762,476 @@ SP_NOINLINE static void sp_1024_mont_dbl_32(sp_digit* r, const sp_digit* a, cons "ldr r5, [%[a], #116]\n\t" "ldr r6, [%[a], #120]\n\t" "ldr r7, [%[a], #124]\n\t" - "adc r4, r4\n\t" - "adc r5, r5\n\t" - "adc r6, r6\n\t" - "adc r7, r7\n\t" +#ifdef __clang__ + "adcs r4, r4\n\t" +#else + "adc r4, r4\n\t" +#endif +#ifdef __clang__ + "adcs r5, r5\n\t" +#else + "adc r5, r5\n\t" +#endif +#ifdef __clang__ + "adcs r6, r6\n\t" +#else + "adc r6, r6\n\t" +#endif +#ifdef __clang__ + "adcs r7, r7\n\t" +#else + "adc r7, r7\n\t" +#endif "str r4, [%[r], #112]\n\t" "str r5, [%[r], #116]\n\t" "str r6, [%[r], #120]\n\t" "str r7, [%[r], #124]\n\t" - "mov r3, #0\n\t" - "ldr r4, [%[m], #124]\n\t" - "adc r3, r3\n\t" - "sub r4, r7\n\t" - "neg r3, r3\n\t" - "sbc r4, r4\n\t" - "orr r3, r4\n\t" - "ldr r4, [%[r], #0]\n\t" + "movs r3, #0\n\t" + "ldr r4, [%[m], #124]\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif +#ifdef __clang__ + "subs r4, r4, r7\n\t" +#else + "sub r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "negs r3, r3\n\t" +#else + "neg r3, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r4\n\t" +#else + "sbc r4, r4\n\t" +#endif +#ifdef __clang__ + "orrs r3, r4\n\t" +#else + "orr r3, r4\n\t" +#endif + "ldr r4, [%[r]]\n\t" "ldr r5, [%[r], #4]\n\t" - "ldr r6, [%[m], #0]\n\t" + "ldr r6, [%[m]]\n\t" "ldr r7, [%[m], #4]\n\t" - "and 
r6, r3\n\t" - "and r7, r3\n\t" - "sub r4, r6\n\t" - "sbc r5, r7\n\t" - "str r4, [%[r], #0]\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "subs r4, r4, r6\n\t" +#else + "sub r4, r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif + "str r4, [%[r]]\n\t" "str r5, [%[r], #4]\n\t" "ldr r4, [%[r], #8]\n\t" "ldr r5, [%[r], #12]\n\t" "ldr r6, [%[m], #8]\n\t" "ldr r7, [%[m], #12]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #8]\n\t" "str r5, [%[r], #12]\n\t" "ldr r4, [%[r], #16]\n\t" "ldr r5, [%[r], #20]\n\t" "ldr r6, [%[m], #16]\n\t" "ldr r7, [%[m], #20]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #16]\n\t" "str r5, [%[r], #20]\n\t" "ldr r4, [%[r], #24]\n\t" "ldr r5, [%[r], #28]\n\t" "ldr r6, [%[m], #24]\n\t" "ldr r7, [%[m], #28]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #24]\n\t" "str r5, [%[r], #28]\n\t" 
"ldr r4, [%[r], #32]\n\t" "ldr r5, [%[r], #36]\n\t" "ldr r6, [%[m], #32]\n\t" "ldr r7, [%[m], #36]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #32]\n\t" "str r5, [%[r], #36]\n\t" "ldr r4, [%[r], #40]\n\t" "ldr r5, [%[r], #44]\n\t" "ldr r6, [%[m], #40]\n\t" "ldr r7, [%[m], #44]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #40]\n\t" "str r5, [%[r], #44]\n\t" "ldr r4, [%[r], #48]\n\t" "ldr r5, [%[r], #52]\n\t" "ldr r6, [%[m], #48]\n\t" "ldr r7, [%[m], #52]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #48]\n\t" "str r5, [%[r], #52]\n\t" "ldr r4, [%[r], #56]\n\t" "ldr r5, [%[r], #60]\n\t" "ldr r6, [%[m], #56]\n\t" "ldr r7, [%[m], #60]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" 
+#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #56]\n\t" "str r5, [%[r], #60]\n\t" "ldr r4, [%[r], #64]\n\t" "ldr r5, [%[r], #68]\n\t" "ldr r6, [%[m], #64]\n\t" "ldr r7, [%[m], #68]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #64]\n\t" "str r5, [%[r], #68]\n\t" "ldr r4, [%[r], #72]\n\t" "ldr r5, [%[r], #76]\n\t" "ldr r6, [%[m], #72]\n\t" "ldr r7, [%[m], #76]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #72]\n\t" "str r5, [%[r], #76]\n\t" "ldr r4, [%[r], #80]\n\t" "ldr r5, [%[r], #84]\n\t" "ldr r6, [%[m], #80]\n\t" "ldr r7, [%[m], #84]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #80]\n\t" "str r5, [%[r], #84]\n\t" "ldr r4, [%[r], #88]\n\t" "ldr r5, [%[r], #92]\n\t" "ldr r6, [%[m], #88]\n\t" "ldr r7, [%[m], #92]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, 
r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #88]\n\t" "str r5, [%[r], #92]\n\t" "ldr r4, [%[r], #96]\n\t" "ldr r5, [%[r], #100]\n\t" "ldr r6, [%[m], #96]\n\t" "ldr r7, [%[m], #100]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #96]\n\t" "str r5, [%[r], #100]\n\t" "ldr r4, [%[r], #104]\n\t" "ldr r5, [%[r], #108]\n\t" "ldr r6, [%[m], #104]\n\t" "ldr r7, [%[m], #108]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #104]\n\t" "str r5, [%[r], #108]\n\t" "ldr r4, [%[r], #112]\n\t" "ldr r5, [%[r], #116]\n\t" "ldr r6, [%[m], #112]\n\t" "ldr r7, [%[m], #116]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #112]\n\t" "str r5, [%[r], #116]\n\t" "ldr r4, [%[r], #120]\n\t" "ldr r5, [%[r], #124]\n\t" "ldr r6, [%[m], #120]\n\t" "ldr r7, [%[m], #124]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif 
+#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #120]\n\t" "str r5, [%[r], #124]\n\t" : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : - : "memory", "r4", "r5", "r6", "r7", "r3" + : "memory", "r3", "r4", "r5", "r6", "r7" ); } @@ -34060,18 +58241,35 @@ SP_NOINLINE static void sp_1024_mont_dbl_32(sp_digit* r, const sp_digit* a, cons * a Number to triple in Montogmery form. * m Modulus (prime). */ -SP_NOINLINE static void sp_1024_mont_tpl_32(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_1024_mont_tpl_32(sp_digit* r, const sp_digit* a, + const sp_digit* m) { __asm__ __volatile__ ( - "ldr r4, [%[a], #0]\n\t" + "ldr r4, [%[a]]\n\t" "ldr r5, [%[a], #4]\n\t" "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[a], #12]\n\t" - "add r4, r4\n\t" - "adc r5, r5\n\t" - "adc r6, r6\n\t" - "adc r7, r7\n\t" - "str r4, [%[r], #0]\n\t" +#ifdef __clang__ + "adds r4, r4, r4\n\t" +#else + "add r4, r4, r4\n\t" +#endif +#ifdef __clang__ + "adcs r5, r5\n\t" +#else + "adc r5, r5\n\t" +#endif +#ifdef __clang__ + "adcs r6, r6\n\t" +#else + "adc r6, r6\n\t" +#endif +#ifdef __clang__ + "adcs r7, r7\n\t" +#else + "adc r7, r7\n\t" +#endif + "str r4, [%[r]]\n\t" "str r5, [%[r], #4]\n\t" "str r6, [%[r], #8]\n\t" "str r7, [%[r], #12]\n\t" @@ -34079,10 +58277,26 @@ SP_NOINLINE static void sp_1024_mont_tpl_32(sp_digit* r, const sp_digit* a, cons "ldr r5, [%[a], #20]\n\t" "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[a], #28]\n\t" - "adc r4, r4\n\t" - "adc r5, r5\n\t" - "adc r6, r6\n\t" - "adc r7, r7\n\t" +#ifdef __clang__ + "adcs r4, r4\n\t" +#else + "adc r4, r4\n\t" +#endif +#ifdef __clang__ + "adcs r5, r5\n\t" +#else + "adc r5, r5\n\t" +#endif +#ifdef __clang__ + "adcs r6, r6\n\t" +#else + "adc r6, r6\n\t" +#endif +#ifdef __clang__ + "adcs r7, r7\n\t" +#else + "adc r7, r7\n\t" +#endif "str r4, [%[r], #16]\n\t" 
"str r5, [%[r], #20]\n\t" "str r6, [%[r], #24]\n\t" @@ -34091,10 +58305,26 @@ SP_NOINLINE static void sp_1024_mont_tpl_32(sp_digit* r, const sp_digit* a, cons "ldr r5, [%[a], #36]\n\t" "ldr r6, [%[a], #40]\n\t" "ldr r7, [%[a], #44]\n\t" - "adc r4, r4\n\t" - "adc r5, r5\n\t" - "adc r6, r6\n\t" - "adc r7, r7\n\t" +#ifdef __clang__ + "adcs r4, r4\n\t" +#else + "adc r4, r4\n\t" +#endif +#ifdef __clang__ + "adcs r5, r5\n\t" +#else + "adc r5, r5\n\t" +#endif +#ifdef __clang__ + "adcs r6, r6\n\t" +#else + "adc r6, r6\n\t" +#endif +#ifdef __clang__ + "adcs r7, r7\n\t" +#else + "adc r7, r7\n\t" +#endif "str r4, [%[r], #32]\n\t" "str r5, [%[r], #36]\n\t" "str r6, [%[r], #40]\n\t" @@ -34103,10 +58333,26 @@ SP_NOINLINE static void sp_1024_mont_tpl_32(sp_digit* r, const sp_digit* a, cons "ldr r5, [%[a], #52]\n\t" "ldr r6, [%[a], #56]\n\t" "ldr r7, [%[a], #60]\n\t" - "adc r4, r4\n\t" - "adc r5, r5\n\t" - "adc r6, r6\n\t" - "adc r7, r7\n\t" +#ifdef __clang__ + "adcs r4, r4\n\t" +#else + "adc r4, r4\n\t" +#endif +#ifdef __clang__ + "adcs r5, r5\n\t" +#else + "adc r5, r5\n\t" +#endif +#ifdef __clang__ + "adcs r6, r6\n\t" +#else + "adc r6, r6\n\t" +#endif +#ifdef __clang__ + "adcs r7, r7\n\t" +#else + "adc r7, r7\n\t" +#endif "str r4, [%[r], #48]\n\t" "str r5, [%[r], #52]\n\t" "str r6, [%[r], #56]\n\t" @@ -34115,10 +58361,26 @@ SP_NOINLINE static void sp_1024_mont_tpl_32(sp_digit* r, const sp_digit* a, cons "ldr r5, [%[a], #68]\n\t" "ldr r6, [%[a], #72]\n\t" "ldr r7, [%[a], #76]\n\t" - "adc r4, r4\n\t" - "adc r5, r5\n\t" - "adc r6, r6\n\t" - "adc r7, r7\n\t" +#ifdef __clang__ + "adcs r4, r4\n\t" +#else + "adc r4, r4\n\t" +#endif +#ifdef __clang__ + "adcs r5, r5\n\t" +#else + "adc r5, r5\n\t" +#endif +#ifdef __clang__ + "adcs r6, r6\n\t" +#else + "adc r6, r6\n\t" +#endif +#ifdef __clang__ + "adcs r7, r7\n\t" +#else + "adc r7, r7\n\t" +#endif "str r4, [%[r], #64]\n\t" "str r5, [%[r], #68]\n\t" "str r6, [%[r], #72]\n\t" @@ -34127,10 +58389,26 @@ SP_NOINLINE static void 
sp_1024_mont_tpl_32(sp_digit* r, const sp_digit* a, cons "ldr r5, [%[a], #84]\n\t" "ldr r6, [%[a], #88]\n\t" "ldr r7, [%[a], #92]\n\t" - "adc r4, r4\n\t" - "adc r5, r5\n\t" - "adc r6, r6\n\t" - "adc r7, r7\n\t" +#ifdef __clang__ + "adcs r4, r4\n\t" +#else + "adc r4, r4\n\t" +#endif +#ifdef __clang__ + "adcs r5, r5\n\t" +#else + "adc r5, r5\n\t" +#endif +#ifdef __clang__ + "adcs r6, r6\n\t" +#else + "adc r6, r6\n\t" +#endif +#ifdef __clang__ + "adcs r7, r7\n\t" +#else + "adc r7, r7\n\t" +#endif "str r4, [%[r], #80]\n\t" "str r5, [%[r], #84]\n\t" "str r6, [%[r], #88]\n\t" @@ -34139,10 +58417,26 @@ SP_NOINLINE static void sp_1024_mont_tpl_32(sp_digit* r, const sp_digit* a, cons "ldr r5, [%[a], #100]\n\t" "ldr r6, [%[a], #104]\n\t" "ldr r7, [%[a], #108]\n\t" - "adc r4, r4\n\t" - "adc r5, r5\n\t" - "adc r6, r6\n\t" - "adc r7, r7\n\t" +#ifdef __clang__ + "adcs r4, r4\n\t" +#else + "adc r4, r4\n\t" +#endif +#ifdef __clang__ + "adcs r5, r5\n\t" +#else + "adc r5, r5\n\t" +#endif +#ifdef __clang__ + "adcs r6, r6\n\t" +#else + "adc r6, r6\n\t" +#endif +#ifdef __clang__ + "adcs r7, r7\n\t" +#else + "adc r7, r7\n\t" +#endif "str r4, [%[r], #96]\n\t" "str r5, [%[r], #100]\n\t" "str r6, [%[r], #104]\n\t" @@ -34151,479 +58445,1175 @@ SP_NOINLINE static void sp_1024_mont_tpl_32(sp_digit* r, const sp_digit* a, cons "ldr r5, [%[a], #116]\n\t" "ldr r6, [%[a], #120]\n\t" "ldr r7, [%[a], #124]\n\t" - "adc r4, r4\n\t" - "adc r5, r5\n\t" - "adc r6, r6\n\t" - "adc r7, r7\n\t" +#ifdef __clang__ + "adcs r4, r4\n\t" +#else + "adc r4, r4\n\t" +#endif +#ifdef __clang__ + "adcs r5, r5\n\t" +#else + "adc r5, r5\n\t" +#endif +#ifdef __clang__ + "adcs r6, r6\n\t" +#else + "adc r6, r6\n\t" +#endif +#ifdef __clang__ + "adcs r7, r7\n\t" +#else + "adc r7, r7\n\t" +#endif "str r4, [%[r], #112]\n\t" "str r5, [%[r], #116]\n\t" "str r6, [%[r], #120]\n\t" "str r7, [%[r], #124]\n\t" - "mov r3, #0\n\t" - "ldr r4, [%[m], #124]\n\t" - "adc r3, r3\n\t" - "sub r4, r7\n\t" - "neg r3, r3\n\t" - "sbc r4, r4\n\t" - 
"orr r3, r4\n\t" - "ldr r4, [%[r], #0]\n\t" + "movs r3, #0\n\t" + "ldr r4, [%[m], #124]\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif +#ifdef __clang__ + "subs r4, r4, r7\n\t" +#else + "sub r4, r4, r7\n\t" +#endif +#ifdef __clang__ + "negs r3, r3\n\t" +#else + "neg r3, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r4\n\t" +#else + "sbc r4, r4\n\t" +#endif +#ifdef __clang__ + "orrs r3, r4\n\t" +#else + "orr r3, r4\n\t" +#endif + "ldr r4, [%[r]]\n\t" "ldr r5, [%[r], #4]\n\t" - "ldr r6, [%[m], #0]\n\t" + "ldr r6, [%[m]]\n\t" "ldr r7, [%[m], #4]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sub r4, r6\n\t" - "sbc r5, r7\n\t" - "str r4, [%[r], #0]\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "subs r4, r4, r6\n\t" +#else + "sub r4, r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif + "str r4, [%[r]]\n\t" "str r5, [%[r], #4]\n\t" "ldr r4, [%[r], #8]\n\t" "ldr r5, [%[r], #12]\n\t" "ldr r6, [%[m], #8]\n\t" "ldr r7, [%[m], #12]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #8]\n\t" "str r5, [%[r], #12]\n\t" "ldr r4, [%[r], #16]\n\t" "ldr r5, [%[r], #20]\n\t" "ldr r6, [%[m], #16]\n\t" "ldr r7, [%[m], #20]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, 
r7\n\t" +#endif "str r4, [%[r], #16]\n\t" "str r5, [%[r], #20]\n\t" "ldr r4, [%[r], #24]\n\t" "ldr r5, [%[r], #28]\n\t" "ldr r6, [%[m], #24]\n\t" "ldr r7, [%[m], #28]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #24]\n\t" "str r5, [%[r], #28]\n\t" "ldr r4, [%[r], #32]\n\t" "ldr r5, [%[r], #36]\n\t" "ldr r6, [%[m], #32]\n\t" "ldr r7, [%[m], #36]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #32]\n\t" "str r5, [%[r], #36]\n\t" "ldr r4, [%[r], #40]\n\t" "ldr r5, [%[r], #44]\n\t" "ldr r6, [%[m], #40]\n\t" "ldr r7, [%[m], #44]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #40]\n\t" "str r5, [%[r], #44]\n\t" "ldr r4, [%[r], #48]\n\t" "ldr r5, [%[r], #52]\n\t" "ldr r6, [%[m], #48]\n\t" "ldr r7, [%[m], #52]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + 
"sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #48]\n\t" "str r5, [%[r], #52]\n\t" "ldr r4, [%[r], #56]\n\t" "ldr r5, [%[r], #60]\n\t" "ldr r6, [%[m], #56]\n\t" "ldr r7, [%[m], #60]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #56]\n\t" "str r5, [%[r], #60]\n\t" "ldr r4, [%[r], #64]\n\t" "ldr r5, [%[r], #68]\n\t" "ldr r6, [%[m], #64]\n\t" "ldr r7, [%[m], #68]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #64]\n\t" "str r5, [%[r], #68]\n\t" "ldr r4, [%[r], #72]\n\t" "ldr r5, [%[r], #76]\n\t" "ldr r6, [%[m], #72]\n\t" "ldr r7, [%[m], #76]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #72]\n\t" "str r5, [%[r], #76]\n\t" "ldr r4, [%[r], #80]\n\t" "ldr r5, [%[r], #84]\n\t" "ldr r6, [%[m], #80]\n\t" "ldr r7, [%[m], #84]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" 
+#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #80]\n\t" "str r5, [%[r], #84]\n\t" "ldr r4, [%[r], #88]\n\t" "ldr r5, [%[r], #92]\n\t" "ldr r6, [%[m], #88]\n\t" "ldr r7, [%[m], #92]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #88]\n\t" "str r5, [%[r], #92]\n\t" "ldr r4, [%[r], #96]\n\t" "ldr r5, [%[r], #100]\n\t" "ldr r6, [%[m], #96]\n\t" "ldr r7, [%[m], #100]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #96]\n\t" "str r5, [%[r], #100]\n\t" "ldr r4, [%[r], #104]\n\t" "ldr r5, [%[r], #108]\n\t" "ldr r6, [%[m], #104]\n\t" "ldr r7, [%[m], #108]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #104]\n\t" "str r5, [%[r], #108]\n\t" "ldr r4, [%[r], #112]\n\t" "ldr r5, [%[r], #116]\n\t" "ldr r6, [%[m], #112]\n\t" "ldr r7, [%[m], #116]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef 
__clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #112]\n\t" "str r5, [%[r], #116]\n\t" "ldr r4, [%[r], #120]\n\t" "ldr r5, [%[r], #124]\n\t" "ldr r6, [%[m], #120]\n\t" "ldr r7, [%[m], #124]\n\t" - "and r6, r3\n\t" - "and r7, r3\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "ands r6, r3\n\t" +#else + "and r6, r3\n\t" +#endif +#ifdef __clang__ + "ands r7, r3\n\t" +#else + "and r7, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #120]\n\t" "str r5, [%[r], #124]\n\t" - "ldr r4, [%[a], #0]\n\t" + "ldr r4, [%[a]]\n\t" "ldr r5, [%[a], #4]\n\t" - "ldr r6, [%[r], #0]\n\t" + "ldr r6, [%[r]]\n\t" "ldr r7, [%[r], #4]\n\t" - "add r6, r4\n\t" - "adc r7, r5\n\t" - "str r6, [%[r], #0]\n\t" +#ifdef __clang__ + "adds r6, r6, r4\n\t" +#else + "add r6, r6, r4\n\t" +#endif +#ifdef __clang__ + "adcs r7, r5\n\t" +#else + "adc r7, r5\n\t" +#endif + "str r6, [%[r]]\n\t" "str r7, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[a], #12]\n\t" "ldr r6, [%[r], #8]\n\t" "ldr r7, [%[r], #12]\n\t" - "adc r6, r4\n\t" - "adc r7, r5\n\t" +#ifdef __clang__ + "adcs r6, r4\n\t" +#else + "adc r6, r4\n\t" +#endif +#ifdef __clang__ + "adcs r7, r5\n\t" +#else + "adc r7, r5\n\t" +#endif "str r6, [%[r], #8]\n\t" "str r7, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[a], #20]\n\t" "ldr r6, [%[r], #16]\n\t" "ldr r7, [%[r], #20]\n\t" - "adc r6, r4\n\t" - "adc r7, r5\n\t" +#ifdef __clang__ + "adcs r6, r4\n\t" +#else + "adc r6, r4\n\t" +#endif +#ifdef __clang__ + "adcs r7, r5\n\t" +#else + "adc r7, r5\n\t" +#endif "str r6, [%[r], #16]\n\t" "str r7, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[a], #28]\n\t" 
"ldr r6, [%[r], #24]\n\t" "ldr r7, [%[r], #28]\n\t" - "adc r6, r4\n\t" - "adc r7, r5\n\t" +#ifdef __clang__ + "adcs r6, r4\n\t" +#else + "adc r6, r4\n\t" +#endif +#ifdef __clang__ + "adcs r7, r5\n\t" +#else + "adc r7, r5\n\t" +#endif "str r6, [%[r], #24]\n\t" "str r7, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[a], #36]\n\t" "ldr r6, [%[r], #32]\n\t" "ldr r7, [%[r], #36]\n\t" - "adc r6, r4\n\t" - "adc r7, r5\n\t" +#ifdef __clang__ + "adcs r6, r4\n\t" +#else + "adc r6, r4\n\t" +#endif +#ifdef __clang__ + "adcs r7, r5\n\t" +#else + "adc r7, r5\n\t" +#endif "str r6, [%[r], #32]\n\t" "str r7, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[a], #44]\n\t" "ldr r6, [%[r], #40]\n\t" "ldr r7, [%[r], #44]\n\t" - "adc r6, r4\n\t" - "adc r7, r5\n\t" +#ifdef __clang__ + "adcs r6, r4\n\t" +#else + "adc r6, r4\n\t" +#endif +#ifdef __clang__ + "adcs r7, r5\n\t" +#else + "adc r7, r5\n\t" +#endif "str r6, [%[r], #40]\n\t" "str r7, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[a], #52]\n\t" "ldr r6, [%[r], #48]\n\t" "ldr r7, [%[r], #52]\n\t" - "adc r6, r4\n\t" - "adc r7, r5\n\t" +#ifdef __clang__ + "adcs r6, r4\n\t" +#else + "adc r6, r4\n\t" +#endif +#ifdef __clang__ + "adcs r7, r5\n\t" +#else + "adc r7, r5\n\t" +#endif "str r6, [%[r], #48]\n\t" "str r7, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[a], #60]\n\t" "ldr r6, [%[r], #56]\n\t" "ldr r7, [%[r], #60]\n\t" - "adc r6, r4\n\t" - "adc r7, r5\n\t" +#ifdef __clang__ + "adcs r6, r4\n\t" +#else + "adc r6, r4\n\t" +#endif +#ifdef __clang__ + "adcs r7, r5\n\t" +#else + "adc r7, r5\n\t" +#endif "str r6, [%[r], #56]\n\t" "str r7, [%[r], #60]\n\t" "ldr r4, [%[a], #64]\n\t" "ldr r5, [%[a], #68]\n\t" "ldr r6, [%[r], #64]\n\t" "ldr r7, [%[r], #68]\n\t" - "adc r6, r4\n\t" - "adc r7, r5\n\t" +#ifdef __clang__ + "adcs r6, r4\n\t" +#else + "adc r6, r4\n\t" +#endif +#ifdef __clang__ + "adcs r7, r5\n\t" +#else + "adc r7, r5\n\t" +#endif "str r6, [%[r], #64]\n\t" "str r7, [%[r], #68]\n\t" "ldr r4, [%[a], 
#72]\n\t" "ldr r5, [%[a], #76]\n\t" "ldr r6, [%[r], #72]\n\t" "ldr r7, [%[r], #76]\n\t" - "adc r6, r4\n\t" - "adc r7, r5\n\t" +#ifdef __clang__ + "adcs r6, r4\n\t" +#else + "adc r6, r4\n\t" +#endif +#ifdef __clang__ + "adcs r7, r5\n\t" +#else + "adc r7, r5\n\t" +#endif "str r6, [%[r], #72]\n\t" "str r7, [%[r], #76]\n\t" "ldr r4, [%[a], #80]\n\t" "ldr r5, [%[a], #84]\n\t" "ldr r6, [%[r], #80]\n\t" "ldr r7, [%[r], #84]\n\t" - "adc r6, r4\n\t" - "adc r7, r5\n\t" +#ifdef __clang__ + "adcs r6, r4\n\t" +#else + "adc r6, r4\n\t" +#endif +#ifdef __clang__ + "adcs r7, r5\n\t" +#else + "adc r7, r5\n\t" +#endif "str r6, [%[r], #80]\n\t" "str r7, [%[r], #84]\n\t" "ldr r4, [%[a], #88]\n\t" "ldr r5, [%[a], #92]\n\t" "ldr r6, [%[r], #88]\n\t" "ldr r7, [%[r], #92]\n\t" - "adc r6, r4\n\t" - "adc r7, r5\n\t" +#ifdef __clang__ + "adcs r6, r4\n\t" +#else + "adc r6, r4\n\t" +#endif +#ifdef __clang__ + "adcs r7, r5\n\t" +#else + "adc r7, r5\n\t" +#endif "str r6, [%[r], #88]\n\t" "str r7, [%[r], #92]\n\t" "ldr r4, [%[a], #96]\n\t" "ldr r5, [%[a], #100]\n\t" "ldr r6, [%[r], #96]\n\t" "ldr r7, [%[r], #100]\n\t" - "adc r6, r4\n\t" - "adc r7, r5\n\t" +#ifdef __clang__ + "adcs r6, r4\n\t" +#else + "adc r6, r4\n\t" +#endif +#ifdef __clang__ + "adcs r7, r5\n\t" +#else + "adc r7, r5\n\t" +#endif "str r6, [%[r], #96]\n\t" "str r7, [%[r], #100]\n\t" "ldr r4, [%[a], #104]\n\t" "ldr r5, [%[a], #108]\n\t" "ldr r6, [%[r], #104]\n\t" "ldr r7, [%[r], #108]\n\t" - "adc r6, r4\n\t" - "adc r7, r5\n\t" +#ifdef __clang__ + "adcs r6, r4\n\t" +#else + "adc r6, r4\n\t" +#endif +#ifdef __clang__ + "adcs r7, r5\n\t" +#else + "adc r7, r5\n\t" +#endif "str r6, [%[r], #104]\n\t" "str r7, [%[r], #108]\n\t" "ldr r4, [%[a], #112]\n\t" "ldr r5, [%[a], #116]\n\t" "ldr r6, [%[r], #112]\n\t" "ldr r7, [%[r], #116]\n\t" - "adc r6, r4\n\t" - "adc r7, r5\n\t" +#ifdef __clang__ + "adcs r6, r4\n\t" +#else + "adc r6, r4\n\t" +#endif +#ifdef __clang__ + "adcs r7, r5\n\t" +#else + "adc r7, r5\n\t" +#endif "str r6, [%[r], #112]\n\t" 
"str r7, [%[r], #116]\n\t" "ldr r4, [%[a], #120]\n\t" "ldr r5, [%[a], #124]\n\t" "ldr r6, [%[r], #120]\n\t" "ldr r7, [%[r], #124]\n\t" - "adc r6, r4\n\t" - "adc r7, r5\n\t" +#ifdef __clang__ + "adcs r6, r4\n\t" +#else + "adc r6, r4\n\t" +#endif +#ifdef __clang__ + "adcs r7, r5\n\t" +#else + "adc r7, r5\n\t" +#endif "str r6, [%[r], #120]\n\t" "str r7, [%[r], #124]\n\t" - "mov r3, #0\n\t" - "ldr r5, [%[m], #124]\n\t" - "adc r3, r3\n\t" - "sub r5, r7\n\t" - "neg r3, r3\n\t" - "sbc r5, r5\n\t" - "orr r3, r5\n\t" - "ldr r6, [%[r], #0]\n\t" + "movs r3, #0\n\t" + "ldr r5, [%[m], #124]\n\t" +#ifdef __clang__ + "adcs r3, r3\n\t" +#else + "adc r3, r3\n\t" +#endif +#ifdef __clang__ + "subs r5, r5, r7\n\t" +#else + "sub r5, r5, r7\n\t" +#endif +#ifdef __clang__ + "negs r3, r3\n\t" +#else + "neg r3, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r5\n\t" +#else + "sbc r5, r5\n\t" +#endif +#ifdef __clang__ + "orrs r3, r5\n\t" +#else + "orr r3, r5\n\t" +#endif + "ldr r6, [%[r]]\n\t" "ldr r7, [%[r], #4]\n\t" - "ldr r4, [%[m], #0]\n\t" + "ldr r4, [%[m]]\n\t" "ldr r5, [%[m], #4]\n\t" - "and r4, r3\n\t" - "and r5, r3\n\t" - "sub r6, r4\n\t" - "sbc r7, r5\n\t" - "str r6, [%[r], #0]\n\t" +#ifdef __clang__ + "ands r4, r3\n\t" +#else + "and r4, r3\n\t" +#endif +#ifdef __clang__ + "ands r5, r3\n\t" +#else + "and r5, r3\n\t" +#endif +#ifdef __clang__ + "subs r6, r6, r4\n\t" +#else + "sub r6, r6, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r5\n\t" +#else + "sbc r7, r5\n\t" +#endif + "str r6, [%[r]]\n\t" "str r7, [%[r], #4]\n\t" "ldr r6, [%[r], #8]\n\t" "ldr r7, [%[r], #12]\n\t" "ldr r4, [%[m], #8]\n\t" "ldr r5, [%[m], #12]\n\t" - "and r4, r3\n\t" - "and r5, r3\n\t" - "sbc r6, r4\n\t" - "sbc r7, r5\n\t" +#ifdef __clang__ + "ands r4, r3\n\t" +#else + "and r4, r3\n\t" +#endif +#ifdef __clang__ + "ands r5, r3\n\t" +#else + "and r5, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r4\n\t" +#else + "sbc r6, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r5\n\t" +#else + "sbc r7, r5\n\t" +#endif "str 
r6, [%[r], #8]\n\t" "str r7, [%[r], #12]\n\t" "ldr r6, [%[r], #16]\n\t" "ldr r7, [%[r], #20]\n\t" "ldr r4, [%[m], #16]\n\t" "ldr r5, [%[m], #20]\n\t" - "and r4, r3\n\t" - "and r5, r3\n\t" - "sbc r6, r4\n\t" - "sbc r7, r5\n\t" +#ifdef __clang__ + "ands r4, r3\n\t" +#else + "and r4, r3\n\t" +#endif +#ifdef __clang__ + "ands r5, r3\n\t" +#else + "and r5, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r4\n\t" +#else + "sbc r6, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r5\n\t" +#else + "sbc r7, r5\n\t" +#endif "str r6, [%[r], #16]\n\t" "str r7, [%[r], #20]\n\t" "ldr r6, [%[r], #24]\n\t" "ldr r7, [%[r], #28]\n\t" "ldr r4, [%[m], #24]\n\t" "ldr r5, [%[m], #28]\n\t" - "and r4, r3\n\t" - "and r5, r3\n\t" - "sbc r6, r4\n\t" - "sbc r7, r5\n\t" +#ifdef __clang__ + "ands r4, r3\n\t" +#else + "and r4, r3\n\t" +#endif +#ifdef __clang__ + "ands r5, r3\n\t" +#else + "and r5, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r4\n\t" +#else + "sbc r6, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r5\n\t" +#else + "sbc r7, r5\n\t" +#endif "str r6, [%[r], #24]\n\t" "str r7, [%[r], #28]\n\t" "ldr r6, [%[r], #32]\n\t" "ldr r7, [%[r], #36]\n\t" "ldr r4, [%[m], #32]\n\t" "ldr r5, [%[m], #36]\n\t" - "and r4, r3\n\t" - "and r5, r3\n\t" - "sbc r6, r4\n\t" - "sbc r7, r5\n\t" +#ifdef __clang__ + "ands r4, r3\n\t" +#else + "and r4, r3\n\t" +#endif +#ifdef __clang__ + "ands r5, r3\n\t" +#else + "and r5, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r4\n\t" +#else + "sbc r6, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r5\n\t" +#else + "sbc r7, r5\n\t" +#endif "str r6, [%[r], #32]\n\t" "str r7, [%[r], #36]\n\t" "ldr r6, [%[r], #40]\n\t" "ldr r7, [%[r], #44]\n\t" "ldr r4, [%[m], #40]\n\t" "ldr r5, [%[m], #44]\n\t" - "and r4, r3\n\t" - "and r5, r3\n\t" - "sbc r6, r4\n\t" - "sbc r7, r5\n\t" +#ifdef __clang__ + "ands r4, r3\n\t" +#else + "and r4, r3\n\t" +#endif +#ifdef __clang__ + "ands r5, r3\n\t" +#else + "and r5, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r4\n\t" +#else + "sbc r6, r4\n\t" 
+#endif +#ifdef __clang__ + "sbcs r7, r5\n\t" +#else + "sbc r7, r5\n\t" +#endif "str r6, [%[r], #40]\n\t" "str r7, [%[r], #44]\n\t" "ldr r6, [%[r], #48]\n\t" "ldr r7, [%[r], #52]\n\t" "ldr r4, [%[m], #48]\n\t" "ldr r5, [%[m], #52]\n\t" - "and r4, r3\n\t" - "and r5, r3\n\t" - "sbc r6, r4\n\t" - "sbc r7, r5\n\t" +#ifdef __clang__ + "ands r4, r3\n\t" +#else + "and r4, r3\n\t" +#endif +#ifdef __clang__ + "ands r5, r3\n\t" +#else + "and r5, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r4\n\t" +#else + "sbc r6, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r5\n\t" +#else + "sbc r7, r5\n\t" +#endif "str r6, [%[r], #48]\n\t" "str r7, [%[r], #52]\n\t" "ldr r6, [%[r], #56]\n\t" "ldr r7, [%[r], #60]\n\t" "ldr r4, [%[m], #56]\n\t" "ldr r5, [%[m], #60]\n\t" - "and r4, r3\n\t" - "and r5, r3\n\t" - "sbc r6, r4\n\t" - "sbc r7, r5\n\t" +#ifdef __clang__ + "ands r4, r3\n\t" +#else + "and r4, r3\n\t" +#endif +#ifdef __clang__ + "ands r5, r3\n\t" +#else + "and r5, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r4\n\t" +#else + "sbc r6, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r5\n\t" +#else + "sbc r7, r5\n\t" +#endif "str r6, [%[r], #56]\n\t" "str r7, [%[r], #60]\n\t" "ldr r6, [%[r], #64]\n\t" "ldr r7, [%[r], #68]\n\t" "ldr r4, [%[m], #64]\n\t" "ldr r5, [%[m], #68]\n\t" - "and r4, r3\n\t" - "and r5, r3\n\t" - "sbc r6, r4\n\t" - "sbc r7, r5\n\t" +#ifdef __clang__ + "ands r4, r3\n\t" +#else + "and r4, r3\n\t" +#endif +#ifdef __clang__ + "ands r5, r3\n\t" +#else + "and r5, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r4\n\t" +#else + "sbc r6, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r5\n\t" +#else + "sbc r7, r5\n\t" +#endif "str r6, [%[r], #64]\n\t" "str r7, [%[r], #68]\n\t" "ldr r6, [%[r], #72]\n\t" "ldr r7, [%[r], #76]\n\t" "ldr r4, [%[m], #72]\n\t" "ldr r5, [%[m], #76]\n\t" - "and r4, r3\n\t" - "and r5, r3\n\t" - "sbc r6, r4\n\t" - "sbc r7, r5\n\t" +#ifdef __clang__ + "ands r4, r3\n\t" +#else + "and r4, r3\n\t" +#endif +#ifdef __clang__ + "ands r5, r3\n\t" +#else + "and r5, 
r3\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r4\n\t" +#else + "sbc r6, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r5\n\t" +#else + "sbc r7, r5\n\t" +#endif "str r6, [%[r], #72]\n\t" "str r7, [%[r], #76]\n\t" "ldr r6, [%[r], #80]\n\t" "ldr r7, [%[r], #84]\n\t" "ldr r4, [%[m], #80]\n\t" "ldr r5, [%[m], #84]\n\t" - "and r4, r3\n\t" - "and r5, r3\n\t" - "sbc r6, r4\n\t" - "sbc r7, r5\n\t" +#ifdef __clang__ + "ands r4, r3\n\t" +#else + "and r4, r3\n\t" +#endif +#ifdef __clang__ + "ands r5, r3\n\t" +#else + "and r5, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r4\n\t" +#else + "sbc r6, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r5\n\t" +#else + "sbc r7, r5\n\t" +#endif "str r6, [%[r], #80]\n\t" "str r7, [%[r], #84]\n\t" "ldr r6, [%[r], #88]\n\t" "ldr r7, [%[r], #92]\n\t" "ldr r4, [%[m], #88]\n\t" "ldr r5, [%[m], #92]\n\t" - "and r4, r3\n\t" - "and r5, r3\n\t" - "sbc r6, r4\n\t" - "sbc r7, r5\n\t" +#ifdef __clang__ + "ands r4, r3\n\t" +#else + "and r4, r3\n\t" +#endif +#ifdef __clang__ + "ands r5, r3\n\t" +#else + "and r5, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r4\n\t" +#else + "sbc r6, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r5\n\t" +#else + "sbc r7, r5\n\t" +#endif "str r6, [%[r], #88]\n\t" "str r7, [%[r], #92]\n\t" "ldr r6, [%[r], #96]\n\t" "ldr r7, [%[r], #100]\n\t" "ldr r4, [%[m], #96]\n\t" "ldr r5, [%[m], #100]\n\t" - "and r4, r3\n\t" - "and r5, r3\n\t" - "sbc r6, r4\n\t" - "sbc r7, r5\n\t" +#ifdef __clang__ + "ands r4, r3\n\t" +#else + "and r4, r3\n\t" +#endif +#ifdef __clang__ + "ands r5, r3\n\t" +#else + "and r5, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r4\n\t" +#else + "sbc r6, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r5\n\t" +#else + "sbc r7, r5\n\t" +#endif "str r6, [%[r], #96]\n\t" "str r7, [%[r], #100]\n\t" "ldr r6, [%[r], #104]\n\t" "ldr r7, [%[r], #108]\n\t" "ldr r4, [%[m], #104]\n\t" "ldr r5, [%[m], #108]\n\t" - "and r4, r3\n\t" - "and r5, r3\n\t" - "sbc r6, r4\n\t" - "sbc r7, r5\n\t" +#ifdef __clang__ + "ands r4, r3\n\t" 
+#else + "and r4, r3\n\t" +#endif +#ifdef __clang__ + "ands r5, r3\n\t" +#else + "and r5, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r4\n\t" +#else + "sbc r6, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r5\n\t" +#else + "sbc r7, r5\n\t" +#endif "str r6, [%[r], #104]\n\t" "str r7, [%[r], #108]\n\t" "ldr r6, [%[r], #112]\n\t" "ldr r7, [%[r], #116]\n\t" "ldr r4, [%[m], #112]\n\t" "ldr r5, [%[m], #116]\n\t" - "and r4, r3\n\t" - "and r5, r3\n\t" - "sbc r6, r4\n\t" - "sbc r7, r5\n\t" +#ifdef __clang__ + "ands r4, r3\n\t" +#else + "and r4, r3\n\t" +#endif +#ifdef __clang__ + "ands r5, r3\n\t" +#else + "and r5, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r4\n\t" +#else + "sbc r6, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r5\n\t" +#else + "sbc r7, r5\n\t" +#endif "str r6, [%[r], #112]\n\t" "str r7, [%[r], #116]\n\t" "ldr r6, [%[r], #120]\n\t" "ldr r7, [%[r], #124]\n\t" "ldr r4, [%[m], #120]\n\t" "ldr r5, [%[m], #124]\n\t" - "and r4, r3\n\t" - "and r5, r3\n\t" - "sbc r6, r4\n\t" - "sbc r7, r5\n\t" +#ifdef __clang__ + "ands r4, r3\n\t" +#else + "and r4, r3\n\t" +#endif +#ifdef __clang__ + "ands r5, r3\n\t" +#else + "and r5, r3\n\t" +#endif +#ifdef __clang__ + "sbcs r6, r4\n\t" +#else + "sbc r6, r4\n\t" +#endif +#ifdef __clang__ + "sbcs r7, r5\n\t" +#else + "sbc r7, r5\n\t" +#endif "str r6, [%[r], #120]\n\t" "str r7, [%[r], #124]\n\t" : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : - : "memory", "r4", "r5", "r6", "r7", "r3" + : "memory", "r3", "r4", "r5", "r6", "r7" ); } @@ -34634,459 +59624,1107 @@ SP_NOINLINE static void sp_1024_mont_tpl_32(sp_digit* r, const sp_digit* a, cons * b Number to subtract with in Montogmery form. * m Modulus (prime). 
*/ -SP_NOINLINE static void sp_1024_mont_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +SP_NOINLINE static void sp_1024_mont_sub_32(sp_digit* r, const sp_digit* a, + const sp_digit* b, const sp_digit* m) { __asm__ __volatile__ ( - "ldr r4, [%[a], #0]\n\t" + "ldr r4, [%[a]]\n\t" "ldr r5, [%[a], #4]\n\t" - "ldr r6, [%[b], #0]\n\t" + "ldr r6, [%[b]]\n\t" "ldr r7, [%[b], #4]\n\t" - "sub r4, r6\n\t" - "sbc r5, r7\n\t" - "str r4, [%[r], #0]\n\t" +#ifdef __clang__ + "subs r4, r4, r6\n\t" +#else + "sub r4, r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif + "str r4, [%[r]]\n\t" "str r5, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[a], #12]\n\t" "ldr r6, [%[b], #8]\n\t" "ldr r7, [%[b], #12]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #8]\n\t" "str r5, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[a], #20]\n\t" "ldr r6, [%[b], #16]\n\t" "ldr r7, [%[b], #20]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #16]\n\t" "str r5, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[a], #28]\n\t" "ldr r6, [%[b], #24]\n\t" "ldr r7, [%[b], #28]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #24]\n\t" "str r5, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[a], #36]\n\t" "ldr r6, [%[b], #32]\n\t" "ldr r7, [%[b], #36]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #32]\n\t" 
"str r5, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[a], #44]\n\t" "ldr r6, [%[b], #40]\n\t" "ldr r7, [%[b], #44]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #40]\n\t" "str r5, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[a], #52]\n\t" "ldr r6, [%[b], #48]\n\t" "ldr r7, [%[b], #52]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #48]\n\t" "str r5, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[a], #60]\n\t" "ldr r6, [%[b], #56]\n\t" "ldr r7, [%[b], #60]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #56]\n\t" "str r5, [%[r], #60]\n\t" "ldr r4, [%[a], #64]\n\t" "ldr r5, [%[a], #68]\n\t" "ldr r6, [%[b], #64]\n\t" "ldr r7, [%[b], #68]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #64]\n\t" "str r5, [%[r], #68]\n\t" "ldr r4, [%[a], #72]\n\t" "ldr r5, [%[a], #76]\n\t" "ldr r6, [%[b], #72]\n\t" "ldr r7, [%[b], #76]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #72]\n\t" "str r5, [%[r], #76]\n\t" "ldr r4, [%[a], #80]\n\t" "ldr r5, [%[a], #84]\n\t" "ldr r6, [%[b], #80]\n\t" "ldr r7, [%[b], #84]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" 
+#endif "str r4, [%[r], #80]\n\t" "str r5, [%[r], #84]\n\t" "ldr r4, [%[a], #88]\n\t" "ldr r5, [%[a], #92]\n\t" "ldr r6, [%[b], #88]\n\t" "ldr r7, [%[b], #92]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #88]\n\t" "str r5, [%[r], #92]\n\t" "ldr r4, [%[a], #96]\n\t" "ldr r5, [%[a], #100]\n\t" "ldr r6, [%[b], #96]\n\t" "ldr r7, [%[b], #100]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #96]\n\t" "str r5, [%[r], #100]\n\t" "ldr r4, [%[a], #104]\n\t" "ldr r5, [%[a], #108]\n\t" "ldr r6, [%[b], #104]\n\t" "ldr r7, [%[b], #108]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #104]\n\t" "str r5, [%[r], #108]\n\t" "ldr r4, [%[a], #112]\n\t" "ldr r5, [%[a], #116]\n\t" "ldr r6, [%[b], #112]\n\t" "ldr r7, [%[b], #116]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #112]\n\t" "str r5, [%[r], #116]\n\t" "ldr r4, [%[a], #120]\n\t" "ldr r5, [%[a], #124]\n\t" "ldr r6, [%[b], #120]\n\t" "ldr r7, [%[b], #124]\n\t" - "sbc r4, r6\n\t" - "sbc r5, r7\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else + "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else + "sbc r5, r7\n\t" +#endif "str r4, [%[r], #120]\n\t" "str r5, [%[r], #124]\n\t" - "sbc %[b], %[b]\n\t" - "ldr r4, [%[r], #0]\n\t" +#ifdef __clang__ + "sbcs %[b], %[b]\n\t" +#else + "sbc %[b], %[b]\n\t" +#endif + "ldr r4, [%[r]]\n\t" "ldr r5, [%[r], #4]\n\t" - "ldr r6, [%[m], #0]\n\t" + "ldr r6, [%[m]]\n\t" 
"ldr r7, [%[m], #4]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "add r4, r6\n\t" - "adc r5, r7\n\t" - "str r4, [%[r], #0]\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r6\n\t" +#else + "add r4, r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif + "str r4, [%[r]]\n\t" "str r5, [%[r], #4]\n\t" "ldr r4, [%[r], #8]\n\t" "ldr r5, [%[r], #12]\n\t" "ldr r6, [%[m], #8]\n\t" "ldr r7, [%[m], #12]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #8]\n\t" "str r5, [%[r], #12]\n\t" "ldr r4, [%[r], #16]\n\t" "ldr r5, [%[r], #20]\n\t" "ldr r6, [%[m], #16]\n\t" "ldr r7, [%[m], #20]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #16]\n\t" "str r5, [%[r], #20]\n\t" "ldr r4, [%[r], #24]\n\t" "ldr r5, [%[r], #28]\n\t" "ldr r6, [%[m], #24]\n\t" "ldr r7, [%[m], #28]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + 
"adc r5, r7\n\t" +#endif "str r4, [%[r], #24]\n\t" "str r5, [%[r], #28]\n\t" "ldr r4, [%[r], #32]\n\t" "ldr r5, [%[r], #36]\n\t" "ldr r6, [%[m], #32]\n\t" "ldr r7, [%[m], #36]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #32]\n\t" "str r5, [%[r], #36]\n\t" "ldr r4, [%[r], #40]\n\t" "ldr r5, [%[r], #44]\n\t" "ldr r6, [%[m], #40]\n\t" "ldr r7, [%[m], #44]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #40]\n\t" "str r5, [%[r], #44]\n\t" "ldr r4, [%[r], #48]\n\t" "ldr r5, [%[r], #52]\n\t" "ldr r6, [%[m], #48]\n\t" "ldr r7, [%[m], #52]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #48]\n\t" "str r5, [%[r], #52]\n\t" "ldr r4, [%[r], #56]\n\t" "ldr r5, [%[r], #60]\n\t" "ldr r6, [%[m], #56]\n\t" "ldr r7, [%[m], #60]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, 
%[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #56]\n\t" "str r5, [%[r], #60]\n\t" "ldr r4, [%[r], #64]\n\t" "ldr r5, [%[r], #68]\n\t" "ldr r6, [%[m], #64]\n\t" "ldr r7, [%[m], #68]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #64]\n\t" "str r5, [%[r], #68]\n\t" "ldr r4, [%[r], #72]\n\t" "ldr r5, [%[r], #76]\n\t" "ldr r6, [%[m], #72]\n\t" "ldr r7, [%[m], #76]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #72]\n\t" "str r5, [%[r], #76]\n\t" "ldr r4, [%[r], #80]\n\t" "ldr r5, [%[r], #84]\n\t" "ldr r6, [%[m], #80]\n\t" "ldr r7, [%[m], #84]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #80]\n\t" "str r5, [%[r], #84]\n\t" "ldr r4, [%[r], #88]\n\t" "ldr r5, [%[r], #92]\n\t" "ldr r6, [%[m], #88]\n\t" "ldr r7, [%[m], #92]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" 
+#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #88]\n\t" "str r5, [%[r], #92]\n\t" "ldr r4, [%[r], #96]\n\t" "ldr r5, [%[r], #100]\n\t" "ldr r6, [%[m], #96]\n\t" "ldr r7, [%[m], #100]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #96]\n\t" "str r5, [%[r], #100]\n\t" "ldr r4, [%[r], #104]\n\t" "ldr r5, [%[r], #108]\n\t" "ldr r6, [%[m], #104]\n\t" "ldr r7, [%[m], #108]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #104]\n\t" "str r5, [%[r], #108]\n\t" "ldr r4, [%[r], #112]\n\t" "ldr r5, [%[r], #116]\n\t" "ldr r6, [%[m], #112]\n\t" "ldr r7, [%[m], #116]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #112]\n\t" "str r5, [%[r], #116]\n\t" "ldr r4, [%[r], #120]\n\t" "ldr r5, 
[%[r], #124]\n\t" "ldr r6, [%[m], #120]\n\t" "ldr r7, [%[m], #124]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #120]\n\t" "str r5, [%[r], #124]\n\t" - "mov r6, #0\n\t" - "adc %[b], r6\n\t" - "ldr r4, [%[r], #0]\n\t" + "movs r6, #0\n\t" +#ifdef __clang__ + "adcs %[b], r6\n\t" +#else + "adc %[b], r6\n\t" +#endif + "ldr r4, [%[r]]\n\t" "ldr r5, [%[r], #4]\n\t" - "ldr r6, [%[m], #0]\n\t" + "ldr r6, [%[m]]\n\t" "ldr r7, [%[m], #4]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "add r4, r6\n\t" - "adc r5, r7\n\t" - "str r4, [%[r], #0]\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adds r4, r4, r6\n\t" +#else + "add r4, r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif + "str r4, [%[r]]\n\t" "str r5, [%[r], #4]\n\t" "ldr r4, [%[r], #8]\n\t" "ldr r5, [%[r], #12]\n\t" "ldr r6, [%[m], #8]\n\t" "ldr r7, [%[m], #12]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #8]\n\t" "str r5, [%[r], #12]\n\t" "ldr r4, [%[r], #16]\n\t" "ldr r5, [%[r], #20]\n\t" "ldr r6, [%[m], #16]\n\t" "ldr r7, [%[m], #20]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" 
+#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #16]\n\t" "str r5, [%[r], #20]\n\t" "ldr r4, [%[r], #24]\n\t" "ldr r5, [%[r], #28]\n\t" "ldr r6, [%[m], #24]\n\t" "ldr r7, [%[m], #28]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #24]\n\t" "str r5, [%[r], #28]\n\t" "ldr r4, [%[r], #32]\n\t" "ldr r5, [%[r], #36]\n\t" "ldr r6, [%[m], #32]\n\t" "ldr r7, [%[m], #36]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #32]\n\t" "str r5, [%[r], #36]\n\t" "ldr r4, [%[r], #40]\n\t" "ldr r5, [%[r], #44]\n\t" "ldr r6, [%[m], #40]\n\t" "ldr r7, [%[m], #44]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #40]\n\t" "str r5, [%[r], #44]\n\t" "ldr r4, [%[r], #48]\n\t" "ldr r5, [%[r], #52]\n\t" "ldr r6, [%[m], #48]\n\t" "ldr r7, [%[m], 
#52]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #48]\n\t" "str r5, [%[r], #52]\n\t" "ldr r4, [%[r], #56]\n\t" "ldr r5, [%[r], #60]\n\t" "ldr r6, [%[m], #56]\n\t" "ldr r7, [%[m], #60]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #56]\n\t" "str r5, [%[r], #60]\n\t" "ldr r4, [%[r], #64]\n\t" "ldr r5, [%[r], #68]\n\t" "ldr r6, [%[m], #64]\n\t" "ldr r7, [%[m], #68]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #64]\n\t" "str r5, [%[r], #68]\n\t" "ldr r4, [%[r], #72]\n\t" "ldr r5, [%[r], #76]\n\t" "ldr r6, [%[m], #72]\n\t" "ldr r7, [%[m], #76]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], 
#72]\n\t" "str r5, [%[r], #76]\n\t" "ldr r4, [%[r], #80]\n\t" "ldr r5, [%[r], #84]\n\t" "ldr r6, [%[m], #80]\n\t" "ldr r7, [%[m], #84]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #80]\n\t" "str r5, [%[r], #84]\n\t" "ldr r4, [%[r], #88]\n\t" "ldr r5, [%[r], #92]\n\t" "ldr r6, [%[m], #88]\n\t" "ldr r7, [%[m], #92]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #88]\n\t" "str r5, [%[r], #92]\n\t" "ldr r4, [%[r], #96]\n\t" "ldr r5, [%[r], #100]\n\t" "ldr r6, [%[m], #96]\n\t" "ldr r7, [%[m], #100]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #96]\n\t" "str r5, [%[r], #100]\n\t" "ldr r4, [%[r], #104]\n\t" "ldr r5, [%[r], #108]\n\t" "ldr r6, [%[m], #104]\n\t" "ldr r7, [%[m], #108]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + 
"adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #104]\n\t" "str r5, [%[r], #108]\n\t" "ldr r4, [%[r], #112]\n\t" "ldr r5, [%[r], #116]\n\t" "ldr r6, [%[m], #112]\n\t" "ldr r7, [%[m], #116]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #112]\n\t" "str r5, [%[r], #116]\n\t" "ldr r4, [%[r], #120]\n\t" "ldr r5, [%[r], #124]\n\t" "ldr r6, [%[m], #120]\n\t" "ldr r7, [%[m], #124]\n\t" - "and r6, %[b]\n\t" - "and r7, %[b]\n\t" - "adc r4, r6\n\t" - "adc r5, r7\n\t" +#ifdef __clang__ + "ands r6, %[b]\n\t" +#else + "and r6, %[b]\n\t" +#endif +#ifdef __clang__ + "ands r7, %[b]\n\t" +#else + "and r7, %[b]\n\t" +#endif +#ifdef __clang__ + "adcs r4, r6\n\t" +#else + "adc r4, r6\n\t" +#endif +#ifdef __clang__ + "adcs r5, r7\n\t" +#else + "adc r5, r7\n\t" +#endif "str r4, [%[r], #120]\n\t" "str r5, [%[r], #124]\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) @@ -35103,200 +60741,605 @@ SP_NOINLINE static void sp_1024_mont_sub_32(sp_digit* r, const sp_digit* a, cons * b A single precision number to add. * m Mask value to apply. 
*/ -SP_NOINLINE static sp_digit sp_1024_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +SP_NOINLINE static sp_digit sp_1024_cond_add_32(sp_digit* r, const sp_digit* a, + const sp_digit* b, sp_digit m) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r5, #128\n\t" + "movs r4, #0\n\t" + "movs r5, #0x80\n\t" "mov r8, r5\n\t" - "mov r7, #0\n\t" - "1:\n\t" + "movs r7, #0\n\t" + "\n" + "L_sp_1024_cond_add_32_words_%=: \n\t" "ldr r6, [%[b], r7]\n\t" +#ifdef __clang__ + "ands r6, %[m]\n\t" +#else "and r6, %[m]\n\t" - "mov r5, #0\n\t" - "sub r5, #1\n\t" - "add r5, %[c]\n\t" +#endif + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, #1\n\t" +#else + "sub r5, r5, #1\n\t" +#endif +#ifdef __clang__ + "adds r5, r5, r4\n\t" +#else + "add r5, r5, r4\n\t" +#endif "ldr r5, [%[a], r7]\n\t" +#ifdef __clang__ + "adcs r5, r6\n\t" +#else "adc r5, r6\n\t" - "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" +#endif + "movs r4, #0\n\t" +#ifdef __clang__ + "adcs r4, r4\n\t" +#else + "adc r4, r4\n\t" +#endif "str r5, [%[r], r7]\n\t" - "add r7, #4\n\t" +#ifdef __clang__ + "adds r7, r7, #4\n\t" +#else + "add r7, r7, #4\n\t" +#endif "cmp r7, r8\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r5", "r6", "r7", "r8" + "blt L_sp_1024_cond_add_32_words_%=\n\t" + "movs %[r], r4\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r4", "r5", "r6", "r7", "r8" ); - - return c; + return (uint32_t)(size_t)r; } -static void sp_1024_rshift1_32(sp_digit* r, sp_digit* a) +/* Right shift a by 1 bit into r. (r = a >> 1) + * + * r A single precision integer. + * a A single precision integer. 
+ */ +static void sp_1024_rshift1_32(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( "ldr r2, [%[a]]\n\t" "ldr r3, [%[a], #4]\n\t" +#ifdef __clang__ + "lsrs r2, r2, #1\n\t" +#else "lsr r2, r2, #1\n\t" +#endif +#ifdef __clang__ + "lsls r5, r3, #31\n\t" +#else "lsl r5, r3, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r3, r3, #1\n\t" +#else "lsr r3, r3, #1\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "orrs r2, r5\n\t" +#else + "orr r2, r5\n\t" +#endif "ldr r4, [%[a], #8]\n\t" - "str r2, [%[r], #0]\n\t" + "str r2, [%[r]]\n\t" +#ifdef __clang__ + "lsls r5, r4, #31\n\t" +#else "lsl r5, r4, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #1\n\t" +#else "lsr r4, r4, #1\n\t" - "orr r3, r3, r5\n\t" +#endif +#ifdef __clang__ + "orrs r3, r5\n\t" +#else + "orr r3, r5\n\t" +#endif "ldr r2, [%[a], #12]\n\t" "str r3, [%[r], #4]\n\t" +#ifdef __clang__ + "lsls r5, r2, #31\n\t" +#else "lsl r5, r2, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r2, r2, #1\n\t" +#else "lsr r2, r2, #1\n\t" - "orr r4, r4, r5\n\t" +#endif +#ifdef __clang__ + "orrs r4, r5\n\t" +#else + "orr r4, r5\n\t" +#endif "ldr r3, [%[a], #16]\n\t" "str r4, [%[r], #8]\n\t" +#ifdef __clang__ + "lsls r5, r3, #31\n\t" +#else "lsl r5, r3, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r3, r3, #1\n\t" +#else "lsr r3, r3, #1\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "orrs r2, r5\n\t" +#else + "orr r2, r5\n\t" +#endif "ldr r4, [%[a], #20]\n\t" "str r2, [%[r], #12]\n\t" +#ifdef __clang__ + "lsls r5, r4, #31\n\t" +#else "lsl r5, r4, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #1\n\t" +#else "lsr r4, r4, #1\n\t" - "orr r3, r3, r5\n\t" +#endif +#ifdef __clang__ + "orrs r3, r5\n\t" +#else + "orr r3, r5\n\t" +#endif "ldr r2, [%[a], #24]\n\t" "str r3, [%[r], #16]\n\t" +#ifdef __clang__ + "lsls r5, r2, #31\n\t" +#else "lsl r5, r2, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r2, r2, #1\n\t" +#else "lsr r2, r2, #1\n\t" - "orr r4, r4, r5\n\t" +#endif +#ifdef __clang__ + "orrs r4, r5\n\t" +#else + "orr 
r4, r5\n\t" +#endif "ldr r3, [%[a], #28]\n\t" "str r4, [%[r], #20]\n\t" +#ifdef __clang__ + "lsls r5, r3, #31\n\t" +#else "lsl r5, r3, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r3, r3, #1\n\t" +#else "lsr r3, r3, #1\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "orrs r2, r5\n\t" +#else + "orr r2, r5\n\t" +#endif "ldr r4, [%[a], #32]\n\t" "str r2, [%[r], #24]\n\t" +#ifdef __clang__ + "lsls r5, r4, #31\n\t" +#else "lsl r5, r4, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #1\n\t" +#else "lsr r4, r4, #1\n\t" - "orr r3, r3, r5\n\t" +#endif +#ifdef __clang__ + "orrs r3, r5\n\t" +#else + "orr r3, r5\n\t" +#endif "ldr r2, [%[a], #36]\n\t" "str r3, [%[r], #28]\n\t" +#ifdef __clang__ + "lsls r5, r2, #31\n\t" +#else "lsl r5, r2, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r2, r2, #1\n\t" +#else "lsr r2, r2, #1\n\t" - "orr r4, r4, r5\n\t" +#endif +#ifdef __clang__ + "orrs r4, r5\n\t" +#else + "orr r4, r5\n\t" +#endif "ldr r3, [%[a], #40]\n\t" "str r4, [%[r], #32]\n\t" +#ifdef __clang__ + "lsls r5, r3, #31\n\t" +#else "lsl r5, r3, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r3, r3, #1\n\t" +#else "lsr r3, r3, #1\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "orrs r2, r5\n\t" +#else + "orr r2, r5\n\t" +#endif "ldr r4, [%[a], #44]\n\t" "str r2, [%[r], #36]\n\t" +#ifdef __clang__ + "lsls r5, r4, #31\n\t" +#else "lsl r5, r4, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #1\n\t" +#else "lsr r4, r4, #1\n\t" - "orr r3, r3, r5\n\t" +#endif +#ifdef __clang__ + "orrs r3, r5\n\t" +#else + "orr r3, r5\n\t" +#endif "ldr r2, [%[a], #48]\n\t" "str r3, [%[r], #40]\n\t" +#ifdef __clang__ + "lsls r5, r2, #31\n\t" +#else "lsl r5, r2, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r2, r2, #1\n\t" +#else "lsr r2, r2, #1\n\t" - "orr r4, r4, r5\n\t" +#endif +#ifdef __clang__ + "orrs r4, r5\n\t" +#else + "orr r4, r5\n\t" +#endif "ldr r3, [%[a], #52]\n\t" "str r4, [%[r], #44]\n\t" +#ifdef __clang__ + "lsls r5, r3, #31\n\t" +#else "lsl r5, r3, #31\n\t" +#endif +#ifdef __clang__ 
+ "lsrs r3, r3, #1\n\t" +#else "lsr r3, r3, #1\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "orrs r2, r5\n\t" +#else + "orr r2, r5\n\t" +#endif "ldr r4, [%[a], #56]\n\t" "str r2, [%[r], #48]\n\t" +#ifdef __clang__ + "lsls r5, r4, #31\n\t" +#else "lsl r5, r4, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #1\n\t" +#else "lsr r4, r4, #1\n\t" - "orr r3, r3, r5\n\t" +#endif +#ifdef __clang__ + "orrs r3, r5\n\t" +#else + "orr r3, r5\n\t" +#endif "ldr r2, [%[a], #60]\n\t" "str r3, [%[r], #52]\n\t" +#ifdef __clang__ + "lsls r5, r2, #31\n\t" +#else "lsl r5, r2, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r2, r2, #1\n\t" +#else "lsr r2, r2, #1\n\t" - "orr r4, r4, r5\n\t" +#endif +#ifdef __clang__ + "orrs r4, r5\n\t" +#else + "orr r4, r5\n\t" +#endif "ldr r3, [%[a], #64]\n\t" "str r4, [%[r], #56]\n\t" +#ifdef __clang__ + "lsls r5, r3, #31\n\t" +#else "lsl r5, r3, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r3, r3, #1\n\t" +#else "lsr r3, r3, #1\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "orrs r2, r5\n\t" +#else + "orr r2, r5\n\t" +#endif "ldr r4, [%[a], #68]\n\t" "str r2, [%[r], #60]\n\t" +#ifdef __clang__ + "lsls r5, r4, #31\n\t" +#else "lsl r5, r4, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #1\n\t" +#else "lsr r4, r4, #1\n\t" - "orr r3, r3, r5\n\t" +#endif +#ifdef __clang__ + "orrs r3, r5\n\t" +#else + "orr r3, r5\n\t" +#endif "ldr r2, [%[a], #72]\n\t" "str r3, [%[r], #64]\n\t" +#ifdef __clang__ + "lsls r5, r2, #31\n\t" +#else "lsl r5, r2, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r2, r2, #1\n\t" +#else "lsr r2, r2, #1\n\t" - "orr r4, r4, r5\n\t" +#endif +#ifdef __clang__ + "orrs r4, r5\n\t" +#else + "orr r4, r5\n\t" +#endif "ldr r3, [%[a], #76]\n\t" "str r4, [%[r], #68]\n\t" +#ifdef __clang__ + "lsls r5, r3, #31\n\t" +#else "lsl r5, r3, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r3, r3, #1\n\t" +#else "lsr r3, r3, #1\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "orrs r2, r5\n\t" +#else + "orr r2, r5\n\t" +#endif "ldr r4, 
[%[a], #80]\n\t" "str r2, [%[r], #72]\n\t" +#ifdef __clang__ + "lsls r5, r4, #31\n\t" +#else "lsl r5, r4, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #1\n\t" +#else "lsr r4, r4, #1\n\t" - "orr r3, r3, r5\n\t" +#endif +#ifdef __clang__ + "orrs r3, r5\n\t" +#else + "orr r3, r5\n\t" +#endif "ldr r2, [%[a], #84]\n\t" "str r3, [%[r], #76]\n\t" +#ifdef __clang__ + "lsls r5, r2, #31\n\t" +#else "lsl r5, r2, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r2, r2, #1\n\t" +#else "lsr r2, r2, #1\n\t" - "orr r4, r4, r5\n\t" +#endif +#ifdef __clang__ + "orrs r4, r5\n\t" +#else + "orr r4, r5\n\t" +#endif "ldr r3, [%[a], #88]\n\t" "str r4, [%[r], #80]\n\t" +#ifdef __clang__ + "lsls r5, r3, #31\n\t" +#else "lsl r5, r3, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r3, r3, #1\n\t" +#else "lsr r3, r3, #1\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "orrs r2, r5\n\t" +#else + "orr r2, r5\n\t" +#endif "ldr r4, [%[a], #92]\n\t" "str r2, [%[r], #84]\n\t" +#ifdef __clang__ + "lsls r5, r4, #31\n\t" +#else "lsl r5, r4, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #1\n\t" +#else "lsr r4, r4, #1\n\t" - "orr r3, r3, r5\n\t" +#endif +#ifdef __clang__ + "orrs r3, r5\n\t" +#else + "orr r3, r5\n\t" +#endif "ldr r2, [%[a], #96]\n\t" "str r3, [%[r], #88]\n\t" +#ifdef __clang__ + "lsls r5, r2, #31\n\t" +#else "lsl r5, r2, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r2, r2, #1\n\t" +#else "lsr r2, r2, #1\n\t" - "orr r4, r4, r5\n\t" +#endif +#ifdef __clang__ + "orrs r4, r5\n\t" +#else + "orr r4, r5\n\t" +#endif "ldr r3, [%[a], #100]\n\t" "str r4, [%[r], #92]\n\t" +#ifdef __clang__ + "lsls r5, r3, #31\n\t" +#else "lsl r5, r3, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r3, r3, #1\n\t" +#else "lsr r3, r3, #1\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "orrs r2, r5\n\t" +#else + "orr r2, r5\n\t" +#endif "ldr r4, [%[a], #104]\n\t" "str r2, [%[r], #96]\n\t" +#ifdef __clang__ + "lsls r5, r4, #31\n\t" +#else "lsl r5, r4, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #1\n\t" 
+#else "lsr r4, r4, #1\n\t" - "orr r3, r3, r5\n\t" +#endif +#ifdef __clang__ + "orrs r3, r5\n\t" +#else + "orr r3, r5\n\t" +#endif "ldr r2, [%[a], #108]\n\t" "str r3, [%[r], #100]\n\t" +#ifdef __clang__ + "lsls r5, r2, #31\n\t" +#else "lsl r5, r2, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r2, r2, #1\n\t" +#else "lsr r2, r2, #1\n\t" - "orr r4, r4, r5\n\t" +#endif +#ifdef __clang__ + "orrs r4, r5\n\t" +#else + "orr r4, r5\n\t" +#endif "ldr r3, [%[a], #112]\n\t" "str r4, [%[r], #104]\n\t" +#ifdef __clang__ + "lsls r5, r3, #31\n\t" +#else "lsl r5, r3, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r3, r3, #1\n\t" +#else "lsr r3, r3, #1\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "orrs r2, r5\n\t" +#else + "orr r2, r5\n\t" +#endif "ldr r4, [%[a], #116]\n\t" "str r2, [%[r], #108]\n\t" +#ifdef __clang__ + "lsls r5, r4, #31\n\t" +#else "lsl r5, r4, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r4, r4, #1\n\t" +#else "lsr r4, r4, #1\n\t" - "orr r3, r3, r5\n\t" +#endif +#ifdef __clang__ + "orrs r3, r5\n\t" +#else + "orr r3, r5\n\t" +#endif "ldr r2, [%[a], #120]\n\t" "str r3, [%[r], #112]\n\t" +#ifdef __clang__ + "lsls r5, r2, #31\n\t" +#else "lsl r5, r2, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r2, r2, #1\n\t" +#else "lsr r2, r2, #1\n\t" - "orr r4, r4, r5\n\t" +#endif +#ifdef __clang__ + "orrs r4, r5\n\t" +#else + "orr r4, r5\n\t" +#endif "ldr r3, [%[a], #124]\n\t" "str r4, [%[r], #116]\n\t" +#ifdef __clang__ + "lsls r5, r3, #31\n\t" +#else "lsl r5, r3, #31\n\t" +#endif +#ifdef __clang__ + "lsrs r3, r3, #1\n\t" +#else "lsr r3, r3, #1\n\t" - "orr r2, r2, r5\n\t" +#endif +#ifdef __clang__ + "orrs r2, r5\n\t" +#else + "orr r2, r5\n\t" +#endif "str r2, [%[r], #120]\n\t" "str r3, [%[r], #124]\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) : "memory", "r2", "r3", "r4", "r5" ); } @@ -35307,7 +61350,8 @@ static void sp_1024_rshift1_32(sp_digit* r, sp_digit* a) * a Number to divide. * m Modulus (prime). 
*/ -SP_NOINLINE static void sp_1024_div2_32(sp_digit* r, const sp_digit* a, const sp_digit* m) +SP_NOINLINE static void sp_1024_div2_32(sp_digit* r, const sp_digit* a, + const sp_digit* m) { sp_digit o; @@ -35521,30 +61565,58 @@ static void sp_1024_proj_point_dbl_32(sp_point_1024* r, const sp_point_1024* p, SP_NOINLINE static sp_digit sp_1024_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r6, %[a]\n\t" - "add r6, #128\n\t" - "\n1:\n\t" - "mov r5, #0\n\t" - "sub r5, %[c]\n\t" + "movs r6, %[a]\n\t" + "movs r3, #0\n\t" +#ifdef __clang__ + "adds r6, r6, #0x80\n\t" +#else + "add r6, r6, #0x80\n\t" +#endif + "\n" + "L_sp_1024_sub_32_word_%=: \n\t" + "movs r5, #0\n\t" +#ifdef __clang__ + "subs r5, r5, r3\n\t" +#else + "sub r5, r5, r3\n\t" +#endif "ldr r4, [%[a]]\n\t" "ldr r5, [%[b]]\n\t" +#ifdef __clang__ + "sbcs r4, r5\n\t" +#else "sbc r4, r5\n\t" +#endif "str r4, [%[r]]\n\t" - "sbc %[c], %[c]\n\t" - "add %[a], #4\n\t" - "add %[b], #4\n\t" - "add %[r], #4\n\t" +#ifdef __clang__ + "sbcs r3, r3\n\t" +#else + "sbc r3, r3\n\t" +#endif +#ifdef __clang__ + "adds %[a], %[a], #4\n\t" +#else + "add %[a], %[a], #4\n\t" +#endif +#ifdef __clang__ + "adds %[b], %[b], #4\n\t" +#else + "add %[b], %[b], #4\n\t" +#endif +#ifdef __clang__ + "adds %[r], %[r], #4\n\t" +#else + "add %[r], %[r], #4\n\t" +#endif "cmp %[a], r6\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "bne L_sp_1024_sub_32_word_%=\n\t" + "movs %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6" + : "memory", "r3", "r4", "r5", "r6" ); - - return c; + return (uint32_t)(size_t)r; } #else @@ -35557,144 +61629,275 @@ SP_NOINLINE static sp_digit sp_1024_sub_32(sp_digit* r, const sp_digit* a, SP_NOINLINE static sp_digit sp_1024_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldr r4, [%[a], #0]\n\t" + "movs r3, #0\n\t" + "ldr r4, 
[%[a]]\n\t" "ldr r5, [%[a], #4]\n\t" - "ldr r6, [%[b], #0]\n\t" + "ldr r6, [%[b]]\n\t" "ldr r7, [%[b], #4]\n\t" - "sub r4, r6\n\t" +#ifdef __clang__ + "subs r4, r4, r6\n\t" +#else + "sub r4, r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" - "str r4, [%[r], #0]\n\t" +#endif + "str r4, [%[r]]\n\t" "str r5, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[a], #12]\n\t" "ldr r6, [%[b], #8]\n\t" "ldr r7, [%[b], #12]\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" +#endif "str r4, [%[r], #8]\n\t" "str r5, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[a], #20]\n\t" "ldr r6, [%[b], #16]\n\t" "ldr r7, [%[b], #20]\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" +#endif "str r4, [%[r], #16]\n\t" "str r5, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[a], #28]\n\t" "ldr r6, [%[b], #24]\n\t" "ldr r7, [%[b], #28]\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" +#endif "str r4, [%[r], #24]\n\t" "str r5, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[a], #36]\n\t" "ldr r6, [%[b], #32]\n\t" "ldr r7, [%[b], #36]\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" +#endif "str r4, [%[r], #32]\n\t" "str r5, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[a], #44]\n\t" "ldr r6, [%[b], #40]\n\t" "ldr r7, [%[b], #44]\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" +#endif "str r4, [%[r], #40]\n\t" "str r5, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[a], #52]\n\t" "ldr r6, [%[b], #48]\n\t" "ldr r7, [%[b], #52]\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" 
+#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" +#endif "str r4, [%[r], #48]\n\t" "str r5, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[a], #60]\n\t" "ldr r6, [%[b], #56]\n\t" "ldr r7, [%[b], #60]\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" +#endif "str r4, [%[r], #56]\n\t" "str r5, [%[r], #60]\n\t" "ldr r4, [%[a], #64]\n\t" "ldr r5, [%[a], #68]\n\t" "ldr r6, [%[b], #64]\n\t" "ldr r7, [%[b], #68]\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" +#endif "str r4, [%[r], #64]\n\t" "str r5, [%[r], #68]\n\t" "ldr r4, [%[a], #72]\n\t" "ldr r5, [%[a], #76]\n\t" "ldr r6, [%[b], #72]\n\t" "ldr r7, [%[b], #76]\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" +#endif "str r4, [%[r], #72]\n\t" "str r5, [%[r], #76]\n\t" "ldr r4, [%[a], #80]\n\t" "ldr r5, [%[a], #84]\n\t" "ldr r6, [%[b], #80]\n\t" "ldr r7, [%[b], #84]\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" +#endif "str r4, [%[r], #80]\n\t" "str r5, [%[r], #84]\n\t" "ldr r4, [%[a], #88]\n\t" "ldr r5, [%[a], #92]\n\t" "ldr r6, [%[b], #88]\n\t" "ldr r7, [%[b], #92]\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" +#endif "str r4, [%[r], #88]\n\t" "str r5, [%[r], #92]\n\t" "ldr r4, [%[a], #96]\n\t" "ldr r5, [%[a], #100]\n\t" "ldr r6, [%[b], #96]\n\t" "ldr r7, [%[b], #100]\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" +#endif "str r4, [%[r], #96]\n\t" "str r5, [%[r], #100]\n\t" "ldr r4, [%[a], #104]\n\t" "ldr r5, [%[a], #108]\n\t" "ldr r6, [%[b], #104]\n\t" "ldr r7, 
[%[b], #108]\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" +#endif "str r4, [%[r], #104]\n\t" "str r5, [%[r], #108]\n\t" "ldr r4, [%[a], #112]\n\t" "ldr r5, [%[a], #116]\n\t" "ldr r6, [%[b], #112]\n\t" "ldr r7, [%[b], #116]\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" +#endif "str r4, [%[r], #112]\n\t" "str r5, [%[r], #116]\n\t" "ldr r4, [%[a], #120]\n\t" "ldr r5, [%[a], #124]\n\t" "ldr r6, [%[b], #120]\n\t" "ldr r7, [%[b], #124]\n\t" +#ifdef __clang__ + "sbcs r4, r6\n\t" +#else "sbc r4, r6\n\t" +#endif +#ifdef __clang__ + "sbcs r5, r7\n\t" +#else "sbc r5, r7\n\t" +#endif "str r4, [%[r], #120]\n\t" "str r5, [%[r], #124]\n\t" - "sbc %[c], %[c]\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) +#ifdef __clang__ + "sbcs r3, r3\n\t" +#else + "sbc r3, r3\n\t" +#endif + "movs %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r3", "r4", "r5", "r6", "r7" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */