Remove unused 4096-bit functions

This commit is contained in:
Sean Parkinson
2020-05-13 10:23:05 +10:00
parent 479b54e78e
commit 57756bfa8d
5 changed files with 24 additions and 45626 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -11498,292 +11498,6 @@ static void sp_4096_to_bin(sp_digit* r, byte* a)
}
#ifndef WOLFSSL_SP_SMALL
/* Add b to a into r. (r = a + b)
 *
 * Fully unrolled addition of 64 words, processed as two halves of 32 words
 * (128 bytes) each, with the carry propagated from word to word.
 * NOTE(review): the 4-byte load/store stride assumes sp_digit is 32 bits wide.
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * b  A single precision integer.
 * returns the carry out of the top word (0 or 1).
 */
SP_NOINLINE static sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r7 = all ones: used below to turn a saved carry (0/1) back into
         * the carry flag. */
        "mov r7, #0\n\t"
        "mvn r7, r7\n\t"
        /* Words 0..31: the first add starts the carry chain. */
        "ldr r4, [%[a], #0]\n\t"
        "ldr r5, [%[b], #0]\n\t"
        "add r4, r5\n\t"
        "str r4, [%[r], #0]\n\t"
        "ldr r4, [%[a], #4]\n\t"
        "ldr r5, [%[b], #4]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #4]\n\t"
        "ldr r4, [%[a], #8]\n\t"
        "ldr r5, [%[b], #8]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #8]\n\t"
        "ldr r4, [%[a], #12]\n\t"
        "ldr r5, [%[b], #12]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #12]\n\t"
        "ldr r4, [%[a], #16]\n\t"
        "ldr r5, [%[b], #16]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #16]\n\t"
        "ldr r4, [%[a], #20]\n\t"
        "ldr r5, [%[b], #20]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #20]\n\t"
        "ldr r4, [%[a], #24]\n\t"
        "ldr r5, [%[b], #24]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #24]\n\t"
        "ldr r4, [%[a], #28]\n\t"
        "ldr r5, [%[b], #28]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #28]\n\t"
        "ldr r4, [%[a], #32]\n\t"
        "ldr r5, [%[b], #32]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #32]\n\t"
        "ldr r4, [%[a], #36]\n\t"
        "ldr r5, [%[b], #36]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #36]\n\t"
        "ldr r4, [%[a], #40]\n\t"
        "ldr r5, [%[b], #40]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #40]\n\t"
        "ldr r4, [%[a], #44]\n\t"
        "ldr r5, [%[b], #44]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #44]\n\t"
        "ldr r4, [%[a], #48]\n\t"
        "ldr r5, [%[b], #48]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #48]\n\t"
        "ldr r4, [%[a], #52]\n\t"
        "ldr r5, [%[b], #52]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #52]\n\t"
        "ldr r4, [%[a], #56]\n\t"
        "ldr r5, [%[b], #56]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #56]\n\t"
        "ldr r4, [%[a], #60]\n\t"
        "ldr r5, [%[b], #60]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #60]\n\t"
        "ldr r4, [%[a], #64]\n\t"
        "ldr r5, [%[b], #64]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #64]\n\t"
        "ldr r4, [%[a], #68]\n\t"
        "ldr r5, [%[b], #68]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #68]\n\t"
        "ldr r4, [%[a], #72]\n\t"
        "ldr r5, [%[b], #72]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #72]\n\t"
        "ldr r4, [%[a], #76]\n\t"
        "ldr r5, [%[b], #76]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #76]\n\t"
        "ldr r4, [%[a], #80]\n\t"
        "ldr r5, [%[b], #80]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #80]\n\t"
        "ldr r4, [%[a], #84]\n\t"
        "ldr r5, [%[b], #84]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #84]\n\t"
        "ldr r4, [%[a], #88]\n\t"
        "ldr r5, [%[b], #88]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #88]\n\t"
        "ldr r4, [%[a], #92]\n\t"
        "ldr r5, [%[b], #92]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #92]\n\t"
        "ldr r4, [%[a], #96]\n\t"
        "ldr r5, [%[b], #96]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #96]\n\t"
        "ldr r4, [%[a], #100]\n\t"
        "ldr r5, [%[b], #100]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #100]\n\t"
        "ldr r4, [%[a], #104]\n\t"
        "ldr r5, [%[b], #104]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #104]\n\t"
        "ldr r4, [%[a], #108]\n\t"
        "ldr r5, [%[b], #108]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #108]\n\t"
        "ldr r4, [%[a], #112]\n\t"
        "ldr r5, [%[b], #112]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #112]\n\t"
        "ldr r4, [%[a], #116]\n\t"
        "ldr r5, [%[b], #116]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #116]\n\t"
        "ldr r4, [%[a], #120]\n\t"
        "ldr r5, [%[b], #120]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #120]\n\t"
        "ldr r4, [%[a], #124]\n\t"
        "ldr r5, [%[b], #124]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #124]\n\t"
        /* Save the carry in c (c = 0 + 0 + carry); this relies on the mov
         * leaving the carry flag untouched. */
        "mov %[c], #0\n\t"
        "adc %[c], %[c]\n\t"
        /* Step all three pointers to the upper 32 words. */
        "add %[a], #0x80\n\t"
        "add %[b], #0x80\n\t"
        "add %[r], #0x80\n\t"
        /* Restore the carry flag: c + 0xffffffff carries out only if c == 1. */
        "add %[c], r7\n\t"
        /* Words 32..63. */
        "ldr r4, [%[a], #0]\n\t"
        "ldr r5, [%[b], #0]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #0]\n\t"
        "ldr r4, [%[a], #4]\n\t"
        "ldr r5, [%[b], #4]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #4]\n\t"
        "ldr r4, [%[a], #8]\n\t"
        "ldr r5, [%[b], #8]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #8]\n\t"
        "ldr r4, [%[a], #12]\n\t"
        "ldr r5, [%[b], #12]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #12]\n\t"
        "ldr r4, [%[a], #16]\n\t"
        "ldr r5, [%[b], #16]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #16]\n\t"
        "ldr r4, [%[a], #20]\n\t"
        "ldr r5, [%[b], #20]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #20]\n\t"
        "ldr r4, [%[a], #24]\n\t"
        "ldr r5, [%[b], #24]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #24]\n\t"
        "ldr r4, [%[a], #28]\n\t"
        "ldr r5, [%[b], #28]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #28]\n\t"
        "ldr r4, [%[a], #32]\n\t"
        "ldr r5, [%[b], #32]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #32]\n\t"
        "ldr r4, [%[a], #36]\n\t"
        "ldr r5, [%[b], #36]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #36]\n\t"
        "ldr r4, [%[a], #40]\n\t"
        "ldr r5, [%[b], #40]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #40]\n\t"
        "ldr r4, [%[a], #44]\n\t"
        "ldr r5, [%[b], #44]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #44]\n\t"
        "ldr r4, [%[a], #48]\n\t"
        "ldr r5, [%[b], #48]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #48]\n\t"
        "ldr r4, [%[a], #52]\n\t"
        "ldr r5, [%[b], #52]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #52]\n\t"
        "ldr r4, [%[a], #56]\n\t"
        "ldr r5, [%[b], #56]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #56]\n\t"
        "ldr r4, [%[a], #60]\n\t"
        "ldr r5, [%[b], #60]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #60]\n\t"
        "ldr r4, [%[a], #64]\n\t"
        "ldr r5, [%[b], #64]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #64]\n\t"
        "ldr r4, [%[a], #68]\n\t"
        "ldr r5, [%[b], #68]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #68]\n\t"
        "ldr r4, [%[a], #72]\n\t"
        "ldr r5, [%[b], #72]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #72]\n\t"
        "ldr r4, [%[a], #76]\n\t"
        "ldr r5, [%[b], #76]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #76]\n\t"
        "ldr r4, [%[a], #80]\n\t"
        "ldr r5, [%[b], #80]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #80]\n\t"
        "ldr r4, [%[a], #84]\n\t"
        "ldr r5, [%[b], #84]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #84]\n\t"
        "ldr r4, [%[a], #88]\n\t"
        "ldr r5, [%[b], #88]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #88]\n\t"
        "ldr r4, [%[a], #92]\n\t"
        "ldr r5, [%[b], #92]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #92]\n\t"
        "ldr r4, [%[a], #96]\n\t"
        "ldr r5, [%[b], #96]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #96]\n\t"
        "ldr r4, [%[a], #100]\n\t"
        "ldr r5, [%[b], #100]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #100]\n\t"
        "ldr r4, [%[a], #104]\n\t"
        "ldr r5, [%[b], #104]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #104]\n\t"
        "ldr r4, [%[a], #108]\n\t"
        "ldr r5, [%[b], #108]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #108]\n\t"
        "ldr r4, [%[a], #112]\n\t"
        "ldr r5, [%[b], #112]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #112]\n\t"
        "ldr r4, [%[a], #116]\n\t"
        "ldr r5, [%[b], #116]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #116]\n\t"
        "ldr r4, [%[a], #120]\n\t"
        "ldr r5, [%[b], #120]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #120]\n\t"
        "ldr r4, [%[a], #124]\n\t"
        "ldr r5, [%[b], #124]\n\t"
        "adc r4, r5\n\t"
        "str r4, [%[r], #124]\n\t"
        /* c = final carry out. */
        "mov %[c], #0\n\t"
        "adc %[c], %[c]\n\t"
        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
        :
        /* "cc": every add/adc above rewrites the condition flags. */
        : "memory", "r4", "r5", "r7", "cc"
    );

    return c;
}
/* Sub b from a into r. (r = a - b)
*
* r A single precision integer.
@@ -12886,139 +12600,6 @@ SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a,
return c;
}
/* Multiply a and b into r. (r = a * b)
 *
 * Column-wise multiplication of two 64-word numbers into a 128-word
 * product. Each output word is built by summing every a[i] * b[j] with
 * i + j equal to the output index into a three-register accumulator
 * (r3 = low, r4 = middle, r5 = high). The 32x32-bit products are assembled
 * from four 16x16-bit multiplies. The product is formed in a local buffer
 * and copied to r afterwards, so r is only written once the inputs have
 * been fully read.
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * b  A single precision integer.
 */
SP_NOINLINE static void sp_4096_mul_64(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit tmp[64 * 2];
    /* The asm uses its operand registers as scratch, so they are declared
     * read-write; an array cannot be a read-write operand, hence the
     * pointer. */
    sp_digit* t = tmp;

    __asm__ __volatile__ (
        /* r8 = byte offset of the current output word, r9 = a, r10 = b,
         * r11 = result buffer, r12 = a + 256 (one past the end of a). */
        "mov r3, #0\n\t"
        "mov r4, #0\n\t"
        "mov r8, r3\n\t"
        "mov r11, %[r]\n\t"
        "mov r9, %[a]\n\t"
        "mov r10, %[b]\n\t"
        "mov r6, #1\n\t"
        "lsl r6, r6, #8\n\t"
        "add r6, r9\n\t"
        "mov r12, r6\n\t"
        "\n1:\n\t"
        /* %[r] holds zero for carry propagation while a column is summed. */
        "mov %[r], #0\n\t"
        "mov r5, #0\n\t"
        /* Offset into a = max(0, r8 - 252), computed without a branch;
         * offset into b = r8 - offset into a. */
        "mov r6, #252\n\t"
        "mov %[a], r8\n\t"
        "sub %[a], r6\n\t"
        "sbc r6, r6\n\t"
        "mvn r6, r6\n\t"
        "and %[a], r6\n\t"
        "mov %[b], r8\n\t"
        "sub %[b], %[a]\n\t"
        "add %[a], r9\n\t"
        "add %[b], r10\n\t"
        "\n2:\n\t"
        "# Multiply Start\n\t"
        "ldr r6, [%[a]]\n\t"
        "ldr r7, [%[b]]\n\t"
        "lsl r6, r6, #16\n\t"
        "lsl r7, r7, #16\n\t"
        "lsr r6, r6, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r7, r6\n\t"
        "add r3, r7\n\t"
        "adc r4, %[r]\n\t"
        "adc r5, %[r]\n\t"
        "ldr r7, [%[b]]\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r6, r7\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "add r3, r6\n\t"
        "adc r4, r7\n\t"
        "adc r5, %[r]\n\t"
        "ldr r6, [%[a]]\n\t"
        "ldr r7, [%[b]]\n\t"
        "lsr r6, r6, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r7, r6\n\t"
        "add r4, r7\n\t"
        "adc r5, %[r]\n\t"
        "ldr r7, [%[b]]\n\t"
        "lsl r7, r7, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r6, r7\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "add r3, r6\n\t"
        "adc r4, r7\n\t"
        "adc r5, %[r]\n\t"
        "# Multiply Done\n\t"
        /* Next pair in this column: a moves up, b moves down. Stop at the
         * end of a or once a has passed a + r8. */
        "add %[a], #4\n\t"
        "sub %[b], #4\n\t"
        "cmp %[a], r12\n\t"
        "beq 3f\n\t"
        "mov r6, r8\n\t"
        "add r6, r9\n\t"
        "cmp %[a], r6\n\t"
        "ble 2b\n\t"
        "\n3:\n\t"
        /* Store the finished word and shift the accumulator down. */
        "mov %[r], r11\n\t"
        "mov r7, r8\n\t"
        "str r3, [%[r], r7]\n\t"
        "mov r3, r4\n\t"
        "mov r4, r5\n\t"
        "add r7, #4\n\t"
        "mov r8, r7\n\t"
        /* Keep going while the offset is <= 504 (256 + 248). */
        "mov r6, #1\n\t"
        "lsl r6, r6, #8\n\t"
        "add r6, #248\n\t"
        "cmp r7, r6\n\t"
        "ble 1b\n\t"
        /* Final (top) word at offset 508. */
        "str r3, [%[r], r7]\n\t"
        "mov %[a], r9\n\t"
        "mov %[b], r10\n\t"
        : [r] "+r" (t), [a] "+r" (a), [b] "+r" (b)
        :
        /* "cc": the arithmetic and compares above rewrite the flags. */
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cc"
    );

    XMEMCPY(r, tmp, sizeof(tmp));
}
/* Mask every word of a with m and write the result to r.
 * (r[i] = a[i] & m for each of the 64 words)
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * m  Mask to AND against each digit.
 */
static void sp_4096_mask_64(sp_digit* r, const sp_digit* a, sp_digit m)
{
    int idx = 0;

#ifdef WOLFSSL_SP_SMALL
    /* Compact form: one word per iteration. */
    while (idx < 64) {
        r[idx] = a[idx] & m;
        idx++;
    }
#else
    /* Unrolled form: eight words per iteration, in ascending order. */
    while (idx < 64) {
        sp_digit* dst = r + idx;
        const sp_digit* src = a + idx;

        dst[0] = src[0] & m;
        dst[1] = src[1] & m;
        dst[2] = src[2] & m;
        dst[3] = src[3] & m;
        dst[4] = src[4] & m;
        dst[5] = src[5] & m;
        dst[6] = src[6] & m;
        dst[7] = src[7] & m;
        idx += 8;
    }
#endif
}
/* Multiply a and b into r. (r = a * b)
*
* r A single precision integer.
@@ -13052,161 +12633,6 @@ SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a,
(void)sp_4096_add_128(r + 128, r + 128, z2);
}
/* Square a and put result in r. (r = a * a)
 *
 * Column-wise squaring of a 64-word number into a 128-word result. For
 * each output word the products a[i] * a[j] with i + j equal to the output
 * index are summed into a three-register accumulator (r3 = low,
 * r4 = middle, r5 = high). Products with i != j are added twice and the
 * i == j product once. The 32x32-bit products are assembled from 16x16-bit
 * multiplies.
 *
 * The result is built in a 512-byte scratch area that the asm carves out
 * below the stack pointer, then copied to r; sp is restored before the asm
 * ends.
 * NOTE(review): sp is moved inside the asm without the compiler being
 * told - confirm this is safe for the target ABI and build options.
 *
 * r  A single precision integer.
 * a  A single precision integer.
 */
SP_NOINLINE static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a)
{
    __asm__ __volatile__ (
        /* r8 = byte offset of the current output word, r9 = a,
         * r10 = scratch result on the stack, r11 = r. */
        "mov r3, #0\n\t"
        "mov r4, #0\n\t"
        "mov r5, #0\n\t"
        "mov r8, r3\n\t"
        "mov r11, %[r]\n\t"
        /* sp -= 512 */
        "mov r6, #2\n\t"
        "lsl r6, r6, #8\n\t"
        "neg r6, r6\n\t"
        "add sp, r6\n\t"
        "mov r10, sp\n\t"
        "mov r9, %[a]\n\t"
        "\n1:\n\t"
        /* %[r] holds zero for carry propagation while a column is summed. */
        "mov %[r], #0\n\t"
        /* Lower offset = max(0, r8 - 252), computed without a branch;
         * upper offset (r2) = r8 - lower offset. */
        "mov r6, #252\n\t"
        "mov %[a], r8\n\t"
        "sub %[a], r6\n\t"
        "sbc r6, r6\n\t"
        "mvn r6, r6\n\t"
        "and %[a], r6\n\t"
        "mov r2, r8\n\t"
        "sub r2, %[a]\n\t"
        "add %[a], r9\n\t"
        "add r2, r9\n\t"
        "\n2:\n\t"
        /* Both pointers on the same word: square term, added once. */
        "cmp r2, %[a]\n\t"
        "beq 4f\n\t"
        "# Multiply * 2: Start\n\t"
        "ldr r6, [%[a]]\n\t"
        "ldr r7, [r2]\n\t"
        "lsl r6, r6, #16\n\t"
        "lsl r7, r7, #16\n\t"
        "lsr r6, r6, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r7, r6\n\t"
        "add r3, r7\n\t"
        "adc r4, %[r]\n\t"
        "adc r5, %[r]\n\t"
        "add r3, r7\n\t"
        "adc r4, %[r]\n\t"
        "adc r5, %[r]\n\t"
        "ldr r7, [r2]\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r6, r7\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "add r3, r6\n\t"
        "adc r4, r7\n\t"
        "adc r5, %[r]\n\t"
        "add r3, r6\n\t"
        "adc r4, r7\n\t"
        "adc r5, %[r]\n\t"
        "ldr r6, [%[a]]\n\t"
        "ldr r7, [r2]\n\t"
        "lsr r6, r6, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r7, r6\n\t"
        "add r4, r7\n\t"
        "adc r5, %[r]\n\t"
        "add r4, r7\n\t"
        "adc r5, %[r]\n\t"
        "ldr r7, [r2]\n\t"
        "lsl r7, r7, #16\n\t"
        "lsr r7, r7, #16\n\t"
        "mul r6, r7\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "add r3, r6\n\t"
        "adc r4, r7\n\t"
        "adc r5, %[r]\n\t"
        "add r3, r6\n\t"
        "adc r4, r7\n\t"
        "adc r5, %[r]\n\t"
        "# Multiply * 2: Done\n\t"
        "bal 5f\n\t"
        "\n4:\n\t"
        "# Square: Start\n\t"
        "ldr r6, [%[a]]\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "lsr r6, r6, #16\n\t"
        "mul r6, r6\n\t"
        "add r3, r6\n\t"
        "adc r4, %[r]\n\t"
        "adc r5, %[r]\n\t"
        "mul r7, r7\n\t"
        "add r4, r7\n\t"
        "adc r5, %[r]\n\t"
        "ldr r6, [%[a]]\n\t"
        "lsr r7, r6, #16\n\t"
        "lsl r6, r6, #16\n\t"
        "lsr r6, r6, #16\n\t"
        "mul r6, r7\n\t"
        /* Cross term low * high counts twice: shifted by 17 instead of 16. */
        "lsr r7, r6, #15\n\t"
        "lsl r6, r6, #17\n\t"
        "add r3, r6\n\t"
        "adc r4, r7\n\t"
        "adc r5, %[r]\n\t"
        "# Square: Done\n\t"
        "\n5:\n\t"
        /* Move the pointers towards each other. The column is done at the
         * end of a, once the pointers have crossed, or once the lower
         * pointer has passed a + r8. */
        "add %[a], #4\n\t"
        "sub r2, #4\n\t"
        "mov r6, #1\n\t"
        "lsl r6, r6, #8\n\t"
        "add r6, r9\n\t"
        "cmp %[a], r6\n\t"
        "beq 3f\n\t"
        "cmp %[a], r2\n\t"
        "bgt 3f\n\t"
        "mov r7, r8\n\t"
        "add r7, r9\n\t"
        "cmp %[a], r7\n\t"
        "ble 2b\n\t"
        "\n3:\n\t"
        /* Store the finished word in the scratch area and shift the
         * accumulator down. */
        "mov %[r], r10\n\t"
        "mov r7, r8\n\t"
        "str r3, [%[r], r7]\n\t"
        "mov r3, r4\n\t"
        "mov r4, r5\n\t"
        "mov r5, #0\n\t"
        "add r7, #4\n\t"
        "mov r8, r7\n\t"
        /* Keep going while the offset is <= 504 (256 + 248). */
        "mov r6, #1\n\t"
        "lsl r6, r6, #8\n\t"
        "add r6, #248\n\t"
        "cmp r7, r6\n\t"
        "ble 1b\n\t"
        "mov %[a], r9\n\t"
        /* Final (top) word at offset 508. */
        "str r3, [%[r], r7]\n\t"
        /* Copy the 512-byte result from the scratch area to r, from the
         * top word down. */
        "mov %[r], r11\n\t"
        "mov %[a], r10\n\t"
        "mov r3, #1\n\t"
        "lsl r3, r3, #8\n\t"
        "add r3, #252\n\t"
        "\n4:\n\t"
        "ldr r6, [%[a], r3]\n\t"
        "str r6, [%[r], r3]\n\t"
        "sub r3, #4\n\t"
        "bge 4b\n\t"
        /* sp += 512 */
        "mov r6, #2\n\t"
        "lsl r6, r6, #8\n\t"
        "add sp, r6\n\t"
        /* Both operand registers are overwritten above (%[a] ends up
         * pointing at the scratch area), so they are read-write operands. */
        : [r] "+r" (r), [a] "+r" (a)
        :
        /* "cc": the arithmetic and compares above rewrite the flags. */
        : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc"
    );
}
/* Square a and put result in r. (r = a * a)
*
* r A single precision integer.

View File

@@ -1990,7 +1990,7 @@ static int sp_2048_mod_exp_45(sp_digit* r, const sp_digit* a, const sp_digit* e,
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
sp_digit* td;
#else
sp_digit td[3 * 90];
sp_digit td[90];
#endif
sp_digit* t[3];
sp_digit* norm;
@@ -2014,7 +2014,7 @@ static int sp_2048_mod_exp_45(sp_digit* r, const sp_digit* a, const sp_digit* e,
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
t[i] = td + (i * 45 * 2);
#else
t[i] = &td[i * 45 * 2];
t[i] = &td[i * 45 * 2)];
#endif
}
@@ -2943,7 +2943,7 @@ static int sp_2048_mod_exp_90(sp_digit* r, const sp_digit* a, const sp_digit* e,
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
sp_digit* td;
#else
sp_digit td[3 * 180];
sp_digit td[180];
#endif
sp_digit* t[3];
sp_digit* norm;
@@ -2967,7 +2967,7 @@ static int sp_2048_mod_exp_90(sp_digit* r, const sp_digit* a, const sp_digit* e,
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
t[i] = td + (i * 90 * 2);
#else
t[i] = &td[i * 90 * 2];
t[i] = &td[i * 90 * 2)];
#endif
}
@@ -5831,7 +5831,7 @@ static int sp_3072_mod_exp_67(sp_digit* r, const sp_digit* a, const sp_digit* e,
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
sp_digit* td;
#else
sp_digit td[3 * 134];
sp_digit td[134];
#endif
sp_digit* t[3];
sp_digit* norm;
@@ -5855,7 +5855,7 @@ static int sp_3072_mod_exp_67(sp_digit* r, const sp_digit* a, const sp_digit* e,
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
t[i] = td + (i * 67 * 2);
#else
t[i] = &td[i * 67 * 2];
t[i] = &td[i * 67 * 2)];
#endif
}
@@ -6820,7 +6820,7 @@ static int sp_3072_mod_exp_134(sp_digit* r, const sp_digit* a, const sp_digit* e
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
sp_digit* td;
#else
sp_digit td[3 * 268];
sp_digit td[268];
#endif
sp_digit* t[3];
sp_digit* norm;
@@ -6844,7 +6844,7 @@ static int sp_3072_mod_exp_134(sp_digit* r, const sp_digit* a, const sp_digit* e
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
t[i] = td + (i * 134 * 2);
#else
t[i] = &td[i * 134 * 2];
t[i] = &td[i * 134 * 2)];
#endif
}
@@ -9895,7 +9895,7 @@ static int sp_4096_mod_exp_98(sp_digit* r, const sp_digit* a, const sp_digit* e,
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
sp_digit* td;
#else
sp_digit td[3 * 196];
sp_digit td[196];
#endif
sp_digit* t[3];
sp_digit* norm;
@@ -9919,7 +9919,7 @@ static int sp_4096_mod_exp_98(sp_digit* r, const sp_digit* a, const sp_digit* e,
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
t[i] = td + (i * 98 * 2);
#else
t[i] = &td[i * 98 * 2];
t[i] = &td[i * 98 * 2)];
#endif
}
@@ -10861,7 +10861,7 @@ static int sp_4096_mod_exp_196(sp_digit* r, const sp_digit* a, const sp_digit* e
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
sp_digit* td;
#else
sp_digit td[3 * 392];
sp_digit td[392];
#endif
sp_digit* t[3];
sp_digit* norm;
@@ -10885,7 +10885,7 @@ static int sp_4096_mod_exp_196(sp_digit* r, const sp_digit* a, const sp_digit* e
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
t[i] = td + (i * 196 * 2);
#else
t[i] = &td[i * 196 * 2];
t[i] = &td[i * 196 * 2)];
#endif
}

View File

@@ -1638,7 +1638,7 @@ static int sp_2048_mod_exp_18(sp_digit* r, const sp_digit* a, const sp_digit* e,
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
sp_digit* td;
#else
sp_digit td[3 * 36];
sp_digit td[36];
#endif
sp_digit* t[3];
sp_digit* norm;
@@ -1662,7 +1662,7 @@ static int sp_2048_mod_exp_18(sp_digit* r, const sp_digit* a, const sp_digit* e,
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
t[i] = td + (i * 18 * 2);
#else
t[i] = &td[i * 18 * 2];
t[i] = &td[i * 18 * 2)];
#endif
}
@@ -2583,7 +2583,7 @@ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e,
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
sp_digit* td;
#else
sp_digit td[3 * 72];
sp_digit td[72];
#endif
sp_digit* t[3];
sp_digit* norm;
@@ -2607,7 +2607,7 @@ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e,
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
t[i] = td + (i * 36 * 2);
#else
t[i] = &td[i * 36 * 2];
t[i] = &td[i * 36 * 2)];
#endif
}
@@ -5819,7 +5819,7 @@ static int sp_3072_mod_exp_27(sp_digit* r, const sp_digit* a, const sp_digit* e,
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
sp_digit* td;
#else
sp_digit td[3 * 54];
sp_digit td[54];
#endif
sp_digit* t[3];
sp_digit* norm;
@@ -5843,7 +5843,7 @@ static int sp_3072_mod_exp_27(sp_digit* r, const sp_digit* a, const sp_digit* e,
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
t[i] = td + (i * 27 * 2);
#else
t[i] = &td[i * 27 * 2];
t[i] = &td[i * 27 * 2)];
#endif
}
@@ -6734,7 +6734,7 @@ static int sp_3072_mod_exp_54(sp_digit* r, const sp_digit* a, const sp_digit* e,
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
sp_digit* td;
#else
sp_digit td[3 * 108];
sp_digit td[108];
#endif
sp_digit* t[3];
sp_digit* norm;
@@ -6758,7 +6758,7 @@ static int sp_3072_mod_exp_54(sp_digit* r, const sp_digit* a, const sp_digit* e,
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
t[i] = td + (i * 54 * 2);
#else
t[i] = &td[i * 54 * 2];
t[i] = &td[i * 54 * 2)];
#endif
}
@@ -10116,7 +10116,7 @@ static int sp_4096_mod_exp_39(sp_digit* r, const sp_digit* a, const sp_digit* e,
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
sp_digit* td;
#else
sp_digit td[3 * 78];
sp_digit td[78];
#endif
sp_digit* t[3];
sp_digit* norm;
@@ -10140,7 +10140,7 @@ static int sp_4096_mod_exp_39(sp_digit* r, const sp_digit* a, const sp_digit* e,
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
t[i] = td + (i * 39 * 2);
#else
t[i] = &td[i * 39 * 2];
t[i] = &td[i * 39 * 2)];
#endif
}
@@ -11130,7 +11130,7 @@ static int sp_4096_mod_exp_78(sp_digit* r, const sp_digit* a, const sp_digit* e,
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
sp_digit* td;
#else
sp_digit td[3 * 156];
sp_digit td[156];
#endif
sp_digit* t[3];
sp_digit* norm;
@@ -11154,7 +11154,7 @@ static int sp_4096_mod_exp_78(sp_digit* r, const sp_digit* a, const sp_digit* e,
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
t[i] = td + (i * 78 * 2);
#else
t[i] = &td[i * 78 * 2];
t[i] = &td[i * 78 * 2)];
#endif
}

View File

@@ -10097,188 +10097,6 @@ static void sp_4096_to_bin(sp_digit* r, byte* a)
}
#ifndef WOLFSSL_SP_SMALL
/* Add b to a into r. (r = a + b)
 *
 * Fully unrolled addition of 64 words, two words per load/add/store group,
 * with the carry propagated through all 32 groups. The three pointers are
 * advanced by the load/store-multiple write-back.
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * b  A single precision integer.
 * returns the carry out of the top word (0 or 1).
 */
SP_NOINLINE static sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* Words 0-1: adds starts the carry chain. */
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adds r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        /* Words 2-63: 31 more groups continuing the carry chain. */
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        "ldm %[a]!, {r4, r5}\n\t"
        "ldm %[b]!, {r6, r8}\n\t"
        "adcs r4, r4, r6\n\t"
        "adcs r5, r5, r8\n\t"
        "stm %[r]!, {r4, r5}\n\t"
        /* c = 0 + 0 + carry: the final carry out. */
        "mov %[c], #0\n\t"
        "adc %[c], %[c], %[c]\n\t"
        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
        :
        /* "cc": adds/adcs rewrite the condition flags. */
        : "memory", "r4", "r5", "r6", "r8", "cc"
    );

    return c;
}
/* Sub b from a into r. (r = a - b)
*
* r A single precision integer.
@@ -10962,111 +10780,6 @@ SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a,
return c;
}
/* Multiply a and b into r. (r = a * b)
 *
 * Column-wise multiplication of two 64-word numbers into a 128-word
 * product. Each output word is built by summing every a[i] * b[j] with
 * i + j equal to the output index into a three-register accumulator
 * (r3 = low, r4 = middle, r5 = high), using umull for each 32x32-bit
 * product. The product is formed in a local buffer and copied to r
 * afterwards, so r is only written once the inputs have been fully read.
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * b  A single precision integer.
 */
SP_NOINLINE static void sp_4096_mul_64(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit tmp[64 * 2];
    /* The asm uses its operand registers as scratch, so they are declared
     * read-write; an array cannot be a read-write operand, hence the
     * pointer. */
    sp_digit* t = tmp;

    __asm__ __volatile__ (
        /* r9 = byte offset of the current output word, r10 = a, r11 = b,
         * r12 = result buffer, r14 = a + 256 (one past the end of a). */
        "mov r3, #0\n\t"
        "mov r4, #0\n\t"
        "mov r9, r3\n\t"
        "mov r12, %[r]\n\t"
        "mov r10, %[a]\n\t"
        "mov r11, %[b]\n\t"
        "mov r6, #1\n\t"
        "lsl r6, r6, #8\n\t"
        "add r6, r6, r10\n\t"
        "mov r14, r6\n\t"
        "\n1:\n\t"
        /* %[r] holds zero for carry propagation while a column is summed. */
        "mov %[r], #0\n\t"
        "mov r5, #0\n\t"
        /* Offset into a = max(0, r9 - 252), computed without a branch;
         * offset into b = r9 - offset into a. */
        "mov r6, #252\n\t"
        "mov %[a], r9\n\t"
        "subs %[a], %[a], r6\n\t"
        "sbc r6, r6, r6\n\t"
        "mvn r6, r6\n\t"
        "and %[a], %[a], r6\n\t"
        "mov %[b], r9\n\t"
        "sub %[b], %[b], %[a]\n\t"
        "add %[a], %[a], r10\n\t"
        "add %[b], %[b], r11\n\t"
        "\n2:\n\t"
        /* Multiply Start */
        "ldr r6, [%[a]]\n\t"
        "ldr r8, [%[b]]\n\t"
        "umull r6, r8, r6, r8\n\t"
        "adds r3, r3, r6\n\t"
        "adcs r4, r4, r8\n\t"
        "adc r5, r5, %[r]\n\t"
        /* Multiply Done */
        /* Next pair in this column: a moves up, b moves down. Stop at the
         * end of a or once a has passed a + r9. */
        "add %[a], %[a], #4\n\t"
        "sub %[b], %[b], #4\n\t"
        "cmp %[a], r14\n\t"
        "beq 3f\n\t"
        "mov r6, r9\n\t"
        "add r6, r6, r10\n\t"
        "cmp %[a], r6\n\t"
        "ble 2b\n\t"
        "\n3:\n\t"
        /* Store the finished word and shift the accumulator down. */
        "mov %[r], r12\n\t"
        "mov r8, r9\n\t"
        "str r3, [%[r], r8]\n\t"
        "mov r3, r4\n\t"
        "mov r4, r5\n\t"
        "add r8, r8, #4\n\t"
        "mov r9, r8\n\t"
        /* Keep going while the offset is <= 504 (256 + 248). */
        "mov r6, #1\n\t"
        "lsl r6, r6, #8\n\t"
        "add r6, r6, #248\n\t"
        "cmp r8, r6\n\t"
        "ble 1b\n\t"
        /* Final (top) word at offset 508. */
        "str r3, [%[r], r8]\n\t"
        "mov %[a], r10\n\t"
        "mov %[b], r11\n\t"
        : [r] "+r" (t), [a] "+r" (a), [b] "+r" (b)
        :
        /* "cc": subs/adds/adcs/cmp rewrite the condition flags. */
        : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14", "cc"
    );

    XMEMCPY(r, tmp, sizeof(tmp));
}
/* Mask every word of a with m and write the result to r.
 * (r[i] = a[i] & m for each of the 64 words)
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * m  Mask to AND against each digit.
 */
static void sp_4096_mask_64(sp_digit* r, const sp_digit* a, sp_digit m)
{
    int idx = 0;

#ifdef WOLFSSL_SP_SMALL
    /* Compact form: one word per iteration. */
    while (idx < 64) {
        r[idx] = a[idx] & m;
        idx++;
    }
#else
    /* Unrolled form: eight words per iteration, in ascending order. */
    while (idx < 64) {
        sp_digit* dst = r + idx;
        const sp_digit* src = a + idx;

        dst[0] = src[0] & m;
        dst[1] = src[1] & m;
        dst[2] = src[2] & m;
        dst[3] = src[3] & m;
        dst[4] = src[4] & m;
        dst[5] = src[5] & m;
        dst[6] = src[6] & m;
        dst[7] = src[7] & m;
        idx += 8;
    }
#endif
}
/* Multiply a and b into r. (r = a * b)
*
* r A single precision integer.
@@ -11100,109 +10813,6 @@ SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a,
(void)sp_4096_add_128(r + 128, r + 128, z2);
}
/* Square a and put result in r. (r = a * a)
 *
 * Column-wise squaring of a 64-word number into a 128-word result. For
 * each output word the products a[i] * a[j] with i + j equal to the output
 * index are summed into a three-register accumulator (r3 = low,
 * r4 = middle, r5 = high). Products with i != j are added twice and the
 * i == j product once.
 *
 * The result is built in a 512-byte scratch area that the asm carves out
 * below the stack pointer, then copied to r; sp is restored before the asm
 * ends.
 * NOTE(review): sp is moved inside the asm without the compiler being
 * told - confirm this is safe for the target ABI and build options.
 *
 * r  A single precision integer.
 * a  A single precision integer.
 */
SP_NOINLINE static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a)
{
    __asm__ __volatile__ (
        /* r9 = byte offset of the current output word, r10 = a,
         * r11 = scratch result on the stack, r12 = r. */
        "mov r3, #0\n\t"
        "mov r4, #0\n\t"
        "mov r5, #0\n\t"
        "mov r9, r3\n\t"
        "mov r12, %[r]\n\t"
        /* sp -= 512 */
        "mov r6, #2\n\t"
        "lsl r6, r6, #8\n\t"
        "neg r6, r6\n\t"
        "add sp, sp, r6\n\t"
        "mov r11, sp\n\t"
        "mov r10, %[a]\n\t"
        "\n1:\n\t"
        /* %[r] holds zero for carry propagation while a column is summed. */
        "mov %[r], #0\n\t"
        /* Lower offset = max(0, r9 - 252), computed without a branch;
         * upper offset (r2) = r9 - lower offset. */
        "mov r6, #252\n\t"
        "mov %[a], r9\n\t"
        "subs %[a], %[a], r6\n\t"
        "sbc r6, r6, r6\n\t"
        "mvn r6, r6\n\t"
        "and %[a], %[a], r6\n\t"
        "mov r2, r9\n\t"
        "sub r2, r2, %[a]\n\t"
        "add %[a], %[a], r10\n\t"
        "add r2, r2, r10\n\t"
        "\n2:\n\t"
        /* Both pointers on the same word: square term, added once. */
        "cmp r2, %[a]\n\t"
        "beq 4f\n\t"
        /* Multiply * 2: Start */
        "ldr r6, [%[a]]\n\t"
        "ldr r8, [r2]\n\t"
        "umull r6, r8, r6, r8\n\t"
        "adds r3, r3, r6\n\t"
        "adcs r4, r4, r8\n\t"
        "adc r5, r5, %[r]\n\t"
        "adds r3, r3, r6\n\t"
        "adcs r4, r4, r8\n\t"
        "adc r5, r5, %[r]\n\t"
        /* Multiply * 2: Done */
        "bal 5f\n\t"
        "\n4:\n\t"
        /* Square: Start */
        "ldr r6, [%[a]]\n\t"
        "umull r6, r8, r6, r6\n\t"
        "adds r3, r3, r6\n\t"
        "adcs r4, r4, r8\n\t"
        "adc r5, r5, %[r]\n\t"
        /* Square: Done */
        "\n5:\n\t"
        /* Move the pointers towards each other. The column is done at the
         * end of a, once the pointers have crossed, or once the lower
         * pointer has passed a + r9. */
        "add %[a], %[a], #4\n\t"
        "sub r2, r2, #4\n\t"
        "mov r6, #1\n\t"
        "lsl r6, r6, #8\n\t"
        "add r6, r6, r10\n\t"
        "cmp %[a], r6\n\t"
        "beq 3f\n\t"
        "cmp %[a], r2\n\t"
        "bgt 3f\n\t"
        "mov r8, r9\n\t"
        "add r8, r8, r10\n\t"
        "cmp %[a], r8\n\t"
        "ble 2b\n\t"
        "\n3:\n\t"
        /* Store the finished word in the scratch area and shift the
         * accumulator down. */
        "mov %[r], r11\n\t"
        "mov r8, r9\n\t"
        "str r3, [%[r], r8]\n\t"
        "mov r3, r4\n\t"
        "mov r4, r5\n\t"
        "mov r5, #0\n\t"
        "add r8, r8, #4\n\t"
        "mov r9, r8\n\t"
        /* Keep going while the offset is <= 504 (256 + 248). */
        "mov r6, #1\n\t"
        "lsl r6, r6, #8\n\t"
        "add r6, r6, #248\n\t"
        "cmp r8, r6\n\t"
        "ble 1b\n\t"
        "mov %[a], r10\n\t"
        /* Final (top) word at offset 508. */
        "str r3, [%[r], r8]\n\t"
        /* Copy the 512-byte result from the scratch area to r, from the
         * top word down. */
        "mov %[r], r12\n\t"
        "mov %[a], r11\n\t"
        "mov r3, #1\n\t"
        "lsl r3, r3, #8\n\t"
        "add r3, r3, #252\n\t"
        "\n4:\n\t"
        "ldr r6, [%[a], r3]\n\t"
        "str r6, [%[r], r3]\n\t"
        "subs r3, r3, #4\n\t"
        "bge 4b\n\t"
        /* sp += 512 */
        "mov r6, #2\n\t"
        "lsl r6, r6, #8\n\t"
        "add sp, sp, r6\n\t"
        /* Both operand registers are overwritten above (%[a] ends up
         * pointing at the scratch area), so they are read-write operands. */
        : [r] "+r" (r), [a] "+r" (a)
        :
        /* "cc": subs/adds/adcs/cmp rewrite the condition flags. */
        : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "cc"
    );
}
/* Square a and put result in r. (r = a * a)
*
* r A single precision integer.